All checks were successful
Test / rust-fmt-check (pull_request) Successful in 1m51s
Test / frontend-tests (pull_request) Successful in 1m51s
Test / frontend-typecheck (pull_request) Successful in 1m55s
Test / rust-clippy (pull_request) Successful in 3m11s
Test / rust-tests (pull_request) Successful in 4m27s
PR Review Automation / review (pull_request) Successful in 4m47s
The previous regex matched any line containing "password", "token", etc. near certain punctuation characters. This silently removed function signatures, variable declarations, and test assertions from the context sent to the LLM — causing it to hallucinate 3 BLOCKERs per review:
- "function signature missing" (the `password: &str` param was scrubbed)
- "filter body empty" (the filter condition containing "password" was scrubbed)
- "password passed unencrypted" (the decrypt_token call line was scrubbed)

Fix: match actual credential VALUES only:
- Well-known token formats (AKIA..., ghp_..., xox...)
- keyword = "long_quoted_literal" (25+ chars, clearly a value not a name)
- Standalone base64 blob lines (60+ chars, PEM-style)

Never scrub a line just because it contains a credential-related word.
186 lines
9.7 KiB
YAML
186 lines
9.7 KiB
YAML
# Workflow: sends the changed files of a pull request to an LLM and posts
# the resulting review back on the PR.
name: PR Review Automation

# Run when a pull request is opened, receives new commits, is reopened,
# or has its title/description edited.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - edited

# At most one review run per PR: a newer run cancels the one in flight.
concurrency:
  group: pr-review-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  review:
    runs-on: ubuntu-latest
    # Write access to pull requests is the only permission requested.
    permissions:
      pull-requests: write
    # Every step runs inside a plain Ubuntu container with explicit DNS servers.
    container:
      image: ubuntu:22.04
      options: --dns 8.8.8.8 --dns 1.1.1.1
    steps:
|
|
- name: Install dependencies
  shell: bash
  run: |
    set -euo pipefail
    # Install the tools the later steps call: git, curl and jq.
    # Refresh the package index first, then install quietly and
    # non-interactively (-y).
    apt-get update -qq
    apt-get install -y -qq git curl jq
|
|
|
|
- name: Checkout code
  shell: bash
  env:
    REPOSITORY: ${{ github.repository }}
    # The PR branch name is chosen by the PR author. Pass it through the
    # environment instead of interpolating `${{ ... }}` into the script text,
    # so a crafted branch name cannot inject shell commands.
    HEAD_REF: ${{ github.head_ref }}
  run: |
    set -euo pipefail
    git init
    git remote add origin "https://gogs.tftsr.com/${REPOSITORY}.git"
    # Shallow-fetch only the tip of the PR branch, then check it out detached.
    git fetch --depth=1 origin "$HEAD_REF"
    git checkout FETCH_HEAD
|
|
|
|
- name: Build review context
  id: context
  shell: bash
  env:
    # Passed via the environment (not interpolated into the script text) so
    # the ref name is always treated as data by the shell.
    BASE_REF: ${{ github.base_ref }}
  run: |
    set -euo pipefail
    git fetch origin "$BASE_REF"

    # List changed source files (exclude generated/lock files)
    git diff --name-only "origin/${BASE_REF}..HEAD" \
      -- ':!Cargo.lock' ':!package-lock.json' ':!*.lock' \
      > /tmp/pr_files.txt

    FILE_COUNT=$(wc -l < /tmp/pr_files.txt | tr -d ' ')
    echo "files_changed=${FILE_COUNT}" >> "$GITHUB_OUTPUT"

    if [ "$FILE_COUNT" -eq 0 ]; then
      echo "No reviewable files changed."
      # diff_size=0 is what the later steps' `if:` conditions test for.
      echo "diff_size=0" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    # Build context: full file content for each changed file.
    # Files <= 500 lines: include complete content.
    # Files > 500 lines: include the per-file diff with generous context (±50 lines).
    #
    # Secret scrubbing: match actual credential VALUES only — known API key formats,
    # or keyword="long_quoted_literal" (25+ chars). Never scrub on keyword alone,
    # which would silently delete function signatures, variable declarations, and tests.
    #
    # The '-' sits LAST in the bracket expression on purpose. Backslash is a
    # literal inside a POSIX bracket expression, so the earlier `\-!` was read
    # as a reversed range from '\' to '!', which grep either rejects or treats
    # as empty depending on locale; '-' and '!' were never matched.
    SECRET_PATTERN='AKIA[A-Z0-9]{16}|gh[opsu]_[A-Za-z0-9_]{36,}|xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}|(password|token|api_key|secret)[[:space:]]*=[[:space:]]*["'"'"'][A-Za-z0-9+/_!@#-]{25,}["'"'"']'
    # Only strip lines that are ENTIRELY a long base64 blob (e.g. PEM cert bodies)
    B64_PATTERN='^[[:space:]]*[A-Za-z0-9+/]{60,}={0,2}[[:space:]]*$'

    # Filter stdin to stdout, dropping every line that matches either pattern.
    # grep exits 1 when it selects no lines (empty input or everything
    # scrubbed); that is not an error. Any other non-zero status (e.g. a bad
    # regex) fails the step instead of silently producing an empty context.
    scrub_secrets() {
      grep -v -E -e "$SECRET_PATTERN" -e "$B64_PATTERN" || [ "$?" -eq 1 ]
    }

    : > /tmp/pr_context.txt
    while IFS= read -r file; do
      # Files deleted by the PR are listed but no longer exist in the checkout.
      [ -f "$file" ] || continue
      lines=$(wc -l < "$file" | tr -d ' ')
      printf '\n════════ FILE: %s (%s lines) ════════\n' "$file" "$lines" >> /tmp/pr_context.txt
      if [ "$lines" -le 500 ]; then
        # Full file — model sees the complete implementation
        scrub_secrets < "$file" >> /tmp/pr_context.txt
      else
        # Large file — emit annotated diff hunks (±50 lines of context each)
        printf '[File too large for full view (%s lines) — showing changed sections only]\n' "$lines" >> /tmp/pr_context.txt
        git diff -U50 "origin/${BASE_REF}..HEAD" -- "$file" \
          | scrub_secrets >> /tmp/pr_context.txt
      fi
    done < /tmp/pr_files.txt

    # diff_size reports the size BEFORE truncation.
    TOTAL=$(wc -l < /tmp/pr_context.txt | tr -d ' ')
    echo "diff_size=${TOTAL}" >> "$GITHUB_OUTPUT"

    # Cap at 6000 lines so we stay within the model's context window
    if [ "$TOTAL" -gt 6000 ]; then
      head -n 6000 /tmp/pr_context.txt > /tmp/pr_context_capped.txt
      mv /tmp/pr_context_capped.txt /tmp/pr_context.txt
      echo "[CONTEXT TRUNCATED at 6000 lines — ${TOTAL} total]" >> /tmp/pr_context.txt
    fi
|
|
|
|
- name: Analyze with LLM
  id: analyze
  if: steps.context.outputs.diff_size != '0'
  shell: bash
  env:
    # NOTE(review): 172.0.0.29 is outside the RFC 1918 private range
    # (172.16.0.0/12) — confirm this address is intended.
    LITELLM_URL: http://172.0.0.29:11434/v1
    LITELLM_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
    PR_TITLE: ${{ github.event.pull_request.title }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    set -euo pipefail

    # The prompt and request body are built in files and handed to jq/curl by
    # path. Passing the (up to 6000-line) context as a single command-line
    # argument fails with "Argument list too long" once it exceeds the
    # kernel's per-argument limit.
    PROMPT_FILE=/tmp/pr_prompt.txt
    REQUEST_FILE=/tmp/llm_request.json
    # Remove this step's scratch files on every exit path.
    trap 'rm -f "$PROMPT_FILE" "$REQUEST_FILE"' EXIT

    CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt)

    # Assemble the prompt with real newlines. Inside a bash double-quoted
    # string "\n" is NOT a newline, so the model used to receive literal
    # backslash-n sequences. Quoted heredocs keep the static text verbatim.
    {
      printf '%s\n\n' "You are a senior engineer performing a code review for the following pull request."
      printf 'PR Title: %s\n' "$PR_TITLE"
      printf 'Files changed: %s\n\n' "$CHANGED_FILES"
      cat <<'EOF'
    ## What you are reading

    Each section below contains the COMPLETE, FINAL content of one changed file after the PR's changes have been applied. This is not a diff — it is the full file. For files over 500 lines, only the changed sections are shown (marked with + / - lines), but surrounding context is included.

    You have full visibility into every function signature, every variable, every import in each file. There are no missing parameters, no truncated signatures, no partial implementations.

    ---
    EOF
      cat /tmp/pr_context.txt
      cat <<'EOF'
    ---

    ## Instructions

    Read every file above completely before writing anything.

    Then, for each potential issue:
    1. Confirm it exists in the code above — quote the exact line.
    2. Confirm it is a real problem (not something that looks unusual but is intentional).
    3. If either check fails, discard the finding silently — do not mention it in your output.

    Do NOT show your verification reasoning. Do NOT mention findings you discarded.
    Only output confirmed issues.

    Severity levels:
    - BLOCKER: provably broken — will fail to compile, corrupt data, or introduce a security vulnerability
    - WARNING: works today but carries real risk that should be fixed before merge
    - SUGGESTION: minor improvement worth a follow-up PR

    Focus on: security bugs, logic errors, data loss, race conditions, injection vectors, unhandled error paths that could silently corrupt state.

    Ignore: style preferences, missing comments, code organisation opinions, speculative future improvements.

    ## Output format (strict — do not deviate)

    **Summary** (2-3 sentences describing what the PR does and your overall assessment)

    **Findings**
    - [SEVERITY] file:line — one-line description
     Evidence: exact quoted line(s)
     Fix: concrete suggested change

    (If there are no findings, write: No findings.)

    **Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES
    EOF
    } > "$PROMPT_FILE"

    # --rawfile reads the whole prompt file as one JSON string.
    jq -cn \
      --arg model "qwen3-coder-next" \
      --rawfile content "$PROMPT_FILE" \
      '{model: $model, messages: [{role: "user", content: $content}], stream: false}' \
      > "$REQUEST_FILE"

    echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Calling liteLLM API ($(wc -c < "$REQUEST_FILE" | tr -d ' ') bytes)..."
    # Response body goes to a file; only the HTTP status code is captured.
    HTTP_CODE=$(curl -s --max-time 300 --connect-timeout 30 \
      --retry 3 --retry-delay 10 --retry-connrefused --retry-max-time 300 \
      -o /tmp/llm_response.json -w "%{http_code}" \
      -X POST "$LITELLM_URL/chat/completions" \
      -H "Authorization: Bearer $LITELLM_API_KEY" \
      -H "Content-Type: application/json" \
      -d "@${REQUEST_FILE}")
    echo "HTTP status: $HTTP_CODE"
    echo "Response file size: $(wc -c < /tmp/llm_response.json) bytes"
    if [ "$HTTP_CODE" != "200" ]; then
      echo "ERROR: liteLLM returned HTTP $HTTP_CODE"
      cat /tmp/llm_response.json
      exit 1
    fi
    if ! jq empty /tmp/llm_response.json 2>/dev/null; then
      echo "ERROR: Invalid JSON response from liteLLM"
      cat /tmp/llm_response.json
      exit 1
    fi
    # `// empty` turns a missing/null content field into an empty string.
    REVIEW=$(jq -r '.choices[0].message.content // empty' /tmp/llm_response.json)
    if [ -z "$REVIEW" ]; then
      echo "ERROR: No content in liteLLM response"
      exit 1
    fi
    echo "Review length: ${#REVIEW} chars"
    # printf, not echo: the review text is arbitrary model output.
    printf '%s\n' "$REVIEW" > /tmp/pr_review.txt
|
|
|
|
- name: Post review comment
  # always(): still post (a failure notice) when the analysis step failed.
  if: always() && steps.context.outputs.diff_size != '0'
  shell: bash
  env:
    TF_TOKEN: ${{ secrets.TFT_GITEA_TOKEN }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    REPOSITORY: ${{ github.repository }}
  run: |
    set -euo pipefail
    if [ -z "${TF_TOKEN:-}" ]; then
      echo "ERROR: TFT_GITEA_TOKEN secret is not set"
      exit 1
    fi
    # -s is true only when the file exists AND is non-empty.
    if [ -s /tmp/pr_review.txt ]; then
      # Cap the review at 64 KiB before posting.
      REVIEW_BODY=$(head -c 65536 /tmp/pr_review.txt)
      # The heading lives in the jq program, where "\n" is a real newline
      # escape. Inside a bash double-quoted string it is not, so the comment
      # used to show a literal "\n\n".
      BODY=$(jq -n \
        --arg review "$REVIEW_BODY" \
        '{body: ("Automated PR Review (qwen3-coder-next via liteLLM):\n\n" + $review), event: "COMMENT"}')
    else
      BODY=$(jq -n \
        '{body: "Automated PR Review could not be completed - LLM analysis failed or produced no output.", event: "COMMENT"}')
    fi
    # Response body goes to a file; only the HTTP status code is captured.
    HTTP_CODE=$(curl -s --max-time 30 --connect-timeout 10 \
      -o /tmp/review_post_response.json -w "%{http_code}" \
      -X POST "https://gogs.tftsr.com/api/v1/repos/${REPOSITORY}/pulls/${PR_NUMBER}/reviews" \
      -H "Authorization: Bearer $TF_TOKEN" \
      -H "Content-Type: application/json" \
      -d "$BODY")
    echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] Post review HTTP status: $HTTP_CODE"
    if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "201" ]; then
      echo "ERROR: Failed to post review (HTTP $HTTP_CODE)"
      cat /tmp/review_post_response.json
      exit 1
    fi
|
|
|
|
- name: Cleanup
  # Runs even when earlier steps failed so scratch files never linger.
  if: always()
  shell: bash
  # The post step writes /tmp/review_post_response.json; the previous list
  # only named /tmp/pr_review_post_response.json, so that file was never
  # removed. The original names are kept (rm -f ignores missing files), and
  # the intermediate /tmp/pr_context_capped.txt is covered as well.
  run: |
    rm -f \
      /tmp/pr_diff.txt \
      /tmp/pr_context.txt \
      /tmp/pr_context_capped.txt \
      /tmp/llm_response.json \
      /tmp/pr_review.txt \
      /tmp/pr_review_post_response.json \
      /tmp/review_post_response.json \
      /tmp/pr_files.txt
|