tftsr-devops_investigation/.gitea/workflows/pr-review.yml
Shaun Arman 06956940e2
Some checks failed
Test / rust-fmt-check (pull_request) Successful in 1m46s
Test / frontend-typecheck (pull_request) Successful in 1m49s
Test / frontend-tests (pull_request) Successful in 1m46s
Test / rust-clippy (pull_request) Failing after 3m12s
PR Review Automation / review (pull_request) Successful in 4m37s
Test / rust-tests (pull_request) Successful in 4m34s
fix(ci): reduce AI review hallucinations in pr-review workflow
Three changes:
- Exclude Cargo.lock/lockfiles from the diff — removes ~163 lines of
  hash noise that waste the review budget with no value
- Raise line cap from 500 to 3000 and add a truncation notice when
  the diff is cut, so the model knows the diff is incomplete
- Harden prompt: require quoted evidence for every finding; add explicit
  self-verification step for missing-identifier claims (search full diff
  before raising); tighten no-hallucinate instruction
2026-05-31 14:08:10 -05:00

151 lines
8.3 KiB
YAML

# Workflow: run an LLM-based review on pull requests and post the result
# back to the PR as a review comment.
name: "PR Review Automation"

on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - edited

# Runs are grouped per PR number; cancel-in-progress is enabled for the group.
concurrency:
  group: "pr-review-${{ github.event.pull_request.number }}"
  cancel-in-progress: true

jobs:
  review:
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
    # Bare Ubuntu image with explicit DNS resolvers passed to the container.
    container:
      image: "ubuntu:22.04"
      options: "--dns 8.8.8.8 --dns 1.1.1.1"
    steps:
# Install the CLI tools the later steps call: git (fetch/diff),
# curl (HTTP requests) and jq (JSON construction/parsing).
- name: Install dependencies
  shell: bash
  run: |
    set -euo pipefail
    # errexit makes a failed update abort before the install runs.
    apt-get update -qq
    apt-get install -y -qq git curl jq
# Initialise an empty repository and shallow-fetch only the PR head commit.
- name: Checkout code
  shell: bash
  env:
    REPOSITORY: ${{ github.repository }}
    # The head branch name is chosen by the PR author. Expanding it directly
    # inside the script text would allow shell injection, so it is passed as
    # an environment variable and only ever used quoted.
    HEAD_REF: ${{ github.head_ref }}
  run: |
    set -euo pipefail
    git init
    git remote add origin "https://gogs.tftsr.com/${REPOSITORY}.git"
    # Fully-qualified ref: unambiguous, and can never be parsed as an option.
    git fetch --depth=1 origin "refs/heads/${HEAD_REF}"
    git checkout FETCH_HEAD
# Produce the PR diff and the list of changed files for the review prompt.
# Outputs: diff_size and diff_full_size (both the line count of the diff).
# Files written: /tmp/pr_diff.txt, /tmp/pr_files.txt.
- name: Get PR diff
  id: diff
  shell: bash
  env:
    # Passed via env (not inline expression) so the ref name is never
    # interpreted as shell code.
    BASE_REF: ${{ github.base_ref }}
  run: |
    set -euo pipefail
    # Explicit refspec so origin/<base> is guaranteed to exist afterwards.
    git fetch origin "+refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}"

    # Exclude generated/lock files — they add noise with no review value.
    # '*.lock' matches at any depth (covers Cargo.lock, yarn.lock, ...);
    # the glob form does the same for package-lock.json in subdirectories.
    EXCLUDES=(
      ':(exclude)*.lock'
      ':(exclude,glob)**/package-lock.json'
    )

    # NOTE(review): two-dot diff compares the base tip with HEAD, so commits
    # that landed on the base branch after the PR branched appear as reverse
    # changes. A merge-base diff would need more history than the shallow
    # checkout provides — confirm whether that trade-off is intended.
    git diff "origin/${BASE_REF}..HEAD" -- "${EXCLUDES[@]}" > /tmp/pr_diff.txt

    FULL_SIZE=$(wc -l < /tmp/pr_diff.txt | tr -d ' ')
    {
      echo "diff_size=${FULL_SIZE}"
      echo "diff_full_size=${FULL_SIZE}"
    } >> "$GITHUB_OUTPUT"

    # Build a manifest of changed files so the prompt can include it.
    git diff --name-only "origin/${BASE_REF}..HEAD" -- "${EXCLUDES[@]}" \
      > /tmp/pr_files.txt
# Send the (redacted, possibly truncated) diff to the LLM endpoint and store
# the returned review text in /tmp/pr_review.txt.
# Skipped when the diff step reported an empty diff.
# Fails the step on transport errors, non-200 status, invalid JSON, or an
# empty completion.
- name: Analyze with LLM
  id: analyze
  if: steps.diff.outputs.diff_size != '0'
  shell: bash
  env:
    LITELLM_URL: http://172.0.0.29:11434/v1
    LITELLM_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
    PR_TITLE: ${{ github.event.pull_request.title }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    set -euo pipefail

    # Single source of truth for the truncation limit.
    MAX_DIFF_LINES=3000
    # The prompt and request body go through files rather than argv: a
    # 3000-line diff can exceed the kernel's per-argument size limit
    # ("Argument list too long") when passed via --arg / -d "$BODY".
    PROMPT_FILE=/tmp/pr_prompt.txt
    REQUEST_FILE=/tmp/llm_request.json
    trap 'rm -f "$PROMPT_FILE" "$REQUEST_FILE"' EXIT

    if grep -q "^Binary files" /tmp/pr_diff.txt; then
      echo "WARNING: Binary file changes detected — they will be excluded from analysis"
    fi

    CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt)
    FULL_LINES=$(wc -l < /tmp/pr_diff.txt | tr -d ' ')

    # Redaction: drop added/removed lines that look like credentials, then
    # lines containing a long base64-like run. grep exits 1 when it selects
    # no lines; that is not an error here, so only real failures (>1) abort.
    # NOTE(review): '/' is part of the base64 class, so a +/- line holding a
    # long slash-separated path with no other punctuation is dropped too.
    DIFF_CONTENT=$(head -n "$MAX_DIFF_LINES" /tmp/pr_diff.txt \
      | { grep -v -E '^[+-].*(password[[:space:]]*[=:"'"'"']|token[[:space:]]*[=:"'"'"']|secret[[:space:]]*[=:"'"'"']|api_key[[:space:]]*[=:"'"'"']|private_key[[:space:]]*[=:"'"'"']|Authorization:[[:space:]]|AKIA[A-Z0-9]{16}|xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}|gh[opsu]_[A-Za-z0-9_]{36,}|https?://[^@[:space:]]+:[^@[:space:]]+@)' || [ "$?" -eq 1 ]; } \
      | { grep -v -E '^[+-].*[A-Za-z0-9+/]{40,}={0,2}([^A-Za-z0-9+/=]|$)' || [ "$?" -eq 1 ]; })

    if [ "$FULL_LINES" -gt "$MAX_DIFF_LINES" ]; then
      TRUNCATION_NOTICE="NOTE: This diff was truncated at ${MAX_DIFF_LINES} lines (full diff is ${FULL_LINES} lines). Code appearing near the end of the diff may be incomplete. Do NOT raise findings about code that appears to be missing or incomplete — it may simply be outside the truncated window."
    else
      TRUNCATION_NOTICE="This diff is complete (${FULL_LINES} lines, no truncation)."
    fi

    # Heredoc gives the model real line breaks. A "\n" inside a bash
    # double-quoted string is a literal backslash + n, not a newline.
    # Expanded variable contents are inserted verbatim (not re-evaluated).
    cat > "$PROMPT_FILE" <<EOF
    You are a senior engineer performing a focused code review. Your review must be grounded strictly in the diff provided — do not speculate about code you cannot see.

    PR Title: ${PR_TITLE}
    Files changed: ${CHANGED_FILES}
    ${TRUNCATION_NOTICE}

    Diff:
    ${DIFF_CONTENT}

    ## Instructions

    1. **Read the entire diff before writing anything.** Do not begin composing your review until you have read every line of the diff above.

    2. **Verify before claiming.** Before raising any finding:
       a. Quote the exact line(s) from the diff that support it.
       b. For findings about missing identifiers (undeclared variables, missing parameters, undefined functions): search the ENTIRE diff for the identifier. If it appears anywhere in the diff, the finding is WRONG — discard it.
       c. If you cannot quote supporting evidence from the diff, do not raise the finding.

    3. **Do not hallucinate.** You may only raise issues visible in the diff. If a concern requires reading a file not shown in the diff, write 'outside the scope of this diff' and skip it. Never infer that code is broken from partial information.

    4. **Distinguish severity:**
       - BLOCKER: provably broken from what is shown — will fail to compile or cause data loss
       - WARNING: works but has a real risk that should be addressed before merge
       - SUGGESTION: improvement worth considering in a follow-up PR

    5. **Keep it concise.** Lead with a one-paragraph summary, then list only genuine findings with quoted evidence.

    ## Output format

    **Summary** (1 paragraph)

    **Findings** (each must include a quoted line from the diff)
    - [BLOCKER/WARNING/SUGGESTION] filename:line — description, quoted evidence, suggested fix

    **Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES
    EOF

    # --rawfile reads the prompt from disk; rtrimstr drops the heredoc's
    # trailing newline.
    jq -cn \
      --arg model "qwen3-coder-next" \
      --rawfile content "$PROMPT_FILE" \
      '{model: $model, messages: [{role: "user", content: ($content | rtrimstr("\n"))}], stream: false}' \
      > "$REQUEST_FILE"

    REQUEST_BYTES=$(wc -c < "$REQUEST_FILE" | tr -d ' ')
    echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Calling liteLLM API (${REQUEST_BYTES} bytes)..."

    # '|| true': on a transport failure curl still prints 000 for
    # %{http_code}; let the explicit status check below report it instead of
    # errexit killing the step with no message.
    HTTP_CODE=$(curl -s --max-time 300 --connect-timeout 30 \
      --retry 3 --retry-delay 10 --retry-connrefused --retry-max-time 300 \
      -o /tmp/llm_response.json -w "%{http_code}" \
      -X POST "$LITELLM_URL/chat/completions" \
      -H "Authorization: Bearer $LITELLM_API_KEY" \
      -H "Content-Type: application/json" \
      --data-binary "@${REQUEST_FILE}") || true
    echo "HTTP status: $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ]; then
      echo "ERROR: liteLLM returned HTTP $HTTP_CODE"
      # The response file may not exist if the connection never succeeded.
      if [ -f /tmp/llm_response.json ]; then
        cat /tmp/llm_response.json
      fi
      exit 1
    fi
    echo "Response file size: $(wc -c < /tmp/llm_response.json) bytes"

    if ! jq empty /tmp/llm_response.json 2>/dev/null; then
      echo "ERROR: Invalid JSON response from liteLLM"
      cat /tmp/llm_response.json
      exit 1
    fi

    REVIEW=$(jq -r '.choices[0].message.content // empty' /tmp/llm_response.json)
    if [ -z "$REVIEW" ]; then
      echo "ERROR: No content in liteLLM response"
      exit 1
    fi
    echo "Review length: ${#REVIEW} chars"
    # printf, not echo: review text starting with "-n"/"-e" must not be
    # taken as an echo option.
    printf '%s\n' "$REVIEW" > /tmp/pr_review.txt
# Post the LLM review (or a failure notice when no review text exists) to the
# pull request through the reviews API endpoint.
# Runs even if earlier steps failed, unless the diff was reported as empty.
- name: Post review comment
  if: always() && steps.diff.outputs.diff_size != '0'
  shell: bash
  env:
    TF_TOKEN: ${{ secrets.TFT_GITEA_TOKEN }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    REPOSITORY: ${{ github.repository }}
  run: |
    set -euo pipefail
    if [ -z "${TF_TOKEN:-}" ]; then
      echo "ERROR: TFT_GITEA_TOKEN secret is not set"
      exit 1
    fi

    # -s is true only for an existing, non-empty file.
    if [ -s /tmp/pr_review.txt ]; then
      # Cap the review text at 64 KiB.
      REVIEW_BODY=$(head -c 65536 /tmp/pr_review.txt)
      # The header/footer are assembled inside the jq program, where "\n" is
      # a real newline. In a bash double-quoted --arg value it would be a
      # literal backslash-n and show up verbatim in the posted comment.
      BODY=$(jq -n \
        --arg review "$REVIEW_BODY" \
        '{body: ("Automated PR Review (qwen3-coder-next via liteLLM):\n\n" + $review + "\n\n---\n*automated code review*"), event: "COMMENT"}')
    else
      BODY=$(jq -n \
        '{body: "Automated PR Review could not be completed - LLM analysis failed or produced no output.", event: "COMMENT"}')
    fi

    # '|| true': on a transport failure curl still prints 000 for
    # %{http_code}; the status check below then reports the failure.
    HTTP_CODE=$(curl -s --max-time 30 --connect-timeout 10 \
      -o /tmp/review_post_response.json -w "%{http_code}" \
      -X POST "https://gogs.tftsr.com/api/v1/repos/${REPOSITORY}/pulls/${PR_NUMBER}/reviews" \
      -H "Authorization: Bearer $TF_TOKEN" \
      -H "Content-Type: application/json" \
      -d "$BODY") || true
    echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] Post review HTTP status: $HTTP_CODE"

    if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "201" ]; then
      echo "ERROR: Failed to post review (HTTP $HTTP_CODE)"
      # The response file may not exist if the connection never succeeded.
      if [ -f /tmp/review_post_response.json ]; then
        cat /tmp/review_post_response.json
      fi
      exit 1
    fi
# Always remove the temporary files written by the earlier steps, including
# the changed-files manifest produced alongside the diff.
- name: Cleanup
  if: always()
  shell: bash
  run: |
    rm -f \
      /tmp/pr_diff.txt \
      /tmp/pr_files.txt \
      /tmp/llm_response.json \
      /tmp/pr_review.txt \
      /tmp/review_post_response.json