ci(pr-review): iterative batch review for large diffs
Some checks failed
Test / frontend-tests (pull_request) Successful in 1m41s
Test / frontend-typecheck (pull_request) Successful in 1m58s
PR Review Automation / review (pull_request) Successful in 10m22s
Test / rust-fmt-check (pull_request) Failing after 12m51s
Test / rust-clippy (pull_request) Successful in 14m40s
Test / rust-tests (pull_request) Successful in 16m29s
Some checks failed
Test / frontend-tests (pull_request) Successful in 1m41s
Test / frontend-typecheck (pull_request) Successful in 1m58s
PR Review Automation / review (pull_request) Successful in 10m22s
Test / rust-fmt-check (pull_request) Failing after 12m51s
Test / rust-clippy (pull_request) Successful in 14m40s
Test / rust-tests (pull_request) Successful in 16m29s
Replaces the single monolithic LLM call (which timed out on PRs >~150KB) with a batching strategy: - Changed files are grouped into batches capped at 2500 lines each, always respecting file boundaries (no file is split across batches) - Each batch is sent as an independent LLM call (max-time 600s) - Verdicts are aggregated: harshest verdict across all batches wins - Single-batch PRs produce identical output to the old workflow - Multi-batch PRs get a combined review with per-batch sections and an Overall Verdict line at the end - Partial failures are tolerated: if some batches succeed, the review is posted with a note about any failed batches - Cleanup removes /tmp/pr_batch_*.txt alongside existing temp files
This commit is contained in:
parent
9b94e99e4d
commit
c8399bcdeb
@ -31,7 +31,7 @@ jobs:
|
|||||||
git fetch --depth=1 origin ${{ github.head_ref }}
|
git fetch --depth=1 origin ${{ github.head_ref }}
|
||||||
git checkout FETCH_HEAD
|
git checkout FETCH_HEAD
|
||||||
|
|
||||||
- name: Build review context
|
- name: Build review batches
|
||||||
id: context
|
id: context
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -49,13 +49,10 @@ jobs:
|
|||||||
if [ "$FILE_COUNT" -eq 0 ]; then
|
if [ "$FILE_COUNT" -eq 0 ]; then
|
||||||
echo "No reviewable files changed."
|
echo "No reviewable files changed."
|
||||||
echo "diff_size=0" >> $GITHUB_OUTPUT
|
echo "diff_size=0" >> $GITHUB_OUTPUT
|
||||||
|
echo "batch_count=0" >> $GITHUB_OUTPUT
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Build context: full file content for each changed file.
|
|
||||||
# Files <= 500 lines: include complete content.
|
|
||||||
# Files > 500 lines: include the per-file diff with generous context (±50 lines).
|
|
||||||
#
|
|
||||||
# Secret scrubbing: match actual credential VALUES only — known API key formats,
|
# Secret scrubbing: match actual credential VALUES only — known API key formats,
|
||||||
# or keyword="long_quoted_literal" (25+ chars). Never scrub on keyword alone,
|
# or keyword="long_quoted_literal" (25+ chars). Never scrub on keyword alone,
|
||||||
# which would silently delete function signatures, variable declarations, and tests.
|
# which would silently delete function signatures, variable declarations, and tests.
|
||||||
@ -63,35 +60,53 @@ jobs:
|
|||||||
# Only strip lines that are ENTIRELY a long base64 blob (e.g. PEM cert bodies)
|
# Only strip lines that are ENTIRELY a long base64 blob (e.g. PEM cert bodies)
|
||||||
B64_PATTERN='^[[:space:]]*[A-Za-z0-9+/]{60,}={0,2}[[:space:]]*$'
|
B64_PATTERN='^[[:space:]]*[A-Za-z0-9+/]{60,}={0,2}[[:space:]]*$'
|
||||||
|
|
||||||
> /tmp/pr_context.txt
|
# Split changed files into batches capped at MAX_BATCH_LINES each.
|
||||||
|
# File boundaries are always respected — a file is never split across batches.
|
||||||
|
MAX_BATCH_LINES=2500
|
||||||
|
BATCH=1
|
||||||
|
BATCH_LINES=0
|
||||||
|
TOTAL_LINES=0
|
||||||
|
BATCH_FILE="/tmp/pr_batch_001.txt"
|
||||||
|
> "$BATCH_FILE"
|
||||||
|
|
||||||
while IFS= read -r file; do
|
while IFS= read -r file; do
|
||||||
[ -f "$file" ] || continue
|
[ -f "$file" ] || continue
|
||||||
lines=$(wc -l < "$file" | tr -d ' ')
|
file_lines=$(wc -l < "$file" | tr -d ' ')
|
||||||
printf '\n════════ FILE: %s (%s lines) ════════\n' "$file" "$lines" >> /tmp/pr_context.txt
|
|
||||||
if [ "$lines" -le 500 ]; then
|
# Build context for this individual file into a temp file
|
||||||
# Full file — model sees the complete implementation
|
{
|
||||||
grep -v -E "$SECRET_PATTERN" "$file" \
|
printf '\n════════ FILE: %s (%s lines) ════════\n' "$file" "$file_lines"
|
||||||
| grep -v -E "$B64_PATTERN" \
|
if [ "$file_lines" -le 500 ]; then
|
||||||
>> /tmp/pr_context.txt || true
|
grep -v -E "$SECRET_PATTERN" "$file" | grep -v -E "$B64_PATTERN" || true
|
||||||
else
|
else
|
||||||
# Large file — emit annotated diff hunks (±50 lines of context each)
|
printf '[File too large for full view (%s lines) — showing changed sections only]\n' "$file_lines"
|
||||||
printf '[File too large for full view (%s lines) — showing changed sections only]\n' "$lines" >> /tmp/pr_context.txt
|
git diff -U50 origin/${{ github.base_ref }}..HEAD -- "$file" \
|
||||||
git diff -U50 origin/${{ github.base_ref }}..HEAD -- "$file" \
|
| grep -v -E "$SECRET_PATTERN" \
|
||||||
| grep -v -E "$SECRET_PATTERN" \
|
| grep -v -E "$B64_PATTERN" \
|
||||||
| grep -v -E "$B64_PATTERN" \
|
|| true
|
||||||
>> /tmp/pr_context.txt || true
|
fi
|
||||||
|
} > /tmp/fc_tmp.txt
|
||||||
|
|
||||||
|
FC_LINES=$(wc -l < /tmp/fc_tmp.txt | tr -d ' ')
|
||||||
|
|
||||||
|
# Start a new batch if this file would overflow the current one (and batch is not empty)
|
||||||
|
if [ "$BATCH_LINES" -gt 0 ] && [ $((BATCH_LINES + FC_LINES)) -gt $MAX_BATCH_LINES ]; then
|
||||||
|
BATCH=$((BATCH + 1))
|
||||||
|
BATCH_LINES=0
|
||||||
|
BATCH_FILE="/tmp/pr_batch_$(printf '%03d' $BATCH).txt"
|
||||||
|
> "$BATCH_FILE"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
cat /tmp/fc_tmp.txt >> "$BATCH_FILE"
|
||||||
|
BATCH_LINES=$((BATCH_LINES + FC_LINES))
|
||||||
|
TOTAL_LINES=$((TOTAL_LINES + FC_LINES))
|
||||||
done < /tmp/pr_files.txt
|
done < /tmp/pr_files.txt
|
||||||
|
|
||||||
TOTAL=$(wc -l < /tmp/pr_context.txt | tr -d ' ')
|
rm -f /tmp/fc_tmp.txt
|
||||||
echo "diff_size=${TOTAL}" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
# Cap at 6000 lines so we stay within the model's context window
|
echo "batch_count=${BATCH}" >> $GITHUB_OUTPUT
|
||||||
if [ "$TOTAL" -gt 6000 ]; then
|
echo "diff_size=${TOTAL_LINES}" >> $GITHUB_OUTPUT
|
||||||
head -n 6000 /tmp/pr_context.txt > /tmp/pr_context_capped.txt
|
echo "Built ${BATCH} batch(es) from ${FILE_COUNT} files (${TOTAL_LINES} total lines)"
|
||||||
mv /tmp/pr_context_capped.txt /tmp/pr_context.txt
|
|
||||||
echo "[CONTEXT TRUNCATED at 6000 lines — ${TOTAL} total]" >> /tmp/pr_context.txt
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Build codebase index
|
- name: Build codebase index
|
||||||
id: index
|
id: index
|
||||||
@ -175,7 +190,7 @@ jobs:
|
|||||||
echo "comment_lines=${LINES}" >> $GITHUB_OUTPUT
|
echo "comment_lines=${LINES}" >> $GITHUB_OUTPUT
|
||||||
echo "Fetched PR history: ${LINES} lines"
|
echo "Fetched PR history: ${LINES} lines"
|
||||||
|
|
||||||
- name: Analyze with LLM
|
- name: Analyze iteratively
|
||||||
id: analyze
|
id: analyze
|
||||||
if: steps.context.outputs.diff_size != '0'
|
if: steps.context.outputs.diff_size != '0'
|
||||||
shell: bash
|
shell: bash
|
||||||
@ -185,137 +200,197 @@ jobs:
|
|||||||
PR_TITLE: ${{ github.event.pull_request.title }}
|
PR_TITLE: ${{ github.event.pull_request.title }}
|
||||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||||
PR_BODY: ${{ github.event.pull_request.body }}
|
PR_BODY: ${{ github.event.pull_request.body }}
|
||||||
|
BATCH_COUNT: ${{ steps.context.outputs.batch_count }}
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt)
|
CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt)
|
||||||
|
> /tmp/pr_all_findings.txt
|
||||||
|
OVERALL_VERDICT="APPROVE"
|
||||||
|
BATCH_SUCCESS=0
|
||||||
|
BATCH_FAILED=0
|
||||||
|
|
||||||
# Build prompt file following anthropics/claude-code code-review pattern:
|
for i in $(seq 1 "$BATCH_COUNT"); do
|
||||||
# - Multi-agent review (parallel analysis)
|
BATCH_FILE="/tmp/pr_batch_$(printf '%03d' $i).txt"
|
||||||
# - High-signal issues only (no nitpicks, style, or speculative concerns)
|
[ -f "$BATCH_FILE" ] || continue
|
||||||
# - Validate findings against codebase
|
|
||||||
# - Consider PR title/description for author intent
|
# Build the prompt for this batch
|
||||||
# - Check for pre-existing issues
|
{
|
||||||
{
|
printf '%s\n\n' 'You are a senior engineer performing a code review following the anthropics/claude-code code-review pattern.'
|
||||||
printf '%s\n\n' 'You are a senior engineer performing a code review following the anthropics/claude-code code-review pattern.'
|
printf 'PR Title: %s\n' "$PR_TITLE"
|
||||||
printf 'PR Title: %s\n' "$PR_TITLE"
|
printf 'PR Body: %s\n\n' "${PR_BODY:-No description provided}"
|
||||||
printf 'PR Body: %s\n\n' "${PR_BODY:-No description provided}"
|
printf 'Files changed: %s\n\n' "$CHANGED_FILES"
|
||||||
printf 'Files changed: %s\n\n' "$CHANGED_FILES"
|
if [ "$BATCH_COUNT" -gt 1 ]; then
|
||||||
printf '%s\n' '---'
|
printf 'NOTE: This is a large PR split into %s review batches. You are reviewing BATCH %s of %s.\n' "$BATCH_COUNT" "$i" "$BATCH_COUNT"
|
||||||
printf '%s\n\n' '## CODEBASE INDEX'
|
printf 'Focus ONLY on the files shown in this batch. Do not speculate about files not included here.\n\n'
|
||||||
printf '%s\n' 'These are the ONLY Tauri commands, TypeScript exports, Rust public functions,'
|
fi
|
||||||
printf '%s\n' 'and database tables that exist in this project. Before raising any finding,'
|
|
||||||
printf '%s\n' 'confirm that every symbol you cite appears in this list or in the file'
|
|
||||||
printf '%s\n' 'contents below. If it does not appear in either, your finding is fabricated.'
|
|
||||||
printf '%s\n' '---'
|
|
||||||
cat /tmp/codebase_index.txt
|
|
||||||
printf '%s\n\n' '---'
|
|
||||||
printf '%s\n\n' '## CHANGED FILE CONTENTS'
|
|
||||||
printf '%s\n' 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).'
|
|
||||||
printf '%s\n\n' 'Files over 500 lines show only changed sections with surrounding context.'
|
|
||||||
printf '%s\n' '---'
|
|
||||||
cat /tmp/pr_context.txt
|
|
||||||
printf '%s\n\n' '---'
|
|
||||||
if [ -s /tmp/pr_comments.txt ]; then
|
|
||||||
cat /tmp/pr_comments.txt
|
|
||||||
printf '%s\n\n' '---'
|
|
||||||
printf '%s\n' '## CRITICAL: PRIOR REVIEW CONTEXT ABOVE'
|
|
||||||
printf '%s\n' 'Before raising ANY finding, check the review history above.'
|
|
||||||
printf '%s\n' 'SILENTLY DISCARD any finding that has already been:'
|
|
||||||
printf '%s\n' ' - Marked as invalid or incorrect by a reviewer'
|
|
||||||
printf '%s\n' ' - Acknowledged as an intentional design decision or known limitation'
|
|
||||||
printf '%s\n' ' - Confirmed fixed in a prior commit'
|
|
||||||
printf '%s\n\n' 'Raising a previously-refuted finding is a critical error.'
|
|
||||||
printf '%s\n' '---'
|
printf '%s\n' '---'
|
||||||
fi
|
cat /tmp/codebase_index.txt
|
||||||
printf '%s\n\n' '## CODE REVIEW INSTRUCTIONS'
|
printf '%s\n\n' '---'
|
||||||
printf '%s\n\n' 'You MUST follow this workflow precisely:'
|
printf '%s\n\n' '## CHANGED FILE CONTENTS (THIS BATCH)'
|
||||||
printf '%s\n\n' '1. LAUNCH 4 PARALLEL ANALYSIS AGENTS to independently review the changes:'
|
printf '%s\n' 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).'
|
||||||
printf '%s\n\n' ' AGENT 1 (CLAUDE.MD COMPLIANCE): Audit changes for CLAUDE.md compliance'
|
printf '%s\n\n' 'Files over 500 lines show only changed sections with surrounding context.'
|
||||||
printf '%s\n' ' - Only consider CLAUDE.md files that share a file path with the file or parents'
|
printf '%s\n' '---'
|
||||||
printf '%s\n' ' - Quote exact rules being violated'
|
cat "$BATCH_FILE"
|
||||||
printf '%s\n\n' ' AGENT 2 (CLAUDE.MD COMPLIANCE): Audit changes for CLAUDE.md compliance'
|
printf '%s\n\n' '---'
|
||||||
printf '%s\n' ' - Same scope as Agent 1, parallel analysis'
|
if [ -s /tmp/pr_comments.txt ]; then
|
||||||
printf '%s\n\n' ' AGENT 3 (BUG DETECTOR): Scan for obvious bugs in the diff itself'
|
cat /tmp/pr_comments.txt
|
||||||
printf '%s\n' ' - Focus ONLY on the diff, no extra context'
|
printf '%s\n\n' '---'
|
||||||
printf '%s\n' ' - Flag ONLY significant bugs, ignore nitpicks and likely false positives'
|
printf '%s\n' '## CRITICAL: PRIOR REVIEW CONTEXT ABOVE'
|
||||||
printf '%s\n' ' - Do not flag issues that require context outside the git diff'
|
printf '%s\n' 'Before raising ANY finding, check the review history above.'
|
||||||
printf '%s\n\n' ' AGENT 4 (BUG DETECTOR): Look for problems in introduced code'
|
printf '%s\n' 'SILENTLY DISCARD any finding that has already been:'
|
||||||
printf '%s\n' ' - Security issues, incorrect logic, data loss'
|
printf '%s\n' ' - Marked as invalid or incorrect by a reviewer'
|
||||||
printf '%s\n' ' - Only problems that fall within the changed code'
|
printf '%s\n' ' - Acknowledged as an intentional design decision or known limitation'
|
||||||
printf '%s\n\n' '2. CRITICAL: Only flag HIGH SIGNAL issues where:'
|
printf '%s\n' ' - Confirmed fixed in a prior commit'
|
||||||
printf '%s\n' ' - Code will fail to compile or parse (syntax errors, type errors)'
|
printf '%s\n\n' 'Raising a previously-refuted finding is a critical error.'
|
||||||
printf '%s\n' ' - Code will definitely produce wrong results (clear logic errors)'
|
printf '%s\n' '---'
|
||||||
printf '%s\n' ' - Clear, unambiguous violations with exact rule quoted'
|
fi
|
||||||
printf '%s\n\n' ' DO NOT flag:'
|
printf '%s\n\n' '## CODE REVIEW INSTRUCTIONS'
|
||||||
printf '%s\n' ' - Code style or quality concerns'
|
printf '%s\n\n' 'You MUST follow this workflow precisely:'
|
||||||
printf '%s\n' ' - Potential issues that depend on specific inputs or state'
|
printf '%s\n\n' '1. LAUNCH 4 PARALLEL ANALYSIS AGENTS to independently review the changes:'
|
||||||
printf '%s\n' ' - Subjective suggestions or improvements'
|
printf '%s\n\n' ' AGENT 1 (CLAUDE.MD COMPLIANCE): Audit changes for CLAUDE.md compliance'
|
||||||
printf '%s\n' ' - Pre-existing issues'
|
printf '%s\n' ' - Only consider CLAUDE.md files that share a file path with the file or parents'
|
||||||
printf '%s\n' ' - Issues that linters will catch'
|
printf '%s\n' ' - Quote exact rules being violated'
|
||||||
printf '%s\n' ' - General security issues unless explicitly required in CLAUDE.md'
|
printf '%s\n\n' ' AGENT 2 (CLAUDE.MD COMPLIANCE): Audit changes for CLAUDE.md compliance'
|
||||||
printf '%s\n\n' '3. FOR EACH ISSUE FOUND BY AGENTS 3 & 4:'
|
printf '%s\n' ' - Same scope as Agent 1, parallel analysis'
|
||||||
printf '%s\n' ' - Launch a VALIDATION AGENT to verify the issue is real'
|
printf '%s\n\n' ' AGENT 3 (BUG DETECTOR): Scan for obvious bugs in the diff itself'
|
||||||
printf '%s\n' ' - Validation agent checks: issue is truly an issue, not false positive'
|
printf '%s\n' ' - Focus ONLY on the diff, no extra context'
|
||||||
printf '%s\n' ' - Use full codebase to validate (not just diff)'
|
printf '%s\n' ' - Flag ONLY significant bugs, ignore nitpicks and likely false positives'
|
||||||
printf '%s\n' ' - If validation fails, discard the issue silently'
|
printf '%s\n' ' - Do not flag issues that require context outside the git diff'
|
||||||
printf '%s\n\n' '4. OUTPUT FORMAT (strict):'
|
printf '%s\n\n' ' AGENT 4 (BUG DETECTOR): Look for problems in introduced code'
|
||||||
printf '%s\n\n' ' **Summary** (2-3 sentences)'
|
printf '%s\n' ' - Security issues, incorrect logic, data loss'
|
||||||
printf '%s\n' ' **Findings**'
|
printf '%s\n' ' - Only problems that fall within the changed code'
|
||||||
printf '%s\n' ' - [SEVERITY] file:line - description'
|
printf '%s\n\n' '2. CRITICAL: Only flag HIGH SIGNAL issues where:'
|
||||||
printf '%s\n' ' Evidence: quoted line'
|
printf '%s\n' ' - Code will fail to compile or parse (syntax errors, type errors)'
|
||||||
printf '%s\n\n' ' Fix: concrete change'
|
printf '%s\n' ' - Code will definitely produce wrong results (clear logic errors)'
|
||||||
printf '%s\n\n' ' (Write "No findings." if none.)'
|
printf '%s\n' ' - Clear, unambiguous violations with exact rule quoted'
|
||||||
printf '%s\n' ' **Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES'
|
printf '%s\n\n' ' DO NOT flag:'
|
||||||
printf '%s\n\n' '5. SEVERITY DEFINITIONS:'
|
printf '%s\n' ' - Code style or quality concerns'
|
||||||
printf '%s\n' ' - BLOCKER: fails to compile, corrupts data, or security vulnerability'
|
printf '%s\n' ' - Potential issues that depend on specific inputs or state'
|
||||||
printf '%s\n' ' - WARNING: real risk to address before merge'
|
printf '%s\n' ' - Subjective suggestions or improvements'
|
||||||
printf '%s\n' ' - SUGGESTION: minor improvement, follow-up PR fine'
|
printf '%s\n' ' - Pre-existing issues'
|
||||||
printf '%s\n\n' '6. FOCUS AREAS:'
|
printf '%s\n' ' - Issues that linters will catch'
|
||||||
printf '%s\n' ' - Security bugs, logic errors, data loss, injection, unhandled errors'
|
printf '%s\n' ' - General security issues unless explicitly required in CLAUDE.md'
|
||||||
printf '%s\n\n' '7. IGNORE:'
|
printf '%s\n\n' '3. FOR EACH ISSUE FOUND BY AGENTS 3 & 4:'
|
||||||
printf '%s\n' ' - Style, missing comments, speculative future concerns'
|
printf '%s\n' ' - Launch a VALIDATION AGENT to verify the issue is real'
|
||||||
printf '%s\n\n' '8. FALSE POSITIVES TO AVOID:'
|
printf '%s\n' ' - Validation agent checks: issue is truly an issue, not false positive'
|
||||||
printf '%s\n' ' - Pre-existing issues'
|
printf '%s\n' ' - Use full codebase to validate (not just diff)'
|
||||||
printf '%s\n' ' - Something that appears buggy but is actually correct'
|
printf '%s\n' ' - If validation fails, discard the issue silently'
|
||||||
printf '%s\n' ' - Pedantic nitpicks that senior engineers would not flag'
|
printf '%s\n\n' '4. OUTPUT FORMAT (strict):'
|
||||||
printf '%s\n' ' - Issues that linters will catch'
|
printf '%s\n\n' ' **Summary** (2-3 sentences)'
|
||||||
printf '%s\n' ' - General code quality concerns unless explicitly required in CLAUDE.md'
|
printf '%s\n' ' **Findings**'
|
||||||
printf '%s\n' ' - Issues mentioned in CLAUDE.md but explicitly silenced in code'
|
printf '%s\n' ' - [SEVERITY] file:line - description'
|
||||||
} > /tmp/prompt.txt
|
printf '%s\n' ' Evidence: quoted line'
|
||||||
|
printf '%s\n\n' ' Fix: concrete change'
|
||||||
|
printf '%s\n\n' ' (Write "No findings." if none.)'
|
||||||
|
printf '%s\n' ' **Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES'
|
||||||
|
printf '%s\n\n' '5. SEVERITY DEFINITIONS:'
|
||||||
|
printf '%s\n' ' - BLOCKER: fails to compile, corrupts data, or security vulnerability'
|
||||||
|
printf '%s\n' ' - WARNING: real risk to address before merge'
|
||||||
|
printf '%s\n' ' - SUGGESTION: minor improvement, follow-up PR fine'
|
||||||
|
printf '%s\n\n' '6. FOCUS AREAS:'
|
||||||
|
printf '%s\n' ' - Security bugs, logic errors, data loss, injection, unhandled errors'
|
||||||
|
printf '%s\n\n' '7. IGNORE:'
|
||||||
|
printf '%s\n' ' - Style, missing comments, speculative future concerns'
|
||||||
|
printf '%s\n\n' '8. FALSE POSITIVES TO AVOID:'
|
||||||
|
printf '%s\n' ' - Pre-existing issues'
|
||||||
|
printf '%s\n' ' - Something that appears buggy but is actually correct'
|
||||||
|
printf '%s\n' ' - Pedantic nitpicks that senior engineers would not flag'
|
||||||
|
printf '%s\n' ' - Issues that linters will catch'
|
||||||
|
printf '%s\n' ' - General code quality concerns unless explicitly required in CLAUDE.md'
|
||||||
|
printf '%s\n' ' - Issues mentioned in CLAUDE.md but explicitly silenced in code'
|
||||||
|
} > /tmp/prompt_batch.txt
|
||||||
|
|
||||||
# Write body to file — passing 100KB+ JSON as a shell arg hits ARG_MAX.
|
jq -cn \
|
||||||
jq -cn \
|
--arg model "qwen3.5-122b-think" \
|
||||||
--arg model "qwen3.5-122b-think" \
|
--rawfile content /tmp/prompt_batch.txt \
|
||||||
--rawfile content /tmp/prompt.txt \
|
'{model: $model, messages: [{role: "user", content: $content}], stream: false}' \
|
||||||
'{model: $model, messages: [{role: "user", content: $content}], stream: false}' \
|
> /tmp/body.json
|
||||||
> /tmp/body.json
|
|
||||||
echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Calling liteLLM API ($(wc -c < /tmp/body.json) bytes)..."
|
echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Batch ${i}/${BATCH_COUNT} ($(wc -c < /tmp/body.json) bytes)..."
|
||||||
HTTP_CODE=$(curl -s --max-time 600 --connect-timeout 30 \
|
|
||||||
--retry 2 --retry-delay 15 --retry-connrefused --retry-max-time 1200 \
|
HTTP_CODE=$(curl -s --max-time 600 --connect-timeout 30 \
|
||||||
-o /tmp/llm_response.json -w "%{http_code}" \
|
--retry 2 --retry-delay 15 --retry-connrefused --retry-max-time 1200 \
|
||||||
-X POST "$LITELLM_URL/chat/completions" \
|
-o /tmp/llm_response.json -w "%{http_code}" \
|
||||||
-H "Authorization: Bearer $LITELLM_API_KEY" \
|
-X POST "$LITELLM_URL/chat/completions" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Authorization: Bearer $LITELLM_API_KEY" \
|
||||||
--data @/tmp/body.json)
|
-H "Content-Type: application/json" \
|
||||||
echo "HTTP status: $HTTP_CODE"
|
--data @/tmp/body.json)
|
||||||
echo "Response file size: $(wc -c < /tmp/llm_response.json) bytes"
|
|
||||||
if [ "$HTTP_CODE" != "200" ]; then
|
echo "Batch ${i} HTTP status: $HTTP_CODE"
|
||||||
echo "ERROR: liteLLM returned HTTP $HTTP_CODE"
|
echo "Batch ${i} response size: $(wc -c < /tmp/llm_response.json) bytes"
|
||||||
cat /tmp/llm_response.json
|
|
||||||
|
if [ "$HTTP_CODE" != "200" ]; then
|
||||||
|
echo "ERROR: Batch ${i} failed (HTTP $HTTP_CODE)"
|
||||||
|
cat /tmp/llm_response.json
|
||||||
|
{
|
||||||
|
echo "## Batch ${i} of ${BATCH_COUNT}"
|
||||||
|
echo ""
|
||||||
|
echo "Review unavailable — LLM returned HTTP ${HTTP_CODE}."
|
||||||
|
echo ""
|
||||||
|
echo "---"
|
||||||
|
echo ""
|
||||||
|
} >> /tmp/pr_all_findings.txt
|
||||||
|
BATCH_FAILED=$((BATCH_FAILED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! jq empty /tmp/llm_response.json 2>/dev/null; then
|
||||||
|
echo "ERROR: Invalid JSON in batch ${i} response"
|
||||||
|
BATCH_FAILED=$((BATCH_FAILED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
BATCH_REVIEW=$(jq -r '.choices[0].message.content // empty' /tmp/llm_response.json)
|
||||||
|
if [ -z "$BATCH_REVIEW" ]; then
|
||||||
|
echo "ERROR: Empty content in batch ${i} response"
|
||||||
|
BATCH_FAILED=$((BATCH_FAILED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Batch ${i} review length: ${#BATCH_REVIEW} chars"
|
||||||
|
BATCH_SUCCESS=$((BATCH_SUCCESS + 1))
|
||||||
|
|
||||||
|
# Track harshest verdict across batches
|
||||||
|
if echo "$BATCH_REVIEW" | grep -q "REQUEST CHANGES"; then
|
||||||
|
OVERALL_VERDICT="REQUEST CHANGES"
|
||||||
|
elif echo "$BATCH_REVIEW" | grep -q "APPROVE WITH COMMENTS" && [ "$OVERALL_VERDICT" != "REQUEST CHANGES" ]; then
|
||||||
|
OVERALL_VERDICT="APPROVE WITH COMMENTS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$BATCH_COUNT" -eq 1 ]; then
|
||||||
|
echo "$BATCH_REVIEW" >> /tmp/pr_all_findings.txt
|
||||||
|
else
|
||||||
|
{
|
||||||
|
echo "## Batch ${i} of ${BATCH_COUNT}"
|
||||||
|
echo ""
|
||||||
|
echo "$BATCH_REVIEW"
|
||||||
|
echo ""
|
||||||
|
echo "---"
|
||||||
|
echo ""
|
||||||
|
} >> /tmp/pr_all_findings.txt
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "$BATCH_SUCCESS" -eq 0 ]; then
|
||||||
|
echo "ERROR: All ${BATCH_COUNT} batches failed"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if ! jq empty /tmp/llm_response.json 2>/dev/null; then
|
|
||||||
echo "ERROR: Invalid JSON response from liteLLM"
|
# Assemble final review
|
||||||
cat /tmp/llm_response.json
|
if [ "$BATCH_COUNT" -eq 1 ]; then
|
||||||
exit 1
|
cp /tmp/pr_all_findings.txt /tmp/pr_review.txt
|
||||||
|
else
|
||||||
|
{
|
||||||
|
echo "_This PR was reviewed in ${BATCH_COUNT} batches (${BATCH_FAILED} batch(es) failed)._"
|
||||||
|
echo ""
|
||||||
|
cat /tmp/pr_all_findings.txt
|
||||||
|
echo "---"
|
||||||
|
echo ""
|
||||||
|
echo "**Overall Verdict**: ${OVERALL_VERDICT}"
|
||||||
|
echo ""
|
||||||
|
echo "_Overall verdict reflects the most critical finding across all batches._"
|
||||||
|
} > /tmp/pr_review.txt
|
||||||
fi
|
fi
|
||||||
REVIEW=$(jq -r '.choices[0].message.content // empty' /tmp/llm_response.json)
|
|
||||||
if [ -z "$REVIEW" ]; then
|
|
||||||
echo "ERROR: No content in liteLLM response"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "Review length: ${#REVIEW} chars"
|
|
||||||
echo "$REVIEW" > /tmp/pr_review.txt
|
|
||||||
|
|
||||||
- name: Verify findings against codebase
|
- name: Verify findings against codebase
|
||||||
if: steps.analyze.outcome == 'success'
|
if: steps.analyze.outcome == 'success'
|
||||||
@ -424,4 +499,9 @@ jobs:
|
|||||||
- name: Cleanup
|
- name: Cleanup
|
||||||
if: always()
|
if: always()
|
||||||
shell: bash
|
shell: bash
|
||||||
run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/pr_comments.txt /tmp/prompt.txt /tmp/body.json /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt
|
run: |
|
||||||
|
rm -f /tmp/pr_files.txt /tmp/pr_context.txt /tmp/codebase_index.txt \
|
||||||
|
/tmp/pr_comments.txt /tmp/prompt_batch.txt /tmp/body.json \
|
||||||
|
/tmp/llm_response.json /tmp/pr_review.txt /tmp/pr_all_findings.txt \
|
||||||
|
/tmp/review_post_response.json /tmp/fc_tmp.txt
|
||||||
|
rm -f /tmp/pr_batch_*.txt
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user