ci(pr-review): iterative batch review for large diffs

Replaces the single monolithic LLM call (which timed out on PRs >~150KB) with a batching strategy:

- Changed files are grouped into batches capped at 2500 lines each, always respecting file boundaries (no file is split across batches)
- Each batch is sent as an independent LLM call (max-time 600s)
- Verdicts are aggregated: harshest verdict across all batches wins
- Single-batch PRs produce identical output to the old workflow
- Multi-batch PRs get a combined review with per-batch sections and an Overall Verdict line at the end
- Partial failures are tolerated: if some batches succeed, the review is posted with a note about any failed batches
- Cleanup removes /tmp/pr_batch_*.txt alongside existing temp files
2026-06-20 19:54:00 -05:00 · 2026-06-20 19:54:00 -05:00 · c8399bcdeb
commit c8399bcdeb
parent 9b94e99e4d
1 changed files with 232 additions and 152 deletions
--- a/.gitea/workflows/pr-review.yml
+++ b/.gitea/workflows/pr-review.yml
@ -31,7 +31,7 @@ jobs:
          git fetch --depth=1 origin ${{ github.head_ref }}
          git checkout FETCH_HEAD

-      - name: Build review context
+      - name: Build review batches
        id: context
        shell: bash
        run: |
@ -49,13 +49,10 @@ jobs:
          if [ "$FILE_COUNT" -eq 0 ]; then
            echo "No reviewable files changed."
            echo "diff_size=0" >> $GITHUB_OUTPUT
+            echo "batch_count=0" >> $GITHUB_OUTPUT
            exit 0
          fi

-          # Build context: full file content for each changed file.
-          # Files <= 500 lines: include complete content.
-          # Files > 500 lines: include the per-file diff with generous context (±50 lines).
-          #
          # Secret scrubbing: match actual credential VALUES only — known API key formats,
          # or keyword="long_quoted_literal" (25+ chars). Never scrub on keyword alone,
          # which would silently delete function signatures, variable declarations, and tests.
@ -63,35 +60,53 @@ jobs:
          # Only strip lines that are ENTIRELY a long base64 blob (e.g. PEM cert bodies)
          B64_PATTERN='^[[:space:]]*[A-Za-z0-9+/]{60,}={0,2}[[:space:]]*$'

-          > /tmp/pr_context.txt
+          # Split changed files into batches capped at MAX_BATCH_LINES each.
+          # File boundaries are always respected — a file is never split across batches.
+          MAX_BATCH_LINES=2500
+          BATCH=1
+          BATCH_LINES=0
+          TOTAL_LINES=0
+          BATCH_FILE="/tmp/pr_batch_001.txt"
+          > "$BATCH_FILE"
+
          while IFS= read -r file; do
            [ -f "$file" ] || continue
-            lines=$(wc -l < "$file" | tr -d ' ')
-            printf '\n════════ FILE: %s (%s lines) ════════\n' "$file" "$lines" >> /tmp/pr_context.txt
-            if [ "$lines" -le 500 ]; then
-              # Full file — model sees the complete implementation
-              grep -v -E "$SECRET_PATTERN" "$file" \
-                | grep -v -E "$B64_PATTERN" \
-                >> /tmp/pr_context.txt || true
+            file_lines=$(wc -l < "$file" | tr -d ' ')
+
+            # Build context for this individual file into a temp file
+            {
+              printf '\n════════ FILE: %s (%s lines) ════════\n' "$file" "$file_lines"
+              if [ "$file_lines" -le 500 ]; then
+                grep -v -E "$SECRET_PATTERN" "$file" | grep -v -E "$B64_PATTERN" || true
              else
-              # Large file — emit annotated diff hunks (±50 lines of context each)
-              printf '[File too large for full view (%s lines) — showing changed sections only]\n' "$lines" >> /tmp/pr_context.txt
+                printf '[File too large for full view (%s lines) — showing changed sections only]\n' "$file_lines"
                git diff -U50 origin/${{ github.base_ref }}..HEAD -- "$file" \
                  | grep -v -E "$SECRET_PATTERN" \
                  | grep -v -E "$B64_PATTERN" \
-                >> /tmp/pr_context.txt || true
+                  || true
              fi
+            } > /tmp/fc_tmp.txt
+
+            FC_LINES=$(wc -l < /tmp/fc_tmp.txt | tr -d ' ')
+
+            # Start a new batch if this file would overflow the current one (and batch is not empty)
+            if [ "$BATCH_LINES" -gt 0 ] && [ $((BATCH_LINES + FC_LINES)) -gt $MAX_BATCH_LINES ]; then
+              BATCH=$((BATCH + 1))
+              BATCH_LINES=0
+              BATCH_FILE="/tmp/pr_batch_$(printf '%03d' $BATCH).txt"
+              > "$BATCH_FILE"
+            fi
+
+            cat /tmp/fc_tmp.txt >> "$BATCH_FILE"
+            BATCH_LINES=$((BATCH_LINES + FC_LINES))
+            TOTAL_LINES=$((TOTAL_LINES + FC_LINES))
          done < /tmp/pr_files.txt

-          TOTAL=$(wc -l < /tmp/pr_context.txt | tr -d ' ')
-          echo "diff_size=${TOTAL}" >> $GITHUB_OUTPUT
+          rm -f /tmp/fc_tmp.txt

-          # Cap at 6000 lines so we stay within the model's context window
-          if [ "$TOTAL" -gt 6000 ]; then
-            head -n 6000 /tmp/pr_context.txt > /tmp/pr_context_capped.txt
-            mv /tmp/pr_context_capped.txt /tmp/pr_context.txt
-            echo "[CONTEXT TRUNCATED at 6000 lines — ${TOTAL} total]" >> /tmp/pr_context.txt
-          fi
+          echo "batch_count=${BATCH}" >> $GITHUB_OUTPUT
+          echo "diff_size=${TOTAL_LINES}" >> $GITHUB_OUTPUT
+          echo "Built ${BATCH} batch(es) from ${FILE_COUNT} files (${TOTAL_LINES} total lines)"

      - name: Build codebase index
        id: index
@ -175,7 +190,7 @@ jobs:
          echo "comment_lines=${LINES}" >> $GITHUB_OUTPUT
          echo "Fetched PR history: ${LINES} lines"

-      - name: Analyze with LLM
+      - name: Analyze iteratively
        id: analyze
        if: steps.context.outputs.diff_size != '0'
        shell: bash
@ -185,35 +200,37 @@ jobs:
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_BODY: ${{ github.event.pull_request.body }}
+          BATCH_COUNT: ${{ steps.context.outputs.batch_count }}
        run: |
          set -euo pipefail
          CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt)
+          > /tmp/pr_all_findings.txt
+          OVERALL_VERDICT="APPROVE"
+          BATCH_SUCCESS=0
+          BATCH_FAILED=0

-          # Build prompt file following anthropics/claude-code code-review pattern:
-          # - Multi-agent review (parallel analysis)
-          # - High-signal issues only (no nitpicks, style, or speculative concerns)
-          # - Validate findings against codebase
-          # - Consider PR title/description for author intent
-          # - Check for pre-existing issues
+          for i in $(seq 1 "$BATCH_COUNT"); do
+            BATCH_FILE="/tmp/pr_batch_$(printf '%03d' $i).txt"
+            [ -f "$BATCH_FILE" ] || continue
+
+            # Build the prompt for this batch
            {
              printf '%s\n\n' 'You are a senior engineer performing a code review following the anthropics/claude-code code-review pattern.'
              printf 'PR Title: %s\n' "$PR_TITLE"
              printf 'PR Body: %s\n\n' "${PR_BODY:-No description provided}"
              printf 'Files changed: %s\n\n' "$CHANGED_FILES"
-            printf '%s\n' '---'
-            printf '%s\n\n' '## CODEBASE INDEX'
-            printf '%s\n' 'These are the ONLY Tauri commands, TypeScript exports, Rust public functions,'
-            printf '%s\n' 'and database tables that exist in this project. Before raising any finding,'
-            printf '%s\n' 'confirm that every symbol you cite appears in this list or in the file'
-            printf '%s\n' 'contents below. If it does not appear in either, your finding is fabricated.'
+              if [ "$BATCH_COUNT" -gt 1 ]; then
+                printf 'NOTE: This is a large PR split into %s review batches. You are reviewing BATCH %s of %s.\n' "$BATCH_COUNT" "$i" "$BATCH_COUNT"
+                printf 'Focus ONLY on the files shown in this batch. Do not speculate about files not included here.\n\n'
+              fi
              printf '%s\n' '---'
              cat /tmp/codebase_index.txt
              printf '%s\n\n' '---'
-            printf '%s\n\n' '## CHANGED FILE CONTENTS'
+              printf '%s\n\n' '## CHANGED FILE CONTENTS (THIS BATCH)'
              printf '%s\n' 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).'
              printf '%s\n\n' 'Files over 500 lines show only changed sections with surrounding context.'
              printf '%s\n' '---'
-            cat /tmp/pr_context.txt
+              cat "$BATCH_FILE"
              printf '%s\n\n' '---'
              if [ -s /tmp/pr_comments.txt ]; then
                cat /tmp/pr_comments.txt
@ -281,15 +298,16 @@ jobs:
              printf '%s\n' '   - Issues that linters will catch'
              printf '%s\n' '   - General code quality concerns unless explicitly required in CLAUDE.md'
              printf '%s\n' '   - Issues mentioned in CLAUDE.md but explicitly silenced in code'
-          } > /tmp/prompt.txt
+            } > /tmp/prompt_batch.txt

-          # Write body to file — passing 100KB+ JSON as a shell arg hits ARG_MAX.
            jq -cn \
              --arg model "qwen3.5-122b-think" \
-            --rawfile content /tmp/prompt.txt \
+              --rawfile content /tmp/prompt_batch.txt \
              '{model: $model, messages: [{role: "user", content: $content}], stream: false}' \
              > /tmp/body.json
-          echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Calling liteLLM API ($(wc -c < /tmp/body.json) bytes)..."
+
+            echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Batch ${i}/${BATCH_COUNT} ($(wc -c < /tmp/body.json) bytes)..."
+
+            # Start every request from an empty response file: if curl fails before
+            # writing anything, the previous batch's response must not be mistaken
+            # for this batch's, and the `wc`/`cat` calls below still find a file.
+            : > /tmp/llm_response.json
+
+            # curl exits non-zero on a timeout or connection failure, and under
+            # `set -e` a failing command substitution in an assignment aborts the
+            # whole step. `|| true` keeps the loop alive; -w still prints 000 in
+            # that case, so the status check below records the batch as failed.
             HTTP_CODE=$(curl -s --max-time 600 --connect-timeout 30 \
               --retry 2 --retry-delay 15 --retry-connrefused --retry-max-time 1200 \
               -o /tmp/llm_response.json -w "%{http_code}" \
@ -297,25 +315,82 @@ jobs:
               -H "Authorization: Bearer $LITELLM_API_KEY" \
               -H "Content-Type: application/json" \
-            --data @/tmp/body.json)
+              --data @/tmp/body.json) || true
-          echo "HTTP status: $HTTP_CODE"
-          echo "Response file size: $(wc -c < /tmp/llm_response.json) bytes"
+
+            echo "Batch ${i} HTTP status: $HTTP_CODE"
+            echo "Batch ${i} response size: $(wc -c < /tmp/llm_response.json) bytes"
+
            if [ "$HTTP_CODE" != "200" ]; then
-            echo "ERROR: liteLLM returned HTTP $HTTP_CODE"
+              echo "ERROR: Batch ${i} failed (HTTP $HTTP_CODE)"
              cat /tmp/llm_response.json
-            exit 1
+              {
+                echo "## Batch ${i} of ${BATCH_COUNT}"
+                echo ""
+                echo "Review unavailable — LLM returned HTTP ${HTTP_CODE}."
+                echo ""
+                echo "---"
+                echo ""
+              } >> /tmp/pr_all_findings.txt
+              BATCH_FAILED=$((BATCH_FAILED + 1))
+              continue
            fi
+
            if ! jq empty /tmp/llm_response.json 2>/dev/null; then
-            echo "ERROR: Invalid JSON response from liteLLM"
-            cat /tmp/llm_response.json
+              echo "ERROR: Invalid JSON in batch ${i} response"
+              BATCH_FAILED=$((BATCH_FAILED + 1))
+              continue
+            fi
+
+            BATCH_REVIEW=$(jq -r '.choices[0].message.content // empty' /tmp/llm_response.json)
+            if [ -z "$BATCH_REVIEW" ]; then
+              echo "ERROR: Empty content in batch ${i} response"
+              BATCH_FAILED=$((BATCH_FAILED + 1))
+              continue
+            fi
+
+            echo "Batch ${i} review length: ${#BATCH_REVIEW} chars"
+            BATCH_SUCCESS=$((BATCH_SUCCESS + 1))
+
+            # Track harshest verdict across batches
+            if echo "$BATCH_REVIEW" | grep -q "REQUEST CHANGES"; then
+              OVERALL_VERDICT="REQUEST CHANGES"
+            elif echo "$BATCH_REVIEW" | grep -q "APPROVE WITH COMMENTS" && [ "$OVERALL_VERDICT" != "REQUEST CHANGES" ]; then
+              OVERALL_VERDICT="APPROVE WITH COMMENTS"
+            fi
+
+            if [ "$BATCH_COUNT" -eq 1 ]; then
+              echo "$BATCH_REVIEW" >> /tmp/pr_all_findings.txt
+            else
+              {
+                echo "## Batch ${i} of ${BATCH_COUNT}"
+                echo ""
+                echo "$BATCH_REVIEW"
+                echo ""
+                echo "---"
+                echo ""
+              } >> /tmp/pr_all_findings.txt
+            fi
+          done
+
+          if [ "$BATCH_SUCCESS" -eq 0 ]; then
+            echo "ERROR: All ${BATCH_COUNT} batches failed"
            exit 1
          fi
-          REVIEW=$(jq -r '.choices[0].message.content // empty' /tmp/llm_response.json)
-          if [ -z "$REVIEW" ]; then
-            echo "ERROR: No content in liteLLM response"
-            exit 1
+
+          # Assemble final review
+          if [ "$BATCH_COUNT" -eq 1 ]; then
+            cp /tmp/pr_all_findings.txt /tmp/pr_review.txt
+          else
+            {
+              echo "_This PR was reviewed in ${BATCH_COUNT} batches (${BATCH_FAILED} batch(es) failed)._"
+              echo ""
+              cat /tmp/pr_all_findings.txt
+              echo "---"
+              echo ""
+              echo "**Overall Verdict**: ${OVERALL_VERDICT}"
+              echo ""
+              echo "_Overall verdict reflects the most critical finding across all batches._"
+            } > /tmp/pr_review.txt
          fi
-          echo "Review length: ${#REVIEW} chars"
-          echo "$REVIEW" > /tmp/pr_review.txt

      - name: Verify findings against codebase
        if: steps.analyze.outcome == 'success'
@ -424,4 +499,9 @@ jobs:
      - name: Cleanup
        if: always()
        shell: bash
-        run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/pr_comments.txt /tmp/prompt.txt /tmp/body.json /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt
+        run: |
+          rm -f /tmp/pr_files.txt /tmp/pr_context.txt /tmp/codebase_index.txt \
+                /tmp/pr_comments.txt /tmp/prompt_batch.txt /tmp/body.json \
+                /tmp/llm_response.json /tmp/pr_review.txt /tmp/pr_all_findings.txt \
+                /tmp/review_post_response.json /tmp/fc_tmp.txt
+          rm -f /tmp/pr_batch_*.txt