From 3d6270fb33705e545475dbcd9efd5a2731014ceb Mon Sep 17 00:00:00 2001
From: Shaun Arman
Date: Sun, 31 May 2026 15:06:09 -0500
Subject: [PATCH] fix(ci): replace heredoc with printf to fix YAML block scalar breakage

Shell heredocs with unindented bodies (content starting at column 0)
terminate YAML run: | block scalars. The YAML parser sees the unindented
heredoc body as leaving the block, making the workflow file unparseable --
Gitea silently stops creating runs for a workflow with invalid YAML.

Replace the single-quoted heredoc prompt with a group of printf + cat
calls. Every line stays properly indented within the YAML block scalar.
Use jq --rawfile instead of --arg to load the prompt from a temp file,
which also eliminates shell escaping hazards for large strings.

Format strings that begin with "-" (the separator rules and the bullet
lines) are passed after "--" so that bash's printf builtin does not try
to parse them as options and abort the step under set -e.
---
 .gitea/workflows/pr-review.yml | 118 ++++++++++++---------------------
 1 file changed, 41 insertions(+), 77 deletions(-)

diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml
index 3e1eb9aa..4b4689dd 100644
--- a/.gitea/workflows/pr-review.yml
+++ b/.gitea/workflows/pr-review.yml
@@ -148,86 +148,50 @@ jobs:
         run: |
           set -euo pipefail
           CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt)
-          INDEX=$(cat /tmp/codebase_index.txt)
-          CONTEXT=$(cat /tmp/pr_context.txt)
-          # Build the prompt via a single-quoted heredoc so the shell never
-          # interprets backticks, dollar signs, or other special characters inside.
-          # Variables that must expand ($PR_TITLE etc.) are spliced in by jq --arg,
-          # not by shell interpolation, so the prompt text itself is always literal.
-          PROMPT_TEMPLATE=$(cat << 'ENDPROMPT'
-You are a senior engineer performing a code review for the following pull request.
-
-PR Title: __PR_TITLE__
-Files changed: __CHANGED_FILES__
-
----
-__INDEX__
----
-
-## Changed file contents
-
-Each section below contains the COMPLETE, FINAL content of one changed file after
-the PR's changes have been applied. This is the full file — not a diff. For files
-over 500 lines, only the changed sections are shown with surrounding context.
-
----
-__CONTEXT__
----
-
-## Instructions
-
-Before raising any finding:
-1. Confirm every symbol (function name, command name, variable) you cite exists in
- the CODEBASE INDEX above or in the file contents above. If it appears in neither,
- discard the finding — it does not exist in this project.
-2. Quote the exact line(s) from the file contents that support the finding.
-3. Confirm the issue is a genuine problem, not intentional design.
-4. If any step fails, discard the finding silently — do not mention it.
-
-Do NOT show your reasoning process. Do NOT mention discarded findings.
-Output only confirmed issues.
-
-Severity levels:
-- BLOCKER: will fail to compile, corrupt data, or introduce a security vulnerability
-- WARNING: real risk that should be addressed before merge
-- SUGGESTION: minor improvement, acceptable as a follow-up PR
-
-Focus on: security bugs, logic errors, data loss, injection vectors, unhandled
-error paths that could silently corrupt state.
-Ignore: style preferences, missing comments, speculative future concerns.
-
-## Output format (do not deviate)
-
-**Summary** (2-3 sentences: what the PR does and your overall assessment)
-
-**Findings**
-- [SEVERITY] file:line -- description
- Evidence: quoted line from the file above
- Fix: concrete suggested change
-
-(Write "No findings." if there are none.)
-
-**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES
-ENDPROMPT
-)
-
-          # Splice runtime values into the template using sed so nothing is eval'd
-          PROMPT=$(printf '%s' "$PROMPT_TEMPLATE" \
-            | sed "s|__PR_TITLE__|${PR_TITLE}|g" \
-            | sed "s|__CHANGED_FILES__|${CHANGED_FILES}|g")
-          # INDEX and CONTEXT may contain special sed chars — use python for those
-          PROMPT=$(python3 -c "
-import sys
-template = sys.stdin.read()
-index = open('/tmp/codebase_index.txt').read()
-context = open('/tmp/pr_context.txt').read()
-print(template.replace('__INDEX__', index).replace('__CONTEXT__', context), end='')
-" <<< "$PROMPT")
+          # Build the prompt with printf + cat so every line stays indented
+          # inside the YAML run: | block (an unindented heredoc body would end
+          # the block scalar). Formats starting with "-" must follow "printf --".
+          {
+            printf 'You are a senior engineer performing a code review.\n\n'
+            printf 'PR Title: %s\n' "$PR_TITLE"
+            printf 'Files changed: %s\n\n' "$CHANGED_FILES"
+            printf -- '---\n'
+            cat /tmp/codebase_index.txt
+            printf -- '---\n\n'
+            printf '## Changed file contents\n\n'
+            printf 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).\n'
+            printf 'Files over 500 lines show only changed sections with surrounding context.\n\n'
+            printf -- '---\n'
+            cat /tmp/pr_context.txt
+            printf -- '---\n\n'
+            printf '## Instructions\n\n'
+            printf 'Before raising any finding:\n'
+            printf '1. Confirm every symbol you cite exists in the CODEBASE INDEX or file\n'
+            printf ' contents above. If absent from both, discard the finding.\n'
+            printf '2. Quote the exact line(s) from the file contents that support it.\n'
+            printf '3. Confirm the issue is genuine, not intentional design.\n'
+            printf '4. If any step fails, discard silently -- do not mention it.\n\n'
+            printf 'Do NOT show reasoning. Only output confirmed issues.\n\n'
+            printf 'Severity:\n'
+            printf -- '- BLOCKER: fails to compile, corrupts data, or security vulnerability\n'
+            printf -- '- WARNING: real risk to address before merge\n'
+            printf -- '- SUGGESTION: minor improvement, follow-up PR fine\n\n'
+            printf 'Focus: security bugs, logic errors, data loss, injection, unhandled errors.\n'
+            printf 'Ignore: style, missing comments, speculative future concerns.\n\n'
+            printf '## Output format (strict)\n\n'
+            printf '**Summary** (2-3 sentences)\n\n'
+            printf '**Findings**\n'
+            printf -- '- [SEVERITY] file:line -- description\n'
+            printf ' Evidence: quoted line\n'
+            printf ' Fix: concrete change\n\n'
+            printf '(Write "No findings." if none.)\n\n'
+            printf '**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES\n'
+          } > /tmp/prompt.txt
 
 
           BODY=$(jq -cn \
             --arg model "qwen3-coder-next" \
-            --arg content "$PROMPT" \
+            --rawfile content /tmp/prompt.txt \
             '{model: $model, messages: [{role: "user", content: $content}], stream: false}')
           echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Calling liteLLM API (${#BODY} bytes)..."
           HTTP_CODE=$(curl -s --max-time 300 --connect-timeout 30 \
@@ -364,4 +328,4 @@ print(template.replace('__INDEX__', index).replace('__CONTEXT__', context), end=
       - name: Cleanup
         if: always()
         shell: bash
-        run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt
+        run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/prompt.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt