#!/usr/bin/env bash
# Cross-lineage scrum review driver.
#
# Per feedback_cross_lineage_review.md: Opus + Kimi + Qwen3-coder
# review the same diff via /v1/chat. Convergent findings (≥2
# reviewers) = high-signal real bugs; single-reviewer = lineage
# catch.
#
# Usage:
#   ./scripts/scrum_review.sh <diff_file> <bundle_label>
#
# Environment:
#   SCRUM_DATE            date component of the output directory
#                         (default: today, YYYY-MM-DD)
#   LH_GATEWAY            gateway base URL (default: http://127.0.0.1:3110)
#   SCRUM_FORCE_OVERSIZE  set to 1 to bypass the >100KB diff abort
#
# Outputs verbatim verdicts to:
#   reports/scrum/_evidence/<date>/verdicts/<bundle>_<reviewer>.md
#
# Exit codes: 1 = bad arguments, 2 = diff too large.

set -euo pipefail

# All relative paths below (including a relative <diff_file>) are
# resolved from the parent of this script's directory.
cd "$(dirname "$0")/.."

DIFF_FILE="${1:-}"
BUNDLE_LABEL="${2:-}"
DATE="${SCRUM_DATE:-$(date +%Y-%m-%d)}"
GATEWAY="${LH_GATEWAY:-http://127.0.0.1:3110}"
OUT_DIR="reports/scrum/_evidence/${DATE}/verdicts"

# Both arguments are required: the label is part of every output
# filename, so an empty one would silently produce "_opus.md" etc.
if [ -z "$DIFF_FILE" ] || [ ! -f "$DIFF_FILE" ] || [ -z "$BUNDLE_LABEL" ]; then
  {
    echo "usage: $0 <diff_file> <bundle_label>"
    echo "example: $0 reports/scrum/_evidence/2026-04-30/diffs/bundle_A_config_refactor.diff config_refactor"
  } >&2
  exit 1
fi
mkdir -p "$OUT_DIR"

# Arithmetic expansion strips the leading padding some wc
# implementations emit, so the values print cleanly and compare as ints.
DIFF_BYTES=$(( $(wc -c < "$DIFF_FILE") ))
DIFF_LINES=$(( $(wc -l < "$DIFF_FILE") ))
echo "[scrum] $BUNDLE_LABEL — $DIFF_LINES lines · $DIFF_BYTES bytes · 3 reviewers"

# Diff-size guard. Per the 2026-05-02 disposition: a 165KB bundle
# produced 0 convergent findings + 3 confabulated BLOCKs because Kimi
# and Qwen gave up at <300 output tokens (input-token spent on
# scanning, not analysis). Sweet spot per per-component runs is
# ≤60KB. SCRUM_FORCE_OVERSIZE=1 lets operators override for cases
# where splitting isn't possible.
if [ "$DIFF_BYTES" -gt 100000 ] && [ "${SCRUM_FORCE_OVERSIZE:-0}" != "1" ]; then
  {
    echo "[scrum] ABORT: diff is ${DIFF_BYTES} bytes (>100KB)."
    echo " Big diffs make Kimi/Qwen give up early — split into"
    echo " per-component bundles ≤60KB each, then re-run."
    echo " Override (NOT recommended): SCRUM_FORCE_OVERSIZE=1"
  } >&2
  exit 2
fi
if [ "$DIFF_BYTES" -gt 60000 ]; then
  {
    echo "[scrum] WARN: diff is ${DIFF_BYTES} bytes (>60KB) — non-Opus"
    echo " lineages may produce thin output. Per-component split"
    echo " is preferred. Continuing."
  } >&2
fi

# System prompt — same shape as the Rust auditor's review template,
# tightened per feedback_cross_lineage_review.md (lead with verdict).
SYSTEM='You are a senior code reviewer in a 3-lineage cross-review. Your verdict feeds a convergent-finding gate (≥2 reviewers = real bug). Be terse, evidence-based, and lead with the verdict.

For each finding, output one block. The format is STRICT — a post-processor greps WHERE: lines across all 3 reviewers to find convergent findings, so the file path must appear EXACTLY as it does in the diff (e.g. `cmd/foo/main.go:42`, not `foo/main.go:42`).

SEVERITY: BLOCK | WARN | INFO
WHERE: <file>:<line>
WHAT: one-sentence description
WHY: one-sentence rationale grounded in the diff

Severity ladder:
- BLOCK = ship-blocker. Wrong correctness, security flaw, broken contract, lost test coverage, panic on real input, secret leak. Worth blocking the PR.
- WARN = real but non-blocking. Race condition under specific load, missing edge case, weak naming making future bugs likely, regression risk in adjacent code.
- INFO = nit / style / better-name suggestion / dead-code remnant.

Skip the analysis preamble. Lead with the first BLOCK/WARN/INFO block. End with an empty "VERDICT:" line of "ship | ship-with-fixes | hold" + ≤15 word summary. Never invent line numbers — only cite lines the diff shows. Never repeat a file:line in two findings — combine them.'

# Reviewer table: "<short name>|<gateway model id>". The short name
# becomes the suffix of the verdict filename.
REVIEWERS=(
  "opus|opencode/claude-opus-4-7"
  "kimi|openrouter/moonshotai/kimi-k2-0905"
  "qwen|openrouter/qwen/qwen3-coder"
)

DIFF_CONTENT=$(cat "$DIFF_FILE")

#######################################
# Perform one review request using scratch files in a caller-owned
# temp directory. Split out of run_review so the caller can remove the
# directory on every exit path.
# Globals:   OUT_DIR, BUNDLE_LABEL, DIFF_CONTENT, SYSTEM, GATEWAY,
#            DATE (all read)
# Arguments: $1 - short reviewer name
#            $2 - model identifier sent to the gateway
#            $3 - existing scratch directory
# Outputs:   progress line on stdout; verdict markdown written to
#            $OUT_DIR/${BUNDLE_LABEL}_<short>.md
# Returns:   0 on success, 1 on request/response failure
#######################################
_run_review_request() {
  local short="$1" model="$2" tmp_dir="$3"
  local out="$OUT_DIR/${BUNDLE_LABEL}_${short}.md"
  local user="Review the following diff. Bundle: $BUNDLE_LABEL.

\`\`\`diff
$DIFF_CONTENT
\`\`\`"
  local body_file="$tmp_dir/body.json"
  local user_file="$tmp_dir/user.txt"
  local sys_file="$tmp_dir/system.txt"
  # Per-call response file: a shared fixed path would let a failed
  # request display the previous reviewer's body as its own error.
  local resp_file="$tmp_dir/response.json"

  printf " %-6s %s ... " "$short" "$model"
  local t0=$SECONDS

  # Build the body via temp files — both jq's --arg AND curl's
  # --data run into the kernel's argv limit (~128KB) when the diff
  # is large. Voice-ai full bundle was 156K and hit it twice.
  # Piping through files (and using --rawfile for jq) sidesteps both.
  printf '%s' "$user" > "$user_file"
  printf '%s' "$SYSTEM" > "$sys_file"
  if ! jq -n --arg model "$model" --rawfile sys "$sys_file" --rawfile user "$user_file" \
    '{model:$model, max_tokens:4096, messages:[{role:"system",content:$sys},{role:"user",content:$user}]}' \
    > "$body_file"; then
    printf "✗ could not build request body\n"
    return 1
  fi

  # A transport failure (refused, timeout) makes curl exit non-zero
  # while still printing an http_code of 000; tolerate the exit status
  # and let the status check below report it.
  local status
  status=$(curl -sS -o "$resp_file" -w '%{http_code}' --max-time 240 \
    -X POST "$GATEWAY/v1/chat" \
    -H 'Content-Type: application/json' \
    --data-binary "@$body_file") || true
  local elapsed=$((SECONDS - t0))

  if [ "$status" != "200" ]; then
    printf "✗ HTTP %s (%ds)\n" "${status:-000}" "$elapsed"
    # Show the start of the error body only if this request produced one.
    if [ -s "$resp_file" ]; then
      head -c 300 "$resp_file"
      echo
    fi
    return 1
  fi

  # A 200 without usable content must not become a verdict file that
  # just says "null".
  local content latency tokens_in tokens_out
  if ! content=$(jq -r '.content // empty' "$resp_file") || [ -z "$content" ]; then
    printf "✗ no content in response (%ds)\n" "$elapsed"
    return 1
  fi
  latency=$(jq -r '.latency_ms // 0' "$resp_file")
  tokens_in=$(jq -r '.input_tokens // 0' "$resp_file")
  tokens_out=$(jq -r '.output_tokens // 0' "$resp_file")

  {
    echo "# Scrum review — $BUNDLE_LABEL — $short ($model)"
    echo
    echo "**Latency:** ${latency}ms · **Tokens:** ${tokens_in} in / ${tokens_out} out · **Date:** ${DATE}"
    echo
    echo "---"
    echo
    # printf, not echo: model output is arbitrary text.
    printf '%s\n' "$content"
  } > "$out"
  # %s rather than %d so a non-integer metric cannot fail the printf.
  printf "✓ %sms · %st-out → %s\n" "$latency" "$tokens_out" "$out"
}

#######################################
# Send the diff to one reviewer model and save its verdict.
# Globals:   see _run_review_request
# Arguments: $1 - short reviewer name (verdict filename suffix)
#            $2 - model identifier sent to the gateway
# Outputs:   one progress line on stdout
# Returns:   0 on success, non-zero on failure (scratch files are
#            removed either way)
#######################################
run_review() {
  local short="$1" model="$2"
  local tmp_dir rc=0
  if ! tmp_dir=$(mktemp -d); then
    printf " %-6s %s ... ✗ mktemp failed\n" "$short" "$model"
    return 1
  fi
  _run_review_request "$short" "$model" "$tmp_dir" || rc=$?
  rm -rf -- "$tmp_dir"
  return "$rc"
}

# Reviewers run sequentially; one failing reviewer must not stop the
# others, hence the deliberate "|| true".
for r in "${REVIEWERS[@]}"; do
  short="${r%%|*}"
  model="${r#*|}"
  run_review "$short" "$model" || true
done

# ─── Convergence tally ────────────────────────────────────────────
# Walk the 3 verdicts, extract WHERE: lines + their SEVERITY, dedupe
# across reviewers. Output a tally file showing what ≥2 reviewers
# flagged (real-bug signal) vs 1-reviewer (lineage catch / possibly
# confabulation).
TALLY="$OUT_DIR/${BUNDLE_LABEL}_tally.md"
{
  echo "# Convergence tally — $BUNDLE_LABEL"
  echo
  echo "**Date:** ${DATE} · **Diff:** ${DIFF_LINES} lines / ${DIFF_BYTES} bytes"
  echo
  echo "## Findings by location"
  echo
  echo "| Reviewers | Severity | Where | Hits |"
  echo "|---|---|---|---:|"
  # Stage 1: emit one "reviewer|severity|where" record per finding.
  # A single awk reads each verdict directly: unlike grep it exits 0
  # when nothing matches, so a reviewer with zero findings does not
  # abort the script under `set -e -o pipefail`.
  for r in "${REVIEWERS[@]}"; do
    short="${r%%|*}"
    v="$OUT_DIR/${BUNDLE_LABEL}_${short}.md"
    [ -f "$v" ] || continue
    awk -v r="$short" '
      { sub(/\r$/, "") }                 # tolerate CRLF model output
      /^SEVERITY:/ { sev = $2; next }
      /^WHERE:/ {
        sub(/^WHERE:[ \t]*/, "")
        # Drop a trailing parenthetical if it crept in. Portable
        # bracket class instead of the gawk-only \s escape.
        sub(/[ \t]*\(.*$/, "")
        # Normalise so identical locations compare equal across
        # reviewers: no backticks, no trailing whitespace.
        gsub(/`/, "")
        sub(/[ \t]+$/, "")
        print r "|" sev "|" $0
      }' "$v"
  done \
    | LC_ALL=C sort -t'|' -k3 -k1,1 \
    | awk -F'|' '
      # Stage 2: aggregate by location. Multiple findings from the
      # same lineage at the same WHERE count once (that is reviewer
      # self-repeat, not convergence). Every record still feeds the
      # severity comparison, so the row shows the highest severity
      # any reviewer gave that location.
      function rank(s) { return s == "BLOCK" ? 3 : s == "WARN" ? 2 : 1 }
      {
        key = $3
        if (!((key, $1) in counted)) {
          counted[key, $1] = 1
          who[key] = (key in who) ? who[key] "+" $1 : $1
          distinct_n[key]++
        }
        r = rank($2)
        if (r > sev_rank[key]) { sev_rank[key] = r; sev_max[key] = $2 }
      }
      END {
        for (k in distinct_n)
          printf "%s|%s|%s|%d\n", who[k], sev_max[k], k, distinct_n[k]
      }' \
    | LC_ALL=C sort -t'|' -k4nr -k1 \
    | awk -F'|' '{ printf "| %s | %s | `%s` | %d |\n", $1, $2, $3, $4 }'
  echo
  echo "(Convergent rows above are those whose Reviewers column contains a '+' — i.e. ≥2 lineages flagged the same location.)"
  echo
  echo "## Verdict line per reviewer"
  echo
  for r in "${REVIEWERS[@]}"; do
    short="${r%%|*}"
    v="$OUT_DIR/${BUNDLE_LABEL}_${short}.md"
    [ -f "$v" ] || continue
    # awk returns 0 with no match, so the fallback text below is
    # reachable; `grep | head` would fail the assignment under
    # pipefail and kill the script.
    line=$(awk '/^VERDICT:/ { print; exit }' "$v")
    echo "- **${short}**: ${line:-_no VERDICT line emitted_}"
  done
} > "$TALLY"

echo "[scrum] tally → $TALLY"

# Count finding rows in the tally table. Split on "|", a row
# `| reviewers | sev | where | hits |` has an empty field 1, so the
# Hits cell is field 5. Only rows whose Hits cell is an integer are
# findings (this skips the header and separator rows); convergent
# rows are those with Hits ≥ 2.
CONV=$(awk -F'|' '/^\| / && $5 ~ /^ [0-9]+ $/ && ($5 + 0) >= 2 {n++} END {print n+0}' "$TALLY")
TOTAL=$(awk -F'|' '/^\| / && $5 ~ /^ [0-9]+ $/ {n++} END {print n+0}' "$TALLY")

echo "[scrum] $BUNDLE_LABEL: $CONV convergent / $TOTAL distinct findings"
echo "[scrum] $BUNDLE_LABEL complete"