auditor: kb_query surfaces scrum-master reviews for files in PR diff

Wires the cohesion-plan Phase C link: the scrum-master pipeline writes per-file reviews to data/_kb/scrum_reviews.jsonl on accept; the auditor now reads that same file and emits one kb_query finding per file in the PR's diff that has a scrum review whose `file` matches it (when a file has several reviews, the most recent one wins). Severity heuristic: attempt 1-3 → info, attempt 4+ → warn. Reaching the cloud specialist (attempt 4+) means the ladder had to escalate, which is a meaningful signal reviewers should see. Whether tree-split fired is also surfaced in the finding summary. audit.ts now passes pr.files.map(f => f.path) into runKbCheck (the old signature dropped it on the floor). Also adds auditor/audit_one.ts — a dry-run CLI for auditing a single PR without posting to Gitea, useful for verifying check behavior without spamming review comments. Verified: after writing scrum_reviews for auditor/audit.ts and mcp-server/observer.ts (both in PR #7), audit_one 7 surfaced both as info findings with preview + accepted_model + tree_split flag. A scrum review for playbook_memory.rs (NOT in PR #7) was correctly filtered out.
scrum_master: tree-split + scrum_reviews.jsonl writer + truncation warning
2026-04-22 21:18:21 -05:00 · 2026-04-22 21:17:53 -05:00
4 changed files with 241 additions and 5 deletions
--- a/auditor/audit.ts
+++ b/auditor/audit.ts
@ -52,7 +52,7 @@ export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<
    runStaticCheck(diff),
    opts.skip_dynamic ? Promise.resolve(stubFinding("dynamic", "skipped by options")) : runDynamicCheck(),
    opts.skip_inference ? Promise.resolve(stubFinding("inference", "skipped by options")) : runInferenceCheck(claims, diff),
-    runKbCheck(claims),
+    runKbCheck(claims, pr.files.map(f => f.path)),
  ]);
  const allFindings: Finding[] = [
--- a/auditor/audit_one.ts
+++ b/auditor/audit_one.ts
@ -0,0 +1,68 @@
 // One-shot dry-run audit of a single PR. Useful for verifying check
 // behavior (kb_query scrum surfacing, inference prompts, etc.) without
 // posting to Gitea. Does NOT touch state.json and does NOT post
 // commit status or PR comments.
 //
 // Run: bun run auditor/audit_one.ts <pr-number>
 import { getPrSnapshot } from "./gitea.ts";
 import { auditPr } from "./audit.ts";
 // CLI entry point. Reads the PR number from argv[2], fetches the PR
 // snapshot, runs auditPr with dry_run + skip_dynamic (inference is
 // skipped only when LH_AUDITOR_SKIP_INFERENCE=1), then prints the
 // verdict with findings grouped by check.
 //
 // Exit codes: 2 on missing/invalid PR number, 0 after the report is
 // printed. Rejections propagate to the caller's .catch handler.
 async function main() {
  const prNumRaw = process.argv[2];
  if (!prNumRaw) {
    console.error("usage: bun run auditor/audit_one.ts <pr-number>");
    process.exit(2);
  }
  // Accept plain decimal positive integers only. Number() on its own
  // lets "1.5", "-3", "0x10" and "1e3" through as finite numbers, which
  // would then be sent to Gitea as a bogus PR id.
  const prNum = Number(prNumRaw);
  if (!/^\d+$/.test(prNumRaw) || !Number.isSafeInteger(prNum) || prNum <= 0) {
    console.error(`invalid PR number: ${prNumRaw}`);
    process.exit(2);
  }
  console.log(`[audit_one] fetching PR #${prNum}...`);
  const pr = await getPrSnapshot(prNum);
  console.log(`[audit_one] PR #${pr.number}: "${pr.title}" (head=${pr.head_sha.slice(0, 12)})`);
  console.log(`[audit_one] files in diff: ${pr.files.length}`);
  for (const f of pr.files) console.log(`  - ${f.path} (+${f.additions}/-${f.deletions})`);
  console.log("");
  const verdict = await auditPr(pr, {
    dry_run: true,                      // no Gitea posting
    skip_dynamic: true,                  // don't run fixture
    skip_inference: process.env.LH_AUDITOR_SKIP_INFERENCE === "1",
  });
  console.log("\n═══ VERDICT ═══");
  console.log(`overall:  ${verdict.overall}`);
  console.log(`one-liner: ${verdict.one_liner}`);
  console.log(`findings:  total=${verdict.metrics.findings_total} block=${verdict.metrics.findings_block} warn=${verdict.metrics.findings_warn} info=${verdict.metrics.findings_info}`);
  console.log("");
  // Scrum-surfaced findings are recognised by their summary text; keep
  // the marker in one place so the per-finding evidence dump and the
  // final wire-check count cannot disagree.
  const mentionsScrum = (summary: string) => summary.includes("scrum-master");
  // Print findings, highlighting kb_query scrum surfacing
  const byCheck: Record<string, typeof verdict.findings> = {};
  for (const f of verdict.findings) (byCheck[f.check] ||= []).push(f);
  for (const [check, findings] of Object.entries(byCheck)) {
    console.log(`── ${check} (${findings.length}) ──`);
    for (const f of findings) {
      const tag = f.severity === "block" ? "🛑" : f.severity === "warn" ? "⚠️ " : "ℹ️ ";
      console.log(`  ${tag} [${f.severity}] ${f.summary}`);
      if (mentionsScrum(f.summary)) {
        // Evidence is only expanded for scrum findings; each entry is
        // capped at 200 chars to keep the report scannable.
        for (const e of f.evidence) {
          console.log(`      → ${e.slice(0, 200)}`);
        }
      }
    }
  }
  const scrumFindings = verdict.findings.filter(f => mentionsScrum(f.summary));
  console.log("");
  console.log(`═══ SCRUM WIRE CHECK: ${scrumFindings.length} scrum-master findings surfaced by kb_query ═══`);
  if (scrumFindings.length === 0) {
    console.log("  (none — either no matching scrum_reviews.jsonl rows, or files didn't match PR diff)");
  }
  process.exit(0);
 }
 main().catch(e => { console.error("[audit_one] fatal:", e); process.exit(1); });
--- a/auditor/checks/kb_query.ts
+++ b/auditor/checks/kb_query.ts
@ -8,6 +8,7 @@
 // What this check reads (all file-backed, append-only or periodic):
 //   data/_kb/outcomes.jsonl         — per-scenario outcomes (kb.ts)
 //   data/_kb/error_corrections.jsonl — fail→succeed deltas on same sig
 //   data/_kb/scrum_reviews.jsonl     — scrum-master accepted reviews
 //   data/_observer/ops.jsonl         — observer ring → disk stream
 //   data/_bot/cycles/*.json          — bot cycle results
 //
@ -21,10 +22,11 @@ import type { Claim, Finding } from "../types.ts";
 // Absolute locations of the file-backed sources this check reads.
 const KB_DIR = "/home/profit/lakehouse/data/_kb";
 const OBSERVER_OPS = "/home/profit/lakehouse/data/_observer/ops.jsonl";
 const BOT_CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles";
 // Derived from KB_DIR so the two cannot drift apart. The scrum-master
 // pipeline appends to this same path; keep both sides in sync.
 const SCRUM_REVIEWS_JSONL = `${KB_DIR}/scrum_reviews.jsonl`;
 // Limit passed to tailJsonl when reading jsonl sources.
 const TAIL_LINES = 500;
 // NOTE(review): presumably caps how many bot cycle files are scanned —
 // the usage is outside this view; confirm.
 const MAX_BOT_CYCLE_FILES = 30;
-export async function runKbCheck(claims: Claim[]): Promise<Finding[]> {
+export async function runKbCheck(claims: Claim[], prFiles: string[] = []): Promise<Finding[]> {
  const findings: Finding[] = [];
  // 1. Recent scenario outcomes: are strong-claim-style phrases showing
@ -48,6 +50,15 @@ export async function runKbCheck(claims: Claim[]): Promise<Finding[]> {
  const obsFindings = await checkObserverStream();
  findings.push(...obsFindings);
  // 5. Scrum-master reviews — surface prior accepted reviews for any
  //    file in this PR's diff. Cohesion plan Phase C wire: the
  //    auditor gets to "borrow" the scrum-master's deeper per-file
  //    analysis instead of re-doing that work.
  if (prFiles.length > 0) {
    const scrumFindings = await checkScrumReviews(prFiles);
    findings.push(...scrumFindings);
  }
  return findings;
 }
@ -181,3 +192,54 @@ function observerBySource(ops: any[]): string {
  }
  return Object.entries(c).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}=${v}`).join(", ") || "empty";
 }
 // Scrum-master reviews — the scrum pipeline writes one row per
 // accepted per-file review. We match reviews whose `file` matches
 // any path in the PR's diff, then surface the *preview* + which
 // model the escalation ladder had to reach. If the scrum-master
 // needed the 123B specialist or larger to resolve a file, that's
 // a meaningful signal about the code's complexity — and it's
 // surfaced to the PR without the auditor having to re-run the
 // escalation ladder itself.
 //
 // prFiles: paths from the PR diff (e.g. `auditor/audit.ts`).
 // Returns one kb_query finding per PR file that has at least one
 // review among the tailed rows; an empty array when nothing matches.
 async function checkScrumReviews(prFiles: string[]): Promise<Finding[]> {
  const rows = await tailJsonl<any>(SCRUM_REVIEWS_JSONL, TAIL_LINES);
  if (rows.length === 0) return [];
  // Paths are compared for exact equality after normalization: leading
  // slashes and the absolute repo prefix are stripped so that
  // `/home/profit/lakehouse/auditor/audit.ts` and `auditor/audit.ts`
  // compare equal. There is no suffix/basename matching.
  const norm = (p: string) => p.replace(/^\/+/, "").replace(/^home\/profit\/lakehouse\//, "");
  const prSet = new Set(prFiles.map(norm));
  // Keep only the most recent review per file (last-wins).
  const latestByFile = new Map<string, any>();
  for (const r of rows) {
    // A jsonl line may parse to null or a primitive; reading `.file`
    // off null would throw and take the whole check down, so skip
    // anything that is not an object.
    if (r === null || typeof r !== "object") continue;
    const f = norm(String(r.file ?? ""));
    if (!f) continue;
    if (!prSet.has(f)) continue;
    latestByFile.set(f, r);
  }
  if (latestByFile.size === 0) return [];
  const findings: Finding[] = [];
  for (const [file, r] of latestByFile) {
    const model = String(r.accepted_model ?? "?");
    const attempt = r.accepted_on_attempt ?? "?";
    const treeSplit = !!r.tree_split_fired;
    // Severity is driven by the attempt number only: info for attempts
    // 1-3, warn once the ladder escalated to attempt 4 or beyond. A
    // missing/non-numeric attempt becomes NaN and therefore stays info.
    // Tree-split does not change severity; it is only noted in the
    // summary.
    const heavyEscalation = Number(attempt) >= 4;
    const sev: "warn" | "info" = heavyEscalation ? "warn" : "info";
    findings.push({
      check: "kb_query",
      severity: sev,
      summary: `scrum-master review for \`${file}\` — accepted on attempt ${attempt} by \`${model}\`${treeSplit ? " (tree-split)" : ""}`,
      evidence: [
        `reviewed_at: ${r.reviewed_at ?? "?"}`,
        // Preview is capped at 300 chars and flattened to one line.
        `preview: ${String(r.suggestions_preview ?? "").slice(0, 300).replace(/\n/g, " ")}`,
      ],
    });
  }
  return findings;
 }
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@ -27,6 +27,14 @@ const CHUNK_SIZE = 800;
 const CHUNK_OVERLAP = 120;
 const TOP_K_CONTEXT = 5;
 const MAX_ATTEMPTS = 6;
 // Files larger than this get tree-split instead of truncated. Fixes the
 // 6KB false-positive class (model claiming a field is "missing" when
 // it exists past the context cutoff).
 // Compared against `content.length`, i.e. string length rather than an
 // encoded byte count.
 const FILE_TREE_SPLIT_THRESHOLD = 6000;
 // Stride and maximum length (in string characters) of each shard slice
 // taken by treeSplitFile.
 const FILE_SHARD_SIZE = 3500;
 // Appended jsonl so auditor's kb_query can surface scrum findings for
 // files touched by a PR under review. Part of cohesion plan Phase C.
 // The auditor's kb_query check reads this same path — keep both in sync.
 const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl";
 // Output directory name carries a base-36 Date.now() suffix, evaluated
 // once when this module loads.
 const OUT_DIR = `/home/profit/lakehouse/tests/real-world/runs/scrum_${Date.now().toString(36)}`;
 const PRD_PATH = "/home/profit/lakehouse/docs/PRD.md";
@ -61,6 +69,8 @@ type Chunk = { id: string; text: string; embedding: number[]; origin: string; of
 interface FileReview {
  file: string;
  file_bytes: number;
  tree_split_fired: boolean;
  shards_summarized: number;
  top_prd_chunks: Array<{ origin: string; offset: number; score: number }>;
  top_proposal_chunks: Array<{ origin: string; offset: number; score: number }>;
  attempts_made: number;
@ -153,6 +163,47 @@ function retrieveTopK(query_emb: number[], pool: Chunk[], k: number): Chunk[] {
    .map(x => ({ ...x.c, _score: x.score } as any));
 }
 // Distill an oversized file into a running scratchpad. The content is
 // cut into consecutive FILE_SHARD_SIZE slices; each slice is sent to
 // the cloud model together with the notes gathered so far, and the
 // model's reply is appended under a per-shard header. Shards are
 // processed strictly in order because every prompt embeds the
 // scratchpad built from the shards before it. Cloud is used because
 // the summarization step needs quality > speed.
 //
 // Resolves to the accumulated scratchpad, the number of shards, and
 // the number of cloud calls made.
 async function treeSplitFile(
  filePath: string,
  content: string,
 ): Promise<{ scratchpad: string; shards: number; cloud_calls: number }> {
  // Slice the file into fixed-size windows, remembering each window's
  // offsets so the model and the scratchpad headers can reference them.
  const pieces: Array<{ from: number; to: number; text: string }> = [];
  let cursor = 0;
  while (cursor < content.length) {
    const stop = Math.min(cursor + FILE_SHARD_SIZE, content.length);
    pieces.push({ from: cursor, to: stop, text: content.slice(cursor, stop) });
    cursor += FILE_SHARD_SIZE;
  }
  const total = pieces.length;
  log(`  tree-split: ${content.length} chars → ${total} shards of ${FILE_SHARD_SIZE}`);
  let notes = "";
  let callsMade = 0;
  for (let idx = 0; idx < total; idx++) {
    const piece = pieces[idx];
    const ordinal = idx + 1;
    const prompt = `You are summarizing ONE SHARD of a source file as part of a multi-shard review. File: ${filePath}. Shard ${ordinal}/${total} (bytes ${piece.from}..${piece.to}).
 ─────── shard source ───────
 ${piece.text}
 ─────── end shard ───────
 Scratchpad of prior shards (if empty, this is shard 1):
 ${notes || "(empty)"}
 Extract ONLY facts useful for reviewing this file against its PRD: function names + purposes, struct fields + types, invariants, edge cases, TODO markers, error-handling style. Under 150 words. No prose outside the extracted facts.`;
    const reply = await chat({
      provider: "ollama_cloud",
      model: "gpt-oss:120b",
      prompt,
      max_tokens: 400,
    });
    callsMade += 1;
    // An empty reply still counts as a call but adds nothing to the notes.
    if (!reply.content) continue;
    notes += `\n--- shard ${ordinal} (bytes ${piece.from}..${piece.to}) ---\n${reply.content.trim()}`;
  }
  return { scratchpad: notes, shards: total, cloud_calls: callsMade };
 }
 async function reviewFile(
  filePath: string,
  prd_chunks: Chunk[],
@ -180,11 +231,39 @@ async function reviewFile(
    ...topPlan.map(c => `[PLAN @${c.offset}]\n${c.text.slice(0, 600)}`),
  ].join("\n\n");
  // Files bigger than FILE_TREE_SPLIT_THRESHOLD get tree-split.
  // Summarize each shard to a scratchpad, then review against the
  // scratchpad instead of the truncated first chunk. Prevents the
  // false-positive pattern where the model claims a field is
  // "missing" because it's past the context cutoff.
  let sourceForPrompt: string;
  let treeSplitFired = false;
  let shardsSummarized = 0;
  let extraCloudCalls = 0;
  if (content.length > FILE_TREE_SPLIT_THRESHOLD) {
    treeSplitFired = true;
    const ts = await treeSplitFile(rel, content);
    shardsSummarized = ts.shards;
    extraCloudCalls = ts.cloud_calls;
    sourceForPrompt = `[FULL-FILE SCRATCHPAD — distilled from ${ts.shards} shards via tree-split]\n${ts.scratchpad}`;
  } else {
    sourceForPrompt = content;
  }
  // Prompt — when tree-split fired, include an explicit instruction
  // not to claim a field/function is "missing" because the scratchpad
  // is a distillation not the full file. Attacks the rubric-tuning
  // concern J called out.
  const truncationWarning = treeSplitFired
    ? `\nIMPORTANT: the "source" below is a multi-shard distillation (tree-split across ${shardsSummarized} shards), NOT the full raw file. DO NOT claim any field, function, or feature is "missing" based on its absence from this distillation — the distillation may have elided it. Only call out gaps that appear DIRECTLY contradicted by the PRD excerpts.\n`
    : "";
  const baseTask = `You are reviewing one source file against the Lakehouse PRD and an active cohesion-integration plan.
-FILE: ${rel}
+FILE: ${rel} (${content.length} bytes${treeSplitFired ? `, tree-split into ${shardsSummarized} shards` : ""})
 ${truncationWarning}
 ─────── source ───────
-${content.slice(0, 6000)}${content.length > 6000 ? "\n[... truncated after 6KB ...]" : ""}
+${sourceForPrompt}
 ─────── end source ───────
 ${contextBlock}
@ -234,9 +313,11 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
    break;
  }
-  return {
+  const review: FileReview = {
    file: rel,
    file_bytes: content.length,
    tree_split_fired: treeSplitFired,
    shards_summarized: shardsSummarized,
    top_prd_chunks: topPrd.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
    top_proposal_chunks: topPlan.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
    attempts_made: history.length,
@ -246,6 +327,31 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
    suggestions: accepted ?? "[no acceptable answer after escalation ladder exhausted]",
    duration_ms: Date.now() - t0,
  };
  // Append to the shared scrum-reviews jsonl so the auditor's
  // kb_query check can surface relevant reviews for files in a
  // PR diff. Cohesion plan Phase C wire.
  if (accepted) {
    const { appendFile, mkdir } = await import("node:fs/promises");
    const { dirname } = await import("node:path");
    await mkdir(dirname(SCRUM_REVIEWS_JSONL), { recursive: true });
    const row = {
      file: rel,
      reviewed_at: new Date().toISOString(),
      accepted_model: acceptedModel,
      accepted_on_attempt: acceptedOn,
      attempts_made: history.length,
      tree_split_fired: treeSplitFired,
      suggestions_preview: accepted.slice(0, 2000),
    };
    try {
      await appendFile(SCRUM_REVIEWS_JSONL, JSON.stringify(row) + "\n");
    } catch (e) {
      console.error(`[scrum] failed to append scrum_reviews.jsonl: ${(e as Error).message}`);
    }
  }
  return review;
 }
 async function loadAndChunk(path: string, origin_tag: string): Promise<Chunk[]> {