Audit pipeline PR #9: determinism + fact extraction + verifier gate + KB stats
@ -27,6 +27,14 @@ const CHUNK_SIZE = 800;
|
||||
const CHUNK_OVERLAP = 120;
|
||||
const TOP_K_CONTEXT = 5;
|
||||
const MAX_ATTEMPTS = 6;
|
||||
// Files larger than this get tree-split instead of truncated. Fixes the
|
||||
// 6KB false-positive class (model claiming a field is "missing" when
|
||||
// it exists past the context cutoff).
|
||||
const FILE_TREE_SPLIT_THRESHOLD = 6000;
|
||||
const FILE_SHARD_SIZE = 3500;
|
||||
// Appended jsonl so auditor's kb_query can surface scrum findings for
|
||||
// files touched by a PR under review. Part of cohesion plan Phase C.
|
||||
const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl";
|
||||
const OUT_DIR = `/home/profit/lakehouse/tests/real-world/runs/scrum_${Date.now().toString(36)}`;
|
||||
|
||||
const PRD_PATH = "/home/profit/lakehouse/docs/PRD.md";
|
||||
@ -61,6 +69,8 @@ type Chunk = { id: string; text: string; embedding: number[]; origin: string; of
|
||||
interface FileReview {
|
||||
file: string;
|
||||
file_bytes: number;
|
||||
tree_split_fired: boolean;
|
||||
shards_summarized: number;
|
||||
top_prd_chunks: Array<{ origin: string; offset: number; score: number }>;
|
||||
top_proposal_chunks: Array<{ origin: string; offset: number; score: number }>;
|
||||
attempts_made: number;
|
||||
@ -153,6 +163,47 @@ function retrieveTopK(query_emb: number[], pool: Chunk[], k: number): Chunk[] {
|
||||
.map(x => ({ ...x.c, _score: x.score } as any));
|
||||
}
|
||||
|
||||
// Tree-split a large file: shard it, summarize each shard against
|
||||
// the review question, merge into a scratchpad. Uses cloud because
|
||||
// the summarization step needs quality > speed. Returns the
|
||||
// scratchpad (full-file distillation) and the cloud-call count.
|
||||
async function treeSplitFile(
|
||||
filePath: string,
|
||||
content: string,
|
||||
): Promise<{ scratchpad: string; shards: number; cloud_calls: number }> {
|
||||
const shards: Array<{ from: number; to: number; text: string }> = [];
|
||||
for (let i = 0; i < content.length; i += FILE_SHARD_SIZE) {
|
||||
const end = Math.min(i + FILE_SHARD_SIZE, content.length);
|
||||
shards.push({ from: i, to: end, text: content.slice(i, end) });
|
||||
}
|
||||
let scratchpad = "";
|
||||
let cloud_calls = 0;
|
||||
log(` tree-split: ${content.length} chars → ${shards.length} shards of ${FILE_SHARD_SIZE}`);
|
||||
for (const [si, shard] of shards.entries()) {
|
||||
const prompt = `You are summarizing ONE SHARD of a source file as part of a multi-shard review. File: ${filePath}. Shard ${si + 1}/${shards.length} (bytes ${shard.from}..${shard.to}).
|
||||
|
||||
─────── shard source ───────
|
||||
${shard.text}
|
||||
─────── end shard ───────
|
||||
|
||||
Scratchpad of prior shards (if empty, this is shard 1):
|
||||
${scratchpad || "(empty)"}
|
||||
|
||||
Extract ONLY facts useful for reviewing this file against its PRD: function names + purposes, struct fields + types, invariants, edge cases, TODO markers, error-handling style. Under 150 words. No prose outside the extracted facts.`;
|
||||
const r = await chat({
|
||||
provider: "ollama_cloud",
|
||||
model: "gpt-oss:120b",
|
||||
prompt,
|
||||
max_tokens: 400,
|
||||
});
|
||||
cloud_calls += 1;
|
||||
if (r.content) {
|
||||
scratchpad += `\n--- shard ${si + 1} (bytes ${shard.from}..${shard.to}) ---\n${r.content.trim()}`;
|
||||
}
|
||||
}
|
||||
return { scratchpad, shards: shards.length, cloud_calls };
|
||||
}
|
||||
|
||||
async function reviewFile(
|
||||
filePath: string,
|
||||
prd_chunks: Chunk[],
|
||||
@ -180,11 +231,39 @@ async function reviewFile(
|
||||
...topPlan.map(c => `[PLAN @${c.offset}]\n${c.text.slice(0, 600)}`),
|
||||
].join("\n\n");
|
||||
|
||||
// Files bigger than FILE_TREE_SPLIT_THRESHOLD get tree-split.
|
||||
// Summarize each shard to a scratchpad, then review against the
|
||||
// scratchpad instead of the truncated first chunk. Prevents the
|
||||
// false-positive pattern where the model claims a field is
|
||||
// "missing" because it's past the context cutoff.
|
||||
let sourceForPrompt: string;
|
||||
let treeSplitFired = false;
|
||||
let shardsSummarized = 0;
|
||||
let extraCloudCalls = 0;
|
||||
if (content.length > FILE_TREE_SPLIT_THRESHOLD) {
|
||||
treeSplitFired = true;
|
||||
const ts = await treeSplitFile(rel, content);
|
||||
shardsSummarized = ts.shards;
|
||||
extraCloudCalls = ts.cloud_calls;
|
||||
sourceForPrompt = `[FULL-FILE SCRATCHPAD — distilled from ${ts.shards} shards via tree-split]\n${ts.scratchpad}`;
|
||||
} else {
|
||||
sourceForPrompt = content;
|
||||
}
|
||||
|
||||
// Prompt — when tree-split fired, include an explicit instruction
|
||||
// not to claim a field/function is "missing" because the scratchpad
|
||||
// is a distillation not the full file. Attacks the rubric-tuning
|
||||
// concern J called out.
|
||||
const truncationWarning = treeSplitFired
|
||||
? `\nIMPORTANT: the "source" below is a multi-shard distillation (tree-split across ${shardsSummarized} shards), NOT the full raw file. DO NOT claim any field, function, or feature is "missing" based on its absence from this distillation — the distillation may have elided it. Only call out gaps that appear DIRECTLY contradicted by the PRD excerpts.\n`
|
||||
: "";
|
||||
|
||||
const baseTask = `You are reviewing one source file against the Lakehouse PRD and an active cohesion-integration plan.
|
||||
|
||||
FILE: ${rel}
|
||||
FILE: ${rel} (${content.length} bytes${treeSplitFired ? `, tree-split into ${shardsSummarized} shards` : ""})
|
||||
${truncationWarning}
|
||||
─────── source ───────
|
||||
${content.slice(0, 6000)}${content.length > 6000 ? "\n[... truncated after 6KB ...]" : ""}
|
||||
${sourceForPrompt}
|
||||
─────── end source ───────
|
||||
|
||||
${contextBlock}
|
||||
@ -234,9 +313,11 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
|
||||
break;
|
||||
}
|
||||
|
||||
return {
|
||||
const review: FileReview = {
|
||||
file: rel,
|
||||
file_bytes: content.length,
|
||||
tree_split_fired: treeSplitFired,
|
||||
shards_summarized: shardsSummarized,
|
||||
top_prd_chunks: topPrd.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
|
||||
top_proposal_chunks: topPlan.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
|
||||
attempts_made: history.length,
|
||||
@ -246,6 +327,31 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
|
||||
suggestions: accepted ?? "[no acceptable answer after escalation ladder exhausted]",
|
||||
duration_ms: Date.now() - t0,
|
||||
};
|
||||
|
||||
// Append to the shared scrum-reviews jsonl so the auditor's
|
||||
// kb_query check can surface relevant reviews for files in a
|
||||
// PR diff. Cohesion plan Phase C wire.
|
||||
if (accepted) {
|
||||
const { appendFile, mkdir } = await import("node:fs/promises");
|
||||
const { dirname } = await import("node:path");
|
||||
await mkdir(dirname(SCRUM_REVIEWS_JSONL), { recursive: true });
|
||||
const row = {
|
||||
file: rel,
|
||||
reviewed_at: new Date().toISOString(),
|
||||
accepted_model: acceptedModel,
|
||||
accepted_on_attempt: acceptedOn,
|
||||
attempts_made: history.length,
|
||||
tree_split_fired: treeSplitFired,
|
||||
suggestions_preview: accepted.slice(0, 2000),
|
||||
};
|
||||
try {
|
||||
await appendFile(SCRUM_REVIEWS_JSONL, JSON.stringify(row) + "\n");
|
||||
} catch (e) {
|
||||
console.error(`[scrum] failed to append scrum_reviews.jsonl: ${(e as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
return review;
|
||||
}
|
||||
|
||||
async function loadAndChunk(path: string, origin_tag: string): Promise<Chunk[]> {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user