// lakehouse/tests/real-world/scrum_master_pipeline.ts

// Scrum-master orchestrator — pulls git repo source + PRD + a change
// proposal, chunks everything, hands each code piece to the proven
// escalation ladder (small-local → big-local → cloud → specialist →
// biggest) with learning context between attempts. Collects per-file
// suggestions in a coherent handoff report.
//
// What it composes (everything below is already shipped + proven):
//   - Chunker + embeddings (sidecar /embed, nomic-embed-text)
//   - In-memory cosine retrieval (top-K PRD + plan chunks per file)
//   - Escalation ladder (6 tiers, cycling on empty/error/thin-answer)
//   - Per-attempt learning-context injection (prior failures → prompt)
//   - Tree-split fallback when combined context exceeds budget
//   - JSONL output per file + summary
//
// Deliberate scope limit: TARGET_FILES is 3 files by default. The
// pipeline works at larger N, but at ~90s/file × 3 files = 4-5 min,
// 15 files = 22 min. Bump via env LH_SCRUM_FILES="path1,path2,...".
//
// Run: bun run tests/real-world/scrum_master_pipeline.ts

import { readFile, writeFile, mkdir } from "node:fs/promises";
import { createHash } from "node:crypto";

// Base URL of the gateway; chat() POSTs to `${GATEWAY}/v1/chat`.
const GATEWAY = "http://localhost:3100";
// Base URL of the embedding sidecar; embedBatch() POSTs to `${SIDECAR}/embed`.
const SIDECAR = "http://localhost:3200";
// Chunk window and the overlap between consecutive windows, both in
// characters (see chunkText()).
const CHUNK_SIZE = 800;
const CHUNK_OVERLAP = 120;
// How many PRD chunks, and separately how many proposal chunks, are
// retrieved as context for each reviewed file.
const TOP_K_CONTEXT = 5;
// Upper bound on ladder attempts per reviewed file.
const MAX_ATTEMPTS = 6;
// Files larger than this get tree-split instead of truncated. Fixes the
// 6KB false-positive class (model claiming a field is "missing" when
// it exists past the context cutoff).
const FILE_TREE_SPLIT_THRESHOLD = 6000;
// Length, in characters, of each shard cut by treeSplitFile().
const FILE_SHARD_SIZE = 3500;
// Appended jsonl so auditor's kb_query can surface scrum findings for
// files touched by a PR under review. Part of cohesion plan Phase C.
const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl";
// Per-run output directory. The suffix is the timestamp at module load,
// rendered in base 36.
const OUT_DIR = `/home/profit/lakehouse/tests/real-world/runs/scrum_${Date.now().toString(36)}`;

// Document chunked under the "PRD" origin tag in main().
const PRD_PATH = "/home/profit/lakehouse/docs/PRD.md";
// Using CONTROL_PLANE_PRD as the "suggested changes" doc since it
// describes the Phase 38-44 target architecture and is on main.
// COHESION_INTEGRATION_PLAN.md is still on PR #7 branch.
const PROPOSAL_PATH = "/home/profit/lakehouse/docs/CONTROL_PLANE_PRD.md";

// Scoped target: 3 representative source files by default.
// The scrum-master walks these in order and produces one suggestion
// set per file. Override via env for a wider sweep.
const DEFAULT_TARGETS = [
  "/home/profit/lakehouse/crates/vectord/src/playbook_memory.rs",
  "/home/profit/lakehouse/crates/vectord/src/doc_drift.rs",
  "/home/profit/lakehouse/auditor/audit.ts",
];

/**
 * Parse a comma-separated list of file paths (the LH_SCRUM_FILES format).
 *
 * Entries are trimmed and blank entries are dropped, so a trailing or
 * doubled comma does not turn into an empty path that fails at read time.
 *
 * @param raw      The raw env value; may be undefined or empty.
 * @param fallback Returned as-is when `raw` contains no usable entry.
 * @returns The parsed paths in their original order, or `fallback`.
 */
function parseTargetFiles(raw: string | undefined, fallback: string[]): string[] {
  const paths = (raw ?? "")
    .split(",")
    .map(s => s.trim())
    .filter(s => s.length > 0);
  return paths.length > 0 ? paths : fallback;
}

// Files reviewed this run: LH_SCRUM_FILES when it names at least one
// path, otherwise DEFAULT_TARGETS.
const TARGET_FILES: string[] = parseTargetFiles(process.env.LH_SCRUM_FILES, DEFAULT_TARGETS);

// Escalation ladder. reviewFile() indexes into this array by attempt
// number, so rungs are tried top to bottom: local models first, then
// cloud models. `provider` and `model` are forwarded to chat(); `note`
// is a human-readable label that the code visible here never reads.
const LADDER: Array<{ provider: "ollama" | "ollama_cloud"; model: string; note: string }> = [
  { provider: "ollama",       model: "qwen3.5:latest",       note: "local 7B" },
  { provider: "ollama",       model: "qwen3:latest",         note: "local 7B (peer)" },
  { provider: "ollama",       model: "gpt-oss:20b",          note: "local 20B" },
  { provider: "ollama_cloud", model: "gpt-oss:120b",         note: "cloud 120B" },
  { provider: "ollama_cloud", model: "devstral-2:123b",      note: "cloud 123B coding specialist" },
  { provider: "ollama_cloud", model: "mistral-large-3:675b", note: "cloud 675B last-ditch" },
];

// One embedded slice of a reference document, as built by loadAndChunk():
//   id        — first 10 hex chars of the SHA-256 of `text`
//   text      — the trimmed chunk text
//   embedding — vector returned by the sidecar for `text`
//   origin    — caller-supplied tag naming the source document
//   offset    — character index in the source where the (untrimmed)
//               window for this chunk started
type Chunk = { id: string; text: string; embedding: number[]; origin: string; offset: number };

// Result of reviewing one file; serialized per file by main() and
// aggregated into the report and summary.
interface FileReview {
  // File path with the repo-root prefix stripped.
  file: string;
  // Length of the file content as a JS string (`content.length`), i.e.
  // UTF-16 code units rather than a true byte count, despite the name.
  file_bytes: number;
  // True when the file exceeded FILE_TREE_SPLIT_THRESHOLD and was
  // reviewed via a shard-summary scratchpad instead of its raw source.
  tree_split_fired: boolean;
  // Number of shards the file was cut into (0 when no tree-split).
  shards_summarized: number;
  // Retrieved context chunks; `score` is the cosine similarity between
  // the file's seed embedding and the chunk.
  top_prd_chunks: Array<{ origin: string; offset: number; score: number }>;
  top_proposal_chunks: Array<{ origin: string; offset: number; score: number }>;
  // Number of ladder attempts actually made (length of attempts_history).
  attempts_made: number;
  // One entry per attempt, in order. `n` is 1-based; `chars` is the
  // response length (0 on error); `error` holds a truncated error
  // message or a thin-answer description and is absent when accepted.
  attempts_history: Array<{ n: number; model: string; status: "accepted" | "thin" | "error"; chars: number; error?: string }>;
  // 1-based attempt number that was accepted, or null if none was.
  accepted_on: number | null;
  // "provider/model" of the accepting rung; empty string if none.
  escalated_to_model: string;
  // The accepted answer, or a placeholder when the ladder was exhausted.
  suggestions: string;
  // Wall-clock time spent on this file, measured up to the point the
  // review object is built.
  duration_ms: number;
}

/** Print one progress line to stdout, prefixed with the pipeline tag. */
function log(msg: string) {
  const line = `[scrum] ${msg}`;
  console.log(line);
}

/**
 * Cosine similarity of two numeric vectors.
 *
 * Iterates over the indices of `a`. Returns 0 whenever either squared
 * norm is falsy (zero or NaN) — which covers empty input, all-zero
 * vectors, and `b` being shorter than `a`.
 */
function cosine(a: number[], b: number[]): number {
  let dot = 0;
  let normASq = 0;
  let normBSq = 0;
  for (const [idx, x] of a.entries()) {
    const y = b[idx];
    dot += x * y;
    normASq += x * x;
    normBSq += y * y;
  }
  if (!normASq || !normBSq) return 0;
  return dot / (Math.sqrt(normASq) * Math.sqrt(normBSq));
}

/**
 * Cut `text` into overlapping fixed-size windows.
 *
 * Each window is trimmed; windows whose trimmed length is not strictly
 * greater than `minChars` are dropped. `offset` is the start index of
 * the untrimmed window, so it can precede the first character of `text`
 * in the chunk when the window began with whitespace.
 *
 * @param text     Source text.
 * @param size     Window length in characters (default CHUNK_SIZE).
 * @param overlap  Characters shared by consecutive windows (default CHUNK_OVERLAP).
 * @param minChars Trimmed chunks must be longer than this to be kept (default 60).
 * @returns Kept chunks in source order.
 * @throws RangeError if `size <= 0`, `overlap < 0`, or `overlap >= size`
 *         (the window would never advance).
 */
function chunkText(
  text: string,
  size: number = CHUNK_SIZE,
  overlap: number = CHUNK_OVERLAP,
  minChars: number = 60,
): Array<{ text: string; offset: number }> {
  // Written as negated comparisons so NaN is rejected too.
  if (!(size > 0) || !(overlap >= 0) || !(overlap < size)) {
    throw new RangeError(
      `chunkText: need size > 0 and 0 <= overlap < size (got size=${size}, overlap=${overlap})`,
    );
  }
  const out: Array<{ text: string; offset: number }> = [];
  const step = size - overlap;
  for (let start = 0; start < text.length; start += step) {
    const end = Math.min(start + size, text.length);
    const slice = text.slice(start, end).trim();
    if (slice.length > minChars) out.push({ text: slice, offset: start });
    // The window reached the end of the text; a further step would only
    // re-emit part of the tail.
    if (end >= text.length) break;
  }
  return out;
}

/**
 * Embed a batch of texts via the sidecar's POST /embed endpoint.
 *
 * @param texts Texts to embed. An empty batch resolves to `[]` without
 *              touching the network.
 * @returns One embedding per input text, in input order.
 * @throws Error on a non-2xx response, or when the response does not
 *         carry exactly one embedding per input (callers index the
 *         result positionally). Network and timeout errors from
 *         `fetch` propagate unchanged.
 */
async function embedBatch(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return [];
  const r = await fetch(`${SIDECAR}/embed`, {
    method: "POST", headers: { "content-type": "application/json" },
    body: JSON.stringify({ texts }),
    signal: AbortSignal.timeout(120000),
  });
  if (!r.ok) throw new Error(`embed ${r.status}`);
  const payload: unknown = await r.json();
  const embeddings = (payload as { embeddings?: unknown } | null)?.embeddings;
  // Fail here with a clear message rather than letting an undefined
  // vector surface later as a TypeError during retrieval.
  if (!Array.isArray(embeddings) || embeddings.length !== texts.length) {
    const got = Array.isArray(embeddings) ? `${embeddings.length}` : typeof embeddings;
    throw new Error(`embed: expected ${texts.length} embeddings, got ${got}`);
  }
  return embeddings as number[][];
}

/**
 * Send a single-turn user prompt to the gateway's /v1/chat endpoint.
 *
 * Never throws: HTTP failures, network errors and timeouts are all
 * reported through the `error` field with empty content and zero token
 * counts. On success `error` is absent.
 *
 * @param opts.provider   Backend to route to.
 * @param opts.model      Model identifier.
 * @param opts.prompt     Sent as the only message, with role "user".
 * @param opts.max_tokens Completion cap; defaults to 1500.
 */
async function chat(opts: {
  provider: "ollama" | "ollama_cloud",
  model: string,
  prompt: string,
  max_tokens?: number,
}): Promise<{ content: string; error?: string; prompt_tokens: number; completion_tokens: number }> {
  // Every failure path returns the same shape.
  const failed = (error: string) => ({ content: "", error, prompt_tokens: 0, completion_tokens: 0 });

  try {
    const payload = {
      provider: opts.provider,
      model: opts.model,
      messages: [{ role: "user", content: opts.prompt }],
      max_tokens: opts.max_tokens ?? 1500,
      temperature: 0.2,
      think: false,
    };
    const response = await fetch(`${GATEWAY}/v1/chat`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(180000),
    });

    if (!response.ok) {
      // Keep only the head of the error body so log lines stay short.
      const detail = (await response.text()).slice(0, 200);
      return failed(`/v1/chat ${response.status}: ${detail}`);
    }

    const parsed: any = await response.json();
    const content = parsed.choices?.[0]?.message?.content ?? "";
    const prompt_tokens = parsed.usage?.prompt_tokens ?? 0;
    const completion_tokens = parsed.usage?.completion_tokens ?? 0;
    return { content, prompt_tokens, completion_tokens };
  } catch (e) {
    return failed((e as Error).message);
  }
}

// Gate for a file-review answer: it has to be long enough and show
// some attempt at structure. This does not validate the content of
// the suggestions, only that the model produced a coherent-looking
// suggestion set rather than a hand-wave.
function isAcceptable(answer: string): boolean {
  // Anything under 200 characters is treated as too thin.
  const tooThin = answer.length < 200;
  if (tooThin) return false;

  // A line opening with a bullet, a numbered item, or a heading marker
  // counts as structure.
  const structureMarkers = [
    /^\s*[-*]\s/m,
    /^\s*\d+\.\s/m,
    /^\s*#/m,
  ];
  return structureMarkers.some(marker => marker.test(answer));
}

/**
 * Rank `pool` by cosine similarity to `query_emb` and return copies of
 * the `k` best chunks, highest score first. Each returned copy carries
 * its similarity in an extra, untyped `_score` property.
 */
function retrieveTopK(query_emb: number[], pool: Chunk[], k: number): Chunk[] {
  const ranked: Array<{ c: Chunk; score: number }> = [];
  for (const c of pool) {
    ranked.push({ c, score: cosine(query_emb, c.embedding) });
  }
  ranked.sort((left, right) => right.score - left.score);

  const best: Chunk[] = [];
  for (const { c, score } of ranked.slice(0, k)) {
    best.push({ ...c, _score: score } as any);
  }
  return best;
}

// Tree-split a large file: shard it, summarize each shard against
// the review question, merge into a scratchpad. Uses cloud because
// the summarization step needs quality > speed. Returns the
// scratchpad (full-file distillation) and the cloud-call count.
//
// Shards are contiguous, non-overlapping slices of FILE_SHARD_SIZE
// characters. They are summarized sequentially because each prompt
// embeds the scratchpad accumulated from the shards before it.
// `cloud_calls` counts every call issued, including ones that failed.
// A shard whose summary fails or comes back empty is left out of the
// scratchpad and logged, so holes in the distillation are visible in
// the run output instead of being silently dropped.
async function treeSplitFile(
  filePath: string,
  content: string,
): Promise<{ scratchpad: string; shards: number; cloud_calls: number }> {
  const shards: Array<{ from: number; to: number; text: string }> = [];
  for (let i = 0; i < content.length; i += FILE_SHARD_SIZE) {
    const end = Math.min(i + FILE_SHARD_SIZE, content.length);
    shards.push({ from: i, to: end, text: content.slice(i, end) });
  }
  let scratchpad = "";
  let cloud_calls = 0;
  log(`  tree-split: ${content.length} chars → ${shards.length} shards of ${FILE_SHARD_SIZE}`);
  for (const [si, shard] of shards.entries()) {
    const prompt = `You are summarizing ONE SHARD of a source file as part of a multi-shard review. File: ${filePath}. Shard ${si + 1}/${shards.length} (bytes ${shard.from}..${shard.to}).

─────── shard source ───────
${shard.text}
─────── end shard ───────

Scratchpad of prior shards (if empty, this is shard 1):
${scratchpad || "(empty)"}

Extract ONLY facts useful for reviewing this file against its PRD: function names + purposes, struct fields + types, invariants, edge cases, TODO markers, error-handling style. Under 150 words. No prose outside the extracted facts.`;
    const r = await chat({
      provider: "ollama_cloud",
      model: "gpt-oss:120b",
      prompt,
      max_tokens: 400,
    });
    cloud_calls += 1;
    if (r.content) {
      scratchpad += `\n--- shard ${si + 1} (bytes ${shard.from}..${shard.to}) ---\n${r.content.trim()}`;
    } else {
      // Best-effort: keep going, but make the gap visible.
      const reason = r.error ? `failed: ${r.error.slice(0, 80)}` : "came back empty";
      log(`    ✗ shard ${si + 1}/${shards.length} summary ${reason} — omitted from scratchpad`);
    }
  }
  return { scratchpad, shards: shards.length, cloud_calls };
}

/**
 * Review one source file against the PRD and the change proposal.
 *
 * Steps:
 *   1. Read the file and embed its first 800 characters as a query.
 *   2. Retrieve the TOP_K_CONTEXT most similar PRD and proposal chunks.
 *   3. If the file is longer than FILE_TREE_SPLIT_THRESHOLD, replace the
 *      raw source in the prompt with a tree-split scratchpad.
 *   4. Walk the escalation ladder for up to MAX_ATTEMPTS attempts,
 *      appending a summary of prior failures to each retry prompt, and
 *      stop at the first answer that passes isAcceptable().
 *   5. On acceptance, append a row to the shared scrum-reviews JSONL
 *      and (unless disabled) run fact extraction. Both are best-effort:
 *      failures are logged and never discard the review.
 *
 * @param filePath        Path of the file to review.
 * @param prd_chunks      Pre-embedded PRD chunks.
 * @param proposal_chunks Pre-embedded change-proposal chunks.
 * @returns The review record, whether or not any attempt was accepted.
 * @throws If the file cannot be read or the seed embedding call fails.
 */
async function reviewFile(
  filePath: string,
  prd_chunks: Chunk[],
  proposal_chunks: Chunk[],
): Promise<FileReview> {
  const t0 = Date.now();
  log(`file: ${filePath}`);
  const content = await readFile(filePath, "utf8");
  const rel = filePath.replace("/home/profit/lakehouse/", "");

  // Build a query embedding from the first ~800 chars of the file
  // (good enough for topical retrieval).
  const seed = content.slice(0, 800);
  const [seedEmb] = await embedBatch([seed]);

  const topPrd = retrieveTopK(seedEmb, prd_chunks, TOP_K_CONTEXT);
  const topPlan = retrieveTopK(seedEmb, proposal_chunks, TOP_K_CONTEXT);
  log(`  retrieved ${topPrd.length} PRD chunks + ${topPlan.length} proposal chunks`);

  const contextBlock = [
    "═══ RELEVANT PRD EXCERPTS ═══",
    ...topPrd.map(c => `[PRD @${c.offset}]\n${c.text.slice(0, 600)}`),
    "",
    "═══ RELEVANT CHANGE PROPOSAL EXCERPTS ═══",
    ...topPlan.map(c => `[PLAN @${c.offset}]\n${c.text.slice(0, 600)}`),
  ].join("\n\n");

  // Files bigger than FILE_TREE_SPLIT_THRESHOLD get tree-split.
  // Summarize each shard to a scratchpad, then review against the
  // scratchpad instead of the truncated first chunk. Prevents the
  // false-positive pattern where the model claims a field is
  // "missing" because it's past the context cutoff.
  let sourceForPrompt: string;
  let treeSplitFired = false;
  let shardsSummarized = 0;
  if (content.length > FILE_TREE_SPLIT_THRESHOLD) {
    treeSplitFired = true;
    const ts = await treeSplitFile(rel, content);
    shardsSummarized = ts.shards;
    sourceForPrompt = `[FULL-FILE SCRATCHPAD — distilled from ${ts.shards} shards via tree-split]\n${ts.scratchpad}`;
  } else {
    sourceForPrompt = content;
  }

  // Prompt — when tree-split fired, include an explicit instruction
  // not to claim a field/function is "missing" because the scratchpad
  // is a distillation not the full file. Attacks the rubric-tuning
  // concern J called out.
  const truncationWarning = treeSplitFired
    ? `\nIMPORTANT: the "source" below is a multi-shard distillation (tree-split across ${shardsSummarized} shards), NOT the full raw file. DO NOT claim any field, function, or feature is "missing" based on its absence from this distillation — the distillation may have elided it. Only call out gaps that appear DIRECTLY contradicted by the PRD excerpts.\n`
    : "";

  const baseTask = `You are reviewing one source file against the Lakehouse PRD and an active cohesion-integration plan.

FILE: ${rel} (${content.length} bytes${treeSplitFired ? `, tree-split into ${shardsSummarized} shards` : ""})
${truncationWarning}
─────── source ───────
${sourceForPrompt}
─────── end source ───────

${contextBlock}

Produce a structured review with:
1. Alignment score (1-10) between this file and the PRD intent
2. 3-5 concrete suggested changes (bullet points), each naming a specific function/line and what to change
3. Any gap where this file's behavior contradicts the PRD or the proposal

Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-offset when relevant.`;

  const history: FileReview["attempts_history"] = [];
  let accepted: string | null = null;
  let acceptedModel = "";
  let acceptedOn = 0;

  for (let i = 0; i < MAX_ATTEMPTS; i++) {
    const n = i + 1;
    // Cycle through the ladder so raising MAX_ATTEMPTS past the number
    // of rungs wraps around instead of indexing past the end.
    const rung = LADDER[i % LADDER.length];
    const learning = history.length > 0
      ? `\n\n═══ PRIOR ATTEMPTS FAILED. Specific issues to fix: ═══\n${history.map(h => `Attempt ${h.n} (${h.model}, ${h.chars} chars): ${h.status} — ${h.error ?? "thin/unstructured answer"}`).join("\n")}\n═══`
      : "";

    log(`  attempt ${n}/${MAX_ATTEMPTS}: ${rung.provider}::${rung.model}${learning ? " [w/ learning]" : ""}`);
    const r = await chat({
      provider: rung.provider,
      model: rung.model,
      prompt: baseTask + learning,
      max_tokens: 1500,
    });

    if (r.error) {
      history.push({ n, model: rung.model, status: "error", chars: 0, error: r.error.slice(0, 180) });
      log(`    ✗ error: ${r.error.slice(0, 80)}`);
      continue;
    }
    if (!isAcceptable(r.content)) {
      history.push({ n, model: rung.model, status: "thin", chars: r.content.length, error: `thin/unstructured (${r.content.length} chars)` });
      log(`    ✗ thin/unstructured (${r.content.length} chars)`);
      continue;
    }
    history.push({ n, model: rung.model, status: "accepted", chars: r.content.length });
    accepted = r.content;
    acceptedModel = `${rung.provider}/${rung.model}`;
    acceptedOn = n;
    log(`    ✓ ACCEPTED on attempt ${n} (${rung.model}, ${r.content.length} chars)`);
    break;
  }

  const review: FileReview = {
    file: rel,
    file_bytes: content.length,
    tree_split_fired: treeSplitFired,
    shards_summarized: shardsSummarized,
    top_prd_chunks: topPrd.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
    top_proposal_chunks: topPlan.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
    attempts_made: history.length,
    attempts_history: history,
    // acceptedOn stays 0 when nothing was accepted; report that as null.
    accepted_on: acceptedOn || null,
    escalated_to_model: acceptedModel,
    suggestions: accepted ?? "[no acceptable answer after escalation ladder exhausted]",
    duration_ms: Date.now() - t0,
  };

  // Append to the shared scrum-reviews jsonl so the auditor's
  // kb_query check can surface relevant reviews for files in a
  // PR diff. Cohesion plan Phase C wire.
  if (accepted) {
    const { appendFile, mkdir } = await import("node:fs/promises");
    const { dirname } = await import("node:path");
    const row = {
      file: rel,
      reviewed_at: new Date().toISOString(),
      accepted_model: acceptedModel,
      accepted_on_attempt: acceptedOn,
      attempts_made: history.length,
      tree_split_fired: treeSplitFired,
      suggestions_preview: accepted.slice(0, 2000),
      schema_version: 2,
      scrum_master_reviewed: true,
    };
    try {
      // Directory creation sits inside the try so a KB-directory problem
      // cannot throw away a review that was already accepted.
      await mkdir(dirname(SCRUM_REVIEWS_JSONL), { recursive: true });
      await appendFile(SCRUM_REVIEWS_JSONL, JSON.stringify(row) + "\n");
    } catch (e) {
      console.error(`[scrum] failed to append scrum_reviews.jsonl: ${(e as Error).message}`);
    }

    // Route the accepted review through llm_team's fact extractor so
    // its entities + relationships land in audit_facts.jsonl alongside
    // inference-side extractions. Same index, two sources. Tagged
    // source:"scrum_review" + scrum_master_reviewed:true so downstream
    // queries can filter by provenance. Reviews shorter than 120
    // chars are skipped — they're usually one-liners ("LGTM") with
    // no extractable knowledge.
    if (accepted.length >= 120 && process.env.LH_SCRUM_SKIP_EXTRACT !== "1") {
      try {
        const { extractFacts } = await import("../../auditor/fact_extractor.ts");
        const ex = await extractFacts(accepted);
        // Keep partial results: write the row unless the extractor
        // reported an error AND produced no entities or facts.
        if (!ex.error || ex.entities.length + ex.facts.length > 0) {
          const factRow = {
            pr_number: 0,                   // scrum runs outside a PR scope
            file: rel,
            head_sha: "",                    // no SHA scope; scope is the file+timestamp
            extracted_at: ex.extracted_at,
            extractor: ex.extractor_model,
            verifier: ex.verifier_model,
            llm_team_run_id: ex.llm_team_run_id ?? null,
            facts: ex.facts,
            entities: ex.entities,
            relationships: ex.relationships,
            verification_preview: ex.verification.slice(0, 400),
            schema_version: 2,
            source: "scrum_review",
            scrum_master_reviewed: true,
          };
          const AUDIT_FACTS_JSONL = "/home/profit/lakehouse/data/_kb/audit_facts.jsonl";
          await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(factRow) + "\n");
        }
      } catch (e) {
        console.error(`[scrum] fact extraction failed for ${rel}: ${(e as Error).message}`);
      }
    }
  }

  return review;
}

/**
 * Read a document, cut it into chunks, embed every chunk in a single
 * batch, and return the chunks tagged with `origin_tag`.
 *
 * Each chunk id is the first 10 hex characters of the SHA-256 of its
 * text, so identical text always gets the same id.
 */
async function loadAndChunk(path: string, origin_tag: string): Promise<Chunk[]> {
  const pieces = chunkText(await readFile(path, "utf8"));
  const vectors = await embedBatch(pieces.map(piece => piece.text));

  const chunks: Chunk[] = [];
  for (const [idx, piece] of pieces.entries()) {
    const digest = createHash("sha256").update(piece.text).digest("hex");
    chunks.push({
      id: digest.slice(0, 10),
      text: piece.text,
      embedding: vectors[idx],
      origin: origin_tag,
      offset: piece.offset,
    });
  }
  return chunks;
}

/**
 * Pipeline entry point.
 *
 * Embeds the PRD and the change proposal once, reviews every target
 * file in order (writing one JSON file per review as it goes), then
 * writes a consolidated markdown report and a summary JSON into
 * OUT_DIR and prints a per-file table.
 *
 * Exits the process with 0 when every target was resolved and 1
 * otherwise. An exception escaping main() is reported by the handler
 * below, which exits with 2.
 */
async function main() {
  await mkdir(OUT_DIR, { recursive: true });
  log(`output: ${OUT_DIR}`);
  log(`targets: ${TARGET_FILES.length} files`);

  log("loading + embedding PRD...");
  const prd_chunks = await loadAndChunk(PRD_PATH, "PRD");
  log(`  PRD: ${prd_chunks.length} chunks`);

  log("loading + embedding cohesion plan...");
  const plan_chunks = await loadAndChunk(PROPOSAL_PATH, "COHESION_PLAN");
  log(`  plan: ${plan_chunks.length} chunks`);

  log("");
  log("─── scrum master: walking target files ───");

  const reviews: FileReview[] = [];
  for (const f of TARGET_FILES) {
    const review = await reviewFile(f, prd_chunks, plan_chunks);
    reviews.push(review);
    // Persist each review immediately so finished work survives a
    // later failure. Slashes are flattened to keep a single file name.
    await writeFile(
      `${OUT_DIR}/review_${review.file.replace(/\//g, "_")}.json`,
      JSON.stringify(review, null, 2),
    );
    log(`  → ${review.file}: ${review.accepted_on ? `accepted on ${review.accepted_on} by ${review.escalated_to_model}` : "UNRESOLVED"} (${review.duration_ms}ms)`);
  }

  // Consolidated scrum-master report
  const report_md: string[] = [];
  report_md.push(`# Scrum-master review\n`);
  report_md.push(`Generated: ${new Date().toISOString()}`);
  report_md.push(`Files reviewed: ${reviews.length}`);
  report_md.push(`Total duration: ${(reviews.reduce((s, r) => s + r.duration_ms, 0) / 1000).toFixed(1)}s\n`);
  for (const r of reviews) {
    report_md.push(`\n## ${r.file}`);
    // Report the attempts actually made rather than a hard-coded count,
    // so the text stays correct if the attempt budget changes.
    report_md.push(`- **Accepted on attempt:** ${r.accepted_on ?? `NOT resolved after ${r.attempts_made} attempts`}`);
    report_md.push(`- **Escalated to:** \`${r.escalated_to_model || "—"}\``);
    report_md.push(`- **Total attempts:** ${r.attempts_made}`);
    // A single-attempt history adds nothing beyond the lines above.
    if (r.attempts_history.length > 1) {
      report_md.push(`- **Attempt history:**`);
      for (const h of r.attempts_history) {
        report_md.push(`  - ${h.n}: \`${h.model}\` → ${h.status}${h.error ? ` (${h.error.slice(0, 100)})` : ""}`);
      }
    }
    report_md.push(`\n### Suggestions\n\n${r.suggestions}\n`);
  }
  await writeFile(`${OUT_DIR}/scrum_report.md`, report_md.join("\n"));

  const summary = {
    ran_at: new Date().toISOString(),
    target_count: TARGET_FILES.length,
    resolved: reviews.filter(r => r.accepted_on !== null).length,
    total_attempts: reviews.reduce((s, r) => s + r.attempts_made, 0),
    total_duration_ms: reviews.reduce((s, r) => s + r.duration_ms, 0),
    per_file: reviews.map(r => ({ file: r.file, accepted_on: r.accepted_on, model: r.escalated_to_model, attempts: r.attempts_made, ms: r.duration_ms })),
  };
  await writeFile(`${OUT_DIR}/summary.json`, JSON.stringify(summary, null, 2));

  log("");
  log("═══ SCRUM REPORT ═══");
  log(`  files: ${summary.target_count}, resolved: ${summary.resolved}, total attempts: ${summary.total_attempts}`);
  log(`  total time: ${(summary.total_duration_ms / 1000).toFixed(1)}s`);
  log("");
  for (const p of summary.per_file) {
    const mark = p.accepted_on ? "✓" : "✗";
    log(`  ${mark} ${p.file.padEnd(60)} attempt ${p.accepted_on ?? "—"}/${p.attempts} ${p.model} ${p.ms}ms`);
  }
  log("");
  log(`report: ${OUT_DIR}/scrum_report.md`);

  process.exit(summary.resolved === summary.target_count ? 0 : 1);
}

// Any uncaught failure (unreadable input, sidecar down, write error)
// is fatal for the run and reported with a distinct exit code.
main().catch(e => { console.error("[scrum] fatal:", e); process.exit(2); });