lakehouse/scripts/mode_pass5_summarize.ts

#!/usr/bin/env bun
/**
 * Pass 5 variance summarizer. Reads data/_kb/mode_experiments.jsonl
 * since a timestamp, groups by (mode|corpus), reports mean ± stddev
 * of grounded finding count, plus a head-to-head wins/losses table
 * vs the isolation baseline.
 *
 * Usage:
 *   bun run scripts/mode_pass5_summarize.ts                        # default 2h
 *   bun run scripts/mode_pass5_summarize.ts --since 2026-04-26T22  # explicit
 */

import { readFileSync, existsSync } from "node:fs";

/**
 * Inclusive lower bound for the `ts` field of rows to summarize.
 *
 * Taken from the argument that follows `--since`; when the flag is absent it
 * defaults to two hours before now, as a full ISO-8601 string. The value is
 * compared against row timestamps as a plain string, so a truncated prefix
 * such as `2026-04-26T22` is acceptable.
 */
const argSince = (() => {
  const i = Bun.argv.indexOf("--since");
  if (i < 0) return new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString();
  const value = Bun.argv[i + 1];
  // A bare `--since` would otherwise yield `undefined`; `ts < undefined` is
  // always false, so every row in the log would pass the time filter.
  if (value === undefined || value === "" || value.startsWith("--")) {
    console.error("--since requires a timestamp value, e.g. --since 2026-04-26T22");
    process.exit(1);
  }
  return value;
})();

// Experiment log to summarize (relative path).
const JSONL = "data/_kb/mode_experiments.jsonl";
// Nothing to report without the log: say so on stderr and exit non-zero.
if (existsSync(JSONL) === false) {
  console.error(`no ${JSONL}`);
  process.exit(1);
}

/** One record of the experiment log, as consumed by this script. */
interface Row {
  /** Timestamp; compared as a string against the `--since` cutoff. */
  ts: string;
  /** Mode name; first component of the condition key. */
  mode: string;
  /** Path of the focus file the response is grounded against. */
  file_path: string;
  /** Markdown response whose findings table is scored. */
  response: string;
  /** Source metadata; `matrix_corpus` may be one name, a list, null or absent. */
  sources: { matrix_corpus?: string | string[] | null };
  /** Latency in milliseconds (reported as whole seconds). */
  latency_ms: number;
}

/**
 * Normalize a `matrix_corpus` value into a stable string key.
 *
 * A string is returned as-is; an array is copied, sorted and joined with "+"
 * (so element order does not matter, and an empty array gives ""); anything
 * else — null, undefined, objects, numbers — gives "".
 */
function corpusKey(c: any): string {
  if (Array.isArray(c)) {
    // Sort a copy so the caller's array is left untouched.
    return c.slice().sort().join("+");
  }
  return typeof c === "string" ? c : "";
}
// Condition key for a row: "<mode>|<corpus>" when the row names a corpus,
// otherwise just "<mode>".
const condKey = (r: Row) => {
  const corpus = corpusKey(r.sources?.matrix_corpus);
  if (corpus === "") return r.mode;
  return `${r.mode}|${corpus}`;
};

// Reuse the same grounding logic as mode_compare: a findings row counts as
// grounded when at least one symbol it cites appears in the focus file and
// none of its line numbers fall past EOF.
/**
 * Pull the findings table rows out of a markdown response and, for each row,
 * collect the candidate symbols and line numbers it mentions.
 *
 * If a "Findings" heading (levels 1-3) is present, only the text between it
 * and the next Patch/Suggestion/Reference/Summary/Concrete heading is
 * scanned; otherwise the whole document is.
 *
 * @param md markdown text of a response
 * @returns one entry per table row, in document order
 */
function extractFindings(md: string): { symbols: string[]; lines: number[] }[] {
  const headingRe = /(?:^|\n)#{1,3}[^\na-zA-Z]*(?:Ranked\s+)?Findings?[^\n]*\n/i;
  const nextHeadingRe = /\n#{1,3}[^\na-zA-Z]*(?:Patch|Suggestion|Reference|Summary|Concrete)/i;

  let body = md;
  const heading = headingRe.exec(md);
  if (heading !== null) {
    const tail = md.slice(heading.index + heading[0].length);
    const cut = tail.search(nextHeadingRe);
    body = cut < 0 ? tail : tail.slice(0, cut);
  }

  // Row shapes recognised:
  //   numbered:        `| 1 | ... |`  (the number may be bold)
  //   path-with-line:  `| service.rs:106 | ... |`
  //   path-with-sym:   `| crates/vectord/src/pathway_memory.rs:load_fn (≈L220) | ... |`
  // Both path shapes are caught by the same pattern. Whichever family matches
  // more rows is used; on a tie the numbered rows win.
  const numberedRowRe = /^\|\s*\*?\*?\d+\*?\*?\s*\|/;
  const pathRowRe = /^\|\s*[a-z_/\.][a-z_/\.0-9]*\.(rs|ts|py)\b/i;
  const bodyLines = body.split("\n");
  const numberedRows = bodyLines.filter(line => numberedRowRe.test(line));
  const pathRows = bodyLines.filter(line => pathRowRe.test(line));
  const chosen = pathRows.length > numberedRows.length ? pathRows : numberedRows;

  return chosen.map(row => {
    // Back-ticked identifiers first, then any lowercase word of 5+ characters.
    const quoted = Array.from(row.matchAll(/`([A-Za-z_][A-Za-z0-9_:]*)`/g), hit => hit[1]);
    const words = Array.from(row.matchAll(/\b([a-z][a-z0-9_]{4,})\b/g), hit => hit[1]);
    // Numbers of 2-5 digits that directly follow ":" or "-".
    const lineNumbers = Array.from(row.matchAll(/[:\-](\d{2,5})/g), hit => parseInt(hit[1]));
    return { symbols: [...new Set([...quoted, ...words])], lines: lineNumbers };
  });
}

/**
 * Score the findings of one response against its focus file.
 *
 * A finding is out of bounds when any of its line numbers exceeds the file's
 * line count (taken as the number of "\n"-separated pieces, so a trailing
 * newline counts as one extra line). A finding is grounded when it is not out
 * of bounds and at least one of its symbols occurs verbatim in the file.
 *
 * @param md markdown response to score
 * @param file path of the focus file; read synchronously as UTF-8
 * @returns number of findings, how many are grounded, how many are out of bounds
 */
function grounded(md: string, file: string): { total: number; grounded: number; oob: number } {
  const source = readFileSync(file, "utf8");
  const lineCount = source.split("\n").length;
  const findings = extractFindings(md);

  let groundedCount = 0;
  let outOfBounds = 0;
  for (const finding of findings) {
    if (finding.lines.some(n => n > lineCount)) {
      // An out-of-range line disqualifies the row regardless of symbol hits.
      outOfBounds += 1;
      continue;
    }
    if (finding.symbols.some(sym => source.includes(sym))) {
      groundedCount += 1;
    }
  }
  return { total: findings.length, grounded: groundedCount, oob: outOfBounds };
}

// Load the experiment log and keep the rows whose `ts` is at or after the
// cutoff. Lines that are not valid JSON, and rows lacking the string fields
// the rest of the script reads (`ts`, `mode`, `file_path`, `response`), are
// skipped and counted, so they neither bypass the time filter nor crash
// grounded() further down.
const lines = readFileSync(JSONL, "utf8").split("\n").filter(l => l.trim() !== "");
const rows: Row[] = [];
let skipped = 0;
for (const l of lines) {
  let parsed: unknown;
  try {
    parsed = JSON.parse(l);
  } catch {
    skipped++;
    continue;
  }
  if (typeof parsed !== "object" || parsed === null) { skipped++; continue; }
  const r = parsed as Partial<Row>;
  if (typeof r.ts !== "string") { skipped++; continue; }
  // String comparison: works for ISO-8601 timestamps and truncated prefixes.
  if (r.ts < argSince) continue;
  // Only rows inside the window are checked further, so stale malformed
  // records from before the cutoff do not add noise to the warning.
  if (typeof r.mode !== "string" || typeof r.file_path !== "string" || typeof r.response !== "string") {
    skipped++;
    continue;
  }
  rows.push(r as Row);
}
if (skipped > 0) console.error(`skipped ${skipped} malformed line(s) in ${JSONL}`);

if (rows.length === 0) { console.error(`no rows since ${argSince}`); process.exit(1); }

// Bucket every row by condition, then by focus file. Each bucket keeps four
// parallel arrays (grounded count, total findings, out-of-bounds count,
// latency in ms), one entry per row, in log order.
type CellArr = { grnd: number[]; total: number[]; oob: number[]; ms: number[] };
const byCond: Record<string, Record<string, CellArr>> = {};
for (const row of rows) {
  const perFile = (byCond[condKey(row)] ??= {});
  const cell = (perFile[row.file_path] ??= { grnd: [], total: [], oob: [], ms: [] });
  const score = grounded(row.response, row.file_path);
  cell.grnd.push(score.grounded);
  cell.total.push(score.total);
  cell.oob.push(score.oob);
  cell.ms.push(row.latency_ms);
}

/**
 * Summary statistics for a list of numbers.
 *
 * @param xs the samples
 * @returns count, mean, sample standard deviation (n - 1 denominator; 0 for a
 *          single sample), minimum and maximum. All zeros for an empty list.
 */
function stats(xs: number[]): { n: number; mean: number; sd: number; min: number; max: number } {
  const n = xs.length;
  if (n === 0) {
    return { n: 0, mean: 0, sd: 0, min: 0, max: 0 };
  }

  let total = 0;
  for (const x of xs) total += x;
  const mean = total / n;

  let sd = 0;
  if (n > 1) {
    let squaredDev = 0;
    for (const x of xs) squaredDev += (x - mean) ** 2;
    sd = Math.sqrt(squaredDev / (n - 1));
  }

  const min = Math.min(...xs);
  const max = Math.max(...xs);
  return { n, mean, sd, min, max };
}

// Sorted condition keys and focus files seen in the window.
const conditions = Object.keys(byCond).sort();
const files = [...new Set(rows.map(r => r.file_path))].sort();

console.log(`\n═══ Pass 5 variance — since ${argSince} ═══\n`);
console.log(`  ${rows.length} rows · ${conditions.length} conditions · ${files.length} files\n`);

// One table per focus file, one line per condition that has rows for it.
for (const file of files) {
  console.log(`📄 ${file}`);
  // Header, rule and data lines share the same column widths (56, 3, 20, 8,
  // 4, 7) and single-space gaps so the columns line up.
  console.log(`  ${"condition".padEnd(56)} ${"n".padStart(3)} ${"grounded mean ± sd".padStart(20)} ${"range".padStart(8)} ${"oob".padStart(4)} ${"avg ms".padStart(7)}`);
  console.log(`  ${"─".repeat(56)} ─── ${"─".repeat(20)} ${"─".repeat(8)} ${"─".repeat(4)} ${"─".repeat(7)}`);
  for (const c of conditions) {
    const cell = byCond[c]?.[file];
    if (!cell || cell.grnd.length === 0) continue;
    const s = stats(cell.grnd);
    const oobSum = cell.oob.reduce((a, b) => a + b, 0);
    const msMean = cell.ms.reduce((a, b) => a + b, 0) / cell.ms.length;
    const meanSd = `${s.mean.toFixed(1)} ± ${s.sd.toFixed(1)}`;
    const range = `[${s.min}-${s.max}]`;
    // Mean latency is shown in whole seconds with an "s" suffix; the suffix
    // is padded together with the number to fill the 7-wide column.
    const latency = `${Math.round(msMean / 1000)}s`;
    console.log(`  ${c.padEnd(56)} ${String(s.n).padStart(3)} ${meanSd.padStart(20)} ${range.padStart(8)} ${String(oobSum).padStart(4)} ${latency.padStart(7)}`);
  }
  console.log("");
}

// Head-to-head: compare every other condition against the isolation baseline
// (the first condition key starting with "codereview_isolation"). Within each
// file, rep i of the challenger is paired with rep i of the baseline; when the
// two have different rep counts only the first min(count) reps are paired.
console.log(`═══ Head-to-head: each condition vs isolation, rep-by-rep ═══\n`);
const isoKey = conditions.find(c => c.startsWith("codereview_isolation"));
if (isoKey !== undefined) {
  console.log(`  baseline: ${isoKey}\n`);
  console.log(`  ${"challenger".padEnd(56)} wins losses ties  Δ mean grnd`);
  console.log(`  ${"─".repeat(56)} ${"─".repeat(4)} ${"─".repeat(6)} ${"─".repeat(4)} ${"─".repeat(12)}`);
  for (const challenger of conditions) {
    if (challenger === isoKey) continue;
    const tally = { wins: 0, losses: 0, ties: 0 };
    let deltaTotal = 0;
    for (const file of files) {
      const baseline = byCond[isoKey]?.[file]?.grnd ?? [];
      const contender = byCond[challenger]?.[file]?.grnd ?? [];
      const paired = Math.min(baseline.length, contender.length);
      for (let rep = 0; rep < paired; rep++) {
        const diff = contender[rep] - baseline[rep];
        if (diff > 0) tally.wins++;
        else if (diff < 0) tally.losses++;
        else tally.ties++;
        deltaTotal += diff;
      }
    }
    // Every paired rep lands in exactly one bucket, so the buckets sum to the pair count.
    const pairs = tally.wins + tally.losses + tally.ties;
    const dMean = pairs > 0 ? (deltaTotal / pairs).toFixed(2) : "—";
    console.log(`  ${challenger.padEnd(56)} ${String(tally.wins).padStart(4)} ${String(tally.losses).padStart(6)} ${String(tally.ties).padStart(4)} ${dMean.padStart(12)}`);
  }
} else {
  console.log("  no isolation rows in window");
}