#!/usr/bin/env bun
/**
 * Mode comparison aggregator — reads data/_kb/mode_experiments.jsonl
 * (written per-call by /v1/mode/execute) and surfaces the cross-mode
 * comparison matrix that lets us see what each enrichment dimension
 * is actually doing.
 *
 * Per file, per mode, reports:
 *   - response_chars
 *   - finding_count (rows in markdown tables — heuristic, regex)
 *   - pathway_citations (mentions of "Pathway memory" or "📚")
 *   - latency_ms
 *   - matrix_chunks_kept / (kept + dropped)
 *   - bug_fingerprints_count
 *
 * Then surfaces:
 *   - per file, what each mode produced (rows next to each other)
 *   - per mode, average response_chars, findings, citations and latency
 *   - per mode, wins / losses / ties in finding count against
 *     codereview_lakehouse on files where both modes have a row
 *
 * Usage: bun run scripts/mode_compare.ts [--jsonl path] [--since 2026-04-26]
 */

import { readFileSync, existsSync } from "node:fs";

/** One JSONL record of a mode experiment, as consumed by this script. */
interface Row {
  ts: string;
  mode: string;
  model: string;
  task_class: string;
  file_path: string;
  enriched_prompt_chars: number;
  response_chars: number;
  latency_ms: number;
  sources: {
    focus_file_bytes?: number;
    bug_fingerprints_count?: number;
    matrix_chunks_kept?: number;
    matrix_chunks_dropped?: number;
    relevance_filter_used?: boolean;
    flags?: unknown;
  };
  response: string;
}

/** Log file read when --jsonl is not given (or given without a value). */
const DEFAULT_JSONL = "data/_kb/mode_experiments.jsonl";

/** Mode every other mode is compared against in the last report section. */
const BASELINE_MODE = "codereview_lakehouse";

/** Markdown table row whose first cell is a number, optionally bold: `| 3 |` or `| **3** |`. */
const FINDING_ROW_RE = /^\|\s*\*?\*?\d+\*?\*?\s*\|/gm;

/** Case-insensitive mention of the pathway memory preamble. */
const PATHWAY_CITATION_RE = /pathway\s*memory|📚/gi;

/**
 * Parses `--name value` pairs from the command line.
 *
 * @returns `jsonl` — log path (defaults to {@link DEFAULT_JSONL});
 *          `since` — lower bound for `ts`, or `null` when absent/empty.
 */
function parseArgs(): { jsonl: string; since: string | null } {
  const args = Bun.argv.slice(2);
  const out: Record<string, string> = {};
  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    // Every flag consumes the following token as its value ("" at end of argv).
    if (a.startsWith("--")) out[a.slice(2)] = args[++i] ?? "";
  }
  return {
    // `||` rather than `??`: a bare `--jsonl` stores "", which must not become the path.
    jsonl: out.jsonl || DEFAULT_JSONL,
    since: out.since || null,
  };
}

/**
 * Runtime check that a parsed JSON value carries the fields this report
 * reads unconditionally. Only those fields are verified; `sources` is
 * normalised separately by the caller.
 */
function isUsableRow(value: unknown): value is Row {
  if (typeof value !== "object" || value === null) return false;
  const rec = value as Record<string, unknown>;
  return (
    typeof rec.mode === "string" &&
    typeof rec.file_path === "string" &&
    typeof rec.response === "string" &&
    Number.isFinite(rec.response_chars) &&
    Number.isFinite(rec.latency_ms)
  );
}

/**
 * Loads experiment rows from a JSONL file.
 *
 * Exits the process with status 1 when the file does not exist. Lines that
 * are not valid JSON, or that lack the fields the report needs, are skipped
 * and their count is reported on stderr.
 *
 * @param path  JSONL file to read.
 * @param since When set, rows whose `ts` sorts before this string are dropped
 *              (plain string comparison — assumes ISO-8601 style timestamps;
 *              TODO confirm against the writer).
 * @returns Rows in file order.
 */
function loadRows(path: string, since: string | null): Row[] {
  if (!existsSync(path)) {
    console.error(`[compare] no log file at ${path}`);
    process.exit(1);
  }
  const lines = readFileSync(path, "utf8")
    .split("\n")
    .filter((line) => line.trim() !== "");
  const rows: Row[] = [];
  let skipped = 0;
  for (const line of lines) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      skipped++;
      continue;
    }
    if (!isUsableRow(parsed)) {
      skipped++;
      continue;
    }
    if (since && typeof parsed.ts === "string" && parsed.ts < since) continue;
    // Later code dereferences `sources` directly; make sure it is an object.
    if (typeof parsed.sources !== "object" || parsed.sources === null) {
      parsed.sources = {};
    }
    rows.push(parsed);
  }
  if (skipped > 0) {
    console.error(`[compare] skipped ${skipped} malformed or incomplete line(s) in ${path}`);
  }
  return rows;
}

/**
 * Counts markdown table rows that look like findings: `| N | ...` or
 * `| **N** | ...`. Heuristic — adversarial framing produces ranked tables.
 */
function countFindings(md: string): number {
  const matches = md.match(FINDING_ROW_RE);
  return matches ? matches.length : 0;
}

/** Counts how many times the model referenced the pathway memory preamble. */
function countPathwayCitations(md: string): number {
  return (md.match(PATHWAY_CITATION_RE) ?? []).length;
}

/**
 * Pads a cell to `n` characters.
 *
 * @param s     Cell content.
 * @param n     Column width.
 * @param right Right-align (used for numeric columns) instead of left-align.
 * @returns The padded cell. Left-aligned cells longer than `n` are cut to `n`;
 *          right-aligned cells are never cut, because a truncated number
 *          would read as a different, wrong value.
 */
function pad(s: string | number, n: number, right = false): string {
  const str = String(s);
  if (str.length >= n) return right ? str : str.slice(0, n);
  const fill = " ".repeat(n - str.length);
  return right ? fill + str : str + fill;
}

/** Builds the `─` separator line printed under a table header. */
function rule(widths: readonly number[]): string {
  return ` ${widths.map((w) => "─".repeat(w)).join(" ")}`;
}

/** Prints, for every file, one line per known mode with that mode's metrics. */
function printPerFile(
  byFile: ReadonlyMap<string, ReadonlyMap<string, Row>>,
  modesSorted: readonly string[],
): void {
  console.log("\n═══ PER-FILE COMPARISON ═══\n");
  for (const file of [...byFile.keys()].sort()) {
    const perMode = byFile.get(file);
    if (!perMode) continue;
    console.log(`📄 ${file}`);
    console.log(
      ` ${pad("mode", 28)} ${pad("resp", 6, true)} ${pad("findings", 8, true)} ${pad("path_cit", 8, true)} ${pad("ms", 7, true)} ${pad("mtx k/d", 9, true)} ${pad("bug_fp", 6, true)}`,
    );
    console.log(rule([28, 6, 8, 8, 7, 9, 6]));
    for (const mode of modesSorted) {
      const r = perMode.get(mode);
      if (!r) {
        // This mode was never run on this file.
        console.log(` ${pad(mode, 28)} ${pad("—", 6, true)}`);
        continue;
      }
      const findings = countFindings(r.response);
      const cits = countPathwayCitations(r.response);
      const mk = r.sources.matrix_chunks_kept ?? 0;
      const md = r.sources.matrix_chunks_dropped ?? 0;
      const bf = r.sources.bug_fingerprints_count ?? 0;
      console.log(
        ` ${pad(mode, 28)} ${pad(r.response_chars, 6, true)} ${pad(findings, 8, true)} ${pad(cits, 8, true)} ${pad(r.latency_ms, 7, true)} ${pad(`${mk}/${mk + md}`, 9, true)} ${pad(bf, 6, true)}`,
      );
    }
    console.log("");
  }
}

/** Prints per-mode averages over ALL rows of that mode (not just the latest per file). */
function printPerMode(
  byMode: ReadonlyMap<string, readonly Row[]>,
  modesSorted: readonly string[],
): void {
  console.log("═══ PER-MODE AGGREGATE ═══\n");
  console.log(
    ` ${pad("mode", 28)} ${pad("n", 4, true)} ${pad("avg resp", 9, true)} ${pad("avg find", 9, true)} ${pad("avg cit", 8, true)} ${pad("avg ms", 8, true)}`,
  );
  console.log(rule([28, 4, 9, 9, 8, 8]));
  for (const mode of modesSorted) {
    const modeRows = byMode.get(mode) ?? [];
    const n = modeRows.length;
    if (n === 0) continue;
    let respSum = 0;
    let findSum = 0;
    let citSum = 0;
    let msSum = 0;
    for (const r of modeRows) {
      respSum += r.response_chars;
      findSum += countFindings(r.response);
      citSum += countPathwayCitations(r.response);
      msSum += r.latency_ms;
    }
    const avgResp = Math.round(respSum / n);
    // One decimal place for the small-count averages.
    const avgFind = Math.round((10 * findSum) / n) / 10;
    const avgCit = Math.round((10 * citSum) / n) / 10;
    const avgMs = Math.round(msSum / n);
    console.log(
      ` ${pad(mode, 28)} ${pad(n, 4, true)} ${pad(avgResp, 9, true)} ${pad(avgFind, 9, true)} ${pad(avgCit, 8, true)} ${pad(avgMs, 8, true)}`,
    );
  }
}

/**
 * Prints, per non-baseline mode, how often it produced more / fewer / equal
 * findings than the baseline mode, over files where both have a row.
 */
function printVsBaseline(
  byFile: ReadonlyMap<string, ReadonlyMap<string, Row>>,
  modesSorted: readonly string[],
): void {
  console.log("\n═══ MODE vs codereview_lakehouse (per file) ═══\n");
  console.log(
    ` ${pad("mode", 28)} ${pad("wins", 5, true)} ${pad("losses", 7, true)} ${pad("ties", 5, true)} ${pad("Δ avg findings", 16, true)}`,
  );
  console.log(rule([28, 5, 7, 5, 16]));
  for (const mode of modesSorted) {
    if (mode === BASELINE_MODE) continue;
    let wins = 0;
    let losses = 0;
    let ties = 0;
    let totalDelta = 0;
    let n = 0;
    for (const perMode of byFile.values()) {
      const baseline = perMode.get(BASELINE_MODE);
      const challenger = perMode.get(mode);
      if (!baseline || !challenger) continue;
      const bf = countFindings(baseline.response);
      const cf = countFindings(challenger.response);
      if (cf > bf) wins++;
      else if (cf < bf) losses++;
      else ties++;
      totalDelta += cf - bf;
      n++;
    }
    if (n === 0) continue;
    const avgDelta = (totalDelta / n).toFixed(1);
    console.log(
      ` ${pad(mode, 28)} ${pad(wins, 5, true)} ${pad(losses, 7, true)} ${pad(ties, 5, true)} ${pad(avgDelta, 16, true)}`,
    );
  }
}

/**
 * Entry point: loads the log, groups rows, and prints the three report
 * sections. Exits with status 1 when no rows survive loading/filtering.
 */
function main(): void {
  const { jsonl, since } = parseArgs();
  const rows = loadRows(jsonl, since);
  if (rows.length === 0) {
    console.error("[compare] no rows after filter");
    process.exit(1);
  }

  // Maps rather than plain objects: file paths and mode names come from the
  // log, and a key such as "constructor" must not resolve to a prototype member.
  const byFile = new Map<string, Map<string, Row>>();
  const byMode = new Map<string, Row[]>();
  for (const r of rows) {
    let perMode = byFile.get(r.file_path);
    if (!perMode) {
      perMode = new Map<string, Row>();
      byFile.set(r.file_path, perMode);
    }
    perMode.set(r.mode, r); // last-write-wins per mode per file

    const sameMode = byMode.get(r.mode);
    if (sameMode) sameMode.push(r);
    else byMode.set(r.mode, [r]);
  }
  const modesSorted = [...byMode.keys()].sort();

  printPerFile(byFile, modesSorted);
  printPerMode(byMode, modesSorted);
  printVsBaseline(byFile, modesSorted);

  console.log("\n[compare] done\n");
}

main();