#!/usr/bin/env bun
/**
 * Pass 5: variance test for the 2026-04-26 paid-model bake-off.
 *
 * The pass-4 single-rep sweep showed isolation beating every matrix
 * condition by 1.0-1.4 grounded findings/file on grok-4.1-fast. This
 * harness runs N reps × M conditions on the file where the effect was
 * sharpest (pathway_memory.rs, 1355 lines) so we can decide whether
 * the deltas are real signal or run-to-run noise.
 *
 * Conditions:
 *   1. codereview_isolation                                — no matrix
 *   2. codereview_lakehouse + corpus=lakehouse_arch_v1     — A only
 *   3. codereview_lakehouse + corpus=lakehouse_symbols_v1  — C only
 *   4. codereview_lakehouse (modes.toml default)           — A+C composed
 *
 * Output appends per-call to data/_kb/mode_experiments.jsonl. Aggregate
 * with `bun run scripts/mode_compare.ts --since <iso-timestamp>` and read the
 * grounded column with multiple rows per (mode|corpus) key.
 *
 * Environment:
 *   LH_GATEWAY  gateway base URL        (default http://localhost:3100)
 *   LH_MODEL    model forced per call   (default x-ai/grok-4.1-fast)
 *   LH_FILE     file path under review  (default crates/vectord/src/pathway_memory.rs)
 *   LH_REPS     reps per condition, a positive integer (default 5)
 *
 * Usage:
 *   bun run scripts/mode_pass5_variance_paid.ts
 *   LH_REPS=3 LH_FILE=crates/queryd/src/delta.rs bun run scripts/mode_pass5_variance_paid.ts
 */

const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
const MODEL = process.env.LH_MODEL ?? "x-ai/grok-4.1-fast";
const FILE = process.env.LH_FILE ?? "crates/vectord/src/pathway_memory.rs";

/** Number of reps per condition when LH_REPS is not set. */
const DEFAULT_REPS = 5;

/** Per-request timeout passed to `AbortSignal.timeout`, in milliseconds. */
const REQUEST_TIMEOUT_MS = 240_000;

/**
 * Parses the LH_REPS setting.
 *
 * @param raw - the raw environment value, or `undefined` when unset.
 * @returns `DEFAULT_REPS` when unset, otherwise the parsed rep count.
 * @throws Error when the value is not a positive integer. Without this check
 *   a typo yields NaN (the rep loop silently runs zero times) and a fractional
 *   value makes the printed run total disagree with the runs performed.
 */
function parseReps(raw: string | undefined): number {
  if (raw === undefined) return DEFAULT_REPS;
  const parsed = Number(raw);
  // Number("") is 0 and Number("  ") is 0, so blank input is rejected by the `< 1` check.
  if (!Number.isInteger(parsed) || parsed < 1) {
    throw new Error(`LH_REPS must be a positive integer, got ${JSON.stringify(raw)}`);
  }
  return parsed;
}

const REPS = parseReps(process.env.LH_REPS);

/** One experimental arm: a forced mode plus an optional forced matrix corpus. */
interface Condition {
  /** Name printed in the progress output. */
  label: string;
  /** Sent as `force_mode`. */
  mode: string;
  /** Sent as `force_matrix_corpus`; the field is omitted from the request when undefined. */
  corpus?: string | string[];
}

/** JSON body posted to `/v1/mode/execute`. */
interface ExecuteRequest {
  task_class: string;
  file_path: string;
  force_mode: string;
  force_model: string;
  force_matrix_corpus?: string | string[];
}

/** Outcome of a single gateway call. */
interface RunResult {
  ok: boolean;
  latency_ms?: number;
  resp_chars?: number;
  error?: string;
}

const CONDITIONS: Condition[] = [
  { label: "isolation ", mode: "codereview_isolation" },
  { label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" },
  { label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" },
  { label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ },
];

/**
 * Executes one condition against the gateway.
 *
 * Never rejects: HTTP errors, network/timeout failures and malformed response
 * bodies are all reported as `{ ok: false, error }` so the sweep keeps going.
 *
 * @param c - the condition to run.
 * @param rep - 1-based repetition index. Not sent to the gateway; kept so the
 *   call signature stays stable.
 * @returns on success, the `latency_ms` reported in the response (when it is a
 *   number) and the character length of `response` (0 when it is not a string).
 */
async function runOne(c: Condition, rep: number): Promise<RunResult> {
  const body: ExecuteRequest = {
    task_class: "scrum_review",
    file_path: FILE,
    force_mode: c.mode,
    force_model: MODEL,
  };
  if (c.corpus !== undefined) body.force_matrix_corpus = c.corpus;

  try {
    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
    });

    if (!r.ok) {
      // Best effort: an unreadable error body must not hide the status code.
      const txt = await r.text().catch(() => "");
      return { ok: false, error: `HTTP ${r.status}: ${txt.slice(0, 160)}` };
    }

    const parsed: unknown = await r.json();
    if (typeof parsed !== "object" || parsed === null) {
      return { ok: false, error: "unexpected response body (not a JSON object)" };
    }
    // Narrowed to a non-null object above; individual fields are still checked below.
    const payload = parsed as { latency_ms?: unknown; response?: unknown };

    return {
      ok: true,
      latency_ms: typeof payload.latency_ms === "number" ? payload.latency_ms : undefined,
      resp_chars: typeof payload.response === "string" ? payload.response.length : 0,
    };
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances are guaranteed to carry `.message`.
    return { ok: false, error: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * Runs every condition once per rep, sequentially, printing one progress line
 * per call, then prints a success tally and the aggregate command to run next.
 */
async function main(): Promise<void> {
  const total = CONDITIONS.length * REPS;
  console.log(`[pass5] file=${FILE}`);
  console.log(`[pass5] model=${MODEL} · ${CONDITIONS.length} conditions × ${REPS} reps = ${total} runs`);
  console.log("");

  let i = 0;
  let succeeded = 0;
  // Captured before the first call so `--since` covers every run of this sweep.
  const startTs = new Date().toISOString();

  // Reps are the outer loop, so each rep cycles through all conditions in order.
  for (let rep = 1; rep <= REPS; rep++) {
    for (const c of CONDITIONS) {
      i++;
      process.stdout.write(` [${i}/${total}] rep=${rep} ${c.label}... `);
      const r = await runOne(c, rep);
      if (r.ok) {
        succeeded++;
        console.log(`✓ ${r.resp_chars} chars · ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
      } else {
        console.log(`✗ ${r.error}`);
      }
    }
  }

  console.log(`\n[pass5] complete · started ${startTs}`);
  // Failed calls shrink the effective N per condition, so report them explicitly.
  console.log(`[pass5] ${succeeded}/${total} runs succeeded`);
  console.log(`[pass5] aggregate: bun run scripts/mode_compare.ts --since ${startTs}`);
}

main().catch((e: unknown) => {
  console.error(e);
  process.exit(1);
});