Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
/v1/chat/completions route alias (same handler as /chat) lets any tool
using the official `openai` SDK adopt the gateway via OPENAI_BASE_URL
alone — no custom provider field needed.
resolve_provider() extended:
- bare `vendor/model` (slash) → openrouter (catches x-ai/grok-4.1-fast,
moonshotai/kimi-k2, deepseek/deepseek-v4-flash, openai/gpt-oss-120b:free)
- bare vendor model names (no slash, no colon) get auto-prefixed:
gpt-* / o1-* / o3-* / o4-* → openai/<name> (OpenRouter form)
claude-* → anthropic/<name>
grok-* → x-ai/<name>
Then routed to openrouter. Ollama models (with colon, no slash) keep
default routing. Tools like pi-ai validate against an OpenAI-style
catalog and send bare names — this lets them flow through cleanly.
Verified end-to-end:
- curl POST /v1/chat/completions {model: "gpt-4o-mini", ...} → 200,
routed to openrouter as openai/gpt-4o-mini
- openai SDK with baseURL=http://localhost:3100/v1 → 3 model variants all
succeed (openai/gpt-4o-mini, gpt-4o-mini, x-ai/grok-4.1-fast)
- Langfuse traces fire automatically on every call
(v1.chat:openrouter, provider tagged in metadata)
scripts/mode_pass5_variance_paid.ts gains LH_CONDITIONS env so subset
runs (e.g. just isolation vs composed) take half the latency.
Archon-on-Lakehouse integration: gateway side is done. Pi-ai's
openai-responses backend uses /v1/responses (not /chat/completions) and
its openrouter backend appears to bail in client-side validation before
sending. Patching Pi locally to override baseUrl works for arch but the
harness still rejects — needs more work in a follow-up. Direct openai
SDK path (langchain-js / agents / patched Pi) works today.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
106 lines · 4.0 KiB · TypeScript
#!/usr/bin/env bun
/**
 * Pass 5: variance test for the 2026-04-26 paid-model bake-off.
 *
 * The pass-4 single-rep sweep showed isolation beating every matrix
 * condition by 1.0-1.4 grounded findings/file on grok-4.1-fast. This
 * harness runs N reps × M conditions on the file where the effect was
 * sharpest (pathway_memory.rs, 1355 lines) so we can decide whether
 * the deltas are real signal or run-to-run noise.
 *
 * Conditions:
 *   1. codereview_isolation — no matrix
 *   2. codereview_lakehouse + corpus=lakehouse_arch_v1 — A only
 *   3. codereview_lakehouse + corpus=lakehouse_symbols_v1 — C only
 *   4. codereview_lakehouse (modes.toml default) — A+C composed
 *
 * Output appends per-call to data/_kb/mode_experiments.jsonl. Aggregate
 * with `bun run scripts/mode_compare.ts --since <ts>` and read the
 * grounded column with multiple rows per (mode|corpus) key.
 *
 * Usage:
 *   bun run scripts/mode_pass5_variance_paid.ts
 *   LH_REPS=3 LH_FILE=crates/queryd/src/delta.rs bun run scripts/mode_pass5_variance_paid.ts
 */

const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
|
||
const MODEL = process.env.LH_MODEL ?? "x-ai/grok-4.1-fast";
|
||
const FILE = process.env.LH_FILE ?? "crates/vectord/src/pathway_memory.rs";
|
||
const REPS = Number(process.env.LH_REPS ?? 5);
|
||
|
||
/** One experimental arm of the variance sweep. */
interface Condition {
  /** Human-readable tag; trailing padding keeps console columns aligned. */
  label: string;
  /** Mode name sent to the gateway as `force_mode`. */
  mode: string;
  /** Optional matrix-corpus override; omitted → modes.toml default applies. */
  corpus?: string | string[];
}
// The four arms of the bake-off. Trailing spaces inside `label` are
// deliberate (console column alignment); the LH_CONDITIONS filter trims
// them before matching.
const ALL_CONDITIONS: Condition[] = [
  { label: "isolation ", mode: "codereview_isolation" }, // no matrix at all
  { label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" }, // A only
  { label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" }, // C only
  { label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ },
];
// Optional whitelist via env: LH_CONDITIONS=isolation,composed limits the
|
||
// run to a subset (matches against the trimmed `label`). Useful when only
|
||
// the head-to-head pair matters and saves ~50% latency on slow rungs.
|
||
const wantedLabels = (process.env.LH_CONDITIONS ?? "")
|
||
.split(",").map(s => s.trim().toLowerCase()).filter(Boolean);
|
||
const CONDITIONS: Condition[] = wantedLabels.length === 0
|
||
? ALL_CONDITIONS
|
||
: ALL_CONDITIONS.filter(c => wantedLabels.some(w => c.label.trim().toLowerCase().startsWith(w)));
|
||
|
||
async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> {
|
||
const body: any = {
|
||
task_class: "scrum_review",
|
||
file_path: FILE,
|
||
force_mode: c.mode,
|
||
force_model: MODEL,
|
||
};
|
||
if (c.corpus !== undefined) body.force_matrix_corpus = c.corpus;
|
||
|
||
try {
|
||
const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
|
||
method: "POST",
|
||
headers: { "content-type": "application/json" },
|
||
body: JSON.stringify(body),
|
||
signal: AbortSignal.timeout(240_000),
|
||
});
|
||
if (!r.ok) {
|
||
const txt = await r.text().catch(() => "");
|
||
return { ok: false, error: `HTTP ${r.status}: ${txt.slice(0, 160)}` };
|
||
}
|
||
const j: any = await r.json();
|
||
return { ok: true, latency_ms: j.latency_ms, resp_chars: (j.response ?? "").length };
|
||
} catch (e: any) {
|
||
return { ok: false, error: e.message };
|
||
}
|
||
}
|
||
|
||
async function main() {
|
||
const total = CONDITIONS.length * REPS;
|
||
console.log(`[pass5] file=${FILE}`);
|
||
console.log(`[pass5] model=${MODEL} · ${CONDITIONS.length} conditions × ${REPS} reps = ${total} runs`);
|
||
console.log("");
|
||
|
||
let i = 0;
|
||
const startTs = new Date().toISOString();
|
||
for (let rep = 1; rep <= REPS; rep++) {
|
||
for (const c of CONDITIONS) {
|
||
i++;
|
||
process.stdout.write(` [${i}/${total}] rep=${rep} ${c.label}... `);
|
||
const r = await runOne(c, rep);
|
||
if (r.ok) {
|
||
console.log(`✓ ${r.resp_chars} chars · ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
|
||
} else {
|
||
console.log(`✗ ${r.error}`);
|
||
}
|
||
}
|
||
}
|
||
|
||
console.log(`\n[pass5] complete · started ${startTs}`);
|
||
console.log(`[pass5] aggregate: bun run scripts/mode_compare.ts --since ${startTs}`);
|
||
}
|
||
|
||
main().catch(e => { console.error(e); process.exit(1); });
|