lakehouse/scripts/mode_experiment.ts
root 2dbc8dbc83
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
v1/mode: model-aware enrichment downgrade + 3 corpora + variance harness
Pass 5 (5 reps × 4 conditions × 1 file on grok-4.1-fast) showed that composing
matrix corpora is anti-additive on strong models — composed lakehouse_arch
+ symbols LOST 5/5 head-to-head vs codereview_isolation (Δ −1.8 grounded
findings, p=0.031). Default flips to isolation; matrix path now auto-
downgrades when the resolved model is strong.

Mode runner:
- matrix_corpus is Vec<String> (string OR array via deserialize_string_or_vec)
- top_k=6 from each corpus, merge by score, take top 8 globally
- chunk tag prefers doc_id over source so reviewer sees [adr:009] vs [lakehouse_arch]
- is_weak_model() gate auto-downgrades codereview_lakehouse → codereview_isolation
  for strong models (default-strong; weak = :free suffix or local last-resort)
- LH_FORCE_FULL_ENRICHMENT=1 bypasses for diagnostic runs
- EnrichmentSources.downgraded_from records when the gate fires

Three corpora indexed via /vectors/index (5849 chunks total):
- lakehouse_arch_v1 — ADRs + phases + PRD + scrum spec (93 docs, 2119 chunks)
- scrum_findings_v1 — past scrum_reviews.jsonl (168 docs, 1260 chunks; EXCLUDED
  from defaults — 24% out-of-bounds line citations from cross-file drift)
- lakehouse_symbols_v1 — regex-extracted pub items + /// docs (656 docs, 2470 chunks)

Experiment infra:
- scripts/build_*_corpus.ts — re-runnable when source content changes
- scripts/mode_pass5_variance_paid.ts — N reps × M conditions on one file
- scripts/mode_pass5_summarize.ts — mean ± σ + head-to-head, parser handles
  numbered + path-with-line + path-with-symbol finding tables
- scripts/mode_compare.ts — groups by mode|corpus when sweeps span corpora
- scripts/mode_experiment.ts — default model bumped to x-ai/grok-4.1-fast,
  --corpus flag for per-call override

Decisions + open follow-ups: docs/MODE_RUNNER_TUNING_PLAN.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 17:29:17 -05:00

136 lines
5.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bun
/**
* Mode experiment harness — sweeps a set of files through every native
* mode, calling /v1/mode/execute serially. Results land in the
* mode_experiments.jsonl that the gateway already writes (the runner
* appends per-call). This script just orchestrates the calls.
*
* Usage:
* bun run scripts/mode_experiment.ts \
* --files crates/queryd/src/delta.rs,crates/queryd/src/service.rs \
* --modes codereview_lakehouse,codereview_null,codereview_isolation,codereview_matrix_only \
* --model openai/gpt-oss-120b:free
*
* Defaults: 5 modes × $LH_EXPERIMENT_FILES files (or 2 default targets) ×
* one model. Cloud-quota-resilient — uses OpenRouter free model unless
* --model overrides.
*/
// Base URL of the mode gateway (POST /v1/mode/execute); override with LH_GATEWAY.
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
// task_class sent on every call; override with LH_EXPERIMENT_TASK.
const TASK_CLASS = process.env.LH_EXPERIMENT_TASK ?? "scrum_review";
// Every native mode the sweep covers when --modes is not passed.
const ALL_MODES = [
"codereview_lakehouse",
"codereview_null",
"codereview_isolation",
"codereview_matrix_only",
"codereview_playbook_only",
];
// Fallback target files used when --files is not passed.
// NOTE(review): the header comment also promises $LH_EXPERIMENT_FILES as a
// source, but parseArgs never reads it — confirm intended behavior.
const DEFAULT_FILES = [
"crates/queryd/src/delta.rs",
"crates/queryd/src/service.rs",
];
/**
 * Parse `--files`, `--modes`, `--model`, `--corpus` from Bun.argv.
 *
 * All list-valued flags are comma-separated; blanks are trimmed and dropped.
 * Fixes vs. the previous version:
 *  - honors $LH_EXPERIMENT_FILES as the file source when --files is absent,
 *    matching the contract documented in the header comment;
 *  - a flag immediately followed by another flag no longer swallows it as
 *    its value (`--files --modes x` previously set files to "--modes").
 *
 * @returns resolved files, modes, model id, and optional corpus override.
 */
function parseArgs(): { files: string[]; modes: string[]; model: string; corpus: string[] } {
  const args = Bun.argv.slice(2);
  const out: Record<string, string> = {};
  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    if (!a.startsWith("--")) continue;
    const next = args[i + 1];
    if (next !== undefined && !next.startsWith("--")) {
      out[a.slice(2)] = next;
      i++; // value consumed
    } else {
      out[a.slice(2)] = ""; // bare flag, or flag followed by another flag
    }
  }
  // Comma-separated list → trimmed, non-empty entries.
  const splitCsv = (s: string) => s.split(",").map(t => t.trim()).filter(Boolean);
  const files = splitCsv(out.files ?? process.env.LH_EXPERIMENT_FILES ?? DEFAULT_FILES.join(","));
  const modes = splitCsv(out.modes ?? ALL_MODES.join(","));
  // Default to the paid OpenRouter primary (matches scrum_master_pipeline
  // ladder rung 1). Pass `--model openai/gpt-oss-120b:free` if you want
  // the old free-tier baseline. See SCRUM_MASTER_SPEC.md for the ladder.
  const model = out.model ?? "x-ai/grok-4.1-fast";
  const corpus = splitCsv(out.corpus ?? "");
  return { files, modes, model, corpus };
}
// Per-call summary of one /v1/mode/execute invocation, printed by main()
// (the gateway itself persists the full per-call record — see header comment).
interface RunResult {
file: string;                // target file path sent as file_path
mode: string;                // mode sent as force_mode
ok: boolean;                 // true iff the HTTP call succeeded
latency_ms?: number;         // gateway-reported latency on success
response_chars?: number;     // length of the model response text
enriched_chars?: number;     // gateway-reported enriched prompt size
bug_fingerprints?: number;   // sources.bug_fingerprints_count, if present
matrix_kept?: number;        // sources.matrix_chunks_kept, if present
matrix_dropped?: number;     // sources.matrix_chunks_dropped, if present
error?: string;              // populated only when ok === false
}
/**
 * Execute one (file, mode) pair against the gateway and summarize the result.
 *
 * Fixes vs. the previous version: the HTTP-error path now reports
 * `latency_ms` like the exception path already did; the inner response-text
 * variable no longer shadows the request `body`; the catch clause narrows
 * `unknown` instead of assuming `any`.
 *
 * @param file   repo-relative path sent as file_path
 * @param mode   mode name sent as force_mode
 * @param model  model id sent as force_model
 * @param corpus 0, 1, or N corpus names; forwarded as string or array
 * @returns a RunResult; never throws (failures land in `error`).
 */
async function runOne(file: string, mode: string, model: string, corpus: string[]): Promise<RunResult> {
  const t0 = Date.now();
  try {
    const body: Record<string, unknown> = {
      task_class: TASK_CLASS,
      file_path: file,
      force_mode: mode,
      force_model: model,
    };
    // Gateway accepts either a single corpus string or an array.
    if (corpus.length === 1) body.force_matrix_corpus = corpus[0];
    else if (corpus.length > 1) body.force_matrix_corpus = corpus;
    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(240_000), // 4-minute hard cap per call
    });
    if (!r.ok) {
      const errText = await r.text().catch(() => "");
      return {
        file, mode, ok: false,
        error: `HTTP ${r.status}: ${errText.slice(0, 200)}`,
        latency_ms: Date.now() - t0,
      };
    }
    // Minimal shape of the gateway response fields we actually read.
    const j = (await r.json()) as {
      latency_ms?: number;
      response?: string;
      enriched_prompt_chars?: number;
      sources?: {
        bug_fingerprints_count?: number;
        matrix_chunks_kept?: number;
        matrix_chunks_dropped?: number;
      };
    };
    return {
      file, mode, ok: true,
      latency_ms: j.latency_ms,
      response_chars: (j.response ?? "").length,
      enriched_chars: j.enriched_prompt_chars,
      bug_fingerprints: j.sources?.bug_fingerprints_count,
      matrix_kept: j.sources?.matrix_chunks_kept,
      matrix_dropped: j.sources?.matrix_chunks_dropped,
    };
  } catch (e: unknown) {
    const msg = e instanceof Error ? e.message : String(e);
    return { file, mode, ok: false, error: msg, latency_ms: Date.now() - t0 };
  }
}
/**
 * Entry point: sweep every (file, mode) pair serially through runOne,
 * printing a one-line summary per call and a final success tally.
 * Per-call detail is persisted by the gateway itself (see header comment).
 */
async function main() {
  const { files, modes, model, corpus } = parseArgs();
  const total = files.length * modes.length;

  console.log(`[experiment] files=${files.length} × modes=${modes.length} = ${total} runs`);
  console.log(`[experiment] model=${model} task=${TASK_CLASS} gateway=${GATEWAY}`);
  if (corpus.length > 0) console.log(`[experiment] corpus override: ${corpus.join(" + ")}`);
  console.log("");

  const results: RunResult[] = [];
  let done = 0;
  for (const file of files) {
    for (const mode of modes) {
      done += 1;
      process.stdout.write(` [${done}/${total}] ${mode.padEnd(28)} ${file} ... `);
      const res = await runOne(file, mode, model, corpus);
      results.push(res);
      if (!res.ok) {
        console.log(`${res.error}`);
        continue;
      }
      const kept = res.matrix_kept ?? 0;
      const seen = kept + (res.matrix_dropped ?? 0);
      const summary = [
        `${(res.response_chars ?? 0).toString().padStart(5)} chars`,
        `prompt ${(res.enriched_chars ?? 0).toString().padStart(5)} chars`,
        `${((res.latency_ms ?? 0) / 1000).toFixed(1).padStart(5)}s`,
        `bug=${res.bug_fingerprints ?? "-"} mtx=${kept}/${seen}`,
      ];
      console.log(summary.join(" | "));
    }
  }

  const okCount = results.filter(r => r.ok).length;
  console.log("");
  console.log(`[experiment] complete · ${okCount}/${results.length} succeeded`);
  console.log(`[experiment] full per-call detail in data/_kb/mode_experiments.jsonl`);
  console.log(`[experiment] aggregate with: bun run scripts/mode_compare.ts`);
}

main().catch(e => { console.error(e); process.exit(1); });