#!/usr/bin/env bun
/**
 * Mode experiment harness — sweeps a set of files through every native
 * mode, calling /v1/mode/execute serially. Results land in the
 * mode_experiments.jsonl that the gateway already writes (the runner
 * appends per-call). This script just orchestrates the calls.
 *
 * Usage:
 *   bun run scripts/mode_experiment.ts \
 *     --files crates/queryd/src/delta.rs,crates/queryd/src/service.rs \
 *     --modes codereview_lakehouse,codereview_null,codereview_isolation,codereview_matrix_only \
 *     --model openai/gpt-oss-120b:free \
 *     --corpus name_a,name_b
 *
 * Flags may be written as `--key value` or `--key=value`. List-valued flags
 * (--files, --modes, --corpus) are comma-separated.
 *
 * Defaults: 5 modes × 2 default target files × one model. The default model
 * is the paid OpenRouter primary (x-ai/grok-4.1-fast); pass
 * `--model openai/gpt-oss-120b:free` for the free-tier baseline.
 *
 * Environment:
 *   LH_GATEWAY          gateway base URL (default http://localhost:3100)
 *   LH_EXPERIMENT_TASK  task_class sent with every call (default scrum_review)
 */

const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
const TASK_CLASS = process.env.LH_EXPERIMENT_TASK ?? "scrum_review";

/** Modes swept when --modes is not given. */
const ALL_MODES = [
  "codereview_lakehouse",
  "codereview_null",
  "codereview_isolation",
  "codereview_matrix_only",
  "codereview_playbook_only",
];

/** Files swept when --files is not given. */
const DEFAULT_FILES = [
  "crates/queryd/src/delta.rs",
  "crates/queryd/src/service.rs",
];

// Default to the paid OpenRouter primary (matches scrum_master_pipeline
// ladder rung 1). Pass `--model openai/gpt-oss-120b:free` if you want
// the old free-tier baseline. See SCRUM_MASTER_SPEC.md for the ladder.
const DEFAULT_MODEL = "x-ai/grok-4.1-fast";

/** Per-request abort timeout for a single /v1/mode/execute call. */
const REQUEST_TIMEOUT_MS = 240_000;

/** Splits a comma-separated flag value into trimmed, non-empty entries. */
function splitList(raw: string): string[] {
  return raw
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean);
}

/**
 * Parses the experiment's command-line flags.
 *
 * Recognises `--key value` and `--key=value`. A flag that is followed by
 * another `--flag` (or by nothing) gets the empty string as its value rather
 * than consuming the next flag. Tokens that do not start with `--` and are
 * not consumed as a value are ignored.
 *
 * @param argv Argument tokens without the runtime/script prefix. Defaults to
 *   the arguments this process was started with.
 * @returns The file list, mode list, model id and corpus override list.
 *   Missing --files/--modes/--model fall back to the module defaults; a
 *   missing --corpus yields an empty list.
 */
function parseArgs(
  argv: readonly string[] = Bun.argv.slice(2),
): { files: string[]; modes: string[]; model: string; corpus: string[] } {
  // A Map (not a plain object) so that odd keys like "__proto__" are inert.
  const flags = new Map<string, string>();

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (token === undefined || !token.startsWith("--")) continue;

    const eq = token.indexOf("=");
    if (eq !== -1) {
      // --key=value form: everything after the first "=" is the value.
      flags.set(token.slice(2, eq), token.slice(eq + 1));
      continue;
    }

    const next = argv[i + 1];
    if (next !== undefined && !next.startsWith("--")) {
      flags.set(token.slice(2), next);
      i++; // the value token has been consumed
    } else {
      // Do not swallow the following flag as this flag's value.
      flags.set(token.slice(2), "");
    }
  }

  const files = splitList(flags.get("files") ?? DEFAULT_FILES.join(","));
  const modes = splitList(flags.get("modes") ?? ALL_MODES.join(","));
  const model = flags.get("model") ?? DEFAULT_MODEL;
  const corpus = splitList(flags.get("corpus") ?? "");
  return { files, modes, model, corpus };
}

/** Outcome of one (file, mode) call, as summarised on the console. */
interface RunResult {
  file: string;
  mode: string;
  ok: boolean;
  latency_ms?: number;
  response_chars?: number;
  enriched_chars?: number;
  bug_fingerprints?: number;
  matrix_kept?: number;
  matrix_dropped?: number;
  error?: string;
}

/**
 * The fields of the /v1/mode/execute response body that this script reads.
 * NOTE(review): field types are assumed from how they are used here; the
 * authoritative schema lives in the gateway — confirm against it.
 */
interface ModeExecuteResponse {
  latency_ms?: number;
  response?: string;
  enriched_prompt_chars?: number;
  sources?: {
    bug_fingerprints_count?: number;
    matrix_chunks_kept?: number;
    matrix_chunks_dropped?: number;
  };
}

/**
 * Executes one file under one forced mode/model via POST /v1/mode/execute.
 *
 * Never throws: HTTP errors, network errors, timeouts and malformed response
 * bodies are all reported as `{ ok: false, error }`.
 *
 * @param file   Path sent as `file_path`.
 * @param mode   Mode sent as `force_mode`.
 * @param model  Model id sent as `force_model`.
 * @param corpus Optional corpus override: one entry is sent as a string, more
 *   than one as an array, none omits `force_matrix_corpus` entirely.
 * @returns On success, the gateway-reported latency and size/count fields.
 *   On failure, the error text plus the locally measured elapsed time.
 */
async function runOne(file: string, mode: string, model: string, corpus: string[]): Promise<RunResult> {
  const t0 = Date.now();
  try {
    const payload: Record<string, unknown> = {
      task_class: TASK_CLASS,
      file_path: file,
      force_mode: mode,
      force_model: model,
    };
    if (corpus.length === 1) payload.force_matrix_corpus = corpus[0];
    else if (corpus.length > 1) payload.force_matrix_corpus = corpus;

    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
    });

    if (!r.ok) {
      // Best-effort read of the error body; truncated to keep the log line short.
      const errText = await r.text().catch(() => "");
      return {
        file,
        mode,
        ok: false,
        error: `HTTP ${r.status}: ${errText.slice(0, 200)}`,
        latency_ms: Date.now() - t0,
      };
    }

    const j = (await r.json()) as ModeExecuteResponse;
    return {
      file,
      mode,
      ok: true,
      latency_ms: j.latency_ms,
      response_chars: (j.response ?? "").length,
      enriched_chars: j.enriched_prompt_chars,
      bug_fingerprints: j.sources?.bug_fingerprints_count,
      matrix_kept: j.sources?.matrix_chunks_kept,
      matrix_dropped: j.sources?.matrix_chunks_dropped,
    };
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances carry a .message.
    const message = e instanceof Error ? e.message : String(e);
    return { file, mode, ok: false, error: message, latency_ms: Date.now() - t0 };
  }
}

/**
 * Runs every (file, mode) combination serially, printing one progress line
 * per call and a final success count.
 */
async function main() {
  const { files, modes, model, corpus } = parseArgs();
  const total = files.length * modes.length;

  console.log(`[experiment] files=${files.length} × modes=${modes.length} = ${total} runs`);
  console.log(`[experiment] model=${model} task=${TASK_CLASS} gateway=${GATEWAY}`);
  if (corpus.length > 0) console.log(`[experiment] corpus override: ${corpus.join(" + ")}`);
  console.log("");

  const results: RunResult[] = [];
  let i = 0;
  for (const file of files) {
    for (const mode of modes) {
      i++;
      process.stdout.write(`  [${i}/${total}] ${mode.padEnd(28)} ${file} ... `);
      // Deliberately serial: one call in flight at a time.
      const r = await runOne(file, mode, model, corpus);
      results.push(r);
      if (r.ok) {
        const kept = r.matrix_kept ?? 0;
        const dropped = r.matrix_dropped ?? 0;
        console.log(
          `✓ ${(r.response_chars ?? 0).toString().padStart(5)} chars | ` +
          `prompt ${(r.enriched_chars ?? 0).toString().padStart(5)} chars | ` +
          `${((r.latency_ms ?? 0) / 1000).toFixed(1).padStart(5)}s | ` +
          `bug=${r.bug_fingerprints ?? "-"} mtx=${kept}/${kept + dropped}`
        );
      } else {
        console.log(`✗ ${r.error}`);
      }
    }
  }

  console.log("");
  console.log(`[experiment] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`);
  console.log(`[experiment] full per-call detail in data/_kb/mode_experiments.jsonl`);
  console.log(`[experiment] aggregate with: bun run scripts/mode_compare.ts`);
}

main().catch(e => {
  console.error(e);
  process.exit(1);
});