lakehouse/scripts/mode_pass2_corpus_sweep.ts
root 56bf30cfd8
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
v1/mode: override knobs + staffing native runner + pass 2/3/4 harnesses
Setup for the corpus-tightening experiment sweep (J 2026-04-26 — "now
is the only cheap window before the corpus gets large and refactoring
costs go up").

Override params on /v1/mode/execute (additive — old callers unaffected):
  force_matrix_corpus      — Pass 2: try alternate corpora per call
  force_relevance_threshold — Pass 2: sweep filter strictness
  force_temperature         — Pass 3: variance test

New native mode `staffing_inference_lakehouse` (Pass 4):
  - Same composer architecture as codereview_lakehouse
  - Staffing framing: coordinator producing fillable|contingent|
    unfillable verdict + ranked candidate list with playbook citations
  - matrix_corpus = workers_500k_v8
  - Validates that modes-as-prompt-molders generalizes beyond code
  - Framing explicitly says "do NOT fabricate workers" — the staffing
    analog of the lakehouse mode's symbol-grounding requirement

Three sweep harnesses:
  scripts/mode_pass2_corpus_sweep.ts — 4 corpora × 4 thresholds × 5 files
  scripts/mode_pass3_variance.ts     — 3 files × 3 temps × 5 reps
  scripts/mode_pass4_staffing.ts     — 5 fill requests through staffing mode

Each appends per-call rows to data/_kb/mode_experiments.jsonl which
mode_compare.ts already aggregates with grounding column.

Pass 1 (10 files × 5 modes broad sweep) currently running via the
existing scripts/mode_experiment.ts — gateway restart deferred until
it completes so the new override knobs aren't enabled mid-experiment.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 01:55:12 -05:00

122 lines
4.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bun
/**
* Pass 2: matrix corpus + relevance threshold sweep.
*
* For each (corpus, threshold) combination, run codereview_matrix_only
* on the same N files. Compares which corpus actually adds grounded
* findings vs codereview_isolation (matrix-off baseline).
*
* Output: data/_kb/mode_experiments.jsonl gets one row per call,
* tagged via the force_matrix_corpus + force_relevance_threshold
* fields visible in `sources`. Aggregator can then group by corpus.
*
* Usage: bun run scripts/mode_pass2_corpus_sweep.ts
*/
/** Base URL of the gateway that serves `/v1/mode/execute`; override with `LH_GATEWAY`. */
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
/** Model id sent as `force_model` on every call; override with `LH_MODEL`. */
const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free";

/**
 * Splits a comma-separated environment override into trimmed, non-empty entries.
 *
 * Whitespace around entries and empty segments (e.g. from a trailing comma) are
 * discarded so they cannot turn into bogus file paths or corpus names.
 *
 * @param raw - Raw environment value, or `undefined` when the variable is unset.
 * @param fallback - Entries to use when `raw` is unset or contains no usable entry.
 * @returns A fresh array of entries.
 */
function parseList(raw: string | undefined, fallback: readonly string[]): string[] {
  const entries = (raw ?? "")
    .split(",")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
  return entries.length > 0 ? entries : [...fallback];
}

/**
 * Parses a comma-separated list of numeric thresholds.
 *
 * Empty segments are skipped rather than coerced (`Number("")` is `0`, which
 * would silently add a threshold of 0 to the sweep).
 *
 * @param raw - Raw environment value, or `undefined` when the variable is unset.
 * @param fallback - Thresholds to use when `raw` is unset or contains no usable entry.
 * @returns The parsed thresholds, in input order.
 * @throws Error when an entry is not a finite number, so a typo fails fast
 *   instead of being serialized as `null` in every request body.
 */
function parseThresholds(raw: string | undefined, fallback: readonly number[]): number[] {
  return parseList(raw, fallback.map(String)).map((token) => {
    const value = Number(token);
    if (!Number.isFinite(value)) {
      throw new Error(`LH_THRESHOLDS: "${token}" is not a finite number`);
    }
    return value;
  });
}

/** Files reviewed for every (corpus, threshold) pair; override with comma-separated `LH_FILES`. */
const FILES = parseList(process.env.LH_FILES, [
  "crates/queryd/src/delta.rs",
  "crates/queryd/src/service.rs",
  "crates/vectord/src/pathway_memory.rs",
  "crates/gateway/src/v1/mode.rs",
  "crates/aibridge/src/client.rs",
]);
/** Corpus names sent as `force_matrix_corpus`; override with comma-separated `LH_CORPORA`. */
const CORPORA = parseList(process.env.LH_CORPORA, [
  "distilled_procedural_v20260423102847",
  "distilled_factual_v20260423095819",
  "distilled_config_hint_v20260423102847",
  "kb_team_runs_v1",
]);
/** Values sent as `force_relevance_threshold`; override with comma-separated `LH_THRESHOLDS`. */
const THRESHOLDS = parseThresholds(process.env.LH_THRESHOLDS, [0.2, 0.3, 0.4, 0.5]);
/**
 * Outcome of one sweep call, i.e. one (corpus, threshold, file) combination.
 *
 * The first three fields echo the inputs of the call. `ok` tells the two
 * outcomes apart: successful calls carry the metric fields, failed calls
 * carry `error`.
 */
interface Result {
  /** Corpus name that was sent as `force_matrix_corpus`. */
  corpus: string;
  /** Value that was sent as `force_relevance_threshold`. */
  threshold: number;
  /** Path that was sent as `file_path`. */
  file: string;

  /** `true` when the gateway answered with a success status and a parsable body. */
  ok: boolean;
  /** Failure description (HTTP status plus body excerpt, or the thrown error's message). */
  error?: string;

  /** `sources.matrix_chunks_kept` from the response, when present. */
  matrix_kept?: number;
  /** `sources.matrix_chunks_dropped` from the response, when present. */
  matrix_dropped?: number;
  /** Length of the `response` field of the reply. */
  response_chars?: number;
  /** `latency_ms` from the response, when present. */
  latency_ms?: number;
}
/**
 * Executes one sweep call against the gateway's `/v1/mode/execute` endpoint.
 *
 * The request forces the `codereview_matrix_only` mode and the configured
 * model, and overrides the matrix corpus and relevance threshold for this
 * call only. The request is aborted after 180 seconds.
 *
 * This function never rejects: HTTP error statuses, network failures,
 * timeouts and malformed bodies are all reported as a `Result` with
 * `ok: false` and a human-readable `error`, so one bad call cannot abort the
 * surrounding sweep.
 *
 * @param corpus - Corpus name sent as `force_matrix_corpus`.
 * @param threshold - Value sent as `force_relevance_threshold`.
 * @param file - Path sent as `file_path`.
 * @returns The outcome of the call, echoing `corpus`, `threshold` and `file`.
 */
async function runOne(corpus: string, threshold: number, file: string): Promise<Result> {
  try {
    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        task_class: "scrum_review",
        file_path: file,
        force_mode: "codereview_matrix_only",
        force_model: MODEL,
        force_matrix_corpus: corpus,
        force_relevance_threshold: threshold,
      }),
      signal: AbortSignal.timeout(180_000),
    });
    if (!r.ok) {
      // Best effort: the error body is only used for a short diagnostic excerpt.
      const body = await r.text().catch(() => "");
      return { corpus, threshold, file, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 150)}` };
    }

    const payload: unknown = await r.json();
    if (typeof payload !== "object" || payload === null) {
      const excerpt = String(JSON.stringify(payload)).slice(0, 150);
      return { corpus, threshold, file, ok: false, error: `unexpected response body: ${excerpt}` };
    }

    // The payload is external input: read each field defensively so that a
    // non-numeric counter cannot turn the caller's arithmetic into string
    // concatenation, and a non-string response cannot yield a bogus length.
    const fields = payload as { sources?: unknown; response?: unknown; latency_ms?: unknown };
    const sources = (
      typeof fields.sources === "object" && fields.sources !== null ? fields.sources : {}
    ) as { matrix_chunks_kept?: unknown; matrix_chunks_dropped?: unknown };
    const asNumber = (v: unknown): number | undefined => (typeof v === "number" ? v : undefined);

    return {
      corpus, threshold, file, ok: true,
      matrix_kept: asNumber(sources.matrix_chunks_kept),
      matrix_dropped: asNumber(sources.matrix_chunks_dropped),
      response_chars: typeof fields.response === "string" ? fields.response.length : 0,
      latency_ms: asNumber(fields.latency_ms),
    };
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a `message`.
    const message = e instanceof Error ? e.message : String(e);
    return { corpus, threshold, file, ok: false, error: message || "unknown error" };
  }
}
/**
 * Runs the whole Pass 2 sweep and prints a summary to stdout.
 *
 * Every (corpus, threshold, file) combination is executed sequentially, one
 * request at a time, with a progress line per call. Afterwards the success
 * count is printed, followed by a corpus × threshold table showing the
 * average number of matrix chunks kept per successful call.
 */
async function main() {
  const total = CORPORA.length * THRESHOLDS.length * FILES.length;
  console.log(`[pass2] corpora=${CORPORA.length} × thresholds=${THRESHOLDS.length} × files=${FILES.length} = ${total} runs`);
  console.log(`[pass2] model=${MODEL}\n`);

  // Use one decimal when that is lossless (0.2 → "0.2"); otherwise print the
  // full value so that e.g. 0.25 and 0.35 are not both shown as "0.3".
  const fmtThreshold = (t: number): string => {
    const short = t.toFixed(1);
    return Number(short) === t ? short : String(t);
  };

  let i = 0;
  const results: Result[] = [];
  for (const corpus of CORPORA) {
    for (const threshold of THRESHOLDS) {
      for (const file of FILES) {
        i++;
        process.stdout.write(` [${i}/${total}] corpus=${corpus.slice(0, 30).padEnd(30)} thr=${fmtThreshold(threshold)} ${file.slice(-32).padStart(32)} ... `);
        const r = await runOne(corpus, threshold, file);
        results.push(r);
        if (r.ok) {
          const total_chunks = (r.matrix_kept ?? 0) + (r.matrix_dropped ?? 0);
          console.log(`✓ k=${r.matrix_kept ?? "?"}/${total_chunks} resp=${r.response_chars} ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
        } else {
          // Mark failures explicitly so they stand out next to the ✓ lines.
          console.log(`✗ ${r.error ?? "unknown error"}`);
        }
      }
    }
  }

  const succeeded = results.filter(r => r.ok);
  console.log(`\n[pass2] complete · ${succeeded.length}/${results.length} succeeded`);

  // Per-corpus×threshold roll-up of kept-rate (the matrix usefulness proxy).
  console.log(`\n[pass2] kept-rate by corpus × threshold (avg chunks kept per call):`);
  // Header and value cells share one width so the columns line up regardless
  // of how long the threshold labels are.
  const headers = THRESHOLDS.map(t => `thr=${fmtThreshold(t)}`);
  const colWidth = Math.max(5, ...headers.map(h => h.length));
  console.log(` ${"corpus".padEnd(40)} ${headers.map(h => h.padStart(colWidth)).join(" ")}`);
  for (const corpus of CORPORA) {
    const cells = THRESHOLDS.map(t => {
      const matched = succeeded.filter(r => r.corpus === corpus && r.threshold === t);
      if (matched.length === 0) return "—".padStart(colWidth);
      // Calls that did not report a kept count contribute 0 to the average.
      const avgKept = matched.reduce((s, r) => s + (r.matrix_kept ?? 0), 0) / matched.length;
      return avgKept.toFixed(1).padStart(colWidth);
    }).join(" ");
    console.log(` ${corpus.slice(0, 40).padEnd(40)} ${cells}`);
  }
  console.log(`\n[pass2] aggregate findings/groundedness with: bun run scripts/mode_compare.ts`);
}
main().catch(e => { console.error(e); process.exit(1); });