remove 7 more orphaned experimental scripts from scripts/

Continuing the test-code-in-main cleanup. These are sequential mode-runner
experiment passes (2/3/4/5) that completed and whose findings were captured
in pathway_memory + the matrix index — the scripts themselves are dead
weight. Plus two one-off probe scripts.

Removed (all 0 refs in production code or automation):
- mode_pass2_corpus_sweep.ts     — 2026-04 corpus sweep experiment
- mode_pass3_variance.ts         — variance measurement run
- mode_pass4_staffing.ts         — staffing-domain pass
- mode_pass5_summarize.ts        — summarization variance
- mode_pass5_variance_paid.ts    — paid-model variance
- overnight_proof.sh             — overnight stress probe (output in logs/)
- ab_t3_test.sh                  — T3 overseer A/B test (output captured in KB)

Verified: 0 references in package.json / justfile / Makefile / any active
.ts/.rs/.sh file. Two mentions remain in docs/recon and docs/MODE_RUNNER_
TUNING_PLAN — those are historical design-doc references, not consumers.

KEPT in scripts/ (have live consumers OR are runtime tools):
- mode_experiment.ts (14 refs), mode_compare.ts (7 refs)
- lance_smoke.sh, build_*_corpus.ts, staffing_demo.py, lance_tune.py,
  generate_demo.py, generate_workers.py, copilot.py, kb_measure.py,
  kb_staffer_report.py, analyze_chicago_contracts.ts, dump_raw_corpus.sh,
  check_phase44_callers.sh, autonomous_agent.py, build_answers_corpus.ts,
  build_lakehouse_corpus.ts, build_scrum_findings_corpus.ts,
  build_symbols_corpus.ts, e2e_pipeline_check.sh, scale_test.py,
  scale_10m_test.sh, run_staffer_demo.sh, stress_test.py

Build clean. If any of these are needed back: git show HEAD~1 -- scripts/<file>

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-05-03 02:06:29 -05:00
parent 6aafd41785
commit f4ebd2278b
7 changed files with 0 additions and 1166 deletions

View File

@ -1,77 +0,0 @@
#!/usr/bin/env bash
# A/B test of T3 overseer: does it actually make subsequent runs better?
# Chains Run B (T3 seed) → Run C (T3 + read-back) → Run D (T3 cloud).
# Run A is assumed already complete (launched separately). Aggregates
# metrics at the end into ab_scorecard.json.
set -e
cd "$(dirname "$0")/.."
export OLLAMA_CLOUD_KEY="$(python3 -c "import json; print(json.load(open('/root/llm_team_config.json'))['providers']['ollama_cloud']['api_key'])")"

echo "▶ A/B test start at $(date -Iseconds)"
echo "▶ prior lessons dir: $(ls data/_playbook_lessons 2>/dev/null | wc -l) files"

# run_scenario LABEL [VAR=val ...] — run one scenario pass, logging to
# /tmp/lakehouse_ab_<LABEL>.log, and report the REAL exit status.
# (The old form `cmd || true; echo "exit=$?"` always printed 0 because
# $? was the status of `|| true`; capture the status in a variable instead.)
run_scenario() {
  local label="$1"; shift
  local rc=0
  env "$@" bun tests/multi-agent/scenario.ts > "/tmp/lakehouse_ab_${label}.log" 2>&1 || rc=$?
  echo " ${label} exit=${rc}"
}

# Run B — T3 enabled local, no prior lessons should exist yet
echo "──── RUN B: T3 local, seeds first lesson ────"
run_scenario B
ls data/_playbook_lessons/*.json 2>/dev/null | head -5

# Run C — T3 enabled local, B's lesson should load
echo "──── RUN C: T3 local, reads B's lesson ────"
run_scenario C

# Run D — T3 enabled CLOUD (gpt-oss:120b), reads B+C lessons
echo "──── RUN D: T3 cloud, reads B+C lessons ────"
run_scenario D LH_OVERVIEW_CLOUD=1

echo "▶ all runs done at $(date -Iseconds)"
echo "▶ scorecard:"
python3 - <<'PYEOF'
import json, os, subprocess, datetime

# The 4 most recent playbook dirs are D, C, B, A in reverse-chronological
# order — Run A was launched separately BEFORE this script started.
labels = ['A(no-T3)','B(T3-seed)','C(T3-read)','D(T3-cloud)']
out = subprocess.check_output(
    ['bash', '-c', 'ls -1dt tests/multi-agent/playbooks/scenario-* | head -8'])
all_runs = out.decode().strip().split('\n')
top4 = list(reversed(all_runs[:4]))  # oldest first → A,B,C,D

rows = []
for i, path in enumerate(top4):
    try:
        results = json.load(open(os.path.join(path, 'results.json')))
    except FileNotFoundError:
        # Run still in progress or crashed before writing results — skip it.
        continue
    prior = []
    try:
        prior = json.load(open(os.path.join(path, 'prior_lessons.json')))
    except FileNotFoundError:
        pass
    rows.append({
        'label': labels[i] if i < len(labels) else f'run{i}',
        'path': path,
        'ok_events': sum(1 for r in results if r.get('ok')),
        'total_events': len(results),
        'total_turns': sum(r.get('turns', 0) for r in results),
        'total_gaps': sum(len(r.get('gap_signals', [])) for r in results),
        'total_citations': sum(len(r.get('playbook_citations') or []) for r in results),
        'prior_lessons_loaded': len(prior),
    })

scorecard = {'generated_at': datetime.datetime.utcnow().isoformat() + 'Z', 'runs': rows}
open('tests/multi-agent/playbooks/ab_scorecard.json', 'w').write(json.dumps(scorecard, indent=2))
print(json.dumps(scorecard, indent=2))
PYEOF
echo "▶ saved: tests/multi-agent/playbooks/ab_scorecard.json"

View File

@ -1,121 +0,0 @@
#!/usr/bin/env bun
/**
* Pass 2: matrix corpus + relevance threshold sweep.
*
* For each (corpus, threshold) combination, run codereview_matrix_only
* on the same N files. Compares which corpus actually adds grounded
* findings vs codereview_isolation (matrix-off baseline).
*
* Output: data/_kb/mode_experiments.jsonl gets one row per call,
* tagged via the force_matrix_corpus + force_relevance_threshold
* fields visible in `sources`. Aggregator can then group by corpus.
*
* Usage: bun run scripts/mode_pass2_corpus_sweep.ts
*/
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free";

/** Comma-separated env override, falling back to a baked-in default list. */
const listFromEnv = (value: string | undefined, defaults: string[]): string[] =>
  (value ?? defaults.join(",")).split(",");

// Focus files reviewed under every (corpus, threshold) combination.
const FILES = listFromEnv(process.env.LH_FILES, [
  "crates/queryd/src/delta.rs",
  "crates/queryd/src/service.rs",
  "crates/vectord/src/pathway_memory.rs",
  "crates/gateway/src/v1/mode.rs",
  "crates/aibridge/src/client.rs",
]);

// Candidate matrix corpora swept against each file.
const CORPORA = listFromEnv(process.env.LH_CORPORA, [
  "distilled_procedural_v20260423102847",
  "distilled_factual_v20260423095819",
  "distilled_config_hint_v20260423102847",
  "kb_team_runs_v1",
]);

// Relevance thresholds to try per corpus.
const THRESHOLDS = (process.env.LH_THRESHOLDS ?? "0.2,0.3,0.4,0.5").split(",").map(Number);

/** One row per gateway call. */
interface Result {
  corpus: string;
  threshold: number;
  file: string;
  ok: boolean;
  matrix_kept?: number;     // chunks that survived the relevance filter
  matrix_dropped?: number;  // chunks filtered out
  response_chars?: number;  // length of the model's response
  latency_ms?: number;
  error?: string;           // set when ok === false
}
/**
 * Execute one matrix-only review call against the gateway and flatten the
 * response into a Result row. Never throws — transport/HTTP failures come
 * back as { ok: false, error }.
 */
async function runOne(corpus: string, threshold: number, file: string): Promise<Result> {
  const request = {
    task_class: "scrum_review",
    file_path: file,
    force_mode: "codereview_matrix_only",
    force_model: MODEL,
    force_matrix_corpus: corpus,
    force_relevance_threshold: threshold,
  };
  try {
    const res = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(request),
      signal: AbortSignal.timeout(180_000),
    });
    if (!res.ok) {
      const detail = await res.text().catch(() => "");
      return { corpus, threshold, file, ok: false, error: `HTTP ${res.status}: ${detail.slice(0, 150)}` };
    }
    const payload: any = await res.json();
    return {
      corpus, threshold, file, ok: true,
      matrix_kept: payload.sources?.matrix_chunks_kept,
      matrix_dropped: payload.sources?.matrix_chunks_dropped,
      response_chars: (payload.response ?? "").length,
      latency_ms: payload.latency_ms,
    };
  } catch (e: any) {
    return { corpus, threshold, file, ok: false, error: e.message };
  }
}
/**
 * Sweep driver: runs every (corpus × threshold × file) combination
 * sequentially, printing one progress line per call and a final
 * kept-rate roll-up table. Per-call rows are appended to
 * data/_kb/mode_experiments.jsonl on the gateway side (per file header).
 */
async function main() {
  const total = CORPORA.length * THRESHOLDS.length * FILES.length;
  console.log(`[pass2] corpora=${CORPORA.length} × thresholds=${THRESHOLDS.length} × files=${FILES.length} = ${total} runs`);
  console.log(`[pass2] model=${MODEL}\n`);
  let i = 0;
  const results: Result[] = [];
  // Strictly sequential: one in-flight request keeps latency numbers comparable.
  for (const corpus of CORPORA) {
    for (const threshold of THRESHOLDS) {
      for (const file of FILES) {
        i++;
        process.stdout.write(` [${i}/${total}] corpus=${corpus.slice(0, 30).padEnd(30)} thr=${threshold.toFixed(1)} ${file.slice(-32).padStart(32)} ... `);
        const r = await runOne(corpus, threshold, file);
        results.push(r);
        if (r.ok) {
          // kept/total gives a quick per-call view of filter aggressiveness.
          const total_chunks = (r.matrix_kept ?? 0) + (r.matrix_dropped ?? 0);
          console.log(`✓ k=${r.matrix_kept}/${total_chunks} resp=${r.response_chars} ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
        } else {
          console.log(`${r.error}`);
        }
      }
    }
  }
  console.log(`\n[pass2] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`);
  // Per-corpus×threshold roll-up of kept-rate (the matrix usefulness proxy).
  console.log(`\n[pass2] kept-rate by corpus × threshold (avg chunks kept per call):`);
  console.log(` ${"corpus".padEnd(40)} ${THRESHOLDS.map(t => `thr=${t.toFixed(1)}`).join(" ").padStart(35)}`);
  for (const corpus of CORPORA) {
    const cells = THRESHOLDS.map(t => {
      const matched = results.filter(r => r.ok && r.corpus === corpus && r.threshold === t);
      if (matched.length === 0) return " — ";
      const avgKept = matched.reduce((s, r) => s + (r.matrix_kept ?? 0), 0) / matched.length;
      return avgKept.toFixed(1).padStart(5);
    }).join(" ");
    console.log(` ${corpus.slice(0, 40).padEnd(40)} ${cells}`);
  }
  console.log(`\n[pass2] aggregate findings/groundedness with: bun run scripts/mode_compare.ts`);
}
main().catch(e => { console.error(e); process.exit(1); });

View File

@ -1,109 +0,0 @@
#!/usr/bin/env bun
/**
* Pass 3: variance test.
*
* Runs codereview_lakehouse on the SAME file N times at each of M
* temperatures. Measures run-to-run stability of grounded finding
* count, response size, and latency. Anything <100% groundedness
* is a leak; track which symbols got hallucinated.
*
* Output appends to data/_kb/mode_experiments.jsonl. The aggregator
* can group by ts and identify variance buckets.
*
* Usage: bun run scripts/mode_pass3_variance.ts
*/
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free";

// Files under test: comma-separated env override or the default trio.
const DEFAULT_FILES = [
  "crates/queryd/src/delta.rs",
  "crates/vectord/src/pathway_memory.rs",
  "crates/gateway/src/v1/mode.rs",
];
const FILES = (process.env.LH_FILES ?? DEFAULT_FILES.join(",")).split(",");

// Temperatures swept per file, and repetitions per (file, temp) cell.
const TEMPS = (process.env.LH_TEMPS ?? "0.0,0.1,0.3").split(",").map(Number);
const REPS = Number(process.env.LH_REPS ?? 5);

/** One row per gateway call. */
interface Result {
  file: string;
  temp: number;
  rep: number;              // 1-based repetition index within the cell
  ok: boolean;
  response_chars?: number;  // length of the model's response
  latency_ms?: number;
  error?: string;           // set when ok === false
}
/**
 * One codereview_lakehouse call at a forced temperature. Failures are
 * returned as { ok: false, error } rather than thrown.
 */
async function runOne(file: string, temp: number, rep: number): Promise<Result> {
  const request = {
    task_class: "scrum_review",
    file_path: file,
    force_mode: "codereview_lakehouse",
    force_model: MODEL,
    force_temperature: temp,
  };
  try {
    const res = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(request),
      signal: AbortSignal.timeout(180_000),
    });
    if (!res.ok) {
      const detail = await res.text().catch(() => "");
      return { file, temp, rep, ok: false, error: `HTTP ${res.status}: ${detail.slice(0, 150)}` };
    }
    const payload: any = await res.json();
    return {
      file, temp, rep, ok: true,
      response_chars: (payload.response ?? "").length,
      latency_ms: payload.latency_ms,
    };
  } catch (e: any) {
    return { file, temp, rep, ok: false, error: e.message };
  }
}
/**
 * Variance driver: for each file × temperature cell, repeat the same call
 * REPS times and summarize run-to-run spread of response size. Grounding
 * variance itself is aggregated later by mode_compare.ts (see final line).
 */
async function main() {
  const total = FILES.length * TEMPS.length * REPS;
  console.log(`[pass3] files=${FILES.length} × temps=${TEMPS.length} × reps=${REPS} = ${total} runs`);
  console.log(`[pass3] model=${MODEL}\n`);
  let i = 0;
  const results: Result[] = [];
  for (const file of FILES) {
    for (const temp of TEMPS) {
      for (let rep = 1; rep <= REPS; rep++) {
        i++;
        process.stdout.write(` [${i}/${total}] temp=${temp.toFixed(1)} rep=${rep}/${REPS} ${file.slice(-32).padStart(32)} ... `);
        const r = await runOne(file, temp, rep);
        results.push(r);
        if (r.ok) {
          console.log(`✓ resp=${r.response_chars} ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
        } else {
          console.log(`${r.error}`);
        }
      }
    }
  }
  console.log(`\n[pass3] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`);
  // Per-file × temp variance summary (response_chars stddev as a quick
  // proxy for output instability).
  console.log(`\n[pass3] response_chars variance (mean ± stddev) by file × temp:`);
  console.log(` ${"file".padEnd(40)} ${TEMPS.map(t => `temp=${t.toFixed(1)}`.padStart(20)).join(" ")}`);
  for (const file of FILES) {
    const cells = TEMPS.map(t => {
      const xs = results.filter(r => r.ok && r.file === file && r.temp === t).map(r => r.response_chars ?? 0);
      if (xs.length === 0) return " — ";
      const mean = xs.reduce((s, x) => s + x, 0) / xs.length;
      // Population stddev (÷n, not ÷(n−1)) — adequate as a rough proxy here.
      const sd = Math.sqrt(xs.reduce((s, x) => s + Math.pow(x - mean, 2), 0) / xs.length);
      return `${Math.round(mean).toString().padStart(7)} ± ${Math.round(sd).toString().padEnd(6)}`.padStart(20);
    }).join(" ");
    console.log(` ${file.slice(0, 40).padEnd(40)} ${cells}`);
  }
  console.log(`\n[pass3] grounding variance via: bun run scripts/mode_compare.ts (look for grounded-N column drift)`);
}
main().catch(e => { console.error(e); process.exit(1); });

View File

@ -1,127 +0,0 @@
#!/usr/bin/env bun
/**
* Pass 4: staffing_inference_lakehouse cross-domain validation.
*
* Runs the staffing-domain mode against synthetic fill requests.
* Validates that the modes-as-prompt-molders architecture generalizes
 * beyond code review — the composer pattern (file_content + bug
* fingerprints + relevance-filtered matrix + domain framing) should
* produce grounded staffing recommendations the same way it produces
* grounded code reviews.
*
* Each fill request is posted as `file_content` (since the runner's
* shape expects file content; for staffing it's the request payload).
* file_path is set to a synthetic path under requests/ so pathway
* memory bucketing groups requests by geo+role.
*
* Usage: bun run scripts/mode_pass4_staffing.ts
*/
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free";

/** Shape of one synthetic fill request fed to the staffing mode. */
interface FillRequest {
  city: string;
  state: string;
  role: string;
  count: number;     // number of workers requested
  deadline: string;  // target fill date (ISO date string)
  notes?: string;    // free-text constraints (certs, shift, license)
}

// Fixed request set: mixed geos and roles so matrix retrieval is exercised
// across different pathway-memory buckets (see file header re: bucketing).
const REQUESTS: FillRequest[] = [
  { city: "Toledo", state: "OH", role: "Welder", count: 2, deadline: "2026-04-29", notes: "OSHA 10 required" },
  { city: "Nashville", state: "TN", role: "Forklift Operator", count: 3, deadline: "2026-05-01" },
  { city: "Chicago", state: "IL", role: "Assembler", count: 5, deadline: "2026-04-30", notes: "second shift" },
  { city: "South Bend", state: "IN", role: "Electrician", count: 1, deadline: "2026-04-28", notes: "journeyman license" },
  { city: "Murfreesboro", state: "TN", role: "Packaging Operator", count: 4, deadline: "2026-05-02" },
];
/**
 * Render a FillRequest as the markdown document posted in `file_content`.
 * NOTE(review): the bare "" element is always removed by filter(Boolean),
 * so no blank line separates the fields from the instruction — presumably
 * intentional, preserved as-is.
 */
function requestToPayload(req: FillRequest): string {
  const lines = [
    "# Fill Request",
    `Role: ${req.role} × ${req.count}`,
    `Location: ${req.city}, ${req.state}`,
    `Deadline: ${req.deadline}`,
    req.notes ? `Notes: ${req.notes}` : "",
    "",
    "Recommend candidates from the matrix data. Cite playbook references.",
  ];
  return lines.filter(Boolean).join("\n");
}
/** Outcome row for one staffing-mode call. */
interface Result {
  req: FillRequest;
  ok: boolean;
  response_chars?: number;    // length of the model's response
  bug_fingerprints?: number;  // sources.bug_fingerprints_count from the gateway
  matrix_kept?: number;       // matrix chunks that passed relevance filtering
  matrix_dropped?: number;    // matrix chunks filtered out
  latency_ms?: number;
  error?: string;             // set when ok === false
  preview?: string;           // first 400 chars of the response, for eyeballing
}
/**
 * POST one fill request through the staffing mode. The request rides in
 * `file_content`; `file_path` is synthesized under requests/ so pathway
 * memory bucketing groups requests by role+geo (see file header).
 */
async function runOne(req: FillRequest): Promise<Result> {
  const slug = (s: string) => s.toLowerCase().replace(/\s+/g, "_");
  const file_path = `requests/${slug(req.role)}_${slug(req.city)}_${req.state}.md`;
  try {
    const res = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        task_class: "staffing_inference",
        file_path,
        file_content: requestToPayload(req),
        force_mode: "staffing_inference_lakehouse",
        force_model: MODEL,
      }),
      signal: AbortSignal.timeout(180_000),
    });
    if (!res.ok) {
      const detail = await res.text().catch(() => "");
      return { req, ok: false, error: `HTTP ${res.status}: ${detail.slice(0, 200)}` };
    }
    const payload: any = await res.json();
    const text = payload.response ?? "";
    return {
      req,
      ok: true,
      response_chars: text.length,
      bug_fingerprints: payload.sources?.bug_fingerprints_count,
      matrix_kept: payload.sources?.matrix_chunks_kept,
      matrix_dropped: payload.sources?.matrix_chunks_dropped,
      latency_ms: payload.latency_ms,
      preview: text.slice(0, 400),
    };
  } catch (e: any) {
    return { req, ok: false, error: e.message };
  }
}
/**
 * Drives the synthetic requests sequentially, logging one line per request,
 * then prints the head of the first successful response so a human can
 * eyeball whether the staffing framing held.
 */
async function main() {
  console.log(`[pass4] requests=${REQUESTS.length} model=${MODEL} mode=staffing_inference_lakehouse\n`);
  let i = 0;
  const results: Result[] = [];
  for (const req of REQUESTS) {
    i++;
    process.stdout.write(` [${i}/${REQUESTS.length}] ${req.role.padEnd(22)} × ${req.count} in ${req.city}, ${req.state} ... `);
    const r = await runOne(req);
    results.push(r);
    if (r.ok) {
      // mtx=kept/total mirrors the pass2 progress-line format.
      console.log(`✓ resp=${r.response_chars} bug=${r.bug_fingerprints ?? 0} mtx=${r.matrix_kept ?? 0}/${(r.matrix_kept ?? 0) + (r.matrix_dropped ?? 0)} ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
    } else {
      console.log(`${r.error}`);
    }
  }
  console.log(`\n[pass4] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded\n`);
  // Show first successful response head to verify the framing actually
  // produced staffing-style output (verdict + ranked candidates) not
  // generic prose.
  const first = results.find(r => r.ok && r.preview);
  if (first) {
    console.log(`[pass4] first successful response preview (${first.req.city} ${first.req.role}):`);
    console.log(first.preview!.split("\n").map(l => " | " + l).join("\n"));
  }
}
main().catch(e => { console.error(e); process.exit(1); });

View File

@ -1,169 +0,0 @@
#!/usr/bin/env bun
/**
* Pass 5 variance summarizer. Reads data/_kb/mode_experiments.jsonl
* since a timestamp, groups by (mode|corpus), reports mean ± stddev
* of grounded finding count, plus a head-to-head wins/losses table
* vs the isolation baseline.
*
* Usage:
* bun run scripts/mode_pass5_summarize.ts # default 2h
* bun run scripts/mode_pass5_summarize.ts --since 2026-04-26T22 # explicit
*/
import { readFileSync, existsSync } from "node:fs";
// Window start: `--since <iso-ts>` on the CLI wins; otherwise look back 2 h.
const argSince = (() => {
  const flagAt = Bun.argv.indexOf("--since");
  if (flagAt >= 0) return Bun.argv[flagAt + 1];
  return new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString();
})();
// Append-only experiment log this summarizer reads (per the file header).
const JSONL = "data/_kb/mode_experiments.jsonl";
if (!existsSync(JSONL)) { console.error(`no ${JSONL}`); process.exit(1); }
/** Fields read from one JSONL row of mode_experiments.jsonl. */
interface Row {
  ts: string; mode: string; file_path: string; response: string;
  sources: { matrix_corpus?: string | string[] | null };  // forced corpus, if any
  latency_ms: number;
}
/** Normalize sources.matrix_corpus (string | string[] | null) to a stable key. */
function corpusKey(c: any): string {
  if (!c) return "";
  if (typeof c === "string") return c;
  if (Array.isArray(c)) {
    // Order-insensitive: sort a copy before joining so A+C === C+A.
    if (c.length === 0) return "";
    return [...c].sort().join("+");
  }
  return "";
}

/** Condition label: "mode|corpus" when a corpus was forced, else the bare mode. */
const condKey = (r: Row) => {
  const corpus = corpusKey(r.sources?.matrix_corpus);
  return corpus === "" ? r.mode : `${r.mode}|${corpus}`;
};
// Reuse the same grounding logic as mode_compare — symbols cited in
// findings rows must appear in the focus file, and any line numbers
// must fall within EOF.
/**
 * Pull finding rows out of a markdown review: isolate the "Findings"
 * section, pick the dominant table-row shape, and extract candidate
 * symbols plus cited line numbers from each row.
 */
function extractFindings(md: string): { symbols: string[]; lines: number[] }[] {
  // Match a level 1-3 heading containing "Finding(s)" (optionally "Ranked").
  const sec = /(?:^|\n)#{1,3}[^\na-zA-Z]*(?:Ranked\s+)?Findings?[^\n]*\n/i;
  const m = md.match(sec);
  let section = md;  // no heading found → scan the whole document
  if (m && m.index !== undefined) {
    const after = md.slice(m.index + m[0].length);
    // Truncate at the next known section heading, if any.
    const stop = after.search(/\n#{1,3}[^\na-zA-Z]*(?:Patch|Suggestion|Reference|Summary|Concrete)/i);
    section = stop >= 0 ? after.slice(0, stop) : after;
  }
  // Three row shapes:
  // 1) numbered: `| 1 | ... |`
  // 2) path-with-line: `| service.rs:106 | ... |`
  // 3) path-with-sym: `| crates/vectord/src/pathway_memory.rs:load_fn (≈L220) | ... |`
  // Pick whichever shape matches the most rows (ties favor numbered).
  const numbered = section.split("\n").filter(l => /^\|\s*\*?\*?\d+\*?\*?\s*\|/.test(l));
  const pathRows = section.split("\n").filter(l => /^\|\s*[a-z_/\.][a-z_/\.0-9]*\.(rs|ts|py)\b/i.test(l));
  const rows = numbered.length >= pathRows.length ? numbered : pathRows;
  return rows.map(row => {
    const sym = new Set<string>();
    // Backticked identifiers, then any bare lowercase word of ≥5 chars.
    for (const t of row.matchAll(/`([A-Za-z_][A-Za-z0-9_:]*)`/g)) sym.add(t[1]);
    for (const t of row.matchAll(/\b([a-z][a-z0-9_]{4,})\b/g)) sym.add(t[1]);
    const lines: number[] = [];
    // Cited line numbers look like ":123" or "-123" (2-5 digits).
    for (const t of row.matchAll(/[:\-](\d{2,5})/g)) lines.push(parseInt(t[1]));
    return { symbols: [...sym], lines };
  });
}
/**
 * Score one response against its focus file. A finding is grounded when at
 * least one cited symbol appears verbatim in the file AND none of its cited
 * line numbers exceed the file's line count; any out-of-bounds line number
 * also increments `oob`.
 */
function grounded(md: string, file: string): { total: number; grounded: number; oob: number } {
  const content = readFileSync(file, "utf8");
  const eof = content.split("\n").length;
  const findings = extractFindings(md);
  let groundedCount = 0;
  let oobCount = 0;
  for (const finding of findings) {
    const hasSymbolHit = finding.symbols.some((s) => content.includes(s));
    const hasOobLine = finding.lines.some((l) => l > eof);
    if (hasOobLine) oobCount++;
    if (finding.symbols.length > 0 && hasSymbolHit && !hasOobLine) groundedCount++;
  }
  return { total: findings.length, grounded: groundedCount, oob: oobCount };
}
// Parse the JSONL, keeping only well-formed rows at or after the cutoff.
const lines = readFileSync(JSONL, "utf8").split("\n").filter(Boolean);
const rows: Row[] = [];
for (const l of lines) {
  try {
    const r: Row = JSON.parse(l);
    // ISO-8601 timestamps compare correctly as plain strings.
    if (r.ts < argSince) continue;
    rows.push(r);
  } catch {}  // tolerate truncated/corrupt lines in the append-only log
}
if (rows.length === 0) { console.error(`no rows since ${argSince}`); process.exit(1); }
// Group: condition → file → array of grounded counts
type CellArr = { grnd: number[]; total: number[]; oob: number[]; ms: number[] };
const byCond: Record<string, Record<string, CellArr>> = {};
for (const r of rows) {
  const k = condKey(r);
  byCond[k] ??= {};
  byCond[k][r.file_path] ??= { grnd: [], total: [], oob: [], ms: [] };
  // Re-derive grounding from the stored response against the file on disk.
  const g = grounded(r.response, r.file_path);
  byCond[k][r.file_path].grnd.push(g.grounded);
  byCond[k][r.file_path].total.push(g.total);
  byCond[k][r.file_path].oob.push(g.oob);
  byCond[k][r.file_path].ms.push(r.latency_ms);
}
/**
 * Descriptive stats for a sample: n, mean, SAMPLE stddev (n−1 denominator),
 * min, max. Empty input yields all zeros; a single observation has sd 0.
 */
function stats(xs: number[]): { n: number; mean: number; sd: number; min: number; max: number } {
  const n = xs.length;
  if (n === 0) return { n: 0, mean: 0, sd: 0, min: 0, max: 0 };
  let sum = 0;
  for (const x of xs) sum += x;
  const mean = sum / n;
  let sumSq = 0;
  for (const x of xs) sumSq += (x - mean) ** 2;
  const sd = n === 1 ? 0 : Math.sqrt(sumSq / (n - 1));
  return { n, mean, sd, min: Math.min(...xs), max: Math.max(...xs) };
}
const conditions = Object.keys(byCond).sort();
const files = [...new Set(rows.map(r => r.file_path))].sort();
console.log(`\n═══ Pass 5 variance — since ${argSince} ═══\n`);
console.log(` ${rows.length} rows · ${conditions.length} conditions · ${files.length} files\n`);
// Per-file table: one row per condition with grounded-count stats.
for (const file of files) {
  console.log(`📄 ${file}`);
  console.log(` ${"condition".padEnd(56)} n ${"grounded mean ± sd".padStart(20)} ${"range".padStart(8)} ${"oob".padStart(4)} ${"avg ms".padStart(7)}`);
  console.log(` ${"─".repeat(56)} ─── ${"─".repeat(20)} ${"─".repeat(8)} ${"─".repeat(4)} ${"─".repeat(7)}`);
  for (const c of conditions) {
    const cell = byCond[c]?.[file];
    if (!cell || cell.grnd.length === 0) continue;  // condition never hit this file
    const s = stats(cell.grnd);
    const oobSum = cell.oob.reduce((a, b) => a + b, 0);
    const msMean = cell.ms.reduce((a, b) => a + b, 0) / cell.ms.length;
    const meanSd = `${s.mean.toFixed(1)} ± ${s.sd.toFixed(1)}`;
    const range = `[${s.min}-${s.max}]`;
    console.log(` ${c.padEnd(56)} ${String(s.n).padStart(3)} ${meanSd.padStart(20)} ${range.padStart(8)} ${String(oobSum).padStart(4)} ${Math.round(msMean / 1000).toString().padStart(5)}s`);
  }
  console.log("");
}
// Head-to-head: for each condition vs isolation baseline, count rep-by-rep
// wins across the same file. Requires equal rep counts.
console.log(`═══ Head-to-head: each condition vs isolation, rep-by-rep ═══\n`);
const isoKey = conditions.find(c => c.startsWith("codereview_isolation"));
if (!isoKey) {
  console.log(" no isolation rows in window");
} else {
  console.log(` baseline: ${isoKey}\n`);
  console.log(` ${"challenger".padEnd(56)} wins losses ties Δ mean grnd`);
  console.log(` ${"─".repeat(56)} ${"─".repeat(4)} ${"─".repeat(6)} ${"─".repeat(4)} ${"─".repeat(12)}`);
  for (const c of conditions) {
    if (c === isoKey) continue;
    let wins = 0, losses = 0, ties = 0, deltaSum = 0, n = 0;
    for (const file of files) {
      const isoArr = byCond[isoKey]?.[file]?.grnd ?? [];
      const cArr = byCond[c]?.[file]?.grnd ?? [];
      // Pair reps positionally; extra reps on either side are ignored.
      const k = Math.min(isoArr.length, cArr.length);
      for (let i = 0; i < k; i++) {
        if (cArr[i] > isoArr[i]) wins++;
        else if (cArr[i] < isoArr[i]) losses++;
        else ties++;
        deltaSum += cArr[i] - isoArr[i];
        n++;
      }
    }
    const dMean = n > 0 ? (deltaSum / n).toFixed(2) : "—";
    console.log(` ${c.padEnd(56)} ${String(wins).padStart(4)} ${String(losses).padStart(6)} ${String(ties).padStart(4)} ${dMean.padStart(12)}`);
  }
}

View File

@ -1,105 +0,0 @@
#!/usr/bin/env bun
/**
* Pass 5: variance test for the 2026-04-26 paid-model bake-off.
*
* The pass-4 single-rep sweep showed isolation beating every matrix
* condition by 1.0-1.4 grounded findings/file on grok-4.1-fast. This
* harness runs N reps × M conditions on the file where the effect was
* sharpest (pathway_memory.rs, 1355 lines) so we can decide whether
* the deltas are real signal or run-to-run noise.
*
* Conditions:
* 1. codereview_isolation no matrix
* 2. codereview_lakehouse + corpus=lakehouse_arch_v1 A only
* 3. codereview_lakehouse + corpus=lakehouse_symbols_v1 C only
* 4. codereview_lakehouse (modes.toml default) A+C composed
*
* Output appends per-call to data/_kb/mode_experiments.jsonl. Aggregate
* with `bun run scripts/mode_compare.ts --since <ts>` and read the
* grounded column with multiple rows per (mode|corpus) key.
*
* Usage:
* bun run scripts/mode_pass5_variance_paid.ts
* LH_REPS=3 LH_FILE=crates/queryd/src/delta.rs bun run scripts/mode_pass5_variance_paid.ts
*/
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
const MODEL = process.env.LH_MODEL ?? "x-ai/grok-4.1-fast";
const FILE = process.env.LH_FILE ?? "crates/vectord/src/pathway_memory.rs";
const REPS = Number(process.env.LH_REPS ?? 5);

/** One experimental arm: a mode plus an optional forced corpus. */
interface Condition {
  label: string;               // space-padded so progress lines align
  mode: string;
  corpus?: string | string[];  // omitted → the modes.toml default composition
}

const ALL_CONDITIONS: Condition[] = [
  { label: "isolation ", mode: "codereview_isolation" },
  { label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" },
  { label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" },
  { label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ },
];

// Optional whitelist via env: LH_CONDITIONS=isolation,composed limits the
// run to a subset (matches against the trimmed `label`). Useful when only
// the head-to-head pair matters and saves ~50% latency on slow rungs.
const wantedLabels = (process.env.LH_CONDITIONS ?? "")
  .split(",")
  .map((s) => s.trim().toLowerCase())
  .filter(Boolean);
const CONDITIONS: Condition[] =
  wantedLabels.length === 0
    ? ALL_CONDITIONS
    : ALL_CONDITIONS.filter((c) =>
        wantedLabels.some((w) => c.label.trim().toLowerCase().startsWith(w)));
/**
 * One gateway call for the given condition. force_matrix_corpus is attached
 * only when the condition pins a corpus; otherwise the gateway uses the
 * modes.toml default composition. Failures return { ok: false, error }.
 */
async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> {
  const request: any = {
    task_class: "scrum_review",
    file_path: FILE,
    force_mode: c.mode,
    force_model: MODEL,
  };
  if (c.corpus !== undefined) request.force_matrix_corpus = c.corpus;
  try {
    const res = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(request),
      signal: AbortSignal.timeout(240_000),
    });
    if (!res.ok) {
      const detail = await res.text().catch(() => "");
      return { ok: false, error: `HTTP ${res.status}: ${detail.slice(0, 160)}` };
    }
    const payload: any = await res.json();
    return { ok: true, latency_ms: payload.latency_ms, resp_chars: (payload.response ?? "").length };
  } catch (e: any) {
    return { ok: false, error: e.message };
  }
}
/**
 * Runs rep-major (all conditions within rep 1, then rep 2, …) so any slow
 * drift in gateway/model behavior over the session is spread across every
 * condition. Per-call rows land in mode_experiments.jsonl on the gateway
 * side (per the file header); this script only logs progress.
 */
async function main() {
  const total = CONDITIONS.length * REPS;
  console.log(`[pass5] file=${FILE}`);
  console.log(`[pass5] model=${MODEL} · ${CONDITIONS.length} conditions × ${REPS} reps = ${total} runs`);
  console.log("");
  let i = 0;
  const startTs = new Date().toISOString();  // remembered for the --since hint below
  for (let rep = 1; rep <= REPS; rep++) {
    for (const c of CONDITIONS) {
      i++;
      process.stdout.write(` [${i}/${total}] rep=${rep} ${c.label}... `);
      const r = await runOne(c, rep);
      if (r.ok) {
        console.log(`${r.resp_chars} chars · ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
      } else {
        console.log(`${r.error}`);
      }
    }
  }
  console.log(`\n[pass5] complete · started ${startTs}`);
  console.log(`[pass5] aggregate: bun run scripts/mode_compare.ts --since ${startTs}`);
}
main().catch(e => { console.error(e); process.exit(1); });

View File

@ -1,458 +0,0 @@
#!/bin/bash
# OVERNIGHT PROOF — the test that settles it
# Runs unattended: embed 500K, build indexes, measure recall,
# autonomous agent test, sustained load. ~3 hours total.
#
# Monitor: tail -f /home/profit/lakehouse/logs/overnight_proof.log
set -uo pipefail
LOG="/home/profit/lakehouse/logs/overnight_proof.log"
STATE="/tmp/overnight_proof_state"
LOCK="/tmp/overnight_proof.lock"
LH="http://localhost:3100"
GW="http://localhost:3700"
mkdir -p /home/profit/lakehouse/logs
if [ -f "$LOCK" ] && kill -0 "$(cat $LOCK)" 2>/dev/null; then
echo "$(date) Already running" >> "$LOG"
exit 0
fi
echo $$ > "$LOCK"
trap "rm -f $LOCK" EXIT
log() { echo "$(date '+%H:%M:%S') $1" | tee -a "$LOG"; }
touch "$STATE"
step=$(cat "$STATE" 2>/dev/null || echo "embed")
log "═══ OVERNIGHT PROOF: step=$step ═══"
case "$step" in
embed)
log "STEP 1/5: Embedding 500K workers through Ollama (~40 min)"
log " This is the real test — actual nomic-embed-text embeddings, not random vectors"
python3 << 'PYEOF' >> "$LOG" 2>&1
import json, time, sys
from urllib.request import Request, urlopen
from urllib.error import HTTPError
LH = "http://localhost:3100"
def post(path, body, timeout=300):
r = Request(f"{LH}{path}", json.dumps(body).encode(), headers={"Content-Type": "application/json"})
try: return json.loads(urlopen(r, timeout=timeout).read())
except HTTPError as e: return {"error": e.read().decode()[:200]}
except Exception as e: return {"error": str(e)}
# Fetch 500K resume_text for embedding
print("Fetching resume texts from workers_500k...")
r = post("/query/sql", {"sql": "SELECT worker_id, resume_text FROM workers_500k LIMIT 500000"})
if "error" in r:
print(f"SQL error: {r['error']}")
sys.exit(1)
rows = r.get("rows", [])
print(f"Got {len(rows)} rows")
# Build docs for embedding
docs = []
for row in rows:
wid = row.get("worker_id", "")
text = row.get("resume_text", "")
if text and len(text) > 20:
docs.append({"id": f"W500K-{wid}", "text": text})
print(f"{len(docs)} docs ready for embedding")
# Chunk into batches of 50K to avoid timeout issues
BATCH = 50000
for batch_start in range(0, len(docs), BATCH):
batch = docs[batch_start:batch_start + BATCH]
batch_num = batch_start // BATCH + 1
total_batches = (len(docs) + BATCH - 1) // BATCH
idx_name = f"workers_500k_v{batch_num}"
print(f"\nBatch {batch_num}/{total_batches}: {len(batch)} docs → index '{idx_name}'")
t0 = time.time()
r = post("/vectors/index", {
"index_name": idx_name,
"source": "workers_500k",
"documents": batch,
"chunk_size": 500,
"overlap": 50,
}, timeout=600)
if "error" in r:
print(f" Index creation error: {r['error']}")
continue
job_id = r.get("job_id")
chunks = r.get("chunks", 0)
print(f" Job {job_id}: {chunks} chunks, embedding in background...")
# Wait for this batch to complete
for _ in range(600): # 50 min max per batch
time.sleep(5)
status = post(f"/vectors/jobs/{job_id}", None) if job_id else {"status": "unknown"}
if isinstance(status, dict):
state = status.get("status", "unknown")
progress = status.get("embedded_chunks", 0)
if state == "completed":
elapsed = time.time() - t0
rate = chunks / elapsed if elapsed > 0 else 0
print(f" DONE: {chunks} chunks in {elapsed:.0f}s ({rate:.0f}/sec)")
break
elif state == "failed":
print(f" FAILED: {status.get('error', 'unknown')}")
break
sys.stdout.write(f"\r {state}: {progress}/{chunks} chunks...")
sys.stdout.flush()
print()
print("\nAll batches submitted. Checking indexes...")
r = post("/vectors/indexes", None)
if not isinstance(r, list): r = []
for idx in r:
if "500k" in idx.get("index_name", ""):
print(f" {idx['index_name']}: {idx['chunk_count']} chunks")
print("STEP 1 COMPLETE")
PYEOF
if grep -q "STEP 1 COMPLETE" "$LOG"; then
echo "build_indexes" > "$STATE"
log "Embedding complete — moving to index build"
else
log "Embedding may still be running — will check on next heartbeat"
echo "check_embed" > "$STATE"
fi
;;
check_embed)
# Fallback state from the embed step: poll the lakehouse jobs endpoint and
# advance to build_indexes only once no embedding job is still running.
log "Checking embedding job status..."
python3 -c "
import json
from urllib.request import urlopen
# One-shot status probe; output is appended to the run log for the grep below.
r = json.loads(urlopen('http://localhost:3100/vectors/jobs', timeout=30).read())
running = [j for j in r if j.get('status') == 'running']
completed = [j for j in r if j.get('status') == 'completed' and '500k' in j.get('index_name','')]
print(f'Running: {len(running)}, Completed 500K: {len(completed)}')
if not running:
    print('ALL_DONE')
" >> "$LOG" 2>&1
# NOTE(review): this greps the entire accumulated log, so an ALL_DONE marker
# left by an earlier run would also match — confirm the log is fresh per run.
if grep -q "ALL_DONE" "$LOG"; then
echo "build_indexes" > "$STATE"
fi
;;
build_indexes)
# STEP 2: build the ANN structures (HNSW, Lance + IVF_PQ, scalar btree)
# on top of the real 500K embeddings produced by step 1.
log "STEP 2/5: Building HNSW + Lance on real 500K embeddings"
python3 << 'PYEOF' >> "$LOG" 2>&1
import json, time
from urllib.request import Request, urlopen
LH = "http://localhost:3100"

def post(path, body, timeout=600):
    # POST JSON to the lakehouse and decode the JSON reply. An HTTP error
    # raises and kills this heredoc, so the step retries on the next beat.
    r = Request(f"{LH}{path}", json.dumps(body).encode(), headers={"Content-Type": "application/json"})
    return json.loads(urlopen(r, timeout=timeout).read())

# Find the first 500K index created by the embed step.
indexes = json.loads(urlopen(f"{LH}/vectors/indexes", timeout=30).read())
idx_500k = [i for i in indexes if "500k" in i.get("index_name","")]
if not idx_500k:
    print("No 500K index found — embedding may not be complete")
    exit(1)
idx_name = idx_500k[0]["index_name"]
chunks = idx_500k[0]["chunk_count"]
print(f"Using index: {idx_name} ({chunks} chunks)")
# Build HNSW. This is the only phase timed client-side; the later phases
# report their own durations in the response payload, so the dead
# t0 = time.time() reassignments before them have been removed.
print(f"Building HNSW on {chunks} real embeddings...")
t0 = time.time()
r = post("/vectors/hnsw/build", {"index_name": idx_name})
print(f" HNSW: {r.get('vectors',0)} vectors in {time.time()-t0:.0f}s")
# Migrate to Lance (server reports rows written + duration).
print(f"Migrating to Lance...")
r = post(f"/vectors/lance/migrate/{idx_name}", {})
stats = r.get("stats", {})
print(f" Lance: {stats.get('rows_written',0)} rows in {stats.get('duration_secs',0):.1f}s")
# Build IVF_PQ on Lance
# sqrt(50K) ≈ 224 partitions for a 50K batch
print(f"Building IVF_PQ on Lance...")
r = post(f"/vectors/lance/index/{idx_name}", {"num_partitions": 224, "num_bits": 8, "num_sub_vectors": 192})
print(f" IVF_PQ: built in {r.get('build_time_secs',0):.0f}s")
# Scalar btree on doc_id for fast filtered lookups.
print(f"Building scalar btree on doc_id...")
r = post(f"/vectors/lance/scalar-index/{idx_name}/doc_id", {})
print(f" Btree: built in {r.get('build_time_secs',0):.1f}s")
print("STEP 2 COMPLETE")
PYEOF
# Advance only if the marker reached the log; otherwise retry next heartbeat.
if grep -q "STEP 2 COMPLETE" "$LOG"; then
echo "recall_test" > "$STATE"
fi
;;
recall_test)
# STEP 3: auto-generate an eval harness on the built index, then measure
# recall@10 and p50 latency for both the HNSW and Lance IVF_PQ paths.
log "STEP 3/5: Measuring recall on REAL embeddings"
python3 << 'PYEOF' >> "$LOG" 2>&1
import json, time
from urllib.request import Request, urlopen
LH = "http://localhost:3100"
def post(path, body, timeout=300):
    # POST JSON to the lakehouse and decode the JSON reply.
    r = Request(f"{LH}{path}", json.dumps(body).encode(), headers={"Content-Type": "application/json"})
    return json.loads(urlopen(r, timeout=timeout).read())
# Find 500K index
indexes = json.loads(urlopen(f"{LH}/vectors/indexes", timeout=30).read())
idx_500k = [i for i in indexes if "500k" in i.get("index_name","")]
if not idx_500k:
    # NOTE(review): exiting here never prints STEP 3 COMPLETE, so despite
    # the "skipping" wording the shell below will retry this state on every
    # heartbeat — confirm whether it should advance instead.
    print("No 500K index — skipping recall")
    exit(0)
idx_name = idx_500k[0]["index_name"]
# Auto-generate eval harness: 50 sampled queries, ground truth at k=10.
print(f"Generating eval harness for {idx_name}...")
r = post(f"/vectors/hnsw/evals/{idx_name}_recall/autogen", {
    "index_name": idx_name, "sample_count": 50, "k": 10,
})
print(f" Harness: {len(r.get('queries',[]))} queries, k={r.get('k',10)}")
# HNSW recall
print("Measuring HNSW recall...")
r = post("/vectors/hnsw/trial", {
    "index_name": idx_name,
    "harness": f"{idx_name}_recall",
    "config": {"ef_construction": 80, "ef_search": 30, "seed": 42},
})
print(f" HNSW recall@10: {r.get('metrics',{}).get('recall_at_k',0):.4f}")
print(f" HNSW p50: {r.get('metrics',{}).get('search_latency_p50_us',0):.0f}us")
# Lance recall, same harness so the two numbers are comparable.
print("Measuring Lance IVF_PQ recall...")
r = post(f"/vectors/lance/recall/{idx_name}", {
    "harness": f"{idx_name}_recall", "top_k": 10,
})
print(f" Lance recall@10: {r.get('mean_recall',0):.4f}")
print(f" Lance p50: {r.get('latency_p50_us',0):.0f}us")
print("STEP 3 COMPLETE")
PYEOF
if grep -q "STEP 3 COMPLETE" "$LOG"; then
echo "autonomous_test" > "$STATE"
fi
;;
autonomous_test)
# STEP 4: fire 100 mixed operations (semantic match / count / aggregate /
# lookup) against the gateway with no human in the loop and score them.
log "STEP 4/5: 100 staffing questions — LOCAL MODEL ONLY, no human steering"
python3 << 'PYEOF' >> "$LOG" 2>&1
import json, time, random
from urllib.request import Request, urlopen
from urllib.error import HTTPError
GW = "http://localhost:3700"
random.seed(2026)  # fixed seed: same 100-op mix on every run

def gw(path, body=None, timeout=180):
    # GET (no body) or POST JSON to the gateway; failures are folded into
    # an {"error": ...} dict so one bad op cannot kill the whole run.
    data = json.dumps(body).encode() if body else None
    method = "POST" if body else "GET"
    r = Request(f"{GW}{path}", data=data, method=method, headers={"Content-Type":"application/json"} if body else {})
    try: return json.loads(urlopen(r, timeout=timeout).read())
    except HTTPError as e: return {"error": e.read().decode()[:200]}
    except Exception as e: return {"error": str(e)}

def sql(query):
    # Ground-truth SQL helper; returns [] on any error.
    r = gw("/sql", {"sql": query})
    return r.get("rows", []) if "error" not in r else []

ROLES = ["Forklift Operator","Machine Operator","Assembler","Loader","Quality Tech",
"Welder","Sanitation Worker","Shipping Clerk","Production Worker","Maintenance Tech"]
STATES = ["IL","IN","OH","MO","TN","KY","WI","MI"]
print("═══ 100 AUTONOMOUS OPERATIONS ═══")
passed = 0
failed = 0
total_ms = 0
# Mix of operation types
for i in range(100):
    op_type = random.choices(["match","count","aggregate","lookup"], weights=[50,25,15,10])[0]
    role = random.choice(ROLES)
    state = random.choice(STATES)
    rel = round(random.uniform(0.6, 0.9), 2)
    t0 = time.time()
    ok = False
    detail = ""
    if op_type == "match":
        r = gw("/search", {
            "question": f"Find {role} workers in {state}",
            "sql_filter": f"role = '{role}' AND state = '{state}' AND CAST(reliability AS DOUBLE) >= {rel}",
            "dataset": "workers_500k", "top_k": 5, "generate": False,
        })
        matched = len(r.get("sources", []))
        # BUGFIX: an {"error": ...} reply used to count as a pass, because
        # sources was empty and sql_matches defaulted to 0. Require a clean
        # reply; then 0 matches is ok only when SQL also found 0 candidates.
        ok = "error" not in r and (matched > 0 or r.get("sql_matches", 0) == 0)
        detail = f"match: {matched} results (sql={r.get('sql_matches',0)})"
    elif op_type == "count":
        truth = sql(f"SELECT COUNT(*) cnt FROM workers_500k WHERE role = '{role}' AND state = '{state}'")
        expected = truth[0]["cnt"] if truth else 0
        # Use keyword classifier logic: count → SQL
        r = gw("/sql", {"sql": f"SELECT COUNT(*) cnt FROM workers_500k WHERE role = '{role}' AND state = '{state}'"})
        rows = r.get("rows") or [{}]  # guard: an empty rows list would IndexError
        got = rows[0].get("cnt", -1) if "error" not in r else -1
        ok = got == expected
        detail = f"count: got={got} expected={expected}"
    elif op_type == "aggregate":
        r = gw("/sql", {"sql": f"SELECT ROUND(AVG(CAST(reliability AS DOUBLE)),3) avg FROM workers_500k WHERE role = '{role}' AND state = '{state}'"})
        ok = "error" not in r and r.get("rows")
        detail = f"aggregate: {r.get('rows',[{}])[0] if ok else r.get('error','?')[:40]}"
    elif op_type == "lookup":
        wid = random.randint(1, 500000)
        r = gw(f"/worker/{wid}")
        ok = r.get("rows") and len(r["rows"]) > 0
        detail = f"lookup: worker {wid} {'found' if ok else 'not found'}"
    ms = (time.time()-t0)*1000
    total_ms += ms
    if ok: passed += 1
    else: failed += 1
    # Print every 20th op plus every failure to keep the log readable.
    if i % 20 == 0 or not ok:
        icon = "OK" if ok else "FAIL"
        print(f" [{i+1:3d}/100] {icon} {op_type:10s} {detail[:50]:50s} ({ms:.0f}ms)")
pct = passed / 100 * 100
print(f"\n═══ RESULT: {passed}/100 passed ({pct:.0f}%) in {total_ms/1000:.1f}s ═══")
print(f" avg latency: {total_ms/100:.0f}ms per operation")
# Log to playbook
gw("/log", {
    "operation": f"autonomous_100: {passed}/100 ({pct:.0f}%)",
    "approach": "keyword routing + SQL + hybrid, local model only",
    "result": f"passed={passed} failed={failed} avg_ms={total_ms/100:.0f}",
    "context": "overnight proof step 4",
})
if pct >= 90:
    print("STEP 4 COMPLETE — AUTONOMOUS TEST PASSED")
else:
    print(f"STEP 4 COMPLETE — {pct:.0f}% (below 90% target)")
PYEOF
# This step always advances: pass/fail is recorded in the log, not the state.
echo "sustained_load" > "$STATE"
;;
sustained_load)
# STEP 5: 30 minutes of continuous concurrent SQL load against the gateway,
# 10 parallel requests per cycle, to prove sustained throughput.
log "STEP 5/5: Sustained load — 30 minutes of continuous operations"
python3 << 'PYEOF' >> "$LOG" 2>&1
import json, time, random, concurrent.futures
from urllib.request import Request, urlopen
GW = "http://localhost:3700"
random.seed(42)  # deterministic query mix

def gw(path, body=None):
    # GET/POST JSON to the gateway. Any failure (timeout, HTTP error, bad
    # JSON) is reported as an error dict so the load loop keeps going.
    # BUGFIX: was a bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit; Exception keeps the run interruptible.
    data = json.dumps(body).encode() if body else None
    r = Request(f"{GW}{path}", data=data, method="POST" if body else "GET",
        headers={"Content-Type":"application/json"} if body else {})
    try: return json.loads(urlopen(r, timeout=60).read())
    except Exception: return {"error": "timeout"}

ROLES = ["Forklift Operator","Machine Operator","Assembler","Loader","Quality Tech"]
STATES = ["IL","IN","OH","MO"]
print("═══ SUSTAINED LOAD: 30 minutes ═══")
duration = 30 * 60 # 30 minutes
t_start = time.time()
ops = 0
errors = 0
cycle = 0
while time.time() - t_start < duration:
    cycle += 1
    batch_ops = 0
    batch_errors = 0
    # Fire 10 concurrent operations
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
        futures = []
        for _ in range(10):
            role = random.choice(ROLES)
            state = random.choice(STATES)
            futures.append(pool.submit(gw, "/sql", {
                "sql": f"SELECT COUNT(*) FROM workers_500k WHERE role = '{role}' AND state = '{state}'"
            }))
        for f in concurrent.futures.as_completed(futures):
            r = f.result()
            batch_ops += 1
            if "error" in r: batch_errors += 1
    ops += batch_ops
    errors += batch_errors
    elapsed = time.time() - t_start
    remaining = duration - elapsed
    if cycle % 30 == 0: # Log every ~30 cycles
        rate = ops / elapsed
        print(f" {elapsed/60:.0f}min: {ops} ops ({rate:.0f}/sec) errors={errors} remaining={remaining/60:.0f}min")
    time.sleep(1) # 1 sec between batches
elapsed = time.time() - t_start
rate = ops / elapsed
print(f"\n═══ SUSTAINED LOAD COMPLETE ═══")
print(f" Duration: {elapsed/60:.1f} minutes")
print(f" Operations: {ops}")
print(f" Rate: {rate:.0f} ops/sec")
print(f" Errors: {errors} ({100*errors/max(ops,1):.1f}%)")
print(f" STEP 5 COMPLETE")
PYEOF
echo "report" > "$STATE"
;;
report)
# Final summary: emit the completion banner through log() so every line
# is timestamped into the console and $LOG, then mark the state terminal.
banner=(
""
"═══════════════════════════════════════════════════════"
" OVERNIGHT PROOF — COMPLETE"
"═══════════════════════════════════════════════════════"
" Step 1: 500K real embeddings via Ollama"
" Step 2: HNSW + Lance indexes on real data"
" Step 3: Recall measured on real embeddings"
" Step 4: 100 autonomous operations (no human)"
" Step 5: 30 min sustained concurrent load"
""
" Full log: $LOG"
"═══════════════════════════════════════════════════════"
)
for banner_line in "${banner[@]}"; do
log "$banner_line"
done
echo "done" > "$STATE"
;;
done)
# Terminal state: every subsequent heartbeat lands here and is a no-op
# apart from this log line.
log "Overnight proof already complete."
;;
esac