From f4ebd2278b5efb4c3adf2f89ac7ae8f767d8200f Mon Sep 17 00:00:00 2001 From: root Date: Sun, 3 May 2026 02:06:29 -0500 Subject: [PATCH] remove 7 more orphaned experimental scripts from scripts/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continuing the test-code-in-main cleanup. These are sequential mode-runner experiment passes (2/3/4/5) that completed and whose findings were captured in pathway_memory + the matrix index — the scripts themselves are dead weight. Plus two one-off probe scripts. Removed (all 0 refs in production code or automation): - mode_pass2_corpus_sweep.ts — 2026-04 corpus sweep experiment - mode_pass3_variance.ts — variance measurement run - mode_pass4_staffing.ts — staffing-domain pass - mode_pass5_summarize.ts — pass-5 variance summarizer (aggregates mode_experiments.jsonl) - mode_pass5_variance_paid.ts — paid-model variance - overnight_proof.sh — overnight stress probe (output in logs/) - ab_t3_test.sh — T3 overseer A/B test (output captured in KB) Verified: 0 references in package.json / justfile / Makefile / any active .ts/.rs/.sh file. Two mentions remain in docs/recon and docs/MODE_RUNNER_TUNING_PLAN — those are historical design-doc references, not consumers. KEPT in scripts/ (have live consumers OR are runtime tools): - mode_experiment.ts (14 refs), mode_compare.ts (7 refs) - lance_smoke.sh, build_*_corpus.ts, staffing_demo.py, lance_tune.py, generate_demo.py, generate_workers.py, copilot.py, kb_measure.py, kb_staffer_report.py, analyze_chicago_contracts.ts, dump_raw_corpus.sh, check_phase44_callers.sh, autonomous_agent.py, build_answers_corpus.ts, build_lakehouse_corpus.ts, build_scrum_findings_corpus.ts, build_symbols_corpus.ts, e2e_pipeline_check.sh, scale_test.py, scale_10m_test.sh, run_staffer_demo.sh, stress_test.py Build clean. 
If any of these are needed back (with this commit at HEAD): git checkout HEAD~1 -- scripts/FILE Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/ab_t3_test.sh | 77 ----- scripts/mode_pass2_corpus_sweep.ts | 121 -------- scripts/mode_pass3_variance.ts | 109 ------- scripts/mode_pass4_staffing.ts | 127 -------- scripts/mode_pass5_summarize.ts | 169 ---------- scripts/mode_pass5_variance_paid.ts | 105 ------- scripts/overnight_proof.sh | 458 ---------------------------- 7 files changed, 1166 deletions(-) delete mode 100755 scripts/ab_t3_test.sh delete mode 100644 scripts/mode_pass2_corpus_sweep.ts delete mode 100644 scripts/mode_pass3_variance.ts delete mode 100644 scripts/mode_pass4_staffing.ts delete mode 100644 scripts/mode_pass5_summarize.ts delete mode 100644 scripts/mode_pass5_variance_paid.ts delete mode 100755 scripts/overnight_proof.sh diff --git a/scripts/ab_t3_test.sh b/scripts/ab_t3_test.sh deleted file mode 100755 index 035dcea..0000000 --- a/scripts/ab_t3_test.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash -# A/B test of T3 overseer: does it actually make subsequent runs better? -# Chains Run B (T3 seed) → Run C (T3 + read-back) → Run D (T3 cloud). -# Run A is assumed already complete (launched separately). Aggregates -# metrics at the end into ab_scorecard.json. - -set -e -cd "$(dirname "$0")/.." - -export OLLAMA_CLOUD_KEY="$(python3 -c "import json; print(json.load(open('/root/llm_team_config.json'))['providers']['ollama_cloud']['api_key'])")" - -echo "▶ A/B test start at $(date -Iseconds)" -echo "▶ prior lessons dir: $(ls data/_playbook_lessons 2>/dev/null | wc -l) files" - -# Run B — T3 enabled local, no prior lessons should exist yet -echo "──── RUN B: T3 local, seeds first lesson ────" -bun tests/multi-agent/scenario.ts > /tmp/lakehouse_ab_B.log 2>&1 || true -echo " B exit=$?" 
-ls data/_playbook_lessons/*.json 2>/dev/null | head -5 - -# Run C — T3 enabled local, B's lesson should load -echo "──── RUN C: T3 local, reads B's lesson ────" -bun tests/multi-agent/scenario.ts > /tmp/lakehouse_ab_C.log 2>&1 || true -echo " C exit=$?" - -# Run D — T3 enabled CLOUD (gpt-oss:120b), reads B+C lessons -echo "──── RUN D: T3 cloud, reads B+C lessons ────" -LH_OVERVIEW_CLOUD=1 bun tests/multi-agent/scenario.ts > /tmp/lakehouse_ab_D.log 2>&1 || true -echo " D exit=$?" - -echo "▶ all runs done at $(date -Iseconds)" -echo "▶ scorecard:" -ls -1dt tests/multi-agent/playbooks/scenario-* | head -4 | tac | python3 -c " -import sys, os, json - -runs = [l.strip() for l in sys.stdin if l.strip()] -labels = ['A(no-T3)','B(T3-seed)','C(T3-read)','D(T3-cloud)'] -# Prepend Run A: most recent BEFORE the ab_t3_test kicked off is Run A -# (launched separately). But we only picked up the most recent 4 runs. -# Actually: ab_t3_test runs B/C/D, so recent 3 = B,C,D. Run A is the one -# BEFORE those — find it separately. -# Reread to include Run A: -import subprocess -all_runs = subprocess.check_output(['bash','-c','ls -1dt tests/multi-agent/playbooks/scenario-* | head -8']).decode().strip().split('\n') -# The 4 most recent are D, C, B, A (reverse chronological). 
-top4 = list(reversed(all_runs[:4])) # oldest first → A,B,C,D -rows = [] -for i, path in enumerate(top4): - try: - results = json.load(open(os.path.join(path, 'results.json'))) - except FileNotFoundError: - continue - ok = sum(1 for r in results if r.get('ok')) - turns = sum(r.get('turns', 0) for r in results) - gaps = sum(len(r.get('gap_signals', [])) for r in results) - cites = sum(len(r.get('playbook_citations') or []) for r in results) - prior = [] - try: - prior = json.load(open(os.path.join(path, 'prior_lessons.json'))) - except FileNotFoundError: - pass - rows.append({ - 'label': labels[i] if i < len(labels) else f'run{i}', - 'path': path, - 'ok_events': ok, - 'total_events': len(results), - 'total_turns': turns, - 'total_gaps': gaps, - 'total_citations': cites, - 'prior_lessons_loaded': len(prior), - }) - -scorecard = {'generated_at': __import__('datetime').datetime.utcnow().isoformat()+'Z', 'runs': rows} -open('tests/multi-agent/playbooks/ab_scorecard.json','w').write(json.dumps(scorecard, indent=2)) -print(json.dumps(scorecard, indent=2)) -" -echo "▶ saved: tests/multi-agent/playbooks/ab_scorecard.json" diff --git a/scripts/mode_pass2_corpus_sweep.ts b/scripts/mode_pass2_corpus_sweep.ts deleted file mode 100644 index 04596a7..0000000 --- a/scripts/mode_pass2_corpus_sweep.ts +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env bun -/** - * Pass 2: matrix corpus + relevance threshold sweep. - * - * For each (corpus, threshold) combination, run codereview_matrix_only - * on the same N files. Compares which corpus actually adds grounded - * findings vs codereview_isolation (matrix-off baseline). - * - * Output: data/_kb/mode_experiments.jsonl gets one row per call, - * tagged via the force_matrix_corpus + force_relevance_threshold - * fields visible in `sources`. Aggregator can then group by corpus. - * - * Usage: bun run scripts/mode_pass2_corpus_sweep.ts - */ - -const GATEWAY = process.env.LH_GATEWAY ?? 
"http://localhost:3100"; -const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free"; - -const FILES = (process.env.LH_FILES ?? [ - "crates/queryd/src/delta.rs", - "crates/queryd/src/service.rs", - "crates/vectord/src/pathway_memory.rs", - "crates/gateway/src/v1/mode.rs", - "crates/aibridge/src/client.rs", -].join(",")).split(","); - -const CORPORA = (process.env.LH_CORPORA ?? [ - "distilled_procedural_v20260423102847", - "distilled_factual_v20260423095819", - "distilled_config_hint_v20260423102847", - "kb_team_runs_v1", -].join(",")).split(","); - -const THRESHOLDS = (process.env.LH_THRESHOLDS ?? "0.2,0.3,0.4,0.5").split(",").map(Number); - -interface Result { - corpus: string; - threshold: number; - file: string; - ok: boolean; - matrix_kept?: number; - matrix_dropped?: number; - response_chars?: number; - latency_ms?: number; - error?: string; -} - -async function runOne(corpus: string, threshold: number, file: string): Promise { - try { - const r = await fetch(`${GATEWAY}/v1/mode/execute`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ - task_class: "scrum_review", - file_path: file, - force_mode: "codereview_matrix_only", - force_model: MODEL, - force_matrix_corpus: corpus, - force_relevance_threshold: threshold, - }), - signal: AbortSignal.timeout(180_000), - }); - if (!r.ok) { - const body = await r.text().catch(() => ""); - return { corpus, threshold, file, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 150)}` }; - } - const j: any = await r.json(); - return { - corpus, threshold, file, ok: true, - matrix_kept: j.sources?.matrix_chunks_kept, - matrix_dropped: j.sources?.matrix_chunks_dropped, - response_chars: (j.response ?? 
"").length, - latency_ms: j.latency_ms, - }; - } catch (e: any) { - return { corpus, threshold, file, ok: false, error: e.message }; - } -} - -async function main() { - const total = CORPORA.length * THRESHOLDS.length * FILES.length; - console.log(`[pass2] corpora=${CORPORA.length} × thresholds=${THRESHOLDS.length} × files=${FILES.length} = ${total} runs`); - console.log(`[pass2] model=${MODEL}\n`); - let i = 0; - const results: Result[] = []; - for (const corpus of CORPORA) { - for (const threshold of THRESHOLDS) { - for (const file of FILES) { - i++; - process.stdout.write(` [${i}/${total}] corpus=${corpus.slice(0, 30).padEnd(30)} thr=${threshold.toFixed(1)} ${file.slice(-32).padStart(32)} ... `); - const r = await runOne(corpus, threshold, file); - results.push(r); - if (r.ok) { - const total_chunks = (r.matrix_kept ?? 0) + (r.matrix_dropped ?? 0); - console.log(`✓ k=${r.matrix_kept}/${total_chunks} resp=${r.response_chars} ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`); - } else { - console.log(`✗ ${r.error}`); - } - } - } - } - - console.log(`\n[pass2] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`); - - // Per-corpus×threshold roll-up of kept-rate (the matrix usefulness proxy). - console.log(`\n[pass2] kept-rate by corpus × threshold (avg chunks kept per call):`); - console.log(` ${"corpus".padEnd(40)} ${THRESHOLDS.map(t => `thr=${t.toFixed(1)}`).join(" ").padStart(35)}`); - for (const corpus of CORPORA) { - const cells = THRESHOLDS.map(t => { - const matched = results.filter(r => r.ok && r.corpus === corpus && r.threshold === t); - if (matched.length === 0) return " — "; - const avgKept = matched.reduce((s, r) => s + (r.matrix_kept ?? 
0), 0) / matched.length; - return avgKept.toFixed(1).padStart(5); - }).join(" "); - console.log(` ${corpus.slice(0, 40).padEnd(40)} ${cells}`); - } - - console.log(`\n[pass2] aggregate findings/groundedness with: bun run scripts/mode_compare.ts`); -} - -main().catch(e => { console.error(e); process.exit(1); }); diff --git a/scripts/mode_pass3_variance.ts b/scripts/mode_pass3_variance.ts deleted file mode 100644 index 5d2b44e..0000000 --- a/scripts/mode_pass3_variance.ts +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env bun -/** - * Pass 3: variance test. - * - * Runs codereview_lakehouse on the SAME file N times at each of M - * temperatures. Measures run-to-run stability of grounded finding - * count, response size, and latency. Anything <100% groundedness - * is a leak; track which symbols got hallucinated. - * - * Output appends to data/_kb/mode_experiments.jsonl. The aggregator - * can group by ts and identify variance buckets. - * - * Usage: bun run scripts/mode_pass3_variance.ts - */ - -const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100"; -const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free"; - -const FILES = (process.env.LH_FILES ?? [ - "crates/queryd/src/delta.rs", - "crates/vectord/src/pathway_memory.rs", - "crates/gateway/src/v1/mode.rs", -].join(",")).split(","); - -const TEMPS = (process.env.LH_TEMPS ?? "0.0,0.1,0.3").split(",").map(Number); -const REPS = Number(process.env.LH_REPS ?? 
5); - -interface Result { - file: string; - temp: number; - rep: number; - ok: boolean; - response_chars?: number; - latency_ms?: number; - error?: string; -} - -async function runOne(file: string, temp: number, rep: number): Promise { - try { - const r = await fetch(`${GATEWAY}/v1/mode/execute`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ - task_class: "scrum_review", - file_path: file, - force_mode: "codereview_lakehouse", - force_model: MODEL, - force_temperature: temp, - }), - signal: AbortSignal.timeout(180_000), - }); - if (!r.ok) { - const body = await r.text().catch(() => ""); - return { file, temp, rep, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 150)}` }; - } - const j: any = await r.json(); - return { - file, temp, rep, ok: true, - response_chars: (j.response ?? "").length, - latency_ms: j.latency_ms, - }; - } catch (e: any) { - return { file, temp, rep, ok: false, error: e.message }; - } -} - -async function main() { - const total = FILES.length * TEMPS.length * REPS; - console.log(`[pass3] files=${FILES.length} × temps=${TEMPS.length} × reps=${REPS} = ${total} runs`); - console.log(`[pass3] model=${MODEL}\n`); - let i = 0; - const results: Result[] = []; - for (const file of FILES) { - for (const temp of TEMPS) { - for (let rep = 1; rep <= REPS; rep++) { - i++; - process.stdout.write(` [${i}/${total}] temp=${temp.toFixed(1)} rep=${rep}/${REPS} ${file.slice(-32).padStart(32)} ... `); - const r = await runOne(file, temp, rep); - results.push(r); - if (r.ok) { - console.log(`✓ resp=${r.response_chars} ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`); - } else { - console.log(`✗ ${r.error}`); - } - } - } - } - - console.log(`\n[pass3] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`); - - // Per-file × temp variance summary (response_chars stddev as a quick - // proxy for output instability). 
- console.log(`\n[pass3] response_chars variance (mean ± stddev) by file × temp:`); - console.log(` ${"file".padEnd(40)} ${TEMPS.map(t => `temp=${t.toFixed(1)}`.padStart(20)).join(" ")}`); - for (const file of FILES) { - const cells = TEMPS.map(t => { - const xs = results.filter(r => r.ok && r.file === file && r.temp === t).map(r => r.response_chars ?? 0); - if (xs.length === 0) return " — "; - const mean = xs.reduce((s, x) => s + x, 0) / xs.length; - const sd = Math.sqrt(xs.reduce((s, x) => s + Math.pow(x - mean, 2), 0) / xs.length); - return `${Math.round(mean).toString().padStart(7)} ± ${Math.round(sd).toString().padEnd(6)}`.padStart(20); - }).join(" "); - console.log(` ${file.slice(0, 40).padEnd(40)} ${cells}`); - } - - console.log(`\n[pass3] grounding variance via: bun run scripts/mode_compare.ts (look for grounded-N column drift)`); -} - -main().catch(e => { console.error(e); process.exit(1); }); diff --git a/scripts/mode_pass4_staffing.ts b/scripts/mode_pass4_staffing.ts deleted file mode 100644 index 8e68253..0000000 --- a/scripts/mode_pass4_staffing.ts +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env bun -/** - * Pass 4: staffing_inference_lakehouse cross-domain validation. - * - * Runs the staffing-domain mode against synthetic fill requests. - * Validates that the modes-as-prompt-molders architecture generalizes - * beyond code review — the composer pattern (file_content + bug - * fingerprints + relevance-filtered matrix + domain framing) should - * produce grounded staffing recommendations the same way it produces - * grounded code reviews. - * - * Each fill request is posted as `file_content` (since the runner's - * shape expects file content; for staffing it's the request payload). - * file_path is set to a synthetic path under requests/ so pathway - * memory bucketing groups requests by geo+role. - * - * Usage: bun run scripts/mode_pass4_staffing.ts - */ - -const GATEWAY = process.env.LH_GATEWAY ?? 
"http://localhost:3100"; -const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free"; - -interface FillRequest { - city: string; - state: string; - role: string; - count: number; - deadline: string; - notes?: string; -} - -const REQUESTS: FillRequest[] = [ - { city: "Toledo", state: "OH", role: "Welder", count: 2, deadline: "2026-04-29", notes: "OSHA 10 required" }, - { city: "Nashville", state: "TN", role: "Forklift Operator", count: 3, deadline: "2026-05-01" }, - { city: "Chicago", state: "IL", role: "Assembler", count: 5, deadline: "2026-04-30", notes: "second shift" }, - { city: "South Bend", state: "IN", role: "Electrician", count: 1, deadline: "2026-04-28", notes: "journeyman license" }, - { city: "Murfreesboro", state: "TN", role: "Packaging Operator", count: 4, deadline: "2026-05-02" }, -]; - -function requestToPayload(req: FillRequest): string { - return [ - `# Fill Request`, - `Role: ${req.role} × ${req.count}`, - `Location: ${req.city}, ${req.state}`, - `Deadline: ${req.deadline}`, - req.notes ? `Notes: ${req.notes}` : "", - "", - "Recommend candidates from the matrix data. 
Cite playbook references.", - ].filter(Boolean).join("\n"); -} - -interface Result { - req: FillRequest; - ok: boolean; - response_chars?: number; - bug_fingerprints?: number; - matrix_kept?: number; - matrix_dropped?: number; - latency_ms?: number; - error?: string; - preview?: string; -} - -async function runOne(req: FillRequest): Promise { - const payload = requestToPayload(req); - const file_path = `requests/${req.role.toLowerCase().replace(/\s+/g, "_")}_${req.city.toLowerCase().replace(/\s+/g, "_")}_${req.state}.md`; - try { - const r = await fetch(`${GATEWAY}/v1/mode/execute`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ - task_class: "staffing_inference", - file_path, - file_content: payload, - force_mode: "staffing_inference_lakehouse", - force_model: MODEL, - }), - signal: AbortSignal.timeout(180_000), - }); - if (!r.ok) { - const body = await r.text().catch(() => ""); - return { req, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 200)}` }; - } - const j: any = await r.json(); - return { - req, ok: true, - response_chars: (j.response ?? "").length, - bug_fingerprints: j.sources?.bug_fingerprints_count, - matrix_kept: j.sources?.matrix_chunks_kept, - matrix_dropped: j.sources?.matrix_chunks_dropped, - latency_ms: j.latency_ms, - preview: (j.response ?? "").slice(0, 400), - }; - } catch (e: any) { - return { req, ok: false, error: e.message }; - } -} - -async function main() { - console.log(`[pass4] requests=${REQUESTS.length} model=${MODEL} mode=staffing_inference_lakehouse\n`); - let i = 0; - const results: Result[] = []; - for (const req of REQUESTS) { - i++; - process.stdout.write(` [${i}/${REQUESTS.length}] ${req.role.padEnd(22)} × ${req.count} in ${req.city}, ${req.state} ... `); - const r = await runOne(req); - results.push(r); - if (r.ok) { - console.log(`✓ resp=${r.response_chars} bug=${r.bug_fingerprints ?? 0} mtx=${r.matrix_kept ?? 0}/${(r.matrix_kept ?? 0) + (r.matrix_dropped ?? 
0)} ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`); - } else { - console.log(`✗ ${r.error}`); - } - } - - console.log(`\n[pass4] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded\n`); - - // Show first successful response head to verify the framing actually - // produced staffing-style output (verdict + ranked candidates) not - // generic prose. - const first = results.find(r => r.ok && r.preview); - if (first) { - console.log(`[pass4] first successful response preview (${first.req.city} ${first.req.role}):`); - console.log(first.preview!.split("\n").map(l => " | " + l).join("\n")); - } -} - -main().catch(e => { console.error(e); process.exit(1); }); diff --git a/scripts/mode_pass5_summarize.ts b/scripts/mode_pass5_summarize.ts deleted file mode 100644 index 0b90657..0000000 --- a/scripts/mode_pass5_summarize.ts +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env bun -/** - * Pass 5 variance summarizer. Reads data/_kb/mode_experiments.jsonl - * since a timestamp, groups by (mode|corpus), reports mean ± stddev - * of grounded finding count, plus a head-to-head wins/losses table - * vs the isolation baseline. - * - * Usage: - * bun run scripts/mode_pass5_summarize.ts # default 2h - * bun run scripts/mode_pass5_summarize.ts --since 2026-04-26T22 # explicit - */ - -import { readFileSync, existsSync } from "node:fs"; - -const argSince = (() => { - const i = Bun.argv.indexOf("--since"); - return i >= 0 ? Bun.argv[i + 1] : new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString(); -})(); - -const JSONL = "data/_kb/mode_experiments.jsonl"; -if (!existsSync(JSONL)) { console.error(`no ${JSONL}`); process.exit(1); } - -interface Row { - ts: string; mode: string; file_path: string; response: string; - sources: { matrix_corpus?: string | string[] | null }; - latency_ms: number; -} - -function corpusKey(c: any): string { - if (!c) return ""; - if (typeof c === "string") return c; - if (Array.isArray(c)) return c.length === 0 ? 
"" : [...c].sort().join("+"); - return ""; -} -const condKey = (r: Row) => { - const c = corpusKey(r.sources?.matrix_corpus); - return c ? `${r.mode}|${c}` : r.mode; -}; - -// Reuse the same grounding logic as mode_compare — symbols cited in -// findings rows must appear in the focus file, and any line numbers -// must fall within EOF. -function extractFindings(md: string): { symbols: string[]; lines: number[] }[] { - const sec = /(?:^|\n)#{1,3}[^\na-zA-Z]*(?:Ranked\s+)?Findings?[^\n]*\n/i; - const m = md.match(sec); - let section = md; - if (m && m.index !== undefined) { - const after = md.slice(m.index + m[0].length); - const stop = after.search(/\n#{1,3}[^\na-zA-Z]*(?:Patch|Suggestion|Reference|Summary|Concrete)/i); - section = stop >= 0 ? after.slice(0, stop) : after; - } - // Three row shapes: - // 1) numbered: `| 1 | ... |` - // 2) path-with-line: `| service.rs:106 | ... |` - // 3) path-with-sym: `| crates/vectord/src/pathway_memory.rs:load_fn (≈L220) | ... |` - // Pick whichever shape matches the most rows (ties favor numbered). - const numbered = section.split("\n").filter(l => /^\|\s*\*?\*?\d+\*?\*?\s*\|/.test(l)); - const pathRows = section.split("\n").filter(l => /^\|\s*[a-z_/\.][a-z_/\.0-9]*\.(rs|ts|py)\b/i.test(l)); - const rows = numbered.length >= pathRows.length ? 
numbered : pathRows; - return rows.map(row => { - const sym = new Set(); - for (const t of row.matchAll(/`([A-Za-z_][A-Za-z0-9_:]*)`/g)) sym.add(t[1]); - for (const t of row.matchAll(/\b([a-z][a-z0-9_]{4,})\b/g)) sym.add(t[1]); - const lines: number[] = []; - for (const t of row.matchAll(/[:\-](\d{2,5})/g)) lines.push(parseInt(t[1])); - return { symbols: [...sym], lines }; - }); -} - -function grounded(md: string, file: string): { total: number; grounded: number; oob: number } { - const content = readFileSync(file, "utf8"); - const eof = content.split("\n").length; - const findings = extractFindings(md); - let g = 0, oob = 0; - for (const f of findings) { - const symHit = f.symbols.length > 0 && f.symbols.some(s => content.includes(s)); - const lineOob = f.lines.length > 0 && f.lines.some(l => l > eof); - if (lineOob) oob++; - if (symHit && !lineOob) g++; - } - return { total: findings.length, grounded: g, oob }; -} - -const lines = readFileSync(JSONL, "utf8").split("\n").filter(Boolean); -const rows: Row[] = []; -for (const l of lines) { - try { - const r: Row = JSON.parse(l); - if (r.ts < argSince) continue; - rows.push(r); - } catch {} -} - -if (rows.length === 0) { console.error(`no rows since ${argSince}`); process.exit(1); } - -// Group: condition → file → array of grounded counts -type CellArr = { grnd: number[]; total: number[]; oob: number[]; ms: number[] }; -const byCond: Record> = {}; -for (const r of rows) { - const k = condKey(r); - byCond[k] ??= {}; - byCond[k][r.file_path] ??= { grnd: [], total: [], oob: [], ms: [] }; - const g = grounded(r.response, r.file_path); - byCond[k][r.file_path].grnd.push(g.grounded); - byCond[k][r.file_path].total.push(g.total); - byCond[k][r.file_path].oob.push(g.oob); - byCond[k][r.file_path].ms.push(r.latency_ms); -} - -function stats(xs: number[]): { n: number; mean: number; sd: number; min: number; max: number } { - const n = xs.length; - if (n === 0) return { n: 0, mean: 0, sd: 0, min: 0, max: 0 }; - const mean = 
xs.reduce((s, x) => s + x, 0) / n; - const variance = n === 1 ? 0 : xs.reduce((s, x) => s + (x - mean) ** 2, 0) / (n - 1); - return { n, mean, sd: Math.sqrt(variance), min: Math.min(...xs), max: Math.max(...xs) }; -} - -const conditions = Object.keys(byCond).sort(); -const files = [...new Set(rows.map(r => r.file_path))].sort(); - -console.log(`\n═══ Pass 5 variance — since ${argSince} ═══\n`); -console.log(` ${rows.length} rows · ${conditions.length} conditions · ${files.length} files\n`); - -for (const file of files) { - console.log(`📄 ${file}`); - console.log(` ${"condition".padEnd(56)} n ${"grounded mean ± sd".padStart(20)} ${"range".padStart(8)} ${"oob".padStart(4)} ${"avg ms".padStart(7)}`); - console.log(` ${"─".repeat(56)} ─── ${"─".repeat(20)} ${"─".repeat(8)} ${"─".repeat(4)} ${"─".repeat(7)}`); - for (const c of conditions) { - const cell = byCond[c]?.[file]; - if (!cell || cell.grnd.length === 0) continue; - const s = stats(cell.grnd); - const oobSum = cell.oob.reduce((a, b) => a + b, 0); - const msMean = cell.ms.reduce((a, b) => a + b, 0) / cell.ms.length; - const meanSd = `${s.mean.toFixed(1)} ± ${s.sd.toFixed(1)}`; - const range = `[${s.min}-${s.max}]`; - console.log(` ${c.padEnd(56)} ${String(s.n).padStart(3)} ${meanSd.padStart(20)} ${range.padStart(8)} ${String(oobSum).padStart(4)} ${Math.round(msMean / 1000).toString().padStart(5)}s`); - } - console.log(""); -} - -// Head-to-head: for each condition vs isolation baseline, count rep-by-rep -// wins across the same file. Requires equal rep counts. 
-console.log(`═══ Head-to-head: each condition vs isolation, rep-by-rep ═══\n`); -const isoKey = conditions.find(c => c.startsWith("codereview_isolation")); -if (!isoKey) { - console.log(" no isolation rows in window"); -} else { - console.log(` baseline: ${isoKey}\n`); - console.log(` ${"challenger".padEnd(56)} wins losses ties Δ mean grnd`); - console.log(` ${"─".repeat(56)} ${"─".repeat(4)} ${"─".repeat(6)} ${"─".repeat(4)} ${"─".repeat(12)}`); - for (const c of conditions) { - if (c === isoKey) continue; - let wins = 0, losses = 0, ties = 0, deltaSum = 0, n = 0; - for (const file of files) { - const isoArr = byCond[isoKey]?.[file]?.grnd ?? []; - const cArr = byCond[c]?.[file]?.grnd ?? []; - const k = Math.min(isoArr.length, cArr.length); - for (let i = 0; i < k; i++) { - if (cArr[i] > isoArr[i]) wins++; - else if (cArr[i] < isoArr[i]) losses++; - else ties++; - deltaSum += cArr[i] - isoArr[i]; - n++; - } - } - const dMean = n > 0 ? (deltaSum / n).toFixed(2) : "—"; - console.log(` ${c.padEnd(56)} ${String(wins).padStart(4)} ${String(losses).padStart(6)} ${String(ties).padStart(4)} ${dMean.padStart(12)}`); - } -} diff --git a/scripts/mode_pass5_variance_paid.ts b/scripts/mode_pass5_variance_paid.ts deleted file mode 100644 index 47dbe29..0000000 --- a/scripts/mode_pass5_variance_paid.ts +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env bun -/** - * Pass 5: variance test for the 2026-04-26 paid-model bake-off. - * - * The pass-4 single-rep sweep showed isolation beating every matrix - * condition by 1.0-1.4 grounded findings/file on grok-4.1-fast. This - * harness runs N reps × M conditions on the file where the effect was - * sharpest (pathway_memory.rs, 1355 lines) so we can decide whether - * the deltas are real signal or run-to-run noise. - * - * Conditions: - * 1. codereview_isolation — no matrix - * 2. codereview_lakehouse + corpus=lakehouse_arch_v1 — A only - * 3. codereview_lakehouse + corpus=lakehouse_symbols_v1 — C only - * 4. 
codereview_lakehouse (modes.toml default) — A+C composed - * - * Output appends per-call to data/_kb/mode_experiments.jsonl. Aggregate - * with `bun run scripts/mode_compare.ts --since ` and read the - * grounded column with multiple rows per (mode|corpus) key. - * - * Usage: - * bun run scripts/mode_pass5_variance_paid.ts - * LH_REPS=3 LH_FILE=crates/queryd/src/delta.rs bun run scripts/mode_pass5_variance_paid.ts - */ - -const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100"; -const MODEL = process.env.LH_MODEL ?? "x-ai/grok-4.1-fast"; -const FILE = process.env.LH_FILE ?? "crates/vectord/src/pathway_memory.rs"; -const REPS = Number(process.env.LH_REPS ?? 5); - -interface Condition { - label: string; - mode: string; - corpus?: string | string[]; -} - -const ALL_CONDITIONS: Condition[] = [ - { label: "isolation ", mode: "codereview_isolation" }, - { label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" }, - { label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" }, - { label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ }, -]; - -// Optional whitelist via env: LH_CONDITIONS=isolation,composed limits the -// run to a subset (matches against the trimmed `label`). Useful when only -// the head-to-head pair matters and saves ~50% latency on slow rungs. -const wantedLabels = (process.env.LH_CONDITIONS ?? "") - .split(",").map(s => s.trim().toLowerCase()).filter(Boolean); -const CONDITIONS: Condition[] = wantedLabels.length === 0 - ? 
ALL_CONDITIONS - : ALL_CONDITIONS.filter(c => wantedLabels.some(w => c.label.trim().toLowerCase().startsWith(w))); - -async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> { - const body: any = { - task_class: "scrum_review", - file_path: FILE, - force_mode: c.mode, - force_model: MODEL, - }; - if (c.corpus !== undefined) body.force_matrix_corpus = c.corpus; - - try { - const r = await fetch(`${GATEWAY}/v1/mode/execute`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify(body), - signal: AbortSignal.timeout(240_000), - }); - if (!r.ok) { - const txt = await r.text().catch(() => ""); - return { ok: false, error: `HTTP ${r.status}: ${txt.slice(0, 160)}` }; - } - const j: any = await r.json(); - return { ok: true, latency_ms: j.latency_ms, resp_chars: (j.response ?? "").length }; - } catch (e: any) { - return { ok: false, error: e.message }; - } -} - -async function main() { - const total = CONDITIONS.length * REPS; - console.log(`[pass5] file=${FILE}`); - console.log(`[pass5] model=${MODEL} · ${CONDITIONS.length} conditions × ${REPS} reps = ${total} runs`); - console.log(""); - - let i = 0; - const startTs = new Date().toISOString(); - for (let rep = 1; rep <= REPS; rep++) { - for (const c of CONDITIONS) { - i++; - process.stdout.write(` [${i}/${total}] rep=${rep} ${c.label}... `); - const r = await runOne(c, rep); - if (r.ok) { - console.log(`✓ ${r.resp_chars} chars · ${((r.latency_ms ?? 
0) / 1000).toFixed(1)}s`); - } else { - console.log(`✗ ${r.error}`); - } - } - } - - console.log(`\n[pass5] complete · started ${startTs}`); - console.log(`[pass5] aggregate: bun run scripts/mode_compare.ts --since ${startTs}`); -} - -main().catch(e => { console.error(e); process.exit(1); }); diff --git a/scripts/overnight_proof.sh b/scripts/overnight_proof.sh deleted file mode 100755 index 0b90949..0000000 --- a/scripts/overnight_proof.sh +++ /dev/null @@ -1,458 +0,0 @@ -#!/bin/bash -# OVERNIGHT PROOF — the test that settles it -# Runs unattended: embed 500K, build indexes, measure recall, -# autonomous agent test, sustained load. ~3 hours total. -# -# Monitor: tail -f /home/profit/lakehouse/logs/overnight_proof.log - -set -uo pipefail - -LOG="/home/profit/lakehouse/logs/overnight_proof.log" -STATE="/tmp/overnight_proof_state" -LOCK="/tmp/overnight_proof.lock" -LH="http://localhost:3100" -GW="http://localhost:3700" - -mkdir -p /home/profit/lakehouse/logs - -if [ -f "$LOCK" ] && kill -0 "$(cat $LOCK)" 2>/dev/null; then - echo "$(date) Already running" >> "$LOG" - exit 0 -fi -echo $$ > "$LOCK" -trap "rm -f $LOCK" EXIT - -log() { echo "$(date '+%H:%M:%S') $1" | tee -a "$LOG"; } - -touch "$STATE" -step=$(cat "$STATE" 2>/dev/null || echo "embed") - -log "═══ OVERNIGHT PROOF: step=$step ═══" - -case "$step" in - -embed) - log "STEP 1/5: Embedding 500K workers through Ollama (~40 min)" - log " This is the real test — actual nomic-embed-text embeddings, not random vectors" - - python3 << 'PYEOF' >> "$LOG" 2>&1 -import json, time, sys -from urllib.request import Request, urlopen -from urllib.error import HTTPError - -LH = "http://localhost:3100" - -def post(path, body, timeout=300): - r = Request(f"{LH}{path}", json.dumps(body).encode(), headers={"Content-Type": "application/json"}) - try: return json.loads(urlopen(r, timeout=timeout).read()) - except HTTPError as e: return {"error": e.read().decode()[:200]} - except Exception as e: return {"error": str(e)} - -# Fetch 500K 
resume_text for embedding -print("Fetching resume texts from workers_500k...") -r = post("/query/sql", {"sql": "SELECT worker_id, resume_text FROM workers_500k LIMIT 500000"}) -if "error" in r: - print(f"SQL error: {r['error']}") - sys.exit(1) - -rows = r.get("rows", []) -print(f"Got {len(rows)} rows") - -# Build docs for embedding -docs = [] -for row in rows: - wid = row.get("worker_id", "") - text = row.get("resume_text", "") - if text and len(text) > 20: - docs.append({"id": f"W500K-{wid}", "text": text}) - -print(f"{len(docs)} docs ready for embedding") - -# Chunk into batches of 50K to avoid timeout issues -BATCH = 50000 -for batch_start in range(0, len(docs), BATCH): - batch = docs[batch_start:batch_start + BATCH] - batch_num = batch_start // BATCH + 1 - total_batches = (len(docs) + BATCH - 1) // BATCH - idx_name = f"workers_500k_v{batch_num}" - - print(f"\nBatch {batch_num}/{total_batches}: {len(batch)} docs → index '{idx_name}'") - t0 = time.time() - - r = post("/vectors/index", { - "index_name": idx_name, - "source": "workers_500k", - "documents": batch, - "chunk_size": 500, - "overlap": 50, - }, timeout=600) - - if "error" in r: - print(f" Index creation error: {r['error']}") - continue - - job_id = r.get("job_id") - chunks = r.get("chunks", 0) - print(f" Job {job_id}: {chunks} chunks, embedding in background...") - - # Wait for this batch to complete - for _ in range(600): # 50 min max per batch - time.sleep(5) - status = post(f"/vectors/jobs/{job_id}", None) if job_id else {"status": "unknown"} - if isinstance(status, dict): - state = status.get("status", "unknown") - progress = status.get("embedded_chunks", 0) - if state == "completed": - elapsed = time.time() - t0 - rate = chunks / elapsed if elapsed > 0 else 0 - print(f" DONE: {chunks} chunks in {elapsed:.0f}s ({rate:.0f}/sec)") - break - elif state == "failed": - print(f" FAILED: {status.get('error', 'unknown')}") - break - sys.stdout.write(f"\r {state}: {progress}/{chunks} chunks...") - 
sys.stdout.flush() - print() - -print("\nAll batches submitted. Checking indexes...") -r = post("/vectors/indexes", None) -if not isinstance(r, list): r = [] -for idx in r: - if "500k" in idx.get("index_name", ""): - print(f" {idx['index_name']}: {idx['chunk_count']} chunks") - -print("STEP 1 COMPLETE") -PYEOF - - if grep -q "STEP 1 COMPLETE" "$LOG"; then - echo "build_indexes" > "$STATE" - log "Embedding complete — moving to index build" - else - log "Embedding may still be running — will check on next heartbeat" - echo "check_embed" > "$STATE" - fi - ;; - -check_embed) - log "Checking embedding job status..." - python3 -c " -import json -from urllib.request import urlopen -r = json.loads(urlopen('http://localhost:3100/vectors/jobs', timeout=30).read()) -running = [j for j in r if j.get('status') == 'running'] -completed = [j for j in r if j.get('status') == 'completed' and '500k' in j.get('index_name','')] -print(f'Running: {len(running)}, Completed 500K: {len(completed)}') -if not running: - print('ALL_DONE') -" >> "$LOG" 2>&1 - - if grep -q "ALL_DONE" "$LOG"; then - echo "build_indexes" > "$STATE" - fi - ;; - -build_indexes) - log "STEP 2/5: Building HNSW + Lance on real 500K embeddings" - - python3 << 'PYEOF' >> "$LOG" 2>&1 -import json, time -from urllib.request import Request, urlopen - -LH = "http://localhost:3100" -def post(path, body, timeout=600): - r = Request(f"{LH}{path}", json.dumps(body).encode(), headers={"Content-Type": "application/json"}) - return json.loads(urlopen(r, timeout=timeout).read()) - -# Find the first 500K index -indexes = json.loads(urlopen(f"{LH}/vectors/indexes", timeout=30).read()) -idx_500k = [i for i in indexes if "500k" in i.get("index_name","")] -if not idx_500k: - print("No 500K index found — embedding may not be complete") - exit(1) - -idx_name = idx_500k[0]["index_name"] -chunks = idx_500k[0]["chunk_count"] -print(f"Using index: {idx_name} ({chunks} chunks)") - -# Build HNSW -print(f"Building HNSW on {chunks} real 
embeddings...") -t0 = time.time() -r = post("/vectors/hnsw/build", {"index_name": idx_name}) -print(f" HNSW: {r.get('vectors',0)} vectors in {time.time()-t0:.0f}s") - -# Migrate to Lance -print(f"Migrating to Lance...") -t0 = time.time() -r = post(f"/vectors/lance/migrate/{idx_name}", {}) -stats = r.get("stats", {}) -print(f" Lance: {stats.get('rows_written',0)} rows in {stats.get('duration_secs',0):.1f}s") - -# Build IVF_PQ on Lance -# sqrt(50K) ≈ 224 partitions for a 50K batch -print(f"Building IVF_PQ on Lance...") -t0 = time.time() -r = post(f"/vectors/lance/index/{idx_name}", {"num_partitions": 224, "num_bits": 8, "num_sub_vectors": 192}) -print(f" IVF_PQ: built in {r.get('build_time_secs',0):.0f}s") - -# Build scalar btree -print(f"Building scalar btree on doc_id...") -r = post(f"/vectors/lance/scalar-index/{idx_name}/doc_id", {}) -print(f" Btree: built in {r.get('build_time_secs',0):.1f}s") - -print("STEP 2 COMPLETE") -PYEOF - - if grep -q "STEP 2 COMPLETE" "$LOG"; then - echo "recall_test" > "$STATE" - fi - ;; - -recall_test) - log "STEP 3/5: Measuring recall on REAL embeddings" - - python3 << 'PYEOF' >> "$LOG" 2>&1 -import json, time -from urllib.request import Request, urlopen - -LH = "http://localhost:3100" -def post(path, body, timeout=300): - r = Request(f"{LH}{path}", json.dumps(body).encode(), headers={"Content-Type": "application/json"}) - return json.loads(urlopen(r, timeout=timeout).read()) - -# Find 500K index -indexes = json.loads(urlopen(f"{LH}/vectors/indexes", timeout=30).read()) -idx_500k = [i for i in indexes if "500k" in i.get("index_name","")] -if not idx_500k: - print("No 500K index — skipping recall") - exit(0) -idx_name = idx_500k[0]["index_name"] - -# Auto-generate eval harness -print(f"Generating eval harness for {idx_name}...") -r = post(f"/vectors/hnsw/evals/{idx_name}_recall/autogen", { - "index_name": idx_name, "sample_count": 50, "k": 10, -}) -print(f" Harness: {len(r.get('queries',[]))} queries, k={r.get('k',10)}") - -# HNSW 
recall -print("Measuring HNSW recall...") -r = post("/vectors/hnsw/trial", { - "index_name": idx_name, - "harness": f"{idx_name}_recall", - "config": {"ef_construction": 80, "ef_search": 30, "seed": 42}, -}) -print(f" HNSW recall@10: {r.get('metrics',{}).get('recall_at_k',0):.4f}") -print(f" HNSW p50: {r.get('metrics',{}).get('search_latency_p50_us',0):.0f}us") - -# Lance recall -print("Measuring Lance IVF_PQ recall...") -r = post(f"/vectors/lance/recall/{idx_name}", { - "harness": f"{idx_name}_recall", "top_k": 10, -}) -print(f" Lance recall@10: {r.get('mean_recall',0):.4f}") -print(f" Lance p50: {r.get('latency_p50_us',0):.0f}us") - -print("STEP 3 COMPLETE") -PYEOF - - if grep -q "STEP 3 COMPLETE" "$LOG"; then - echo "autonomous_test" > "$STATE" - fi - ;; - -autonomous_test) - log "STEP 4/5: 100 staffing questions — LOCAL MODEL ONLY, no human steering" - - python3 << 'PYEOF' >> "$LOG" 2>&1 -import json, time, random -from urllib.request import Request, urlopen -from urllib.error import HTTPError - -GW = "http://localhost:3700" -LH = "http://localhost:3100" -random.seed(2026) - -def gw(path, body=None, timeout=180): - data = json.dumps(body).encode() if body else None - method = "POST" if body else "GET" - r = Request(f"{GW}{path}", data=data, method=method, headers={"Content-Type":"application/json"} if body else {}) - try: return json.loads(urlopen(r, timeout=timeout).read()) - except HTTPError as e: return {"error": e.read().decode()[:200]} - except Exception as e: return {"error": str(e)} - -def sql(query): - r = gw("/sql", {"sql": query}) - return r.get("rows", []) if "error" not in r else [] - -ROLES = ["Forklift Operator","Machine Operator","Assembler","Loader","Quality Tech", - "Welder","Sanitation Worker","Shipping Clerk","Production Worker","Maintenance Tech"] -STATES = ["IL","IN","OH","MO","TN","KY","WI","MI"] - -print("═══ 100 AUTONOMOUS OPERATIONS ═══") -passed = 0 -failed = 0 -total_ms = 0 - -# Mix of operation types -for i in range(100): - op_type = 
random.choices(["match","count","aggregate","lookup"], weights=[50,25,15,10])[0] - role = random.choice(ROLES) - state = random.choice(STATES) - rel = round(random.uniform(0.6, 0.9), 2) - - t0 = time.time() - ok = False - detail = "" - - if op_type == "match": - r = gw("/search", { - "question": f"Find {role} workers in {state}", - "sql_filter": f"role = '{role}' AND state = '{state}' AND CAST(reliability AS DOUBLE) >= {rel}", - "dataset": "workers_500k", "top_k": 5, "generate": False, - }) - matched = len(r.get("sources", [])) - ok = matched > 0 or r.get("sql_matches", 0) == 0 # 0 matches is ok if SQL found 0 - detail = f"match: {matched} results (sql={r.get('sql_matches',0)})" - - elif op_type == "count": - truth = sql(f"SELECT COUNT(*) cnt FROM workers_500k WHERE role = '{role}' AND state = '{state}'") - expected = truth[0]["cnt"] if truth else 0 - # Use keyword classifier logic: count → SQL - r = gw("/sql", {"sql": f"SELECT COUNT(*) cnt FROM workers_500k WHERE role = '{role}' AND state = '{state}'"}) - got = r.get("rows", [{}])[0].get("cnt", -1) if "error" not in r else -1 - ok = got == expected - detail = f"count: got={got} expected={expected}" - - elif op_type == "aggregate": - r = gw("/sql", {"sql": f"SELECT ROUND(AVG(CAST(reliability AS DOUBLE)),3) avg FROM workers_500k WHERE role = '{role}' AND state = '{state}'"}) - ok = "error" not in r and r.get("rows") - detail = f"aggregate: {r.get('rows',[{}])[0] if ok else r.get('error','?')[:40]}" - - elif op_type == "lookup": - wid = random.randint(1, 500000) - r = gw(f"/worker/{wid}") - ok = r.get("rows") and len(r["rows"]) > 0 - detail = f"lookup: worker {wid} {'found' if ok else 'not found'}" - - ms = (time.time()-t0)*1000 - total_ms += ms - if ok: passed += 1 - else: failed += 1 - - if i % 20 == 0 or not ok: - icon = "OK" if ok else "FAIL" - print(f" [{i+1:3d}/100] {icon} {op_type:10s} {detail[:50]:50s} ({ms:.0f}ms)") - -pct = passed / 100 * 100 -print(f"\n═══ RESULT: {passed}/100 passed ({pct:.0f}%) in 
{total_ms/1000:.1f}s ═══") -print(f" avg latency: {total_ms/100:.0f}ms per operation") - -# Log to playbook -gw("/log", { - "operation": f"autonomous_100: {passed}/100 ({pct:.0f}%)", - "approach": "keyword routing + SQL + hybrid, local model only", - "result": f"passed={passed} failed={failed} avg_ms={total_ms/100:.0f}", - "context": "overnight proof step 4", -}) - -if pct >= 90: - print("STEP 4 COMPLETE — AUTONOMOUS TEST PASSED") -else: - print(f"STEP 4 COMPLETE — {pct:.0f}% (below 90% target)") -PYEOF - - echo "sustained_load" > "$STATE" - ;; - -sustained_load) - log "STEP 5/5: Sustained load — 30 minutes of continuous operations" - - python3 << 'PYEOF' >> "$LOG" 2>&1 -import json, time, random, concurrent.futures -from urllib.request import Request, urlopen -from urllib.error import HTTPError - -GW = "http://localhost:3700" -random.seed(42) - -def gw(path, body=None): - data = json.dumps(body).encode() if body else None - r = Request(f"{GW}{path}", data=data, method="POST" if body else "GET", - headers={"Content-Type":"application/json"} if body else {}) - try: return json.loads(urlopen(r, timeout=60).read()) - except: return {"error": "timeout"} - -ROLES = ["Forklift Operator","Machine Operator","Assembler","Loader","Quality Tech"] -STATES = ["IL","IN","OH","MO"] - -print("═══ SUSTAINED LOAD: 30 minutes ═══") -duration = 30 * 60 # 30 minutes -t_start = time.time() -ops = 0 -errors = 0 -cycle = 0 - -while time.time() - t_start < duration: - cycle += 1 - batch_ops = 0 - batch_errors = 0 - - # Fire 10 concurrent operations - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool: - futures = [] - for _ in range(10): - role = random.choice(ROLES) - state = random.choice(STATES) - futures.append(pool.submit(gw, "/sql", { - "sql": f"SELECT COUNT(*) FROM workers_500k WHERE role = '{role}' AND state = '{state}'" - })) - for f in concurrent.futures.as_completed(futures): - r = f.result() - batch_ops += 1 - if "error" in r: batch_errors += 1 - - ops += 
batch_ops - errors += batch_errors - elapsed = time.time() - t_start - remaining = duration - elapsed - - if cycle % 30 == 0: # Log every ~30 cycles - rate = ops / elapsed - print(f" {elapsed/60:.0f}min: {ops} ops ({rate:.0f}/sec) errors={errors} remaining={remaining/60:.0f}min") - - time.sleep(1) # 1 sec between batches - -elapsed = time.time() - t_start -rate = ops / elapsed -print(f"\n═══ SUSTAINED LOAD COMPLETE ═══") -print(f" Duration: {elapsed/60:.1f} minutes") -print(f" Operations: {ops}") -print(f" Rate: {rate:.0f} ops/sec") -print(f" Errors: {errors} ({100*errors/max(ops,1):.1f}%)") -print(f" STEP 5 COMPLETE") -PYEOF - - echo "report" > "$STATE" - ;; - -report) - log "" - log "═══════════════════════════════════════════════════════" - log " OVERNIGHT PROOF — COMPLETE" - log "═══════════════════════════════════════════════════════" - log " Step 1: 500K real embeddings via Ollama" - log " Step 2: HNSW + Lance indexes on real data" - log " Step 3: Recall measured on real embeddings" - log " Step 4: 100 autonomous operations (no human)" - log " Step 5: 30 min sustained concurrent load" - log "" - log " Full log: $LOG" - log "═══════════════════════════════════════════════════════" - echo "done" > "$STATE" - ;; - -done) - log "Overnight proof already complete." - ;; - -esac