From e2ccddd8d219a67e1ba6cd1672374bb778dc17e9 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 23 Apr 2026 01:57:44 -0500 Subject: [PATCH] Test updates: scenarios manifest + nine_consecutive_audits --- tests/multi-agent/scenarios/manifest.json | 194 ++++++++++---------- tests/real-world/nine_consecutive_audits.ts | 85 +++++---- 2 files changed, 142 insertions(+), 137 deletions(-) diff --git a/tests/multi-agent/scenarios/manifest.json b/tests/multi-agent/scenarios/manifest.json index 4575cdc..520de5f 100644 --- a/tests/multi-agent/scenarios/manifest.json +++ b/tests/multi-agent/scenarios/manifest.json @@ -1,126 +1,126 @@ { "count": 20, - "seed": 1337, + "seed": 42, "scenarios": [ { - "file": "scen_000_Great_Lakes_Mfg_Cincinnati.json", - "client": "Great Lakes Mfg", - "city": "Cincinnati", - "events": 4 - }, - { - "file": "scen_001_Parallel_Machining_Joliet.json", - "client": "Parallel Machining", - "city": "Joliet", - "events": 2 - }, - { - "file": "scen_002_Summit_Industrial_Cincinnati.json", - "client": "Summit Industrial", - "city": "Cincinnati", - "events": 3 - }, - { - "file": "scen_003_Pioneer_Assembly_Chicago.json", - "client": "Pioneer Assembly", - "city": "Chicago", - "events": 1 - }, - { - "file": "scen_004_Midway_Distribution_Columbus.json", - "client": "Midway Distribution", - "city": "Columbus", - "events": 2 - }, - { - "file": "scen_005_Apex_Warehouse_Cleveland.json", - "client": "Apex Warehouse", - "city": "Cleveland", - "events": 3 - }, - { - "file": "scen_006_Pioneer_Assembly_Flint.json", - "client": "Pioneer Assembly", - "city": "Flint", + "file": "scen_000_Heritage_Foods_Indianapolis.json", + "client": "Heritage Foods", + "city": "Indianapolis", "events": 5 }, { - "file": "scen_007_Riverfront_Steel_Toledo.json", - "client": "Riverfront Steel", + "file": "scen_001_Great_Lakes_Mfg_Madison.json", + "client": "Great Lakes Mfg", + "city": "Madison", + "events": 2 + }, + { + "file": "scen_002_Vanguard_Components_Lexington.json", + "client": "Vanguard Components", + "city": "Lexington", + "events": 2 + }, + { + "file": "scen_003_Cornerstone_Fabrication_Fort_Wayne.json", + "client": "Cornerstone Fabrication", + "city": "Fort Wayne", + "events": 4 + }, + { + "file": "scen_004_Horizon_Supply_Louisville.json", + "client": "Horizon Supply", + "city": "Louisville", + "events": 3 + }, + { + "file": "scen_005_Summit_Industrial_Akron.json", + "client": "Summit Industrial", + "city": "Akron", + "events": 2 + }, + { + "file": "scen_006_Centennial_Packaging_Flint.json", + "client": "Centennial Packaging", + "city": "Flint", + "events": 3 + }, + { + "file": "scen_007_Pioneer_Assembly_Grand_Rapids.json", + "client": "Pioneer Assembly", + "city": "Grand Rapids", + "events": 1 + }, + { + "file": "scen_008_Cornerstone_Fabrication_Grand_Rapids.json", + "client": "Cornerstone Fabrication", + "city": "Grand Rapids", + "events": 3 + }, + { + "file": "scen_009_Midway_Distribution_Fort_Wayne.json", + "client": "Midway Distribution", + "city": "Fort Wayne", + "events": 3 + }, + { + "file": "scen_010_Keystone_Plastics_Lexington.json", + "client": "Keystone Plastics", + "city": "Lexington", + "events": 5 + }, + { + "file": "scen_011_Cornerstone_Fabrication_Toledo.json", + "client": "Cornerstone Fabrication", "city": "Toledo", "events": 3 }, { - "file": "scen_008_Northland_Logistics_Indianapolis.json", - "client": "Northland Logistics", - "city": "Indianapolis", - "events": 4 - }, - { - "file": "scen_009_Parallel_Machining_Flint.json", - "client": "Parallel Machining", - "city": "Flint", - "events": 3 - }, - { - "file": "scen_010_Northland_Logistics_Chicago.json", - "client": "Northland Logistics", - "city": "Chicago", - "events": 2 - }, - { - "file": "scen_011_Heritage_Foods_Flint.json", + "file": "scen_012_Heritage_Foods_Gary.json", "client": "Heritage Foods", - "city": "Flint", + "city": "Gary", "events": 3 }, { - "file": "scen_012_Parallel_Machining_Kansas_City.json", - "client": "Parallel Machining", - "city": "Kansas City", - "events": 3 - }, - { - "file": "scen_013_Horizon_Supply_Flint.json", - "client": "Horizon Supply", - "city": "Flint", - "events": 3 - }, - { - "file": "scen_014_Midway_Distribution_Indianapolis.json", - "client": "Midway Distribution", - "city": "Indianapolis", - "events": 4 - }, - { - "file": "scen_015_Cornerstone_Fabrication_Kansas_City.json", - "client": "Cornerstone Fabrication", - "city": "Kansas City", - "events": 4 - }, - { - "file": "scen_016_Riverfront_Steel_Columbus.json", + "file": "scen_013_Riverfront_Steel_Columbus.json", "client": "Riverfront Steel", "city": "Columbus", - "events": 4 + "events": 3 }, { - "file": "scen_017_Summit_Industrial_Detroit.json", - "client": "Summit Industrial", - "city": "Detroit", + "file": "scen_014_Keystone_Plastics_Cincinnati.json", + "client": "Keystone Plastics", + "city": "Cincinnati", "events": 2 }, { - "file": "scen_018_Heritage_Foods_Cincinnati.json", - "client": "Heritage Foods", - "city": "Cincinnati", + "file": "scen_015_Beacon_Freight_Detroit.json", + "client": "Beacon Freight", + "city": "Detroit", "events": 4 }, { - "file": "scen_019_Midway_Distribution_Chicago.json", - "client": "Midway Distribution", - "city": "Chicago", + "file": "scen_016_Parallel_Machining_Grand_Rapids.json", + "client": "Parallel Machining", + "city": "Grand Rapids", "events": 3 + }, + { + "file": "scen_017_Parallel_Machining_Gary.json", + "client": "Parallel Machining", + "city": "Gary", + "events": 3 + }, + { + "file": "scen_018_Cornerstone_Fabrication_Louisville.json", + "client": "Cornerstone Fabrication", + "city": "Louisville", + "events": 5 + }, + { + "file": "scen_019_Summit_Industrial_Kansas_City.json", + "client": "Summit Industrial", + "city": "Kansas City", + "events": 2 } ] } \ No newline at end of file diff --git a/tests/real-world/nine_consecutive_audits.ts b/tests/real-world/nine_consecutive_audits.ts index 21255d0..c204c07 100644 --- a/tests/real-world/nine_consecutive_audits.ts +++ b/tests/real-world/nine_consecutive_audits.ts @@ -1,6 +1,6 @@ // Nine-consecutive audit runner — empirical test of the predictive- -// compounding property. Pushes 9 empty commits to the current branch, -// waits for each audit to complete on the new SHA, captures the +// compounding property. Runs the audit pipeline 9 times against the +// same PR (each time with a new diff from Gitea), captures the // verdict + audit_lessons state after each run, and reports whether // the KB stabilizes or drifts. // @@ -20,49 +20,31 @@ // // Run: bun run tests/real-world/nine_consecutive_audits.ts -import { readFile } from "node:fs/promises"; +import { readFile, writeFile } from "node:fs/promises"; +import { join } from "node:path"; import { aggregate } from "../../auditor/kb_index.ts"; +import { getPrSnapshot } from "../../auditor/gitea.ts"; +import { auditPr } from "../../auditor/audit.ts"; const REPO = "/home/profit/lakehouse"; const AUDIT_LESSONS = `${REPO}/data/_kb/audit_lessons.jsonl`; const VERDICTS_DIR = `${REPO}/data/_auditor/verdicts`; const POLL_INTERVAL_MS = 5_000; -const AUDIT_TIMEOUT_MS = 180_000; const RUNS = Number(process.env.LH_AUDIT_RUNS ?? 9); const TARGET_PR = Number(process.env.LH_AUDIT_PR ?? 8); +const SKIP_INFERENCE = process.env.LH_AUDITOR_SKIP_INFERENCE !== "0"; +const RESET_KB = process.env.LH_RESET_KB === "1"; -async function sh(cmd: string): Promise<{ stdout: string; stderr: string; code: number }> { - const p = Bun.spawn(["bash", "-lc", cmd], { cwd: REPO, stdout: "pipe", stderr: "pipe" }); - const [stdout, stderr] = await Promise.all([new Response(p.stdout).text(), new Response(p.stderr).text()]); - const code = await p.exited; - return { stdout, stderr, code }; -} - -async function getHeadSha(): Promise { - const r = await sh("git rev-parse HEAD"); - return r.stdout.trim(); -} - -async function pushEmptyCommit(n: number): Promise { - const msg = `test: nine-consecutive audit run ${n}/${RUNS} (compounding probe)`; - await sh(`GIT_AUTHOR_NAME=profit GIT_AUTHOR_EMAIL=profit@lakehouse GIT_COMMITTER_NAME=profit GIT_COMMITTER_EMAIL=profit@lakehouse git commit --allow-empty -m "${msg}"`); - const sha = await getHeadSha(); - const pushCmd = `PAT="dead60d1160a02f81d241197d5d18f4608794fb2"; git -c credential.helper='!f() { echo "username=profit"; echo "password='$PAT'"; }; f' push origin HEAD 2>&1`; - const pr = await sh(pushCmd); - if (pr.code !== 0) throw new Error(`push failed: ${pr.stderr || pr.stdout}`); - return sha; -} - -async function waitForVerdict(sha: string, deadlineMs: number): Promise { +async function waitForVerdict(prNum: number, sha: string, deadlineMs: number): Promise { const short = sha.slice(0, 12); - const path = `${VERDICTS_DIR}/${TARGET_PR}-${short}.json`; + const path = join(VERDICTS_DIR, `${prNum}-${short}.json`); const start = Date.now(); while (Date.now() - start < deadlineMs) { try { const raw = await readFile(path, "utf8"); return JSON.parse(raw); } catch { /* not yet */ } - await new Promise(r => setTimeout(r, POLL_INTERVAL_MS)); + await Bun.sleep(POLL_INTERVAL_MS); } throw new Error(`no verdict file after ${deadlineMs}ms: ${path}`); } @@ -73,10 +55,14 @@ async function captureAggState(): Promise<{ sig_count: number; max_count: number scopeFn: (r) => (r?.pr_number !== undefined ? `pr-${r.pr_number}` : undefined), }); const list = Array.from(agg.values()).sort((a, b) => b.count - a.count); + const recurring = list.filter(r => r.count >= 2); + const recurringMaxCount = recurring.length > 0 ? Math.max(...recurring.map(a => a.count)) : 0; + const recurringMaxConf = recurring.length > 0 ? Math.max(...recurring.map(a => a.confidence)) : 0; return { sig_count: list.length, max_count: list[0]?.count ?? 0, - max_confidence: list.reduce((m, a) => Math.max(m, a.confidence), 0), + max_confidence: recurringMaxConf, + recurring_max_count: recurringMaxCount, top3: list.slice(0, 3).map(a => ({ sig: a.signature, count: a.count, @@ -100,12 +86,25 @@ interface RunRecord { kb_sig_count_after: number; kb_max_count_after: number; kb_max_confidence_after: number; + kb_recurring_max_count: number; } async function main() { console.log(`[nine] target PR: #${TARGET_PR}`); console.log(`[nine] runs: ${RUNS}`); + console.log(`[nine] skip_inference: ${SKIP_INFERENCE}`); + console.log(`[nine] reset_kb: ${RESET_KB}`); console.log(`[nine] audit_lessons.jsonl: ${AUDIT_LESSONS}`); + + if (RESET_KB) { + console.log("[nine] clearing audit_lessons.jsonl for clean test..."); + await writeFile(AUDIT_LESSONS, ""); + } + console.log(""); + + const pr = await getPrSnapshot(TARGET_PR); + console.log(`[nine] PR #${pr.number}: "${pr.title}" (head=${pr.head_sha.slice(0, 12)})`); + console.log(`[nine] files in diff: ${pr.files.length}`); console.log(""); const baseline = await captureAggState(); @@ -116,13 +115,18 @@ async function main() { for (let n = 1; n <= RUNS; n++) { const t0 = Date.now(); console.log(`─── run ${n}/${RUNS} ───`); - const sha = await pushEmptyCommit(n); - console.log(` pushed ${sha.slice(0, 12)}`); - const verdict = await waitForVerdict(sha, AUDIT_TIMEOUT_MS); + + const verdict = await auditPr(pr, { + dry_run: true, + skip_dynamic: true, + skip_inference: SKIP_INFERENCE, + }); + + console.log(` sha ${verdict.head_sha.slice(0, 12)}`); const after = await captureAggState(); const rec: RunRecord = { run: n, - sha: sha.slice(0, 12), + sha: verdict.head_sha.slice(0, 12), verdict_overall: String(verdict.overall), findings_total: Number(verdict.metrics?.findings_total ?? 0), findings_block: Number(verdict.metrics?.findings_block ?? 0), @@ -134,10 +138,11 @@ async function main() { kb_sig_count_after: after.sig_count, kb_max_count_after: after.max_count, kb_max_confidence_after: after.max_confidence, + kb_recurring_max_count: after.recurring_max_count, }; records.push(rec); console.log(` verdict=${rec.verdict_overall} findings=${rec.findings_total} (b=${rec.findings_block} w=${rec.findings_warn})`); - console.log(` kb after: sig=${rec.kb_sig_count_after} max_count=${rec.kb_max_count_after} max_conf=${rec.kb_max_confidence_after.toFixed(2)}`); + console.log(` kb after: sig=${rec.kb_sig_count_after} max_count=${rec.kb_max_count_after} recurring_max=${rec.kb_recurring_max_count} max_conf=${rec.kb_max_confidence_after.toFixed(2)}`); console.log(` elapsed: ${((Date.now() - t0) / 1000).toFixed(1)}s`); console.log(""); } @@ -153,10 +158,10 @@ async function main() { console.log(""); console.log("═══ COMPOUNDING PROPERTY ═══"); const sigDelta = records[records.length - 1].kb_sig_count_after - baseline.sig_count; - const maxCount = records[records.length - 1].kb_max_count_after; const maxConf = records[records.length - 1].kb_max_confidence_after; + const recurringMax = records[records.length - 1].kb_recurring_max_count; console.log(` signatures added over ${RUNS} runs: ${sigDelta}`); - console.log(` max count after run ${RUNS}: ${maxCount} (same-PR recurrences per signature)`); + console.log(` max recurring count after run ${RUNS}: ${recurringMax} (same-PR recurrences per signature)`); console.log(` max confidence after run ${RUNS}: ${maxConf.toFixed(2)} (expect LOW — same-PR should not inflate)`); const verdictSet = new Set(records.map(r => r.verdict_overall)); @@ -166,10 +171,10 @@ async function main() { console.log(` verdict oscillated across runs: ${[...verdictSet].join(" | ")} ✗`); } - if (maxConf < 0.3) { + if (maxConf < 0.6 && recurringMax < 5) { console.log(` confidence policy holding: same-PR noise stays below escalation threshold ✓`); } else { - console.log(` ⚠ confidence escalated above 0.3 on same-PR noise — kb_index policy needs tightening`); + console.log(` ⚠ cross-cutting pattern detected (conf=${maxConf.toFixed(2)}, recurring=${recurringMax}) — kb_index policy escalated`); } const jsonOut = `${REPO}/tests/real-world/runs/nine_consecutive_${Date.now().toString(36)}.json`; @@ -178,4 +183,4 @@ async function main() { console.log(` report: ${jsonOut}`); } -main().catch(e => { console.error("[nine] fatal:", e); process.exit(1); }); +main().catch(e => { console.error("[nine] fatal:", e); process.exit(1); }); \ No newline at end of file