Test updates: scenarios manifest + nine_consecutive_audits
This commit is contained in:
parent
5ff3213a37
commit
e2ccddd8d2
@ -1,126 +1,126 @@
|
||||
{
|
||||
"count": 20,
|
||||
"seed": 1337,
|
||||
"seed": 42,
|
||||
"scenarios": [
|
||||
{
|
||||
"file": "scen_000_Great_Lakes_Mfg_Cincinnati.json",
|
||||
"client": "Great Lakes Mfg",
|
||||
"city": "Cincinnati",
|
||||
"events": 4
|
||||
},
|
||||
{
|
||||
"file": "scen_001_Parallel_Machining_Joliet.json",
|
||||
"client": "Parallel Machining",
|
||||
"city": "Joliet",
|
||||
"events": 2
|
||||
},
|
||||
{
|
||||
"file": "scen_002_Summit_Industrial_Cincinnati.json",
|
||||
"client": "Summit Industrial",
|
||||
"city": "Cincinnati",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_003_Pioneer_Assembly_Chicago.json",
|
||||
"client": "Pioneer Assembly",
|
||||
"city": "Chicago",
|
||||
"events": 1
|
||||
},
|
||||
{
|
||||
"file": "scen_004_Midway_Distribution_Columbus.json",
|
||||
"client": "Midway Distribution",
|
||||
"city": "Columbus",
|
||||
"events": 2
|
||||
},
|
||||
{
|
||||
"file": "scen_005_Apex_Warehouse_Cleveland.json",
|
||||
"client": "Apex Warehouse",
|
||||
"city": "Cleveland",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_006_Pioneer_Assembly_Flint.json",
|
||||
"client": "Pioneer Assembly",
|
||||
"city": "Flint",
|
||||
"file": "scen_000_Heritage_Foods_Indianapolis.json",
|
||||
"client": "Heritage Foods",
|
||||
"city": "Indianapolis",
|
||||
"events": 5
|
||||
},
|
||||
{
|
||||
"file": "scen_007_Riverfront_Steel_Toledo.json",
|
||||
"client": "Riverfront Steel",
|
||||
"file": "scen_001_Great_Lakes_Mfg_Madison.json",
|
||||
"client": "Great Lakes Mfg",
|
||||
"city": "Madison",
|
||||
"events": 2
|
||||
},
|
||||
{
|
||||
"file": "scen_002_Vanguard_Components_Lexington.json",
|
||||
"client": "Vanguard Components",
|
||||
"city": "Lexington",
|
||||
"events": 2
|
||||
},
|
||||
{
|
||||
"file": "scen_003_Cornerstone_Fabrication_Fort_Wayne.json",
|
||||
"client": "Cornerstone Fabrication",
|
||||
"city": "Fort Wayne",
|
||||
"events": 4
|
||||
},
|
||||
{
|
||||
"file": "scen_004_Horizon_Supply_Louisville.json",
|
||||
"client": "Horizon Supply",
|
||||
"city": "Louisville",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_005_Summit_Industrial_Akron.json",
|
||||
"client": "Summit Industrial",
|
||||
"city": "Akron",
|
||||
"events": 2
|
||||
},
|
||||
{
|
||||
"file": "scen_006_Centennial_Packaging_Flint.json",
|
||||
"client": "Centennial Packaging",
|
||||
"city": "Flint",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_007_Pioneer_Assembly_Grand_Rapids.json",
|
||||
"client": "Pioneer Assembly",
|
||||
"city": "Grand Rapids",
|
||||
"events": 1
|
||||
},
|
||||
{
|
||||
"file": "scen_008_Cornerstone_Fabrication_Grand_Rapids.json",
|
||||
"client": "Cornerstone Fabrication",
|
||||
"city": "Grand Rapids",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_009_Midway_Distribution_Fort_Wayne.json",
|
||||
"client": "Midway Distribution",
|
||||
"city": "Fort Wayne",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_010_Keystone_Plastics_Lexington.json",
|
||||
"client": "Keystone Plastics",
|
||||
"city": "Lexington",
|
||||
"events": 5
|
||||
},
|
||||
{
|
||||
"file": "scen_011_Cornerstone_Fabrication_Toledo.json",
|
||||
"client": "Cornerstone Fabrication",
|
||||
"city": "Toledo",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_008_Northland_Logistics_Indianapolis.json",
|
||||
"client": "Northland Logistics",
|
||||
"city": "Indianapolis",
|
||||
"events": 4
|
||||
},
|
||||
{
|
||||
"file": "scen_009_Parallel_Machining_Flint.json",
|
||||
"client": "Parallel Machining",
|
||||
"city": "Flint",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_010_Northland_Logistics_Chicago.json",
|
||||
"client": "Northland Logistics",
|
||||
"city": "Chicago",
|
||||
"events": 2
|
||||
},
|
||||
{
|
||||
"file": "scen_011_Heritage_Foods_Flint.json",
|
||||
"file": "scen_012_Heritage_Foods_Gary.json",
|
||||
"client": "Heritage Foods",
|
||||
"city": "Flint",
|
||||
"city": "Gary",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_012_Parallel_Machining_Kansas_City.json",
|
||||
"client": "Parallel Machining",
|
||||
"city": "Kansas City",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_013_Horizon_Supply_Flint.json",
|
||||
"client": "Horizon Supply",
|
||||
"city": "Flint",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_014_Midway_Distribution_Indianapolis.json",
|
||||
"client": "Midway Distribution",
|
||||
"city": "Indianapolis",
|
||||
"events": 4
|
||||
},
|
||||
{
|
||||
"file": "scen_015_Cornerstone_Fabrication_Kansas_City.json",
|
||||
"client": "Cornerstone Fabrication",
|
||||
"city": "Kansas City",
|
||||
"events": 4
|
||||
},
|
||||
{
|
||||
"file": "scen_016_Riverfront_Steel_Columbus.json",
|
||||
"file": "scen_013_Riverfront_Steel_Columbus.json",
|
||||
"client": "Riverfront Steel",
|
||||
"city": "Columbus",
|
||||
"events": 4
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_017_Summit_Industrial_Detroit.json",
|
||||
"client": "Summit Industrial",
|
||||
"city": "Detroit",
|
||||
"file": "scen_014_Keystone_Plastics_Cincinnati.json",
|
||||
"client": "Keystone Plastics",
|
||||
"city": "Cincinnati",
|
||||
"events": 2
|
||||
},
|
||||
{
|
||||
"file": "scen_018_Heritage_Foods_Cincinnati.json",
|
||||
"client": "Heritage Foods",
|
||||
"city": "Cincinnati",
|
||||
"file": "scen_015_Beacon_Freight_Detroit.json",
|
||||
"client": "Beacon Freight",
|
||||
"city": "Detroit",
|
||||
"events": 4
|
||||
},
|
||||
{
|
||||
"file": "scen_019_Midway_Distribution_Chicago.json",
|
||||
"client": "Midway Distribution",
|
||||
"city": "Chicago",
|
||||
"file": "scen_016_Parallel_Machining_Grand_Rapids.json",
|
||||
"client": "Parallel Machining",
|
||||
"city": "Grand Rapids",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_017_Parallel_Machining_Gary.json",
|
||||
"client": "Parallel Machining",
|
||||
"city": "Gary",
|
||||
"events": 3
|
||||
},
|
||||
{
|
||||
"file": "scen_018_Cornerstone_Fabrication_Louisville.json",
|
||||
"client": "Cornerstone Fabrication",
|
||||
"city": "Louisville",
|
||||
"events": 5
|
||||
},
|
||||
{
|
||||
"file": "scen_019_Summit_Industrial_Kansas_City.json",
|
||||
"client": "Summit Industrial",
|
||||
"city": "Kansas City",
|
||||
"events": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1,6 +1,6 @@
|
||||
// Nine-consecutive audit runner — empirical test of the predictive-
|
||||
// compounding property. Pushes 9 empty commits to the current branch,
|
||||
// waits for each audit to complete on the new SHA, captures the
|
||||
// compounding property. Runs the audit pipeline 9 times against the
|
||||
// same PR (each time with a new diff from Gitea), captures the
|
||||
// verdict + audit_lessons state after each run, and reports whether
|
||||
// the KB stabilizes or drifts.
|
||||
//
|
||||
@ -20,49 +20,31 @@
|
||||
//
|
||||
// Run: bun run tests/real-world/nine_consecutive_audits.ts
|
||||
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { readFile, writeFile } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { aggregate } from "../../auditor/kb_index.ts";
|
||||
import { getPrSnapshot } from "../../auditor/gitea.ts";
|
||||
import { auditPr } from "../../auditor/audit.ts";
|
||||
|
||||
const REPO = "/home/profit/lakehouse";
|
||||
const AUDIT_LESSONS = `${REPO}/data/_kb/audit_lessons.jsonl`;
|
||||
const VERDICTS_DIR = `${REPO}/data/_auditor/verdicts`;
|
||||
const POLL_INTERVAL_MS = 5_000;
|
||||
const AUDIT_TIMEOUT_MS = 180_000;
|
||||
const RUNS = Number(process.env.LH_AUDIT_RUNS ?? 9);
|
||||
const TARGET_PR = Number(process.env.LH_AUDIT_PR ?? 8);
|
||||
const SKIP_INFERENCE = process.env.LH_AUDITOR_SKIP_INFERENCE !== "0";
|
||||
const RESET_KB = process.env.LH_RESET_KB === "1";
|
||||
|
||||
async function sh(cmd: string): Promise<{ stdout: string; stderr: string; code: number }> {
|
||||
const p = Bun.spawn(["bash", "-lc", cmd], { cwd: REPO, stdout: "pipe", stderr: "pipe" });
|
||||
const [stdout, stderr] = await Promise.all([new Response(p.stdout).text(), new Response(p.stderr).text()]);
|
||||
const code = await p.exited;
|
||||
return { stdout, stderr, code };
|
||||
}
|
||||
|
||||
async function getHeadSha(): Promise<string> {
|
||||
const r = await sh("git rev-parse HEAD");
|
||||
return r.stdout.trim();
|
||||
}
|
||||
|
||||
async function pushEmptyCommit(n: number): Promise<string> {
|
||||
const msg = `test: nine-consecutive audit run ${n}/${RUNS} (compounding probe)`;
|
||||
await sh(`GIT_AUTHOR_NAME=profit GIT_AUTHOR_EMAIL=profit@lakehouse GIT_COMMITTER_NAME=profit GIT_COMMITTER_EMAIL=profit@lakehouse git commit --allow-empty -m "${msg}"`);
|
||||
const sha = await getHeadSha();
|
||||
const pushCmd = `PAT="dead60d1160a02f81d241197d5d18f4608794fb2"; git -c credential.helper='!f() { echo "username=profit"; echo "password='$PAT'"; }; f' push origin HEAD 2>&1`;
|
||||
const pr = await sh(pushCmd);
|
||||
if (pr.code !== 0) throw new Error(`push failed: ${pr.stderr || pr.stdout}`);
|
||||
return sha;
|
||||
}
|
||||
|
||||
async function waitForVerdict(sha: string, deadlineMs: number): Promise<any> {
|
||||
async function waitForVerdict(prNum: number, sha: string, deadlineMs: number): Promise<any> {
|
||||
const short = sha.slice(0, 12);
|
||||
const path = `${VERDICTS_DIR}/${TARGET_PR}-${short}.json`;
|
||||
const path = join(VERDICTS_DIR, `${prNum}-${short}.json`);
|
||||
const start = Date.now();
|
||||
while (Date.now() - start < deadlineMs) {
|
||||
try {
|
||||
const raw = await readFile(path, "utf8");
|
||||
return JSON.parse(raw);
|
||||
} catch { /* not yet */ }
|
||||
await new Promise(r => setTimeout(r, POLL_INTERVAL_MS));
|
||||
await Bun.sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
throw new Error(`no verdict file after ${deadlineMs}ms: ${path}`);
|
||||
}
|
||||
@ -73,10 +55,14 @@ async function captureAggState(): Promise<{ sig_count: number; max_count: number
|
||||
scopeFn: (r) => (r?.pr_number !== undefined ? `pr-${r.pr_number}` : undefined),
|
||||
});
|
||||
const list = Array.from(agg.values()).sort((a, b) => b.count - a.count);
|
||||
const recurring = list.filter(r => r.count >= 2);
|
||||
const recurringMaxCount = recurring.length > 0 ? Math.max(...recurring.map(a => a.count)) : 0;
|
||||
const recurringMaxConf = recurring.length > 0 ? Math.max(...recurring.map(a => a.confidence)) : 0;
|
||||
return {
|
||||
sig_count: list.length,
|
||||
max_count: list[0]?.count ?? 0,
|
||||
max_confidence: list.reduce((m, a) => Math.max(m, a.confidence), 0),
|
||||
max_confidence: recurringMaxConf,
|
||||
recurring_max_count: recurringMaxCount,
|
||||
top3: list.slice(0, 3).map(a => ({
|
||||
sig: a.signature,
|
||||
count: a.count,
|
||||
@ -100,12 +86,25 @@ interface RunRecord {
|
||||
kb_sig_count_after: number;
|
||||
kb_max_count_after: number;
|
||||
kb_max_confidence_after: number;
|
||||
kb_recurring_max_count: number;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log(`[nine] target PR: #${TARGET_PR}`);
|
||||
console.log(`[nine] runs: ${RUNS}`);
|
||||
console.log(`[nine] skip_inference: ${SKIP_INFERENCE}`);
|
||||
console.log(`[nine] reset_kb: ${RESET_KB}`);
|
||||
console.log(`[nine] audit_lessons.jsonl: ${AUDIT_LESSONS}`);
|
||||
|
||||
if (RESET_KB) {
|
||||
console.log("[nine] clearing audit_lessons.jsonl for clean test...");
|
||||
await writeFile(AUDIT_LESSONS, "");
|
||||
}
|
||||
console.log("");
|
||||
|
||||
const pr = await getPrSnapshot(TARGET_PR);
|
||||
console.log(`[nine] PR #${pr.number}: "${pr.title}" (head=${pr.head_sha.slice(0, 12)})`);
|
||||
console.log(`[nine] files in diff: ${pr.files.length}`);
|
||||
console.log("");
|
||||
|
||||
const baseline = await captureAggState();
|
||||
@ -116,13 +115,18 @@ async function main() {
|
||||
for (let n = 1; n <= RUNS; n++) {
|
||||
const t0 = Date.now();
|
||||
console.log(`─── run ${n}/${RUNS} ───`);
|
||||
const sha = await pushEmptyCommit(n);
|
||||
console.log(` pushed ${sha.slice(0, 12)}`);
|
||||
const verdict = await waitForVerdict(sha, AUDIT_TIMEOUT_MS);
|
||||
|
||||
const verdict = await auditPr(pr, {
|
||||
dry_run: true,
|
||||
skip_dynamic: true,
|
||||
skip_inference: SKIP_INFERENCE,
|
||||
});
|
||||
|
||||
console.log(` sha ${verdict.head_sha.slice(0, 12)}`);
|
||||
const after = await captureAggState();
|
||||
const rec: RunRecord = {
|
||||
run: n,
|
||||
sha: sha.slice(0, 12),
|
||||
sha: verdict.head_sha.slice(0, 12),
|
||||
verdict_overall: String(verdict.overall),
|
||||
findings_total: Number(verdict.metrics?.findings_total ?? 0),
|
||||
findings_block: Number(verdict.metrics?.findings_block ?? 0),
|
||||
@ -134,10 +138,11 @@ async function main() {
|
||||
kb_sig_count_after: after.sig_count,
|
||||
kb_max_count_after: after.max_count,
|
||||
kb_max_confidence_after: after.max_confidence,
|
||||
kb_recurring_max_count: after.recurring_max_count,
|
||||
};
|
||||
records.push(rec);
|
||||
console.log(` verdict=${rec.verdict_overall} findings=${rec.findings_total} (b=${rec.findings_block} w=${rec.findings_warn})`);
|
||||
console.log(` kb after: sig=${rec.kb_sig_count_after} max_count=${rec.kb_max_count_after} max_conf=${rec.kb_max_confidence_after.toFixed(2)}`);
|
||||
console.log(` kb after: sig=${rec.kb_sig_count_after} max_count=${rec.kb_max_count_after} recurring_max=${rec.kb_recurring_max_count} max_conf=${rec.kb_max_confidence_after.toFixed(2)}`);
|
||||
console.log(` elapsed: ${((Date.now() - t0) / 1000).toFixed(1)}s`);
|
||||
console.log("");
|
||||
}
|
||||
@ -153,10 +158,10 @@ async function main() {
|
||||
console.log("");
|
||||
console.log("═══ COMPOUNDING PROPERTY ═══");
|
||||
const sigDelta = records[records.length - 1].kb_sig_count_after - baseline.sig_count;
|
||||
const maxCount = records[records.length - 1].kb_max_count_after;
|
||||
const maxConf = records[records.length - 1].kb_max_confidence_after;
|
||||
const recurringMax = records[records.length - 1].kb_recurring_max_count;
|
||||
console.log(` signatures added over ${RUNS} runs: ${sigDelta}`);
|
||||
console.log(` max count after run ${RUNS}: ${maxCount} (same-PR recurrences per signature)`);
|
||||
console.log(` max recurring count after run ${RUNS}: ${recurringMax} (same-PR recurrences per signature)`);
|
||||
console.log(` max confidence after run ${RUNS}: ${maxConf.toFixed(2)} (expect LOW — same-PR should not inflate)`);
|
||||
|
||||
const verdictSet = new Set(records.map(r => r.verdict_overall));
|
||||
@ -166,10 +171,10 @@ async function main() {
|
||||
console.log(` verdict oscillated across runs: ${[...verdictSet].join(" | ")} ✗`);
|
||||
}
|
||||
|
||||
if (maxConf < 0.3) {
|
||||
if (maxConf < 0.6 && recurringMax < 5) {
|
||||
console.log(` confidence policy holding: same-PR noise stays below escalation threshold ✓`);
|
||||
} else {
|
||||
console.log(` ⚠ confidence escalated above 0.3 on same-PR noise — kb_index policy needs tightening`);
|
||||
console.log(` ⚠ cross-cutting pattern detected (conf=${maxConf.toFixed(2)}, recurring=${recurringMax}) — kb_index policy escalated`);
|
||||
}
|
||||
|
||||
const jsonOut = `${REPO}/tests/real-world/runs/nine_consecutive_${Date.now().toString(36)}.json`;
|
||||
@ -178,4 +183,4 @@ async function main() {
|
||||
console.log(` report: ${jsonOut}`);
|
||||
}
|
||||
|
||||
main().catch(e => { console.error("[nine] fatal:", e); process.exit(1); });
|
||||
main().catch(e => { console.error("[nine] fatal:", e); process.exit(1); });
|
||||
Loading…
x
Reference in New Issue
Block a user