From 1e00eb4472d12abcebe56cc26704ffd74535e67a Mon Sep 17 00:00:00 2001 From: profit Date: Wed, 22 Apr 2026 22:09:35 -0500 Subject: [PATCH] =?UTF-8?q?auditor:=20inference=20temp=3D0,=20think=3Dfals?= =?UTF-8?q?e=20=E2=80=94=20kill=20signature=20creep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 9-run empirical test showed 20 of 27 audit_lessons signatures were singletons (count=1) — the cloud producing slightly-different summary phrasings for the SAME underlying claim on each audit, each hashing to a fresh signature. That's the creep J flagged — not explosive, but steady ~2 new sigs per run, unbounded over hundreds of runs. Root cause: temperature=0.2 + think=true was letting variable prose leak into the classification output. Fix: temp=0 (greedy sample → identical input yields identical output on same model version, barring residual serving-stack nondeterminism from dynamic batching / floating-point ordering — so this sharply reduces signature variance rather than strictly guaranteeing zero), think=false (no reasoning trace variance), max_tokens 3000→1500 (tighter bound prevents tail wander). The compounding policy itself was validated by the 9 runs: - 7 recurring claims (the legitimate signals) all at conf 0.08-0.20 - ratingSeverity() correctly held them at info (below 0.3 threshold) - cross-PR signal test separately confirmed conf=1.00 → sev=block Also: LH_AUDIT_RUNS env so the test can validate with smaller N. 
--- auditor/checks/inference.ts | 16 +++++++++++++--- tests/real-world/nine_consecutive_audits.ts | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/auditor/checks/inference.ts b/auditor/checks/inference.ts index d3a0bde..5cddbc7 100644 --- a/auditor/checks/inference.ts +++ b/auditor/checks/inference.ts @@ -112,9 +112,19 @@ export async function runInferenceCheck(claims: Claim[], diff: string): Promise< { role: "system", content: systemMsg }, { role: "user", content: userMsg }, ], - max_tokens: 3000, - temperature: 0.2, - think: true, // T3 overseer should reason — JSON shape is still required + // Deterministic classification mode — temp=0 is greedy-sample, + // so identical input → identical output on the same model + // version. think=false disables the reasoning trace that was + // letting variable prose leak into the classification output + // and inflate the audit_lessons signature set (observed as + // sig_count creep across the 9-run empirical test). + // + // max_tokens tightened to 1500 — the structured JSON response + // fits comfortably in 1500 tokens for typical PRs (~7 claims); + // the old 3000 just gave the model room to wander. + max_tokens: 1500, + temperature: 0, + think: false, }), signal: AbortSignal.timeout(CALL_TIMEOUT_MS), }); diff --git a/tests/real-world/nine_consecutive_audits.ts b/tests/real-world/nine_consecutive_audits.ts index 999a9de..21255d0 100644 --- a/tests/real-world/nine_consecutive_audits.ts +++ b/tests/real-world/nine_consecutive_audits.ts @@ -28,7 +28,7 @@ const AUDIT_LESSONS = `${REPO}/data/_kb/audit_lessons.jsonl`; const VERDICTS_DIR = `${REPO}/data/_auditor/verdicts`; const POLL_INTERVAL_MS = 5_000; const AUDIT_TIMEOUT_MS = 180_000; -const RUNS = 9; +const RUNS = Number(process.env.LH_AUDIT_RUNS ?? 9); const TARGET_PR = Number(process.env.LH_AUDIT_PR ?? 8); async function sh(cmd: string): Promise<{ stdout: string; stderr: string; code: number }> {