Audit pipeline PR #9: determinism + fact extraction + verifier gate + KB stats #9

Merged
profit merged 34 commits from test/enrich-prd-pipeline into main 2026-04-23 05:29:39 +00:00
Showing only changes of commit 47f1ca73e7 - Show all commits

View File

@@ -112,19 +112,22 @@ export async function runInferenceCheck(claims: Claim[], diff: string): Promise<
{ role: "system", content: systemMsg },
{ role: "user", content: userMsg },
],
// Deterministic classification mode — temp=0 is greedy-sample,
// so identical input → identical output on the same model
// version. think=false disables the reasoning trace that was
// letting variable prose leak into the classification output
// and inflate the audit_lessons signature set (observed as
// sig_count creep across the 9-run empirical test).
// Deterministic classification — temp=0 is greedy-sample, so
// identical input yields identical output on the same model
// version. This kills the signature creep we observed in the
// 9-run empirical test (sig_count 16→27 from cloud phrasing
// variance at temp=0.2).
//
// max_tokens tightened to 1500 — the structured JSON response
// fits comfortably in 1500 tokens for typical PRs (~7 claims);
// the old 3000 just gave the model room to wander.
max_tokens: 1500,
// IMPORTANT: keep think=true. gpt-oss:120b is a reasoning
// model; setting think=false caused it to return empty content
// on large prompts (observed during Level 1 validation: 13421
// tokens used, empty content returned). The reasoning trace is
// variable prose, but at temp=0 the FINAL classification is
// still deterministic because greedy sampling converges to
// the same conclusion from the same starting state.
max_tokens: 3000,
temperature: 0,
think: false,
think: true,
}),
signal: AbortSignal.timeout(CALL_TIMEOUT_MS),
});