Audit pipeline PR #9: determinism + fact extraction + verifier gate + KB stats #9
@ -51,11 +51,20 @@ const WEAK_PATTERNS: RegExp[] = [
|
||||
|
||||
// Empirical claims: runtime measurements / observed outcomes that can't
|
||||
// be verified from a diff (only from the actual run that produced
|
||||
// them). Example: "6/6 iterations complete, 58 cloud calls, 306s
|
||||
// end-to-end" — true, but only the test's own summary.json can
|
||||
// confirm it. Classifying as empirical lets the inference check skip
|
||||
// them). Classifying as empirical lets the inference check skip
|
||||
// diff-verification and saves the ladder for falsifiable claims.
|
||||
//
|
||||
// Two classes share this bucket because they share the skip discipline:
|
||||
//
|
||||
// 1. Runtime metrics — "58 cloud calls", "306s end-to-end"
|
||||
// 2. History/proof refs — "verified on PR #8", "was flipping across runs"
|
||||
//
|
||||
// Both are assertions about state outside the current diff. The cloud
|
||||
// would flag them as "not backed" — but that's a false positive: the
|
||||
// proof lives in the referenced run, prior commit, or test output, not
|
||||
// in the added lines the cloud is reading.
|
||||
const EMPIRICAL_PATTERNS: RegExp[] = [
|
||||
// ─── Runtime metrics ───
|
||||
// Iteration / attempt counts: "6/6 iterations", "attempt 5", "accepted on attempt 3"
|
||||
/\b\d+\s*\/\s*\d+\s+(iterations?|attempts?|cycles?|runs?|shards?)\b/i,
|
||||
/\b(accepted|resolved|converged)\s+on\s+attempt\s+\d+\b/i,
|
||||
@ -66,6 +75,27 @@ const EMPIRICAL_PATTERNS: RegExp[] = [
|
||||
// "escalated through N tiers", "N distinct models"
|
||||
/\bescalated\s+through\s+\d+\b/i,
|
||||
/\b\d+\s+distinct\s+(model|tier)s?\b/i,
|
||||
|
||||
// ─── History / proof references ───
|
||||
// "verified on PR #8", "verified end-to-end on PR 8", "tested against PR #4"
|
||||
/\bverified\s+(?:end[- ]to[- ]end\s+)?(?:on|against|in)\s+(?:PR|commit|prior|the\s+\w+\s+audit)\s*#?\w*/i,
|
||||
/\btested\s+(?:against|in|on)\s+(?:PR|commit|prior)\s*#?\w*/i,
|
||||
// Direct PR/commit references: "PR #8", "on PR 9", "from commit abc123"
|
||||
/\b(?:on|from|in|via|per)\s+PR\s*#?\d+\b/i,
|
||||
/\b(?:from|in|per|against)\s+commit\s+[0-9a-f]{6,}/i,
|
||||
// Observational descriptions of prior behavior: "was flipping", "was X before", "previously observed"
|
||||
/\b(?:was|were)\s+(?:flipping|drifting|inconsistent|non[- ]deterministic|creeping)\b/i,
|
||||
/\bpreviously\s+(?:observed|flagged|reported|seen|landed)\b/i,
|
||||
/\bused\s+to\s+(?:flip|fail|flag|reject|block)\b/i,
|
||||
/\bobserved\s+(?:in|during|on|across)\s+(?:PR|prior|\d+\s+(?:runs?|audits?))/i,
|
||||
// "flipping/drifting across N runs" — historical variance description
|
||||
/\b(?:flipping|drifting|varying|oscillating)\s+across\s+(?:\d+\s+)?(?:runs?|audits?|iterations?)\b/i,
|
||||
// "the proven X" referring to prior work ("proven" is a STRONG pattern,
|
||||
// but in context "the proven FOO" is usually a historical reference,
|
||||
// not a fresh claim). We catch it here so the empirical skip wins.
|
||||
/\bthe\s+proven\s+(?:escalation\s+ladder|pipeline|flow|loop|tier|path)/i,
|
||||
// "from the 9-run test", "across the 5-run validation"
|
||||
/\b(?:from|across|in|during)\s+the\s+\d+[- ]run\s+(?:test|validation|probe|experiment)/i,
|
||||
];
|
||||
|
||||
export interface ParsedClaims {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user