From b25e36881c332f3eeedd06eac048e2e55ebc0391 Mon Sep 17 00:00:00 2001 From: profit Date: Wed, 22 Apr 2026 23:53:07 -0500 Subject: [PATCH] claim_parser: history/proof claims join empirical class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #9's 4 block findings were all from commit message references to prior work ("on PR #8", "the proven X", "flipping across N runs"). The cloud reviewer correctly said "the current diff does not prove that", but the claim was never about the current diff — the proof lives in the referenced prior PR or test run. Extended EMPIRICAL_PATTERNS to cover two shared classes: 1. Runtime metrics (existing) — "58 cloud calls", "306s elapsed" 2. History/proof refs (new) — "verified on PR #8", "was flipping across 9 runs", "the proven escalation ladder", "previously observed in PR #6", "tested against commit abc1234" Both skip diff-verification for the same reason: the proof is outside the diff. Folded into the existing bucket rather than adding a new strength tier — the skip discipline is identical so there's no value in splitting them. Unit-tested on PR #9's actual failing lines: all 5 historical claims now classify as empirical; fresh claims like "Phase 45 shipped" stay strong; pure implementation descriptions ("implements deterministic classification") still don't match (expected — they're not claims, they're restatements). --- auditor/claim_parser.ts | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/auditor/claim_parser.ts b/auditor/claim_parser.ts index 0b00663..8f05fa5 100644 --- a/auditor/claim_parser.ts +++ b/auditor/claim_parser.ts @@ -51,11 +51,20 @@ const WEAK_PATTERNS: RegExp[] = [ // Empirical claims: runtime measurements / observed outcomes that can't // be verified from a diff (only from the actual run that produced -// them). 
Example: "6/6 iterations complete, 58 cloud calls, 306s -// end-to-end" — true, but only the test's own summary.json can -// confirm it. Classifying as empirical lets the inference check skip +// them). Classifying as empirical lets the inference check skip // diff-verification and saves the ladder for falsifiable claims. +// +// Two classes share this bucket because they share the skip discipline: +// +// 1. Runtime metrics — "58 cloud calls", "306s end-to-end" +// 2. History/proof refs — "verified on PR #8", "was flipping across runs" +// +// Both are assertions about state outside the current diff. The cloud +// would flag them as "not backed" — but that's a false positive: the +// proof lives in the referenced run, prior commit, or test output, not +// in the added lines the cloud is reading. const EMPIRICAL_PATTERNS: RegExp[] = [ + // ─── Runtime metrics ─── // Iteration / attempt counts: "6/6 iterations", "attempt 5", "accepted on attempt 3" /\b\d+\s*\/\s*\d+\s+(iterations?|attempts?|cycles?|runs?|shards?)\b/i, /\b(accepted|resolved|converged)\s+on\s+attempt\s+\d+\b/i, @@ -66,6 +75,27 @@ const EMPIRICAL_PATTERNS: RegExp[] = [ // "escalated through N tiers", "N distinct models" /\bescalated\s+through\s+\d+\b/i, /\b\d+\s+distinct\s+(model|tier)s?\b/i, + + // ─── History / proof references ─── + // "verified on PR #8", "verified end-to-end on PR 8", "tested against PR #4" + /\bverified\s+(?:end[- ]to[- ]end\s+)?(?:on|against|in)\s+(?:PR|commit|prior|the\s+\w+\s+audit)\s*#?\w*/i, + /\btested\s+(?:against|in|on)\s+(?:PR|commit|prior)\s*#?\w*/i, + // Preposition-led PR/commit references: "via PR #8", "on PR 9", "from commit abc123" + /\b(?:on|from|in|via|per)\s+PR\s*#?\d+\b/i, + /\b(?:from|in|per|against)\s+commit\s+[0-9a-f]{6,}/i, + // Observational descriptions of prior behavior: "was flipping", "used to fail", "previously observed" + /\b(?:was|were)\s+(?:flipping|drifting|inconsistent|non[- ]deterministic|creeping)\b/i, 
/\bpreviously\s+(?:observed|flagged|reported|seen|landed)\b/i, + /\bused\s+to\s+(?:flip|fail|flag|reject|block)\b/i, + /\bobserved\s+(?:in|during|on|across)\s+(?:PR|prior|\d+\s+(?:runs?|audits?))/i, + // "flipping/drifting across N runs" — historical variance description + /\b(?:flipping|drifting|varying|oscillating)\s+across\s+(?:\d+\s+)?(?:runs?|audits?|iterations?)\b/i, + // "the proven X" referring to prior work (proven is a STRONG pattern + // but in context "the proven FOO" is usually a historical reference, + // not a fresh claim). We catch it here so the empirical skip wins. + /\bthe\s+proven\s+(?:escalation\s+ladder|pipeline|flow|loop|tier|path)/i, + // "from the 9-run test", "across the 5-run validation" + /\b(?:from|across|in|during)\s+the\s+\d+[- ]run\s+(?:test|validation|probe|experiment)/i, ]; export interface ParsedClaims {