// Claim parser — reads commit messages + PR body, extracts ship-claims.
//
// A "ship-claim" is any phrase that asserts functionality is working,
// tested, complete, or landed. These are the assertions the downstream
// checks (static/dynamic/inference/kb) try to falsify.
//
// Heuristic approach (regex + strength grading) — intentionally NOT
// using an LLM here. Reason: the inference check already asks a cloud
// model "does this match the claim?". The parser's job is to surface
// the claim substrates, not to judge them. Over-engineering the parser
// risks false negatives when the cloud model was going to catch it
// anyway.

import type { Claim, PrSnapshot } from "./types.ts";

// Strong claims: explicit end-to-end + verification vocabulary.
const STRONG_PATTERNS: RegExp[] = [
  /\bverified\s+(end[- ]to[- ]end|live|in\s+production|against)\b/i,
  /\btested\s+(live|end[- ]to[- ]end|against|with)\b/i,
  /\bworks\s+(end[- ]to[- ]end|live|in\s+production)\b/i,
  /\bproduction[- ]ready\b/i,
  /\bfully\s+(functional|wired|working)\b/i,
  /\bphase\s+\d+(\.\d+)?\s+(shipped|complete|done|landed)\b/i,
  /\bground\s+truth\b/i,
  /\bproven\b/i,
];

// Moderate claims: asserted completion or pass, but without the strong
// verification qualifier.
const MODERATE_PATTERNS: RegExp[] = [
  /\bshipped\b/i,
  /\blanded\b/i,
  /\bgreen\b/i,
  /\b(tests?\s+)?pass(ing|ed)\b/i,
  /\bcomplet(e|ed)\b/i,
  /\bdone\b/i,
  /\bwired\b/i,
  /\bfixed\b/i,
  /\bworks\b/i,
];

// Weak claims: aspirational or hedged. Usually low-risk but recorded
// for completeness.
const WEAK_PATTERNS: RegExp[] = [
  /\bshould\s+work\b/i,
  /\bexpected\s+to\b/i,
  /\bintended\s+to\b/i,
  /\bwill\s+(work|handle|support)\b/i,
  /\bprobably\b/i,
];

// Empirical claims: runtime measurements / observed outcomes that can't
// be verified from a diff (only from the actual run that produced
// them). Classifying as empirical lets the inference check skip
// diff-verification and saves the ladder for falsifiable claims.
//
// Two classes share this bucket because they share the skip discipline:
//
//   1. Runtime metrics — "58 cloud calls", "306s end-to-end"
//   2. History/proof refs — "verified on PR #8", "was flipping across runs"
//
// Both are assertions about state outside the current diff. The cloud
// would flag them as "not backed" — but that's a false positive: the
// proof lives in the referenced run, prior commit, or test output, not
// in the added lines the cloud is reading.
const EMPIRICAL_PATTERNS: RegExp[] = [
  // ─── Runtime metrics ───
  // Iteration / attempt counts: "6/6 iterations", "attempt 5", "accepted on attempt 3"
  /\b\d+\s*\/\s*\d+\s+(iterations?|attempts?|cycles?|runs?|shards?)\b/i,
  /\b(accepted|resolved|converged)\s+on\s+attempt\s+\d+\b/i,
  // Runtime metrics: "58 cloud calls", "306s end-to-end", "245s total", "5931 chars"
  /\b\d+\s+(cloud\s+)?calls?\b/i,
  /\b\d+\s*(ms|s|seconds?|minutes?|m)\s+(end[- ]to[- ]end|total|elapsed|duration)\b/i,
  /\b\d+\s+chars?\b.*\b(accepted|generated|produced)\b/i,
  // "escalated through N tiers", "N distinct models"
  /\bescalated\s+through\s+\d+\b/i,
  /\b\d+\s+distinct\s+(model|tier)s?\b/i,

  // ─── History / proof references ───
  // "verified on PR #8", "verified end-to-end on PR 8", "tested against PR #4"
  /\bverified\s+(?:end[- ]to[- ]end\s+)?(?:on|against|in)\s+(?:PR|commit|prior|the\s+\w+\s+audit)\s*#?\w*/i,
  /\btested\s+(?:against|in|on)\s+(?:PR|commit|prior)\s*#?\w*/i,
  // Direct PR/commit references: "PR #8", "on PR 9", "from commit abc123"
  /\b(?:on|from|in|via|per)\s+PR\s*#?\d+\b/i,
  /\b(?:from|in|per|against)\s+commit\s+[0-9a-f]{6,}/i,
  // Observational descriptions of prior behavior: "was flipping", "was X before", "previously observed"
  /\b(?:was|were)\s+(?:flipping|drifting|inconsistent|non[- ]deterministic|creeping)\b/i,
  /\bpreviously\s+(?:observed|flagged|reported|seen|landed)\b/i,
  /\bused\s+to\s+(?:flip|fail|flag|reject|block)\b/i,
  /\bobserved\s+(?:in|during|on|across)\s+(?:PR|prior|\d+\s+(?:runs?|audits?))/i,
  // "flipping/drifting across N runs" — historical variance description
  /\b(?:flipping|drifting|varying|oscillating)\s+across\s+(?:\d+\s+)?(?:runs?|audits?|iterations?)\b/i,
  // "the proven X" referring to prior work (proven is a STRONG pattern
  // but in context "the proven FOO" is usually a historical reference,
  // not a fresh claim). We catch it here so the empirical skip wins.
  /\bthe\s+proven\s+(?:escalation\s+ladder|pipeline|flow|loop|tier|path)/i,
  // "from the 9-run test", "across the 5-run validation"
  /\b(?:from|across|in|during)\s+the\s+\d+[- ]run\s+(?:test|validation|probe|experiment)/i,
];

// Classification tiers in precedence order: the first tier with a
// matching pattern decides a line's strength.
//
// Empirical wins over everything else — if a line ALSO contains a
// moderate word like "complete", we still want to classify it as
// empirical so the inference check doesn't ask the cloud to prove
// "58 cloud calls" from the diff.
// Order: empirical → strong → moderate → weak.
const CLAIM_TIERS = [
  { strength: "empirical", patterns: EMPIRICAL_PATTERNS },
  { strength: "strong", patterns: STRONG_PATTERNS },
  { strength: "moderate", patterns: MODERATE_PATTERNS },
  { strength: "weak", patterns: WEAK_PATTERNS },
] as const;

/** Result of scanning one PR snapshot for ship-claims. */
export interface ParsedClaims {
  /** Claims found in the PR body first, then in each commit message in order. */
  claims: Claim[];
  /**
   * Number of commits on the PR (`pr.commits.length`). Commits whose
   * message is empty are counted here even though they yield no claims.
   */
  commits_scanned: number;
}

/**
 * Extracts ship-claims from a PR's body and from every commit message.
 *
 * @param pr - Snapshot providing `body`, `head_sha`, and `commits`
 *   (each commit contributing `sha` and `message`).
 * @returns All claims found plus the number of commits on the PR.
 */
export function parseClaims(pr: PrSnapshot): ParsedClaims {
  const claims: Claim[] = [];

  // PR body — every matching line becomes a claim at location
  // "pr_body:N", attributed to the PR's head commit.
  if (pr.body) {
    scanText(pr.body, "pr_body", pr.head_sha, claims);
  }

  // Each commit message gets its own scan, located by the first eight
  // characters of the commit sha.
  for (const c of pr.commits) {
    if (!c.message) continue;
    scanText(c.message, `commit:${c.sha.slice(0, 8)}`, c.sha, claims);
  }

  return { claims, commits_scanned: pr.commits.length };
}

/**
 * Scans `text` line by line and appends one claim to `out` for every
 * line that matches a pattern in any tier.
 *
 * @param text - Raw text; split on LF or CRLF.
 * @param location_prefix - Prefix for the claim location; the 1-based
 *   line number is appended as `:N`.
 * @param commit_sha - Commit recorded on every claim found in `text`.
 * @param out - Accumulator that receives the claims (mutated in place).
 */
function scanText(text: string, location_prefix: string, commit_sha: string, out: Claim[]): void {
  for (const [index, line] of text.split(/\r?\n/).entries()) {
    // Lines under three characters (measured before trimming) are skipped.
    if (line.length < 3) continue;

    const tier = CLAIM_TIERS.find((candidate) => firstMatch(line, candidate.patterns) !== null);
    if (!tier) continue;

    out.push({
      // Claim text is trimmed and capped at 200 characters.
      text: line.trim().slice(0, 200),
      commit_sha,
      location: `${location_prefix}:${index + 1}`,
      strength: tier.strength,
    });
  }
}

/**
 * Returns the first pattern in `patterns` that matches `text`, or
 * `null` when none does.
 *
 * Uses `RegExp.prototype.test`, which is stateless only for patterns
 * without the `g`/`y` flags — none of the tables above use them.
 */
function firstMatch(text: string, patterns: readonly RegExp[]): RegExp | null {
  for (const p of patterns) {
    if (p.test(text)) return p;
  }
  return null;
}