2026-04-22 09:13:35 +00:00
1 changed files with 119 additions and 0 deletions
--- a/auditor/claim_parser.ts
+++ b/auditor/claim_parser.ts
@ -0,0 +1,119 @@
+// Claim parser — reads commit messages + PR body, extracts ship-claims.
+//
+// A "ship-claim" is any phrase that asserts functionality is working,
+// tested, complete, or landed. These are the assertions the downstream
+// checks (static/dynamic/inference/kb) try to falsify.
+//
+// Heuristic approach (regex + strength grading) — intentionally NOT
+// using an LLM here. Reason: the inference check already asks a cloud
+// model "does this match the claim?". The parser's job is to surface
+// candidate claim text, not to judge it. Making the parser smarter
+// (and pickier) risks false negatives on claims the cloud model
+// would have caught anyway.
+
+import type { Claim, PrSnapshot } from "./types.ts";
+
+// Strong claims: explicit end-to-end + verification vocabulary
+const STRONG_PATTERNS: RegExp[] = [
+  /\bverified\s+(end[- ]to[- ]end|live|in\s+production|against)\b/i, // "verified end-to-end" / "live" / "in production" / "against"
+  /\btested\s+(live|end[- ]to[- ]end|against|with)\b/i, // "tested live" / "end-to-end" / "against" / "with"
+  /\bworks\s+(end[- ]to[- ]end|live|in\s+production)\b/i, // "works end-to-end" / "live" / "in production"
+  /\bproduction[- ]ready\b/i, // "production-ready" or "production ready"
+  /\bfully\s+(functional|wired|working)\b/i, // "fully functional" / "wired" / "working"
+  /\bphase\s+\d+(\.\d+)?\s+(shipped|complete|done|landed)\b/i, // e.g. "phase 3 shipped", "phase 2.1 done"
+  /\bground\s+truth\b/i, // any mention of "ground truth"
+  /\bproven\b/i, // the bare word "proven"
+];
+
+// Moderate claims: asserted completion or pass but without the strong
+// verification qualifier. Every pattern is case-insensitive and
+// anchored on word boundaries, so e.g. "bypassed" or "undone" do not
+// count.
+const MODERATE_PATTERNS: RegExp[] = [
+  /\bshipped\b/i,
+  /\blanded\b/i,
+  /\bgreen\b/i,
+  // Pass vocabulary: "tests pass", "passes", "passing", "passed". A
+  // bare "pass" only counts directly after "test(s)", so imperative
+  // uses such as "pass the flag through" are not reported as claims.
+  /\b(tests?\s+pass|pass(es|ing|ed))\b/i,
+  /\bcomplet(e|ed)\b/i,
+  /\bdone\b/i,
+  /\bwired\b/i,
+  /\bfixed\b/i,
+  /\bworks\b/i,
+];
+
+// Weak claims: aspirational or hedged. Usually low-risk but recorded
+// for completeness. Checked last: a line is weak only if nothing stronger matched.
+const WEAK_PATTERNS: RegExp[] = [
+  /\bshould\s+work\b/i, // "should work"
+  /\bexpected\s+to\b/i, // "expected to …"
+  /\bintended\s+to\b/i, // "intended to …"
+  /\bwill\s+(work|handle|support)\b/i, // future tense: "will work" / "will handle" / "will support"
+  /\bprobably\b/i, // explicit hedge
+];
+
+export interface ParsedClaims { // Return shape of parseClaims.
+  claims: Claim[]; // PR-body claims first, then per-commit claims in commit order
+  commits_scanned: number; // pr.commits.length — also counts commits skipped for an empty message
+}
+
+// Builds the claim list for a PR snapshot.
+//
+// The PR body is scanned first (locations "pr_body:N", attributed to
+// the head SHA), then each commit message in the order given
+// (locations "commit:<first 8 chars of sha>:N", attributed to that
+// commit), where N is the 1-based line number within that text.
+// `commits_scanned` is the total number of commits on the snapshot,
+// including any skipped for having an empty message.
+export function parseClaims(pr: PrSnapshot): ParsedClaims {
+  const found: Claim[] = [];
+
+  // An absent or empty body contributes nothing.
+  if (pr.body) scanText(pr.body, "pr_body", pr.head_sha, found);
+
+  for (const { message, sha } of pr.commits) {
+    // Commits without a message have nothing to scan.
+    if (message) scanText(message, `commit:${sha.slice(0, 8)}`, sha, found);
+  }
+
+  return { claims: found, commits_scanned: pr.commits.length };
+}
+
+// Upper bound, in UTF-16 code units, on the text stored per claim.
+const MAX_CLAIM_TEXT_LENGTH = 200;
+
+// Trims a matched line and caps it at MAX_CLAIM_TEXT_LENGTH code units.
+// If the cut would land between the two halves of a surrogate pair, the
+// dangling high surrogate is dropped so the stored text stays
+// well-formed (a lone surrogate cannot be encoded as valid UTF-8).
+function clipClaimText(line: string): string {
+  const trimmed = line.trim();
+  if (trimmed.length <= MAX_CLAIM_TEXT_LENGTH) return trimmed;
+  const lastKept = trimmed.charCodeAt(MAX_CLAIM_TEXT_LENGTH - 1);
+  const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
+  return trimmed.slice(0, endsOnHighSurrogate ? MAX_CLAIM_TEXT_LENGTH - 1 : MAX_CLAIM_TEXT_LENGTH);
+}
+
+// Scans `text` line by line and appends one claim to `out` for every
+// line that matches a claim pattern.
+//
+// Grading is strongest-first (strong, then moderate, then weak) and a
+// line yields at most one claim, so a hedged line that also contains a
+// moderate keyword is recorded as moderate, not weak.
+//
+//   text            - raw PR body or commit message
+//   location_prefix - prefix for the claim's `location`; ":<1-based line>" is appended
+//   commit_sha      - SHA stored on every claim produced from this text
+//   out             - accumulator, mutated in place
+function scanText(text: string, location_prefix: string, commit_sha: string, out: Claim[]): void {
+  // Order matters: the first tier with a matching pattern wins.
+  const tiers = [
+    { strength: "strong", patterns: STRONG_PATTERNS },
+    { strength: "moderate", patterns: MODERATE_PATTERNS },
+    { strength: "weak", patterns: WEAK_PATTERNS },
+  ] as const;
+
+  text.split(/\r?\n/).forEach((line, index) => {
+    // Lines under three characters cannot hold a claim phrase.
+    if (line.length < 3) return;
+
+    const tier = tiers.find(({ patterns }) => firstMatch(line, patterns) !== null);
+    if (!tier) return;
+
+    out.push({
+      text: clipClaimText(line),
+      commit_sha,
+      location: `${location_prefix}:${index + 1}`,
+      strength: tier.strength,
+    });
+  });
+}
+
+// Returns the first pattern in `patterns` that matches `text`, or null
+// when none does (including when `patterns` is empty).
+//
+// `RegExp.prototype.test` is stateful for global/sticky regexes: it
+// starts at `lastIndex` and advances it on success. The pattern tables
+// are shared objects reused for every line, so `lastIndex` is reset
+// before and after each test to keep matching independent of earlier
+// calls. For non-global patterns the reset is a no-op.
+function firstMatch(text: string, patterns: RegExp[]): RegExp | null {
+  for (const pattern of patterns) {
+    pattern.lastIndex = 0;
+    const matched = pattern.test(text);
+    pattern.lastIndex = 0;
+    if (matched) return pattern;
+  }
+  return null;
+}