Audit pipeline PR #9: determinism + fact extraction + verifier gate + KB stats #9
@ -78,8 +78,11 @@ const EMPIRICAL_PATTERNS: RegExp[] = [
|
||||
|
||||
// ─── History / proof references ───
|
||||
// "verified on PR #8", "verified end-to-end on PR 8", "tested against PR #4"
|
||||
/\bverified\s+(?:end[- ]to[- ]end\s+)?(?:on|against|in)\s+(?:PR|commit|prior|the\s+\w+\s+audit)\s*#?\w*/i,
|
||||
/\btested\s+(?:against|in|on)\s+(?:PR|commit|prior)\s*#?\w*/i,
|
||||
// Require PR#N / commit-hash / "prior <word>" to avoid matching
|
||||
// "verified ... in production" (PR without \b-ish anchor previously
|
||||
// consumed "pr" of "production").
|
||||
/\bverified\s+(?:end[- ]to[- ]end\s+)?(?:on|against|in)\s+(?:PR\s*#?\d+|commit\s+[0-9a-f]{6,}|prior\s+\w+|the\s+\w+\s+audit)\b/i,
|
||||
/\btested\s+(?:against|in|on)\s+(?:PR\s*#?\d+|commit\s+[0-9a-f]{6,}|prior\s+\w+)\b/i,
|
||||
// Direct PR/commit references: "PR #8", "on PR 9", "from commit abc123"
|
||||
/\b(?:on|from|in|via|per)\s+PR\s*#?\d+\b/i,
|
||||
/\b(?:from|in|per|against)\s+commit\s+[0-9a-f]{6,}/i,
|
||||
@ -131,7 +134,7 @@ function scanText(text: string, location_prefix: string, commit_sha: string, out
|
||||
// classify it as empirical so the inference check doesn't ask
|
||||
// the cloud to prove "58 cloud calls" from the diff. Order:
|
||||
// empirical → strong → moderate → weak.
|
||||
const empirical = firstMatch(line, EMPIRICAL_PATTERNS);
|
||||
const empirical = firstUnquotedMatch(line, EMPIRICAL_PATTERNS);
|
||||
if (empirical) {
|
||||
out.push({
|
||||
text: line.trim().slice(0, 200),
|
||||
@ -141,7 +144,7 @@ function scanText(text: string, location_prefix: string, commit_sha: string, out
|
||||
});
|
||||
continue;
|
||||
}
|
||||
const strong = firstMatch(line, STRONG_PATTERNS);
|
||||
const strong = firstUnquotedMatch(line, STRONG_PATTERNS);
|
||||
if (strong) {
|
||||
out.push({
|
||||
text: line.trim().slice(0, 200),
|
||||
@ -151,7 +154,7 @@ function scanText(text: string, location_prefix: string, commit_sha: string, out
|
||||
});
|
||||
continue;
|
||||
}
|
||||
const moderate = firstMatch(line, MODERATE_PATTERNS);
|
||||
const moderate = firstUnquotedMatch(line, MODERATE_PATTERNS);
|
||||
if (moderate) {
|
||||
out.push({
|
||||
text: line.trim().slice(0, 200),
|
||||
@ -161,7 +164,7 @@ function scanText(text: string, location_prefix: string, commit_sha: string, out
|
||||
});
|
||||
continue;
|
||||
}
|
||||
const weak = firstMatch(line, WEAK_PATTERNS);
|
||||
const weak = firstUnquotedMatch(line, WEAK_PATTERNS);
|
||||
if (weak) {
|
||||
out.push({
|
||||
text: line.trim().slice(0, 200),
|
||||
@ -173,9 +176,35 @@ function scanText(text: string, location_prefix: string, commit_sha: string, out
|
||||
}
|
||||
}
|
||||
|
||||
function firstMatch(text: string, patterns: RegExp[]): RegExp | null {
|
||||
// Match a pattern only when its match position is NOT inside a quoted
|
||||
// string on the line. Mirrors the same guard in auditor/checks/static.ts
|
||||
// — the two files have the same false-positive class: PR authors
|
||||
// quote pattern examples in commit message bodies (e.g. `"Phase 45
|
||||
// shipped"` as a test example) and without this guard those quoted
|
||||
// references get flagged as fresh ship-claims. Only skips when the
|
||||
// match itself falls inside quotes; real (unquoted) uses of the same
|
||||
// vocabulary still classify correctly.
|
||||
/**
 * Returns the first pattern in `patterns` that has at least one occurrence
 * in `text` starting outside a quoted span, or `null` when no pattern does.
 *
 * Every occurrence of a pattern is inspected, not just the first: a line
 * that quotes an example and then makes the same claim for real (unquoted)
 * is still reported.
 *
 * @param text     A single line of text to scan.
 * @param patterns Candidate patterns, tried in array order.
 * @returns The matching pattern object from `patterns`, or `null`.
 */
function firstUnquotedMatch(text: string, patterns: RegExp[]): RegExp | null {
  for (const p of patterns) {
    // Scan with a global clone: this lets exec() walk every occurrence and
    // keeps the caller's regex (and its lastIndex) untouched.
    const flags = p.flags.includes("g") ? p.flags : p.flags + "g";
    const scanner = new RegExp(p.source, flags);

    let m: RegExpExecArray | null;
    while ((m = scanner.exec(text)) !== null) {
      if (!isInsideQuotedString(text, m.index)) return p;
      // A zero-length match does not advance lastIndex on its own; step
      // forward manually so the loop always terminates.
      if (m[0].length === 0) scanner.lastIndex++;
    }
  }
  return null;
}
|
||||
|
||||
// Walks left→right toggling in-quote state on each unescaped quote.
|
||||
// Good enough for single-line claims; multi-line strings aren't parsed.
|
||||
/**
 * Reports whether position `pos` on `line` falls inside a quoted span.
 *
 * Scans characters `[0, pos)` left to right, toggling one of three states
 * (double quote, single quote, backtick) on each unescaped quote character.
 * A quote character of one kind is ignored while a span of another kind is
 * open. A backslash escapes exactly the next character, so `\"` does not
 * toggle state while `\\"` (escaped backslash, then a real quote) does.
 *
 * NOTE(review): a bare apostrophe in prose (e.g. "doesn't") is treated as
 * an opening single quote; this heuristic does not distinguish the two.
 *
 * @param line The single line being examined; multi-line strings are not parsed.
 * @param pos  Zero-based index into `line`; values past the end are clamped.
 * @returns `true` when a quote opened before `pos` is still open at `pos`.
 */
function isInsideQuotedString(line: string, pos: number): boolean {
  let inDouble = false;
  let inSingle = false;
  let inBacktick = false;
  // True when the previous character was an unescaped backslash.
  let escaped = false;

  const end = Math.min(pos, line.length);
  for (let i = 0; i < end; i++) {
    const c = line.charAt(i);

    if (escaped) {
      // This character is consumed by the preceding backslash.
      escaped = false;
      continue;
    }
    if (c === "\\") {
      escaped = true;
      continue;
    }

    if (c === '"' && !inSingle && !inBacktick) inDouble = !inDouble;
    else if (c === "'" && !inDouble && !inBacktick) inSingle = !inSingle;
    else if (c === "`" && !inDouble && !inSingle) inBacktick = !inBacktick;
  }

  return inDouble || inSingle || inBacktick;
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user