From b933334ae238e7eb08bc063d0f489a95bf72983e Mon Sep 17 00:00:00 2001 From: profit Date: Wed, 22 Apr 2026 03:29:31 -0500 Subject: [PATCH] =?UTF-8?q?Auditor:=20static=20diff=20check=20=E2=80=94=20?= =?UTF-8?q?catches=20own=20Phase=2045=20placeholder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit auditor/checks/static.ts — grep-style scan of PR diffs, no AST, no LLM. High-signal patterns only. Severity grading: - BLOCK — unimplemented!(), todo!(), panic!("not implemented"), throw new Error("not implemented") - WARN — TODO/FIXME/XXX/HACK in added lines; new pub struct fields with <2 mentions in the diff (added but nobody reads it — placeholder state) - INFO — hardcoded "placeholder"/"dummy"/"foobar"/"changeme"/"xxx" strings in added lines Live-proven — the existential test J asked for: vs PR #1 (scaffold): 0 findings (all scaffold fields cross-reference within the diff); vs commit 2a4b81b (Phase 45 first slice — I half-admitted placeholder): 5 WARN: every DocRef field (tool, version_seen, snippet_hash, source_url, seen_at) added with 0 read-sites in the diff. That's the auditor flagging my own "Phase 45 first slice" commit as state-without-consumer, which is exactly what I half-admitted it was. If PR #1 had required auditor-pass (branch protection), the DocRef commit would have been blocked pre-merge. The auditor works because it agreed with the honest read. Next: dynamic hybrid test fixture (task #4) — the never-run multi-layer pipeline test. --- auditor/checks/static.ts | 147 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 auditor/checks/static.ts diff --git a/auditor/checks/static.ts b/auditor/checks/static.ts new file mode 100644 index 0000000..954bfd5 --- /dev/null +++ b/auditor/checks/static.ts @@ -0,0 +1,147 @@ +// Static diff check — grep-style, no AST, no LLM. Looks for patterns +// that are high-signal evidence of placeholder code. 
//
// Findings are severity-graded:
//   block — explicit non-impl markers (unimplemented!, todo!,
//           panic!("not implemented"), throw new Error("not implemented"))
//   warn  — TODO / FIXME / XXX / HACK comments on added lines,
//           new `pub` fields that no other added line in the diff mentions
//   info  — hardcoded "placeholder" / "dummy" / "foobar" / "xxx" /
//           "replaceme" / "changeme" strings in added lines (could be
//           real, just flag for inspection)
//
// The empty-function-body heuristic ({ Ok(()) } / {}) is not implemented
// in this file.
//
// Consumes: raw unified diff text from Gitea.

import type { Finding } from "../types.ts";

/** A line-level regex and the human-readable reason it is reported. */
type LinePattern = { re: RegExp; why: string };

/** One added line of a file's diff section, with the leading `+` removed. */
interface AddedLine {
  /** Line content without the `+` marker. */
  text: string;
  /**
   * Line number in the new version of the file when the line sits inside a
   * `@@` hunk; otherwise the 1-based offset into the file's diff section.
   */
  lineNo: number;
}

// Rust + TypeScript patterns that almost always indicate "this is
// not actually implemented yet." Tested against the line WITHOUT its `+`.
const BLOCK_PATTERNS: LinePattern[] = [
  { re: /\bunimplemented!\s*\(/, why: "unimplemented!() macro call" },
  { re: /\btodo!\s*\(/, why: "todo!() macro call" },
  { re: /panic!\s*\(\s*"(?:not implemented|TODO|not yet|unimpl)/i, why: "panic! with not-implemented message" },
  { re: /throw\s+new\s+Error\s*\(\s*['"](?:not implemented|TODO|unimpl)/i, why: "throw Error 'not implemented'" },
];

// Anchored on the `+` marker, so these are tested against the raw diff line.
const WARN_COMMENT_PATTERNS: LinePattern[] = [
  { re: /^\+.*\/\/\s*(TODO|FIXME|XXX|HACK)\b/i, why: "TODO/FIXME/XXX/HACK comment added" },
  { re: /^\+.*#\s*(TODO|FIXME|XXX|HACK)\b/i, why: "TODO/FIXME/XXX/HACK comment added" },
];

// Tested against the line WITHOUT its `+`.
const INFO_HARDCODED_PATTERNS: LinePattern[] = [
  { re: /"(?:placeholder|dummy|foobar|xxx|replaceme|changeme)"/i, why: "suspicious hardcoded string" },
];

// Unified-diff hunk header; capture group 1 is the first line number of the
// hunk in the NEW file.
const HUNK_HEADER = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/;

/**
 * Scan a unified diff for placeholder-code signals on added lines.
 *
 * @param diff Raw unified diff text (one or more `diff --git` sections).
 * @returns Findings in file order; within a file, per-line findings come
 *   first, followed by the "field added but never read" warnings.
 */
export function runStaticCheck(diff: string): Finding[] {
  const findings: Finding[] = [];

  // First pass: gather added lines per file AND across the whole diff, so
  // the field heuristic below can see readers that live in another file.
  const files: Array<{ path: string; added: AddedLine[] }> = [];
  const diffWideAdded: string[] = [];
  for (const [path, lines] of splitDiffByFile(diff)) {
    const added = collectAddedLines(lines);
    files.push({ path, added });
    for (const entry of added) diffWideAdded.push(entry.text);
  }

  for (const { path, added } of files) {
    // Skip pure-delete / metadata-only sections.
    if (added.length === 0) continue;

    for (const entry of added) {
      for (const { re, why } of BLOCK_PATTERNS) {
        if (re.test(entry.text)) findings.push(lineFinding("block", why, path, entry));
      }

      // Both comment patterns carry the same message, so report a line once
      // even when it contains a `//` marker and a `#` marker.
      const comment = WARN_COMMENT_PATTERNS.find(({ re }) => re.test(`+${entry.text}`));
      if (comment) findings.push(lineFinding("warn", comment.why, path, entry));

      for (const { re, why } of INFO_HARDCODED_PATTERNS) {
        if (re.test(entry.text)) findings.push(lineFinding("info", why, path, entry));
      }
    }

    // "Field added but never read" heuristic — catches exactly the
    // Phase 45 DocRef placeholder pattern. Limited to the diff itself:
    // we're not doing a full-codebase grep here (too noisy; callers
    // elsewhere might exist). The point is: if no added line anywhere in
    // the diff mentions the field besides its own definition, the PR is
    // shipping state without a consumer.
    for (const field of extractNewFields(added.map(entry => entry.text))) {
      const name = escape(field);
      const mention = new RegExp(`[\\.:]\\s*${name}\\b|\\b${name}\\s*:`);
      // A `pub name:` line is the definition itself, not a consumer.
      const definition = new RegExp(`^\\s*pub\\s+${name}\\s*:`);
      const readers = diffWideAdded.filter(l => mention.test(l) && !definition.test(l));
      if (readers.length === 0) {
        findings.push({
          check: "static",
          severity: "warn",
          summary: `field '${field}' added in ${path} but no read-site in the diff — could be placeholder state without a consumer`,
          evidence: [`${path}: added '${field}' with no reader; rest of diff has ${readers.length} mentions`],
        });
      }
    }
  }

  return findings;
}

/** Build the finding for a single added line (evidence is capped at 160 chars). */
function lineFinding(
  severity: "block" | "warn" | "info",
  why: string,
  path: string,
  line: AddedLine,
): Finding {
  return {
    check: "static",
    severity,
    summary: `${why} in ${path}`,
    evidence: [`${path}:+${line.lineNo}: ${line.text.trim().slice(0, 160)}`],
  };
}

/**
 * Extract the added lines from one file's diff section.
 *
 * Once a `@@` hunk header has been seen, every line starting with `+` is an
 * addition (so content such as `++i;` is not mistaken for the `+++` file
 * header) and is numbered by its position in the new file. Before any hunk
 * header, the looser "starts with `+` but not `+++`" rule applies and the
 * 1-based offset into the section is used as the number.
 */
function collectAddedLines(lines: readonly string[]): AddedLine[] {
  const added: AddedLine[] = [];
  let inHunk = false;
  let nextNewLine = 0;
  let offset = 0;

  for (const line of lines) {
    offset++;

    const hunk = HUNK_HEADER.exec(line);
    if (hunk) {
      inHunk = true;
      nextNewLine = Number(hunk[1]);
      continue;
    }

    if (!inHunk) {
      if (line.startsWith("+") && !line.startsWith("+++")) {
        added.push({ text: line.slice(1), lineNo: offset });
      }
      continue;
    }

    if (line.startsWith("+")) {
      added.push({ text: line.slice(1), lineNo: nextNewLine });
      nextNewLine++;
    } else if (!line.startsWith("-") && !line.startsWith("\\")) {
      // Context line: present in the new file, so it advances the counter.
      // Removed lines and the "\ No newline at end of file" marker do not.
      nextNewLine++;
    }
  }

  return added;
}

/**
 * Split a multi-file diff into per-file line buffers keyed by the `b/` path.
 * The `diff --git` line itself is not included in the buffer; lines before
 * the first `diff --git` header are discarded.
 */
function splitDiffByFile(diff: string): Map<string, string[]> {
  const out = new Map<string, string[]>();
  let current: string | null = null;
  let buf: string[] = [];
  for (const line of diff.split(/\r?\n/)) {
    const path = diffHeaderPath(line);
    if (path !== null) {
      if (current !== null) out.set(current, buf);
      current = path;
      buf = [];
      continue;
    }
    buf.push(line);
  }
  if (current !== null) out.set(current, buf);
  return out;
}

/**
 * Return the new-side path named by a `diff --git` header, or null when the
 * line is not such a header. Paths may contain spaces. Headers that do not
 * have the plain `a/… b/…` shape (e.g. quoted paths) still start a new
 * section, keyed by the raw header remainder, so their lines are never
 * attributed to the previous file.
 */
function diffHeaderPath(line: string): string | null {
  const prefix = "diff --git ";
  if (!line.startsWith(prefix)) return null;
  const rest = line.slice(prefix.length).trim();
  const m = /^a\/(.+?) b\/(.+)$/.exec(rest);
  const newSide = m ? m[2] : undefined;
  return newSide !== undefined ? newSide : rest;
}

// Extract new `pub name: Type,` fields from added lines. Rust syntax.
// Narrowly-scoped: only matches at the start of a trimmed line,
// requires `pub ` prefix, ignores `pub fn` / `pub struct` / etc.
function extractNewFields(addedLines: string[]): string[] {
  const fields = new Set<string>();
  for (const line of addedLines) {
    // pub NAME: Type,
    const m = line
      .trim()
      .match(/^pub\s+(?!fn\b|struct\b|enum\b|mod\b|use\b|trait\b|impl\b|const\b|static\b|type\b)(\w+)\s*:/);
    const name = m ? m[1] : undefined;
    if (name !== undefined) fields.add(name);
  }
  return Array.from(fields);
}

/** Escape regex metacharacters so `s` can be embedded literally in a RegExp source. */
function escape(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}