// Static diff check — grep-style, no AST, no LLM. Looks for patterns
// that are high-signal evidence of placeholder code.
//
// Findings are severity-graded:
//   block — explicit non-impl markers (unimplemented!, todo!,
//           panic!("not implemented"), throw new Error("not implemented"))
//   warn  — TODO / FIXME / XXX / HACK comments on added lines,
//           new struct fields with no read-site anywhere in the diff,
//           suspiciously-empty function bodies ({ Ok(()) } / {} when
//           the commit message claims the fn "implements" something)
//   info  — hardcoded "test" / "dummy" / "placeholder" strings in
//           added lines (could be real, just flag for inspection)
//
// Consumes: raw unified diff text from Gitea.

import type { Finding } from "../types.ts";

// Rust + TypeScript patterns that almost always indicate "this is
// not actually implemented yet."
const BLOCK_PATTERNS: Array<{ re: RegExp; why: string }> = [
  { re: /\bunimplemented!\s*\(/, why: "unimplemented!() macro call" },
  { re: /\btodo!\s*\(/, why: "todo!() macro call" },
  { re: /panic!\s*\(\s*"(?:not implemented|TODO|not yet|unimpl)/i, why: "panic! with not-implemented message" },
  { re: /throw\s+new\s+Error\s*\(\s*['"](?:not implemented|TODO|unimpl)/i, why: "throw Error 'not implemented'" },
];

// Matched against the RAW diff line (leading '+' included), so these
// only ever fire on added lines.
const WARN_COMMENT_PATTERNS: Array<{ re: RegExp; why: string }> = [
  { re: /^\+.*\/\/\s*(TODO|FIXME|XXX|HACK)\b/i, why: "TODO/FIXME/XXX/HACK comment added" },
  { re: /^\+.*#\s*(TODO|FIXME|XXX|HACK)\b/i, why: "TODO/FIXME/XXX/HACK comment added" },
];

const INFO_HARDCODED_PATTERNS: Array<{ re: RegExp; why: string }> = [
  { re: /"(?:placeholder|dummy|foobar|xxx|replaceme|changeme)"/i, why: "suspicious hardcoded string" },
];

// `diff --git a/<old> b/<new>` section header. The first group is
// greedy so unquoted paths containing spaces still split at the last
// " b/" separator; trailing whitespace is ignored.
const DIFF_GIT_HEADER = /^diff --git a\/(.+) b\/(.+?)\s*$/;

// Role of one line inside a per-file diff section.
type DiffLineKind = "meta" | "added" | "removed" | "context";

// Classify every line of one file's diff section. `+++` / `---` are
// file headers only BEFORE the first `@@` hunk header; inside a hunk a
// line such as `+++i;` is the added source line `++i;` and must be
// scanned like any other added line. Lines before the first hunk keep
// the legacy prefix rules so hand-written diffs without `@@` headers
// still work.
function classifyDiffLines(lines: readonly string[]): DiffLineKind[] {
  let inHunk = false;
  return lines.map((line): DiffLineKind => {
    if (line.startsWith("@@")) {
      inHunk = true;
      return "meta";
    }
    if (line.startsWith("\\ No newline")) return "meta";
    if (!inHunk && (line.startsWith("+++") || line.startsWith("---"))) return "meta";
    if (line.startsWith("+")) return "added";
    if (line.startsWith("-")) return "removed";
    return "context";
  });
}

/**
 * Scan a unified diff for placeholder-code signals.
 *
 * Only ADDED lines produce findings; context lines are walked solely to
 * keep the multi-line backtick state in sync with the post-merge file.
 *
 * @param diff Raw unified diff text (git-style, `diff --git` sections).
 * @returns Findings in file order; empty when nothing matched.
 */
export function runStaticCheck(diff: string): Finding[] {
  const findings: Finding[] = [];

  for (const [path, lines] of splitDiffByFile(diff)) {
    const kinds = classifyDiffLines(lines);
    // Skip diff bookkeeping + pure-delete files.
    if (!kinds.includes("added")) continue;

    // The auditor's own check files literally contain the BLOCK
    // patterns as regex definitions (BLOCK_PATTERNS in this file,
    // prompt examples in inference.ts). Skipping the BLOCK scan on
    // these specific paths prevents the checker from self-flagging its
    // own string literals. WARN/INFO patterns still run — those
    // genuinely could indicate problems in the checker's own code
    // (TODO comments don't self-define).
    const isAuditorCheckerFile =
      path.startsWith("auditor/checks/") || path.startsWith("auditor/fixtures/");

    // Track multi-line backtick-template state across the file. Walks
    // all post-merge lines (context + added, skipping removed lines)
    // in order and flips `inMultilineBacktick` on each unescaped
    // backtick. Pre-2026-04-26 the per-line walk in
    // isInsideQuotedString missed `todo!()` matches inside docstring
    // template literals because the opening backtick lived on a line
    // above the match. Now the file-level state is OR-ed into the
    // per-line check.
    let inMultilineBacktick = false;

    for (const [idx, line] of lines.entries()) {
      const kind = kinds[idx];
      // Diff bookkeeping lines and removed lines don't contribute to
      // the post-merge file's string state.
      if (kind === "meta" || kind === "removed") continue;

      const isAdded = kind === "added";
      // Strip the diff prefix (' ' for context, '+' for added).
      const body = isAdded || line.startsWith(" ") ? line.slice(1) : line;

      if (isAdded) {
        const added = body;
        const evidence = `${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`;

        if (!isAuditorCheckerFile) {
          for (const { re, why } of BLOCK_PATTERNS) {
            const m = added.match(re);
            if (m && typeof m.index === "number") {
              // Skip if the match sits inside a quoted string literal —
              // this is how rubric files (tests/real-world/*, prompt
              // templates) legitimately reference the patterns they
              // guard against, without actually executing them.
              if (inMultilineBacktick || isInsideQuotedString(added, m.index)) continue;
              findings.push({
                check: "static",
                severity: "block",
                summary: `${why} in ${path}`,
                evidence: [evidence],
              });
            }
          }
        }

        for (const { re, why } of WARN_COMMENT_PATTERNS) {
          if (re.test(line)) {
            findings.push({
              check: "static",
              severity: "warn",
              summary: `${why} in ${path}`,
              evidence: [evidence],
            });
          }
        }

        for (const { re, why } of INFO_HARDCODED_PATTERNS) {
          if (re.test(added)) {
            findings.push({
              check: "static",
              severity: "info",
              summary: `${why} in ${path}`,
              evidence: [evidence],
            });
          }
        }
      }

      // Update the file-level multi-line backtick state from THIS
      // line's unescaped backticks. Both context and added lines
      // contribute (they're both in the post-merge file). Backticks in
      // doc comments count too — multi-line template literals that
      // contained `todo!()` references were the source of the original
      // bug.
      inMultilineBacktick = updateBacktickState(body, inMultilineBacktick);
    }

    // "Field added but never read" heuristic — catches exactly the
    // Phase 45 DocRef placeholder pattern. Limited to the diff itself:
    // we're not doing a full-codebase grep here (too noisy; callers
    // elsewhere might exist). The point is: if no added line in this
    // file's diff reads the field, the PR is shipping state without a
    // consumer.
    //
    // Serde exemption: if the field's parent struct derives Serialize
    // or Deserialize, the read-site is the macro itself — JSON
    // round-trips consume every public field. Without this exemption
    // the check produces false positives on every response/request
    // struct shipped through `/v1/*`.
    const addedLines = lines
      .filter((_, i) => kinds[i] === "added")
      .map(l => l.slice(1));
    const newFields = extractNewFieldsWithLine(lines);
    const seenNames = new Set<string>();
    for (const { name: field, lineIdx } of newFields) {
      if (seenNames.has(field)) continue;
      seenNames.add(field);
      if (parentStructHasSerdeDerive(lines, lineIdx)) continue;
      const readPattern = new RegExp(`[\\.:]\\s*${escape(field)}\\b|\\b${escape(field)}\\s*:`);
      // The definition line itself matches readPattern — filter it out
      // by requiring at least TWO lines in the diff mention the field
      // (one defines, one reads).
      const hits = addedLines.filter(l => readPattern.test(l));
      if (hits.length < 2) {
        // Clamp so the message can never report a negative count.
        const otherMentions = Math.max(0, hits.length - 1);
        findings.push({
          check: "static",
          severity: "warn",
          summary: `field '${field}' added in ${path} but no read-site in the diff — could be placeholder state without a consumer`,
          evidence: [`${path}: added '${field}' with no reader; rest of diff has ${otherMentions} mentions`],
        });
      }
    }
  }

  return findings;
}

/**
 * Split a multi-file git diff into per-file line buffers.
 *
 * @param diff Raw unified diff text.
 * @returns Map keyed by the post-change path (the `b/` side of each
 *   `diff --git` header). Each value holds the lines that follow that
 *   header, up to the next header, with the header itself excluded.
 *   Text before the first header is discarded.
 */
function splitDiffByFile(diff: string): Map<string, string[]> {
  const out = new Map<string, string[]>();
  let current: string | null = null;
  let buf: string[] = [];
  for (const line of diff.split(/\r?\n/)) {
    const m = line.match(DIFF_GIT_HEADER);
    if (m) {
      if (current !== null) out.set(current, buf);
      current = m[2] ?? null;
      buf = [];
      continue;
    }
    buf.push(line);
  }
  if (current !== null) out.set(current, buf);
  return out;
}

// Extract new `pub name: Type,` fields from the per-file diff lines,
// keeping each occurrence's line index so the caller can resolve the
// parent struct. Same narrow rules as before: starts with `pub `,
// excludes `pub fn` / `pub struct` / etc.
function extractNewFieldsWithLine(lines: string[]): Array<{ name: string; lineIdx: number }> {
  // `pub <ident> :` where <ident> is not an item keyword.
  const fieldDecl =
    /^pub\s+(?!fn\b|struct\b|enum\b|mod\b|use\b|trait\b|impl\b|const\b|static\b|type\b)(\w+)\s*:/;
  const out: Array<{ name: string; lineIdx: number }> = [];
  lines.forEach((line, lineIdx) => {
    // Only added lines; `+++` is the file header, not content.
    if (!line.startsWith("+") || line.startsWith("+++")) return;
    const name = line.slice(1).trim().match(fieldDecl)?.[1];
    if (name !== undefined) out.push({ name, lineIdx });
  });
  return out;
}

// True if the field at `fieldLineIdx` lives inside a struct whose
// declaration carries `#[derive(... Serialize|Deserialize ...)]`. We
// walk backward through the diff (added + context lines both count —
// a struct declaration unchanged by the PR still appears as context)
// to find the nearest `pub struct` boundary, then scan a few lines
// above it for derive attributes. Removed ('-') lines are skipped in
// both walks: they are not part of the post-merge file, so a derive
// the PR deletes must not grant the exemption. Conservative bounds:
//   - 80 lines back to find `struct` (struct definitions can grow large)
//   - 8 lines above the `struct` keyword for attribute lines
// Stops the struct-search early if we hit a `}` at zero indent
// (the previous scope) or another `pub struct` (we left ours).
function parentStructHasSerdeDerive(lines: string[], fieldLineIdx: number): boolean {
  let structLineIdx = -1;
  for (let i = fieldLineIdx - 1; i >= 0 && i >= fieldLineIdx - 80; i--) {
    const raw = lines[i];
    if (typeof raw !== "string" || raw.length === 0) continue;
    if (raw.startsWith("-")) continue; // removed line (or `---` header)
    const body = stripDiffPrefix(raw);
    if (/^pub\s+struct\s+\w/.test(body.trim())) {
      structLineIdx = i;
      break;
    }
    // Closing brace at column 0 means the enclosing scope ended above
    // the field — we're not actually inside a struct.
    if (body.startsWith("}")) return false;
  }
  if (structLineIdx < 0) return false;

  for (let j = structLineIdx - 1; j >= 0 && j >= structLineIdx - 8; j--) {
    const raw = lines[j];
    if (typeof raw !== "string") continue;
    if (raw.startsWith("-")) continue; // removed attribute no longer applies
    const trimmed = stripDiffPrefix(raw).trim();
    // Blank lines and (doc) comments may sit between attributes.
    if (trimmed === "" || trimmed.startsWith("//")) continue;
    // First non-attribute line ends the attribute run.
    if (!trimmed.startsWith("#[")) break;
    if (/derive\s*\([^)]*\b(Serialize|Deserialize)\b/.test(trimmed)) return true;
  }
  return false;
}

// Strip leading +/-/space from a unified-diff line, leaving the raw
// source line. Handles the case where the line is shorter than 1 char
// (rare but real for empty-context lines).
function stripDiffPrefix(line: string): string {
  if (line.length === 0) return line;
  const c = line[0];
  if (c === "+" || c === "-" || c === " ") return line.slice(1);
  return line;
}

// Walk a single line and toggle the cross-line backtick state on each
// unescaped backtick. Single-quote and double-quote runs are treated
// as line-bounded, so only backticks are tracked across lines. Returns
// the new state for the next line.
//
// Escapes: a backslash consumes exactly the next character, so `\\"`
// (escaped backslash, then a real quote) closes the string while `\"`
// does not.
//
// Lone single quotes: a `'` with no later `'` on the line cannot open a
// line-bounded string. It is a Rust lifetime (`&'a str`) or an
// apostrophe in a comment, and is ignored so it cannot hide backticks
// that follow it.
function updateBacktickState(line: string, inBacktick: boolean): boolean {
  let state = inBacktick;
  let inDouble = false;
  let inSingle = false;
  for (let i = 0; i < line.length; i++) {
    const c = line[i];
    if (c === "\\") {
      i++; // skip the escaped character
      continue;
    }
    // Inside a multi-line backtick template, single/double quotes
    // don't open new strings — they're literal characters of the
    // template. Same applies the other way around.
    if (c === '"' && !inSingle && !state) {
      inDouble = !inDouble;
    } else if (c === "'" && !inDouble && !state) {
      if (inSingle) inSingle = false;
      else if (line.indexOf("'", i + 1) !== -1) inSingle = true;
    } else if (c === "`" && !inDouble && !inSingle) {
      state = !state;
    }
  }
  return state;
}

// True if `pos` falls inside a double- or single-quoted string on this
// line (backtick template literals too).
// Walks left→right toggling the "in quote" state on each unescaped
// quote. Per-line only — the file-level walk in runStaticCheck handles
// multi-line backtick templates via updateBacktickState.
//
// Escapes: a backslash consumes exactly the next character, so in
// `"C:\\"` the final quote really closes the string.
//
// Lone single quotes: a `'` with no later `'` on the line cannot open a
// line-bounded string. It is a Rust lifetime (`&'static str`) or an
// apostrophe, and is ignored so code after it is not mistaken for
// quoted text.
function isInsideQuotedString(line: string, pos: number): boolean {
  let inDouble = false;
  let inSingle = false;
  let inBacktick = false;
  const end = Math.min(pos, line.length);
  for (let i = 0; i < end; i++) {
    const c = line[i];
    if (c === "\\") {
      i++; // skip the escaped character
      continue;
    }
    if (c === '"' && !inSingle && !inBacktick) {
      inDouble = !inDouble;
    } else if (c === "'" && !inDouble && !inBacktick) {
      if (inSingle) inSingle = false;
      else if (line.indexOf("'", i + 1) !== -1) inSingle = true;
    } else if (c === "`" && !inDouble && !inSingle) {
      inBacktick = !inBacktick;
    }
  }
  return inDouble || inSingle || inBacktick;
}

// Escape every regex metacharacter in `s` so it can be embedded in a
// `new RegExp(...)` source and match literally. (Shadows the legacy
// global `escape`, which does URL-style escaping instead.)
function escape(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}