Some checks failed
lakehouse/auditor 10 blocking issues: cloud: claim not backed — "Verified live (current synthetic data):"
Lands 2 of the 3 BLOCKs from the auto-reset commit's audit:

1. static.ts:67-130 — backtick state-machine ordering
   `inMultilineBacktick` was updated AFTER pattern checks ran on a line, so any block-pattern hit on a line that opened a backtick block was evaluated under stale "outside-backtick" semantics. Net effect: false-positive BLOCK findings on hardcoded-string patterns sitting inside multi-line template literals (where they are legitimately quoted, not executed).
   Fix: compute state-at-line-start BEFORE pattern checks; carry state-at-line-end forward for the next iteration. Pattern checks now use `stateAtLineStart` consistently.

2. static.ts:223-228 — parentStructHasSerdeDerive bounds check
   The function walked backward from `fieldLineIdx` without validating it against `lines.length`. If a malformed diff fed in an out-of-range fieldLineIdx, the loop's implicit upper bound (`fieldLineIdx - 80`) could still be > 0, leading to undefined-slot reads or silently wrong results.
   Fix: defensive bail (`if (fieldLineIdx < 0 || >= lines.length) return false`) before the loop runs.

SKIPPED with rationale:
- BLOCK on types.ts:96 (requireSha256 "optional-chaining bypass")
  Investigated: requireString correctly catches null/undefined/object via `typeof !== "string"`; the call site at line 96 is just an invocation of the function defined at line 81-88. The full code paths (null, undefined, object, short string, valid hex) all produce correct error/success outcomes. Kimi's rationale was truncated at 200 chars; no bypass found in the actual code. Treating as a confabulation.

Verification:
- bun build auditor/checks/static.ts compiles
- Daemon restart needed to activate; auto-reset cap will fire [1/3] on the new SHA.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
312 lines
14 KiB
TypeScript
312 lines
14 KiB
TypeScript
// Static diff check — grep-style, no AST, no LLM. Looks for patterns
// that are high-signal evidence of placeholder code.
//
// Findings are severity-graded:
//   block — explicit non-impl markers (unimplemented!, todo!,
//           panic!("not implemented"), throw new Error("not implemented"))
//   warn  — TODO / FIXME / XXX / HACK comments on added lines,
//           new struct fields with no read-site anywhere in the diff,
//           suspiciously-empty function bodies ({ Ok(()) } / {} when
//           the commit message claims the fn "implements" something)
//   info  — hardcoded "test" / "dummy" / "placeholder" strings in
//           added lines (could be real, just flag for inspection)
//
// NOTE: two items above describe intent rather than current behavior.
// The info pattern table in this file does not match the literal "test",
// and no empty-function-body heuristic is implemented here.
//
// Consumes: raw unified diff text from Gitea.
|
|
|
|
import type { Finding } from "../types.ts";
|
|
|
|
// Markers, in Rust or TypeScript, that almost always mean "this code
// path is not implemented yet". runStaticCheck grades a hit on an added
// line as `block`. Each entry pairs the detector regex with the
// human-readable reason used in the finding summary.
const BLOCK_PATTERNS: Array<{ re: RegExp; why: string }> = [
  // Rust `unimplemented!(...)`
  {
    re: /\bunimplemented!\s*\(/,
    why: "unimplemented!() macro call",
  },
  // Rust `todo!(...)`
  {
    re: /\btodo!\s*\(/,
    why: "todo!() macro call",
  },
  // Rust `panic!("not implemented ...")` and close variants (case-insensitive)
  {
    re: /panic!\s*\(\s*"(?:not implemented|TODO|not yet|unimpl)/i,
    why: "panic! with not-implemented message",
  },
  // JS/TS `throw new Error("not implemented ...")`, single or double quoted
  {
    re: /throw\s+new\s+Error\s*\(\s*['"](?:not implemented|TODO|unimpl)/i,
    why: "throw Error 'not implemented'",
  },
];
|
|
|
|
// Work-in-progress comment markers. Both regexes are anchored on the
// leading '+' of a unified-diff line, so they must be tested against the
// raw diff line (prefix included) and only ever fire on added lines.
const WARN_COMMENT_PATTERNS: Array<{ re: RegExp; why: string }> = [
  // `//`-style comments
  {
    re: /^\+.*\/\/\s*(TODO|FIXME|XXX|HACK)\b/i,
    why: "TODO/FIXME/XXX/HACK comment added",
  },
  // `#`-style comments
  {
    re: /^\+.*#\s*(TODO|FIXME|XXX|HACK)\b/i,
    why: "TODO/FIXME/XXX/HACK comment added",
  },
];
|
|
|
|
// Double-quoted literals whose entire content is a stock stand-in value
// (case-insensitive). Informational only: such a string can be
// legitimate, so runStaticCheck reports it at `info` severity.
const INFO_HARDCODED_PATTERNS: Array<{ re: RegExp; why: string }> = [
  {
    re: /"(?:placeholder|dummy|foobar|xxx|replaceme|changeme)"/i,
    why: "suspicious hardcoded string",
  },
];
|
|
|
|
/**
 * Grep-style placeholder scan over a raw unified diff.
 *
 * For every file section that contains at least one added line:
 *  - `block`: an added line matches one of BLOCK_PATTERNS outside any
 *    string/template literal (skipped entirely for files under
 *    `auditor/checks/` and `auditor/fixtures/`, which contain the patterns
 *    as literals);
 *  - `warn`: an added line matches one of WARN_COMMENT_PATTERNS;
 *  - `info`: an added line matches one of INFO_HARDCODED_PATTERNS;
 *  - `warn`: a newly added `pub name:` field has no second mention among
 *    the file's added lines and its parent struct does not derive
 *    Serialize/Deserialize.
 *
 * The `+N` in line-level evidence is the 1-based index of the line within
 * that file's section of the diff, not a line number in the source file.
 *
 * @param diff Raw unified diff text (git format, one `diff --git` header per file).
 * @returns Findings in file order; per line the order is block, warn, info,
 *          followed by the file's field findings.
 */
export function runStaticCheck(diff: string): Finding[] {
  const findings: Finding[] = [];

  // Global clones of the BLOCK regexes so that every occurrence on a line
  // can be inspected. Looking only at the first match let a quoted mention
  // hide a live call later on the same line, e.g.
  //   assert_eq!(msg, "todo!()"); todo!()
  const blockScanners = BLOCK_PATTERNS.map(({ re, why }) => ({
    why,
    re: new RegExp(re.source, re.flags.includes("g") ? re.flags : `${re.flags}g`),
  }));

  for (const [path, lines] of splitDiffByFile(diff)) {
    // Skip sections with no added lines (pure deletes, mode changes, ...).
    // '+++' is the diff header, not content.
    if (!lines.some(l => l.startsWith("+") && !l.startsWith("+++"))) continue;

    // The auditor's own check files and fixtures contain the BLOCK patterns
    // as regex definitions / examples. Skipping the BLOCK scan there keeps
    // the checker from flagging its own literals; WARN/INFO still run.
    const isAuditorCheckerFile =
      path.startsWith("auditor/checks/") || path.startsWith("auditor/fixtures/");

    // Shared shape for the three line-level finding kinds.
    const report = (severity: "block" | "warn" | "info", why: string, idx: number, text: string): void => {
      findings.push({
        check: "static",
        severity,
        summary: `${why} in ${path}`,
        evidence: [`${path}:+${idx + 1}: ${text.trim().slice(0, 160)}`],
      });
    };

    // Whether the post-merge file is inside a multi-line backtick template
    // at the start of the current line. Fed by context and added lines only.
    let inMultilineBacktick = false;

    for (let idx = 0; idx < lines.length; idx++) {
      const line = lines[idx];

      // Diff bookkeeping and removed lines are not part of the post-merge
      // file, so they neither get scanned nor move the template state.
      if (line.startsWith("+++") || line.startsWith("---") ||
          line.startsWith("@@") || line.startsWith("\\ No newline")) continue;
      if (line.startsWith("-")) continue;

      const isAdded = line.startsWith("+");
      // Strip the diff prefix (' ' for context, '+' for added).
      const body = (isAdded || line.startsWith(" ")) ? line.slice(1) : line;

      // Capture the state ENTERING this line before advancing it, so the
      // checks below never see the state that results from this line.
      const stateAtLineStart = inMultilineBacktick;
      inMultilineBacktick = updateBacktickState(body, stateAtLineStart);

      if (!isAdded) continue;

      if (!isAuditorCheckerFile) {
        // Evaluate each match by POSITION. When the line starts inside a
        // multi-line template we prepend a backtick so the per-line walker
        // starts in "inside template" state; text before the closing tick
        // is then treated as quoted and code after it as live. Previously
        // the whole line was skipped, which hid e.g. `...`; todo!()
        const probe = stateAtLineStart ? "`" + body : body;
        const shift = stateAtLineStart ? 1 : 0;
        for (const { re, why } of blockScanners) {
          const hasLiveMatch = Array.from(body.matchAll(re)).some(
            m => typeof m.index === "number" && !isInsideQuotedString(probe, m.index + shift),
          );
          // At most one finding per pattern per line.
          if (hasLiveMatch) report("block", why, idx, body);
        }
      }

      // WARN regexes are anchored on the leading '+', so they get the raw
      // diff line; INFO regexes get the stripped source text.
      for (const { re, why } of WARN_COMMENT_PATTERNS) {
        if (re.test(line)) report("warn", why, idx, body);
      }
      for (const { re, why } of INFO_HARDCODED_PATTERNS) {
        if (re.test(body)) report("info", why, idx, body);
      }
    }

    // "Field added but never read" heuristic. Deliberately limited to this
    // file's added lines (no codebase-wide grep): if nothing else that the
    // diff adds mentions the field, the PR ships state without a consumer.
    //
    // Serde exemption: when the parent struct derives Serialize or
    // Deserialize the derive macro itself consumes every field, so such
    // fields are not reported.
    const addedLines = lines
      .filter(l => l.startsWith("+") && !l.startsWith("+++"))
      .map(l => l.slice(1));
    const seenNames = new Set<string>();
    for (const { name: field, lineIdx } of extractNewFieldsWithLine(lines)) {
      // Only the first added occurrence of a field name is evaluated.
      if (seenNames.has(field)) continue;
      seenNames.add(field);
      if (parentStructHasSerdeDerive(lines, lineIdx)) continue;

      // Matches `.field`, `:field` / `::field`, or `field:`.
      const readPattern = new RegExp(`[\\.:]\\s*${escape(field)}\\b|\\b${escape(field)}\\s*:`);
      // The definition line itself matches readPattern, so a field counts
      // as used only when at least TWO added lines mention it.
      const hits = addedLines.filter(l => readPattern.test(l));
      if (hits.length < 2) {
        findings.push({
          check: "static",
          severity: "warn",
          summary: `field '${field}' added in ${path} but no read-site in the diff — could be placeholder state without a consumer`,
          evidence: [`${path}: added '${field}' with no reader; rest of diff has ${hits.length - 1} mentions`],
        });
      }
    }
  }

  return findings;
}
|
|
|
|
/**
 * Extract the post-change path from a `diff --git` header line.
 *
 * Returns null when `line` is not a recognisable header. Handles, in order:
 *  1. identical unquoted paths on both sides, which may contain spaces
 *     (`diff --git a/my file.ts b/my file.ts`);
 *  2. whitespace-free paths that may differ between sides (renames), in
 *     which case the `b/` side is returned;
 *  3. a double-quoted `b/` side (git quotes paths containing special
 *     characters). The path is returned as written, escape sequences are
 *     NOT decoded.
 */
function pathFromDiffGitHeader(line: string): string | null {
  if (!line.startsWith("diff --git ")) return null;

  const samePath = line.match(/^diff --git a\/(.+) b\/\1$/);
  if (samePath) return samePath[1];

  const plain = line.match(/^diff --git a\/(\S+) b\/(\S+)/);
  if (plain) return plain[2];

  const quoted = line.match(/^diff --git (?:"a\/(?:[^"\\]|\\.)*"|a\/\S+) "b\/((?:[^"\\]|\\.)*)"$/);
  if (quoted) return quoted[1];

  return null;
}

/**
 * Split a multi-file git diff into per-file sections.
 *
 * Each `diff --git` header starts a new section keyed by the file's new
 * path; the header line itself is dropped and every following line up to
 * the next header is kept verbatim (including `---`/`+++`/`@@` lines).
 * Lines that precede the first header are discarded. Both LF and CRLF line
 * endings are accepted. If a path occurs twice, the later section wins.
 *
 * Headers for paths containing spaces or quoted by git are recognised too;
 * previously they were not, and that file's lines were silently appended
 * to the preceding file's section.
 *
 * @param diff Raw diff text.
 * @returns Map from file path to that file's diff lines, in diff order.
 */
function splitDiffByFile(diff: string): Map<string, string[]> {
  const sections = new Map<string, string[]>();
  let currentPath: string | null = null;
  let currentLines: string[] = [];

  const flush = (): void => {
    if (currentPath) sections.set(currentPath, currentLines);
  };

  for (const line of diff.split(/\r?\n/)) {
    const headerPath = pathFromDiffGitHeader(line);
    if (headerPath !== null) {
      flush();
      currentPath = headerPath;
      currentLines = [];
      continue;
    }
    currentLines.push(line);
  }
  flush();

  return sections;
}
|
|
|
|
/**
 * Collect struct-field-looking declarations (`pub name: Type`) from the
 * added lines of one file's diff.
 *
 * Only lines starting with '+' (but not the '+++' header) are considered.
 * After dropping the '+' and trimming, the line must start with `pub`,
 * whitespace, and an identifier directly followed by ':'. Item keywords
 * (`fn`, `struct`, `enum`, `mod`, `use`, `trait`, `impl`, `const`,
 * `static`, `type`) are rejected as the identifier.
 *
 * @param lines Diff lines of a single file, prefixes intact.
 * @returns One entry per matching line, in order, with the field name and
 *          the line's index in `lines` so callers can locate the parent struct.
 */
function extractNewFieldsWithLine(lines: string[]): Array<{ name: string; lineIdx: number }> {
  const fieldDecl = /^pub\s+(?!fn\b|struct\b|enum\b|mod\b|use\b|trait\b|impl\b|const\b|static\b|type\b)(\w+)\s*:/;
  const found: Array<{ name: string; lineIdx: number }> = [];

  for (const [lineIdx, raw] of lines.entries()) {
    const isAddition = raw.startsWith("+") && !raw.startsWith("+++");
    if (!isAddition) continue;

    const hit = fieldDecl.exec(raw.slice(1).trim());
    if (hit !== null) {
      found.push({ name: hit[1], lineIdx });
    }
  }

  return found;
}
|
|
|
|
/**
 * True if the field at `fieldLineIdx` sits inside a `pub struct` whose
 * attribute lines include `#[derive(... Serialize|Deserialize ...)]`.
 *
 * Works on one file's diff lines. Only lines that exist AFTER the change
 * (context and added lines) are considered; removed ('-') lines are
 * skipped. Previously a deleted `#[derive(Serialize)]` still granted the
 * exemption, and a deleted line sitting between the attributes and the
 * `struct` keyword cut the attribute scan short.
 *
 * Search strategy:
 *  1. Walk up from the field, at most 80 diff lines, to the nearest
 *     `pub struct`. A line whose source text starts with '}' at column 0
 *     means an enclosing scope closed above the field, so the answer is
 *     false.
 *  2. From the struct line walk up at most 8 diff lines, skipping blanks
 *     and `//` comments, as long as lines are `#[...]` attributes, looking
 *     for a derive that names Serialize or Deserialize.
 *
 * @param lines        Diff lines of a single file, prefixes intact.
 * @param fieldLineIdx Index into `lines` of the field declaration.
 * @returns false for out-of-range indices and whenever no struct or no
 *          serde derive is found within the bounds above.
 */
function parentStructHasSerdeDerive(lines: string[], fieldLineIdx: number): boolean {
  // Defensive: an index outside the array cannot point at a field.
  if (fieldLineIdx < 0 || fieldLineIdx >= lines.length) return false;

  const STRUCT_LOOKBACK = 80; // struct bodies can be long
  const ATTR_LOOKBACK = 8;    // attribute stacks are short

  // Source text of a diff line as present in the post-change file, or null
  // when the entry is not a string or is a removed ('-') line.
  const postChangeText = (raw: string | undefined): string | null => {
    if (typeof raw !== "string" || raw.startsWith("-")) return null;
    return raw.startsWith("+") || raw.startsWith(" ") ? raw.slice(1) : raw;
  };

  let structLineIdx = -1;
  for (let i = fieldLineIdx - 1; i >= 0 && i >= fieldLineIdx - STRUCT_LOOKBACK; i--) {
    const body = postChangeText(lines[i]);
    if (body === null || body.length === 0) continue;
    if (/^pub\s+struct\s+\w/.test(body.trim())) {
      structLineIdx = i;
      break;
    }
    // Closing brace at column 0: the enclosing scope ended above the field,
    // so the field is not inside a struct we can see.
    if (body.startsWith("}")) return false;
  }
  if (structLineIdx < 0) return false;

  for (let j = structLineIdx - 1; j >= 0 && j >= structLineIdx - ATTR_LOOKBACK; j--) {
    const body = postChangeText(lines[j]);
    if (body === null) continue;
    const trimmed = body.trim();
    // Blank lines and comments (`//` also covers `///`) may sit between attributes.
    if (trimmed === "" || trimmed.startsWith("//")) continue;
    // Anything that is not an attribute ends the struct's attribute stack.
    if (!trimmed.startsWith("#[")) break;
    if (/derive\s*\([^)]*\b(Serialize|Deserialize)\b/.test(trimmed)) return true;
  }
  return false;
}
|
|
|
|
/**
 * Drop the one-character unified-diff marker ('+', '-' or ' ') from the
 * front of a line and return the remaining source text. Lines that start
 * with anything else, including the empty string, are returned unchanged.
 */
function stripDiffPrefix(line: string): string {
  switch (line.charAt(0)) {
    case "+":
    case "-":
    case " ":
      return line.slice(1);
    default:
      // Also reached for "", where charAt(0) is "".
      return line;
  }
}
|
|
|
|
/**
 * Advance the cross-line "inside a backtick template" state over one line
 * of source text.
 *
 * Every unescaped backtick that is not inside a single- or double-quoted
 * run on this line toggles the state. Quote runs are tracked per line only
 * (they start closed on every call), and quotes seen while inside a
 * template are treated as plain characters.
 *
 * A backslash escapes exactly the character that follows it, so an escaped
 * backslash does not escape what comes after: in `"C:\\"` the final quote
 * closes the string. The previous "is the preceding character a
 * backslash?" test got this wrong and left the walker stuck inside the
 * string for the rest of the line.
 *
 * Limitations visible from the logic: every unescaped `'` toggles
 * single-quote state (so a lone apostrophe or a Rust lifetime tick opens a
 * run), and backticks nested in `${...}` toggle like any other backtick.
 *
 * @param line       Source text of the line, diff prefix already removed.
 * @param inBacktick State at the start of the line.
 * @returns State at the end of the line, to be fed into the next call.
 */
function updateBacktickState(line: string, inBacktick: boolean): boolean {
  let state = inBacktick;
  let inDouble = false;
  let inSingle = false;
  for (let i = 0; i < line.length; i++) {
    const c = line[i];
    if (c === "\\") {
      // Consume the escaped character so it can neither toggle a quote nor
      // act as an escape itself.
      i++;
      continue;
    }
    if (c === '"' && !inSingle && !state) inDouble = !inDouble;
    else if (c === "'" && !inDouble && !state) inSingle = !inSingle;
    else if (c === "`" && !inDouble && !inSingle) state = !state;
  }
  return state;
}
|
|
|
|
/**
 * True if character offset `pos` of `line` lies inside a double-quoted,
 * single-quoted or backtick-quoted run that was opened earlier on the SAME
 * line. The walk starts with all quotes closed, so callers that track
 * multi-line templates must account for the state at line start themselves.
 *
 * Quotes of one kind are ignored while a run of another kind is open. A
 * backslash escapes exactly the next character, so an escaped backslash
 * does not escape what follows it (`"C:\\"` ends at the final quote).
 * The previous "is the preceding character a backslash?" test misread that
 * case and reported code after such a string as still being quoted.
 *
 * Limitation visible from the logic: every unescaped `'` toggles
 * single-quote state, so an unmatched apostrophe or Rust lifetime tick
 * before `pos` makes the position count as quoted.
 *
 * @param line Source text of one line.
 * @param pos  Offset to classify; only characters before it are examined.
 */
function isInsideQuotedString(line: string, pos: number): boolean {
  let inDouble = false;
  let inSingle = false;
  let inBacktick = false;
  for (let i = 0; i < pos; i++) {
    const c = line[i];
    if (c === "\\") {
      // Skip the escaped character; it can neither toggle a quote nor act
      // as an escape itself.
      i++;
      continue;
    }
    if (c === '"' && !inSingle && !inBacktick) inDouble = !inDouble;
    else if (c === "'" && !inDouble && !inBacktick) inSingle = !inSingle;
    else if (c === "`" && !inDouble && !inSingle) inBacktick = !inBacktick;
  }
  return inDouble || inSingle || inBacktick;
}
|
|
|
|
/**
 * Backslash-escape every regular-expression metacharacter in `s` so the
 * result can be embedded in a `new RegExp(...)` source and match `s`
 * literally.
 */
function escape(s: string): string {
  const metachar = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metachar, ch => `\\${ch}`);
}
|