From 0306dd88c1ba6b288de5248ef447298f376810a2 Mon Sep 17 00:00:00 2001 From: profit Date: Wed, 22 Apr 2026 21:31:35 -0500 Subject: [PATCH] =?UTF-8?q?auditor:=20close=20the=20verdict=E2=86=92playbo?= =?UTF-8?q?ok=20loop=20+=20fix=20rubric-string=20false=20positive?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes that fell out of running the auto-loop for real on PR #8: 1. The systemd auditor blocked PR #8 on 'unimplemented!()' / 'todo!()' in tests/real-world/hard_task_escalation.ts — but those strings are the rubric itself, not macro calls. Added isInsideQuotedString() detection in static.ts: BLOCK_PATTERNS now skip matches that fall inside double-quoted / single-quoted / backtick string literals on the added line. WARN/INFO patterns still run — a TODO comment in a string is still a valid signal. 2. Verdicts were being persisted to disk but never fed back as learning signal. Added appendAuditLessons() — every block/warn finding writes a JSONL row to data/_kb/audit_lessons.jsonl with a path-agnostic signature (strips file paths, line numbers, commit hashes) so the SAME class of finding on DIFFERENT files dedups to one signature. kb_query now tails audit_lessons.jsonl and emits recurrence findings: 2 distinct PRs hit a signature = info, 3-4 = warn, 5+ = block. Severity ramps on distinct-PR count, not total rows, so a single unfixed PR being re-audited doesn't inflate its own recurrence score. Fires on post-verdict fire-and-forget (can't break the audit if disk write fails). The learning loop is now closed: each audit contributes to the KB that guides the next audit. Tested: unit tests for normalizedSignature confirmed path-agnostic dedup; static.ts regression tests confirmed rubric strings no longer trip BLOCK while real unquoted unimplemented!() still does. 
--- auditor/audit.ts | 52 +++++++++++++++++++++++++++- auditor/checks/kb_query.ts | 71 ++++++++++++++++++++++++++++++++++++++ auditor/checks/static.ts | 27 ++++++++++++++- 3 files changed, 148 insertions(+), 2 deletions(-) diff --git a/auditor/audit.ts b/auditor/audit.ts index ae5fdc8..8fb0aee 100644 --- a/auditor/audit.ts +++ b/auditor/audit.ts @@ -12,7 +12,8 @@ // review — reviews have self-review restrictions on Gitea and the // auditor currently uses the same PAT as the PR author). -import { readFile, writeFile, mkdir } from "node:fs/promises"; +import { readFile, writeFile, mkdir, appendFile } from "node:fs/promises"; +import { createHash } from "node:crypto"; import { join } from "node:path"; import type { PrSnapshot, Verdict, Finding } from "./types.ts"; import { getPrDiff, postCommitStatus, postIssueComment } from "./gitea.ts"; @@ -24,6 +25,10 @@ import { runInferenceCheck } from "./checks/inference.ts"; import { runKbCheck } from "./checks/kb_query.ts"; const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts"; +// Playbook for audit findings — one row per block/warn finding from a +// verdict. kb_query tails this next audit and escalates recurrences. +// Structured as JSONL so it's cheap to append and cheap to tail. +const AUDIT_LESSONS_JSONL = "/home/profit/lakehouse/data/_kb/audit_lessons.jsonl"; export interface AuditOptions { // Skip the cloud inference call (fast path for iteration). Default false. @@ -80,6 +85,15 @@ export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise< await persistVerdict(verdict); + // Feedback loop — every block/warn finding becomes a row in + // audit_lessons.jsonl, dedup-keyed by (check, normalized-summary). + // The next audit's kb_query reads these and escalates recurring + // findings so we don't lose the "this pattern has been flagged + // before" signal across runs. Fire-and-forget; failure here must + // not break the audit. 
+ appendAuditLessons(verdict).catch(e => + console.error(`[audit] audit_lessons append failed: ${(e as Error).message}`)); + if (!opts.dry_run) { await postToGitea(verdict); } @@ -87,6 +101,42 @@ return verdict; } +// Normalizes a finding summary for dedup: strips path-specific tails +// ("in path/to/file.ts" → "in "), line numbers, and long +// commit-hash snippets. The goal is: the SAME class of finding on +// DIFFERENT files should share a signature, so we can measure +// "this pattern keeps showing up." +function normalizedSignature(f: Finding): string { + const summary = String(f.summary) + .replace(/\bin\s+\S+\.(ts|rs|js|py|md)\b/gi, "in ") + .replace(/:\+?\d+\b/g, ":") + .replace(/[0-9a-f]{8,}/gi, "") + .replace(/\s+/g, " ") + .trim() + .slice(0, 240); + const src = `${f.check}::${f.severity}::${summary}`; + return createHash("sha256").update(src).digest("hex").slice(0, 16); +} + +async function appendAuditLessons(v: Verdict): Promise<void> { + const actionable = v.findings.filter(f => f.severity === "block" || f.severity === "warn"); + if (actionable.length === 0) return; + await mkdir(join(AUDIT_LESSONS_JSONL, ".."), { recursive: true }); + const rows: string[] = []; + for (const f of actionable) { + rows.push(JSON.stringify({ + signature: normalizedSignature(f), + check: f.check, + severity: f.severity, + summary: f.summary, + pr_number: v.pr_number, + head_sha: v.head_sha, + audited_at: v.audited_at, + })); + } + await appendFile(AUDIT_LESSONS_JSONL, rows.join("\n") + "\n"); +} + async function persistVerdict(v: Verdict): Promise<void> { await mkdir(VERDICTS_DIR, { recursive: true }); const filename = `${v.pr_number}-${v.head_sha.slice(0, 12)}.json`; diff --git a/auditor/checks/kb_query.ts b/auditor/checks/kb_query.ts index f666538..0c79242 100644 --- a/auditor/checks/kb_query.ts +++ b/auditor/checks/kb_query.ts @@ -23,8 +23,14 @@ const KB_DIR = "/home/profit/lakehouse/data/_kb"; const 
OBSERVER_OPS = "/home/profit/lakehouse/data/_observer/ops.jsonl"; const BOT_CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles"; const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl"; +const AUDIT_LESSONS_JSONL = "/home/profit/lakehouse/data/_kb/audit_lessons.jsonl"; const TAIL_LINES = 500; const MAX_BOT_CYCLE_FILES = 30; +// Recurrence severity anchor — tiers run on DISTINCT PRs hitting a +// signature (not raw rows): 2 PRs = info, >= this value (3-4) = warn, +// >= this value + 2 (5+) = block. Several independent PRs tripping the +// SAME signature is strong evidence of a real problem, not noise. +const RECURRENCE_BLOCK_THRESHOLD = 3; export async function runKbCheck(claims: Claim[], prFiles: string[] = []): Promise<Finding[]> { const findings: Finding[] = []; @@ -59,6 +65,18 @@ findings.push(...scrumFindings); } + // 6. Audit-lessons feedback loop — summarize the top recurring + // patterns from prior audits' block/warn findings. If the same + // pattern signature has hit 2+ distinct PRs, emit a recurrence + // finding (severity ramps with distinct-PR count, reaching + // block at 5+) so reviewers know this is a known-recurring + // class, not a one-off. Does NOT couple to the current audit's + // static/inference findings (those run in parallel and we can't + // see them here) — the amplification is emergent: if the current + // audit's finding-summary matches a top recurrence, both show. + const auditLessonFindings = await checkAuditLessons(); + findings.push(...auditLessonFindings); + return findings; } @@ -193,6 +211,59 @@ function observerBySource(ops: any[]): string { return Object.entries(c).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}=${v}`).join(", ") || "empty"; } +// Audit-lessons — reads data/_kb/audit_lessons.jsonl (populated by +// every audit's appendAuditLessons). 
Groups rows by `signature` (the +// check-normalized dedup key) and emits a finding per signature that +// hits 2+ distinct PRs. Severity ramps with distinct-PR count: 2 = info, 3-4 = warn, +// 5+ = block. This is how the auditor accumulates institutional +// memory: without this check, a recurring flaw (placeholder code +// class X, unbacked claim pattern Y) looks new every audit. +async function checkAuditLessons(): Promise<Finding[]> { + const rows = await tailJsonl(AUDIT_LESSONS_JSONL, TAIL_LINES * 4); + if (rows.length === 0) return []; + + type Agg = { count: number; last_summary: string; last_pr: number; last_sha: string; checks: Set<string>; prs: Set<number> }; + const bySig = new Map<string, Agg>(); + for (const r of rows) { + const sig = String(r.signature ?? ""); + if (!sig) continue; + const a = bySig.get(sig) ?? { + count: 0, last_summary: "", last_pr: 0, last_sha: "", + checks: new Set(), prs: new Set(), + }; + a.count += 1; + a.last_summary = String(r.summary ?? a.last_summary); + a.last_pr = Number(r.pr_number ?? a.last_pr); + a.last_sha = String(r.head_sha ?? a.last_sha); + if (r.check) a.checks.add(String(r.check)); + if (r.pr_number) a.prs.add(Number(r.pr_number)); + bySig.set(sig, a); + } + + const findings: Finding[] = []; + // Emit only signatures with 2+ prior PRs (not just 2+ rows — a + // single unresolved PR being re-audited on every push would + // otherwise self-inflate). Distinct-PRs count is the real signal. + for (const [sig, a] of bySig) { + if (a.prs.size < 2) continue; + const sev: "block" | "warn" | "info" = + a.prs.size >= RECURRENCE_BLOCK_THRESHOLD + 2 ? "block" : + a.prs.size >= RECURRENCE_BLOCK_THRESHOLD ? 
"warn" : "info"; + findings.push({ + check: "kb_query", + severity: sev, + summary: `recurring audit pattern (${a.prs.size} distinct PRs, ${a.count} total flaggings): ${a.last_summary.slice(0, 180)}`, + evidence: [ + `signature=${sig}`, + `checks: ${Array.from(a.checks).join(",")}`, + `PRs: ${Array.from(a.prs).sort((x,y)=>x-y).join(",")}`, + `most recent: PR #${a.last_pr} @ ${a.last_sha.slice(0, 12)}`, + ], + }); + } + return findings; +} + // Scrum-master reviews — the scrum pipeline writes one row per // accepted per-file review. We match reviews whose `file` matches // any path in the PR's diff, then surface the *preview* + which diff --git a/auditor/checks/static.ts b/auditor/checks/static.ts index dc31e38..5c8a329 100644 --- a/auditor/checks/static.ts +++ b/auditor/checks/static.ts @@ -61,7 +61,13 @@ export function runStaticCheck(diff: string): Finding[] { if (!isAuditorCheckerFile) { for (const { re, why } of BLOCK_PATTERNS) { - if (re.test(added)) { + const m = added.match(re); + if (m && typeof m.index === "number") { + // Skip if the match sits inside a quoted string literal — + // this is how rubric files (tests/real-world/*, prompt + // templates) legitimately reference the patterns they + // guard against, without actually executing them. + if (isInsideQuotedString(added, m.index)) continue; findings.push({ check: "static", severity: "block", @@ -154,6 +160,25 @@ function extractNewFields(addedLines: string[]): string[] { return Array.from(fields); } +// True if `pos` falls inside a double- or single-quoted string on this +// line (backtick template literals too). Walks left→right toggling the +// "in quote" state on each unescaped quote. Good enough for single- +// line matches; multi-line strings aren't parsed (they're extremely +// rare in the patterns we're blocking on, and would require a proper +// tokenizer to handle correctly). 
+function isInsideQuotedString(line: string, pos: number): boolean { + let inDouble = false, inSingle = false, inBacktick = false; + for (let i = 0; i < pos; i++) { + const c = line[i]; + const esc = i > 0 && line[i - 1] === "\\"; + if (esc) continue; + if (c === '"' && !inSingle && !inBacktick) inDouble = !inDouble; + else if (c === "'" && !inDouble && !inBacktick) inSingle = !inSingle; + else if (c === "`" && !inDouble && !inSingle) inBacktick = !inBacktick; + } + return inDouble || inSingle || inBacktick; +} + function escape(s: string): string { return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); }