auditor: close the verdict→playbook loop + fix rubric-string false positive
Some checks failed
lakehouse/auditor 2 blocking issues: unimplemented!() macro call in tests/real-world/hard_task_escalation.ts
Some checks failed
lakehouse/auditor 2 blocking issues: unimplemented!() macro call in tests/real-world/hard_task_escalation.ts
Two changes that fell out of running the auto-loop for real on PR #8: 1. The systemd auditor blocked PR #8 on 'unimplemented!()' / 'todo!()' in tests/real-world/hard_task_escalation.ts — but those strings are the rubric itself, not macro calls. Added isInsideQuotedString() detection in static.ts: BLOCK_PATTERNS now skip matches that fall inside double-quoted / single-quoted / backtick string literals on the added line. WARN/INFO patterns still run — a TODO comment in a string is still a valid signal. 2. Verdicts were being persisted to disk but never fed back as learning signal. Added appendAuditLessons() — every block/warn finding writes a JSONL row to data/_kb/audit_lessons.jsonl with a path-agnostic signature (strips file paths, line numbers, commit hashes) so the SAME class of finding on DIFFERENT files dedups to one signature. kb_query now tails audit_lessons.jsonl and emits recurrence findings: 2 distinct PRs hit a signature = info, 3-4 = warn, 5+ = block. Severity ramps on distinct-PR count, not total rows, so a single unfixed PR being re-audited doesn't inflate its own recurrence score. Fires on post-verdict fire-and-forget (can't break the audit if disk write fails). The learning loop is now closed: each audit contributes to the KB that guides the next audit. Tested: unit tests for normalizedSignature confirmed path-agnostic dedup; static.ts regression tests confirmed rubric strings no longer trip BLOCK while real unquoted unimplemented!() still does.
This commit is contained in:
parent
dc01ba0a3b
commit
0306dd88c1
@ -12,7 +12,8 @@
|
|||||||
// review — reviews have self-review restrictions on Gitea and the
|
// review — reviews have self-review restrictions on Gitea and the
|
||||||
// auditor currently uses the same PAT as the PR author).
|
// auditor currently uses the same PAT as the PR author).
|
||||||
|
|
||||||
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
import { readFile, writeFile, mkdir, appendFile } from "node:fs/promises";
|
||||||
|
import { createHash } from "node:crypto";
|
||||||
import { join } from "node:path";
|
import { join } from "node:path";
|
||||||
import type { PrSnapshot, Verdict, Finding } from "./types.ts";
|
import type { PrSnapshot, Verdict, Finding } from "./types.ts";
|
||||||
import { getPrDiff, postCommitStatus, postIssueComment } from "./gitea.ts";
|
import { getPrDiff, postCommitStatus, postIssueComment } from "./gitea.ts";
|
||||||
@ -24,6 +25,10 @@ import { runInferenceCheck } from "./checks/inference.ts";
|
|||||||
import { runKbCheck } from "./checks/kb_query.ts";
|
import { runKbCheck } from "./checks/kb_query.ts";
|
||||||
|
|
||||||
const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts";
|
const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts";
|
||||||
|
// Playbook for audit findings — one row per block/warn finding from a
|
||||||
|
// verdict. kb_query tails this next audit and escalates recurrences.
|
||||||
|
// Structured as JSONL so it's cheap to append and cheap to tail.
|
||||||
|
const AUDIT_LESSONS_JSONL = "/home/profit/lakehouse/data/_kb/audit_lessons.jsonl";
|
||||||
|
|
||||||
export interface AuditOptions {
|
export interface AuditOptions {
|
||||||
// Skip the cloud inference call (fast path for iteration). Default false.
|
// Skip the cloud inference call (fast path for iteration). Default false.
|
||||||
@ -80,6 +85,15 @@ export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<
|
|||||||
|
|
||||||
await persistVerdict(verdict);
|
await persistVerdict(verdict);
|
||||||
|
|
||||||
|
// Feedback loop — every block/warn finding becomes a row in
// audit_lessons.jsonl, dedup-keyed by its normalized signature
// (check + severity + path-scrubbed summary). The next audit's
// kb_query reads these and escalates recurring findings so we don't
// lose the "this pattern has been flagged before" signal across
// runs. Fire-and-forget; failure here must not break the audit.
|
||||||
|
appendAuditLessons(verdict).catch(e =>
|
||||||
|
console.error(`[audit] audit_lessons append failed: ${(e as Error).message}`));
|
||||||
|
|
||||||
if (!opts.dry_run) {
|
if (!opts.dry_run) {
|
||||||
await postToGitea(verdict);
|
await postToGitea(verdict);
|
||||||
}
|
}
|
||||||
@ -87,6 +101,42 @@ export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<
|
|||||||
return verdict;
|
return verdict;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Normalizes a finding summary for dedup: strips path-specific tails
|
||||||
|
// ("in path/to/file.ts" → "in <file>"), line numbers, and long
|
||||||
|
// commit-hash snippets. The goal is: the SAME class of finding on
|
||||||
|
// DIFFERENT files should share a signature, so we can measure
|
||||||
|
// "this pattern keeps showing up."
|
||||||
|
function normalizedSignature(f: Finding): string {
|
||||||
|
const summary = String(f.summary)
|
||||||
|
.replace(/\bin\s+\S+\.(ts|rs|js|py|md)\b/gi, "in <file>")
|
||||||
|
.replace(/:\+?\d+\b/g, ":<line>")
|
||||||
|
.replace(/[0-9a-f]{8,}/gi, "<hash>")
|
||||||
|
.replace(/\s+/g, " ")
|
||||||
|
.trim()
|
||||||
|
.slice(0, 240);
|
||||||
|
const src = `${f.check}::${f.severity}::${summary}`;
|
||||||
|
return createHash("sha256").update(src).digest("hex").slice(0, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function appendAuditLessons(v: Verdict): Promise<void> {
|
||||||
|
const actionable = v.findings.filter(f => f.severity === "block" || f.severity === "warn");
|
||||||
|
if (actionable.length === 0) return;
|
||||||
|
await mkdir(join(AUDIT_LESSONS_JSONL, ".."), { recursive: true });
|
||||||
|
const rows: string[] = [];
|
||||||
|
for (const f of actionable) {
|
||||||
|
rows.push(JSON.stringify({
|
||||||
|
signature: normalizedSignature(f),
|
||||||
|
check: f.check,
|
||||||
|
severity: f.severity,
|
||||||
|
summary: f.summary,
|
||||||
|
pr_number: v.pr_number,
|
||||||
|
head_sha: v.head_sha,
|
||||||
|
audited_at: v.audited_at,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
await appendFile(AUDIT_LESSONS_JSONL, rows.join("\n") + "\n");
|
||||||
|
}
|
||||||
|
|
||||||
async function persistVerdict(v: Verdict): Promise<void> {
|
async function persistVerdict(v: Verdict): Promise<void> {
|
||||||
await mkdir(VERDICTS_DIR, { recursive: true });
|
await mkdir(VERDICTS_DIR, { recursive: true });
|
||||||
const filename = `${v.pr_number}-${v.head_sha.slice(0, 12)}.json`;
|
const filename = `${v.pr_number}-${v.head_sha.slice(0, 12)}.json`;
|
||||||
|
|||||||
@ -23,8 +23,14 @@ const KB_DIR = "/home/profit/lakehouse/data/_kb";
|
|||||||
const OBSERVER_OPS = "/home/profit/lakehouse/data/_observer/ops.jsonl";
|
const OBSERVER_OPS = "/home/profit/lakehouse/data/_observer/ops.jsonl";
|
||||||
const BOT_CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles";
|
const BOT_CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles";
|
||||||
const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl";
|
const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl";
|
||||||
|
const AUDIT_LESSONS_JSONL = "/home/profit/lakehouse/data/_kb/audit_lessons.jsonl";
|
||||||
const TAIL_LINES = 500;
|
const TAIL_LINES = 500;
|
||||||
const MAX_BOT_CYCLE_FILES = 30;
|
const MAX_BOT_CYCLE_FILES = 30;
|
||||||
|
// Recurrence threshold — severity ramps on the number of DISTINCT
// PRs that share a pattern signature, not total rows: 2 PRs = info,
// 3-4 (>= threshold) = warn, 5+ (>= threshold + 2) = block. The
// rationale: several independent audits all flagging the SAME
// pattern signature is strong evidence the pattern is a real
// problem, not noise.
|
||||||
|
const RECURRENCE_BLOCK_THRESHOLD = 3;
|
||||||
|
|
||||||
export async function runKbCheck(claims: Claim[], prFiles: string[] = []): Promise<Finding[]> {
|
export async function runKbCheck(claims: Claim[], prFiles: string[] = []): Promise<Finding[]> {
|
||||||
const findings: Finding[] = [];
|
const findings: Finding[] = [];
|
||||||
@ -59,6 +65,18 @@ export async function runKbCheck(claims: Claim[], prFiles: string[] = []): Promi
|
|||||||
findings.push(...scrumFindings);
|
findings.push(...scrumFindings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 6. Audit-lessons feedback loop — summarize the top recurring
// patterns from prior audits' block/warn findings. A signature
// seen on 2+ distinct PRs is surfaced, with severity ramping on
// the distinct-PR count (2 = info, 3-4 = warn, 5+ = block), so
// reviewers know this is a known-recurring class, not a one-off.
// Does NOT couple to the current audit's static/inference findings
// (those run in parallel and we can't see them here) — the
// amplification is emergent: if the current audit's finding-summary
// matches a top recurrence, the reviewer sees both.
|
||||||
|
const auditLessonFindings = await checkAuditLessons();
|
||||||
|
findings.push(...auditLessonFindings);
|
||||||
|
|
||||||
return findings;
|
return findings;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,6 +211,59 @@ function observerBySource(ops: any[]): string {
|
|||||||
return Object.entries(c).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}=${v}`).join(", ") || "empty";
|
return Object.entries(c).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}=${v}`).join(", ") || "empty";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Audit-lessons — reads data/_kb/audit_lessons.jsonl (populated by
|
||||||
|
// every audit's appendAuditLessons). Groups rows by `signature` (the
|
||||||
|
// check-normalized dedup key) and emits a finding per signature that
|
||||||
|
// has 2+ occurrences. Severity ramps with count: 2 = info, 3-4 = warn,
|
||||||
|
// 5+ = block. This is how the auditor accumulates institutional
|
||||||
|
// memory: without this check, a recurring flaw (placeholder code
|
||||||
|
// class X, unbacked claim pattern Y) looks new every audit.
|
||||||
|
async function checkAuditLessons(): Promise<Finding[]> {
|
||||||
|
const rows = await tailJsonl<any>(AUDIT_LESSONS_JSONL, TAIL_LINES * 4);
|
||||||
|
if (rows.length === 0) return [];
|
||||||
|
|
||||||
|
type Agg = { count: number; last_summary: string; last_pr: number; last_sha: string; checks: Set<string>; prs: Set<number> };
|
||||||
|
const bySig = new Map<string, Agg>();
|
||||||
|
for (const r of rows) {
|
||||||
|
const sig = String(r.signature ?? "");
|
||||||
|
if (!sig) continue;
|
||||||
|
const a = bySig.get(sig) ?? {
|
||||||
|
count: 0, last_summary: "", last_pr: 0, last_sha: "",
|
||||||
|
checks: new Set<string>(), prs: new Set<number>(),
|
||||||
|
};
|
||||||
|
a.count += 1;
|
||||||
|
a.last_summary = String(r.summary ?? a.last_summary);
|
||||||
|
a.last_pr = Number(r.pr_number ?? a.last_pr);
|
||||||
|
a.last_sha = String(r.head_sha ?? a.last_sha);
|
||||||
|
if (r.check) a.checks.add(String(r.check));
|
||||||
|
if (r.pr_number) a.prs.add(Number(r.pr_number));
|
||||||
|
bySig.set(sig, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
const findings: Finding[] = [];
|
||||||
|
// Emit only signatures with 2+ prior PRs (not just 2+ rows — a
|
||||||
|
// single unresolved PR being re-audited on every push would
|
||||||
|
// otherwise self-inflate). Distinct-PRs count is the real signal.
|
||||||
|
for (const [sig, a] of bySig) {
|
||||||
|
if (a.prs.size < 2) continue;
|
||||||
|
const sev: "block" | "warn" | "info" =
|
||||||
|
a.prs.size >= RECURRENCE_BLOCK_THRESHOLD + 2 ? "block" :
|
||||||
|
a.prs.size >= RECURRENCE_BLOCK_THRESHOLD ? "warn" : "info";
|
||||||
|
findings.push({
|
||||||
|
check: "kb_query",
|
||||||
|
severity: sev,
|
||||||
|
summary: `recurring audit pattern (${a.prs.size} distinct PRs, ${a.count} total flaggings): ${a.last_summary.slice(0, 180)}`,
|
||||||
|
evidence: [
|
||||||
|
`signature=${sig}`,
|
||||||
|
`checks: ${Array.from(a.checks).join(",")}`,
|
||||||
|
`PRs: ${Array.from(a.prs).sort((x,y)=>x-y).join(",")}`,
|
||||||
|
`most recent: PR #${a.last_pr} @ ${a.last_sha.slice(0, 12)}`,
|
||||||
|
],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return findings;
|
||||||
|
}
|
||||||
|
|
||||||
// Scrum-master reviews — the scrum pipeline writes one row per
|
// Scrum-master reviews — the scrum pipeline writes one row per
|
||||||
// accepted per-file review. We match reviews whose `file` matches
|
// accepted per-file review. We match reviews whose `file` matches
|
||||||
// any path in the PR's diff, then surface the *preview* + which
|
// any path in the PR's diff, then surface the *preview* + which
|
||||||
|
|||||||
@ -61,7 +61,13 @@ export function runStaticCheck(diff: string): Finding[] {
|
|||||||
|
|
||||||
if (!isAuditorCheckerFile) {
|
if (!isAuditorCheckerFile) {
|
||||||
for (const { re, why } of BLOCK_PATTERNS) {
|
for (const { re, why } of BLOCK_PATTERNS) {
|
||||||
if (re.test(added)) {
|
const m = added.match(re);
|
||||||
|
if (m && typeof m.index === "number") {
|
||||||
|
// Skip if the match sits inside a quoted string literal —
|
||||||
|
// this is how rubric files (tests/real-world/*, prompt
|
||||||
|
// templates) legitimately reference the patterns they
|
||||||
|
// guard against, without actually executing them.
|
||||||
|
if (isInsideQuotedString(added, m.index)) continue;
|
||||||
findings.push({
|
findings.push({
|
||||||
check: "static",
|
check: "static",
|
||||||
severity: "block",
|
severity: "block",
|
||||||
@ -154,6 +160,25 @@ function extractNewFields(addedLines: string[]): string[] {
|
|||||||
return Array.from(fields);
|
return Array.from(fields);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// True if `pos` falls inside a double- or single-quoted string on this
|
||||||
|
// line (backtick template literals too). Walks left→right toggling the
|
||||||
|
// "in quote" state on each unescaped quote. Good enough for single-
|
||||||
|
// line matches; multi-line strings aren't parsed (they're extremely
|
||||||
|
// rare in the patterns we're blocking on, and would require a proper
|
||||||
|
// tokenizer to handle correctly).
|
||||||
|
function isInsideQuotedString(line: string, pos: number): boolean {
|
||||||
|
let inDouble = false, inSingle = false, inBacktick = false;
|
||||||
|
for (let i = 0; i < pos; i++) {
|
||||||
|
const c = line[i];
|
||||||
|
const esc = i > 0 && line[i - 1] === "\\";
|
||||||
|
if (esc) continue;
|
||||||
|
if (c === '"' && !inSingle && !inBacktick) inDouble = !inDouble;
|
||||||
|
else if (c === "'" && !inDouble && !inBacktick) inSingle = !inSingle;
|
||||||
|
else if (c === "`" && !inDouble && !inSingle) inBacktick = !inBacktick;
|
||||||
|
}
|
||||||
|
return inDouble || inSingle || inBacktick;
|
||||||
|
}
|
||||||
|
|
||||||
function escape(s: string): string {
|
function escape(s: string): string {
|
||||||
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user