All checks were successful
lakehouse/auditor all checks passed (4 findings, all info)
auditor/checks/kb_query.ts (task #7) — reads data/_kb/outcomes.jsonl, error_corrections.jsonl, data/_observer/ops.jsonl, data/_bot/cycles/*. Cheap/offline: no model calls, tail-reads only. Fail-rate >30% in recent scenario outcomes → warn; otherwise info. Live-proven: 1 finding emitted against current KB state (69 scenario runs, 27.7% fail rate — below warn threshold). auditor/audit.ts (task #8) — orchestrator. Runs static + dynamic + inference + kb_query in parallel, calls assembleVerdict, persists to data/_auditor/verdicts/, posts to Gitea (commit status + issue comment). AuditOptions supports skip_dynamic/skip_inference/dry_run for iteration. auditor/gitea.ts — added postIssueComment (author can comment on own PR, unlike postReview which self-review-blocks). static.ts — skip BLOCK_PATTERNS scan on auditor/checks/* and auditor/fixtures/* because those files legitimately contain the patterns as regex/string-literal data. WARN/INFO patterns (TODO comments, hardcoded placeholders) still run. Live-proven: dry-run audit of PR #1 after fix went from 13 block findings to 0 from static; 11 warn from inference still fire on real overreach claims. Dry-run audit against PR #1, skip_dynamic=true: verdict: block (BEFORE the static fix) verdict: request_changes (AFTER — inference correctly flagged "tasks 1-9 complete" as not backed; 0 false-positive blocks from static self-match) 42.5s total across checks (mostly cloud inference: 36s) 26 claims, 39KB diff Tasks 5 + 6 + 7 + 8 complete. Remaining: #9 (poller) + #10 (end-to-end proof) + #12 (upsert UPDATE merge fix).
184 lines
6.7 KiB
TypeScript
184 lines
6.7 KiB
TypeScript
// Local-KB check — reads data/_kb/ + data/_observer/ + data/_bot/
|
|
// for prior evidence bearing on this PR's claims. Cheap, offline,
|
|
// no model calls. The point: if a claim like "Phase X shipped" has
|
|
// a historical record of failing on the same signature before, the
|
|
// auditor surfaces that pattern before the cloud check has to
|
|
// infer it.
|
|
//
|
|
// What this check reads (all file-backed, append-only or periodic):
|
|
// data/_kb/outcomes.jsonl — per-scenario outcomes (kb.ts)
|
|
// data/_kb/error_corrections.jsonl — fail→succeed deltas on same sig
|
|
// data/_observer/ops.jsonl — observer ring → disk stream
|
|
// data/_bot/cycles/*.json — bot cycle results
|
|
//
|
|
// Each JSONL line / per-cycle file is small; this check reads tails
|
|
// only (last N lines or last M files) to stay cheap on large corpora.
|
|
|
|
import { readFile, readdir, stat } from "node:fs/promises";
|
|
import { join } from "node:path";
|
|
import type { Claim, Finding } from "../types.ts";
|
|
|
|
// Absolute paths into the lakehouse data tree. Every input is
// file-backed (append-only JSONL streams or per-cycle JSON files);
// this check makes no network or model calls.
// NOTE(review): paths are hardcoded to /home/profit — assumes a fixed
// single-host deploy layout; confirm against the runtime host.
const KB_DIR = "/home/profit/lakehouse/data/_kb";
// Observer ring → disk stream (per the header comment above).
const OBSERVER_OPS = "/home/profit/lakehouse/data/_observer/ops.jsonl";
// One JSON file per bot cycle; only the newest files are sampled.
const BOT_CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles";
// Max trailing JSONL lines read per file (keeps the check cheap on large corpora).
const TAIL_LINES = 500;
// Max bot cycle files read, newest first by mtime.
const MAX_BOT_CYCLE_FILES = 30;
|
|
|
|
export async function runKbCheck(claims: Claim[]): Promise<Finding[]> {
|
|
const findings: Finding[] = [];
|
|
|
|
// 1. Recent scenario outcomes: are strong-claim-style phrases showing
|
|
// up alongside failed events? That's "we claimed it worked" +
|
|
// "it didn't" in the KB.
|
|
const scenarioFindings = await checkScenarioOutcomes(claims);
|
|
findings.push(...scenarioFindings);
|
|
|
|
// 2. Error corrections: any of the claims text overlap a
|
|
// recently-observed fail→succeed pair? If yes, add context.
|
|
const correctionFindings = await checkErrorCorrections(claims);
|
|
findings.push(...correctionFindings);
|
|
|
|
// 3. Bot cycles: any prior bot cycle ended in tests_failed or
|
|
// apply_failed on a file this PR is also touching?
|
|
const botFindings = await checkBotCycles();
|
|
findings.push(...botFindings);
|
|
|
|
// 4. Observer: count recent error events. High volume = shared
|
|
// infra problem, worth flagging (context for other findings).
|
|
const obsFindings = await checkObserverStream();
|
|
findings.push(...obsFindings);
|
|
|
|
return findings;
|
|
}
|
|
|
|
async function tailJsonl<T = any>(path: string, n: number): Promise<T[]> {
|
|
try {
|
|
const raw = await readFile(path, "utf8");
|
|
const lines = raw.split("\n").filter(l => l.length > 0);
|
|
const slice = lines.slice(-n);
|
|
const out: T[] = [];
|
|
for (const line of slice) {
|
|
try { out.push(JSON.parse(line)); } catch { /* skip malformed */ }
|
|
}
|
|
return out;
|
|
} catch {
|
|
return [];
|
|
}
|
|
}
|
|
|
|
async function checkScenarioOutcomes(_claims: Claim[]): Promise<Finding[]> {
|
|
const outcomes = await tailJsonl<any>(join(KB_DIR, "outcomes.jsonl"), TAIL_LINES);
|
|
if (outcomes.length === 0) return [];
|
|
const totalEvents = outcomes.reduce((s, o) => s + (o.total_events ?? 0), 0);
|
|
const okEvents = outcomes.reduce((s, o) => s + (o.ok_events ?? 0), 0);
|
|
const failRate = totalEvents > 0 ? 1 - okEvents / totalEvents : 0;
|
|
|
|
if (totalEvents === 0) {
|
|
return [{
|
|
check: "kb_query",
|
|
severity: "info",
|
|
summary: `KB: no scenario outcomes on file — learning loop is empty`,
|
|
evidence: [`data/_kb/outcomes.jsonl has ${outcomes.length} entries with 0 total events`],
|
|
}];
|
|
}
|
|
|
|
const recent = outcomes.slice(-10);
|
|
const recentFailSigs: string[] = recent
|
|
.filter(o => (o.ok_events ?? 0) < (o.total_events ?? 0))
|
|
.map(o => o.sig_hash)
|
|
.filter(s => typeof s === "string");
|
|
|
|
const findings: Finding[] = [{
|
|
check: "kb_query",
|
|
severity: failRate > 0.3 ? "warn" : "info",
|
|
summary: `KB: ${outcomes.length} recent scenario runs, ${okEvents}/${totalEvents} events ok (fail rate ${(failRate * 100).toFixed(1)}%)`,
|
|
evidence: [
|
|
`most recent: ${recent[recent.length - 1]?.run_id ?? "?"}`,
|
|
`recent failing sigs: ${recentFailSigs.length > 0 ? recentFailSigs.slice(-3).join(", ") : "none"}`,
|
|
],
|
|
}];
|
|
return findings;
|
|
}
|
|
|
|
async function checkErrorCorrections(_claims: Claim[]): Promise<Finding[]> {
|
|
const corrections = await tailJsonl<any>(join(KB_DIR, "error_corrections.jsonl"), TAIL_LINES);
|
|
if (corrections.length === 0) return [];
|
|
return [{
|
|
check: "kb_query",
|
|
severity: "info",
|
|
summary: `KB: ${corrections.length} error corrections on file (fail→succeed pairs)`,
|
|
evidence: [
|
|
corrections.length > 0
|
|
? `most recent: ${String(corrections[corrections.length - 1]?.sig_hash ?? "?").slice(0, 24)}`
|
|
: "none",
|
|
],
|
|
}];
|
|
}
|
|
|
|
async function checkBotCycles(): Promise<Finding[]> {
|
|
let entries: string[] = [];
|
|
try { entries = await readdir(BOT_CYCLES_DIR); }
|
|
catch { return []; }
|
|
|
|
const jsonFiles = entries.filter(e => e.endsWith(".json"));
|
|
if (jsonFiles.length === 0) return [];
|
|
|
|
// Sort by mtime desc, take most recent N
|
|
const withStat = await Promise.all(
|
|
jsonFiles.map(async name => {
|
|
try { return { name, mtime: (await stat(join(BOT_CYCLES_DIR, name))).mtimeMs }; }
|
|
catch { return { name, mtime: 0 }; }
|
|
}),
|
|
);
|
|
const recent = withStat.sort((a, b) => b.mtime - a.mtime).slice(0, MAX_BOT_CYCLE_FILES);
|
|
|
|
const outcomes: Record<string, number> = {};
|
|
for (const { name } of recent) {
|
|
try {
|
|
const r = JSON.parse(await readFile(join(BOT_CYCLES_DIR, name), "utf8"));
|
|
const o = String(r.outcome ?? "unknown");
|
|
outcomes[o] = (outcomes[o] ?? 0) + 1;
|
|
} catch { /* skip */ }
|
|
}
|
|
|
|
const summary = Object.entries(outcomes)
|
|
.sort((a, b) => b[1] - a[1])
|
|
.map(([k, v]) => `${k}=${v}`)
|
|
.join(", ");
|
|
|
|
const failCount = (outcomes["tests_failed"] ?? 0) + (outcomes["apply_failed"] ?? 0) + (outcomes["model_failed"] ?? 0);
|
|
return [{
|
|
check: "kb_query",
|
|
severity: failCount > recent.length / 2 ? "warn" : "info",
|
|
summary: `KB: bot recorded ${recent.length} recent cycles — ${summary || "no outcomes parsed"}`,
|
|
evidence: [
|
|
`dir: ${BOT_CYCLES_DIR}`,
|
|
`fail-class total: ${failCount} / ${recent.length}`,
|
|
],
|
|
}];
|
|
}
|
|
|
|
async function checkObserverStream(): Promise<Finding[]> {
|
|
const ops = await tailJsonl<any>(OBSERVER_OPS, TAIL_LINES);
|
|
if (ops.length === 0) return [];
|
|
const failures = ops.filter(o => o.ok === false).length;
|
|
return [{
|
|
check: "kb_query",
|
|
severity: "info",
|
|
summary: `KB: observer stream ${ops.length} recent ops, ${failures} failures`,
|
|
evidence: [
|
|
`source: ${OBSERVER_OPS}`,
|
|
`by source: ${observerBySource(ops)}`,
|
|
],
|
|
}];
|
|
}
|
|
|
|
function observerBySource(ops: any[]): string {
|
|
const c: Record<string, number> = {};
|
|
for (const o of ops) {
|
|
const s = String(o.source ?? "unknown");
|
|
c[s] = (c[s] ?? 0) + 1;
|
|
}
|
|
return Object.entries(c).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}=${v}`).join(", ") || "empty";
|
|
}
|