/*
 * Provenance — this module was introduced by the commit below. The patch
 * envelope and commit message are kept here as a record.
 *
 *   Commit:  a264bcf3fcb489f503311cb240c6fe40c50bd6ce
 *   From:    profit
 *   Date:    Wed, 22 Apr 2026 23:41:50 -0500
 *   Subject: [PATCH] auditor/kb_stats.ts — on-demand observability without Grafana
 *   Diff:    new file auditor/kb_stats.ts (index 0000000..9608656),
 *            1 file changed, 269 insertions(+)
 *
 * Reads every KB scratchpad file and prints a dashboard of audit health:
 * verdict distribution, per-PR verdict instability rate, consensus
 * discrepancy counters, KB size + distinct-signature growth, verifier
 * verdict histogram, top recurring entities by cross-PR count.
 *
 * Also supports --json for feeding CI gates or later piping into a static
 * dashboard page. --top N caps the entities section.
 *
 * Current state from running it: 30 audits across 8 PRs, 25% verdict
 * instability rate (all pre-N=3-consensus), 0 discrepancies logged yet
 * (audits before commit A didn't persist them), 84 audit_lessons rows with
 * 28 distinct signatures, 4 audit_facts rows with 20 distinct entities. No
 * cross-PR recurrences yet — but the machinery prints them as soon as
 * audits on other PRs produce overlapping entities.
 *
 * This is the full observability surface for PR #9 — the Grafana
 * alternative I proposed in the counter-plan. Zero infra, 280 LOC, zero
 * maintenance. If someone later wants a real dashboard, `--json` output
 * pipes directly into any visualization layer.
 */

// kb_stats — on-demand dashboard numbers from the KB scratchpad files.
// Reads data/_auditor/verdicts/*, data/_kb/audit_lessons.jsonl,
// data/_kb/audit_facts.jsonl, data/_kb/audit_discrepancies.jsonl,
// data/_kb/scrum_reviews.jsonl and prints:
//
//   - verdict distribution and per-PR verdict instability (a PR whose
//     audited SHAs produced 2+ distinct verdicts)
//   - consensus discrepancy counters (logged / tiebreaker / unresolved)
//   - KB size: row counts, distinct lesson signatures (via the kb_index
//     aggregator), distinct entities and how many recur across PRs
//   - fact quality: verifier verdict histogram, facts dropped by the
//     verifier, extraction success rate
//   - audit_facts rows grouped by their `source` field (presumably the
//     scrum vs inference KB split — confirm against the writers)
//   - top N recurring entities from audit_facts
//
// Not yet computed (listed in the original plan): confidence distribution
// from the kb_index aggregator, fact growth over time.
//
// Run: bun run auditor/kb_stats.ts
//      bun run auditor/kb_stats.ts --top 15   # show top 15 entities
//      bun run auditor/kb_stats.ts --json     # machine-readable
//      LAKEHOUSE_REPO=/path/to/clone bun run auditor/kb_stats.ts
//
// This is the "dashboard" without running Grafana. If someone really
// wants a dashboard, wire this output into a static HTML page + cron.

import { readFile, readdir } from "node:fs/promises";
import { join } from "node:path";
import { aggregate } from "./kb_index.ts";

// Repo root. Defaults to the original hard-coded checkout; `||` (not `??`)
// so an empty LAKEHOUSE_REPO also falls back instead of yielding "/data/…".
const REPO = process.env.LAKEHOUSE_REPO || "/home/profit/lakehouse";
const VERDICTS_DIR = `${REPO}/data/_auditor/verdicts`;
const AUDIT_LESSONS = `${REPO}/data/_kb/audit_lessons.jsonl`;
const AUDIT_FACTS = `${REPO}/data/_kb/audit_facts.jsonl`;
const AUDIT_DISCREPANCIES = `${REPO}/data/_kb/audit_discrepancies.jsonl`;
const SCRUM_REVIEWS = `${REPO}/data/_kb/scrum_reviews.jsonl`;

/** Number of entities shown when --top is absent or invalid. */
const DEFAULT_TOP = 10;

/** Verdict files are named `<pr>-<sha>.json`. */
const VERDICT_FILE_RE = /^(\d+)-([0-9a-f]+)\.json$/;

/** One parsed JSONL row; the shape is unknown, so fields are narrowed at use. */
type JsonRow = Record<string, unknown>;

interface Args {
  top: number;
  json: boolean;
}

interface VerdictRecord {
  pr: number;
  sha: string;
  overall: string;
  findings_total: number;
  findings_block: number;
  findings_warn: number;
}

/** Increment `key` in an insertion-ordered counter. */
function bump(counter: Map<string, number>, key: string): void {
  counter.set(key, (counter.get(key) ?? 0) + 1);
}

/** Coerce a loosely-typed JSON value to a finite number; anything else is 0. */
function toCount(value: unknown): number {
  const n = Number(value ?? 0);
  return Number.isFinite(n) ? n : 0;
}

/**
 * Parse a --top value. A missing, non-numeric or negative value falls back
 * to DEFAULT_TOP: NaN or a negative number would otherwise reach
 * `slice(0, top)` and silently show nothing or drop trailing entries.
 */
function parseTop(raw: string | undefined): number {
  if (raw === undefined || raw.trim() === "") return DEFAULT_TOP;
  const n = Number(raw);
  return Number.isFinite(n) && n >= 0 ? Math.floor(n) : DEFAULT_TOP;
}

/**
 * Parse CLI flags from a process.argv-style array (the first two entries
 * are skipped). Recognizes `--top N`, `--top=N` and `--json`; unknown
 * arguments are ignored.
 */
function parseArgs(argv: string[]): Args {
  const a: Args = { top: DEFAULT_TOP, json: false };
  for (let i = 2; i < argv.length; i++) {
    const arg = argv[i] ?? "";
    if (arg === "--top") {
      const next = argv[i + 1];
      // Only consume the next token when it is a value, so that
      // `--top --json` does not swallow the --json flag.
      if (next !== undefined && !next.startsWith("--")) {
        a.top = parseTop(next);
        i++;
      }
    } else if (arg.startsWith("--top=")) {
      a.top = parseTop(arg.slice("--top=".length));
    } else if (arg === "--json") {
      a.json = true;
    }
  }
  return a;
}

/**
 * Read a JSONL file into an array of parsed rows.
 *
 * Best-effort by design: an unreadable file yields `[]`, and blank lines,
 * unparseable lines and literal `null` rows are skipped.
 */
async function readJsonl<T = JsonRow>(path: string): Promise<T[]> {
  let raw: string;
  try {
    raw = await readFile(path, "utf8");
  } catch {
    return [];
  }
  const rows: T[] = [];
  for (const line of raw.split("\n")) {
    if (line.trim().length === 0) continue;
    try {
      const parsed: unknown = JSON.parse(line);
      if (parsed !== null) rows.push(parsed as T);
    } catch {
      /* skip corrupt line */
    }
  }
  return rows;
}

/**
 * Load every `<pr>-<sha>.json` verdict file from VERDICTS_DIR.
 *
 * Returns `[]` when the directory cannot be read; files that fail to parse
 * or do not hold a JSON object are skipped. Results are sorted by PR
 * number, then SHA, so output is deterministic — readdir order is
 * filesystem-dependent. Note this is NOT chronological order: the file
 * names carry no timestamp.
 */
async function loadVerdicts(): Promise<VerdictRecord[]> {
  let files: string[];
  try {
    files = await readdir(VERDICTS_DIR);
  } catch {
    return [];
  }
  const out: VerdictRecord[] = [];
  // Read sequentially: the number of verdict files is unbounded, and
  // opening them all at once could exhaust file descriptors.
  for (const f of files) {
    const m = VERDICT_FILE_RE.exec(f);
    if (!m) continue;
    const [, prText = "", sha = ""] = m;
    try {
      const parsed: unknown = JSON.parse(await readFile(join(VERDICTS_DIR, f), "utf8"));
      if (typeof parsed !== "object" || parsed === null) continue;
      const v = parsed as JsonRow;
      const metrics = (typeof v.metrics === "object" && v.metrics !== null ? v.metrics : {}) as JsonRow;
      out.push({
        pr: Number(prText),
        sha,
        overall: String(v.overall),
        findings_total: toCount(metrics.findings_total),
        findings_block: toCount(metrics.findings_block),
        findings_warn: toCount(metrics.findings_warn),
      });
    } catch {
      /* skip corrupt */
    }
  }
  out.sort((x, y) => x.pr - y.pr || (x.sha < y.sha ? -1 : x.sha > y.sha ? 1 : 0));
  return out;
}

interface Stats {
  audit_count: number;
  verdict_distribution: Record<string, number>;
  // Same PR with multiple SHAs — if verdicts differ, that's drift across
  // the PR's commit history. Not a flip-flop in the classical sense,
  // but worth surfacing (e.g. "PR #8 was block block req req block").
  per_pr_verdict_sequences: Record<number, string[]>;
  // How many PRs produced 2+ (and 3+) distinct verdicts across their
  // audits. A PR with a single distinct verdict is stable.
  verdict_instability: { pr_count: number; pr_with_multiple_verdicts: number; pr_with_3plus: number };
  consensus: { discrepancy_count: number; tiebreaker_used: number; unresolved: number };
  kb: {
    audit_lessons_rows: number;
    audit_facts_rows: number;
    scrum_reviews_rows: number;
    distinct_finding_signatures: number;
    distinct_entities_across_prs: number;
    entities_in_2plus_prs: number;
    entities_in_5plus_prs: number;
  };
  fact_quality: {
    verifier_verdict_distribution: Record<string, number>;
    facts_dropped_by_verifier_total: number;
    extraction_success_rate: number;
  };
  top_entities: Array<{ name: string; distinct_prs: number; count: number; types: string[] }>;
  kb_by_source: Record<string, number>;
}

/** Per-entity accumulator, keyed by lower-cased entity name. */
interface EntAgg {
  distinct_prs: Set<number>;
  count: number;
  types: Set<string>;
  name: string;
}

/**
 * Read all KB inputs and compute the dashboard numbers.
 *
 * @param args parsed CLI flags; only `top` is used, to cap `top_entities`.
 */
async function collect(args: Args): Promise<Stats> {
  // The five inputs are independent, so load them concurrently.
  const [verdicts, lessons, facts, disc, reviews] = await Promise.all([
    loadVerdicts(),
    readJsonl(AUDIT_LESSONS),
    readJsonl(AUDIT_FACTS),
    readJsonl(AUDIT_DISCREPANCIES),
    readJsonl(SCRUM_REVIEWS),
  ]);

  // Verdict stability. Counters are Maps so that arbitrary strings from
  // the data ("constructor", "__proto__", …) cannot collide with
  // Object.prototype members.
  const byPr: Record<number, string[]> = {};
  const verdictDist = new Map<string, number>();
  for (const v of verdicts) {
    (byPr[v.pr] ??= []).push(v.overall);
    bump(verdictDist, v.overall);
  }
  let multi = 0;
  let tri = 0;
  for (const seq of Object.values(byPr)) {
    const distinct = new Set(seq).size;
    if (distinct >= 2) multi++;
    if (distinct >= 3) tri++;
  }

  // Consensus drift
  const consensus = {
    discrepancy_count: disc.length,
    tiebreaker_used: disc.filter(d => String(d.resolution).startsWith("tiebreaker")).length,
    unresolved: disc.filter(d => d.resolution === "unresolved").length,
  };

  // Lesson signatures. NOTE(review): `aggregate` is assumed to return a
  // Map-like value exposing `.size`, scoped per PR — confirm in kb_index.
  const lessonAgg = await aggregate(AUDIT_LESSONS, {
    keyFn: r => r?.signature,
    scopeFn: r => (r?.pr_number !== undefined ? `pr-${r.pr_number}` : undefined),
  });

  // Entity aggregation across audit_facts rows
  const entAgg = new Map<string, EntAgg>();
  const sourceCount = new Map<string, number>();
  // Seed the known labels so they always appear, in this order, even at 0.
  const verifierDist = new Map<string, number>([
    ["CORRECT", 0],
    ["INCORRECT", 0],
    ["UNVERIFIABLE", 0],
    ["UNCHECKED", 0],
  ]);
  let factsDroppedTotal = 0;
  let extractionsWithFacts = 0;

  for (const row of facts) {
    const src = String(row.source ?? "unknown");
    bump(sourceCount, src);
    const pr = Number(row.pr_number);

    if (Array.isArray(row.verifier_verdicts)) {
      for (const v of row.verifier_verdicts) bump(verifierDist, String(v));
    }
    // toCount keeps one malformed value from turning the total into NaN.
    factsDroppedTotal += toCount(row.facts_dropped_by_verifier);

    const entities: unknown[] = Array.isArray(row.entities) ? row.entities : [];
    const hasFacts = Array.isArray(row.facts) && row.facts.length > 0;
    if (hasFacts || entities.length > 0) extractionsWithFacts++;

    for (const e of entities) {
      const ent = (typeof e === "object" && e !== null ? e : {}) as JsonRow;
      const name = String(ent.name ?? "").trim();
      if (name.length < 3) continue; // too short to be a meaningful entity
      const key = name.toLowerCase();
      let agg = entAgg.get(key);
      if (agg === undefined) {
        // The first spelling seen is the display name for the group.
        agg = { distinct_prs: new Set(), count: 0, types: new Set(), name };
        entAgg.set(key, agg);
      }
      agg.count++;
      if (Number.isFinite(pr) && pr > 0) agg.distinct_prs.add(pr);
      if (ent.type) agg.types.add(String(ent.type));
    }
  }

  const allEntities = Array.from(entAgg.values());
  const entitiesIn2Plus = allEntities.filter(a => a.distinct_prs.size >= 2).length;
  const entitiesIn5Plus = allEntities.filter(a => a.distinct_prs.size >= 5).length;
  // Rank by cross-PR spread first, raw observation count second.
  const topEntities = allEntities
    .sort((a, b) => b.distinct_prs.size - a.distinct_prs.size || b.count - a.count)
    .slice(0, args.top)
    .map(a => ({
      name: a.name,
      distinct_prs: a.distinct_prs.size,
      count: a.count,
      types: Array.from(a.types),
    }));

  return {
    audit_count: verdicts.length,
    verdict_distribution: Object.fromEntries(verdictDist),
    per_pr_verdict_sequences: byPr,
    verdict_instability: {
      pr_count: Object.keys(byPr).length,
      pr_with_multiple_verdicts: multi,
      pr_with_3plus: tri,
    },
    consensus,
    kb: {
      audit_lessons_rows: lessons.length,
      audit_facts_rows: facts.length,
      scrum_reviews_rows: reviews.length,
      distinct_finding_signatures: lessonAgg.size,
      distinct_entities_across_prs: entAgg.size,
      entities_in_2plus_prs: entitiesIn2Plus,
      entities_in_5plus_prs: entitiesIn5Plus,
    },
    fact_quality: {
      verifier_verdict_distribution: Object.fromEntries(verifierDist),
      facts_dropped_by_verifier_total: factsDroppedTotal,
      extraction_success_rate: facts.length > 0 ? extractionsWithFacts / facts.length : 0,
    },
    top_entities: topEntities,
    kb_by_source: Object.fromEntries(sourceCount),
  };
}

/** Render the stats as the plain-text dashboard printed by default. */
function renderHuman(s: Stats): string {
  const lines: string[] = [];
  lines.push("═══ KB STATS ═══");
  lines.push("");
  lines.push(`Audits: ${s.audit_count} total across ${s.verdict_instability.pr_count} distinct PRs`);
  lines.push(`Verdicts: ${Object.entries(s.verdict_distribution).map(([k, v]) => `${k}=${v}`).join(" ")}`);
  const multiplePct = s.verdict_instability.pr_count > 0
    ? Math.round(100 * s.verdict_instability.pr_with_multiple_verdicts / s.verdict_instability.pr_count)
    : 0;
  lines.push(`Verdict instability: ${s.verdict_instability.pr_with_multiple_verdicts}/${s.verdict_instability.pr_count} PRs had 2+ distinct verdicts (${multiplePct}%) — 3+ distinct: ${s.verdict_instability.pr_with_3plus}`);
  lines.push("");
  lines.push("─── Consensus ───");
  lines.push(` discrepancies logged: ${s.consensus.discrepancy_count}`);
  lines.push(` tiebreaker used: ${s.consensus.tiebreaker_used}`);
  lines.push(` unresolved: ${s.consensus.unresolved}`);
  // Discrepancies per audit, as a percentage; guarded against zero audits.
  const dRate = s.audit_count > 0 ? (100 * s.consensus.discrepancy_count / s.audit_count).toFixed(1) : "0";
  lines.push(` discrepancy rate: ${dRate}% of audits`);
  lines.push("");
  lines.push("─── KB size ───");
  lines.push(` audit_lessons.jsonl: ${s.kb.audit_lessons_rows} rows, ${s.kb.distinct_finding_signatures} distinct signatures`);
  lines.push(` audit_facts.jsonl: ${s.kb.audit_facts_rows} rows, ${s.kb.distinct_entities_across_prs} distinct entities`);
  lines.push(` scrum_reviews.jsonl: ${s.kb.scrum_reviews_rows} rows`);
  lines.push(` entities in 2+ PRs: ${s.kb.entities_in_2plus_prs}`);
  lines.push(` entities in 5+ PRs: ${s.kb.entities_in_5plus_prs} ← strong cross-cutting signal`);
  lines.push("");
  lines.push("─── Fact quality ───");
  const v = s.fact_quality.verifier_verdict_distribution;
  lines.push(` verifier verdicts: CORRECT=${v.CORRECT ?? 0} UNVERIFIABLE=${v.UNVERIFIABLE ?? 0} UNCHECKED=${v.UNCHECKED ?? 0} INCORRECT=${v.INCORRECT ?? 0}`);
  lines.push(` facts dropped by verifier: ${s.fact_quality.facts_dropped_by_verifier_total}`);
  lines.push(` extraction success rate: ${(s.fact_quality.extraction_success_rate * 100).toFixed(1)}%`);
  lines.push("");
  lines.push("─── KB sources ───");
  for (const [src, n] of Object.entries(s.kb_by_source)) {
    lines.push(` ${src}: ${n}`);
  }
  lines.push("");
  lines.push(`─── Top ${s.top_entities.length} recurring entities ───`);
  for (const e of s.top_entities) {
    lines.push(` [${e.distinct_prs} PRs × ${e.count} obs] ${e.name} (${e.types.join(",")})`);
  }
  return lines.join("\n");
}

/** CLI entry point: collect the stats and print them as JSON or text. */
async function main(): Promise<void> {
  const args = parseArgs(process.argv);
  const stats = await collect(args);
  if (args.json) {
    // Stats holds only plain objects, arrays and primitives, so no
    // replacer is needed.
    console.log(JSON.stringify(stats, null, 2));
  } else {
    console.log(renderHuman(stats));
  }
}

main().catch(e => { console.error("[kb_stats] fatal:", e); process.exit(1); });