Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.
WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) excluded — see REPLICATION.md for
regen path. Full lakehouse history at git.agentview.dev/profit/lakehouse.
WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
* UPSERT: ADD on new workflow, UPDATE bumps replay_count on identical
* REVISE: chains versions, parent.superseded_at + superseded_by stamped
* RETIRE: marks specific trace retired with reason, excluded from retrieval
* HISTORY: walks chain root→tip, cycle-safe
KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces
Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
270 lines
11 KiB
TypeScript
// kb_stats — on-demand dashboard numbers from the KB scratchpad
// files. Reads data/_auditor/verdicts/*, data/_kb/audit_lessons.jsonl,
// data/_kb/audit_facts.jsonl, data/_kb/audit_discrepancies.jsonl,
// data/_kb/scrum_reviews.jsonl and prints:
//
// - verdict flip-flop rate (same SHA re-audited, verdict changed?)
// - consensus discrepancy rate (N runs disagreed on a claim)
// - confidence distribution from kb_index aggregator
// - top N recurring entities from audit_facts
// - fact growth over time
// - scrum vs inference KB split
//
// Run: bun run auditor/kb_stats.ts
//      bun run auditor/kb_stats.ts --top 15   # show top 15 entities
//      bun run auditor/kb_stats.ts --json     # machine-readable
//
// This is the "dashboard" without running Grafana. If someone really
// wants a dashboard, wire this output into a static HTML page + cron.
import { readFile, readdir } from "node:fs/promises";
|
||
import { join } from "node:path";
|
||
import { aggregate } from "./kb_index.ts";
|
||
|
||
const REPO = "/home/profit/lakehouse";
|
||
const VERDICTS_DIR = `${REPO}/data/_auditor/verdicts`;
|
||
const AUDIT_LESSONS = `${REPO}/data/_kb/audit_lessons.jsonl`;
|
||
const AUDIT_FACTS = `${REPO}/data/_kb/audit_facts.jsonl`;
|
||
const AUDIT_DISCREPANCIES = `${REPO}/data/_kb/audit_discrepancies.jsonl`;
|
||
const SCRUM_REVIEWS = `${REPO}/data/_kb/scrum_reviews.jsonl`;
|
||
|
||
interface Args {
|
||
top: number;
|
||
json: boolean;
|
||
}
|
||
|
||
function parseArgs(argv: string[]): Args {
|
||
const a: Args = { top: 10, json: false };
|
||
for (let i = 2; i < argv.length; i++) {
|
||
if (argv[i] === "--top") a.top = Number(argv[++i] ?? 10);
|
||
else if (argv[i] === "--json") a.json = true;
|
||
}
|
||
return a;
|
||
}
|
||
|
||
async function readJsonl<T = any>(path: string): Promise<T[]> {
|
||
try {
|
||
const raw = await readFile(path, "utf8");
|
||
return raw.split("\n").filter(l => l.length > 0).map(l => {
|
||
try { return JSON.parse(l) as T; } catch { return null as any; }
|
||
}).filter(r => r !== null);
|
||
} catch { return []; }
|
||
}
|
||
|
||
async function loadVerdicts(): Promise<Array<{ pr: number; sha: string; overall: string; findings_total: number; findings_block: number; findings_warn: number }>> {
|
||
let files: string[] = [];
|
||
try { files = await readdir(VERDICTS_DIR); } catch { return []; }
|
||
const out = [];
|
||
for (const f of files) {
|
||
if (!f.endsWith(".json")) continue;
|
||
const m = f.match(/^(\d+)-([0-9a-f]+)\.json$/);
|
||
if (!m) continue;
|
||
try {
|
||
const v = JSON.parse(await readFile(join(VERDICTS_DIR, f), "utf8"));
|
||
out.push({
|
||
pr: Number(m[1]),
|
||
sha: m[2],
|
||
overall: String(v.overall),
|
||
findings_total: Number(v.metrics?.findings_total ?? 0),
|
||
findings_block: Number(v.metrics?.findings_block ?? 0),
|
||
findings_warn: Number(v.metrics?.findings_warn ?? 0),
|
||
});
|
||
} catch { /* skip corrupt */ }
|
||
}
|
||
return out;
|
||
}
|
||
|
||
interface Stats {
|
||
audit_count: number;
|
||
verdict_distribution: Record<string, number>;
|
||
// Same PR with multiple SHAs — if verdicts differ, that's drift across
|
||
// the PR's commit history. Not a flip-flop in the classical sense,
|
||
// but worth surfacing (e.g. "PR #8 was block block req req block").
|
||
per_pr_verdict_sequences: Record<number, string[]>;
|
||
// For each PR with ≥ 2 audits, how many distinct verdicts did it
|
||
// produce? 1 = stable; 2+ = some flipping.
|
||
verdict_instability: { pr_count: number; pr_with_multiple_verdicts: number; pr_with_3plus: number };
|
||
consensus: { discrepancy_count: number; tiebreaker_used: number; unresolved: number };
|
||
kb: {
|
||
audit_lessons_rows: number;
|
||
audit_facts_rows: number;
|
||
scrum_reviews_rows: number;
|
||
distinct_finding_signatures: number;
|
||
distinct_entities_across_prs: number;
|
||
entities_in_2plus_prs: number;
|
||
entities_in_5plus_prs: number;
|
||
};
|
||
fact_quality: {
|
||
verifier_verdict_distribution: Record<string, number>;
|
||
facts_dropped_by_verifier_total: number;
|
||
extraction_success_rate: number;
|
||
};
|
||
top_entities: Array<{ name: string; distinct_prs: number; count: number; types: string[] }>;
|
||
kb_by_source: Record<string, number>;
|
||
}
|
||
|
||
async function collect(args: Args): Promise<Stats> {
|
||
const verdicts = await loadVerdicts();
|
||
const lessons = await readJsonl<any>(AUDIT_LESSONS);
|
||
const facts = await readJsonl<any>(AUDIT_FACTS);
|
||
const disc = await readJsonl<any>(AUDIT_DISCREPANCIES);
|
||
const reviews = await readJsonl<any>(SCRUM_REVIEWS);
|
||
|
||
// Verdict stability
|
||
const byPr: Record<number, string[]> = {};
|
||
const verdictDist: Record<string, number> = {};
|
||
for (const v of verdicts) {
|
||
(byPr[v.pr] ??= []).push(v.overall);
|
||
verdictDist[v.overall] = (verdictDist[v.overall] ?? 0) + 1;
|
||
}
|
||
let multi = 0, tri = 0;
|
||
for (const [_, seq] of Object.entries(byPr)) {
|
||
const distinct = new Set(seq);
|
||
if (distinct.size >= 2) multi++;
|
||
if (distinct.size >= 3) tri++;
|
||
}
|
||
|
||
// Consensus drift
|
||
const consensus = {
|
||
discrepancy_count: disc.length,
|
||
tiebreaker_used: disc.filter(d => String(d.resolution).startsWith("tiebreaker")).length,
|
||
unresolved: disc.filter(d => d.resolution === "unresolved").length,
|
||
};
|
||
|
||
// Lesson signatures
|
||
const lessonAgg = await aggregate<any>(AUDIT_LESSONS, {
|
||
keyFn: r => r?.signature,
|
||
scopeFn: r => (r?.pr_number !== undefined ? `pr-${r.pr_number}` : undefined),
|
||
});
|
||
|
||
// Entity aggregation across audit_facts rows
|
||
interface EntAgg { distinct_prs: Set<number>; count: number; types: Set<string>; name: string; sources: Set<string> }
|
||
const entAgg = new Map<string, EntAgg>();
|
||
const sourceCount: Record<string, number> = {};
|
||
let totalVerdictDist: Record<string, number> = { CORRECT: 0, INCORRECT: 0, UNVERIFIABLE: 0, UNCHECKED: 0 };
|
||
let factsDroppedTotal = 0;
|
||
let extractionsWithFacts = 0;
|
||
|
||
for (const row of facts) {
|
||
const src = String(row.source ?? "unknown");
|
||
sourceCount[src] = (sourceCount[src] ?? 0) + 1;
|
||
const pr = Number(row.pr_number);
|
||
if (Array.isArray(row.verifier_verdicts)) {
|
||
for (const v of row.verifier_verdicts) {
|
||
totalVerdictDist[v] = (totalVerdictDist[v] ?? 0) + 1;
|
||
}
|
||
}
|
||
factsDroppedTotal += Number(row.facts_dropped_by_verifier ?? 0);
|
||
if ((Array.isArray(row.facts) && row.facts.length > 0) || (Array.isArray(row.entities) && row.entities.length > 0)) {
|
||
extractionsWithFacts++;
|
||
}
|
||
for (const e of Array.isArray(row.entities) ? row.entities : []) {
|
||
const name = String(e?.name ?? "").trim();
|
||
if (name.length < 3) continue;
|
||
const key = name.toLowerCase();
|
||
const agg = entAgg.get(key) ?? { distinct_prs: new Set(), count: 0, types: new Set(), name, sources: new Set() };
|
||
agg.count++;
|
||
if (Number.isFinite(pr) && pr > 0) agg.distinct_prs.add(pr);
|
||
if (e?.type) agg.types.add(String(e.type));
|
||
agg.sources.add(src);
|
||
entAgg.set(key, agg);
|
||
}
|
||
}
|
||
|
||
const entitiesIn2Plus = Array.from(entAgg.values()).filter(a => a.distinct_prs.size >= 2).length;
|
||
const entitiesIn5Plus = Array.from(entAgg.values()).filter(a => a.distinct_prs.size >= 5).length;
|
||
const topEntities = Array.from(entAgg.values())
|
||
.sort((a, b) => b.distinct_prs.size - a.distinct_prs.size || b.count - a.count)
|
||
.slice(0, args.top)
|
||
.map(a => ({
|
||
name: a.name,
|
||
distinct_prs: a.distinct_prs.size,
|
||
count: a.count,
|
||
types: Array.from(a.types),
|
||
}));
|
||
|
||
const stats: Stats = {
|
||
audit_count: verdicts.length,
|
||
verdict_distribution: verdictDist,
|
||
per_pr_verdict_sequences: byPr,
|
||
verdict_instability: {
|
||
pr_count: Object.keys(byPr).length,
|
||
pr_with_multiple_verdicts: multi,
|
||
pr_with_3plus: tri,
|
||
},
|
||
consensus,
|
||
kb: {
|
||
audit_lessons_rows: lessons.length,
|
||
audit_facts_rows: facts.length,
|
||
scrum_reviews_rows: reviews.length,
|
||
distinct_finding_signatures: lessonAgg.size,
|
||
distinct_entities_across_prs: entAgg.size,
|
||
entities_in_2plus_prs: entitiesIn2Plus,
|
||
entities_in_5plus_prs: entitiesIn5Plus,
|
||
},
|
||
fact_quality: {
|
||
verifier_verdict_distribution: totalVerdictDist,
|
||
facts_dropped_by_verifier_total: factsDroppedTotal,
|
||
extraction_success_rate: facts.length > 0 ? extractionsWithFacts / facts.length : 0,
|
||
},
|
||
top_entities: topEntities,
|
||
kb_by_source: sourceCount,
|
||
};
|
||
return stats;
|
||
}
|
||
|
||
function renderHuman(s: Stats): string {
|
||
const lines: string[] = [];
|
||
lines.push("═══ KB STATS ═══");
|
||
lines.push("");
|
||
lines.push(`Audits: ${s.audit_count} total across ${s.verdict_instability.pr_count} distinct PRs`);
|
||
lines.push(`Verdicts: ${Object.entries(s.verdict_distribution).map(([k, v]) => `${k}=${v}`).join(" ")}`);
|
||
const multiplePct = s.verdict_instability.pr_count > 0
|
||
? Math.round(100 * s.verdict_instability.pr_with_multiple_verdicts / s.verdict_instability.pr_count)
|
||
: 0;
|
||
lines.push(`Verdict instability: ${s.verdict_instability.pr_with_multiple_verdicts}/${s.verdict_instability.pr_count} PRs had 2+ distinct verdicts (${multiplePct}%) — 3+ distinct: ${s.verdict_instability.pr_with_3plus}`);
|
||
lines.push("");
|
||
lines.push("─── Consensus ───");
|
||
lines.push(` discrepancies logged: ${s.consensus.discrepancy_count}`);
|
||
lines.push(` tiebreaker used: ${s.consensus.tiebreaker_used}`);
|
||
lines.push(` unresolved: ${s.consensus.unresolved}`);
|
||
const dRate = s.audit_count > 0 ? (100 * s.consensus.discrepancy_count / s.audit_count).toFixed(1) : "0";
|
||
lines.push(` discrepancy rate: ${dRate}% of audits`);
|
||
lines.push("");
|
||
lines.push("─── KB size ───");
|
||
lines.push(` audit_lessons.jsonl: ${s.kb.audit_lessons_rows} rows, ${s.kb.distinct_finding_signatures} distinct signatures`);
|
||
lines.push(` audit_facts.jsonl: ${s.kb.audit_facts_rows} rows, ${s.kb.distinct_entities_across_prs} distinct entities`);
|
||
lines.push(` scrum_reviews.jsonl: ${s.kb.scrum_reviews_rows} rows`);
|
||
lines.push(` entities in 2+ PRs: ${s.kb.entities_in_2plus_prs}`);
|
||
lines.push(` entities in 5+ PRs: ${s.kb.entities_in_5plus_prs} ← strong cross-cutting signal`);
|
||
lines.push("");
|
||
lines.push("─── Fact quality ───");
|
||
const v = s.fact_quality.verifier_verdict_distribution;
|
||
lines.push(` verifier verdicts: CORRECT=${v.CORRECT ?? 0} UNVERIFIABLE=${v.UNVERIFIABLE ?? 0} UNCHECKED=${v.UNCHECKED ?? 0} INCORRECT=${v.INCORRECT ?? 0}`);
|
||
lines.push(` facts dropped by verifier: ${s.fact_quality.facts_dropped_by_verifier_total}`);
|
||
lines.push(` extraction success rate: ${(s.fact_quality.extraction_success_rate * 100).toFixed(1)}%`);
|
||
lines.push("");
|
||
lines.push("─── KB sources ───");
|
||
for (const [src, n] of Object.entries(s.kb_by_source)) {
|
||
lines.push(` ${src}: ${n}`);
|
||
}
|
||
lines.push("");
|
||
lines.push(`─── Top ${s.top_entities.length} recurring entities ───`);
|
||
for (const e of s.top_entities) {
|
||
lines.push(` [${e.distinct_prs} PRs × ${e.count} obs] ${e.name} (${e.types.join(",")})`);
|
||
}
|
||
return lines.join("\n");
|
||
}
|
||
|
||
async function main() {
|
||
const args = parseArgs(process.argv);
|
||
const stats = await collect(args);
|
||
if (args.json) {
|
||
console.log(JSON.stringify(stats, (_, v) => v instanceof Set ? Array.from(v) : v, 2));
|
||
} else {
|
||
console.log(renderHuman(stats));
|
||
}
|
||
}
|
||
|
||
main().catch(e => { console.error("[kb_stats] fatal:", e); process.exit(1); });
|