profit ac01fffd9a checkpoint: matrix-agent-validated (2026-04-25)
Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.

WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) is excluded — see REPLICATION.md for
how to regenerate it. The full lakehouse history is at
git.agentview.dev/profit/lakehouse.

WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
  briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
    * UPSERT: ADD on a new workflow, UPDATE bumps replay_count on an identical one
    * REVISE: chains versions, parent.superseded_at + superseded_by stamped
    * RETIRE: marks specific trace retired with reason, excluded from retrieval
    * HISTORY: walks chain root→tip, cycle-safe

KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces

Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 19:43:27 -05:00

270 lines
11 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// kb_stats — on-demand dashboard numbers from the KB scratchpad
// files. Reads data/_auditor/verdicts/*, data/_kb/audit_lessons.jsonl,
// data/_kb/audit_facts.jsonl, data/_kb/audit_discrepancies.jsonl,
// data/_kb/scrum_reviews.jsonl and prints:
//
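// - verdict distribution and per-PR verdict instability (how many PRs
//   produced 2+ / 3+ distinct verdicts across their audits)
// - consensus discrepancy rate (N runs disagreed on a claim), plus
//   tiebreaker-used and unresolved counts
// - KB sizes: rows per file, distinct lesson signatures (via the
//   kb_index aggregator), distinct entities
// - fact quality: verifier verdict distribution, facts dropped by the
//   verifier, extraction success rate
// - top N recurring entities from audit_facts
// - audit_facts rows by source (e.g. scrum vs inference)
//
// NOTE: "confidence distribution" and "fact growth over time" were
// listed here originally but are not computed by this script yet.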
//
// Run: bun run auditor/kb_stats.ts
// bun run auditor/kb_stats.ts --top 15 # show top 15 entities
// bun run auditor/kb_stats.ts --json # machine-readable
//
// This is the "dashboard" without running Grafana. If someone really
// wants a dashboard, wire this output into a static HTML page + cron.
import { readFile, readdir } from "node:fs/promises";
import { join } from "node:path";
import { aggregate } from "./kb_index.ts";
// Absolute root of the lakehouse checkout; every input path below is derived from it.
const REPO = "/home/profit/lakehouse";
// Directory scanned for verdict JSON files.
const VERDICTS_DIR = `${REPO}/data/_auditor/verdicts`;
// Shared parent of the JSONL knowledge-base files.
const KB_DIR = `${REPO}/data/_kb`;
const AUDIT_LESSONS = `${KB_DIR}/audit_lessons.jsonl`;
const AUDIT_FACTS = `${KB_DIR}/audit_facts.jsonl`;
const AUDIT_DISCREPANCIES = `${KB_DIR}/audit_discrepancies.jsonl`;
const SCRUM_REVIEWS = `${KB_DIR}/scrum_reviews.jsonl`;
/** Options accepted on the kb_stats command line. */
interface Args {
  /** Maximum number of recurring entities to report (`--top N`, default 10). */
  top: number;
  /** Emit machine-readable JSON instead of the text report (`--json`). */
  json: boolean;
}

/**
 * Parses the process argument vector into {@link Args}.
 *
 * Recognised flags are `--top N` and `--json`; anything else is ignored.
 * A `--top` value is accepted only when it is a non-negative number
 * (fractions are truncated). A missing, blank, negative or non-numeric
 * value leaves the current setting untouched, because NaN or a negative
 * count would make the later `slice(0, top)` return the wrong entities.
 *
 * @param argv Full argv as in `process.argv`; the first two entries
 *             (runtime and script path) are skipped.
 * @returns The parsed options, with defaults for anything not supplied.
 */
function parseArgs(argv: string[]): Args {
  const parsed: Args = { top: 10, json: false };
  for (let i = 2; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === "--json") {
      parsed.json = true;
    } else if (flag === "--top") {
      const value = argv[i + 1];
      // `--top` directly followed by another flag (or by nothing) has no
      // value: leave the next token for the following iteration instead
      // of swallowing it.
      if (value === undefined || value.startsWith("--")) continue;
      i++;
      const n = Number(value);
      // `n >= 0` is false for NaN, so this rejects garbage and negatives alike.
      if (value.trim() !== "" && n >= 0) parsed.top = Math.trunc(n);
    }
  }
  return parsed;
}
/**
 * Best-effort reader for a JSON-Lines file.
 *
 * Each non-empty line is parsed on its own. Lines that fail to parse, and
 * lines whose parsed value is `null`, are dropped. If the file cannot be
 * read at all the result is an empty array rather than a rejection.
 *
 * @param path Location of the `.jsonl` file.
 * @returns The parsed rows, in file order.
 */
async function readJsonl<T = any>(path: string): Promise<T[]> {
  let text: string;
  try {
    text = await readFile(path, "utf8");
  } catch {
    return [];
  }

  const rows: T[] = [];
  for (const line of text.split("\n")) {
    if (line.length === 0) continue;
    let parsed: any;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // malformed line: skip it, keep the rest
    }
    if (parsed !== null) rows.push(parsed as T);
  }
  return rows;
}
/**
 * Loads every verdict file found in VERDICTS_DIR.
 *
 * Only files named `<digits>-<lowercase hex>.json` are considered; the two
 * filename parts become `pr` and `sha`. The verdict and the finding
 * counters come from the file body, with absent counters reported as 0.
 * A missing directory yields an empty list, and files that cannot be read
 * or parsed are skipped.
 *
 * @returns One record per usable verdict file, in directory-listing order.
 */
async function loadVerdicts(): Promise<Array<{ pr: number; sha: string; overall: string; findings_total: number; findings_block: number; findings_warn: number }>> {
  let names: string[];
  try {
    names = await readdir(VERDICTS_DIR);
  } catch {
    return [];
  }

  // The pattern already demands a `.json` suffix, so no separate extension test is needed.
  const verdictFile = /^(\d+)-([0-9a-f]+)\.json$/;
  const verdicts = [];
  for (const name of names) {
    const parts = verdictFile.exec(name);
    if (parts === null) continue;
    try {
      const body = JSON.parse(await readFile(join(VERDICTS_DIR, name), "utf8"));
      const metrics = body.metrics;
      verdicts.push({
        pr: Number(parts[1]),
        sha: parts[2],
        overall: String(body.overall),
        findings_total: Number(metrics?.findings_total ?? 0),
        findings_block: Number(metrics?.findings_block ?? 0),
        findings_warn: Number(metrics?.findings_warn ?? 0),
      });
    } catch {
      // unreadable or corrupt verdict file: leave it out
    }
  }
  return verdicts;
}
// Aggregated dashboard numbers produced by collect() and consumed by
// renderHuman() (or serialised as-is for --json).
interface Stats {
// Number of verdict files that were loaded.
audit_count: number;
// How many audits produced each `overall` verdict value.
verdict_distribution: Record<string, number>;
// Same PR with multiple SHAs — if verdicts differ, that's drift across
// the PR's commit history. Not a flip-flop in the classical sense,
// but worth surfacing (e.g. "PR #8 was block block req req block").
// NOTE(review): entries follow the order verdict files were listed from
// disk, which is presumably not chronological — confirm before reading
// the sequence as a timeline.
per_pr_verdict_sequences: Record<number, string[]>;
// For each PR with ≥ 2 audits, how many distinct verdicts did it
// produce? 1 = stable; 2+ = some flipping.
// pr_count is the number of PRs with at least one audit; the other two
// count PRs with ≥ 2 and ≥ 3 distinct verdicts respectively.
verdict_instability: { pr_count: number; pr_with_multiple_verdicts: number; pr_with_3plus: number };
// Rows in audit_discrepancies.jsonl: total, those whose `resolution`
// starts with "tiebreaker", and those whose `resolution` is "unresolved".
consensus: { discrepancy_count: number; tiebreaker_used: number; unresolved: number };
kb: {
// Row counts of the three JSONL inputs.
audit_lessons_rows: number;
audit_facts_rows: number;
scrum_reviews_rows: number;
// Size of the kb_index aggregate of audit_lessons keyed by `signature`.
distinct_finding_signatures: number;
// Distinct entity names (compared case-insensitively, names shorter
// than 3 characters ignored) seen anywhere in audit_facts.
distinct_entities_across_prs: number;
// Of those entities, how many appear in at least 2 / at least 5 distinct PRs.
entities_in_2plus_prs: number;
entities_in_5plus_prs: number;
};
fact_quality: {
// Tally of every value found in the rows' `verifier_verdicts` arrays;
// CORRECT, INCORRECT, UNVERIFIABLE and UNCHECKED are always present.
verifier_verdict_distribution: Record<string, number>;
// Sum of `facts_dropped_by_verifier` over all audit_facts rows.
facts_dropped_by_verifier_total: number;
// Fraction (0..1) of audit_facts rows that carry at least one fact or
// entity; 0 when there are no rows.
extraction_success_rate: number;
};
// Entities ordered by distinct PR count, then by observation count,
// truncated to the requested --top size.
top_entities: Array<{ name: string; distinct_prs: number; count: number; types: string[] }>;
// audit_facts rows per `source` value ("unknown" when the field is absent).
kb_by_source: Record<string, number>;
}
/** Adds one to the tally stored under `key`, treating a missing key as zero. */
function bumpCount(tally: Map<string, number>, key: string): void {
  tally.set(key, (tally.get(key) ?? 0) + 1);
}

/**
 * Loads every KB input and condenses it into a single {@link Stats} snapshot.
 *
 * The verdict files and the four JSONL files are read concurrently; the
 * loaders used here return empty data for missing inputs, so an absent
 * file simply shows up as zeros.
 *
 * Tallies keyed by data-derived strings (verdicts, sources, verifier
 * verdicts) are accumulated in Maps and only converted to plain objects
 * at the end. Counting directly on `{}` would read inherited members for
 * keys such as "constructor" and corrupt the count.
 *
 * @param args Parsed CLI options; only `top` is used, to cap `top_entities`.
 * @returns The aggregated statistics.
 */
async function collect(args: Args): Promise<Stats> {
  // None of these reads depends on another, so issue them together.
  const [verdicts, lessons, facts, disc, reviews] = await Promise.all([
    loadVerdicts(),
    readJsonl<any>(AUDIT_LESSONS),
    readJsonl<any>(AUDIT_FACTS),
    readJsonl<any>(AUDIT_DISCREPANCIES),
    readJsonl<any>(SCRUM_REVIEWS),
  ]);

  // Verdict stability
  const byPr: Record<number, string[]> = {};
  const verdictTally = new Map<string, number>();
  for (const v of verdicts) {
    (byPr[v.pr] ??= []).push(v.overall);
    bumpCount(verdictTally, v.overall);
  }
  let multi = 0;
  let tri = 0;
  for (const seq of Object.values(byPr)) {
    const distinct = new Set(seq).size;
    if (distinct >= 2) multi++;
    if (distinct >= 3) tri++;
  }

  // Consensus drift
  const consensus = {
    discrepancy_count: disc.length,
    tiebreaker_used: disc.filter(d => String(d.resolution).startsWith("tiebreaker")).length,
    unresolved: disc.filter(d => d.resolution === "unresolved").length,
  };

  // Lesson signatures
  const lessonAgg = await aggregate<any>(AUDIT_LESSONS, {
    keyFn: r => r?.signature,
    scopeFn: r => (r?.pr_number !== undefined ? `pr-${r.pr_number}` : undefined),
  });

  // Entity aggregation across audit_facts rows
  interface EntAgg { name: string; count: number; distinct_prs: Set<number>; types: Set<string> }
  const entAgg = new Map<string, EntAgg>();
  const sourceTally = new Map<string, number>();
  // Pre-seeded so the four expected verdicts are always reported, even at zero.
  const verifierTally = new Map<string, number>([
    ["CORRECT", 0],
    ["INCORRECT", 0],
    ["UNVERIFIABLE", 0],
    ["UNCHECKED", 0],
  ]);
  let factsDroppedTotal = 0;
  let extractionsWithFacts = 0;
  for (const row of facts) {
    const src = String(row.source ?? "unknown");
    bumpCount(sourceTally, src);
    const pr = Number(row.pr_number);

    if (Array.isArray(row.verifier_verdicts)) {
      for (const v of row.verifier_verdicts) bumpCount(verifierTally, String(v));
    }

    // Skip non-numeric values so one bad row cannot turn the total into NaN.
    const dropped = Number(row.facts_dropped_by_verifier ?? 0);
    if (Number.isFinite(dropped)) factsDroppedTotal += dropped;

    const entities: any[] = Array.isArray(row.entities) ? row.entities : [];
    if ((Array.isArray(row.facts) && row.facts.length > 0) || entities.length > 0) {
      extractionsWithFacts++;
    }

    for (const e of entities) {
      const name = String(e?.name ?? "").trim();
      if (name.length < 3) continue; // too short to be a meaningful entity name
      // Entities are merged case-insensitively; the first spelling seen is kept for display.
      const key = name.toLowerCase();
      let agg = entAgg.get(key);
      if (agg === undefined) {
        agg = { name, count: 0, distinct_prs: new Set(), types: new Set() };
        entAgg.set(key, agg);
      }
      agg.count++;
      if (Number.isFinite(pr) && pr > 0) agg.distinct_prs.add(pr);
      if (e?.type) agg.types.add(String(e.type));
    }
  }

  const entityList = Array.from(entAgg.values());
  const entitiesIn2Plus = entityList.filter(a => a.distinct_prs.size >= 2).length;
  const entitiesIn5Plus = entityList.filter(a => a.distinct_prs.size >= 5).length;
  // entityList is a private copy, so sorting it in place is safe. The end
  // index is clamped because a negative one would trim from the tail.
  const topEntities = entityList
    .sort((a, b) => b.distinct_prs.size - a.distinct_prs.size || b.count - a.count)
    .slice(0, Math.max(0, args.top))
    .map(a => ({
      name: a.name,
      distinct_prs: a.distinct_prs.size,
      count: a.count,
      types: Array.from(a.types),
    }));

  return {
    audit_count: verdicts.length,
    verdict_distribution: Object.fromEntries(verdictTally),
    per_pr_verdict_sequences: byPr,
    verdict_instability: {
      pr_count: Object.keys(byPr).length,
      pr_with_multiple_verdicts: multi,
      pr_with_3plus: tri,
    },
    consensus,
    kb: {
      audit_lessons_rows: lessons.length,
      audit_facts_rows: facts.length,
      scrum_reviews_rows: reviews.length,
      distinct_finding_signatures: lessonAgg.size,
      distinct_entities_across_prs: entAgg.size,
      entities_in_2plus_prs: entitiesIn2Plus,
      entities_in_5plus_prs: entitiesIn5Plus,
    },
    fact_quality: {
      verifier_verdict_distribution: Object.fromEntries(verifierTally),
      facts_dropped_by_verifier_total: factsDroppedTotal,
      extraction_success_rate: facts.length > 0 ? extractionsWithFacts / facts.length : 0,
    },
    top_entities: topEntities,
    kb_by_source: Object.fromEntries(sourceTally),
  };
}
/**
 * Formats a {@link Stats} snapshot as the plain-text report.
 *
 * Sections appear in a fixed order: headline audit numbers, consensus,
 * KB size, fact quality, KB sources, then the top recurring entities.
 * Percentages fall back to 0 when their denominator is zero.
 *
 * @param s Statistics to render.
 * @returns The report as one newline-joined string (no trailing newline).
 */
function renderHuman(s: Stats): string {
  const { verdict_instability: drift, consensus, kb, fact_quality: quality } = s;

  let unstablePct = 0;
  if (drift.pr_count > 0) {
    unstablePct = Math.round(100 * drift.pr_with_multiple_verdicts / drift.pr_count);
  }

  let discrepancyRate = "0";
  if (s.audit_count > 0) {
    discrepancyRate = (100 * consensus.discrepancy_count / s.audit_count).toFixed(1);
  }

  const verdictSummary = Object.entries(s.verdict_distribution)
    .map(pair => `${pair[0]}=${pair[1]}`)
    .join(" ");
  const verifier = quality.verifier_verdict_distribution;
  const successPct = (quality.extraction_success_rate * 100).toFixed(1);
  const sourceLines = Object.entries(s.kb_by_source).map(([src, n]) => ` ${src}: ${n}`);
  const entityLines = s.top_entities.map(
    ent => ` [${ent.distinct_prs} PRs × ${ent.count} obs] ${ent.name} (${ent.types.join(",")})`,
  );

  const report: string[] = [
    "═══ KB STATS ═══",
    "",
    `Audits: ${s.audit_count} total across ${drift.pr_count} distinct PRs`,
    `Verdicts: ${verdictSummary}`,
    `Verdict instability: ${drift.pr_with_multiple_verdicts}/${drift.pr_count} PRs had 2+ distinct verdicts (${unstablePct}%) — 3+ distinct: ${drift.pr_with_3plus}`,
    "",
    "─── Consensus ───",
    ` discrepancies logged: ${consensus.discrepancy_count}`,
    ` tiebreaker used: ${consensus.tiebreaker_used}`,
    ` unresolved: ${consensus.unresolved}`,
    ` discrepancy rate: ${discrepancyRate}% of audits`,
    "",
    "─── KB size ───",
    ` audit_lessons.jsonl: ${kb.audit_lessons_rows} rows, ${kb.distinct_finding_signatures} distinct signatures`,
    ` audit_facts.jsonl: ${kb.audit_facts_rows} rows, ${kb.distinct_entities_across_prs} distinct entities`,
    ` scrum_reviews.jsonl: ${kb.scrum_reviews_rows} rows`,
    ` entities in 2+ PRs: ${kb.entities_in_2plus_prs}`,
    ` entities in 5+ PRs: ${kb.entities_in_5plus_prs} ← strong cross-cutting signal`,
    "",
    "─── Fact quality ───",
    ` verifier verdicts: CORRECT=${verifier.CORRECT ?? 0} UNVERIFIABLE=${verifier.UNVERIFIABLE ?? 0} UNCHECKED=${verifier.UNCHECKED ?? 0} INCORRECT=${verifier.INCORRECT ?? 0}`,
    ` facts dropped by verifier: ${quality.facts_dropped_by_verifier_total}`,
    ` extraction success rate: ${successPct}%`,
    "",
    "─── KB sources ───",
    ...sourceLines,
    "",
    `─── Top ${s.top_entities.length} recurring entities ───`,
    ...entityLines,
  ];
  return report.join("\n");
}
// Entry point: parse the CLI flags, gather the statistics, then print
// them either as pretty-printed JSON (--json) or as the text report.
async function main() {
  const options = parseArgs(process.argv);
  const snapshot = await collect(options);

  // JSON cannot represent Set values, so any that appear are emitted as arrays.
  const setsAsArrays = (_key: string, value: unknown) =>
    value instanceof Set ? Array.from(value) : value;

  const output = options.json
    ? JSON.stringify(snapshot, setsAsArrays, 2)
    : renderHuman(snapshot);
  console.log(output);
}

// Any failure is reported on stderr and the process exits with status 1.
main().catch((err) => {
  console.error("[kb_stats] fatal:", err);
  process.exit(1);
});