profit ac01fffd9a checkpoint: matrix-agent-validated (2026-04-25)
Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.

WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) excluded — see REPLICATION.md for
regen path. Full lakehouse history at git.agentview.dev/profit/lakehouse.

WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
  briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory (sketched below):
    * UPSERT: ADD on new workflow, UPDATE bumps replay_count on identical
    * REVISE: chains versions, parent.superseded_at + superseded_by stamped
    * RETIRE: marks specific trace retired with reason, excluded from retrieval
    * HISTORY: walks chain root→tip, cycle-safe
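
A minimal sketch of the version chain and the cycle-safe HISTORY walk,
in TypeScript for illustration only. The real implementation is the Rust
in crates/vectord/src/pathway_memory.rs; every field and function name
below is an assumption, not the actual API:

    interface TraceVersion {
      id: string;
      superseded_by?: string;  // stamped on the parent by REVISE
      superseded_at?: string;
      retired_reason?: string; // set by RETIRE; excluded from retrieval
      replay_count: number;    // bumped by UPDATE on identical workflow
    }

    // HISTORY: follow superseded_by pointers root→tip, refusing to
    // revisit an id so a corrupted chain cannot loop forever.
    function history(byId: Map<string, TraceVersion>, rootId: string): TraceVersion[] {
      const seen = new Set<string>();
      const chain: TraceVersion[] = [];
      let cur = byId.get(rootId);
      while (cur && !seen.has(cur.id)) {
        seen.add(cur.id);
        chain.push(cur);
        cur = cur.superseded_by ? byId.get(cur.superseded_by) : undefined;
      }
      return chain;
    }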

KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces

Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 19:43:27 -05:00

// kb_index — generic on-the-fly aggregation over append-only JSONL
// scratchpads (audit_lessons, scrum_reviews, outcomes, observer ops).
//
// The mem0 insight: raw rows are CHEAP and tell the full story, but
// downstream prompts need a DEFINITION, not a log. A definition is
// the aggregate: "this signature has fired N times across M distinct
// scopes, first_seen=X, last_seen=Y, confidence=M/N."
//
// This library is the single shared aggregator. Every KB writer keeps
// appending raw rows; every KB reader uses aggregate() instead of
// tailing the raw stream. No second file to sync, no ADD/UPDATE/NOOP
// routing — the stats roll up from the raw rows every time.
//
// Why this works past hundreds of runs:
// - aggregate() is bounded by distinct_signatures, not total_rows.
// - confidence = distinct_scopes / count — low for same-scope noise,
//   high for cross-scope patterns. Downstream severity ramps on
//   confidence × count, not raw count, so one unfixed PR can't
//   inflate its own recurrence score (the classic mem0 failure).
// - rotation (later) moves old raw to archive files; aggregate()
//   can still read both to compute lifetime counts when needed.
import { readFile } from "node:fs/promises";

export interface AggregateRow {
  signature: string;
  count: number;
  distinct_scopes: number;
  first_seen: string;
  last_seen: string;
  confidence: number; // distinct_scopes / count — capped at 1.0
  representative_summary: string; // most-recent summary for this signature
  scopes: string[]; // up to 20 most-recent scopes for debugging
  checks: string[]; // distinct `check` values (audit_lessons-specific)
}

export interface AggregateOptions<T> {
  /** How to extract the dedup key from a row. */
  keyFn: (row: T) => string | undefined;
  /** How to extract the "scope" — distinct scopes count gives confidence. */
  scopeFn: (row: T) => string | undefined;
  /** How to extract the timestamp (defaults to row.audited_at / row.reviewed_at / row.timestamp / row.ran_at). */
  timeFn?: (row: T) => string | undefined;
  /** How to extract a representative summary (defaults to row.summary). */
  summaryFn?: (row: T) => string | undefined;
  /** Max rows to read from the JSONL tail; 0 = read all. */
  tailLimit?: number;
  /** Include per-row check field (for multi-check aggregates). */
  checkFn?: (row: T) => string | undefined;
}

/**
 * Read a JSONL file and produce the aggregate map keyed by signature.
 * Safe on missing or malformed files — returns an empty map.
 */
export async function aggregate<T = any>(
  jsonlPath: string,
  opts: AggregateOptions<T>,
): Promise<Map<string, AggregateRow>> {
  const out = new Map<string, AggregateRow>();
  let raw: string;
  try { raw = await readFile(jsonlPath, "utf8"); } catch { return out; }
  const lines = raw.split("\n").filter(l => l.length > 0);
  const sliceFrom = opts.tailLimit && opts.tailLimit > 0 ? Math.max(0, lines.length - opts.tailLimit) : 0;
  const timeFn = opts.timeFn ?? ((r: any) => r?.audited_at ?? r?.reviewed_at ?? r?.timestamp ?? r?.ran_at);
  const summaryFn = opts.summaryFn ?? ((r: any) => r?.summary ?? r?.representative_summary);
  // Per-signature scope tracking — a Set<scope> per signature, so
  // distinct_scopes doesn't double-count a scope that appears 50 times.
  const scopeSets = new Map<string, Set<string>>();
  const checkSets = new Map<string, Set<string>>();
  for (let i = sliceFrom; i < lines.length; i++) {
    let row: T;
    try { row = JSON.parse(lines[i]) as T; } catch { continue; }
    const sig = opts.keyFn(row);
    if (!sig) continue;
    let agg = out.get(sig);
    if (!agg) {
      agg = {
        signature: sig,
        count: 0,
        distinct_scopes: 0,
        first_seen: "",
        last_seen: "",
        confidence: 0,
        representative_summary: "",
        scopes: [],
        checks: [],
      };
      out.set(sig, agg);
      scopeSets.set(sig, new Set<string>());
      checkSets.set(sig, new Set<string>());
    }
    agg.count += 1;
    const scope = opts.scopeFn(row);
    if (scope !== undefined && scope !== null && scope !== "") {
      scopeSets.get(sig)!.add(String(scope));
      // Keep the scopes array ordered by recency (newest last; drop the
      // oldest when at the 20-entry cap).
      const arr = agg.scopes;
      const s = String(scope);
      const existing = arr.indexOf(s);
      if (existing >= 0) arr.splice(existing, 1);
      arr.push(s);
      if (arr.length > 20) arr.shift();
    }
    if (opts.checkFn) {
      const c = opts.checkFn(row);
      if (c) checkSets.get(sig)!.add(String(c));
    }
    const t = timeFn(row);
    if (t) {
      if (!agg.first_seen || t < agg.first_seen) agg.first_seen = t;
      if (!agg.last_seen || t > agg.last_seen) agg.last_seen = t;
    }
    const s = summaryFn(row);
    if (s) agg.representative_summary = String(s);
  }
  // Finalize derived fields.
  for (const [sig, agg] of out) {
    const scopes = scopeSets.get(sig) ?? new Set<string>();
    agg.distinct_scopes = scopes.size;
    agg.confidence = agg.count > 0 ? Math.min(1, agg.distinct_scopes / agg.count) : 0;
    const checks = checkSets.get(sig);
    if (checks) agg.checks = Array.from(checks).sort();
  }
  return out;
}
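
// Example usage (hypothetical: the path and the JSONL field names
// `signature`, `pr`, and `check` are illustrative, not fixed by this
// library — each caller supplies its own extractors):
//
//   const lessons = await aggregate(".kb/audit_lessons.jsonl", {
//     keyFn: (r: any) => r?.signature,
//     scopeFn: (r: any) => (r?.pr ? `pr:${r.pr}` : undefined),
//     checkFn: (r: any) => r?.check,
//     tailLimit: 5000, // bound the read on very long logs
//   });
//   for (const agg of lessons.values()) {
//     console.log(ratingSeverity(agg), agg.signature, formatAgg(agg));
//   }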

/**
 * Severity policy derived from aggregate stats. The rating lives here
 * (not in each check) so all KB readers ramp severity consistently.
 *
 * - The confidence × count product is the real signal.
 * - Low confidence (< 0.3) = same-scope noise → info regardless of count.
 * - Mid confidence (0.3–0.6) = mixed signal → warn at count ≥ 3.
 * - High confidence (≥ 0.6) with count ≥ 5 = block-worthy cross-cutting pattern.
 *
 * Callers can override by reading agg directly; this is the default
 * policy that matches the "don't escalate one unfixed PR" discipline.
 */
export function ratingSeverity(agg: AggregateRow): "info" | "warn" | "block" {
  if (agg.confidence >= 0.6 && agg.count >= 5) return "block";
  if (agg.confidence >= 0.3 && agg.count >= 3) return "warn";
  return "info";
}
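
// Worked examples of the default thresholds (values illustrative):
//   count=10, distinct_scopes=1 → confidence=0.10 → "info"  (same-scope noise)
//   count=4,  distinct_scopes=2 → confidence=0.50 → "warn"
//   count=6,  distinct_scopes=5 → confidence≈0.83 → "block" (cross-cutting)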

/** Human-friendly one-line summary of an aggregate row, for citing as finding evidence. */
export function formatAgg(agg: AggregateRow): string {
  return `count=${agg.count} distinct_scopes=${agg.distinct_scopes} confidence=${agg.confidence.toFixed(2)} seen=[${agg.first_seen.slice(0, 10)}..${agg.last_seen.slice(0, 10)}]`;
}