profit ac01fffd9a checkpoint: matrix-agent-validated (2026-04-25)
Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.

WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) excluded — see REPLICATION.md for
regen path. Full lakehouse history at git.agentview.dev/profit/lakehouse.

WHAT WAS PROVEN
- Vector retrieval across a multi-corpus matrix (chicago_permits + entity
  briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
    * UPSERT: ADD on new workflow, UPDATE bumps replay_count on identical
    * REVISE: chains versions, parent.superseded_at + superseded_by stamped
    * RETIRE: marks specific trace retired with reason, excluded from retrieval
    * HISTORY: walks chain root→tip, cycle-safe

KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces

Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 19:43:27 -05:00

102 lines
3.5 KiB
TypeScript

// Bot-local knowledge base. Every finished cycle already persists a
// CycleResult to data/_bot/cycles/{id}.json — that IS the outcome log.
// KB here just reads that dir, filters to prior cycles on the same gap,
// and produces a short summary the cloud model can condition on.
//
// No separate jsonl, no new write path, no embedding calls. The bot's
// "memory" is the same primary artifact that the observer consumes.
//
// Future: embedding-based neighbor matching across gaps (cheap once
// sidecar is local), cross-pollination with scenario KB's
// pathway_recommendations. Not required for the feedback loop to work
// on a single gap — that's the floor we're building first.
import { readdir, readFile } from "node:fs/promises";
import { join } from "node:path";
import type { CycleResult } from "./types.ts";
const CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles";
/**
 * One prior cycle's outcome on a gap, flattened from the persisted
 * CycleResult JSON (data/_bot/cycles/{id}.json) into snake_case fields
 * ready for prompt building (summarizeHistory) and telemetry (statsFor).
 */
export interface HistoryEntry {
  /** Id of the cycle this entry was loaded from. */
  cycle_id: string;
  /** When the cycle ended (ISO-8601 string) — drives newest-first sorting. */
  ended_at: string;
  /** Terminal outcome tag; statsFor buckets "ok", "tests_failed", "proposal_rejected", "cycle_noop". */
  outcome: string;
  /** Human-readable reason accompanying the outcome. */
  reason: string;
  /** URL of the PR the cycle opened, or null when none was opened. */
  pr_url: string | null;
  /** Whether tests passed; null when unknown / not run. */
  tests_green: boolean | null;
  /** Files the cycle added (empty array when absent in the source record). */
  files_added: string[];
  /** Files the cycle updated (empty array when absent in the source record). */
  files_updated: string[];
  /** Total tokens consumed by the cycle. */
  tokens_used: number;
}
/**
 * Load up to `max` prior cycle outcomes for `gap_id`, newest first.
 *
 * Reads every *.json file under CYCLES_DIR, keeps those whose gap id
 * matches, and flattens them into HistoryEntry records. All candidate
 * files are read and parsed concurrently (they are independent), rather
 * than one await per loop iteration. Failure handling is deliberately
 * forgiving: a missing archive directory yields [], and an unreadable or
 * malformed cycle file is skipped, so a corrupt old cycle never fails
 * the current one.
 *
 * @param gap_id gap whose history to load
 * @param max    maximum number of entries to return (default 5)
 * @returns entries sorted by ended_at descending, truncated to `max`
 */
export async function loadHistory(gap_id: string, max: number = 5): Promise<HistoryEntry[]> {
  let names: string[] = [];
  try {
    names = await readdir(CYCLES_DIR);
  } catch {
    // No cycle archive yet (or unreadable) — treat as empty history.
    return [];
  }

  // Parse candidates in parallel; each settles to an entry or null
  // (wrong gap / unreadable / malformed).
  const parsed = await Promise.all(
    names
      .filter(name => name.endsWith(".json"))
      .map(async (name): Promise<HistoryEntry | null> => {
        try {
          const raw = await readFile(join(CYCLES_DIR, name), "utf8");
          const r = JSON.parse(raw) as CycleResult;
          if (r.gap?.id !== gap_id) return null;
          return {
            cycle_id: r.cycle_id,
            ended_at: r.ended_at,
            outcome: r.outcome,
            reason: r.reason,
            pr_url: r.prUrl,
            tests_green: r.testsGreen,
            files_added: r.filesAdded ?? [],
            files_updated: r.filesUpdated ?? [],
            tokens_used: r.tokens_used,
          };
        } catch {
          // Skip unreadable / malformed cycle files. Don't fail the current
          // cycle because an old one is corrupt.
          return null;
        }
      }),
  );

  const matches = parsed.filter((e): e is HistoryEntry => e !== null);
  matches.sort((a, b) => b.ended_at.localeCompare(a.ended_at));
  return matches.slice(0, max);
}
// Render prior attempts as a compact, prompt-ready block. Yields the empty
// string when there is no history so the caller can omit the
// "prior attempts" section entirely.
export function summarizeHistory(h: HistoryEntry[]): string {
  if (!h.length) return "";

  const bullets: string[] = [];
  for (const entry of h) {
    // "2026-04-25T19:43:27Z" -> "2026-04-25 19:43"
    const stamp = entry.ended_at.slice(0, 16).replace("T", " ");
    const touched = entry.files_added.concat(entry.files_updated);

    let line = `- ${stamp} UTC — ${entry.outcome}`;
    if (entry.pr_url) line += ` PR: ${entry.pr_url}`;
    if (touched.length > 0) line += ` touched: ${touched.join(", ")}`;
    bullets.push(line + "\n reason: " + entry.reason);
  }

  const header = `Prior attempts on this gap (${h.length} most recent):`;
  const footer = "Learn from these: build on what worked, avoid paths that failed.";
  return [header, ...bullets, "", footer].join("\n");
}
// Aggregate stats for telemetry — lets the bot expose "% of cycles on this
// gap that landed a PR" without re-parsing the raw history. Single pass:
// each entry's outcome is bucketed once; unrecognized outcomes only count
// toward `attempts`.
export function statsFor(h: HistoryEntry[]): {
  attempts: number;
  pr_opened: number;
  tests_failed: number;
  proposal_rejected: number;
  noop: number;
} {
  const tally = {
    attempts: h.length,
    pr_opened: 0,
    tests_failed: 0,
    proposal_rejected: 0,
    noop: 0,
  };
  for (const entry of h) {
    switch (entry.outcome) {
      case "ok":
        tally.pr_opened += 1;
        break;
      case "tests_failed":
        tally.tests_failed += 1;
        break;
      case "proposal_rejected":
        tally.proposal_rejected += 1;
        break;
      case "cycle_noop":
        tally.noop += 1;
        break;
    }
  }
  return tally;
}