Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.
WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) excluded — see REPLICATION.md for
regen path. Full lakehouse history at git.agentview.dev/profit/lakehouse.
WHAT WAS PROVEN
- Vector retrieval across a multi-corpus matrix (chicago_permits + entity
briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
* UPSERT: ADD on new workflow, UPDATE bumps replay_count on identical
* REVISE: chains versions, parent.superseded_at + superseded_by stamped
* RETIRE: marks specific trace retired with reason, excluded from retrieval
* HISTORY: walks chain root→tip, cycle-safe
KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces
Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
102 lines
3.5 KiB
TypeScript
102 lines
3.5 KiB
TypeScript
// Bot-local knowledge base. Every finished cycle already persists a
|
|
// CycleResult to data/_bot/cycles/{id}.json — that IS the outcome log.
|
|
// KB here just reads that dir, filters to prior cycles on the same gap,
|
|
// and produces a short summary the cloud model can condition on.
|
|
//
|
|
// No separate jsonl, no new write path, no embedding calls. The bot's
|
|
// "memory" is the same primary artifact that the observer consumes.
|
|
//
|
|
// Future: embedding-based neighbor matching across gaps (cheap once
|
|
// sidecar is local), cross-pollination with scenario KB's
|
|
// pathway_recommendations. Not required for the feedback loop to work
|
|
// on a single gap — that's the floor we're building first.
|
|
|
|
import { readdir, readFile } from "node:fs/promises";
|
|
import { join } from "node:path";
|
|
import type { CycleResult } from "./types.ts";
|
|
|
|
// Directory where every finished cycle persists its CycleResult as
// {cycle_id}.json — the primary artifact this module reads back.
// NOTE(review): absolute path assumes the standard lakehouse checkout
// location; consider making it configurable — TODO confirm with owners.
const CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles";
|
|
|
|
/**
 * One prior cycle's outcome for a single gap, projected from the
 * CycleResult JSON persisted under the cycles directory (see loadHistory
 * for the exact field mapping).
 */
export interface HistoryEntry {
  /** CycleResult.cycle_id of the originating cycle. */
  cycle_id: string;
  /** Timestamp string from CycleResult.ended_at; used for newest-first sorting. */
  ended_at: string;
  /** Outcome tag, e.g. "ok", "tests_failed", "proposal_rejected", "cycle_noop". */
  outcome: string;
  /** Human-readable explanation carried over from CycleResult.reason. */
  reason: string;
  /** PR URL from the cycle, or null — presumably when no PR was opened. */
  pr_url: string | null;
  /** Test result from the cycle; null — presumably when tests did not run. */
  tests_green: boolean | null;
  /** Files added during the cycle; empty array when none were recorded. */
  files_added: string[];
  /** Files updated during the cycle; empty array when none were recorded. */
  files_updated: string[];
  /** Token usage recorded for the cycle. */
  tokens_used: number;
}
|
|
|
|
export async function loadHistory(gap_id: string, max: number = 5): Promise<HistoryEntry[]> {
|
|
let entries: string[] = [];
|
|
try {
|
|
entries = await readdir(CYCLES_DIR);
|
|
} catch {
|
|
return [];
|
|
}
|
|
const matches: HistoryEntry[] = [];
|
|
for (const e of entries) {
|
|
if (!e.endsWith(".json")) continue;
|
|
try {
|
|
const raw = await readFile(join(CYCLES_DIR, e), "utf8");
|
|
const r = JSON.parse(raw) as CycleResult;
|
|
if (r.gap?.id !== gap_id) continue;
|
|
matches.push({
|
|
cycle_id: r.cycle_id,
|
|
ended_at: r.ended_at,
|
|
outcome: r.outcome,
|
|
reason: r.reason,
|
|
pr_url: r.prUrl,
|
|
tests_green: r.testsGreen,
|
|
files_added: r.filesAdded ?? [],
|
|
files_updated: r.filesUpdated ?? [],
|
|
tokens_used: r.tokens_used,
|
|
});
|
|
} catch {
|
|
// Skip unreadable / malformed cycle files. Don't fail the current
|
|
// cycle because an old one is corrupt.
|
|
}
|
|
}
|
|
matches.sort((a, b) => b.ended_at.localeCompare(a.ended_at));
|
|
return matches.slice(0, max);
|
|
}
|
|
|
|
// Compact prompt-ready summary. Empty string when there's no history —
|
|
// caller can skip the "prior attempts" block entirely.
|
|
export function summarizeHistory(h: HistoryEntry[]): string {
|
|
if (h.length === 0) return "";
|
|
const lines = h.map(e => {
|
|
const when = e.ended_at.slice(0, 16).replace("T", " ");
|
|
const files = [...e.files_added, ...e.files_updated];
|
|
const filesStr = files.length > 0 ? ` touched: ${files.join(", ")}` : "";
|
|
const prStr = e.pr_url ? ` PR: ${e.pr_url}` : "";
|
|
return `- ${when} UTC — ${e.outcome}${prStr}${filesStr}\n reason: ${e.reason}`;
|
|
});
|
|
return [
|
|
`Prior attempts on this gap (${h.length} most recent):`,
|
|
...lines,
|
|
"",
|
|
"Learn from these: build on what worked, avoid paths that failed.",
|
|
].join("\n");
|
|
}
|
|
|
|
// Aggregate stats for telemetry — lets the bot expose "% of cycles on
|
|
// this gap that landed a PR" without re-parsing the raw history.
|
|
export function statsFor(h: HistoryEntry[]): {
|
|
attempts: number;
|
|
pr_opened: number;
|
|
tests_failed: number;
|
|
proposal_rejected: number;
|
|
noop: number;
|
|
} {
|
|
return {
|
|
attempts: h.length,
|
|
pr_opened: h.filter(e => e.outcome === "ok").length,
|
|
tests_failed: h.filter(e => e.outcome === "tests_failed").length,
|
|
proposal_rejected: h.filter(e => e.outcome === "proposal_rejected").length,
|
|
noop: h.filter(e => e.outcome === "cycle_noop").length,
|
|
};
|
|
}
|