profit ac01fffd9a checkpoint: matrix-agent-validated (2026-04-25)
Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.

WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) excluded — see REPLICATION.md for
regen path. Full lakehouse history at git.agentview.dev/profit/lakehouse.

WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
  briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
    * UPSERT: ADD on new workflow, UPDATE bumps replay_count on identical
    * REVISE: chains versions, parent.superseded_at + superseded_by stamped
    * RETIRE: marks specific trace retired with reason, excluded from retrieval
    * HISTORY: walks chain root→tip, cycle-safe

KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces

Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 19:43:27 -05:00

148 lines
4.9 KiB
TypeScript

// Auditor poller — the top-level entry. Polls Gitea for open PRs on
// a fixed interval, dedupes by head SHA, runs audit + posts verdict
// for each new (pr, sha) pair.
//
// Run manually:
// bun run auditor/index.ts
//
// Stop:
// touch /home/profit/lakehouse/auditor.paused (every cycle is skipped while the file exists; remove it to resume)
// pkill -f auditor/index.ts (kills in-flight)
//
// State:
// data/_auditor/state.json — last-audited SHA per PR
// data/_auditor/verdicts/{id}.json — per-run verdict records
//
// This entry runs forever. A systemd unit would wrap it once the
// workflow is trusted (same pattern as mcp-server, observer).
import { readFile, writeFile, mkdir, access, rename } from "node:fs/promises";
import { dirname } from "node:path";
import { listOpenPrs } from "./gitea.ts";
import { auditPr } from "./audit.ts";
// Pause between the end of one polling cycle and the start of the next,
// in milliseconds (90 seconds) — leaves enough budget for audit runs to
// complete.
const POLL_INTERVAL_MS = 90 * 1_000;

// Sentinel file: a cycle is skipped whenever this path exists.
const PAUSE_FILE = "/home/profit/lakehouse/auditor.paused";

// JSON file holding the poller state that is persisted between cycles.
const STATE_FILE = "/home/profit/lakehouse/data/_auditor/state.json";
/** Poller bookkeeping that is persisted to disk between cycles. */
interface State {
  /**
   * PR number (as a string key) → head SHA of the last successful audit.
   * Used to dedupe audits across restarts, so a crashed/restarted poller
   * does not re-audit every open PR from scratch.
   */
  last_audited: Record<string, string>;
  /** ISO-8601 timestamp set when this state was first created. */
  started_at: string;
  /** Number of cycles started, including paused ones. */
  cycles_total: number;
  /** Number of cycles skipped because the pause file was present. */
  cycles_skipped_paused: number;
  /** Number of audits that completed without throwing. */
  audits_run: number;
  /** ISO-8601 timestamp of the most recent cycle start, if any. */
  last_cycle_at?: string;
}
/**
 * Report whether `path` can be accessed.
 *
 * @param path Filesystem path to probe.
 * @returns `true` when `access(path)` succeeds, `false` when it rejects
 *          for any reason. Never rejects.
 */
async function fileExists(path: string): Promise<boolean> {
  return access(path).then(
    () => true,
    () => false,
  );
}
/**
 * Load the persisted poller state from STATE_FILE.
 *
 * Never throws. A missing file is the normal first-run case and silently
 * yields a fresh state. Any other failure (unreadable file, invalid JSON,
 * payload that is not a JSON object) is logged and also yields a fresh
 * state, so the poller keeps running but the reset is visible in the logs.
 * Individual fields that are absent or of the wrong type fall back to
 * their defaults.
 *
 * @returns The restored state, or a fresh zeroed state.
 */
async function loadState(): Promise<State> {
  const freshState = (): State => ({
    last_audited: {},
    started_at: new Date().toISOString(),
    cycles_total: 0,
    cycles_skipped_paused: 0,
    audits_run: 0,
  });
  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  let raw: string;
  try {
    raw = await readFile(STATE_FILE, "utf8");
  } catch (e) {
    // ENOENT just means "no state saved yet"; anything else is worth a log line.
    const code = (e as { code?: unknown } | null | undefined)?.code;
    if (code !== "ENOENT") {
      console.error(`[auditor] could not read state file ${STATE_FILE}: ${describe(e)} — starting with fresh state`);
    }
    return freshState();
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (e) {
    console.error(`[auditor] state file ${STATE_FILE} is not valid JSON: ${describe(e)} — starting with fresh state`);
    return freshState();
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    console.error(`[auditor] state file ${STATE_FILE} does not contain a JSON object — starting with fresh state`);
    return freshState();
  }

  const stored = parsed as Record<string, unknown>;
  // Counters must be real numbers: a string here would turn `+= 1` into
  // string concatenation later on.
  const counter = (v: unknown): number =>
    typeof v === "number" && Number.isFinite(v) ? v : 0;

  // Keep only string SHAs; anything else could never match a head SHA anyway.
  const lastAudited: Record<string, string> = {};
  const storedAudited = stored.last_audited;
  if (typeof storedAudited === "object" && storedAudited !== null && !Array.isArray(storedAudited)) {
    for (const [prNumber, sha] of Object.entries(storedAudited)) {
      if (typeof sha === "string") lastAudited[prNumber] = sha;
    }
  }

  return {
    last_audited: lastAudited,
    started_at: typeof stored.started_at === "string" ? stored.started_at : new Date().toISOString(),
    cycles_total: counter(stored.cycles_total),
    cycles_skipped_paused: counter(stored.cycles_skipped_paused),
    audits_run: counter(stored.audits_run),
    last_cycle_at: typeof stored.last_cycle_at === "string" ? stored.last_cycle_at : undefined,
  };
}
/**
 * Persist the poller state to STATE_FILE as pretty-printed JSON.
 *
 * The JSON is written to a sibling temp file first and then renamed over
 * STATE_FILE. A process killed mid-write therefore leaves the previous
 * state file intact instead of a truncated one (which loadState would
 * discard, causing every open PR to be audited again).
 *
 * @param s State to serialise.
 * @throws Propagates any filesystem error from mkdir / writeFile / rename.
 */
async function saveState(s: State): Promise<void> {
  // Derive the directory from STATE_FILE so the two cannot drift apart.
  await mkdir(dirname(STATE_FILE), { recursive: true });
  const tmpFile = `${STATE_FILE}.tmp`;
  await writeFile(tmpFile, JSON.stringify(s, null, 2));
  // Same-directory rename replaces the target in one step on POSIX filesystems.
  await rename(tmpFile, STATE_FILE);
}
/**
 * Run one polling cycle: list open PRs and audit every PR whose head SHA
 * differs from the one recorded in `state.last_audited`.
 *
 * `cycles_total` and `last_cycle_at` are updated before anything else, so
 * paused and failed cycles are counted too. A PR's SHA is recorded only
 * after its audit resolves; a failed audit is therefore retried on the
 * next cycle. Failures of `listOpenPrs` or of an individual audit are
 * logged and do not propagate.
 *
 * @param state Current poller state; mutated in place.
 * @returns The same `state` object, updated.
 */
async function runCycle(state: State): Promise<State> {
  // Anything can be thrown in JS (including null), so never assume `.message`
  // exists — reading it off null inside a catch handler would itself throw
  // and take the whole poller down.
  const describeError = (e: unknown): string =>
    e instanceof Error ? e.message : String(e);

  state.cycles_total += 1;
  state.last_cycle_at = new Date().toISOString();

  if (await fileExists(PAUSE_FILE)) {
    state.cycles_skipped_paused += 1;
    console.log(`[auditor] cycle ${state.cycles_total}: paused (touch ${PAUSE_FILE} exists)`);
    return state;
  }

  let prs;
  try {
    prs = await listOpenPrs();
  } catch (e) {
    console.error(`[auditor] listOpenPrs failed: ${describeError(e)}`);
    return state;
  }
  console.log(`[auditor] cycle ${state.cycles_total}: ${prs.length} open PR(s)`);

  for (const pr of prs) {
    const last = state.last_audited[String(pr.number)];
    if (last === pr.head_sha) {
      console.log(`[auditor] skip PR #${pr.number} (SHA ${pr.head_sha.slice(0, 8)} already audited)`);
      continue;
    }
    console.log(`[auditor] audit PR #${pr.number} (${pr.head_sha.slice(0, 8)}) — ${pr.title.slice(0, 60)}`);
    try {
      // Skip dynamic by default: it mutates live playbook state and
      // re-runs on every PR update would pollute quickly. Operator
      // can run dynamic via `bun run auditor/fixtures/cli.ts` manually
      // OR set LH_AUDITOR_RUN_DYNAMIC=1 to opt in.
      const run_dynamic = process.env.LH_AUDITOR_RUN_DYNAMIC === "1";
      const verdict = await auditPr(pr, {
        skip_dynamic: !run_dynamic,
        // Inference runs unless LH_AUDITOR_SKIP_INFERENCE=1 is set.
        skip_inference: process.env.LH_AUDITOR_SKIP_INFERENCE === "1",
      });
      console.log(`[auditor] verdict=${verdict.overall} findings=${verdict.metrics.findings_total} (block=${verdict.metrics.findings_block} warn=${verdict.metrics.findings_warn})`);
      // Record the SHA only after the audit resolved, so failures are retried.
      state.last_audited[String(pr.number)] = pr.head_sha;
      state.audits_run += 1;
    } catch (e) {
      console.error(`[auditor] audit failed: ${describeError(e)}`);
    }
  }
  return state;
}
/**
 * Poller entry point.
 *
 * Logs the configuration, restores persisted state, then either runs a
 * single cycle (when `--once` is on the command line) or loops forever:
 * run a cycle, save state, sleep POLL_INTERVAL_MS, repeat.
 *
 * @returns Resolves only in `--once` mode; otherwise never resolves.
 * @throws Rejects if loadState, runCycle or saveState rejects.
 */
async function main(): Promise<void> {
  console.log(`[auditor] starting poller — interval ${POLL_INTERVAL_MS / 1000}s`);
  console.log(`[auditor] pause file: ${PAUSE_FILE}`);
  console.log(`[auditor] state file: ${STATE_FILE}`);

  let state = await loadState();
  const knownPrCount = Object.keys(state.last_audited).length;
  console.log(`[auditor] loaded state: ${knownPrCount} PRs previously audited, ${state.cycles_total} cycles so far`);

  // `bun run auditor/index.ts --once` → one cycle, then exit (CLI testing).
  const singleShot = process.argv.includes("--once");
  if (singleShot) {
    state = await runCycle(state);
    await saveState(state);
    console.log(`[auditor] single-shot complete. total audits: ${state.audits_run}`);
    return;
  }

  // Daemon mode: state is saved after every cycle, then we sleep.
  const sleep = (ms: number) => new Promise((wake) => setTimeout(wake, ms));
  for (;;) {
    state = await runCycle(state);
    await saveState(state);
    await sleep(POLL_INTERVAL_MS);
  }
}
// Start the poller; any rejection escaping main() is logged and the
// process exits with status 1.
main().catch((fatalError) => {
  console.error("[auditor] fatal:", fatalError);
  process.exit(1);
});