profit ac01fffd9a checkpoint: matrix-agent-validated (2026-04-25)
Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.

WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) excluded — see REPLICATION.md for
regen path. Full lakehouse history at git.agentview.dev/profit/lakehouse.

WHAT WAS PROVEN
- Vector retrieval across a multi-corpus matrix (chicago_permits + entity
  briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
    * UPSERT: ADD on new workflow, UPDATE bumps replay_count on identical
    * REVISE: chains versions, parent.superseded_at + superseded_by stamped
    * RETIRE: marks specific trace retired with reason, excluded from retrieval
    * HISTORY: walks chain root→tip, cycle-safe

KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces

Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 19:43:27 -05:00

346 lines
12 KiB
TypeScript

// Single-cycle orchestrator. This is the manual entry point:
// bun run bot/cycle.ts
//
// Steps (each step can short-circuit with a clear outcome):
// 1. Pause-file check → skipped_pause
// 2. Cost budget check → skipped_cost
// 3. Gather CycleContext (dirty tree, last cycle, autotune status)
// 4. policy.shouldRunCycle → skipped_dirty_tree (tree dirty) / skipped_policy
// 5. findGaps + policy.pickGap → skipped_no_gap
// 6. generateProposal → model_failed
// 7. policy.scoreProposal → proposal_rejected
// 8. git checkout new branch → apply_failed (branch exists or checkout fails)
// 9. applyProposal → apply_failed / cycle_noop (nothing changed)
// 10. runTests → tests_failed
// 11. policy.shouldOpenPR → pr_skipped_by_policy
// 12. git commit + push → pr_failed
// 13. openPr → pr_failed / ok
// 14. recordCost (after the model call) + postEvent (every outcome)
// 15. write cycle result to data/_bot/cycles/{cycle_id}.json
import { readFile, writeFile, mkdir, access } from "node:fs/promises";
import { createHash, randomUUID } from "node:crypto";
import { spawnSync } from "node:child_process";
import { join } from "node:path";
import type { CycleContext, CycleResult, CycleOutcome, Proposal } from "./types.ts";
import * as policy from "./policy.ts";
import { readCost, recordCost, budgetCheck, DAILY_CALLS_BUDGET, DAILY_TOKENS_BUDGET } from "./cost.ts";
import { findGaps, generateProposal } from "./propose.ts";
import { applyProposal } from "./apply.ts";
import { runTests } from "./test.ts";
import { openPr } from "./pr.ts";
import { postEvent } from "./observer.ts";
import { loadHistory, summarizeHistory, statsFor } from "./kb.ts";
// Absolute path of the lakehouse checkout; every git command in this file
// runs with this directory as its cwd.
const REPO_ROOT = "/home/profit/lakehouse";
// A cycle that finds this file on disk stops immediately with `skipped_pause`.
const PAUSE_FILE = REPO_ROOT + "/bot.paused";
// One `<cycle_id>.json` result file is written here per cycle.
const CYCLES_DIR = REPO_ROOT + "/data/_bot/cycles";
// Base URL used for the autotune status probe; LH_GATEWAY_URL overrides the
// localhost default.
const GATEWAY_URL = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
/** Write one progress line to stdout, tagged with the `[bot]` prefix. */
function log(msg: string): void {
  const line = "[bot] " + msg;
  console.log(line);
}
/**
 * Report whether `p` can be accessed on disk.
 *
 * @param p Path to probe.
 * @returns `true` when `access(p)` succeeds, `false` when it rejects for any
 *          reason (missing path, permission denied, ...).
 */
async function fileExists(p: string): Promise<boolean> {
  return access(p).then(
    () => true,
    () => false,
  );
}
/**
 * Run `git <args>` synchronously with REPO_ROOT as the working directory.
 *
 * Never throws: failures are reported through the returned object.
 *
 * @param args Arguments passed to git verbatim (no shell is involved).
 * @returns `code` is git's exit status, or -1 when no exit status exists
 *          (git could not be started, or it was killed by a signal).
 *          `stdout` / `stderr` are the captured UTF-8 streams, `""` when absent.
 */
function git(args: string[]): { code: number; stdout: string; stderr: string } {
  const r = spawnSync("git", args, { cwd: REPO_ROOT, encoding: "utf8" });
  const stdout = r.stdout ?? "";
  let stderr = r.stderr ?? "";
  // With no exit status git itself wrote nothing, so callers that print
  // `stderr` would show a blank reason. Surface why the process never
  // finished instead.
  if (r.status === null && stderr.trim() === "") {
    stderr = r.error
      ? `git could not be started: ${r.error.message}`
      : `git terminated by signal ${r.signal ?? "unknown"}`;
  }
  return { code: r.status ?? -1, stdout, stderr };
}
/**
 * Collect the facts the policy layer needs before a cycle may run.
 *
 * @param cost Calls / tokens already spent, copied into the context as the
 *             daily usage figures.
 * @returns A CycleContext holding: the current timestamp, usage and budgets,
 *          `started_at` and gap id of the most recently modified cycle record
 *          (null when none is readable), whether the autotune agent reports
 *          itself busy, and whether `git status --porcelain` lists any change.
 */
async function gatherContext(cost: { calls: number; tokens: number }): Promise<CycleContext> {
  // Last cycle: the `.json` file in CYCLES_DIR with the newest mtime.
  let lastCycleAt: string | null = null;
  let lastCycleGapId: string | null = null;
  try {
    const { readdir, stat } = await import("node:fs/promises");
    // A missing directory simply means no cycle has been recorded yet.
    const entries = await readdir(CYCLES_DIR).catch(() => [] as string[]);
    let newestTs = 0;
    let newestPath: string | null = null;
    for (const name of entries) {
      if (!name.endsWith(".json")) continue;
      const p = join(CYCLES_DIR, name);
      let mtimeMs: number;
      try {
        mtimeMs = (await stat(p)).mtimeMs;
      } catch {
        // The entry vanished (or became unreadable) between readdir and stat;
        // skip it rather than abandoning the scan of the remaining files.
        continue;
      }
      if (mtimeMs > newestTs) {
        newestTs = mtimeMs;
        newestPath = p;
      }
    }
    if (newestPath) {
      // The file is external input: check each field before trusting it.
      const parsed: unknown = JSON.parse(await readFile(newestPath, "utf8"));
      if (typeof parsed === "object" && parsed !== null) {
        const rec = parsed as { started_at?: unknown; gap?: { id?: unknown } | null };
        if (typeof rec.started_at === "string") lastCycleAt = rec.started_at;
        const gapId = rec.gap?.id;
        if (typeof gapId === "string") lastCycleGapId = gapId;
      }
    }
  } catch (e) {
    // Best-effort: an unreadable or corrupt record must not block the cycle,
    // but say so instead of hiding it.
    console.warn(`[bot] could not read last cycle record: ${e instanceof Error ? e.message : String(e)}`);
  }

  // Autotune status — if the agent is busy, skip this cycle to avoid
  // contention on shared Ollama GPU memory.
  let autotuneBusy = false;
  try {
    const r = await fetch(`${GATEWAY_URL}/vectors/agent/status`, { signal: AbortSignal.timeout(2000) });
    if (r.ok) {
      const body: unknown = await r.json();
      if (typeof body === "object" && body !== null) {
        const { running, queue_depth } = body as { running?: unknown; queue_depth?: unknown };
        // Busy only when the agent is running AND has queued work.
        autotuneBusy = Boolean(running) && Number(queue_depth ?? 0) > 0;
      }
    }
  } catch {
    // Deliberately silent: an unreachable, slow (>2 s) or malformed gateway
    // response is treated as "not busy".
  }

  // Dirty tree check.
  const status = git(["status", "--porcelain"]);
  if (status.code !== 0) {
    // A failed status is reported as "not dirty" below; leave a trace so the
    // cause is visible when a later git step fails.
    console.warn(`[bot] git status failed (code ${status.code}): ${status.stderr.trim()}`);
  }
  const workingTreeDirty = status.code === 0 && status.stdout.trim().length > 0;

  return {
    startedAt: new Date().toISOString(),
    dailyCallsUsed: cost.calls,
    dailyCallsBudget: DAILY_CALLS_BUDGET,
    dailyTokensUsed: cost.tokens,
    dailyTokensBudget: DAILY_TOKENS_BUDGET,
    lastCycleAt,
    lastCycleGapId,
    autotuneBusy,
    workingTreeDirty,
  };
}
/**
 * Write a cycle result to `CYCLES_DIR/<cycle_id>.json` as 2-space-indented
 * JSON, creating the directory (and any missing parents) first.
 *
 * @param res The finished cycle record to store.
 */
async function persistResult(res: CycleResult): Promise<void> {
  await mkdir(CYCLES_DIR, { recursive: true });
  const target = join(CYCLES_DIR, res.cycle_id + ".json");
  const payload = JSON.stringify(res, null, 2);
  await writeFile(target, payload);
}
/**
 * Build a short signature for a (gap, proposal summary) pair.
 *
 * @param gap_id Gap identifier; `null` is hashed as the literal `"no-gap"`.
 * @param proposalSummary Summary text of the proposal (may be empty).
 * @returns The first 16 hex characters of SHA-256 over
 *          `<gap_id or "no-gap">|<proposalSummary>`.
 */
function sigHash(gap_id: string | null, proposalSummary: string): string {
  const gapPart = gap_id ?? "no-gap";
  const fullDigest = createHash("sha256")
    .update(gapPart)
    .update("|")
    .update(proposalSummary)
    .digest("hex");
  return fullDigest.substring(0, 16);
}
/**
 * Report a finished cycle through `postEvent`.
 *
 * The event carries the cycle id, a signature hash of (gap id, proposal
 * summary), the outcome (with `ok` true only for `"ok"`), the model used, the
 * number of cloud calls, the wall-clock duration derived from the result's
 * own timestamps, and an `extra` bag with reason, PR URL, file lists and
 * test status.
 *
 * @param res The cycle result to report.
 * @returns The promise returned by `postEvent`.
 */
function emit(res: CycleResult): Promise<void> {
  const { gap, proposal, outcome } = res;
  const signature = sigHash(gap?.id ?? null, proposal?.summary ?? "");
  const elapsedMs = new Date(res.ended_at).getTime() - new Date(res.started_at).getTime();
  const extra = {
    reason: res.reason,
    pr_url: res.prUrl,
    files_added: res.filesAdded,
    files_updated: res.filesUpdated,
    files_noop: res.filesNoop,
    tests_green: res.testsGreen,
  };
  return postEvent({
    source: "bot",
    cycle_id: res.cycle_id,
    sig_hash: signature,
    event_kind: outcome,
    ok: outcome === "ok",
    staffer_id: proposal?.model_used,
    turns: res.cloud_calls,
    duration_ms: elapsedMs,
    extra,
  });
}
/**
 * Run one bot cycle end to end and return the recorded result.
 *
 * Every exit goes through `finish`, which persists the result, emits the
 * observer event and logs the outcome. The steps are, in order: pause check,
 * cost budget, context + policy gate, gap selection, proposal generation,
 * proposal scoring, branch creation, apply, tests, PR gate, commit + push,
 * open PR.
 *
 * Working-tree contract:
 *  - cycles abandoned after the branch was created (apply_failed,
 *    tests_failed, pr_skipped_by_policy) revert what was applied, return to
 *    the base branch and delete the cycle branch;
 *  - once the commit exists, the repo is switched back to the base branch so
 *    the next cycle does not fork from this cycle's branch;
 *  - a failed `git add` / `git commit` leaves the repo on the cycle branch
 *    with the changes in place for inspection.
 *
 * @returns The CycleResult that was persisted and emitted.
 */
async function main(): Promise<CycleResult> {
  const cycle_id = `bot-${new Date().toISOString().replace(/[:.]/g, "-")}-${randomUUID().slice(0, 8)}`;
  const started_at = new Date().toISOString();
  log(`cycle ${cycle_id} starting`);

  // Branch that cycle branches are cut from and that the repo returns to.
  const BASE_BRANCH = "main";

  // Thrown values are `unknown`; only Error instances carry `.message`.
  const errorMessage = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  const finish = async (outcome: CycleOutcome, reason: string, extra: Partial<CycleResult> = {}): Promise<CycleResult> => {
    const terminal = { cycle_id, started_at, ended_at: new Date().toISOString(), outcome, reason };
    const res: CycleResult = {
      // Spread first only to keep these keys at the top of the persisted JSON.
      ...terminal,
      gap: null,
      proposal: null,
      filesAdded: [],
      filesUpdated: [],
      filesNoop: [],
      testsGreen: null,
      testsOutput: "",
      prUrl: null,
      tokens_used: 0,
      cloud_calls: 0,
      ...extra,
      // Spread again LAST: callers pass a full CycleResult (`partial`, which
      // holds outcome "ok", an empty reason and an earlier ended_at) as
      // `extra`, and that must never overwrite the real outcome of this exit.
      ...terminal,
    };
    await persistResult(res);
    await emit(res);
    log(`${outcome}: ${reason}`);
    return res;
  };

  // Throw away an abandoned cycle: restore the working tree, go back to the
  // base branch and delete the cycle branch.
  const abandonBranch = (cycleBranch: string, added: readonly string[]): void => {
    // Revert edits to tracked files.
    git(["checkout", "."]);
    // `git checkout .` does not touch untracked files, so files the proposal
    // created would otherwise survive and leave the tree dirty. The length
    // guard matters: `git clean` without a pathspec would remove every
    // untracked file in the repository.
    if (added.length > 0) git(["clean", "-f", "--", ...added]);
    git(["checkout", BASE_BRANCH]);
    git(["branch", "-D", cycleBranch]);
  };

  // Best-effort switch back to the base branch once the commit exists.
  const returnToBase = (): void => {
    const back = git(["checkout", BASE_BRANCH]);
    if (back.code !== 0) log(`warning: could not return to ${BASE_BRANCH}: ${back.stderr.trim()}`);
  };

  // 1. Pause.
  if (await fileExists(PAUSE_FILE)) {
    return finish("skipped_pause", `pause file at ${PAUSE_FILE}`);
  }

  // 2. Cost.
  const cost = await readCost();
  const bc = budgetCheck(cost);
  if (!bc.ok) return finish("skipped_cost", bc.reason);
  log(`cost: ${bc.reason}`);

  // 3+4. Context + policy.shouldRunCycle.
  const ctx = await gatherContext(cost);
  const srq = policy.shouldRunCycle(ctx);
  if (!srq.run) {
    return finish(ctx.workingTreeDirty ? "skipped_dirty_tree" : "skipped_policy", srq.reason);
  }

  // 5. Gaps + policy.pickGap.
  const gaps = await findGaps();
  log(`found ${gaps.length} [bot-eligible] gap(s) in PRD`);
  const gapPick = policy.pickGap(gaps);
  if (!gapPick.gap) return finish("skipped_no_gap", gapPick.reason);
  const gap = gapPick.gap;
  log(`picked gap ${gap.id}: "${gap.prd_line.slice(0, 80)}"`);

  // 6. KB lookup + Proposal.
  // Bot cycles compound: prior cycles on this gap inform the current
  // proposal. Summary is a compact block injected into the cloud prompt
  // so the model can build on past successes and steer around past
  // failures. Empty string when this is the first cycle on this gap.
  const history = await loadHistory(gap.id, 5);
  const stats = statsFor(history);
  if (history.length > 0) {
    log(`kb: ${stats.attempts} prior cycle(s) — ${stats.pr_opened} pr / ${stats.tests_failed} tests_failed / ${stats.proposal_rejected} rejected / ${stats.noop} noop`);
  } else {
    log(`kb: first cycle on this gap`);
  }
  const historySummary = summarizeHistory(history);
  let proposal: Proposal;
  try {
    proposal = await generateProposal(gap, historySummary);
    log(`proposal: ${proposal.summary} (${proposal.files.length} files, ~${proposal.estimated_loc} LOC, ${proposal.tokens_used} tokens)`);
  } catch (e) {
    // The call was attempted, so it counts against the budget even though
    // no token figure is available.
    await recordCost(1, 0);
    return finish("model_failed", errorMessage(e), { gap, cloud_calls: 1 });
  }
  await recordCost(1, proposal.tokens_used);

  // Fields shared by every result from here on.
  const spent = { gap, proposal, tokens_used: proposal.tokens_used, cloud_calls: 1 };

  // 7. Score.
  const score = policy.scoreProposal(proposal);
  if (!score.accept) {
    return finish("proposal_rejected", score.reason, spent);
  }
  log(`proposal accepted: ${score.reason}`);

  // 8. Branch.
  const dayStamp = new Date().toISOString().slice(0, 10).replace(/-/g, "");
  const branch = `bot/cycle-${dayStamp}-${gap.id}`;
  // Branch must be fresh. If one already exists from a prior failed cycle
  // for the same gap, bail — don't silently overwrite or reuse stale state.
  const check = git(["rev-parse", "--verify", `refs/heads/${branch}`]);
  if (check.code === 0) {
    return finish("apply_failed",
      `branch ${branch} already exists — delete it manually if you want a fresh cycle for this gap`,
      spent);
  }
  const co = git(["checkout", "-b", branch]);
  if (co.code !== 0) {
    return finish("apply_failed", `git checkout -b failed: ${co.stderr}`, spent);
  }
  log(`on branch ${branch}`);

  // 9. Apply — Mem0 ADD/UPDATE/NOOP semantics.
  const ap = await applyProposal(proposal);
  if (ap.errors.length > 0) {
    // Some files may have been written before the error; do not carry a
    // half-applied proposal back onto the base branch.
    abandonBranch(branch, ap.added);
    return finish("apply_failed", ap.errors.join("; "), spent);
  }
  const changed = [...ap.added, ...ap.updated];
  log(`apply: ${ap.added.length} add, ${ap.updated.length} update, ${ap.noop.length} noop`);

  // Short-circuit: if every proposed file was identical to what's on
  // disk, there's nothing to test or PR. Clean up the branch and exit.
  if (changed.length === 0) {
    // Nothing was written, so there is nothing to revert.
    git(["checkout", BASE_BRANCH]);
    git(["branch", "-D", branch]);
    return finish("cycle_noop",
      `all ${proposal.files.length} proposed file(s) were identical to current state`,
      { ...spent, filesNoop: ap.noop });
  }

  // 10. Test.
  log(`running tests (this will take a while)...`);
  const testRes = await runTests();
  if (!testRes.green) {
    abandonBranch(branch, ap.added);
    return finish("tests_failed", "cargo or bun test failed", {
      ...spent,
      filesAdded: ap.added, filesUpdated: ap.updated, filesNoop: ap.noop,
      testsGreen: false, testsOutput: testRes.output,
    });
  }
  log(`tests green`);

  // 11. PR gate.
  // `partial` is what the policy evaluates: the result as it would look if
  // the PR were opened. Its outcome/reason/ended_at are provisional; `finish`
  // replaces them with the real values.
  const partial: CycleResult = {
    cycle_id, started_at,
    ended_at: new Date().toISOString(),
    outcome: "ok", reason: "",
    gap, proposal,
    filesAdded: ap.added, filesUpdated: ap.updated, filesNoop: ap.noop,
    testsGreen: true, testsOutput: testRes.output,
    prUrl: null, tokens_used: proposal.tokens_used, cloud_calls: 1,
  };
  const prGate = policy.shouldOpenPR(partial);
  if (!prGate.open) {
    // The proposal stays in the persisted result; the working tree is put
    // back so uncommitted edits do not land on the base branch.
    abandonBranch(branch, ap.added);
    return finish("pr_skipped_by_policy", prGate.reason, partial);
  }

  // 12. Commit + push.
  // `--` keeps a model-supplied path that starts with "-" from being read as
  // a git option.
  const staged = git(["add", "--", ...changed]);
  if (staged.code !== 0) {
    return finish("pr_failed", `git add failed: ${staged.stderr}`, partial);
  }
  const commitMsg = [
    `bot: ${proposal.summary}`,
    "",
    proposal.rationale,
    "",
    `PRD gap: ${gap.prd_line}`,
    `Cycle: ${cycle_id}`,
    "",
    `Co-Authored-By: ${proposal.model_used} (cloud) <bot@lakehouse.local>`,
  ].join("\n");
  const ci = git(["commit", "-m", commitMsg]);
  if (ci.code !== 0) {
    return finish("pr_failed", `git commit failed: ${ci.stderr}`, partial);
  }
  const ps = git(["push", "-u", "origin", branch]);
  // The work now lives in a commit on the cycle branch. Step off it whether
  // or not the push succeeded, so a later cycle starts from the base branch.
  returnToBase();
  if (ps.code !== 0) {
    return finish("pr_failed", `git push failed: ${ps.stderr}`, partial);
  }

  // 13. Open PR.
  try {
    const pr = await openPr({
      branch,
      title: `bot: ${proposal.summary}`,
      body: [
        `**Gap** (PRD line ${gap.line_number}): ${gap.prd_line}`,
        "",
        `**Rationale**`,
        proposal.rationale,
        "",
        `**Cycle**: \`${cycle_id}\``,
        `**Model**: \`${proposal.model_used}\``,
        `**Tokens**: ${proposal.tokens_used}`,
        `**Added**: ${ap.added.map(f => `\`${f}\``).join(", ") || "_none_"}`,
        `**Updated**: ${ap.updated.map(f => `\`${f}\``).join(", ") || "_none_"}`,
        ap.noop.length > 0 ? `**NOOP (identical to current)**: ${ap.noop.map(f => `\`${f}\``).join(", ")}` : "",
      ].join("\n"),
    });
    return finish("ok", `PR #${pr.number} opened`, { ...partial, prUrl: pr.html_url });
  } catch (e) {
    return finish("pr_failed", errorMessage(e), partial);
  }
}
/** Last-resort handler: print whatever escaped `main` and exit with status 1. */
function onFatal(err: unknown): void {
  console.error("[bot] fatal:", err);
  process.exit(1);
}

// Script entry point — run exactly one cycle.
main().catch(onFatal);