profit ac01fffd9a checkpoint: matrix-agent-validated (2026-04-25)
Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.

WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) is excluded — see REPLICATION.md for
how to regenerate it. Full lakehouse history is at git.agentview.dev/profit/lakehouse.

WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
  briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
    * UPSERT: ADD on new workflow, UPDATE bumps replay_count on an identical one
    * REVISE: chains versions, parent.superseded_at + superseded_by stamped
    * RETIRE: marks specific trace retired with reason, excluded from retrieval
    * HISTORY: walks chain root→tip, cycle-safe

KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces

Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 19:43:27 -05:00

146 lines
5.4 KiB
TypeScript

// Gitea API client. Minimal surface — only what the auditor needs:
// list open PRs, get commits + files for a PR, fetch a diff, post a
// commit status, post a review, post an issue comment.
//
// Auth: reads the PAT from ~/.git-credentials (set up by the credential
// helper flow in the 2026-04-22 session). Gitea's "token" auth scheme
// matches what `git fetch` is already using.
import { readFile } from "node:fs/promises";
import type { PrSnapshot } from "./types.ts";
// Base URL of the Gitea instance. The GITEA_HOST environment variable
// overrides the default when it is set.
const HOST = process.env.GITEA_HOST ?? "https://git.agentview.dev";
// Owner and repository name interpolated into every /repos/... API path
// built in this file.
const OWNER = "profit";
const REPO = "lakehouse";
// Credential file that getPat() scans for the access token.
// NOTE(review): this is an absolute path into one user's home directory —
// confirm that is intended if the auditor ever runs under another account.
const CRED_FILE = "/home/profit/.git-credentials";
// Token cached by getPat() after the first successful read; null until then.
let cachedPat: string | null = null;
/// Returns the Gitea personal access token, reading it from CRED_FILE on
/// first use and caching it in `cachedPat` for later calls.
///
/// Each line of the credential file is expected to look like
/// `scheme://user:token@host`. Only an entry whose scheme and host match
/// HOST is accepted, so overriding GITEA_HOST can never cause a token that
/// was stored for a different server to be sent to the new one.
///
/// Throws if HOST is not a valid URL, if the file cannot be read, or if no
/// line carries a credential for the configured host.
async function getPat(): Promise<string> {
  if (cachedPat) return cachedPat;

  const target = new URL(HOST);
  const raw = await readFile(CRED_FILE, "utf8");

  for (const line of raw.split("\n")) {
    const entry = line.trim();
    if (entry === "") continue;

    let parsed: URL;
    try {
      parsed = new URL(entry);
    } catch {
      continue; // not a URL-shaped credential line — ignore it
    }

    // Compare the normalized scheme and host (including any port) exactly,
    // instead of a prefix match that would also accept look-alike hosts.
    if (parsed.protocol !== target.protocol || parsed.host !== target.host) continue;
    if (parsed.username === "" || parsed.password === "") continue;

    // URL keeps the password percent-encoded; decode it so a token that was
    // stored with escaped characters is sent in its real form. A malformed
    // escape sequence falls back to the raw text rather than aborting.
    let pat: string;
    try {
      pat = decodeURIComponent(parsed.password);
    } catch {
      pat = parsed.password;
    }

    cachedPat = pat;
    return pat;
  }

  throw new Error(`no Gitea PAT in ${CRED_FILE}`);
}
/// Sends an authenticated request to the Gitea REST API.
///
/// `path` is appended to `${HOST}/api/v1`. The caller's `init` is passed
/// through with two adjustments: the Authorization header is always set to
/// the token from getPat(), and a JSON content-type is added when a body
/// is present and the caller did not specify one.
///
/// Every request carries a 20-second timeout signal; because it is applied
/// after spreading `init`, it replaces any signal supplied by the caller.
async function giteaFetch(path: string, init: RequestInit = {}): Promise<Response> {
  const token = await getPat();
  const endpoint = `${HOST}/api/v1${path}`;

  const requestHeaders = new Headers(init.headers);
  requestHeaders.set("Authorization", `token ${token}`);

  const needsContentType = Boolean(init.body) && !requestHeaders.has("content-type");
  if (needsContentType) {
    requestHeaders.set("content-type", "application/json");
  }

  const options: RequestInit = {
    ...init,
    headers: requestHeaders,
    signal: AbortSignal.timeout(20000),
  };
  return fetch(endpoint, options);
}
/// Lists every open pull request of the repository as a PrSnapshot.
///
/// The pulls endpoint is paginated, so pages are requested one after the
/// other until either an empty page comes back or the number of collected
/// rows reaches the `X-Total-Count` response header (when the server sends
/// one). A hard cap on the number of pages guards against a server that
/// ignores the `page` parameter; results beyond that cap are not fetched.
///
/// Throws if any page request returns a non-OK status.
export async function listOpenPrs(): Promise<PrSnapshot[]> {
  const pageSize = 50;
  const maxPages = 100;
  const rows: any[] = [];

  for (let page = 1; page <= maxPages; page++) {
    const r = await giteaFetch(
      `/repos/${OWNER}/${REPO}/pulls?state=open&page=${page}&limit=${pageSize}`,
    );
    if (!r.ok) throw new Error(`listOpenPrs ${r.status}: ${await r.text()}`);

    const batch = (await r.json()) as any[];
    if (batch.length === 0) break;
    rows.push(...batch);

    // A missing header parses to 0, in which case the loop simply continues
    // until an empty page is returned.
    const total = Number(r.headers.get("x-total-count"));
    if (Number.isFinite(total) && total > 0 && rows.length >= total) break;
  }

  return Promise.all(rows.map(row => snapshotFromPr(row)));
}
/// Fetches a single pull request by number and converts it to a
/// PrSnapshot via snapshotFromPr().
///
/// Throws an Error carrying the HTTP status and response body when the
/// request does not succeed.
export async function getPrSnapshot(num: number): Promise<PrSnapshot> {
  const response = await giteaFetch(`/repos/${OWNER}/${REPO}/pulls/${num}`);
  if (response.ok) {
    const pr = (await response.json()) as any;
    return snapshotFromPr(pr);
  }
  const status = response.status;
  const detail = await response.text();
  throw new Error(`getPr ${num} ${status}: ${detail}`);
}
/// Builds a PrSnapshot from a raw pull-request object returned by the API.
///
/// Two extra requests fetch the PR's commits and changed files. They do not
/// depend on each other, so they are issued concurrently. A non-OK response
/// for either one is treated as an empty list rather than an error, so the
/// snapshot is still produced on a best-effort basis.
///
/// Missing fields on `row` fall back to empty strings / zero. Commit SHAs
/// are shortened to their first 12 characters; head and base SHAs are kept
/// in full.
async function snapshotFromPr(row: any): Promise<PrSnapshot> {
  const num = row.number;
  const prPath = `/repos/${OWNER}/${REPO}/pulls/${num}`;

  const [commitsResp, filesResp] = await Promise.all([
    giteaFetch(`${prPath}/commits`),
    giteaFetch(`${prPath}/files`),
  ]);
  const commits = commitsResp.ok ? ((await commitsResp.json()) as any[]) : [];
  const files = filesResp.ok ? ((await filesResp.json()) as any[]) : [];

  return {
    number: num,
    head_sha: row.head?.sha ?? "",
    base_sha: row.base?.sha ?? "",
    title: row.title ?? "",
    body: row.body ?? "",
    // Anything that is not open is either merged or plainly closed.
    state: row.state === "open" ? "open" : (row.merged ? "merged" : "closed"),
    author: row.user?.login ?? "",
    commits: commits.map(c => ({
      sha: (c.sha ?? "").slice(0, 12),
      message: c.commit?.message ?? "",
      author: c.commit?.author?.name ?? "",
    })),
    files: files.map(f => ({
      path: f.filename ?? "",
      additions: f.additions ?? 0,
      deletions: f.deletions ?? 0,
    })),
  };
}
/// Fetches the PR's `.diff` endpoint and returns the response body as
/// text. Used by static checks.
///
/// Throws an Error carrying the HTTP status and response body when the
/// request does not succeed.
export async function getPrDiff(num: number): Promise<string> {
  const response = await giteaFetch(`/repos/${OWNER}/${REPO}/pulls/${num}.diff`);
  // The body is needed on both paths: as the diff, or as the error detail.
  const text = await response.text();
  if (!response.ok) {
    throw new Error(`getDiff ${num} ${response.status}: ${text}`);
  }
  return text;
}
/// Posts a commit status for `args.sha` with the given state and context.
///
/// A failing status on the PR head SHA is the auditor's hard-block
/// mechanism: branch protection (if enabled on `main`) treats it as a
/// required-check failure and prevents merge. The description is shown
/// in the Gitea UI next to the status icon and is cut to 140 characters
/// before sending; an omitted `target_url` is sent as an empty string.
///
/// Throws an Error carrying the HTTP status and response body when the
/// request does not succeed.
export async function postCommitStatus(args: {
  sha: string;
  state: "success" | "pending" | "failure" | "error";
  context: string;
  description: string;
  target_url?: string;
}): Promise<void> {
  const payload = {
    state: args.state,
    context: args.context,
    description: args.description.slice(0, 140),
    target_url: args.target_url ?? "",
  };
  const response = await giteaFetch(`/repos/${OWNER}/${REPO}/statuses/${args.sha}`, {
    method: "POST",
    body: JSON.stringify(payload),
  });
  if (response.ok) return;
  throw new Error(`postCommitStatus ${response.status}: ${await response.text()}`);
}
/// Submits a review on a pull request, pinned to `commit_id`, with the
/// given body and review event.
///
/// Gitea typically blocks self-review (an author reviewing their own PR),
/// so prefer `postIssueComment` when running with the author's PAT.
///
/// Throws an Error carrying the HTTP status and response body when the
/// request does not succeed.
export async function postReview(args: {
  pr_number: number;
  commit_id: string;
  body: string;
  event: "APPROVE" | "REQUEST_CHANGES" | "COMMENT";
}): Promise<void> {
  const { pr_number, commit_id, body, event } = args;
  const payload = JSON.stringify({ commit_id, body, event });
  const response = await giteaFetch(`/repos/${OWNER}/${REPO}/pulls/${pr_number}/reviews`, {
    method: "POST",
    body: payload,
  });
  if (response.ok) return;
  throw new Error(`postReview ${response.status}: ${await response.text()}`);
}
/// Adds a plain comment to the PR through the issues endpoint and returns
/// the new comment's `id` and `html_url` as reported by the server.
///
/// This works with the auditor's own PAT because Gitea lets authors
/// comment on their own PRs (just not review them). The auditor uses it
/// for the reasoning body; the actual block signal is the commit status.
///
/// Throws an Error carrying the HTTP status and response body when the
/// request does not succeed.
export async function postIssueComment(args: {
  pr_number: number;
  body: string;
}): Promise<{ id: number; html_url: string }> {
  const endpoint = `/repos/${OWNER}/${REPO}/issues/${args.pr_number}/comments`;
  const response = await giteaFetch(endpoint, {
    method: "POST",
    body: JSON.stringify({ body: args.body }),
  });
  if (!response.ok) {
    throw new Error(`postIssueComment ${response.status}: ${await response.text()}`);
  }
  const created = await response.json() as any;
  const { id, html_url } = created;
  return { id, html_url };
}