Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.
WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) is excluded — see REPLICATION.md for
how to regenerate it. Full lakehouse history is at
git.agentview.dev/profit/lakehouse.
WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
  * UPSERT: ADD on a new workflow, UPDATE bumps replay_count on an identical one
  * REVISE: chains versions, parent.superseded_at + superseded_by stamped
  * RETIRE: marks a specific trace retired with a reason, excluded from retrieval
  * HISTORY: walks the chain root→tip, cycle-safe
KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces
Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
146 lines
5.4 KiB
TypeScript
146 lines
5.4 KiB
TypeScript
// Gitea API client. Minimal surface — only what the auditor needs:
|
|
// list open PRs, get commits + files for a PR, fetch a diff, post a
|
|
// commit status, post a review.
|
|
//
|
|
// Auth: reads PAT from ~/.git-credentials (set up by the credential
|
|
// helper flow in 2026-04-22 session). Gitea's "token" auth scheme
|
|
// matches what `git fetch` is already using.
|
|
|
|
import { readFile } from "node:fs/promises";
|
|
import type { PrSnapshot } from "./types.ts";
|
|
|
|
// Base URL of the Gitea instance; override with the GITEA_HOST env var.
// Trailing slashes are stripped so `${HOST}/api/v1...` never contains `//`.
const HOST = (process.env.GITEA_HOST ?? "https://git.agentview.dev").replace(/\/+$/, "");

// Owner and repository that every API path in this module targets.
const OWNER = "profit";
const REPO = "lakehouse";

// Credentials file that getPat() scans for the PAT. Defaults to the original
// fixed path; GITEA_CRED_FILE lets another user or host point at its own file.
const CRED_FILE = process.env.GITEA_CRED_FILE ?? "/home/profit/.git-credentials";

// PAT memoized after the first successful read of CRED_FILE.
let cachedPat: string | null = null;
|
|
|
|
// Matches one git-credential-store line (`https://user:secret@host...`) for
// git.agentview.dev and captures the secret. The lookahead requires the host
// to end at a port, a path, or end-of-line, so `git.agentview.dev.other.tld`
// is not accepted.
// NOTE(review): the host here is fixed even though the API base URL can be
// overridden through GITEA_HOST — confirm that is intended.
const GITEA_CRED_LINE = /^https:\/\/[^:]+:([^@]+)@git\.agentview\.dev(?=[:\/]|$)/;

// Pure parser: returns the secret from the first matching line of `raw`, or
// null when no line matches. Lines are trimmed first so CRLF files and stray
// whitespace do not defeat the match.
function parsePatFromCredentials(raw: string): string | null {
  for (const line of raw.split("\n")) {
    const secret = GITEA_CRED_LINE.exec(line.trim())?.[1];
    if (!secret) continue;
    try {
      // git-credential-store percent-encodes the user and password fields.
      return decodeURIComponent(secret);
    } catch {
      // Malformed escape sequence: fall back to the raw text as stored.
      return secret;
    }
  }
  return null;
}

// Returns the Gitea PAT, reading CRED_FILE on first use and serving the
// memoized value afterwards. Rejects if the file cannot be read or holds no
// entry for the Gitea host.
async function getPat(): Promise<string> {
  if (cachedPat) return cachedPat;
  const raw = await readFile(CRED_FILE, "utf8");
  const pat = parsePatFromCredentials(raw);
  if (pat === null) throw new Error(`no Gitea PAT in ${CRED_FILE}`);
  cachedPat = pat;
  return pat;
}
|
|
|
|
// Issues an authenticated request against `${HOST}/api/v1${path}`.
// - Always sets `Authorization: token <PAT>`.
// - Adds a JSON content-type when a body is present and the caller set none.
// - Applies a 20 s timeout signal (this replaces any signal passed in `init`).
async function giteaFetch(path: string, init: RequestInit = {}): Promise<Response> {
  const token = await getPat();

  const requestHeaders = new Headers(init.headers);
  requestHeaders.set("Authorization", `token ${token}`);
  const needsContentType = Boolean(init.body) && !requestHeaders.has("content-type");
  if (needsContentType) {
    requestHeaders.set("content-type", "application/json");
  }

  const options: RequestInit = {
    ...init,
    headers: requestHeaders,
    signal: AbortSignal.timeout(20000),
  };
  return fetch(`${HOST}/api/v1${path}`, options);
}
|
|
|
|
// Lists every open PR of the repository and expands each one into a full
// PrSnapshot (commits + files) via snapshotFromPr.
//
// The pulls endpoint is paginated, so pages are fetched until one of:
//   - the server returns an empty page,
//   - the rows collected reach the count in the `x-total-count` header,
//   - the MAX_PAGES safety bound is hit (guards against a server that
//     ignores `page` and would otherwise loop forever).
// Throws on any non-2xx response or when the body is not a JSON array.
export async function listOpenPrs(): Promise<PrSnapshot[]> {
  const PAGE_SIZE = 50;
  const MAX_PAGES = 100;
  const rows: any[] = [];

  for (let page = 1; page <= MAX_PAGES; page++) {
    const r = await giteaFetch(
      `/repos/${OWNER}/${REPO}/pulls?state=open&page=${page}&limit=${PAGE_SIZE}`,
    );
    if (!r.ok) throw new Error(`listOpenPrs ${r.status}: ${await r.text()}`);

    const batch: unknown = await r.json();
    if (!Array.isArray(batch)) {
      throw new Error(`listOpenPrs: expected a JSON array, got ${typeof batch}`);
    }
    if (batch.length === 0) break;
    rows.push(...batch);

    // A missing or empty header yields NaN, in which case paging simply
    // continues until an empty page comes back.
    const totalHeader = r.headers.get("x-total-count");
    const total = totalHeader ? Number(totalHeader) : NaN;
    if (Number.isFinite(total) && rows.length >= total) break;
  }

  return Promise.all(rows.map(row => snapshotFromPr(row)));
}
|
|
|
|
// Fetches pull request `num` and expands it into a PrSnapshot.
// Throws when Gitea answers with a non-2xx status.
export async function getPrSnapshot(num: number): Promise<PrSnapshot> {
  const resp = await giteaFetch(`/repos/${OWNER}/${REPO}/pulls/${num}`);
  if (resp.ok) {
    const pr = (await resp.json()) as any;
    return snapshotFromPr(pr);
  }
  throw new Error(`getPr ${num} ${resp.status}: ${await resp.text()}`);
}
|
|
|
|
// Best-effort GET of a JSON list: a non-2xx response or a non-array body
// yields an empty list instead of an error, so one failing sub-request does
// not prevent the rest of the snapshot from being built.
async function fetchListOrEmpty(path: string): Promise<any[]> {
  const resp = await giteaFetch(path);
  if (!resp.ok) return [];
  const data: unknown = await resp.json();
  return Array.isArray(data) ? data : [];
}

// Builds a PrSnapshot from a raw pull-request object returned by the API.
// The commit list and the changed-file list are independent requests, so
// they are issued concurrently. Missing fields fall back to "" / 0, and
// commit SHAs are shortened to 12 characters.
// NOTE(review): both list endpoints are read as a single page with the
// server's default page size — very large PRs may be truncated; confirm.
async function snapshotFromPr(row: any): Promise<PrSnapshot> {
  const num = row.number;
  const prPath = `/repos/${OWNER}/${REPO}/pulls/${num}`;
  const [commits, files] = await Promise.all([
    fetchListOrEmpty(`${prPath}/commits`),
    fetchListOrEmpty(`${prPath}/files`),
  ]);

  return {
    number: num,
    head_sha: row.head?.sha ?? "",
    base_sha: row.base?.sha ?? "",
    title: row.title ?? "",
    body: row.body ?? "",
    // Anything not open is either merged or plainly closed.
    state: row.state === "open" ? "open" : (row.merged ? "merged" : "closed"),
    author: row.user?.login ?? "",
    commits: commits.map(c => ({
      sha: (c.sha ?? "").slice(0, 12),
      message: c.commit?.message ?? "",
      author: c.commit?.author?.name ?? "",
    })),
    files: files.map(f => ({
      path: f.filename ?? "",
      additions: f.additions ?? 0,
      deletions: f.deletions ?? 0,
    })),
  };
}
|
|
|
|
/// Fetches the unified diff of pull request `num` as plain text. Used by
/// the static checks. Throws when Gitea answers with a non-2xx status.
export async function getPrDiff(num: number): Promise<string> {
  const resp = await giteaFetch(`/repos/${OWNER}/${REPO}/pulls/${num}.diff`);
  if (resp.ok) {
    return resp.text();
  }
  const detail = await resp.text();
  throw new Error(`getDiff ${num} ${resp.status}: ${detail}`);
}
|
|
|
|
/// Posts a commit status on `args.sha`. This is the hard-block mechanism:
/// a failing status on the PR head SHA is treated by branch protection
/// (if enabled on `main`) as a failed required check and prevents merge.
/// The description appears in the Gitea UI next to the status icon and is
/// cut to 140 characters before sending. Throws on a non-2xx response.
export async function postCommitStatus(args: {
  sha: string;
  state: "success" | "pending" | "failure" | "error";
  context: string;
  description: string;
  target_url?: string;
}): Promise<void> {
  const { sha, state, context, description, target_url } = args;
  const payload = {
    state,
    context,
    description: description.slice(0, 140),
    target_url: target_url ?? "",
  };
  const resp = await giteaFetch(`/repos/${OWNER}/${REPO}/statuses/${sha}`, {
    method: "POST",
    body: JSON.stringify(payload),
  });
  if (resp.ok) return;
  throw new Error(`postCommitStatus ${resp.status}: ${await resp.text()}`);
}
|
|
|
|
/// Submits a review on a pull request. Gitea typically blocks self-review
/// (the author reviewing their own PR), so prefer `postIssueComment` when
/// running with the author's PAT. Throws on a non-2xx response.
export async function postReview(args: {
  pr_number: number;
  commit_id: string;
  body: string;
  event: "APPROVE" | "REQUEST_CHANGES" | "COMMENT";
}): Promise<void> {
  const { pr_number, commit_id, body, event } = args;
  const payload = JSON.stringify({ commit_id, body, event });
  const resp = await giteaFetch(`/repos/${OWNER}/${REPO}/pulls/${pr_number}/reviews`, {
    method: "POST",
    body: payload,
  });
  if (resp.ok) return;
  throw new Error(`postReview ${resp.status}: ${await resp.text()}`);
}
|
|
|
|
/// Adds a plain issue comment to a pull request and returns the new
/// comment's id and URL. Works with the auditor's own PAT because Gitea
/// lets authors comment on (but not review) their own PRs. The auditor
/// puts its reasoning here; the actual block signal is the commit status.
/// Throws on a non-2xx response.
export async function postIssueComment(args: {
  pr_number: number;
  body: string;
}): Promise<{ id: number; html_url: string }> {
  const endpoint = `/repos/${OWNER}/${REPO}/issues/${args.pr_number}/comments`;
  const resp = await giteaFetch(endpoint, {
    method: "POST",
    body: JSON.stringify({ body: args.body }),
  });
  if (!resp.ok) {
    throw new Error(`postIssueComment ${resp.status}: ${await resp.text()}`);
  }
  const created = (await resp.json()) as any;
  const { id, html_url } = created;
  return { id, html_url };
}
|