profit ac01fffd9a checkpoint: matrix-agent-validated (2026-04-25)
Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.

WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) is excluded — see REPLICATION.md for
how to regenerate it. Full lakehouse history is at git.agentview.dev/profit/lakehouse.

WHAT WAS PROVEN
- Vector retrieval across a multi-corpus matrix (chicago_permits + entity
  briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
    * UPSERT: ADD on a new workflow; UPDATE bumps replay_count on an identical one
    * REVISE: chains versions, parent.superseded_at + superseded_by stamped
    * RETIRE: marks specific trace retired with reason, excluded from retrieval
    * HISTORY: walks chain root→tip, cycle-safe

KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces

Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 19:43:27 -05:00

89 lines
2.5 KiB
TypeScript

// Shared types for the PR-bot. Small and explicit — the bot's behavior
// should be readable from these shapes alone.
export interface Gap {
id: string; // hash of prd_line — stable key for dedup
prd_line: string; // the [bot-eligible]-tagged line, verbatim
context: string; // surrounding context from the PRD (next ~5 lines)
source_file: string; // e.g. "docs/PRD.md"
line_number: number;
}
export interface ProposalFile {
path: string; // repo-relative (no leading slash)
content: string; // full file content — the bot writes whole files, not patches
is_new: boolean; // true = create, false = overwrite existing
}
/**
 * A proposed change: summary, rationale, and the files to write.
 */
export interface Proposal {
  /** One-line summary. */
  summary: string;

  /** Why this change addresses the gap. */
  rationale: string;

  /** Files that make up the change. */
  files: ProposalFile[];

  /** Total added + changed lines across all files. */
  estimated_loc: number;

  model_used: string;
  tokens_used: number;
}
/**
 * Context for a cycle: start time, daily call/token usage against
 * budgets, the previous cycle's markers, and two environment flags.
 */
export interface CycleContext {
  /** ISO timestamp string. */
  startedAt: string;

  // Daily call usage vs. budget.
  dailyCallsUsed: number;
  dailyCallsBudget: number;

  // Daily token usage vs. budget.
  dailyTokensUsed: number;
  dailyTokensBudget: number;

  // Previous-cycle markers; either may be null.
  lastCycleAt: string | null;
  lastCycleGapId: string | null;

  autotuneBusy: boolean;
  workingTreeDirty: boolean;
}
/**
 * Every terminal state a cycle can report, as a closed set of string
 * literals.
 */
export type CycleOutcome =
  // PR opened
  | "ok"
  | "skipped_pause"
  | "skipped_cost"
  | "skipped_policy"
  | "skipped_no_gap"
  | "skipped_dirty_tree"
  | "proposal_rejected"
  | "apply_failed"
  // proposal applied but every file was identical to what's on disk
  | "cycle_noop"
  | "tests_failed"
  | "pr_skipped_by_policy"
  | "pr_failed"
  | "model_failed";
/**
 * Mem0-aligned apply outcomes: three shapes rather than the binary
 * "written" / "errored". NOOP is a first-class outcome, because identical
 * content shouldn't waste test cycles or open an empty PR.
 */
export type ApplyMode =
  | "add"
  | "update"
  | "noop";
/**
 * Result buckets of an apply step: one string list each for added,
 * updated and no-op entries, plus a list of errors.
 *
 * NOTE(review): the first three lists presumably hold file paths and
 * `errors` presumably holds messages — confirm against the producer.
 */
export interface ApplyOutcome {
  added: string[];
  updated: string[];
  noop: string[];
  errors: string[];
}
/**
 * Record of one cycle: identity and timing, the outcome with its reason,
 * the gap and proposal involved (if any), per-file apply buckets, test
 * status, PR link, and usage counters.
 */
export interface CycleResult {
  // Identity and timing.
  cycle_id: string;
  started_at: string;
  ended_at: string;

  // Outcome and its explanation.
  outcome: CycleOutcome;
  reason: string;

  // Inputs to the cycle; either may be null.
  gap: Gap | null;
  proposal: Proposal | null;

  // Per-file apply buckets.
  filesAdded: string[];
  filesUpdated: string[];
  filesNoop: string[];

  /** `null` = tests were not run. */
  testsGreen: boolean | null;
  testsOutput: string;

  prUrl: string | null;

  // Usage counters.
  tokens_used: number;
  cloud_calls: number;
}
/**
 * Call and token counters keyed by a date.
 */
export interface CostState {
  /** Date in `YYYY-MM-DD` form, UTC. */
  date: string;

  calls: number;
  tokens: number;
}