Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.
WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) excluded — see REPLICATION.md for
regen path. Full lakehouse history at git.agentview.dev/profit/lakehouse.
WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
* UPSERT: ADD on new workflow, UPDATE bumps replay_count on identical
* REVISE: chains versions, parent.superseded_at + superseded_by stamped
* RETIRE: marks specific trace retired with reason, excluded from retrieval
* HISTORY: walks chain root→tip, cycle-safe
KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces
Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
458 lines
31 KiB
HTML
458 lines
31 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="en"><head>
|
||
<meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
|
||
<title>Lakehouse — Architecture & Reproduction</title>
|
||
<style>
|
||
*{margin:0;padding:0;box-sizing:border-box}
|
||
body{font-family:'Inter',-apple-system,system-ui,sans-serif;background:#090c10;color:#b0b8c4;font-size:14px;line-height:1.55;-webkit-font-smoothing:antialiased}
|
||
a{color:#58a6ff;text-decoration:none}
|
||
a:hover{color:#79c0ff}
|
||
|
||
.bar{background:#0d1117;padding:0 24px;height:56px;border-bottom:1px solid #171d27;display:flex;justify-content:space-between;align-items:center;position:sticky;top:0;z-index:10}
|
||
.bar h1{font-size:14px;font-weight:600;color:#e6edf3;letter-spacing:-0.2px}
|
||
.bar nav{display:flex;gap:2px}
|
||
.bar nav a{font-size:12px;color:#545d68;padding:6px 14px;border-radius:6px;transition:all 0.15s}
|
||
.bar nav a:hover{color:#e6edf3;background:#161b22}
|
||
.bar nav a.active{color:#e6edf3;background:#1c2333}
|
||
.bar .rt{font-size:11px;color:#545d68}
|
||
|
||
.wrap{max-width:1040px;margin:0 auto;padding:28px 20px 60px}
|
||
|
||
.chapter{margin-bottom:48px}
|
||
.chapter .num{color:#545d68;font-size:11px;font-weight:600;letter-spacing:1.6px;text-transform:uppercase;margin-bottom:6px}
|
||
.chapter h2{color:#e6edf3;font-size:24px;font-weight:700;letter-spacing:-0.4px;margin-bottom:8px;line-height:1.2}
|
||
.chapter .lede{color:#8b949e;font-size:14px;margin-bottom:18px;max-width:680px;line-height:1.6}
|
||
.chapter h3{color:#e6edf3;font-size:16px;font-weight:600;margin:18px 0 6px}
|
||
|
||
.card{background:#0d1117;border:1px solid #171d27;border-radius:12px;padding:20px;margin-bottom:12px}
|
||
.grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));gap:10px}
|
||
|
||
.stat-lg{padding:18px 20px}
|
||
.stat-lg .n{font-size:26px;font-weight:800;color:#e6edf3;letter-spacing:-1px;line-height:1}
|
||
.stat-lg .l{font-size:10px;color:#545d68;text-transform:uppercase;letter-spacing:1.2px;margin-top:8px;font-weight:600}
|
||
.stat-lg .sub{font-size:12px;color:#8b949e;margin-top:4px}
|
||
|
||
.accent-l{border-left:3px solid #2ea043}
|
||
.accent-b{border-left:3px solid #1f6feb}
|
||
.accent-a{border-left:3px solid #bc8cff}
|
||
.accent-w{border-left:3px solid #d29922}
|
||
.accent-r{border-left:3px solid #f85149}
|
||
|
||
.row{display:flex;justify-content:space-between;align-items:center;gap:12px;padding:10px 14px;background:#0d1117;border:1px solid #171d27;border-radius:8px;margin-bottom:6px;font-size:13px}
|
||
.row:hover{border-color:#21262d}
|
||
.row .title{color:#e6edf3;font-weight:500}
|
||
.row .meta{color:#8b949e;font-size:11px;margin-top:2px;font-family:ui-monospace,Menlo,monospace}
|
||
.row .val{color:#58a6ff;font-weight:600;white-space:nowrap;font-family:ui-monospace,Menlo,monospace}
|
||
.row.pass{border-left:3px solid #2ea043}
|
||
.row.fail{border-left:3px solid #f85149}
|
||
|
||
table.plain{width:100%;border-collapse:collapse;font-size:12px;margin-top:8px}
|
||
table.plain th{text-align:left;padding:8px 12px;color:#545d68;font-weight:600;text-transform:uppercase;font-size:10px;letter-spacing:0.8px;border-bottom:1px solid #171d27}
|
||
table.plain td{padding:8px 12px;border-bottom:1px solid #171d27;color:#c9d1d9;font-family:ui-monospace,Menlo,monospace}
|
||
table.plain tr:hover td{background:#0d1117}
|
||
|
||
code{background:#161b22;color:#e6edf3;padding:2px 6px;border-radius:4px;font-family:ui-monospace,Menlo,monospace;font-size:12px}
|
||
pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 16px;overflow-x:auto;font-family:ui-monospace,Menlo,monospace;font-size:12px;color:#c9d1d9;line-height:1.5;margin:8px 0}
|
||
|
||
.narr{color:#8b949e;font-size:13px;line-height:1.7;margin:10px 0;padding:10px 14px;border-left:2px solid #21262d}
|
||
.narr strong{color:#c9d1d9;font-weight:600}
|
||
|
||
.ref{color:#545d68;font-size:11px;margin-top:6px;font-family:ui-monospace,Menlo,monospace}
|
||
.ref strong{color:#79c0ff;font-weight:600}
|
||
|
||
.math{background:#0d1117;border:1px solid #171d27;border-radius:8px;padding:14px 16px;font-family:ui-monospace,Menlo,monospace;font-size:13px;color:#e6edf3;margin:8px 0}
|
||
.math .c{color:#8b949e}
|
||
|
||
.footer{border-top:1px solid #171d27;padding:20px;text-align:center;color:#3d444d;font-size:11px}
|
||
|
||
.loading{color:#484f58;font-style:italic;padding:20px 0;text-align:center}
|
||
.err{color:#f85149;font-size:12px;padding:10px}
|
||
|
||
@media(max-width:720px){
|
||
.wrap{padding:20px 12px 40px}
|
||
.chapter h2{font-size:20px}
|
||
.bar nav{display:none}
|
||
}
|
||
</style></head>
|
||
<body>
|
||
|
||
<div class="bar">
|
||
<h1>Lakehouse — Architecture & Reproduction</h1>
|
||
<nav>
|
||
<a href=".">Dashboard</a>
|
||
<a href="console">Walkthrough</a>
|
||
<a href="proof" class="active">Architecture</a>
|
||
<a href="spec">Spec</a>
|
||
<a href="onboard">Onboard</a>
|
||
<a href="alerts">Alerts</a>
|
||
<a href="workspaces">Workspaces</a>
|
||
</nav>
|
||
<div class="rt" id="hdr-time">Running live tests…</div>
|
||
</div>
|
||
|
||
<div class="wrap">
|
||
|
||
<div class="chapter">
|
||
<div class="num">Chapter 1</div>
|
||
<h2>Receipts, not promises</h2>
|
||
<div class="lede">Every test below ran live against the real gateway when you loaded this page. Sub-100ms SQL on multi-million-row Parquet, hybrid search with playbook boost applied. No fixtures. If a test fails, you'll see ✗.</div>
|
||
<div id="ch1-tests"><div class="loading">Running tests…</div></div>
|
||
</div>
|
||
|
||
<div class="chapter">
|
||
<div class="num">Chapter 2</div>
|
||
<h2>Architecture — 13 crates, one object store, one local AI runtime</h2>
|
||
<div class="lede">Request flows top to bottom. Every node is independently swappable. Every line is a real HTTP or gRPC hop that you can trace with <code>tcpdump</code>.</div>
|
||
<div class="card accent-b">
|
||
<pre> HTTP :3100 + gRPC :3101
|
||
│
|
||
┌───────▼───────┐
|
||
│ gateway │ Rust · Axum · routing, CORS, auth, tools
|
||
└───────┬───────┘
|
||
┌────────────┬───────────┼───────────┬────────────┐
|
||
│ │ │ │ │
|
||
┌────▼───┐ ┌────▼───┐ ┌────▼───┐ ┌────▼───┐ ┌────▼───┐
|
||
│catalog │ │ query │ │ vector │ │ ingest │ │aibridge│
|
||
│ d │ │ d │ │ d │ │ d │ │ │
|
||
└────┬───┘ └────┬───┘ └────┬───┘ └────┬───┘ └────┬───┘
|
||
│ │ │ │ │
|
||
└────────────┴───────────┼───────────┴────────────┘
|
||
▼
|
||
┌─────────────────┐
|
||
│ object storage │ Parquet files (local / S3)
|
||
└─────────────────┘
|
||
▲
|
||
│
|
||
┌───────┴────────┐
|
||
│ Python sidecar │ FastAPI → Ollama
|
||
│ (aibridge) │ local models only
|
||
└────────────────┘</pre>
|
||
</div>
|
||
<h3>Per-crate responsibility</h3>
|
||
<table class="plain">
|
||
<thead><tr><th>Crate</th><th>Role</th><th>Path</th></tr></thead>
|
||
<tbody>
|
||
<tr><td>shared</td><td>Types, errors, Arrow helpers, PII detection, secrets provider</td><td>crates/shared/</td></tr>
|
||
<tr><td>storaged</td><td>object_store I/O, BucketRegistry (multi-bucket), AppendLog, ErrorJournal</td><td>crates/storaged/</td></tr>
|
||
<tr><td>catalogd</td><td>Metadata authority — manifests, views, tombstones, profiles, schema fingerprints</td><td>crates/catalogd/</td></tr>
|
||
<tr><td>queryd</td><td>DataFusion SQL engine, MemTable cache, delta merge-on-read, compaction</td><td>crates/queryd/</td></tr>
|
||
<tr><td>ingestd</td><td>CSV/JSON/PDF(+OCR)/Postgres/MySQL ingest, cron schedules, auto-PII</td><td>crates/ingestd/</td></tr>
|
||
<tr><td>vectord</td><td>Embeddings as Parquet, HNSW, trial system, autotune agent, playbook_memory</td><td>crates/vectord/</td></tr>
|
||
<tr><td>vectord-lance</td><td>Firewall crate — Lance 4.0 + Arrow 57 isolated from main Arrow 55</td><td>crates/vectord-lance/</td></tr>
|
||
<tr><td>journald</td><td>Append-only mutation event log for time-travel & audit</td><td>crates/journald/</td></tr>
|
||
<tr><td>aibridge</td><td>Rust↔Python sidecar, Ollama HTTP client, VRAM introspection</td><td>crates/aibridge/</td></tr>
|
||
<tr><td>gateway</td><td>Axum HTTP :3100 + gRPC :3101, middleware, tools registry</td><td>crates/gateway/</td></tr>
|
||
<tr><td>ui</td><td>Dioxus WASM internal developer UI</td><td>crates/ui/</td></tr>
|
||
<tr><td>mcp-server</td><td>Bun TypeScript recruiter-facing app (this server)</td><td>mcp-server/</td></tr>
|
||
</tbody>
|
||
</table>
|
||
<div class="ref"><strong>Source:</strong> git.agentview.dev/profit/lakehouse · <strong>ADRs:</strong> docs/DECISIONS.md (currently 20 records)</div>
|
||
</div>
|
||
|
||
<div class="chapter">
|
||
<div class="num">Chapter 3</div>
|
||
<h2>Dual-agent recursive consensus loop</h2>
|
||
<div class="lede">The system we use to execute staffing fills is a dual-agent recursive protocol. Two agents with distinct roles iterate against a shared log until one of three terminal states is reached. It is deterministic in structure, stochastic in content, and verifiable through the per-run log artifact.</div>
|
||
<h3>Agents and protocol</h3>
|
||
<div class="card accent-a">
|
||
<pre> task in
|
||
│
|
||
▼
|
||
┌───────────────────────────────────────────────────────────┐
|
||
│ EXECUTOR (mistral:latest) │
|
||
│ ──────────────────────────────────────────────────────── │
|
||
│ input: task spec + shared log + seen-candidates ledger │
|
||
│ output: one JSON action per turn │
|
||
│ · {kind:"plan",steps:[…]} │
|
||
│ · {kind:"tool_call",tool,args,rationale} │
|
||
│ · {kind:"propose_done",fills:[N of N]} │
|
||
└───────────┬───────────────────────────────┬───────────────┘
|
||
│ tool_call │ propose_done
|
||
▼ │
|
||
┌──────────────────────────┐ │
|
||
│ TOOL DISPATCH │ │
|
||
│ hybrid_search / sql │ │
|
||
│ (against live gateway) │ │
|
||
└──────────┬───────────────┘ │
|
||
│ result (trimmed, exclusions) │
|
||
▼ ▼
|
||
┌───────────────────────────────────────────────────────────┐
|
||
│ REVIEWER (qwen2.5:latest) │
|
||
│ ──────────────────────────────────────────────────────── │
|
||
│ input: task spec + shared log (including tool result) │
|
||
│ output: {kind:"critique",verdict:"continue|drift| │
|
||
│ approve_done",notes} │
|
||
└───────────┬───────────────────────────────────────────────┘
|
||
│
|
||
┌─────┴─────┐
|
||
▼ ▼ ▼
|
||
continue drift approve_done + propose_done ⟹ SEAL
|
||
(next turn) (cap ≈ 3 →
|
||
hard abort)
|
||
</pre>
|
||
</div>
|
||
<div class="ref"><strong>Code:</strong> tests/multi-agent/agent.ts (protocol + prompts) · tests/multi-agent/orchestrator.ts (run loop) · tests/multi-agent/scenario.ts (5-event warehouse week)</div>
|
||
|
||
<h3>Why "dual" — role specialization</h3>
|
||
<div class="narr">
|
||
<strong>The executor is an optimist.</strong> Its job is to produce progress: pull candidates, verify SQL, propose consensus. It's instructed to be decisive.
|
||
<br><br>
|
||
<strong>The reviewer is a pessimist.</strong> Its job is to catch drift: proposals that don't match the task's geography, fill count, or role. It's authorized to stop the loop.
|
||
<br><br>
|
||
This adversarial separation is cheaper and more deterministic than asking a single model to self-critique. The reviewer has a hard rule: on the turn after a <code>propose_done</code>, it MUST emit either <code>approve_done</code> or <code>drift</code> — it cannot stall with <code>continue</code>.
|
||
</div>
|
||
|
||
<h3>Why "parallel" — orchestrator can fan out</h3>
|
||
<div class="narr">
|
||
<strong>Independent pairs run concurrently.</strong> <code>tests/multi-agent/run_e2e_rated.ts</code> runs two task-specific agent pairs via <code>Promise.all</code>. Ollama serializes inference at the model level, so "parallel" is concurrent orchestration — but the substrate (gateway, queryd, vectord) handles concurrent requests cleanly. Verified in the scenario harness: two contracts sealing simultaneously.
|
||
</div>
|
||
|
||
<h3>Why "recursive" — each seal feeds the next</h3>
|
||
<div class="narr">
|
||
<strong>Consensus does not end at the sealed playbook.</strong> Every sealed playbook is persisted to <code>playbook_memory</code> via <code>POST /vectors/playbook_memory/seed</code>. The next hybrid search for a semantically similar operation consults that memory via <code>compute_boost_for(query_embedding, top_k, base_weight)</code> and re-ranks the candidate pool. The system builds on itself turn over turn, playbook over playbook.
|
||
</div>
|
||
|
||
<h3>Termination guarantees</h3>
|
||
<div class="math">
|
||
<span class="c">// three paths out, every run has one of these:</span><br>
|
||
sealed = executor.propose_done ∧ reviewer.approve_done ∧ fills.count == target<br>
|
||
abort = consecutive_tool_errors ≥ MAX_TOOL_ERRORS (3) <span class="c">// executor can't form a valid call</span><br>
|
||
abort = consecutive_drifts ≥ MAX_CONSECUTIVE_DRIFTS (3) <span class="c">// reviewer keeps flagging</span><br>
|
||
abort = turn > MAX_TURNS (12) <span class="c">// no consensus reached in window</span>
|
||
</div>
|
||
<div class="narr">Every abort dumps the full log to <code>tests/multi-agent/playbooks/&lt;id&gt;-FAILED.json</code> for forensic review. No consensus is ever implicit.</div>
|
||
</div>
|
||
|
||
<div class="chapter">
|
||
<div class="num">Chapter 4</div>
|
||
<h2>Playbook memory — the compounding feedback loop</h2>
|
||
<div class="lede">A CRM stores events. This system turns events into re-ranking signal. Every sealed playbook endorses specific (worker, city, state) tuples. Every failure penalizes them. Every similar future query inherits the signal through cosine similarity.</div>
|
||
|
||
<h3>Seed shape</h3>
|
||
<div class="math">
|
||
PlaybookEntry {<br>
|
||
  playbook_id, <span class="c">// pb-seed-&lt;sha8&gt;</span><br>
|
||
operation, <span class="c">// "fill: Welder x2 in Toledo, OH"</span><br>
|
||
approach, context, <span class="c">// short canonical — long strings dilute embedding</span><br>
|
||
timestamp, <span class="c">// RFC3339</span><br>
|
||
endorsed_names[], <span class="c">// validated against workers_500k for city+state</span><br>
|
||
city, state, <span class="c">// parsed from operation</span><br>
|
||
embedding <span class="c">// 768-d nomic-embed-text of text shape</span><br>
|
||
}
|
||
</div>
|
||
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs (PlaybookEntry, FailureRecord, PlaybookMemoryState)</div>
|
||
|
||
<h3>Boost math (positive + decay + negative)</h3>
|
||
<div class="math">
|
||
<span class="c">// For each playbook pb among top-K most cosine-similar:</span><br>
|
||
<span class="c">// given query embedding qv, constant base_weight, n_workers = |pb.endorsed_names|</span><br>
|
||
<br>
|
||
similarity = cosine(qv, pb.embedding) <span class="c">// skip if ≤ 0.05</span><br>
|
||
age_days = (now - pb.timestamp) / 86_400 seconds<br>
|
||
decay = e<sup>-age_days / 30</sup> <span class="c">// half-life = 30 days</span><br>
|
||
<br>
|
||
<span class="c">// For each endorsed worker in pb:</span><br>
|
||
key = (pb.city, pb.state, name)<br>
|
||
fail_count = failures[key] <span class="c">// # times this worker was marked no-show for same geo</span><br>
|
||
penalty = 0.5<sup>min(fail_count, 20)</sup><br>
|
||
<br>
|
||
per_worker = similarity × base_weight × decay × penalty / n_workers<br>
|
||
boost[key] = min(boost[key] + per_worker, MAX_BOOST_PER_WORKER)<br>
|
||
<br>
|
||
<span class="c">// MAX_BOOST_PER_WORKER = 0.25 — cap stops one popular worker from always winning</span>
|
||
</div>
|
||
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::compute_boost_for · constants: MAX_BOOST_PER_WORKER, DEFAULT_TOP_K_PLAYBOOKS, BOOST_HALF_LIFE_DAYS</div>
|
||
|
||
<h3>Application at query time</h3>
|
||
<div class="math">
|
||
<span class="c">// In /vectors/hybrid handler (crates/vectord/src/service.rs):</span><br>
|
||
1. SQL filter narrows workers_500k to geo/role/availability<br>
|
||
2. Vector index returns top_k × 5 candidates by cosine to question<br>
|
||
3. compute_boost_for(qv, k=200) returns boost map<br>
|
||
4. For each candidate: parse (name, city, state) from chunk, look up boost, add to score<br>
|
||
5. Re-sort sources by boosted score<br>
|
||
6. Truncate to requested top_k, return with playbook_boost and playbook_citations
|
||
</div>
|
||
<div class="narr">
|
||
<strong>Why k=200.</strong> Direct measurement showed cosine similarity clusters in the 0.55-0.67 band across all playbooks regardless of geo (nomic-embed-text has narrow discrimination on this kind of structured operation text). A k of 25 silently missed geo-matched playbooks. k=200 is the measured floor for reliably catching compounding. Brute-force over 200 × 768-d is sub-ms even on this hardware.
|
||
</div>
|
||
<div class="ref"><strong>Evidence:</strong> Chicago Electrician compounding test 2026-04-20 — Carmen Green, Anna Patel, Fatima Wilson went from rank >5 / boost 0 / 0 citations (run 0, no seed) to rank 1/2/3 / boost +0.250 (capped) / 3 citations each (run 3, after 3 identical seeds). Each seed increments citations; total boost caps at 0.25/worker.</div>
|
||
|
||
<h3>Write-through to SQL</h3>
|
||
<div class="narr">
|
||
<strong>successful_playbooks_live</strong> is a DataFusion-queryable Parquet surface maintained by <code>POST /vectors/playbook_memory/persist_sql</code>. Every <code>/log</code> from the recruiter UI triggers seed → persist_sql. The in-memory store and the SQL surface stay synchronized (full snapshot on each persist, safe because memory is source of truth).
|
||
</div>
|
||
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::persist_to_sql · catalog-registered under "successful_playbooks_live"</div>
|
||
|
||
<h3>Pattern discovery (Path 2 — meta-index)</h3>
|
||
<div class="narr">
|
||
<strong>Beyond "who was endorsed."</strong> <code>POST /vectors/playbook_memory/patterns</code> takes a query, finds top-K similar past playbooks, pulls each endorsed worker's full workers_500k profile, and aggregates shared traits: recurring certifications, skill frequencies, modal archetype, reliability distribution. Returns a <code>discovered_pattern</code> string showing operator-actionable signal the user didn't explicitly query for.
|
||
</div>
|
||
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::discover_patterns · <strong>Surfaces:</strong> /vectors/playbook_memory/patterns endpoint, /intelligence/chat response, /intelligence/permit_contracts cards</div>
|
||
</div>
|
||
|
||
<div class="chapter">
|
||
<div class="num">Chapter 5</div>
|
||
<h2>Key architectural choices — what was picked and why</h2>
|
||
<div class="lede">Each choice is documented in <code>docs/DECISIONS.md</code> (Architecture Decision Records). If you dispute any of these, the ADR names the alternatives we rejected and the measurement that drove the call.</div>
|
||
<div class="card">
|
||
<div class="row accent-b">
|
||
<div style="flex:1"><div class="title">ADR-001 · Object storage as source of truth</div><div class="meta">No traditional database. All data is Parquet on S3-compatible object storage. Eliminates DB operational overhead; every engine can read Parquet.</div></div>
|
||
</div>
|
||
<div class="row accent-b">
|
||
<div style="flex:1"><div class="title">ADR-008 · Embeddings stored as Parquet, not a vector DB</div><div class="meta">Keeps all data in one portable format. No Pinecone/Weaviate/Qdrant lock-in. Trade-off: brute-force search up to ~100K; HNSW beyond.</div></div>
|
||
</div>
|
||
<div class="row accent-l">
|
||
<div style="flex:1"><div class="title">ADR-012 · Append-only event journal — never destroy evidence</div><div class="meta">Every mutation is appended. Compliance, audit, AI-decision forensics. Impossible to retrofit; easy to add now.</div></div>
|
||
</div>
|
||
<div class="row accent-a">
|
||
<div style="flex:1"><div class="title">ADR-015 · Tool registry before raw SQL for agents</div><div class="meta">Named, governed, audited actions for agents. Permission checks, rate limits, parameter validation. MCP-compatible.</div></div>
|
||
</div>
|
||
<div class="row accent-w">
|
||
<div style="flex:1"><div class="title">ADR-019 · Hybrid Parquet+HNSW ⊕ Lance vector backend</div><div class="meta">Parquet+HNSW primary (2.55× faster search at 100K). Lance secondary for index-build speed (14× faster), random fetch (112× faster), append (structural). Per-profile <code>vector_backend: Parquet | Lance</code>.</div></div>
|
||
</div>
|
||
<div class="row accent-r">
|
||
<div style="flex:1"><div class="title">ADR-020 · Idempotent register() with schema-fingerprint gate</div><div class="meta">Same (name, fingerprint) reuses manifest. Different fingerprint = 409 Conflict. Prevents silent duplicate manifests. Cleanup run collapsed 374 → 31 datasets.</div></div>
|
||
</div>
|
||
<div class="row accent-l">
|
||
<div style="flex:1"><div class="title">Phase 19 design note · Statistical + semantic, not neural</div><div class="meta">Meta-index is cosine similarity + endorsement aggregation. No model training. Rebuildable from <code>successful_playbooks</code> alone. Neural re-ranker deferred to Phase 20+ only if statistical floor plateaus.</div></div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="chapter">
|
||
<div class="num">Chapter 6</div>
|
||
<h2>Measured at scale, on this machine</h2>
|
||
<div class="lede">Hardware: i9 + 128GB RAM + Nvidia A4000 16GB VRAM. Numbers below are from <em>this</em> running instance. Refresh the page and they'll recompute.</div>
|
||
<div class="grid" id="ch6-scale"><div class="loading">Loading scale data…</div></div>
|
||
<div id="ch6-recall" style="margin-top:10px"></div>
|
||
</div>
|
||
|
||
<div class="chapter">
|
||
<div class="num">Chapter 7</div>
|
||
<h2>Verify or dispute — reproduce it yourself</h2>
|
||
<div class="lede">Every claim below is a curl away from falsification.</div>
|
||
<div class="card">
|
||
<div class="narr"><strong>Health.</strong> Should return <code>lakehouse ok</code>.</div>
|
||
<pre>curl http://localhost:3100/health</pre>
|
||
<div class="narr"><strong>Any SQL on multi-million-row Parquet.</strong> Sub-100ms typical.</div>
|
||
<pre>curl -s -X POST http://localhost:3100/query/sql \
|
||
-H 'Content-Type: application/json' \
|
||
-d '{"sql":"SELECT role, COUNT(*) FROM workers_500k WHERE state=\"IL\" GROUP BY role LIMIT 5"}'</pre>
|
||
<div class="narr"><strong>Hybrid search with playbook boost.</strong> The whole Phase 19 feedback loop in one request.</div>
|
||
<pre>curl -s -X POST http://localhost:3100/vectors/hybrid \
|
||
-H 'Content-Type: application/json' \
|
||
-d '{"index_name":"workers_500k_v1",
|
||
"sql_filter":"role = '\''Forklift Operator'\'' AND city = '\''Chicago'\'' AND CAST(availability AS DOUBLE) > 0.5",
|
||
"question":"reliable forklift operator",
|
||
"top_k":5,"use_playbook_memory":true,"playbook_memory_k":200}'</pre>
|
||
<div class="narr"><strong>Playbook memory stats.</strong> Count + endorsed names + sample.</div>
|
||
<pre>curl http://localhost:3100/vectors/playbook_memory/stats</pre>
|
||
<div class="narr"><strong>Pattern discovery.</strong> What do past similar fills have in common?</div>
|
||
<pre>curl -s -X POST http://localhost:3100/vectors/playbook_memory/patterns \
|
||
-H 'Content-Type: application/json' \
|
||
-d '{"query":"Forklift Operator in Chicago, IL","top_k_playbooks":25,"min_trait_frequency":0.3}'</pre>
|
||
<div class="narr"><strong>Run the dual-agent scenario yourself.</strong> All 5 events, real fills, real artifacts.</div>
|
||
<pre>cd /home/profit/lakehouse
|
||
bun run tests/multi-agent/scenario.ts
|
||
# Output: tests/multi-agent/playbooks/scenario-&lt;timestamp&gt;/report.md</pre>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="chapter">
|
||
<div class="num">Chapter 8</div>
|
||
<h2>What we are <em>not</em> claiming</h2>
|
||
<div class="lede">Every impressive-sounding number comes with a footnote. Here are the honest limits.</div>
|
||
<div class="card">
|
||
<div class="row accent-a"><div style="flex:1"><div class="title">workers_500k is synthetic.</div><div class="meta">Real client ATS export replaces this table. Schema is deliberately identical to a production ATS.</div></div></div>
|
||
<div class="row accent-a"><div style="flex:1"><div class="title">candidates table has 1,000 rows.</div><div class="meta">Intentionally small for demo. call_log references higher candidate_ids that don't cross-reference — this is a dataset alignment issue, not a pipeline issue.</div></div></div>
|
||
<div class="row accent-b"><div style="flex:1"><div class="title">Chicago permit data is real.</div><div class="meta">Pulled live from data.cityofchicago.org/resource/ydr8-5enu.json (Socrata API). Not synthetic. Not cached.</div></div></div>
|
||
<div class="row accent-l"><div style="flex:1"><div class="title">Playbook memory is seeded from demo runs.</div><div class="meta">The pipeline that seeds it is identical to what a live recruiter would trigger via /log. Same code path.</div></div></div>
|
||
<div class="row accent-w"><div style="flex:1"><div class="title">Local 7B models (mistral, qwen2.5) are imperfect.</div><div class="meta">They occasionally malform tool calls or drop fields. Multi-agent scenarios seal roughly 40-80% in one run. Larger models or constrained decoding would improve this. Not a substrate problem.</div></div></div>
|
||
<div class="row accent-r"><div style="flex:1"><div class="title">No rate/margin awareness yet.</div><div class="meta">Worker pay expectations vs contract bill rates are not modeled. Flagged as a Phase 20 item; no architectural blocker.</div></div></div>
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
|
||
<div class="footer">Lakehouse · Architecture page regenerates tests on every load · <a href="console">walkthrough</a> · <a href=".">dashboard</a></div>
|
||
|
||
<script>
|
||
// Resolve the API base path: when the app is served under a /lakehouse
// prefix, keep that prefix; otherwise talk to the origin root.
var P = '';
if (location.pathname.indexOf('/lakehouse') >= 0) {
  P = '/lakehouse';
}
// Absolute API base used by every request helper below.
var A = location.origin + P;
/**
 * Create a DOM element, optionally setting a class and text content.
 * @param {string} tag - element tag name
 * @param {string} [cls] - className to assign (skipped when falsy)
 * @param {*} [text] - textContent, stringified (skipped when null/undefined)
 * @returns {Element} the newly created element
 */
function el(tag, cls, text) {
  var node = document.createElement(tag);
  if (cls) {
    node.className = cls;
  }
  var hasText = text !== undefined && text !== null;
  if (hasText) {
    node.textContent = String(text);
  }
  return node;
}
/**
 * POST a JSON body to the gateway and resolve with the parsed JSON response.
 * @param {string} path - path appended to the API base A
 * @param {Object} [body] - request payload; defaults to an empty object
 * @returns {Promise<Object>} parsed JSON response
 */
function apiPost(path, body) {
  var payload = JSON.stringify(body || {});
  var options = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload
  };
  return fetch(A + path, options).then(function (response) {
    return response.json();
  });
}
// Kick off the live sections once the page has fully loaded.
// (loadLiveSections takes no parameters, so passing it directly is safe.)
window.addEventListener('load', loadLiveSections);
/**
 * Fetch /proof.json and hydrate every live section of the page.
 * On failure, the test host shows an explicit error AND the header
 * timestamp is updated (previously it stayed stuck on
 * "Running live tests…" forever when the server was offline).
 */
function loadLiveSections(){
  apiPost('/proof.json',{}).then(function(r){
    renderLiveTests(r.tests||[]);
    renderScaleStats(r);
    renderLanceHeadroom(r.lance_10m);
    document.getElementById('hdr-time').textContent='Generated · '+(r.generated?new Date(r.generated).toLocaleTimeString():'live');
  }).catch(function(e){
    var host1=document.getElementById('ch1-tests');host1.textContent='';
    host1.appendChild(el('div','err','Live tests failed: '+(e.message||e)+'. Server may be offline.'));
    // Fix: reflect the failure in the header instead of leaving the
    // "Running live tests…" placeholder up indefinitely.
    document.getElementById('hdr-time').textContent='Live tests unavailable';
  });
}

/**
 * Chapter 1: render one row per live test with pass/fail styling,
 * the (possibly JSON) result payload, and the measured duration.
 * @param {Array<Object>} tests - entries with pass/name/result/ms fields
 */
function renderLiveTests(tests){
  var host=document.getElementById('ch1-tests');
  host.textContent='';
  tests.forEach(function(t){
    var row=el('div','row '+(t.pass?'pass':'fail'));
    var left=document.createElement('div');
    left.style.flex='1';left.style.minWidth='0';
    left.appendChild(el('div','title',(t.pass?'✓ ':'✗ ')+(t.name||'(unnamed)')));
    left.appendChild(el('div','meta',formatResult(t.result)));
    row.appendChild(left);
    row.appendChild(el('div','val',(t.ms||0)+' ms'));
    host.appendChild(row);
  });
}

/**
 * Stringify a test result for display: objects become JSON, primitives
 * become their string form, null/undefined become an empty string.
 * @param {*} result
 * @returns {string}
 */
function formatResult(result){
  if(typeof result==='object' && result) return JSON.stringify(result);
  if(result!==undefined && result!==null) return String(result);
  return '';
}

/**
 * Chapter 6 grid: row/chunk counts, GPU VRAM, and (when reported)
 * measured HNSW recall. Every field is defensively defaulted because
 * the payload shape depends on what the server could measure.
 * @param {Object} r - full /proof.json response
 */
function renderScaleStats(r){
  var host=document.getElementById('ch6-scale');
  host.textContent='';
  var scale=r.scale||{};
  addStat(host,(scale.total_rows||0).toLocaleString(),'Rows under management','across '+(scale.datasets||0)+' datasets','accent-b');
  addStat(host,(scale.total_chunks||0).toLocaleString(),'Vector chunks indexed','across '+(scale.indexes||0)+' HNSW + Lance indexes','accent-a');
  var gpu=r.gpu||{};
  var vramStr=(gpu.vram_used_mib!==undefined?gpu.vram_used_mib+' / '+gpu.vram_total_mib+' MiB':'—');
  addStat(host,vramStr,'GPU VRAM',(gpu.name||'A4000'),'accent-w');
  if(r.recall){
    var reco=r.recall||{};
    var hnswStr=(reco.hnsw!==undefined?(reco.hnsw*100).toFixed(0)+'%':'—');
    addStat(host,hnswStr,'HNSW recall','measured · '+(reco.note||''),'accent-l');
  }
}

/**
 * Chapter 6 narrative: Lance 10M-vector headroom note, rendered only
 * when the payload carries a lance_10m section.
 * @param {Object|undefined} lance - r.lance_10m or undefined
 */
function renderLanceHeadroom(lance){
  var host=document.getElementById('ch6-recall');
  host.textContent='';
  if(!lance) return;
  var narr=el('div','narr');
  narr.appendChild(el('strong',null,'Scale headroom: '));
  narr.appendChild(document.createTextNode(
    // Fix: guard vectors with ||0 — calling .toLocaleString() on a
    // missing field previously threw and aborted the whole render.
    'Lance backend tested at '+(lance.vectors||0).toLocaleString()+' vectors, '
    +lance.disk_gb+' GB on disk, '+lance.search_p50_ms+'ms p50. '
    +(lance.note||'')
  ));
  host.appendChild(narr);
}
/**
 * Append one large stat card (number, label, optional sub-line) to host.
 * @param {Element} host - container to append into
 * @param {string} n - headline number text
 * @param {string} l - uppercase label text
 * @param {string} [sub] - optional sub-line (empty string when omitted)
 * @param {string} [cls] - optional accent class appended to the card
 */
function addStat(host, n, l, sub, cls) {
  var card = el('div', 'card stat-lg ' + (cls || ''));
  var parts = [['n', n], ['l', l], ['sub', sub || '']];
  parts.forEach(function (part) {
    card.appendChild(el('div', part[0], part[1]));
  });
  host.appendChild(card);
}
</script>
|
||
</body></html>
|