lakehouse/mcp-server/proof.html
root 6287558493 Push/daemon presence: background digest + /alerts settings page
Converts the app from 'dashboard you visit' to 'system that finds you.'
Critical for the phone-first staffing shop that won't open a URL —
the system reaches out when something matters.

Daemon:
- Starts once per Bun process (guarded via globalThis sentinel)
- Default interval 15 min (configurable, min 1, max 1440)
- On each cycle, buildDigest() compares current state against prior
  snapshot persisted in mcp-server/data/notification_state.json
- Events detected:
  - risk_escalation: role moved to tight or critical (was ok/watch)
  - deadline_approaching: staffing window falls within warn window
    (default 7 days) AND deadline date differs from prior
  - memory_growth: playbook_memory entries grew by >= 5 since last run

Channels (all opt-out individually via config):
- console: always on, logged to journalctl -u lakehouse-agent
- file: always on, appends JSONL to mcp-server/data/notifications.jsonl
- webhook: optional, POSTs {text, digest} to configured URL
  (Slack incoming-webhook / Discord webhook / any custom endpoint)

Digest format (human-readable, fits in a Slack message):
  LAKEHOUSE DIGEST — 2026-04-20 23:24
  3 staffing deadlines within window:
    • Production Worker — 2d to 2026-04-23 · demand 724
    • Maintenance Tech — 4d to 2026-04-25 · demand 32
    • Electrician — 5d to 2026-04-26 · demand 34
  +779 new playbooks (total 779, 2204 endorsed names)
  snapshot: 0 critical · 0 tight · $275,599,326 pipeline

/alerts page:
- Current status table (daemon state, interval, webhook, last run)
- Config form: enable toggle, interval, deadline warn window, webhook
  URL + label (saved to data/notification_config.json)
- 'Fire a test digest now' button — force a cycle without waiting
- Recent digests panel shows the last 10 dispatches with full text

End-to-end verified live:
- Daemon armed successfully on startup
- First-run digest dispatched to console + file in <1s
- Events detected correctly: 3 deadlines within 7 days from real
  Chicago permit data; 779 playbook entries surfaced as memory growth
- Digest text format is Slack-pastable
- Dispatch records appear in /alerts recent list

TDZ caveat: startAlertsDaemon() invocation moved to end of module so
all const/let in the alerts block evaluate before daemon reads them.
Previously failed with 'Cannot access X before initialization' when
the call lived near the top of the file. Nav added to all 6 pages:
Dashboard · Walkthrough · Architecture · Spec · Onboard · Alerts.
2026-04-20 18:24:48 -05:00

457 lines
31 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en"><head>
<meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Lakehouse — Architecture &amp; Reproduction</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{font-family:'Inter',-apple-system,system-ui,sans-serif;background:#090c10;color:#b0b8c4;font-size:14px;line-height:1.55;-webkit-font-smoothing:antialiased}
a{color:#58a6ff;text-decoration:none}
a:hover{color:#79c0ff}
.bar{background:#0d1117;padding:0 24px;height:56px;border-bottom:1px solid #171d27;display:flex;justify-content:space-between;align-items:center;position:sticky;top:0;z-index:10}
.bar h1{font-size:14px;font-weight:600;color:#e6edf3;letter-spacing:-0.2px}
.bar nav{display:flex;gap:2px}
.bar nav a{font-size:12px;color:#545d68;padding:6px 14px;border-radius:6px;transition:all 0.15s}
.bar nav a:hover{color:#e6edf3;background:#161b22}
.bar nav a.active{color:#e6edf3;background:#1c2333}
.bar .rt{font-size:11px;color:#545d68}
.wrap{max-width:1040px;margin:0 auto;padding:28px 20px 60px}
.chapter{margin-bottom:48px}
.chapter .num{color:#545d68;font-size:11px;font-weight:600;letter-spacing:1.6px;text-transform:uppercase;margin-bottom:6px}
.chapter h2{color:#e6edf3;font-size:24px;font-weight:700;letter-spacing:-0.4px;margin-bottom:8px;line-height:1.2}
.chapter .lede{color:#8b949e;font-size:14px;margin-bottom:18px;max-width:680px;line-height:1.6}
.chapter h3{color:#e6edf3;font-size:16px;font-weight:600;margin:18px 0 6px}
.card{background:#0d1117;border:1px solid #171d27;border-radius:12px;padding:20px;margin-bottom:12px}
.grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));gap:10px}
.stat-lg{padding:18px 20px}
.stat-lg .n{font-size:26px;font-weight:800;color:#e6edf3;letter-spacing:-1px;line-height:1}
.stat-lg .l{font-size:10px;color:#545d68;text-transform:uppercase;letter-spacing:1.2px;margin-top:8px;font-weight:600}
.stat-lg .sub{font-size:12px;color:#8b949e;margin-top:4px}
.accent-l{border-left:3px solid #2ea043}
.accent-b{border-left:3px solid #1f6feb}
.accent-a{border-left:3px solid #bc8cff}
.accent-w{border-left:3px solid #d29922}
.accent-r{border-left:3px solid #f85149}
.row{display:flex;justify-content:space-between;align-items:center;gap:12px;padding:10px 14px;background:#0d1117;border:1px solid #171d27;border-radius:8px;margin-bottom:6px;font-size:13px}
.row:hover{border-color:#21262d}
.row .title{color:#e6edf3;font-weight:500}
.row .meta{color:#8b949e;font-size:11px;margin-top:2px;font-family:ui-monospace,Menlo,monospace}
.row .val{color:#58a6ff;font-weight:600;white-space:nowrap;font-family:ui-monospace,Menlo,monospace}
.row.pass{border-left:3px solid #2ea043}
.row.fail{border-left:3px solid #f85149}
table.plain{width:100%;border-collapse:collapse;font-size:12px;margin-top:8px}
table.plain th{text-align:left;padding:8px 12px;color:#545d68;font-weight:600;text-transform:uppercase;font-size:10px;letter-spacing:0.8px;border-bottom:1px solid #171d27}
table.plain td{padding:8px 12px;border-bottom:1px solid #171d27;color:#c9d1d9;font-family:ui-monospace,Menlo,monospace}
table.plain tr:hover td{background:#0d1117}
code{background:#161b22;color:#e6edf3;padding:2px 6px;border-radius:4px;font-family:ui-monospace,Menlo,monospace;font-size:12px}
pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 16px;overflow-x:auto;font-family:ui-monospace,Menlo,monospace;font-size:12px;color:#c9d1d9;line-height:1.5;margin:8px 0}
.narr{color:#8b949e;font-size:13px;line-height:1.7;margin:10px 0;padding:10px 14px;border-left:2px solid #21262d}
.narr strong{color:#c9d1d9;font-weight:600}
.ref{color:#545d68;font-size:11px;margin-top:6px;font-family:ui-monospace,Menlo,monospace}
.ref strong{color:#79c0ff;font-weight:600}
.math{background:#0d1117;border:1px solid #171d27;border-radius:8px;padding:14px 16px;font-family:ui-monospace,Menlo,monospace;font-size:13px;color:#e6edf3;margin:8px 0}
.math .c{color:#8b949e}
.footer{border-top:1px solid #171d27;padding:20px;text-align:center;color:#3d444d;font-size:11px}
.loading{color:#484f58;font-style:italic;padding:20px 0;text-align:center}
.err{color:#f85149;font-size:12px;padding:10px}
@media(max-width:720px){
.wrap{padding:20px 12px 40px}
.chapter h2{font-size:20px}
.bar nav{display:none}
}
</style></head>
<body>
<div class="bar">
<h1>Lakehouse — Architecture &amp; Reproduction</h1>
<nav>
<a href=".">Dashboard</a>
<a href="console">Walkthrough</a>
<a href="proof" class="active">Architecture</a>
<a href="spec">Spec</a>
<a href="onboard">Onboard</a>
<a href="alerts">Alerts</a>
</nav>
<div class="rt" id="hdr-time">Running live tests…</div>
</div>
<div class="wrap">
<div class="chapter">
<div class="num">Chapter 1</div>
<h2>Receipts, not promises</h2>
<div class="lede">Every test below ran live against the real gateway when you loaded this page. Sub-100ms SQL on multi-million-row Parquet, hybrid search with playbook boost applied. No fixtures. If a test fails, you'll see ✗.</div>
<div id="ch1-tests"><div class="loading">Running tests…</div></div>
</div>
<div class="chapter">
<div class="num">Chapter 2</div>
<h2>Architecture — 13 crates, one object store, one local AI runtime</h2>
<div class="lede">Request flows top to bottom. Every node is independently swappable. Every line is a real HTTP or gRPC hop that you can trace with <code>tcpdump</code>.</div>
<div class="card accent-b">
<pre> HTTP :3100 + gRPC :3101
┌───────▼───────┐
│ gateway │ Rust · Axum · routing, CORS, auth, tools
└───────┬───────┘
┌────────────┬───────────┼───────────┬────────────┐
│ │ │ │ │
┌────▼───┐ ┌────▼───┐ ┌────▼───┐ ┌────▼───┐ ┌────▼───┐
│catalog │ │ query │ │ vector │ │ ingest │ │aibridge│
│ d │ │ d │ │ d │ │ d │ │ │
└────┬───┘ └────┬───┘ └────┬───┘ └────┬───┘ └────┬───┘
│ │ │ │ │
└────────────┴───────────┼───────────┴────────────┘
┌─────────────────┐
│ object storage │ Parquet files (local / S3)
└─────────────────┘
┌───────┴────────┐
│ Python sidecar │ FastAPI → Ollama
│ (aibridge) │ local models only
└────────────────┘</pre>
</div>
<h3>Per-crate responsibility</h3>
<table class="plain">
<thead><tr><th>Crate</th><th>Role</th><th>Path</th></tr></thead>
<tbody>
<tr><td>shared</td><td>Types, errors, Arrow helpers, PII detection, secrets provider</td><td>crates/shared/</td></tr>
<tr><td>storaged</td><td>object_store I/O, BucketRegistry (multi-bucket), AppendLog, ErrorJournal</td><td>crates/storaged/</td></tr>
<tr><td>catalogd</td><td>Metadata authority — manifests, views, tombstones, profiles, schema fingerprints</td><td>crates/catalogd/</td></tr>
<tr><td>queryd</td><td>DataFusion SQL engine, MemTable cache, delta merge-on-read, compaction</td><td>crates/queryd/</td></tr>
<tr><td>ingestd</td><td>CSV/JSON/PDF(+OCR)/Postgres/MySQL ingest, cron schedules, auto-PII</td><td>crates/ingestd/</td></tr>
<tr><td>vectord</td><td>Embeddings as Parquet, HNSW, trial system, autotune agent, playbook_memory</td><td>crates/vectord/</td></tr>
<tr><td>vectord-lance</td><td>Firewall crate — Lance 4.0 + Arrow 57 isolated from main Arrow 55</td><td>crates/vectord-lance/</td></tr>
<tr><td>journald</td><td>Append-only mutation event log for time-travel &amp; audit</td><td>crates/journald/</td></tr>
<tr><td>aibridge</td><td>Rust↔Python sidecar, Ollama HTTP client, VRAM introspection</td><td>crates/aibridge/</td></tr>
<tr><td>gateway</td><td>Axum HTTP :3100 + gRPC :3101, middleware, tools registry</td><td>crates/gateway/</td></tr>
<tr><td>ui</td><td>Dioxus WASM internal developer UI</td><td>crates/ui/</td></tr>
<tr><td>mcp-server</td><td>Bun TypeScript recruiter-facing app (this server)</td><td>mcp-server/</td></tr>
</tbody>
</table>
<div class="ref"><strong>Source:</strong> git.agentview.dev/profit/lakehouse &nbsp;·&nbsp; <strong>ADRs:</strong> docs/DECISIONS.md (currently 20 records)</div>
</div>
<div class="chapter">
<div class="num">Chapter 3</div>
<h2>Dual-agent recursive consensus loop</h2>
<div class="lede">The system we use to execute staffing fills is a dual-agent recursive protocol. Two agents with distinct roles iterate against a shared log until one of three terminal states is reached. It is deterministic in structure, stochastic in content, and verifiable through the per-run log artifact.</div>
<h3>Agents and protocol</h3>
<div class="card accent-a">
<pre> task in
┌───────────────────────────────────────────────────────────┐
│ EXECUTOR (mistral:latest) │
│ ──────────────────────────────────────────────────────── │
│ input: task spec + shared log + seen-candidates ledger │
│ output: one JSON action per turn │
│ · {kind:"plan",steps:[…]} │
│ · {kind:"tool_call",tool,args,rationale} │
│ · {kind:"propose_done",fills:[N of N]} │
└───────────┬───────────────────────────────┬───────────────┘
│ tool_call │ propose_done
▼ │
┌──────────────────────────┐ │
│ TOOL DISPATCH │ │
│ hybrid_search / sql │ │
│ (against live gateway) │ │
└──────────┬───────────────┘ │
│ result (trimmed, exclusions) │
▼ ▼
┌───────────────────────────────────────────────────────────┐
│ REVIEWER (qwen2.5:latest) │
│ ──────────────────────────────────────────────────────── │
│ input: task spec + shared log (including tool result) │
│ output: {kind:"critique",verdict:"continue|drift| │
│ approve_done",notes} │
└───────────┬───────────────────────────────────────────────┘
┌─────┴─────┐
▼ ▼ ▼
continue drift approve_done + propose_done ⟹ SEAL
(next turn) (cap ≈ 3 →
hard abort)
</pre>
</div>
<div class="ref"><strong>Code:</strong> tests/multi-agent/agent.ts (protocol + prompts) &nbsp;·&nbsp; tests/multi-agent/orchestrator.ts (run loop) &nbsp;·&nbsp; tests/multi-agent/scenario.ts (5-event warehouse week)</div>
<h3>Why "dual" — role specialization</h3>
<div class="narr">
<strong>The executor is an optimist.</strong> Its job is to produce progress: pull candidates, verify SQL, propose consensus. It's instructed to be decisive.
<br><br>
<strong>The reviewer is a pessimist.</strong> Its job is to catch drift: proposals that don't match the task's geography, fill count, or role. It's authorized to stop the loop.
<br><br>
This adversarial separation is cheaper and more deterministic than asking a single model to self-critique. The reviewer has a hard rule: on the turn after a <code>propose_done</code>, it MUST emit either <code>approve_done</code> or <code>drift</code> — it cannot stall with <code>continue</code>.
</div>
<h3>Why "parallel" — orchestrator can fan out</h3>
<div class="narr">
<strong>Independent pairs run concurrently.</strong> <code>tests/multi-agent/run_e2e_rated.ts</code> runs two task-specific agent pairs via <code>Promise.all</code>. Ollama serializes inference at the model level, so "parallel" is concurrent orchestration — but the substrate (gateway, queryd, vectord) handles concurrent requests cleanly. Verified in the scenario harness: two contracts sealing simultaneously.
</div>
<h3>Why "recursive" — each seal feeds the next</h3>
<div class="narr">
<strong>Consensus does not end at the sealed playbook.</strong> Every sealed playbook is persisted to <code>playbook_memory</code> via <code>POST /vectors/playbook_memory/seed</code>. The next hybrid search for a semantically similar operation consults that memory via <code>compute_boost_for(query_embedding, top_k, base_weight)</code> and re-ranks the candidate pool. The system builds on itself turn over turn, playbook over playbook.
</div>
<h3>Termination guarantees</h3>
<div class="math">
<span class="c">// three paths out, every run has one of these:</span><br>
sealed = executor.propose_done ∧ reviewer.approve_done ∧ fills.count == target<br>
abort = consecutive_tool_errors ≥ MAX_TOOL_ERRORS (3) &nbsp;&nbsp;<span class="c">// executor can't form a valid call</span><br>
abort = consecutive_drifts ≥ MAX_CONSECUTIVE_DRIFTS (3) &nbsp;<span class="c">// reviewer keeps flagging</span><br>
abort = turn &gt; MAX_TURNS (12) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="c">// no consensus reached in window</span>
</div>
<div class="narr">Every abort dumps the full log to <code>tests/multi-agent/playbooks/&lt;id&gt;-FAILED.json</code> for forensic review. No consensus is ever implicit.</div>
</div>
<div class="chapter">
<div class="num">Chapter 4</div>
<h2>Playbook memory — the compounding feedback loop</h2>
<div class="lede">A CRM stores events. This system turns events into re-ranking signal. Every sealed playbook endorses specific (worker, city, state) tuples. Every failure penalizes them. Every similar future query inherits the signal through cosine similarity.</div>
<h3>Seed shape</h3>
<div class="math">
PlaybookEntry {<br>
&nbsp;&nbsp;playbook_id, <span class="c">// pb-seed-&lt;sha8&gt;</span><br>
&nbsp;&nbsp;operation, <span class="c">// "fill: Welder x2 in Toledo, OH"</span><br>
&nbsp;&nbsp;approach, context, <span class="c">// short canonical — long strings dilute embedding</span><br>
&nbsp;&nbsp;timestamp, <span class="c">// RFC3339</span><br>
&nbsp;&nbsp;endorsed_names[], <span class="c">// validated against workers_500k for city+state</span><br>
&nbsp;&nbsp;city, state, <span class="c">// parsed from operation</span><br>
&nbsp;&nbsp;embedding <span class="c">// 768-d nomic-embed-text of text shape</span><br>
}
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs (PlaybookEntry, FailureRecord, PlaybookMemoryState)</div>
<h3>Boost math (positive + decay + negative)</h3>
<div class="math">
<span class="c">// For each playbook pb among top-K most cosine-similar:</span><br>
<span class="c">// given query embedding qv, constant base_weight, n_workers = |pb.endorsed_names|</span><br>
<br>
similarity = cosine(qv, pb.embedding) &nbsp;&nbsp; <span class="c">// skip if ≤ 0.05</span><br>
age_days = (now - pb.timestamp) / 86_400 seconds<br>
decay = e<sup>-age_days / 30</sup> &nbsp;&nbsp;<span class="c">// half-life = 30 days</span><br>
<br>
<span class="c">// For each endorsed worker in pb:</span><br>
key = (pb.city, pb.state, name)<br>
fail_count = failures[key] &nbsp;&nbsp;<span class="c">// # times this worker was marked no-show for same geo</span><br>
penalty = 0.5<sup>min(fail_count, 20)</sup><br>
<br>
per_worker = similarity × base_weight × decay × penalty / n_workers<br>
boost[key] = min(boost[key] + per_worker, MAX_BOOST_PER_WORKER)<br>
<br>
<span class="c">// MAX_BOOST_PER_WORKER = 0.25 — cap stops one popular worker from always winning</span>
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::compute_boost_for &nbsp;·&nbsp; constants: MAX_BOOST_PER_WORKER, DEFAULT_TOP_K_PLAYBOOKS, BOOST_HALF_LIFE_DAYS</div>
<h3>Application at query time</h3>
<div class="math">
<span class="c">// In /vectors/hybrid handler (crates/vectord/src/service.rs):</span><br>
1. SQL filter narrows workers_500k to geo/role/availability<br>
2. Vector index returns top_k × 5 candidates by cosine to question<br>
3. compute_boost_for(qv, k=200) returns boost map<br>
4. For each candidate: parse (name, city, state) from chunk, look up boost, add to score<br>
5. Re-sort sources by boosted score<br>
6. Truncate to requested top_k, return with playbook_boost and playbook_citations
</div>
<div class="narr">
<strong>Why k=200.</strong> Direct measurement showed cosine similarity clusters in the 0.55-0.67 band across all playbooks regardless of geo (nomic-embed-text has narrow discrimination on this kind of structured operation text). A k of 25 silently missed geo-matched playbooks. k=200 is the measured floor for reliably catching compounding. Brute-force over 200 × 768-d is sub-ms even on this hardware.
</div>
<div class="ref"><strong>Evidence:</strong> Chicago Electrician compounding test 2026-04-20 — Carmen Green, Anna Patel, Fatima Wilson went from rank &gt;5 / boost 0 / 0 citations (run 0, no seed) to rank 1/2/3 / boost +0.250 (capped) / 3 citations each (run 3, after 3 identical seeds). Each seed increments citations; total boost caps at 0.25/worker.</div>
<h3>Write-through to SQL</h3>
<div class="narr">
<strong>successful_playbooks_live</strong> is a DataFusion-queryable Parquet surface maintained by <code>POST /vectors/playbook_memory/persist_sql</code>. Every <code>/log</code> from the recruiter UI triggers seed → persist_sql. The in-memory store and the SQL surface stay synchronized (full snapshot on each persist, safe because memory is source of truth).
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::persist_to_sql &nbsp;·&nbsp; catalog-registered under "successful_playbooks_live"</div>
<h3>Pattern discovery (Path 2 — meta-index)</h3>
<div class="narr">
<strong>Beyond "who was endorsed."</strong> <code>POST /vectors/playbook_memory/patterns</code> takes a query, finds top-K similar past playbooks, pulls each endorsed worker's full workers_500k profile, and aggregates shared traits: recurring certifications, skill frequencies, modal archetype, reliability distribution. Returns a <code>discovered_pattern</code> string showing operator-actionable signal the user didn't explicitly query for.
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::discover_patterns &nbsp;·&nbsp; <strong>Surfaces:</strong> /vectors/playbook_memory/patterns endpoint, /intelligence/chat response, /intelligence/permit_contracts cards</div>
</div>
<div class="chapter">
<div class="num">Chapter 5</div>
<h2>Key architectural choices — what was picked and why</h2>
<div class="lede">Each choice is documented in <code>docs/DECISIONS.md</code> (Architecture Decision Records). If you dispute any of these, the ADR names the alternatives we rejected and the measurement that drove the call.</div>
<div class="card">
<div class="row accent-b">
<div style="flex:1"><div class="title">ADR-001 · Object storage as source of truth</div><div class="meta">No traditional database. All data is Parquet on S3-compatible object storage. Eliminates DB operational overhead; every engine can read Parquet.</div></div>
</div>
<div class="row accent-b">
<div style="flex:1"><div class="title">ADR-008 · Embeddings stored as Parquet, not a vector DB</div><div class="meta">Keeps all data in one portable format. No Pinecone/Weaviate/Qdrant lock-in. Trade-off: brute-force search up to ~100K; HNSW beyond.</div></div>
</div>
<div class="row accent-l">
<div style="flex:1"><div class="title">ADR-012 · Append-only event journal — never destroy evidence</div><div class="meta">Every mutation is appended. Compliance, audit, AI-decision forensics. Impossible to retrofit; easy to add now.</div></div>
</div>
<div class="row accent-a">
<div style="flex:1"><div class="title">ADR-015 · Tool registry before raw SQL for agents</div><div class="meta">Named, governed, audited actions for agents. Permission checks, rate limits, parameter validation. MCP-compatible.</div></div>
</div>
<div class="row accent-w">
<div style="flex:1"><div class="title">ADR-019 · Hybrid Parquet+HNSW ⊕ Lance vector backend</div><div class="meta">Parquet+HNSW primary (2.55× faster search at 100K). Lance secondary for index-build speed (14× faster), random fetch (112× faster), append (structural). Per-profile <code>vector_backend: Parquet | Lance</code>.</div></div>
</div>
<div class="row accent-r">
<div style="flex:1"><div class="title">ADR-020 · Idempotent register() with schema-fingerprint gate</div><div class="meta">Same (name, fingerprint) reuses manifest. Different fingerprint = 409 Conflict. Prevents silent duplicate manifests. Cleanup run collapsed 374 → 31 datasets.</div></div>
</div>
<div class="row accent-l">
<div style="flex:1"><div class="title">Phase 19 design note · Statistical + semantic, not neural</div><div class="meta">Meta-index is cosine similarity + endorsement aggregation. No model training. Rebuildable from <code>successful_playbooks</code> alone. Neural re-ranker deferred to Phase 20+ only if statistical floor plateaus.</div></div>
</div>
</div>
</div>
<div class="chapter">
<div class="num">Chapter 6</div>
<h2>Measured at scale, on this machine</h2>
<div class="lede">Hardware: i9 + 128GB RAM + Nvidia A4000 16GB VRAM. Numbers below are from <em>this</em> running instance. Refresh the page and they'll recompute.</div>
<div class="grid" id="ch6-scale"><div class="loading">Loading scale data…</div></div>
<div id="ch6-recall" style="margin-top:10px"></div>
</div>
<div class="chapter">
<div class="num">Chapter 7</div>
<h2>Verify or dispute — reproduce it yourself</h2>
<div class="lede">Every claim below is a curl away from falsification.</div>
<div class="card">
<div class="narr"><strong>Health.</strong> Should return <code>lakehouse ok</code>.</div>
<pre>curl http://localhost:3100/health</pre>
<div class="narr"><strong>Any SQL on multi-million-row Parquet.</strong> Sub-100ms typical.</div>
<pre>curl -s -X POST http://localhost:3100/query/sql \
-H 'Content-Type: application/json' \
-d '{"sql":"SELECT role, COUNT(*) FROM workers_500k WHERE state = '\''IL'\'' GROUP BY role LIMIT 5"}'</pre>
<div class="narr"><strong>Hybrid search with playbook boost.</strong> The whole Phase 19 feedback loop in one request.</div>
<pre>curl -s -X POST http://localhost:3100/vectors/hybrid \
-H 'Content-Type: application/json' \
-d '{"index_name":"workers_500k_v1",
"sql_filter":"role = '\''Forklift Operator'\'' AND city = '\''Chicago'\'' AND CAST(availability AS DOUBLE) > 0.5",
"question":"reliable forklift operator",
"top_k":5,"use_playbook_memory":true,"playbook_memory_k":200}'</pre>
<div class="narr"><strong>Playbook memory stats.</strong> Count + endorsed names + sample.</div>
<pre>curl http://localhost:3100/vectors/playbook_memory/stats</pre>
<div class="narr"><strong>Pattern discovery.</strong> What do past similar fills have in common?</div>
<pre>curl -s -X POST http://localhost:3100/vectors/playbook_memory/patterns \
-H 'Content-Type: application/json' \
-d '{"query":"Forklift Operator in Chicago, IL","top_k_playbooks":25,"min_trait_frequency":0.3}'</pre>
<div class="narr"><strong>Run the dual-agent scenario yourself.</strong> All 5 events, real fills, real artifacts.</div>
<pre>cd /home/profit/lakehouse
bun run tests/multi-agent/scenario.ts
# Output: tests/multi-agent/playbooks/scenario-&lt;timestamp&gt;/report.md</pre>
</div>
</div>
<div class="chapter">
<div class="num">Chapter 8</div>
<h2>What we are <em>not</em> claiming</h2>
<div class="lede">Every impressive-sounding number comes with a footnote. Here are the honest limits.</div>
<div class="card">
<div class="row accent-a"><div style="flex:1"><div class="title">workers_500k is synthetic.</div><div class="meta">Real client ATS export replaces this table. Schema is deliberately identical to a production ATS.</div></div></div>
<div class="row accent-a"><div style="flex:1"><div class="title">candidates table has 1,000 rows.</div><div class="meta">Intentionally small for demo. call_log references higher candidate_ids that don't cross-reference — this is a dataset alignment issue, not a pipeline issue.</div></div></div>
<div class="row accent-b"><div style="flex:1"><div class="title">Chicago permit data is real.</div><div class="meta">Pulled live from data.cityofchicago.org/resource/ydr8-5enu.json (Socrata API). Not synthetic. Not cached.</div></div></div>
<div class="row accent-l"><div style="flex:1"><div class="title">Playbook memory is seeded from demo runs.</div><div class="meta">The pipeline that seeds it is identical to what a live recruiter would trigger via /log. Same code path.</div></div></div>
<div class="row accent-w"><div style="flex:1"><div class="title">Local 7B models (mistral, qwen2.5) are imperfect.</div><div class="meta">They occasionally malform tool calls or drop fields. Multi-agent scenarios seal roughly 40-80% in one run. Larger models or constrained decoding would improve this. Not a substrate problem.</div></div></div>
<div class="row accent-r"><div style="flex:1"><div class="title">No rate/margin awareness yet.</div><div class="meta">Worker pay expectations vs contract bill rates are not modeled. Flagged as a Phase 20 item; no architectural blocker.</div></div></div>
</div>
</div>
</div>
<div class="footer">Lakehouse · Architecture page regenerates tests on every load · <a href="console">walkthrough</a> · <a href=".">dashboard</a></div>
<script>
// Base-path detection: the app may be mounted under /lakehouse behind a proxy,
// or served from the root. P is '' or '/lakehouse' depending on the current URL.
var P=location.pathname.indexOf('/lakehouse')>=0?'/lakehouse':'';
// A = absolute API base (origin + optional mount prefix); used by apiPost().
var A=location.origin+P;
// Tiny DOM factory: create an element, optionally with a class and text.
// Text is assigned via textContent (no HTML injection) and is applied for
// any value that is not undefined/null — so 0 and '' still render.
function el(tag, cls, text){
  var node=document.createElement(tag);
  if(cls){
    node.className=cls;
  }
  var hasText=text!==undefined&&text!==null;
  if(hasText){
    node.textContent=String(text);
  }
  return node;
}
// POST a JSON payload to the gateway (base URL A) and resolve with the
// parsed JSON response. A missing body is sent as an empty object.
function apiPost(path, body){
  var payload=JSON.stringify(body||{});
  var opts={method:'POST',headers:{'Content-Type':'application/json'},body:payload};
  return fetch(A+path,opts).then(function(resp){ return resp.json(); });
}
// Kick off the live test run once the page (and all assets) have loaded.
window.addEventListener('load',function(){
loadLiveSections();
});
function loadLiveSections(){
apiPost('/proof.json',{}).then(function(r){
var host1=document.getElementById('ch1-tests');host1.textContent='';
var tests=r.tests||[];
tests.forEach(function(t){
var row=el('div','row '+(t.pass?'pass':'fail'));
var left=document.createElement('div');left.style.flex='1';left.style.minWidth='0';
var title=el('div','title',(t.pass?'✓ ':'✗ ')+(t.name||'(unnamed)'));
var resultStr='';
if(typeof t.result==='object' && t.result) resultStr=JSON.stringify(t.result);
else if(t.result!==undefined && t.result!==null) resultStr=String(t.result);
var meta=el('div','meta',resultStr);
left.appendChild(title);left.appendChild(meta);
row.appendChild(left);
row.appendChild(el('div','val',(t.ms||0)+' ms'));
host1.appendChild(row);
});
var host6=document.getElementById('ch6-scale');host6.textContent='';
var scale=r.scale||{};
addStat(host6,(scale.total_rows||0).toLocaleString(),'Rows under management','across '+(scale.datasets||0)+' datasets','accent-b');
addStat(host6,(scale.total_chunks||0).toLocaleString(),'Vector chunks indexed','across '+(scale.indexes||0)+' HNSW + Lance indexes','accent-a');
var gpu=r.gpu||{};
var vramStr=(gpu.vram_used_mib!==undefined?gpu.vram_used_mib+' / '+gpu.vram_total_mib+' MiB':'—');
addStat(host6,vramStr,'GPU VRAM',(gpu.name||'A4000'),'accent-w');
if(r.recall){
var reco=r.recall||{};
var hnswStr=(reco.hnsw!==undefined?(reco.hnsw*100).toFixed(0)+'%':'—');
addStat(host6,hnswStr,'HNSW recall','measured · '+(reco.note||''),'accent-l');
}
var host6r=document.getElementById('ch6-recall');host6r.textContent='';
if(r.lance_10m){
var narr=el('div','narr');
narr.appendChild(el('strong',null,'Scale headroom: '));
narr.appendChild(document.createTextNode(
'Lance backend tested at '+r.lance_10m.vectors.toLocaleString()+' vectors, '
+r.lance_10m.disk_gb+' GB on disk, '+r.lance_10m.search_p50_ms+'ms p50. '
+(r.lance_10m.note||'')
));
host6r.appendChild(narr);
}
document.getElementById('hdr-time').textContent='Generated · '+(r.generated?new Date(r.generated).toLocaleTimeString():'live');
}).catch(function(e){
var host1=document.getElementById('ch1-tests');host1.textContent='';
host1.appendChild(el('div','err','Live tests failed: '+(e.message||e)+'. Server may be offline.'));
});
}
// Append one large stat card to `host`: big number `n`, uppercase label `l`,
// optional sub-label `sub`, and an optional accent class (e.g. 'accent-b')
// that drives the colored left border.
function addStat(host,n,l,sub,cls){
  var card=el('div','card stat-lg '+(cls||''));
  card.appendChild(el('div','n',n));
  card.appendChild(el('div','l',l));
  card.appendChild(el('div','sub',sub||''));
  host.appendChild(card);
}
</script>
</body></html>