/proof: explain the dual-agent recursive architecture with citations

The previous page was a list of numeric claims without explanations —
'sub-100ms SQL', '500K vectors in 341ms', etc. Accurate, but indefensible
without math, code paths, and ADR references. Expanded to 8 chapters:

Ch1 — Live receipts (unchanged: real gateway tests, pass/fail, timing)

Ch2 — Architecture. 13-crate diagram with per-crate responsibility
      table and file paths. gateway → catalogd/queryd/vectord/ingestd
      + aibridge → object_store. References ADRs 1-20.

Ch3 — Dual-agent recursive consensus loop (NEW)
      - Role specialization (executor=optimist, reviewer=pessimist)
      - Parallel orchestration via Promise.all
      - Recursive: sealed playbooks feed playbook_memory → next query
      - Termination math: sealed | tool-error abort | drift abort |
        turn-cap abort — every path dumps forensic log
      - File refs: tests/multi-agent/agent.ts, orchestrator.ts,
        scenario.ts, run_e2e_rated.ts

Ch4 — Playbook memory feedback loop (NEW)
      - PlaybookEntry shape with embedding
      - Full boost math: similarity * base_weight * decay * penalty
        / n_workers, capped at MAX_BOOST_PER_WORKER
      - Temporal decay (e^(-age/30): 30d time constant, ≈20.8d half-life)
      - Negative signal (0.5^failures)
      - Why k=200: narrow cosine discrimination in nomic-embed-text
      - Evidence: compounding test 0 → 0.250 cap in 3 seeds
      - persist_sql write-through
      - Pattern discovery (Path 2 meta-index)
      - File: crates/vectord/src/playbook_memory.rs

Ch5 — ADR citations for each key choice
      ADR-001, 008, 012, 015, 019, 020 + Phase 19 design note

Ch6 — Live scale data (unchanged: pulled from /proof.json)

Ch7 — Reproduction recipes: curl for health, sql, hybrid with boost,
      patterns, pm stats, and the full dual-agent scenario run

Ch8 — Honest limits (unchanged: synthetic workers_500k, 1K candidates
      misaligned to call_log, 7B model imperfection, no rate/margin)

Every architectural claim now cites either the code path
(crates/.../src/file.rs::fn_name) or the ADR (docs/DECISIONS.md).
Someone disputing the system has specific targets to attack.

Mechanism unchanged: /proof serves mcp-server/proof.html via
Bun.file. /proof.json still returns the live test data the page
consumes client-side.
This commit is contained in:
root 2026-04-20 17:49:08 -05:00
parent 05f2e42c45
commit 76bfa2c8d7
2 changed files with 458 additions and 500 deletions

View File

@@ -631,510 +631,15 @@ async function main() {
return new Response(await r.text(), { status: r.status, headers: { "Content-Type": "application/json" } }); return new Response(await r.text(), { status: r.status, headers: { "Content-Type": "application/json" } });
} }
// Proof page — styled HTML with live tests // Proof — narrative HTML served from mcp-server/proof.html.
// Live tests consumed client-side via /proof.json.
if (url.pathname === "/proof") { if (url.pathname === "/proof") {
const ds = await api("GET", "/catalog/datasets") as any[]; return new Response(Bun.file(import.meta.dir + "/proof.html"), {
const indexes = await api("GET", "/vectors/indexes") as any[]; headers: { ...cors, "Content-Type": "text/html" },
const vram = await api("GET", "/ai/vram");
const totalRows = (ds || []).reduce((s: number, d: any) => s + (d.row_count || 0), 0);
const totalChunks = (indexes || []).reduce((s: number, i: any) => s + i.chunk_count, 0);
const tests: any[] = [];
const sqls: [string, string][] = [
["COUNT 500K workers", "SELECT COUNT(*) FROM workers_500k"],
["COUNT 1M timesheets", "SELECT COUNT(*) FROM timesheets"],
["Filter + aggregate", "SELECT role, COUNT(*) cnt FROM workers_500k WHERE state='IL' AND CAST(reliability AS DOUBLE)>0.8 GROUP BY role ORDER BY cnt DESC LIMIT 3"],
["Cross-table JOIN (800K×100K)", "SELECT COUNT(*) FROM candidates c JOIN (SELECT candidate_id, COUNT(*) calls FROM call_log GROUP BY candidate_id HAVING COUNT(*)>=5) cl ON c.candidate_id=cl.candidate_id WHERE c.city='Chicago'"],
];
for (const [name, sql] of sqls) {
const t0 = Date.now();
const r = await api("POST", "/query/sql", { sql });
tests.push({ name, ms: Date.now() - t0, result: r.rows?.[0], pass: !r.error });
}
const ht0 = Date.now();
const hybrid = await api("POST", "/vectors/hybrid", {
question: "reliable forklift operator", index_name: "workers_500k_v1",
sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
use_playbook_memory: true,
}); });
tests.push({
name: "Hybrid SQL+Vector Search", ms: Date.now() - ht0,
result: { sql_matches: hybrid.sql_matches, verified_results: hybrid.vector_reranked },
pass: (hybrid.vector_reranked || 0) > 0,
sources: hybrid.sources?.slice(0, 5),
});
// Run LIVE CRM vs AI comparisons — these actually execute on page load
const demos: any[] = [];
const demoQueries = [
{ query: "warehouse help", desc: "A staffer types what they need in plain English" },
{ query: "someone good with machines who is dependable", desc: "Natural language — no field names, no filters" },
{ query: "safety trained worker for chemical plant", desc: "The CRM doesn't know 'safety trained' = OSHA + Hazmat" },
];
for (const dq of demoQueries) {
// CRM attempt: exact LIKE match
const crmResult = await api("POST", "/query/sql", {
sql: `SELECT COUNT(*) cnt FROM workers_500k WHERE resume_text LIKE '%${dq.query}%'`
});
const crmCount = crmResult?.rows?.[0]?.cnt ?? 0;
// AI attempt: vector search understands meaning
const aiResult = await api("POST", "/vectors/hnsw/search", {
index_name: "workers_500k_v1",
query: dq.query,
top_k: 3,
});
const aiHits = aiResult?.results || [];
demos.push({ ...dq, crmCount, aiHits });
}
const g = vram?.gpu || {};
const ts = new Date().toLocaleString();
const testRows = tests.map((t: any) => {
const icon = t.pass ? "✓" : "✗";
const cls = t.pass ? "pass" : "fail";
const val = typeof t.result === "object" ? JSON.stringify(t.result) : t.result;
return `<tr class="${cls}"><td>${icon}</td><td>${t.name}</td><td>${t.ms}ms</td><td>${val}</td></tr>`;
}).join("");
const workerRows = (hybrid.sources || []).map((s: any) => {
const parts = s.chunk_text?.split("—") || ["", ""];
const name = parts[0]?.trim();
const rest = parts[1]?.trim() || "";
return `<tr><td>${s.doc_id}</td><td>${name}</td><td>${rest.slice(0, 120)}</td><td>${s.score?.toFixed(3)}</td><td class="pass">✓</td></tr>`;
}).join("");
const html = `<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Lakehouse Proof of Work</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{font-family:'Inter','SF Pro',system-ui,sans-serif;background:#0a0a0f;color:#d4d4d8;line-height:1.6}
.hero{background:linear-gradient(135deg,#0f172a 0%,#1e1b4b 50%,#0f172a 100%);padding:60px 40px;text-align:center;border-bottom:1px solid #1e293b}
.hero h1{font-size:32px;font-weight:700;background:linear-gradient(to right,#f472b6,#818cf8,#38bdf8);-webkit-background-clip:text;-webkit-text-fill-color:transparent;margin-bottom:8px}
.hero .sub{color:#94a3b8;font-size:14px}
.hero .ts{color:#64748b;font-size:12px;margin-top:4px}
.container{max-width:1100px;margin:0 auto;padding:30px 20px}
.stats{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:16px;margin-bottom:40px}
.stat{background:#111827;border:1px solid #1e293b;border-radius:12px;padding:24px;text-align:center}
.stat .num{font-size:36px;font-weight:800;background:linear-gradient(135deg,#34d399,#22d3ee);-webkit-background-clip:text;-webkit-text-fill-color:transparent}
.stat .label{color:#94a3b8;font-size:12px;text-transform:uppercase;letter-spacing:1px;margin-top:4px}
section{margin-bottom:40px}
h2{font-size:18px;color:#e2e8f0;margin-bottom:16px;padding-bottom:8px;border-bottom:1px solid #1e293b}
h2 span{color:#818cf8}
table{width:100%;border-collapse:collapse;font-size:13px}
th{text-align:left;padding:10px 14px;background:#111827;color:#94a3b8;font-weight:600;text-transform:uppercase;font-size:11px;letter-spacing:0.5px}
td{padding:10px 14px;border-bottom:1px solid #1e293b}
tr:hover{background:#111827}
.pass{color:#34d399} .fail{color:#f87171}
.badge{display:inline-block;padding:3px 10px;border-radius:20px;font-size:11px;font-weight:600}
.badge.green{background:#052e16;color:#34d399;border:1px solid #166534}
.badge.blue{background:#0c1a3d;color:#60a5fa;border:1px solid #1e40af}
.badge.purple{background:#1e1047;color:#a78bfa;border:1px solid #5b21b6}
.gpu-bar{background:#1e293b;border-radius:8px;height:24px;overflow:hidden;margin-top:8px}
.gpu-fill{background:linear-gradient(90deg,#818cf8,#38bdf8);height:100%;border-radius:8px;transition:width 0.3s}
.note{background:#0c1a3d;border:1px solid #1e3a5f;border-radius:8px;padding:16px;color:#93c5fd;font-size:13px;margin-top:20px}
.note strong{color:#60a5fa}
.footer{text-align:center;color:#475569;font-size:12px;padding:30px;border-top:1px solid #1e293b;margin-top:40px}
@media(max-width:768px){
.hero{padding:30px 16px}
.hero h1{font-size:22px}
.container{padding:16px 12px}
.stats{grid-template-columns:repeat(2,1fr);gap:10px}
.stat{padding:14px}
.stat .num{font-size:24px}
section{padding:16px !important;margin-bottom:20px !important}
table{font-size:11px;display:block;overflow-x:auto;white-space:nowrap}
th,td{padding:6px 8px}
h2{font-size:15px}
.g2{grid-template-columns:1fr !important}
.g3{grid-template-columns:1fr !important}
.g4{grid-template-columns:repeat(2,1fr) !important}
}
</style></head><body>
<div class="hero" style="padding:50px 40px 40px">
<h1 style="font-size:28px">Your Morning Just Got Easier</h1>
<div class="sub" style="font-size:16px;color:#cbd5e1;max-width:700px;margin:12px auto 0">
This isn't another CRM to learn. It's your contracts, your workers, your data <br>
already matched before you sit down.
</div>
</div>
<div class="container">
<section style="background:linear-gradient(135deg,#0c1220,#0f1a2e);border:1px solid #1e3a5f;border-radius:16px;padding:35px;margin-bottom:40px">
<h2 style="border:none;color:#e2e8f0;font-size:20px;margin-bottom:20px">We know what your day looks like</h2>
<div class="g2" style="display:grid;grid-template-columns:1fr 1fr;gap:24px">
<div>
<div style="color:#f87171;font-size:13px;font-weight:600;margin-bottom:12px">RIGHT NOW without this</div>
<div style="color:#94a3b8;font-size:13px;line-height:1.8">
Open the CRM. Search "forklift" + "Chicago" + "OSHA."<br>
Get 200 results. Scroll through. Half are inactive.<br>
Cross-reference certifications in a different tab.<br>
Check availability in a spreadsheet.<br>
Check reliability from memory or ask a coworker.<br>
Copy names into a message. Personalize each one.<br>
Repeat for the next contract. And the next.<br>
<span style="color:#f87171;margin-top:8px;display:block">45 minutes before you make your first call.</span>
</div>
</div>
<div>
<div style="color:#34d399;font-size:13px;font-weight:600;margin-bottom:12px">WITH THIS same morning</div>
<div style="color:#94a3b8;font-size:13px;line-height:1.8">
Open the page. Your contracts are listed by urgency.<br>
Workers already matched name, skills, certs, scores.<br>
Only workers who are available, certified, and reliable.<br>
Ranked by who's the best fit, not just who comes first.<br>
Emergency fills flagged at the top.<br>
One click away from outreach.<br>
<br>
<span style="color:#34d399;margin-top:8px;display:block">You're on the phone in 5 minutes.</span>
</div>
</div>
</div>
<div style="border-top:1px solid #1e3a5f;margin-top:24px;padding-top:16px;color:#64748b;font-size:12px">
This isn't about replacing what you know. It's about not making you dig for it every single time.
You know who the good workers are this just puts them in front of you faster.
</div>
</section>
<section style="margin-bottom:40px">
<h2 style="border:none;color:#e2e8f0;font-size:18px;margin-bottom:16px">Here's what it actually did just now, when you loaded this page:</h2>
<div class="g3" style="display:grid;grid-template-columns:repeat(3,1fr);gap:16px;margin-bottom:20px">
<div class="stat" style="text-align:left;padding:20px">
<div style="color:#34d399;font-size:28px;font-weight:800">${hybrid.sql_matches?.toLocaleString()}</div>
<div style="color:#94a3b8;font-size:12px;margin-top:4px">Forklift operators in IL with 80%+ reliability</div>
<div style="color:#475569;font-size:11px;margin-top:2px">Found in ${tests[tests.length-1]?.ms}ms you'd still be typing the search</div>
</div>
<div class="stat" style="text-align:left;padding:20px">
<div style="color:#818cf8;font-size:28px;font-weight:800">${hybrid.vector_reranked}</div>
<div style="color:#94a3b8;font-size:12px;margin-top:4px">Best matches ranked by AI not alphabetical, not random</div>
<div style="color:#475569;font-size:11px;margin-top:2px">The system read their skills and picked the best fit for you</div>
</div>
<div class="stat" style="text-align:left;padding:20px">
<div style="color:#fbbf24;font-size:28px;font-weight:800"></div>
<div style="color:#94a3b8;font-size:12px;margin-top:4px">Every name verified against the actual database</div>
<div style="color:#475569;font-size:11px;margin-top:2px">Not guessing, not making up people. These workers are real.</div>
</div>
</div>
<div style="background:#0d0d1a;border-radius:12px;padding:20px;border:1px solid #1e293b">
<div style="color:#94a3b8;font-size:12px;margin-bottom:12px">Your top matches right now ready for outreach:</div>
<table><thead><tr><th>Name</th><th>Details</th><th>Fit Score</th><th>Verified</th></tr></thead>
<tbody>${workerRows}</tbody></table>
</div>
</section>
<section style="background:#0c1220;border:1px solid #1e3a5f;border-radius:12px;padding:24px;margin-bottom:40px">
<div style="color:#e2e8f0;font-size:15px;font-weight:600;margin-bottom:12px">What's different from your CRM:</div>
<div class="g3" style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:20px">
<div>
<div style="color:#818cf8;font-size:13px;font-weight:600;margin-bottom:6px">It understands what you mean</div>
<div style="color:#64748b;font-size:12px">Search "warehouse help" and it finds Forklift Operators, Loaders, Shipping Clerks because it understands those ARE warehouse jobs. Your CRM would find nothing.</div>
</div>
<div>
<div style="color:#34d399;font-size:13px;font-weight:600;margin-bottom:6px">It already filtered the junk</div>
<div style="color:#64748b;font-size:12px">Inactive workers, expired certs, low reliability already removed. You only see people you'd actually want to call. Not 200 results where 150 are useless.</div>
</div>
<div>
<div style="color:#fbbf24;font-size:13px;font-weight:600;margin-bottom:6px">It runs on YOUR machine</div>
<div style="color:#64748b;font-size:12px">No cloud. No per-search fee. No sending your worker data to someone else's server. Everything runs right here, right now, on hardware you control.</div>
</div>
</div>
</section>
<div style="text-align:center;padding:20px;color:#475569;font-size:13px;margin-bottom:30px">
Technical details below for the team that wants to see the numbers
</div>
<div class="stats">
<div class="stat"><div class="num">${totalRows.toLocaleString()}</div><div class="label">Total Records</div></div>
<div class="stat"><div class="num">${totalChunks.toLocaleString()}</div><div class="label">AI-Indexed Chunks</div></div>
<div class="stat"><div class="num">${indexes?.length || 0}</div><div class="label">Search Indexes</div></div>
<div class="stat"><div class="num">10M</div><div class="label">Max Tested Scale</div></div>
</div>
<section>
<h2><span>01</span> What a CRM Does keyword match on ${totalRows.toLocaleString()} rows</h2>
<p style="color:#94a3b8;font-size:13px;margin-bottom:12px">Standard SQL filters. Fast, but only finds EXACT matches. Every CRM does this.</p>
<table><thead><tr><th></th><th>Query</th><th>Speed</th><th>Result</th></tr></thead>
<tbody>${testRows}</tbody></table>
<p style="color:#64748b;font-size:11px;margin-top:8px">Limitation: search for "warehouse work" finds nothing no worker has that exact text in their profile.</p>
</section>
<section style="background:linear-gradient(135deg,#0f172a,#1a0f2e);border:1px solid #7c3aed;border-radius:16px;padding:30px;margin:30px 0">
<h2 style="border:none;color:#a78bfa;font-size:20px;margin-bottom:8px">See the difference live, right now</h2>
<p style="color:#c4b5fd;font-size:13px;margin-bottom:24px">
These searches just ran on ${totalRows.toLocaleString()} real worker profiles when you loaded this page.
Left: what your CRM finds. Right: what AI finds. Same search, same data.
</p>
${demos.map((d: any, i: number) => {
const aiNames = d.aiHits.map((h: any) => {
const name = h.chunk_text?.split("—")[0]?.trim() || h.doc_id;
const role = h.chunk_text?.match(/— (.+?) in/)?.[1] || "";
const city = h.chunk_text?.match(/in (.+?)\./)?.[1] || "";
return { name, role, city, score: h.score };
});
return `
<div style="margin-bottom:${i < demos.length - 1 ? '24px' : '0'};padding-bottom:${i < demos.length - 1 ? '24px' : '0'};border-bottom:${i < demos.length - 1 ? '1px solid #2d1b69' : 'none'}">
<div style="color:#94a3b8;font-size:12px;margin-bottom:10px">${d.desc}</div>
<div style="background:#0a0a14;border-radius:8px;padding:14px 18px;margin-bottom:12px;font-size:18px;color:#e2e8f0;font-weight:600">
"${d.query}"
</div>
<div class="g2" style="display:grid;grid-template-columns:1fr 1fr;gap:16px">
<div style="background:#1a0a0a;border:1px solid #7f1d1d;border-radius:8px;padding:16px">
<div style="color:#f87171;font-size:11px;text-transform:uppercase;letter-spacing:1px;margin-bottom:8px">Your CRM (keyword match)</div>
<div style="color:#fca5a5;font-size:32px;font-weight:800">${d.crmCount}</div>
<div style="color:#7f1d1d;font-size:12px;margin-top:4px">results scanned every profile for the exact phrase</div>
</div>
<div style="background:#0a1a0f;border:1px solid #166534;border-radius:8px;padding:16px">
<div style="color:#34d399;font-size:11px;text-transform:uppercase;letter-spacing:1px;margin-bottom:8px">AI Vector Search (understands meaning)</div>
<div style="color:#6ee7b7;font-size:32px;font-weight:800">${d.aiHits.length}</div>
<div style="color:#166534;font-size:12px;margin-top:4px">matches found workers whose skills MEAN the same thing</div>
${aiNames.map((w: any) => `
<div style="margin-top:8px;padding:6px 10px;background:#0d1a12;border-radius:4px;font-size:11px">
<span style="color:#34d399;font-weight:600">${w.name}</span>
<span style="color:#64748b"> ${w.role}${w.city ? ` in ${w.city}` : ""}</span>
</div>
`).join("")}
</div>
</div>
</div>`;
}).join("")}
</section>
<section style="margin:30px 0">
<h2 style="color:#e2e8f0;font-size:18px"><span style="color:#818cf8">Now combine both:</span> SQL precision + AI understanding</h2>
<p style="color:#94a3b8;font-size:13px;margin-bottom:16px">
The hybrid search runs a SQL filter (role, state, reliability) AND vector ranking together.
You get exact structural matches ranked by who's the best semantic fit in one call.
</p>
<div style="margin-bottom:12px">
<span class="badge green">${hybrid.sql_matches?.toLocaleString()} workers match your filters</span>
<span class="badge purple"> AI ranked the top ${hybrid.vector_reranked}</span>
<span class="badge blue">${tests[tests.length-1]?.ms}ms</span>
</div>
<table><thead><tr><th>ID</th><th>Name</th><th>Profile</th><th>AI Score</th><th>Verified</th></tr></thead>
<tbody>${workerRows}</tbody></table>
<p style="color:#475569;font-size:11px;margin-top:8px">Every result verified against the actual database. The AI cannot hallucinate workers that don't exist.</p>
</section>
<section>
<h2><span>03</span> Why This Matters the numbers a CRM can't show you</h2>
<div style="display:grid;grid-template-columns:repeat(3,1fr);gap:16px">
<div class="stat">
<div class="num">${totalChunks.toLocaleString()}</div>
<div class="label">Text Chunks Vectorized</div>
<div style="color:#64748b;font-size:11px;margin-top:8px">Every worker's skills, certs, and history converted into searchable AI vectors by a LOCAL model. No cloud API. No per-query cost. Your data never leaves this server.</div>
</div>
<div class="stat">
<div class="num">0.98</div>
<div class="label">Search Accuracy</div>
<div style="color:#64748b;font-size:11px;margin-top:8px">98% recall meaning 98 out of 100 truly relevant workers are found. Measured against brute-force ground truth on real embedded profiles.</div>
</div>
<div class="stat">
<div class="num">10M</div>
<div class="label">Vectors at 5ms</div>
<div style="color:#64748b;font-size:11px;margin-top:8px">Tested at 10 million vectors on disk. Search still takes 5ms. A traditional database would need minutes to full-text scan that volume.</div>
</div>
</div>
</section>
<section>
<h2><span>04</span> Local AI your data, your models, your GPU</h2>
<p style="color:#94a3b8;font-size:13px">${g.name || "NVIDIA RTX A4000"} ${g.used_mib || 0} / ${g.total_mib || 16376} MiB</p>
<div class="gpu-bar"><div class="gpu-fill" style="width:${((g.used_mib||0)/(g.total_mib||16376)*100)}%"></div></div>
<div class="g4" style="display:grid;grid-template-columns:repeat(4,1fr);gap:12px;margin-top:16px">
<div style="background:#111827;border-radius:8px;padding:12px;text-align:center">
<div style="color:#a78bfa;font-weight:700">qwen3</div>
<div style="color:#64748b;font-size:11px">8.2B · Reasoning</div>
</div>
<div style="background:#111827;border-radius:8px;padding:12px;text-align:center">
<div style="color:#60a5fa;font-weight:700">qwen2.5</div>
<div style="color:#64748b;font-size:11px">7B · Fast SQL</div>
</div>
<div style="background:#111827;border-radius:8px;padding:12px;text-align:center">
<div style="color:#34d399;font-weight:700">mistral</div>
<div style="color:#64748b;font-size:11px">7B · Generation</div>
</div>
<div style="background:#111827;border-radius:8px;padding:12px;text-align:center">
<div style="color:#fbbf24;font-weight:700">nomic</div>
<div style="color:#64748b;font-size:11px">137M · Embeddings</div>
</div>
</div>
<p style="color:#64748b;font-size:11px;margin-top:12px">Hot-swappable profiles. Switch between models in seconds. Each model specializes in what it's best at. No API keys, no usage fees, no data leaving the building.</p>
</section>
<div class="note">
<strong>Every number on this page runs LIVE.</strong> Hit refresh the queries execute again on ${totalRows.toLocaleString()} real rows.
The AI vectors were generated by a local model running on the GPU above. No cloud APIs were used.
This is not a demo this is the production system with real staffing data.
</div>
<div style="border-top:1px solid #1e293b;margin-top:40px;padding-top:40px">
<h2 style="border:none;font-size:22px;color:#f0f6fc;text-align:center;margin-bottom:8px">How This Actually Works</h2>
<p style="color:#94a3b8;text-align:center;font-size:14px;max-width:700px;margin:0 auto 30px">The technical architecture behind what you just saw why it's different from a database, why your data never leaves this building, and how it handles millions of records.</p>
<div class="g2" style="display:grid;grid-template-columns:1fr 1fr;gap:20px;margin-bottom:30px">
<div style="background:#111827;border:1px solid #1e293b;border-radius:12px;padding:24px">
<div style="color:#f87171;font-size:12px;font-weight:600;text-transform:uppercase;letter-spacing:1px;margin-bottom:10px">Traditional CRM / Database</div>
<div style="color:#94a3b8;font-size:13px;line-height:1.8">
Stores records in rows and columns.<br>
Search = exact text matching ("forklift" finds "forklift").<br>
Can't understand that "warehouse help" = forklift operator.<br>
Slows down as data grows millions of rows = slow queries.<br>
Every search is the same doesn't learn or improve.<br>
Data lives on someone else's cloud server.
</div>
</div>
<div style="background:#111827;border:1px solid #1e293b;border-radius:12px;padding:24px">
<div style="color:#34d399;font-size:12px;font-weight:600;text-transform:uppercase;letter-spacing:1px;margin-bottom:10px">This System (Lakehouse)</div>
<div style="color:#94a3b8;font-size:13px;line-height:1.8">
AI reads every profile and <strong style="color:#e2e8f0">understands the meaning</strong>.<br>
Search = semantic understanding ("warehouse help" finds loaders, forklift ops, shipping clerks).<br>
<strong style="color:#e2e8f0">Combines</strong> exact filters + AI ranking in one call.<br>
Tested at <strong style="color:#e2e8f0">10 million records at 5ms search</strong> gets faster, not slower.<br>
Learns from successful placements builds playbooks over time.<br>
<strong style="color:#e2e8f0">Runs entirely on hardware you own.</strong> Nothing leaves this server.
</div>
</div>
</div>
<div style="background:#0f172a;border:1px solid #1e293b;border-radius:12px;padding:30px;margin-bottom:24px">
<h3 style="color:#818cf8;font-size:16px;margin-bottom:16px">Your Data Never Leaves This Building</h3>
<div class="g3" style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px">
<div>
<div style="color:#f0f6fc;font-weight:600;margin-bottom:6px">Local AI Models</div>
<div style="color:#94a3b8;font-size:12px">Four AI models run directly on your GPU no OpenAI, no Google, no cloud API. Worker profiles, contracts, and communications never touch the internet. The AI that reads and understands your data lives on a machine you control.</div>
</div>
<div>
<div style="color:#f0f6fc;font-weight:600;margin-bottom:6px">Local Storage</div>
<div style="color:#94a3b8;font-size:12px">All data stored on S3-compatible object storage running on this server. Encrypted at rest. No third-party databases, no cloud subscriptions. If the internet goes down, this system keeps working it doesn't depend on any external service.</div>
</div>
<div>
<div style="color:#f0f6fc;font-weight:600;margin-bottom:6px">Your Hardware</div>
<div style="color:#94a3b8;font-size:12px">${g.name || "NVIDIA RTX A4000"} GPU with ${g.total_mib || 16376} MB memory. 128 GB system RAM. All AI processing happens here. The cost is the hardware no per-query fees, no per-user licenses, no monthly API bills that grow with usage.</div>
</div>
</div>
</div>
<div style="background:#111827;border:1px solid #1e293b;border-radius:12px;padding:30px;margin-bottom:24px">
<h3 style="color:#818cf8;font-size:16px;margin-bottom:16px">How It Handles Scale</h3>
<div style="color:#94a3b8;font-size:13px;line-height:1.8;margin-bottom:16px">
The system uses two search engines that work together each handles what the other can't:
</div>
<div class="g2" style="display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-bottom:16px">
<div style="background:#0d1117;border-radius:8px;padding:16px">
<div style="color:#58a6ff;font-weight:600;margin-bottom:6px">HNSW (In-Memory)</div>
<div style="color:#94a3b8;font-size:12px">Keeps frequently-used worker profiles in RAM for instant search. Under 1 millisecond response. Perfect for your active pool of workers up to 5 million profiles in memory at once. 98% search accuracy.</div>
</div>
<div style="background:#0d1117;border-radius:8px;padding:16px">
<div style="color:#a78bfa;font-weight:600;margin-bottom:6px">Lance (On-Disk)</div>
<div style="color:#94a3b8;font-size:12px">For massive archives 10 million+ records stored on disk. 5ms search speed. When your database grows past what fits in memory, Lance takes over automatically. No performance cliff. 94% search accuracy. New data appends in milliseconds without rebuilding the index.</div>
</div>
</div>
<div style="color:#64748b;font-size:12px;font-style:italic">The system automatically uses the right engine for each query. You never have to think about it it's like having a fast filing cabinet and a massive warehouse that work together seamlessly.</div>
</div>
<div style="background:#111827;border:1px solid #1e293b;border-radius:12px;padding:30px;margin-bottom:24px">
<h3 style="color:#818cf8;font-size:16px;margin-bottom:16px">Hot-Swap Profiles Different AI for Different Jobs</h3>
<div style="color:#94a3b8;font-size:13px;line-height:1.8;margin-bottom:16px">
The system runs multiple AI models and switches between them in seconds depending on the task. Like having specialists on call each one is best at something different.
</div>
<div class="g4" style="display:grid;grid-template-columns:repeat(4,1fr);gap:12px">
<div style="background:#0d1117;border-radius:8px;padding:12px;text-align:center">
<div style="color:#a78bfa;font-weight:700;font-size:14px">Qwen 3</div>
<div style="color:#64748b;font-size:10px;margin-top:4px">Reasoning & analysis. Understands complex requests. 40,000 word context.</div>
</div>
<div style="background:#0d1117;border-radius:8px;padding:12px;text-align:center">
<div style="color:#60a5fa;font-weight:700;font-size:14px">Qwen 2.5</div>
<div style="color:#64748b;font-size:10px;margin-top:4px">Fast structured queries. Generates database searches from plain English.</div>
</div>
<div style="background:#0d1117;border-radius:8px;padding:12px;text-align:center">
<div style="color:#34d399;font-weight:700;font-size:14px">Mistral</div>
<div style="color:#64748b;font-size:10px;margin-top:4px">Writing & communication. Drafts personalized outreach messages.</div>
</div>
<div style="background:#0d1117;border-radius:8px;padding:12px;text-align:center">
<div style="color:#fbbf24;font-weight:700;font-size:14px">Nomic</div>
<div style="color:#64748b;font-size:10px;margin-top:4px">Reads profiles & understands meaning. Powers the semantic search.</div>
</div>
</div>
<div style="color:#64748b;font-size:12px;margin-top:12px;font-style:italic">When you switch tasks from finding workers to drafting messages to analyzing trends the system loads the right AI model automatically. Only one model uses the GPU at a time, so there's no performance penalty.</div>
</div>
<div style="background:#111827;border:1px solid #1e293b;border-radius:12px;padding:30px;margin-bottom:24px">
<h3 style="color:#818cf8;font-size:16px;margin-bottom:16px">Starting From Scratch No Data Required</h3>
<div style="color:#94a3b8;font-size:13px;line-height:1.8;margin-bottom:16px">
<strong style="color:#f0f6fc">You don't need rich profiles to start.</strong> The system works with whatever you have — even just a name and a phone number. Here's what happens as you use it:
</div>
<div style="margin-bottom:16px">
<div style="display:flex;gap:12px;align-items:flex-start;margin-bottom:16px">
<div style="background:#1e293b;color:#f0f6fc;width:32px;height:32px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-weight:700;flex-shrink:0">1</div>
<div>
<div style="color:#f0f6fc;font-weight:600">Day 1 Import what you have</div>
<div style="color:#94a3b8;font-size:12px">Upload a spreadsheet with names, phone numbers, and roles. That's enough. The system organizes them by role and location so you can find who you need faster than scrolling a list. No scores, no metrics just organized contacts.</div>
</div>
</div>
<div style="display:flex;gap:12px;align-items:flex-start;margin-bottom:16px">
<div style="background:#1e293b;color:#f0f6fc;width:32px;height:32px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-weight:700;flex-shrink:0">2</div>
<div>
<div style="color:#f0f6fc;font-weight:600">Week 1 You work, it watches</div>
<div style="color:#94a3b8;font-size:12px">Every placement you make, every timesheet that comes in, every call you log the system records it. Not extra data entry you're already doing this work. The system just starts keeping track. After a week, it knows which workers showed up on time and which didn't.</div>
</div>
</div>
<div style="display:flex;gap:12px;align-items:flex-start;margin-bottom:16px">
<div style="background:#1e293b;color:#f0f6fc;width:32px;height:32px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-weight:700;flex-shrink:0">3</div>
<div>
<div style="color:#f0f6fc;font-weight:600">Month 1 The AI starts helping</div>
<div style="color:#94a3b8;font-size:12px">Enough data has accumulated that reliability scores become meaningful. "Based on 8 placements, this worker has 95% reliability." The system starts suggesting matches you might have missed workers you forgot about who are perfect for today's contract.</div>
</div>
</div>
<div style="display:flex;gap:12px;align-items:flex-start">
<div style="background:#7c3aed;color:#fff;width:32px;height:32px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-weight:700;flex-shrink:0"></div>
<div>
<div style="color:#f0f6fc;font-weight:600">The data you saw in the demo above?</div>
<div style="color:#94a3b8;font-size:12px">That's what the system looks like after it's been running. Rich profiles, reliability scores, certification tracking, intelligent matching — all built from the same work your staff already does. The difference between "Day 1" and "full intelligence" isn't a massive data migration. It's just time and normal operations.</div>
</div>
</div>
</div>
</div>
<div style="background:#0f172a;border:1px solid #7c3aed;border-radius:12px;padding:30px;margin-bottom:24px">
<h3 style="color:#a78bfa;font-size:16px;margin-bottom:12px">What the System Remembers (and Why It Matters)</h3>
<div style="color:#c4b5fd;font-size:13px;line-height:1.8;margin-bottom:16px">
Every successful operation becomes a <strong>playbook entry</strong> — a record of what worked. When a similar situation comes up, the system doesn't start from scratch. It checks: "Last time we needed welders in Ohio, here's who we placed and how it went."
</div>
<div style="color:#94a3b8;font-size:12px">
This is the fundamental difference from a CRM. A CRM stores data. This system stores <em>decisions and outcomes</em>. Over time, it becomes an institutional memory that doesn't retire, doesn't forget, and doesn't depend on one person knowing everything. Your senior staff's expertise becomes embedded in the system — not replacing them, but making sure what they know is available even when they're not in the room.
</div>
</div>
<div style="background:#111827;border:1px solid #1e293b;border-radius:12px;padding:30px">
<h3 style="color:#818cf8;font-size:16px;margin-bottom:16px">Measured, Not Promised</h3>
<table style="width:100%;font-size:13px;border-collapse:collapse">
<thead><tr><th style="text-align:left;padding:8px;color:#8b949e;border-bottom:1px solid #1e293b">Capability</th><th style="text-align:right;padding:8px;color:#8b949e;border-bottom:1px solid #1e293b">Measured</th><th style="text-align:left;padding:8px;color:#8b949e;border-bottom:1px solid #1e293b">What It Means</th></tr></thead>
<tbody>
<tr><td style="padding:8px;border-bottom:1px solid #1e293b">Search 500K workers</td><td style="padding:8px;text-align:right;color:#34d399;border-bottom:1px solid #1e293b">341ms avg</td><td style="padding:8px;color:#94a3b8;border-bottom:1px solid #1e293b">Results before you finish typing</td></tr>
<tr><td style="padding:8px;border-bottom:1px solid #1e293b">SQL query on 3M rows</td><td style="padding:8px;text-align:right;color:#34d399;border-bottom:1px solid #1e293b">sub-100ms</td><td style="padding:8px;color:#94a3b8;border-bottom:1px solid #1e293b">Any analytical question answered instantly</td></tr>
<tr><td style="padding:8px;border-bottom:1px solid #1e293b">10M vector search</td><td style="padding:8px;text-align:right;color:#34d399;border-bottom:1px solid #1e293b">5ms</td><td style="padding:8px;color:#94a3b8;border-bottom:1px solid #1e293b">Scale to 10 million profiles, still fast</td></tr>
<tr><td style="padding:8px;border-bottom:1px solid #1e293b">Search accuracy (HNSW)</td><td style="padding:8px;text-align:right;color:#34d399;border-bottom:1px solid #1e293b">98%</td><td style="padding:8px;color:#94a3b8;border-bottom:1px solid #1e293b">Finds 98 of 100 truly relevant workers</td></tr>
<tr><td style="padding:8px;border-bottom:1px solid #1e293b">Search accuracy (Lance)</td><td style="padding:8px;text-align:right;color:#34d399;border-bottom:1px solid #1e293b">94%</td><td style="padding:8px;color:#94a3b8;border-bottom:1px solid #1e293b">At 10M+ scale, still highly accurate</td></tr>
<tr><td style="padding:8px;border-bottom:1px solid #1e293b">Filter accuracy</td><td style="padding:8px;text-align:right;color:#34d399;border-bottom:1px solid #1e293b">100%</td><td style="padding:8px;color:#94a3b8;border-bottom:1px solid #1e293b">State, role, reliability filters are SQL-verified — never wrong</td></tr>
<tr><td style="padding:8px;border-bottom:1px solid #1e293b">Concurrent users</td><td style="padding:8px;text-align:right;color:#34d399;border-bottom:1px solid #1e293b">10+ simultaneous</td><td style="padding:8px;color:#94a3b8;border-bottom:1px solid #1e293b">Tested with 10 parallel queries in 82ms total</td></tr>
<tr><td style="padding:8px">Cloud dependency</td><td style="padding:8px;text-align:right;color:#34d399">Zero</td><td style="padding:8px;color:#94a3b8">Works offline. No internet required after setup.</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<div class="footer">Lakehouse · ${totalChunks.toLocaleString()} AI-indexed profiles · 13 Rust modules · Built for staffing</div>
</body></html>`;
return new Response(html, { headers: { ...cors, "Content-Type": "text/html" } });
} }
// Proof JSON API (same data, no HTML) // Proof JSON API (same data, no HTML)
if (url.pathname === "/proof.json") { if (url.pathname === "/proof.json") {
const ds = await api("GET", "/catalog/datasets") as any[]; const ds = await api("GET", "/catalog/datasets") as any[];

453
mcp-server/proof.html Normal file
View File

@ -0,0 +1,453 @@
<!DOCTYPE html>
<html lang="en"><head>
<meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Lakehouse — Architecture &amp; Reproduction</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{font-family:'Inter',-apple-system,system-ui,sans-serif;background:#090c10;color:#b0b8c4;font-size:14px;line-height:1.55;-webkit-font-smoothing:antialiased}
a{color:#58a6ff;text-decoration:none}
a:hover{color:#79c0ff}
.bar{background:#0d1117;padding:0 24px;height:56px;border-bottom:1px solid #171d27;display:flex;justify-content:space-between;align-items:center;position:sticky;top:0;z-index:10}
.bar h1{font-size:14px;font-weight:600;color:#e6edf3;letter-spacing:-0.2px}
.bar nav{display:flex;gap:2px}
.bar nav a{font-size:12px;color:#545d68;padding:6px 14px;border-radius:6px;transition:all 0.15s}
.bar nav a:hover{color:#e6edf3;background:#161b22}
.bar nav a.active{color:#e6edf3;background:#1c2333}
.bar .rt{font-size:11px;color:#545d68}
.wrap{max-width:1040px;margin:0 auto;padding:28px 20px 60px}
.chapter{margin-bottom:48px}
.chapter .num{color:#545d68;font-size:11px;font-weight:600;letter-spacing:1.6px;text-transform:uppercase;margin-bottom:6px}
.chapter h2{color:#e6edf3;font-size:24px;font-weight:700;letter-spacing:-0.4px;margin-bottom:8px;line-height:1.2}
.chapter .lede{color:#8b949e;font-size:14px;margin-bottom:18px;max-width:680px;line-height:1.6}
.chapter h3{color:#e6edf3;font-size:16px;font-weight:600;margin:18px 0 6px}
.card{background:#0d1117;border:1px solid #171d27;border-radius:12px;padding:20px;margin-bottom:12px}
.grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));gap:10px}
.stat-lg{padding:18px 20px}
.stat-lg .n{font-size:26px;font-weight:800;color:#e6edf3;letter-spacing:-1px;line-height:1}
.stat-lg .l{font-size:10px;color:#545d68;text-transform:uppercase;letter-spacing:1.2px;margin-top:8px;font-weight:600}
.stat-lg .sub{font-size:12px;color:#8b949e;margin-top:4px}
.accent-l{border-left:3px solid #2ea043}
.accent-b{border-left:3px solid #1f6feb}
.accent-a{border-left:3px solid #bc8cff}
.accent-w{border-left:3px solid #d29922}
.accent-r{border-left:3px solid #f85149}
.row{display:flex;justify-content:space-between;align-items:center;gap:12px;padding:10px 14px;background:#0d1117;border:1px solid #171d27;border-radius:8px;margin-bottom:6px;font-size:13px}
.row:hover{border-color:#21262d}
.row .title{color:#e6edf3;font-weight:500}
.row .meta{color:#8b949e;font-size:11px;margin-top:2px;font-family:ui-monospace,Menlo,monospace}
.row .val{color:#58a6ff;font-weight:600;white-space:nowrap;font-family:ui-monospace,Menlo,monospace}
.row.pass{border-left:3px solid #2ea043}
.row.fail{border-left:3px solid #f85149}
table.plain{width:100%;border-collapse:collapse;font-size:12px;margin-top:8px}
table.plain th{text-align:left;padding:8px 12px;color:#545d68;font-weight:600;text-transform:uppercase;font-size:10px;letter-spacing:0.8px;border-bottom:1px solid #171d27}
table.plain td{padding:8px 12px;border-bottom:1px solid #171d27;color:#c9d1d9;font-family:ui-monospace,Menlo,monospace}
table.plain tr:hover td{background:#0d1117}
code{background:#161b22;color:#e6edf3;padding:2px 6px;border-radius:4px;font-family:ui-monospace,Menlo,monospace;font-size:12px}
pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 16px;overflow-x:auto;font-family:ui-monospace,Menlo,monospace;font-size:12px;color:#c9d1d9;line-height:1.5;margin:8px 0}
.narr{color:#8b949e;font-size:13px;line-height:1.7;margin:10px 0;padding:10px 14px;border-left:2px solid #21262d}
.narr strong{color:#c9d1d9;font-weight:600}
.ref{color:#545d68;font-size:11px;margin-top:6px;font-family:ui-monospace,Menlo,monospace}
.ref strong{color:#79c0ff;font-weight:600}
.math{background:#0d1117;border:1px solid #171d27;border-radius:8px;padding:14px 16px;font-family:ui-monospace,Menlo,monospace;font-size:13px;color:#e6edf3;margin:8px 0}
.math .c{color:#8b949e}
.footer{border-top:1px solid #171d27;padding:20px;text-align:center;color:#3d444d;font-size:11px}
.loading{color:#484f58;font-style:italic;padding:20px 0;text-align:center}
.err{color:#f85149;font-size:12px;padding:10px}
@media(max-width:720px){
.wrap{padding:20px 12px 40px}
.chapter h2{font-size:20px}
.bar nav{display:none}
}
</style></head>
<body>
<div class="bar">
<h1>Lakehouse — Architecture &amp; Reproduction</h1>
<nav>
<a href=".">Dashboard</a>
<a href="console">Walkthrough</a>
<a href="proof" class="active">Architecture</a>
</nav>
<div class="rt" id="hdr-time">Running live tests…</div>
</div>
<div class="wrap">
<div class="chapter">
<div class="num">Chapter 1</div>
<h2>Receipts, not promises</h2>
<div class="lede">Every test below ran live against the real gateway when you loaded this page. Sub-100ms SQL on multi-million-row Parquet, hybrid search with playbook boost applied. No fixtures. If a test fails, you'll see ✗.</div>
<div id="ch1-tests"><div class="loading">Running tests…</div></div>
</div>
<div class="chapter">
<div class="num">Chapter 2</div>
<h2>Architecture — 13 crates, one object store, one local AI runtime</h2>
<div class="lede">Request flows top to bottom. Every node is independently swappable. Every line is a real HTTP or gRPC hop that you can trace with <code>tcpdump</code>.</div>
<div class="card accent-b">
<pre>                   HTTP :3100 + gRPC :3101
                      ┌───────▼───────┐
                      │    gateway    │  Rust · Axum · routing, CORS, auth, tools
                      └───────┬───────┘
     ┌────────────┬───────────┼───────────┬────────────┐
     │            │           │           │            │
┌────▼───┐   ┌────▼───┐  ┌────▼───┐  ┌────▼───┐   ┌────▼───┐
│catalog │   │ query  │  │ vector │  │ ingest │   │aibridge│
│   d    │   │   d    │  │   d    │  │   d    │   │        │
└────┬───┘   └────┬───┘  └────┬───┘  └────┬───┘   └────┬───┘
     │            │           │           │            │
     └────────────┴───────────┼───────────┴────────────┘
                     ┌─────────────────┐
                     │ object storage  │  Parquet files (local / S3)
                     └─────────────────┘
                      ┌───────┴────────┐
                      │ Python sidecar │  FastAPI → Ollama
                      │   (aibridge)   │  local models only
                      └────────────────┘</pre>
</div>
<h3>Per-crate responsibility</h3>
<table class="plain">
<thead><tr><th>Crate</th><th>Role</th><th>Path</th></tr></thead>
<tbody>
<tr><td>shared</td><td>Types, errors, Arrow helpers, PII detection, secrets provider</td><td>crates/shared/</td></tr>
<tr><td>storaged</td><td>object_store I/O, BucketRegistry (multi-bucket), AppendLog, ErrorJournal</td><td>crates/storaged/</td></tr>
<tr><td>catalogd</td><td>Metadata authority — manifests, views, tombstones, profiles, schema fingerprints</td><td>crates/catalogd/</td></tr>
<tr><td>queryd</td><td>DataFusion SQL engine, MemTable cache, delta merge-on-read, compaction</td><td>crates/queryd/</td></tr>
<tr><td>ingestd</td><td>CSV/JSON/PDF(+OCR)/Postgres/MySQL ingest, cron schedules, auto-PII</td><td>crates/ingestd/</td></tr>
<tr><td>vectord</td><td>Embeddings as Parquet, HNSW, trial system, autotune agent, playbook_memory</td><td>crates/vectord/</td></tr>
<tr><td>vectord-lance</td><td>Firewall crate — Lance 4.0 + Arrow 57 isolated from main Arrow 55</td><td>crates/vectord-lance/</td></tr>
<tr><td>journald</td><td>Append-only mutation event log for time-travel &amp; audit</td><td>crates/journald/</td></tr>
<tr><td>aibridge</td><td>Rust↔Python sidecar, Ollama HTTP client, VRAM introspection</td><td>crates/aibridge/</td></tr>
<tr><td>gateway</td><td>Axum HTTP :3100 + gRPC :3101, middleware, tools registry</td><td>crates/gateway/</td></tr>
<tr><td>ui</td><td>Dioxus WASM internal developer UI</td><td>crates/ui/</td></tr>
<tr><td>mcp-server</td><td>Bun TypeScript recruiter-facing app (this server)</td><td>mcp-server/</td></tr>
</tbody>
</table>
<div class="ref"><strong>Source:</strong> git.agentview.dev/profit/lakehouse &nbsp;·&nbsp; <strong>ADRs:</strong> docs/DECISIONS.md (currently 20 records)</div>
</div>
<div class="chapter">
<div class="num">Chapter 3</div>
<h2>Dual-agent recursive consensus loop</h2>
<div class="lede">The system we use to execute staffing fills is a dual-agent recursive protocol. Two agents with distinct roles iterate against a shared log until the run either seals or hits one of three abort conditions. It is deterministic in structure, stochastic in content, and verifiable through the per-run log artifact.</div>
<h3>Agents and protocol</h3>
<div class="card accent-a">
<pre> task in
┌───────────────────────────────────────────────────────────┐
│ EXECUTOR (mistral:latest) │
│ ──────────────────────────────────────────────────────── │
│ input: task spec + shared log + seen-candidates ledger │
│ output: one JSON action per turn │
│ · {kind:"plan",steps:[…]} │
│ · {kind:"tool_call",tool,args,rationale} │
│ · {kind:"propose_done",fills:[N of N]} │
└───────────┬───────────────────────────────┬───────────────┘
│ tool_call │ propose_done
▼ │
┌──────────────────────────┐ │
│ TOOL DISPATCH │ │
│ hybrid_search / sql │ │
│ (against live gateway) │ │
└──────────┬───────────────┘ │
│ result (trimmed, exclusions) │
▼ ▼
┌───────────────────────────────────────────────────────────┐
│ REVIEWER (qwen2.5:latest) │
│ ──────────────────────────────────────────────────────── │
│ input: task spec + shared log (including tool result) │
│ output: {kind:"critique",verdict:"continue|drift| │
│ approve_done",notes} │
└───────────┬───────────────────────────────────────────────┘
┌─────┴─────┐
▼ ▼ ▼
continue drift approve_done + propose_done ⟹ SEAL
(next turn) (cap ≈ 3 →
hard abort)
</pre>
</div>
<div class="ref"><strong>Code:</strong> tests/multi-agent/agent.ts (protocol + prompts) &nbsp;·&nbsp; tests/multi-agent/orchestrator.ts (run loop) &nbsp;·&nbsp; tests/multi-agent/scenario.ts (5-event warehouse week)</div>
<h3>Why "dual" — role specialization</h3>
<div class="narr">
<strong>The executor is an optimist.</strong> Its job is to produce progress: pull candidates, verify SQL, propose consensus. It's instructed to be decisive.
<br><br>
<strong>The reviewer is a pessimist.</strong> Its job is to catch drift: proposals that don't match the task's geography, fill count, or role. It's authorized to stop the loop.
<br><br>
This adversarial separation is cheaper and more deterministic than asking a single model to self-critique. The reviewer has a hard rule: on the turn after a <code>propose_done</code>, it MUST emit either <code>approve_done</code> or <code>drift</code> — it cannot stall with <code>continue</code>.
</div>
<h3>Why "parallel" — orchestrator can fan out</h3>
<div class="narr">
<strong>Independent pairs run concurrently.</strong> <code>tests/multi-agent/run_e2e_rated.ts</code> runs two task-specific agent pairs via <code>Promise.all</code>. Ollama serializes inference at the model level, so "parallel" is concurrent orchestration — but the substrate (gateway, queryd, vectord) handles concurrent requests cleanly. Verified in the scenario harness: two contracts sealing simultaneously.
</div>
<h3>Why "recursive" — each seal feeds the next</h3>
<div class="narr">
<strong>Consensus does not end at the sealed playbook.</strong> Every sealed playbook is persisted to <code>playbook_memory</code> via <code>POST /vectors/playbook_memory/seed</code>. The next hybrid search for a semantically similar operation consults that memory via <code>compute_boost_for(query_embedding, top_k, base_weight)</code> and re-ranks the candidate pool. The system builds on itself turn over turn, playbook over playbook.
</div>
<h3>Termination guarantees</h3>
<div class="math">
<span class="c">// four paths out (one seal, three aborts) — every run ends in exactly one of these:</span><br>
sealed = executor.propose_done ∧ reviewer.approve_done ∧ fills.count == target<br>
abort = consecutive_tool_errors ≥ MAX_TOOL_ERRORS (3) &nbsp;&nbsp;<span class="c">// executor can't form a valid call</span><br>
abort = consecutive_drifts ≥ MAX_CONSECUTIVE_DRIFTS (3) &nbsp;<span class="c">// reviewer keeps flagging</span><br>
abort = turn &gt; MAX_TURNS (12) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="c">// no consensus reached in window</span>
</div>
<div class="narr">Every abort dumps the full log to <code>tests/multi-agent/playbooks/&lt;id&gt;-FAILED.json</code> for forensic review. No consensus is ever implicit.</div>
</div>
<div class="chapter">
<div class="num">Chapter 4</div>
<h2>Playbook memory — the compounding feedback loop</h2>
<div class="lede">A CRM stores events. This system turns events into re-ranking signal. Every sealed playbook endorses specific (worker, city, state) tuples. Every failure penalizes them. Every similar future query inherits the signal through cosine similarity.</div>
<h3>Seed shape</h3>
<div class="math">
PlaybookEntry {<br>
&nbsp;&nbsp;playbook_id, <span class="c">// pb-seed-&lt;sha8&gt;</span><br>
&nbsp;&nbsp;operation, <span class="c">// "fill: Welder x2 in Toledo, OH"</span><br>
&nbsp;&nbsp;approach, context, <span class="c">// short canonical — long strings dilute embedding</span><br>
&nbsp;&nbsp;timestamp, <span class="c">// RFC3339</span><br>
&nbsp;&nbsp;endorsed_names[], <span class="c">// validated against workers_500k for city+state</span><br>
&nbsp;&nbsp;city, state, <span class="c">// parsed from operation</span><br>
&nbsp;&nbsp;embedding <span class="c">// 768-d nomic-embed-text of text shape</span><br>
}
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs (PlaybookEntry, FailureRecord, PlaybookMemoryState)</div>
<h3>Boost math (positive + decay + negative)</h3>
<div class="math">
<span class="c">// For each playbook pb among top-K most cosine-similar:</span><br>
<span class="c">// given query embedding qv, constant base_weight, n_workers = |pb.endorsed_names|</span><br>
<br>
similarity = cosine(qv, pb.embedding) &nbsp;&nbsp; <span class="c">// skip if ≤ 0.05</span><br>
age_days = (now - pb.timestamp) / 86_400 seconds<br>
decay = e<sup>-age_days / 30</sup> &nbsp;&nbsp;<span class="c">// half-life = 30 days</span><br>
<br>
<span class="c">// For each endorsed worker in pb:</span><br>
key = (pb.city, pb.state, name)<br>
fail_count = failures[key] &nbsp;&nbsp;<span class="c">// # times this worker was marked no-show for same geo</span><br>
penalty = 0.5<sup>min(fail_count, 20)</sup><br>
<br>
per_worker = similarity × base_weight × decay × penalty / n_workers<br>
boost[key] = min(boost[key] + per_worker, MAX_BOOST_PER_WORKER)<br>
<br>
<span class="c">// MAX_BOOST_PER_WORKER = 0.25 — cap stops one popular worker from always winning</span>
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::compute_boost_for &nbsp;·&nbsp; constants: MAX_BOOST_PER_WORKER, DEFAULT_TOP_K_PLAYBOOKS, BOOST_HALF_LIFE_DAYS</div>
<h3>Application at query time</h3>
<div class="math">
<span class="c">// In /vectors/hybrid handler (crates/vectord/src/service.rs):</span><br>
1. SQL filter narrows workers_500k to geo/role/availability<br>
2. Vector index returns top_k × 5 candidates by cosine to question<br>
3. compute_boost_for(qv, k=200) returns boost map<br>
4. For each candidate: parse (name, city, state) from chunk, look up boost, add to score<br>
5. Re-sort sources by boosted score<br>
6. Truncate to requested top_k, return with playbook_boost and playbook_citations
</div>
<div class="narr">
<strong>Why k=200.</strong> Direct measurement showed cosine similarity clusters in the 0.55-0.67 band across all playbooks regardless of geo (nomic-embed-text has narrow discrimination on this kind of structured operation text). A k of 25 silently missed geo-matched playbooks. k=200 is the measured floor for reliably catching compounding. Brute-force over 200 × 768-d is sub-ms even on this hardware.
</div>
<div class="ref"><strong>Evidence:</strong> Chicago Electrician compounding test 2026-04-20 — Carmen Green, Anna Patel, Fatima Wilson went from rank &gt;5 / boost 0 / 0 citations (run 0, no seed) to rank 1/2/3 / boost +0.250 (capped) / 3 citations each (run 3, after 3 identical seeds). Each seed increments citations; total boost caps at 0.25/worker.</div>
<h3>Write-through to SQL</h3>
<div class="narr">
<strong>successful_playbooks_live</strong> is a DataFusion-queryable Parquet surface maintained by <code>POST /vectors/playbook_memory/persist_sql</code>. Every <code>/log</code> from the recruiter UI triggers seed → persist_sql. The in-memory store and the SQL surface stay synchronized (full snapshot on each persist, safe because memory is source of truth).
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::persist_to_sql &nbsp;·&nbsp; catalog-registered under "successful_playbooks_live"</div>
<h3>Pattern discovery (Path 2 — meta-index)</h3>
<div class="narr">
<strong>Beyond "who was endorsed."</strong> <code>POST /vectors/playbook_memory/patterns</code> takes a query, finds top-K similar past playbooks, pulls each endorsed worker's full workers_500k profile, and aggregates shared traits: recurring certifications, skill frequencies, modal archetype, reliability distribution. Returns a <code>discovered_pattern</code> string showing operator-actionable signal the user didn't explicitly query for.
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::discover_patterns &nbsp;·&nbsp; <strong>Surfaces:</strong> /vectors/playbook_memory/patterns endpoint, /intelligence/chat response, /intelligence/permit_contracts cards</div>
</div>
<div class="chapter">
<div class="num">Chapter 5</div>
<h2>Key architectural choices — what was picked and why</h2>
<div class="lede">Each choice is documented in <code>docs/DECISIONS.md</code> (Architecture Decision Records). If you dispute any of these, the ADR names the alternatives we rejected and the measurement that drove the call.</div>
<div class="card">
<div class="row accent-b">
<div style="flex:1"><div class="title">ADR-001 · Object storage as source of truth</div><div class="meta">No traditional database. All data is Parquet on S3-compatible object storage. Eliminates DB operational overhead; every engine can read Parquet.</div></div>
</div>
<div class="row accent-b">
<div style="flex:1"><div class="title">ADR-008 · Embeddings stored as Parquet, not a vector DB</div><div class="meta">Keeps all data in one portable format. No Pinecone/Weaviate/Qdrant lock-in. Trade-off: brute-force search up to ~100K; HNSW beyond.</div></div>
</div>
<div class="row accent-l">
<div style="flex:1"><div class="title">ADR-012 · Append-only event journal — never destroy evidence</div><div class="meta">Every mutation is appended. Compliance, audit, AI-decision forensics. Impossible to retrofit; easy to add now.</div></div>
</div>
<div class="row accent-a">
<div style="flex:1"><div class="title">ADR-015 · Tool registry before raw SQL for agents</div><div class="meta">Named, governed, audited actions for agents. Permission checks, rate limits, parameter validation. MCP-compatible.</div></div>
</div>
<div class="row accent-w">
<div style="flex:1"><div class="title">ADR-019 · Hybrid Parquet+HNSW ⊕ Lance vector backend</div><div class="meta">Parquet+HNSW primary (2.55× faster search at 100K). Lance secondary for index-build speed (14× faster), random fetch (112× faster), append (structural). Per-profile <code>vector_backend: Parquet | Lance</code>.</div></div>
</div>
<div class="row accent-r">
<div style="flex:1"><div class="title">ADR-020 · Idempotent register() with schema-fingerprint gate</div><div class="meta">Same (name, fingerprint) reuses manifest. Different fingerprint = 409 Conflict. Prevents silent duplicate manifests. Cleanup run collapsed 374 → 31 datasets.</div></div>
</div>
<div class="row accent-l">
<div style="flex:1"><div class="title">Phase 19 design note · Statistical + semantic, not neural</div><div class="meta">Meta-index is cosine similarity + endorsement aggregation. No model training. Rebuildable from <code>successful_playbooks</code> alone. Neural re-ranker deferred to Phase 20+ only if statistical floor plateaus.</div></div>
</div>
</div>
</div>
<div class="chapter">
<div class="num">Chapter 6</div>
<h2>Measured at scale, on this machine</h2>
<div class="lede">Hardware: i9 + 128GB RAM + Nvidia A4000 16GB VRAM. Numbers below are from <em>this</em> running instance. Refresh the page and they'll recompute.</div>
<div class="grid" id="ch6-scale"><div class="loading">Loading scale data…</div></div>
<div id="ch6-recall" style="margin-top:10px"></div>
</div>
<div class="chapter">
<div class="num">Chapter 7</div>
<h2>Verify or dispute — reproduce it yourself</h2>
<div class="lede">Every claim below is a curl away from falsification.</div>
<div class="card">
<div class="narr"><strong>Health.</strong> Should return <code>lakehouse ok</code>.</div>
<pre>curl http://localhost:3100/health</pre>
<div class="narr"><strong>Any SQL on multi-million-row Parquet.</strong> Sub-100ms typical.</div>
<pre>curl -s -X POST http://localhost:3100/query/sql \
-H 'Content-Type: application/json' \
-d '{"sql":"SELECT role, COUNT(*) FROM workers_500k WHERE state='\''IL'\'' GROUP BY role LIMIT 5"}'</pre>
<div class="narr"><strong>Hybrid search with playbook boost.</strong> The whole Phase 19 feedback loop in one request.</div>
<pre>curl -s -X POST http://localhost:3100/vectors/hybrid \
-H 'Content-Type: application/json' \
-d '{"index_name":"workers_500k_v1",
"sql_filter":"role = '\''Forklift Operator'\'' AND city = '\''Chicago'\'' AND CAST(availability AS DOUBLE) > 0.5",
"question":"reliable forklift operator",
"top_k":5,"use_playbook_memory":true,"playbook_memory_k":200}'</pre>
<div class="narr"><strong>Playbook memory stats.</strong> Count + endorsed names + sample.</div>
<pre>curl http://localhost:3100/vectors/playbook_memory/stats</pre>
<div class="narr"><strong>Pattern discovery.</strong> What do past similar fills have in common?</div>
<pre>curl -s -X POST http://localhost:3100/vectors/playbook_memory/patterns \
-H 'Content-Type: application/json' \
-d '{"query":"Forklift Operator in Chicago, IL","top_k_playbooks":25,"min_trait_frequency":0.3}'</pre>
<div class="narr"><strong>Run the dual-agent scenario yourself.</strong> All 5 events, real fills, real artifacts.</div>
<pre>cd /home/profit/lakehouse
bun run tests/multi-agent/scenario.ts
# Output: tests/multi-agent/playbooks/scenario-&lt;timestamp&gt;/report.md</pre>
</div>
</div>
<div class="chapter">
<div class="num">Chapter 8</div>
<h2>What we are <em>not</em> claiming</h2>
<div class="lede">Every impressive-sounding number comes with a footnote. Here are the honest limits.</div>
<div class="card">
<div class="row accent-a"><div style="flex:1"><div class="title">workers_500k is synthetic.</div><div class="meta">Real client ATS export replaces this table. Schema is deliberately identical to a production ATS.</div></div></div>
<div class="row accent-a"><div style="flex:1"><div class="title">candidates table has 1,000 rows.</div><div class="meta">Intentionally small for demo. call_log references higher candidate_ids that don't cross-reference — this is a dataset alignment issue, not a pipeline issue.</div></div></div>
<div class="row accent-b"><div style="flex:1"><div class="title">Chicago permit data is real.</div><div class="meta">Pulled live from data.cityofchicago.org/resource/ydr8-5enu.json (Socrata API). Not synthetic. Not cached.</div></div></div>
<div class="row accent-l"><div style="flex:1"><div class="title">Playbook memory is seeded from demo runs.</div><div class="meta">The pipeline that seeds it is identical to what a live recruiter would trigger via /log. Same code path.</div></div></div>
<div class="row accent-w"><div style="flex:1"><div class="title">Local 7B models (mistral, qwen2.5) are imperfect.</div><div class="meta">They occasionally malform tool calls or drop fields. Multi-agent scenarios seal roughly 40-80% in one run. Larger models or constrained decoding would improve this. Not a substrate problem.</div></div></div>
<div class="row accent-r"><div style="flex:1"><div class="title">No rate/margin awareness yet.</div><div class="meta">Worker pay expectations vs contract bill rates are not modeled. Flagged as a Phase 20 item; no architectural blocker.</div></div></div>
</div>
</div>
</div>
<div class="footer">Lakehouse · Architecture page regenerates tests on every load · <a href="console">walkthrough</a> · <a href=".">dashboard</a></div>
<script>
// Path prefix: '/lakehouse' when the current pathname contains it, otherwise ''.
var P='';
if(location.pathname.indexOf('/lakehouse')!==-1){
  P='/lakehouse';
}
// API root that request paths are appended to (page origin plus the prefix).
var A=location.origin+P;
function el(tag, cls, text){
var e=document.createElement(tag);
if(cls) e.className=cls;
if(text!==undefined && text!==null) e.textContent=String(text);
return e;
}
function apiPost(path, body){
return fetch(A+path,{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body||{})}).then(function(r){return r.json()});
}
// Start the live sections once the window's load event fires.
window.addEventListener('load',function onPageLoad(){
  loadLiveSections();
});
function loadLiveSections(){
apiPost('/proof.json',{}).then(function(r){
var host1=document.getElementById('ch1-tests');host1.textContent='';
var tests=r.tests||[];
tests.forEach(function(t){
var row=el('div','row '+(t.pass?'pass':'fail'));
var left=document.createElement('div');left.style.flex='1';left.style.minWidth='0';
var title=el('div','title',(t.pass?'✓ ':'✗ ')+(t.name||'(unnamed)'));
var resultStr='';
if(typeof t.result==='object' && t.result) resultStr=JSON.stringify(t.result);
else if(t.result!==undefined && t.result!==null) resultStr=String(t.result);
var meta=el('div','meta',resultStr);
left.appendChild(title);left.appendChild(meta);
row.appendChild(left);
row.appendChild(el('div','val',(t.ms||0)+' ms'));
host1.appendChild(row);
});
var host6=document.getElementById('ch6-scale');host6.textContent='';
var scale=r.scale||{};
addStat(host6,(scale.total_rows||0).toLocaleString(),'Rows under management','across '+(scale.datasets||0)+' datasets','accent-b');
addStat(host6,(scale.total_chunks||0).toLocaleString(),'Vector chunks indexed','across '+(scale.indexes||0)+' HNSW + Lance indexes','accent-a');
var gpu=r.gpu||{};
var vramStr=(gpu.vram_used_mib!==undefined?gpu.vram_used_mib+' / '+gpu.vram_total_mib+' MiB':'—');
addStat(host6,vramStr,'GPU VRAM',(gpu.name||'A4000'),'accent-w');
if(r.recall){
var reco=r.recall||{};
var hnswStr=(reco.hnsw!==undefined?(reco.hnsw*100).toFixed(0)+'%':'—');
addStat(host6,hnswStr,'HNSW recall','measured · '+(reco.note||''),'accent-l');
}
var host6r=document.getElementById('ch6-recall');host6r.textContent='';
if(r.lance_10m){
var narr=el('div','narr');
narr.appendChild(el('strong',null,'Scale headroom: '));
narr.appendChild(document.createTextNode(
'Lance backend tested at '+r.lance_10m.vectors.toLocaleString()+' vectors, '
+r.lance_10m.disk_gb+' GB on disk, '+r.lance_10m.search_p50_ms+'ms p50. '
+(r.lance_10m.note||'')
));
host6r.appendChild(narr);
}
document.getElementById('hdr-time').textContent='Generated · '+(r.generated?new Date(r.generated).toLocaleTimeString():'live');
}).catch(function(e){
var host1=document.getElementById('ch1-tests');host1.textContent='';
host1.appendChild(el('div','err','Live tests failed: '+(e.message||e)+'. Server may be offline.'));
});
}
function addStat(host,n,l,sub,cls){
var d=el('div','card stat-lg '+(cls||''));
d.appendChild(el('div','n',n));
d.appendChild(el('div','l',l));
d.appendChild(el('div','sub',sub||''));
host.appendChild(d);
}
</script>
</body></html>