diff --git a/mcp-server/index.ts b/mcp-server/index.ts index 8945ba4..65a73dc 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -631,510 +631,15 @@ async function main() { return new Response(await r.text(), { status: r.status, headers: { "Content-Type": "application/json" } }); } - // Proof page — styled HTML with live tests + // Proof — narrative HTML served from mcp-server/proof.html. + // Live tests consumed client-side via /proof.json. if (url.pathname === "/proof") { - const ds = await api("GET", "/catalog/datasets") as any[]; - const indexes = await api("GET", "/vectors/indexes") as any[]; - const vram = await api("GET", "/ai/vram"); - const totalRows = (ds || []).reduce((s: number, d: any) => s + (d.row_count || 0), 0); - const totalChunks = (indexes || []).reduce((s: number, i: any) => s + i.chunk_count, 0); - - const tests: any[] = []; - const sqls: [string, string][] = [ - ["COUNT 500K workers", "SELECT COUNT(*) FROM workers_500k"], - ["COUNT 1M timesheets", "SELECT COUNT(*) FROM timesheets"], - ["Filter + aggregate", "SELECT role, COUNT(*) cnt FROM workers_500k WHERE state='IL' AND CAST(reliability AS DOUBLE)>0.8 GROUP BY role ORDER BY cnt DESC LIMIT 3"], - ["Cross-table JOIN (800K×100K)", "SELECT COUNT(*) FROM candidates c JOIN (SELECT candidate_id, COUNT(*) calls FROM call_log GROUP BY candidate_id HAVING COUNT(*)>=5) cl ON c.candidate_id=cl.candidate_id WHERE c.city='Chicago'"], - ]; - for (const [name, sql] of sqls) { - const t0 = Date.now(); - const r = await api("POST", "/query/sql", { sql }); - tests.push({ name, ms: Date.now() - t0, result: r.rows?.[0], pass: !r.error }); - } - const ht0 = Date.now(); - const hybrid = await api("POST", "/vectors/hybrid", { - question: "reliable forklift operator", index_name: "workers_500k_v1", - sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8", - filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false, - use_playbook_memory: 
true, + return new Response(Bun.file(import.meta.dir + "/proof.html"), { + headers: { ...cors, "Content-Type": "text/html" }, }); - tests.push({ - name: "Hybrid SQL+Vector Search", ms: Date.now() - ht0, - result: { sql_matches: hybrid.sql_matches, verified_results: hybrid.vector_reranked }, - pass: (hybrid.vector_reranked || 0) > 0, - sources: hybrid.sources?.slice(0, 5), - }); - - // Run LIVE CRM vs AI comparisons — these actually execute on page load - const demos: any[] = []; - const demoQueries = [ - { query: "warehouse help", desc: "A staffer types what they need in plain English" }, - { query: "someone good with machines who is dependable", desc: "Natural language — no field names, no filters" }, - { query: "safety trained worker for chemical plant", desc: "The CRM doesn't know 'safety trained' = OSHA + Hazmat" }, - ]; - for (const dq of demoQueries) { - // CRM attempt: exact LIKE match - const crmResult = await api("POST", "/query/sql", { - sql: `SELECT COUNT(*) cnt FROM workers_500k WHERE resume_text LIKE '%${dq.query}%'` - }); - const crmCount = crmResult?.rows?.[0]?.cnt ?? 0; - - // AI attempt: vector search understands meaning - const aiResult = await api("POST", "/vectors/hnsw/search", { - index_name: "workers_500k_v1", - query: dq.query, - top_k: 3, - }); - const aiHits = aiResult?.results || []; - - demos.push({ ...dq, crmCount, aiHits }); - } - - const g = vram?.gpu || {}; - const ts = new Date().toLocaleString(); - const testRows = tests.map((t: any) => { - const icon = t.pass ? "✓" : "✗"; - const cls = t.pass ? "pass" : "fail"; - const val = typeof t.result === "object" ? 
JSON.stringify(t.result) : t.result; - return `${icon}${t.name}${t.ms}ms${val}`; - }).join(""); - - const workerRows = (hybrid.sources || []).map((s: any) => { - const parts = s.chunk_text?.split("—") || ["", ""]; - const name = parts[0]?.trim(); - const rest = parts[1]?.trim() || ""; - return `${s.doc_id}${name}${rest.slice(0, 120)}${s.score?.toFixed(3)}✓`; - }).join(""); - - const html = ` -Lakehouse — Proof of Work - -
-

Your Morning Just Got Easier

-
- This isn't another CRM to learn. It's your contracts, your workers, your data —
- already matched before you sit down. -
-
- -
-
-

We know what your day looks like

-
-
-
RIGHT NOW — without this
-
- ☐ Open the CRM. Search "forklift" + "Chicago" + "OSHA."
- ☐ Get 200 results. Scroll through. Half are inactive.
- ☐ Cross-reference certifications in a different tab.
- ☐ Check availability in a spreadsheet.
- ☐ Check reliability from memory or ask a coworker.
- ☐ Copy names into a message. Personalize each one.
- ☐ Repeat for the next contract. And the next.
- 45 minutes before you make your first call. -
-
-
-
WITH THIS — same morning
-
- ✓ Open the page. Your contracts are listed by urgency.
- ✓ Workers already matched — name, skills, certs, scores.
- ✓ Only workers who are available, certified, and reliable.
- ✓ Ranked by who's the best fit, not just who comes first.
- ✓ Emergency fills flagged at the top.
- ✓ One click away from outreach.
-
- You're on the phone in 5 minutes. -
-
-
-
- This isn't about replacing what you know. It's about not making you dig for it every single time. - You know who the good workers are — this just puts them in front of you faster. -
-
- -
-

Here's what it actually did — just now, when you loaded this page:

-
-
-
${hybrid.sql_matches?.toLocaleString()}
-
Forklift operators in IL with 80%+ reliability
-
Found in ${tests[tests.length-1]?.ms}ms — you'd still be typing the search
-
-
-
${hybrid.vector_reranked}
-
Best matches ranked by AI — not alphabetical, not random
-
The system read their skills and picked the best fit for you
-
-
-
-
Every name verified against the actual database
-
Not guessing, not making up people. These workers are real.
-
-
- -
-
Your top matches right now — ready for outreach:
- - ${workerRows}
NameDetailsFit ScoreVerified
-
-
- -
-
What's different from your CRM:
-
-
-
It understands what you mean
-
Search "warehouse help" and it finds Forklift Operators, Loaders, Shipping Clerks — because it understands those ARE warehouse jobs. Your CRM would find nothing.
-
-
-
It already filtered the junk
-
Inactive workers, expired certs, low reliability — already removed. You only see people you'd actually want to call. Not 200 results where 150 are useless.
-
-
-
It runs on YOUR machine
-
No cloud. No per-search fee. No sending your worker data to someone else's server. Everything runs right here, right now, on hardware you control.
-
-
-
- -
- — Technical details below for the team that wants to see the numbers — -
- -
-
${totalRows.toLocaleString()}
Total Records
-
${totalChunks.toLocaleString()}
AI-Indexed Chunks
-
${indexes?.length || 0}
Search Indexes
-
10M
Max Tested Scale
-
- -
-

01 What a CRM Does — keyword match on ${totalRows.toLocaleString()} rows

-

Standard SQL filters. Fast, but only finds EXACT matches. Every CRM does this.

- - ${testRows}
QuerySpeedResult
-

Limitation: search for "warehouse work" finds nothing — no worker has that exact text in their profile.

-
- -
-

See the difference — live, right now

-

- These searches just ran on ${totalRows.toLocaleString()} real worker profiles when you loaded this page. - Left: what your CRM finds. Right: what AI finds. Same search, same data. -

- - ${demos.map((d: any, i: number) => { - const aiNames = d.aiHits.map((h: any) => { - const name = h.chunk_text?.split("—")[0]?.trim() || h.doc_id; - const role = h.chunk_text?.match(/— (.+?) in/)?.[1] || ""; - const city = h.chunk_text?.match(/in (.+?)\./)?.[1] || ""; - return { name, role, city, score: h.score }; - }); - - return ` -
-
${d.desc}
-
- "${d.query}" -
-
-
-
Your CRM (keyword match)
-
${d.crmCount}
-
results — scanned every profile for the exact phrase
-
-
-
AI Vector Search (understands meaning)
-
${d.aiHits.length}
-
matches — found workers whose skills MEAN the same thing
- ${aiNames.map((w: any) => ` -
- ${w.name} - — ${w.role}${w.city ? ` in ${w.city}` : ""} -
- `).join("")} -
-
-
`; - }).join("")} -
- -
-

Now combine both: SQL precision + AI understanding

-

- The hybrid search runs a SQL filter (role, state, reliability) AND vector ranking together. - You get exact structural matches ranked by who's the best semantic fit — in one call. -

-
- ${hybrid.sql_matches?.toLocaleString()} workers match your filters - → AI ranked the top ${hybrid.vector_reranked} - ${tests[tests.length-1]?.ms}ms -
- - ${workerRows}
IDNameProfileAI ScoreVerified
-

Every result verified against the actual database. The AI cannot hallucinate workers that don't exist.

-
- -
-

03 Why This Matters — the numbers a CRM can't show you

-
-
-
${totalChunks.toLocaleString()}
-
Text Chunks Vectorized
-
Every worker's skills, certs, and history converted into searchable AI vectors by a LOCAL model. No cloud API. No per-query cost. Your data never leaves this server.
-
-
-
0.98
-
Search Accuracy
-
98% recall — meaning 98 out of 100 truly relevant workers are found. Measured against brute-force ground truth on real embedded profiles.
-
-
-
10M
-
Vectors at 5ms
-
Tested at 10 million vectors on disk. Search still takes 5ms. A traditional database would need minutes to full-text scan that volume.
-
-
-
- -
-

04 Local AI — your data, your models, your GPU

-

${g.name || "NVIDIA RTX A4000"} — ${g.used_mib || 0} / ${g.total_mib || 16376} MiB

-
-
-
-
qwen3
-
8.2B · Reasoning
-
-
-
qwen2.5
-
7B · Fast SQL
-
-
-
mistral
-
7B · Generation
-
-
-
nomic
-
137M · Embeddings
-
-
-

Hot-swappable profiles. Switch between models in seconds. Each model specializes in what it's best at. No API keys, no usage fees, no data leaving the building.

-
- -
- Every number on this page runs LIVE. Hit refresh — the queries execute again on ${totalRows.toLocaleString()} real rows. - The AI vectors were generated by a local model running on the GPU above. No cloud APIs were used. - This is not a demo — this is the production system with real staffing data. -
- -
-

How This Actually Works

-

The technical architecture behind what you just saw — why it's different from a database, why your data never leaves this building, and how it handles millions of records.

- -
-
-
Traditional CRM / Database
-
- Stores records in rows and columns.
- Search = exact text matching ("forklift" finds "forklift").
- Can't understand that "warehouse help" = forklift operator.
- Slows down as data grows — millions of rows = slow queries.
- Every search is the same — doesn't learn or improve.
- Data lives on someone else's cloud server. -
-
-
-
This System (Lakehouse)
-
- AI reads every profile and understands the meaning.
- Search = semantic understanding ("warehouse help" → finds loaders, forklift ops, shipping clerks).
- Combines exact filters + AI ranking in one call.
- Tested at 10 million records at 5ms search — gets faster, not slower.
- Learns from successful placements — builds playbooks over time.
- Runs entirely on hardware you own. Nothing leaves this server. -
-
-
- -
-

Your Data Never Leaves This Building

-
-
-
Local AI Models
-
Four AI models run directly on your GPU — no OpenAI, no Google, no cloud API. Worker profiles, contracts, and communications never touch the internet. The AI that reads and understands your data lives on a machine you control.
-
-
-
Local Storage
-
All data stored on S3-compatible object storage running on this server. Encrypted at rest. No third-party databases, no cloud subscriptions. If the internet goes down, this system keeps working — it doesn't depend on any external service.
-
-
-
Your Hardware
-
${g.name || "NVIDIA RTX A4000"} GPU with ${g.total_mib || 16376} MB memory. 128 GB system RAM. All AI processing happens here. The cost is the hardware — no per-query fees, no per-user licenses, no monthly API bills that grow with usage.
-
-
-
- -
-

How It Handles Scale

-
- The system uses two search engines that work together — each handles what the other can't: -
-
-
-
HNSW (In-Memory)
-
Keeps frequently-used worker profiles in RAM for instant search. Under 1 millisecond response. Perfect for your active pool of workers — up to 5 million profiles in memory at once. 98% search accuracy.
-
-
-
Lance (On-Disk)
-
For massive archives — 10 million+ records stored on disk. 5ms search speed. When your database grows past what fits in memory, Lance takes over automatically. No performance cliff. 94% search accuracy. New data appends in milliseconds without rebuilding the index.
-
-
-
The system automatically uses the right engine for each query. You never have to think about it — it's like having a fast filing cabinet and a massive warehouse that work together seamlessly.
-
- -
-

Hot-Swap Profiles — Different AI for Different Jobs

-
- The system runs multiple AI models and switches between them in seconds depending on the task. Like having specialists on call — each one is best at something different. -
-
-
-
Qwen 3
-
Reasoning & analysis. Understands complex requests. 40,000 word context.
-
-
-
Qwen 2.5
-
Fast structured queries. Generates database searches from plain English.
-
-
-
Mistral
-
Writing & communication. Drafts personalized outreach messages.
-
-
-
Nomic
-
Reads profiles & understands meaning. Powers the semantic search.
-
-
-
When you switch tasks — from finding workers to drafting messages to analyzing trends — the system loads the right AI model automatically. Only one model uses the GPU at a time, so there's no performance penalty.
-
- -
-

Starting From Scratch — No Data Required

-
- You don't need rich profiles to start. The system works with whatever you have — even just a name and a phone number. Here's what happens as you use it: -
-
-
-
1
-
-
Day 1 — Import what you have
-
Upload a spreadsheet with names, phone numbers, and roles. That's enough. The system organizes them by role and location so you can find who you need faster than scrolling a list. No scores, no metrics — just organized contacts.
-
-
-
-
2
-
-
Week 1 — You work, it watches
-
Every placement you make, every timesheet that comes in, every call you log — the system records it. Not extra data entry — you're already doing this work. The system just starts keeping track. After a week, it knows which workers showed up on time and which didn't.
-
-
-
-
3
-
-
Month 1 — The AI starts helping
-
Enough data has accumulated that reliability scores become meaningful. "Based on 8 placements, this worker has 95% reliability." The system starts suggesting matches you might have missed — workers you forgot about who are perfect for today's contract.
-
-
-
-
-
-
The data you saw in the demo above?
-
That's what the system looks like after it's been running. Rich profiles, reliability scores, certification tracking, intelligent matching — all built from the same work your staff already does. The difference between "Day 1" and "full intelligence" isn't a massive data migration. It's just time and normal operations.
-
-
-
-
- -
-

What the System Remembers (and Why It Matters)

-
- Every successful operation becomes a playbook entry — a record of what worked. When a similar situation comes up, the system doesn't start from scratch. It checks: "Last time we needed welders in Ohio, here's who we placed and how it went." -
-
- This is the fundamental difference from a CRM. A CRM stores data. This system stores decisions and outcomes. Over time, it becomes an institutional memory that doesn't retire, doesn't forget, and doesn't depend on one person knowing everything. Your senior staff's expertise becomes embedded in the system — not replacing them, but making sure what they know is available even when they're not in the room. -
-
- -
-

Measured, Not Promised

- - - - - - - - - - - - -
CapabilityMeasuredWhat It Means
Search 500K workers341ms avgResults before you finish typing
SQL query on 3M rowssub-100msAny analytical question answered instantly
10M vector search5msScale to 10 million profiles, still fast
Search accuracy (HNSW)98%Finds 98 of 100 truly relevant workers
Search accuracy (Lance)94%At 10M+ scale, still highly accurate
Filter accuracy100%State, role, reliability filters are SQL-verified — never wrong
Concurrent users10+ simultaneousTested with 10 parallel queries in 82ms total
Cloud dependencyZeroWorks offline. No internet required after setup.
-
-
-
- -`; - - return new Response(html, { headers: { ...cors, "Content-Type": "text/html" } }); } + // Proof JSON API (same data, no HTML) if (url.pathname === "/proof.json") { const ds = await api("GET", "/catalog/datasets") as any[]; diff --git a/mcp-server/proof.html b/mcp-server/proof.html new file mode 100644 index 0000000..c340d14 --- /dev/null +++ b/mcp-server/proof.html @@ -0,0 +1,453 @@ + + + +Lakehouse — Architecture & Reproduction + + + +
+

Lakehouse — Architecture & Reproduction

+ +
Running live tests…
+
+ +
+ +
+
Chapter 1
+

Receipts, not promises

+
Every test below ran live against the real gateway when you loaded this page. Sub-100ms SQL on multi-million-row Parquet, hybrid search with playbook boost applied. No fixtures. If a test fails, you'll see ✗.
+
Running tests…
+
+ +
+
Chapter 2
+

Architecture — 13 crates, one object store, one local AI runtime

+
Request flows top to bottom. Every node is independently swappable. Every line is a real HTTP or gRPC hop that you can trace with tcpdump.
+
+
                            HTTP :3100  +  gRPC :3101
+                                    │
+                            ┌───────▼───────┐
+                            │   gateway     │   Rust · Axum · routing, CORS, auth, tools
+                            └───────┬───────┘
+           ┌────────────┬───────────┼───────────┬────────────┐
+           │            │           │           │            │
+      ┌────▼───┐   ┌────▼───┐  ┌────▼───┐  ┌────▼───┐   ┌────▼───┐
+      │catalog │   │ query  │  │ vector │  │ ingest │   │aibridge│
+      │   d    │   │   d    │  │   d    │  │   d    │   │        │
+      └────┬───┘   └────┬───┘  └────┬───┘  └────┬───┘   └────┬───┘
+           │            │           │           │            │
+           └────────────┴───────────┼───────────┴────────────┘
+                                    ▼
+                          ┌─────────────────┐
+                          │ object storage  │   Parquet files (local / S3)
+                          └─────────────────┘
+                                    ▲
+                                    │
+                            ┌───────┴────────┐
+                            │ Python sidecar │   FastAPI → Ollama
+                            │   (aibridge)   │   local models only
+                            └────────────────┘
+
+

Per-crate responsibility

+ + + + + + + + + + + + + + + + +
CrateRolePath
sharedTypes, errors, Arrow helpers, PII detection, secrets providercrates/shared/
storagedobject_store I/O, BucketRegistry (multi-bucket), AppendLog, ErrorJournalcrates/storaged/
catalogdMetadata authority — manifests, views, tombstones, profiles, schema fingerprintscrates/catalogd/
querydDataFusion SQL engine, MemTable cache, delta merge-on-read, compactioncrates/queryd/
ingestdCSV/JSON/PDF(+OCR)/Postgres/MySQL ingest, cron schedules, auto-PIIcrates/ingestd/
vectordEmbeddings as Parquet, HNSW, trial system, autotune agent, playbook_memorycrates/vectord/
vectord-lanceFirewall crate — Lance 4.0 + Arrow 57 isolated from main Arrow 55crates/vectord-lance/
journaldAppend-only mutation event log for time-travel & auditcrates/journald/
aibridgeRust↔Python sidecar, Ollama HTTP client, VRAM introspectioncrates/aibridge/
gatewayAxum HTTP :3100 + gRPC :3101, middleware, tools registrycrates/gateway/
uiDioxus WASM internal developer UIcrates/ui/
mcp-serverBun TypeScript recruiter-facing app (this server)mcp-server/
+
Source: git.agentview.dev/profit/lakehouse  ·  ADRs: docs/DECISIONS.md (currently 20 records)
+
+ +
+
Chapter 3
+

Dual-agent recursive consensus loop

+
The system we use to execute staffing fills is a dual-agent recursive protocol. Two agents with distinct roles iterate against a shared log until a terminal state — sealed or aborted — is reached. It is deterministic in structure, stochastic in content, and verifiable through the per-run log artifact.
+

Agents and protocol

+
+
  task in
+    │
+    ▼
+  ┌───────────────────────────────────────────────────────────┐
+  │  EXECUTOR (mistral:latest)                                │
+  │  ──────────────────────────────────────────────────────── │
+  │  input:   task spec + shared log + seen-candidates ledger │
+  │  output:  one JSON action per turn                        │
+  │             · {kind:"plan",steps:[…]}                     │
+  │             · {kind:"tool_call",tool,args,rationale}      │
+  │             · {kind:"propose_done",fills:[N of N]}        │
+  └───────────┬───────────────────────────────┬───────────────┘
+              │ tool_call                     │ propose_done
+              ▼                               │
+  ┌──────────────────────────┐                │
+  │  TOOL DISPATCH           │                │
+  │  hybrid_search / sql     │                │
+  │  (against live gateway)  │                │
+  └──────────┬───────────────┘                │
+             │ result (trimmed, exclusions)   │
+             ▼                                ▼
+  ┌───────────────────────────────────────────────────────────┐
+  │  REVIEWER (qwen2.5:latest)                                │
+  │  ──────────────────────────────────────────────────────── │
+  │  input:   task spec + shared log (including tool result)  │
+  │  output:  {kind:"critique",verdict:"continue|drift|       │
+  │                                    approve_done",notes}   │
+  └───────────┬───────────────────────────────────────────────┘
+              │
+        ┌─────┴─────┐
+        ▼           ▼           ▼
+    continue     drift       approve_done + propose_done ⟹ SEAL
+    (next turn)  (cap ≈ 3 →
+                  hard abort)
+    
+
+
Code: tests/multi-agent/agent.ts (protocol + prompts)  ·  tests/multi-agent/orchestrator.ts (run loop)  ·  tests/multi-agent/scenario.ts (5-event warehouse week)
+ +

Why "dual" — role specialization

+
+ The executor is an optimist. Its job is to produce progress: pull candidates, verify SQL, propose consensus. It's instructed to be decisive. +

+ The reviewer is a pessimist. Its job is to catch drift: proposals that don't match the task's geography, fill count, or role. It's authorized to stop the loop. +

+ This adversarial separation is cheaper and more deterministic than asking a single model to self-critique. The reviewer has a hard rule: on the turn after a propose_done, it MUST emit either approve_done or drift — it cannot stall with continue. +
+ +

Why "parallel" — orchestrator can fan out

+
+ Independent pairs run concurrently. tests/multi-agent/run_e2e_rated.ts runs two task-specific agent pairs via Promise.all. Ollama serializes inference at the model level, so "parallel" is concurrent orchestration — but the substrate (gateway, queryd, vectord) handles concurrent requests cleanly. Verified in the scenario harness: two contracts sealing simultaneously. +
+ +

Why "recursive" — each seal feeds the next

+
+ Consensus does not end at the sealed playbook. Every sealed playbook is persisted to playbook_memory via POST /vectors/playbook_memory/seed. The next hybrid search for a semantically similar operation consults that memory via compute_boost_for(query_embedding, top_k, base_weight) and re-ranks the candidate pool. The system builds on itself turn over turn, playbook over playbook. +
+ +

Termination guarantees

+
+ // four paths out, every run has one of these:
+ sealed = executor.propose_done ∧ reviewer.approve_done ∧ fills.count == target
+ abort = consecutive_tool_errors ≥ MAX_TOOL_ERRORS (3)   // executor can't form a valid call
+ abort = consecutive_drifts ≥ MAX_CONSECUTIVE_DRIFTS (3)  // reviewer keeps flagging
+ abort = turn > MAX_TURNS (12)               // no consensus reached in window +
+
Every abort dumps the full log to tests/multi-agent/playbooks/<id>-FAILED.json for forensic review. No consensus is ever implicit.
+
+ +
+
Chapter 4
+

Playbook memory — the compounding feedback loop

+
A CRM stores events. This system turns events into re-ranking signal. Every sealed playbook endorses specific (worker, city, state) tuples. Every failure penalizes them. Every similar future query inherits the signal through cosine similarity.
+ +

Seed shape

+
+ PlaybookEntry {
+   playbook_id, // pb-seed-<sha8>
+   operation, // "fill: Welder x2 in Toledo, OH"
+   approach, context, // short canonical — long strings dilute embedding
+   timestamp, // RFC3339
+   endorsed_names[], // validated against workers_500k for city+state
+   city, state, // parsed from operation
+   embedding // 768-d nomic-embed-text of text shape
+ } +
+
Code: crates/vectord/src/playbook_memory.rs (PlaybookEntry, FailureRecord, PlaybookMemoryState)
+ +

Boost math (positive + decay + negative)

+
+ // For each playbook pb among top-K most cosine-similar:
+ // given query embedding qv, constant base_weight, n_workers = |pb.endorsed_names|
+
+ similarity = cosine(qv, pb.embedding)    // skip if ≤ 0.05
+ age_days = (now - pb.timestamp) / 86_400 seconds
+ decay = e^(-age_days / 30)   // half-life = 30 days
+
+ // For each endorsed worker in pb:
+ key = (pb.city, pb.state, name)
+ fail_count = failures[key]   // # times this worker was marked no-show for same geo
+ penalty = 0.5^min(fail_count, 20)
+
+ per_worker = similarity × base_weight × decay × penalty / n_workers
+ boost[key] = min(boost[key] + per_worker, MAX_BOOST_PER_WORKER)
+
+ // MAX_BOOST_PER_WORKER = 0.25 — cap stops one popular worker from always winning +
+
Code: crates/vectord/src/playbook_memory.rs::compute_boost_for  ·  constants: MAX_BOOST_PER_WORKER, DEFAULT_TOP_K_PLAYBOOKS, BOOST_HALF_LIFE_DAYS
+ +

Application at query time

+
+ // In /vectors/hybrid handler (crates/vectord/src/service.rs):
+ 1. SQL filter narrows workers_500k to geo/role/availability
+ 2. Vector index returns top_k × 5 candidates by cosine to question
+ 3. compute_boost_for(qv, k=200) returns boost map
+ 4. For each candidate: parse (name, city, state) from chunk, look up boost, add to score
+ 5. Re-sort sources by boosted score
+ 6. Truncate to requested top_k, return with playbook_boost and playbook_citations +
+
+ Why k=200. Direct measurement showed cosine similarity clusters in the 0.55-0.67 band across all playbooks regardless of geo (nomic-embed-text has narrow discrimination on this kind of structured operation text). A k of 25 silently missed geo-matched playbooks. k=200 is the measured floor for reliably catching compounding. Brute-force over 200 × 768-d is sub-ms even on this hardware. +
+
Evidence: Chicago Electrician compounding test 2026-04-20 — Carmen Green, Anna Patel, Fatima Wilson went from rank >5 / boost 0 / 0 citations (run 0, no seed) to rank 1/2/3 / boost +0.250 (capped) / 3 citations each (run 3, after 3 identical seeds). Each seed increments citations; total boost caps at 0.25/worker.
+ +

Write-through to SQL

+
+ successful_playbooks_live is a DataFusion-queryable Parquet surface maintained by POST /vectors/playbook_memory/persist_sql. Every /log from the recruiter UI triggers seed → persist_sql. The in-memory store and the SQL surface stay synchronized (full snapshot on each persist, safe because memory is source of truth). +
+
Code: crates/vectord/src/playbook_memory.rs::persist_to_sql  ·  catalog-registered under "successful_playbooks_live"
+ +

Pattern discovery (Path 2 — meta-index)

+
+ Beyond "who was endorsed." POST /vectors/playbook_memory/patterns takes a query, finds top-K similar past playbooks, pulls each endorsed worker's full workers_500k profile, and aggregates shared traits: recurring certifications, skill frequencies, modal archetype, reliability distribution. Returns a discovered_pattern string showing operator-actionable signal the user didn't explicitly query for. +
+
Code: crates/vectord/src/playbook_memory.rs::discover_patterns  ·  Surfaces: /vectors/playbook_memory/patterns endpoint, /intelligence/chat response, /intelligence/permit_contracts cards
+
+ +
+
Chapter 5
+

Key architectural choices — what was picked and why

+
Each choice is documented in docs/DECISIONS.md (Architecture Decision Records). If you dispute any of these, the ADR names the alternatives we rejected and the measurement that drove the call.
+
+
+
ADR-001 · Object storage as source of truth
No traditional database. All data is Parquet on S3-compatible object storage. Eliminates DB operational overhead; every engine can read Parquet.
+
+
+
ADR-008 · Embeddings stored as Parquet, not a vector DB
Keeps all data in one portable format. No Pinecone/Weaviate/Qdrant lock-in. Trade-off: brute-force search up to ~100K; HNSW beyond.
+
+
+
ADR-012 · Append-only event journal — never destroy evidence
Every mutation is appended. Compliance, audit, AI-decision forensics. Impossible to retrofit; easy to add now.
+
+
+
ADR-015 · Tool registry before raw SQL for agents
Named, governed, audited actions for agents. Permission checks, rate limits, parameter validation. MCP-compatible.
+
+
+
ADR-019 · Hybrid Parquet+HNSW ⊕ Lance vector backend
Parquet+HNSW primary (2.55× faster search at 100K). Lance secondary for index-build speed (14× faster), random fetch (112× faster), append (structural). Per-profile vector_backend: Parquet | Lance.
+
+
+
ADR-020 · Idempotent register() with schema-fingerprint gate
Same (name, fingerprint) reuses manifest. Different fingerprint = 409 Conflict. Prevents silent duplicate manifests. Cleanup run collapsed 374 → 31 datasets.
+
+
+
Phase 19 design note · Statistical + semantic, not neural
Meta-index is cosine similarity + endorsement aggregation. No model training. Rebuildable from successful_playbooks alone. Neural re-ranker deferred to Phase 20+ only if statistical floor plateaus.
+
+
+
+ +
+
Chapter 6
+

Measured at scale, on this machine

+
Hardware: i9 + 128GB RAM + Nvidia A4000 16GB VRAM. Numbers below are from this running instance. Refresh the page and they'll recompute.
+
Loading scale data…
+
+
+ +
+
Chapter 7
+

Verify or dispute — reproduce it yourself

+
Every claim below is a curl away from falsification.
+
+
Health. Should return lakehouse ok.
+
curl http://localhost:3100/health
+
Any SQL on multi-million-row Parquet. Sub-100ms typical.
+
curl -s -X POST http://localhost:3100/query/sql \
+  -H 'Content-Type: application/json' \
+  -d '{"sql":"SELECT role, COUNT(*) FROM workers_500k WHERE state = '\''IL'\'' GROUP BY role LIMIT 5"}'
+
Hybrid search with playbook boost. The whole Phase 19 feedback loop in one request.
+
curl -s -X POST http://localhost:3100/vectors/hybrid \
+  -H 'Content-Type: application/json' \
+  -d '{"index_name":"workers_500k_v1",
+       "sql_filter":"role = '\''Forklift Operator'\'' AND city = '\''Chicago'\'' AND CAST(availability AS DOUBLE) > 0.5",
+       "question":"reliable forklift operator",
+       "top_k":5,"use_playbook_memory":true,"playbook_memory_k":200}'
+
Playbook memory stats. Count + endorsed names + sample.
+
curl http://localhost:3100/vectors/playbook_memory/stats
+
Pattern discovery. What do past similar fills have in common?
+
curl -s -X POST http://localhost:3100/vectors/playbook_memory/patterns \
+  -H 'Content-Type: application/json' \
+  -d '{"query":"Forklift Operator in Chicago, IL","top_k_playbooks":25,"min_trait_frequency":0.3}'
+
Run the dual-agent scenario yourself. All 5 events, real fills, real artifacts.
+
cd /home/profit/lakehouse
+bun run tests/multi-agent/scenario.ts
+# Output: tests/multi-agent/playbooks/scenario-<timestamp>/report.md
+
+
+ +
+
Chapter 8
+

What we are not claiming

+
Every impressive-sounding number comes with a footnote. Here are the honest limits.
+
+
workers_500k is synthetic.
Real client ATS export replaces this table. Schema is deliberately identical to a production ATS.
+
candidates table has 1,000 rows.
Intentionally small for demo. call_log references candidate_ids beyond that range, so the two tables only partially join — a dataset alignment issue, not a pipeline issue.
+
Chicago permit data is real.
Pulled live from data.cityofchicago.org/resource/ydr8-5enu.json (Socrata API). Not synthetic. Not cached.
+
Playbook memory is seeded from demo runs.
The pipeline that seeds it is identical to what a live recruiter would trigger via /log. Same code path.
+
Local 7B models (mistral, qwen2.5) are imperfect.
They occasionally malform tool calls or drop fields. Multi-agent scenarios seal roughly 40-80% in one run. Larger models or constrained decoding would improve this. Not a substrate problem.
+
No rate/margin awareness yet.
Worker pay expectations vs contract bill rates are not modeled. Flagged as a Phase 20 item; no architectural blocker.
+
+
+ +
+ + + + +