diff --git a/mcp-server/index.ts b/mcp-server/index.ts index 8945ba4..65a73dc 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -631,510 +631,15 @@ async function main() { return new Response(await r.text(), { status: r.status, headers: { "Content-Type": "application/json" } }); } - // Proof page — styled HTML with live tests + // Proof — narrative HTML served from mcp-server/proof.html. + // Live tests consumed client-side via /proof.json. if (url.pathname === "/proof") { - const ds = await api("GET", "/catalog/datasets") as any[]; - const indexes = await api("GET", "/vectors/indexes") as any[]; - const vram = await api("GET", "/ai/vram"); - const totalRows = (ds || []).reduce((s: number, d: any) => s + (d.row_count || 0), 0); - const totalChunks = (indexes || []).reduce((s: number, i: any) => s + i.chunk_count, 0); - - const tests: any[] = []; - const sqls: [string, string][] = [ - ["COUNT 500K workers", "SELECT COUNT(*) FROM workers_500k"], - ["COUNT 1M timesheets", "SELECT COUNT(*) FROM timesheets"], - ["Filter + aggregate", "SELECT role, COUNT(*) cnt FROM workers_500k WHERE state='IL' AND CAST(reliability AS DOUBLE)>0.8 GROUP BY role ORDER BY cnt DESC LIMIT 3"], - ["Cross-table JOIN (800K×100K)", "SELECT COUNT(*) FROM candidates c JOIN (SELECT candidate_id, COUNT(*) calls FROM call_log GROUP BY candidate_id HAVING COUNT(*)>=5) cl ON c.candidate_id=cl.candidate_id WHERE c.city='Chicago'"], - ]; - for (const [name, sql] of sqls) { - const t0 = Date.now(); - const r = await api("POST", "/query/sql", { sql }); - tests.push({ name, ms: Date.now() - t0, result: r.rows?.[0], pass: !r.error }); - } - const ht0 = Date.now(); - const hybrid = await api("POST", "/vectors/hybrid", { - question: "reliable forklift operator", index_name: "workers_500k_v1", - sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8", - filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false, - use_playbook_memory: 
true, + return new Response(Bun.file(import.meta.dir + "/proof.html"), { + headers: { ...cors, "Content-Type": "text/html" }, }); - tests.push({ - name: "Hybrid SQL+Vector Search", ms: Date.now() - ht0, - result: { sql_matches: hybrid.sql_matches, verified_results: hybrid.vector_reranked }, - pass: (hybrid.vector_reranked || 0) > 0, - sources: hybrid.sources?.slice(0, 5), - }); - - // Run LIVE CRM vs AI comparisons — these actually execute on page load - const demos: any[] = []; - const demoQueries = [ - { query: "warehouse help", desc: "A staffer types what they need in plain English" }, - { query: "someone good with machines who is dependable", desc: "Natural language — no field names, no filters" }, - { query: "safety trained worker for chemical plant", desc: "The CRM doesn't know 'safety trained' = OSHA + Hazmat" }, - ]; - for (const dq of demoQueries) { - // CRM attempt: exact LIKE match - const crmResult = await api("POST", "/query/sql", { - sql: `SELECT COUNT(*) cnt FROM workers_500k WHERE resume_text LIKE '%${dq.query}%'` - }); - const crmCount = crmResult?.rows?.[0]?.cnt ?? 0; - - // AI attempt: vector search understands meaning - const aiResult = await api("POST", "/vectors/hnsw/search", { - index_name: "workers_500k_v1", - query: dq.query, - top_k: 3, - }); - const aiHits = aiResult?.results || []; - - demos.push({ ...dq, crmCount, aiHits }); - } - - const g = vram?.gpu || {}; - const ts = new Date().toLocaleString(); - const testRows = tests.map((t: any) => { - const icon = t.pass ? "✓" : "✗"; - const cls = t.pass ? "pass" : "fail"; - const val = typeof t.result === "object" ? JSON.stringify(t.result) : t.result; - return `
| Name | Details | Fit Score | Verified |
|---|
Standard SQL filters. Fast, but only finds EXACT matches. Every CRM does this.
-| Query | Speed | Result |
|---|
Limitation: search for "warehouse work" finds nothing — no worker has that exact text in their profile.
-- These searches just ran on ${totalRows.toLocaleString()} real worker profiles when you loaded this page. - Left: what your CRM finds. Right: what AI finds. Same search, same data. -
- - ${demos.map((d: any, i: number) => { - const aiNames = d.aiHits.map((h: any) => { - const name = h.chunk_text?.split("—")[0]?.trim() || h.doc_id; - const role = h.chunk_text?.match(/— (.+?) in/)?.[1] || ""; - const city = h.chunk_text?.match(/in (.+?)\./)?.[1] || ""; - return { name, role, city, score: h.score }; - }); - - return ` -- The hybrid search runs a SQL filter (role, state, reliability) AND vector ranking together. - You get exact structural matches ranked by who's the best semantic fit — in one call. -
-| ID | Name | Profile | AI Score | Verified |
|---|
Every result verified against the actual database. The AI cannot hallucinate workers that don't exist.
-${g.name || "NVIDIA RTX A4000"} — ${g.used_mib || 0} / ${g.total_mib || 16376} MiB
- -Hot-swappable profiles. Switch between models in seconds. Each model specializes in what it's best at. No API keys, no usage fees, no data leaving the building.
-The technical architecture behind what you just saw — why it's different from a database, why your data never leaves this building, and how it handles millions of records.
- -| Capability | Measured | What It Means |
|---|---|---|
| Search 500K workers | 341ms avg | Results before you finish typing |
| SQL query on 3M rows | sub-100ms | Any analytical question answered instantly |
| 10M vector search | 5ms | Scale to 10 million profiles, still fast |
| Search accuracy (HNSW) | 98% | Finds 98 of 100 truly relevant workers |
| Search accuracy (Lance) | 94% | At 10M+ scale, still highly accurate |
| Filter accuracy | 100% | State, role, reliability filters are SQL-verified — never wrong |
| Concurrent users | 10+ simultaneous | Tested with 10 parallel queries in 82ms total |
| Cloud dependency | Zero | Works offline. No internet required after setup. |
tcpdump.HTTP :3100 + gRPC :3101 + │ + ┌───────▼───────┐ + │ gateway │ Rust · Axum · routing, CORS, auth, tools + └───────┬───────┘ + ┌────────────┬───────────┼───────────┬────────────┐ + │ │ │ │ │ + ┌────▼───┐ ┌────▼───┐ ┌────▼───┐ ┌────▼───┐ ┌────▼───┐ + │catalog │ │ query │ │ vector │ │ ingest │ │aibridge│ + │ d │ │ d │ │ d │ │ d │ │ │ + └────┬───┘ └────┬───┘ └────┬───┘ └────┬───┘ └────┬───┘ + │ │ │ │ │ + └────────────┴───────────┼───────────┴────────────┘ + ▼ + ┌─────────────────┐ + │ object storage │ Parquet files (local / S3) + └─────────────────┘ + ▲ + │ + ┌───────┴────────┐ + │ Python sidecar │ FastAPI → Ollama + │ (aibridge) │ local models only + └────────────────┘+
| Crate | Role | Path |
|---|---|---|
| shared | Types, errors, Arrow helpers, PII detection, secrets provider | crates/shared/ |
| storaged | object_store I/O, BucketRegistry (multi-bucket), AppendLog, ErrorJournal | crates/storaged/ |
| catalogd | Metadata authority — manifests, views, tombstones, profiles, schema fingerprints | crates/catalogd/ |
| queryd | DataFusion SQL engine, MemTable cache, delta merge-on-read, compaction | crates/queryd/ |
| ingestd | CSV/JSON/PDF(+OCR)/Postgres/MySQL ingest, cron schedules, auto-PII | crates/ingestd/ |
| vectord | Embeddings as Parquet, HNSW, trial system, autotune agent, playbook_memory | crates/vectord/ |
| vectord-lance | Firewall crate — Lance 4.0 + Arrow 57 isolated from main Arrow 55 | crates/vectord-lance/ |
| journald | Append-only mutation event log for time-travel & audit | crates/journald/ |
| aibridge | Rust↔Python sidecar, Ollama HTTP client, VRAM introspection | crates/aibridge/ |
| gateway | Axum HTTP :3100 + gRPC :3101, middleware, tools registry | crates/gateway/ |
| ui | Dioxus WASM internal developer UI | crates/ui/ |
| mcp-server | Bun TypeScript recruiter-facing app (this server) | mcp-server/ |
task in
+ │
+ ▼
+ ┌───────────────────────────────────────────────────────────┐
+ │ EXECUTOR (mistral:latest) │
+ │ ──────────────────────────────────────────────────────── │
+ │ input: task spec + shared log + seen-candidates ledger │
+ │ output: one JSON action per turn │
+ │ · {kind:"plan",steps:[…]} │
+ │ · {kind:"tool_call",tool,args,rationale} │
+ │ · {kind:"propose_done",fills:[N of N]} │
+ └───────────┬───────────────────────────────┬───────────────┘
+ │ tool_call │ propose_done
+ ▼ │
+ ┌──────────────────────────┐ │
+ │ TOOL DISPATCH │ │
+ │ hybrid_search / sql │ │
+ │ (against live gateway) │ │
+ └──────────┬───────────────┘ │
+ │ result (trimmed, exclusions) │
+ ▼ ▼
+ ┌───────────────────────────────────────────────────────────┐
+ │ REVIEWER (qwen2.5:latest) │
+ │ ──────────────────────────────────────────────────────── │
+ │ input: task spec + shared log (including tool result) │
+ │ output: {kind:"critique",verdict:"continue|drift| │
+ │ approve_done",notes} │
+ └───────────┬───────────────────────────────────────────────┘
+ │
+ ┌─────┴─────┐
+ ▼ ▼ ▼
+ continue drift approve_done + propose_done ⟹ SEAL
+ (next turn) (cap ≈ 3 →
+ hard abort)
+
+ Once the executor emits propose_done, the reviewer MUST emit either approve_done or drift — it cannot stall with continue.
+ tests/multi-agent/run_e2e_rated.ts runs two task-specific agent pairs via Promise.all. Ollama serializes inference at the model level, so "parallel" is concurrent orchestration — but the substrate (gateway, queryd, vectord) handles concurrent requests cleanly. Verified in the scenario harness: two contracts sealing simultaneously.
+ Each sealed playbook is seeded into playbook_memory via POST /vectors/playbook_memory/seed. The next hybrid search for a semantically similar operation consults that memory via compute_boost_for(query_embedding, top_k, base_weight) and re-ranks the candidate pool. The system builds on itself turn over turn, playbook over playbook.
+ Failed runs are written to tests/multi-agent/playbooks/<id>-FAILED.json for forensic review. No consensus is ever implicit. Playbook memory is persisted to SQL via POST /vectors/playbook_memory/persist_sql. Every /log from the recruiter UI triggers seed → persist_sql. The in-memory store and the SQL surface stay synchronized (full snapshot on each persist, which is safe because the in-memory store is the source of truth).
+ POST /vectors/playbook_memory/patterns takes a query, finds top-K similar past playbooks, pulls each endorsed worker's full workers_500k profile, and aggregates shared traits: recurring certifications, skill frequencies, modal archetype, reliability distribution. Returns a discovered_pattern string showing operator-actionable signal the user didn't explicitly query for.
+ docs/DECISIONS.md (Architecture Decision Records). If you dispute any of these, the ADR names the alternatives we rejected and the measurement that drove the call.lakehouse ok.curl http://localhost:3100/health+
curl -s -X POST http://localhost:3100/query/sql \
+ -H 'Content-Type: application/json' \
+   -d '{"sql":"SELECT role, COUNT(*) FROM workers_500k WHERE state='\''IL'\'' GROUP BY role LIMIT 5"}'
+ curl -s -X POST http://localhost:3100/vectors/hybrid \
+ -H 'Content-Type: application/json' \
+ -d '{"index_name":"workers_500k_v1",
+ "sql_filter":"role = '\''Forklift Operator'\'' AND city = '\''Chicago'\'' AND CAST(availability AS DOUBLE) > 0.5",
+ "question":"reliable forklift operator",
+ "top_k":5,"use_playbook_memory":true,"playbook_memory_k":200}'
+ curl http://localhost:3100/vectors/playbook_memory/stats+
curl -s -X POST http://localhost:3100/vectors/playbook_memory/patterns \
+ -H 'Content-Type: application/json' \
+ -d '{"query":"Forklift Operator in Chicago, IL","top_k_playbooks":25,"min_trait_frequency":0.3}'
+ cd /home/profit/lakehouse +bun run tests/multi-agent/scenario.ts +# Output: tests/multi-agent/playbooks/scenario-<timestamp>/report.md+