Post-PR-#11 polish: demo UI, staffer console, face pool, icons, contractor profile (24 commits) #12

Merged
profit merged 44 commits from demo/post-pr11-polish-2026-04-28 into main 2026-05-03 05:16:17 +00:00
Showing only changes of commit 631b0329b1 - Show all commits

View File

@ -81,6 +81,7 @@ pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 1
<nav>
<a href=".">Dashboard</a>
<a href="console">Walkthrough</a>
<a href="profiler">Profiler</a>
<a href="proof" class="active">Architecture</a>
<a href="spec">Spec</a>
<a href="onboard">Onboard</a>
@ -95,138 +96,137 @@ pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 1
<div class="chapter">
<div class="num">Chapter 1</div>
<h2>Receipts, not promises</h2>
<div class="lede">Every test below ran live against the real gateway when you loaded this page. Sub-100ms SQL on multi-million-row Parquet, hybrid search with playbook boost applied. No fixtures. If a test fails, you'll see ✗.</div>
<div class="lede">Every test below ran live against the real gateway when you loaded this page. Sub-100ms SQL on multi-million-row Parquet, hybrid search with playbook boost applied, public-issuer attribution computed from this view. No fixtures. If a test fails, you'll see ✗.</div>
<div id="ch1-tests"><div class="loading">Running tests…</div></div>
<div id="ch1-live" style="margin-top:14px"></div>
</div>
<div class="chapter">
<div class="num">Chapter 2</div>
<h2>Architecture — 13 crates, one object store, one local AI runtime</h2>
<div class="lede">Request flows top to bottom. Every node is independently swappable. Every line is a real HTTP or gRPC hop that you can trace with <code>tcpdump</code>.</div>
<h2>Architecture — 15 crates, one object store, a 5-provider model fleet</h2>
<div class="lede">Gateway is a drop-in OpenAI-compatible middleware. Any consumer that speaks the OpenAI Chat Completions shape — agent SDKs, IDE plugins, custom apps — points at <code>localhost:3100/v1</code> and gets routing, audit, and the full memory substrate behind every call. The model side has 5 providers and 40+ frontier models reachable via one OpenCode key. The data side stays Rust-first.</div>
<div class="card accent-b">
<pre> HTTP :3100 + gRPC :3101
┌───────▼───────┐
│ gateway │ Rust · Axum · routing, CORS, auth, tools
└───────┬───────┘
┌────────────┬───────────┼───────────┬────────────┐
│ │ │ │ │
┌────▼───┐ ┌────▼───┐ ┌────▼───┐ ┌────▼───┐ ┌────▼───┐
│catalog │ │ query │ │ vector │ │ ingest │ │aibridge│
│ d │ │ d │ │ d │ │ d │ │ │
└────┬───┘ └────┬───┘ └────┬───┘ └────┬───┘ └────┬───┘
│ │ │ │ │
└────────────┴───────────┼───────────┴────────────┘
┌─────────────────┐
│ object storage │ Parquet files (local / S3)
└─────────────────┘
┌───────┴────────┐
│ Python sidecar │ FastAPI → Ollama
│ (aibridge) │ local models only
└────────────────┘</pre>
<pre> OpenAI SDK consumers MCP clients Browser UI (Bun :3700)
│ │ │
└──────────────────────────┼──────────────────────────┘
┌──────────────────────────────┐
│ gateway :3100 /v1/* │ Rust · Axum
│ OpenAI-compat drop-in │ smart provider routing
│ /v1/chat /v1/mode /iterate │ cost telemetry, Langfuse
└──────────┬───────────────────┘
┌─────────┬───────────────┼───────────────┬──────────┐
│ │ │ │ │
┌────▼───┐ ┌───▼────┐ ┌─────▼──────┐ ┌─────▼─────┐ ┌──▼──────┐
│catalog │ │ query │ │ vector │ │ ingest │ │aibridge │
│ d │ │ d │ │ d │ │ d │ │ │
│idempot │ │DataFus │ │HNSW · Lance│ │CSV PDF SQL│ │provider │
│schema │ │delta │ │playbook+ │ │auto-PII │ │adapters │
│fingerp │ │MemTabl │ │pathway mem │ │schema fp │ │5 active │
└────┬───┘ └───┬────┘ └─────┬──────┘ └─────┬─────┘ └──┬──────┘
└─────────┴────────────────┼────────────────┴─────────┘
┌──────────────────┐
│ object storage │ Parquet · MinIO · S3-compat
└──────────────────┘
┌───────────────┴────────────────┐
│ validator · journald │ schema/PII/policy gates
│ (Phase 43) · (audit log) │ + append-only mutations
└────────────────────────────────┘
Provider fleet (config/providers.toml):
ollama localhost:3200 local Ollama → qwen3.5, gemma2
ollama_cloud ollama.com gpt-oss:120b, qwen3-coder:480b,
deepseek-v3.1:671b, kimi-k2:1t,
mistral-large-3:675b, qwen3.5:397b
openrouter openrouter.ai/api/v1 343 models — paid + free rescue
opencode opencode.ai/zen/v1 40 models · ONE sk-* key reaches
Claude Opus 4.7, GPT-5.5-pro,
Gemini 3.1-pro, Kimi K2.6, GLM 5.1,
DeepSeek, Qwen, MiniMax, free tier
kimi api.kimi.com/coding/v1 direct Kimi For Coding (TOS-clean)</pre>
</div>
<h3>Per-crate responsibility</h3>
<h3>Per-crate responsibility (15 crates)</h3>
<table class="plain">
<thead><tr><th>Crate</th><th>Role</th><th>Path</th></tr></thead>
<tbody>
<tr><td>shared</td><td>Types, errors, Arrow helpers, PII detection, secrets provider</td><td>crates/shared/</td></tr>
<tr><td>storaged</td><td>object_store I/O, BucketRegistry (multi-bucket), AppendLog, ErrorJournal</td><td>crates/storaged/</td></tr>
<tr><td>catalogd</td><td>Metadata authority — manifests, views, tombstones, profiles, schema fingerprints</td><td>crates/catalogd/</td></tr>
<tr><td>queryd</td><td>DataFusion SQL engine, MemTable cache, delta merge-on-read, compaction</td><td>crates/queryd/</td></tr>
<tr><td>ingestd</td><td>CSV/JSON/PDF(+OCR)/Postgres/MySQL ingest, cron schedules, auto-PII</td><td>crates/ingestd/</td></tr>
<tr><td>vectord</td><td>Embeddings as Parquet, HNSW, trial system, autotune agent, playbook_memory</td><td>crates/vectord/</td></tr>
<tr><td>shared</td><td>Types, errors, Arrow helpers, PII detection, secrets provider, model_matrix</td><td>crates/shared/</td></tr>
<tr><td>storaged</td><td>object_store I/O, BucketRegistry, AppendLog, ErrorJournal, federation_service</td><td>crates/storaged/</td></tr>
<tr><td>catalogd</td><td>Manifests, views (incl. PII-safe view layer), tombstones, profiles, schema fingerprints, register-idempotency (ADR-020)</td><td>crates/catalogd/</td></tr>
<tr><td>queryd</td><td>DataFusion SQL, MemTable cache, delta merge-on-read, compaction, truth gate (ADR-021)</td><td>crates/queryd/</td></tr>
<tr><td>ingestd</td><td>CSV/JSON/PDF(+OCR)/Postgres/MySQL ingest, cron schedules, auto-PII flagging</td><td>crates/ingestd/</td></tr>
<tr><td>vectord</td><td>Embeddings as Parquet, HNSW, trial system, autotune, playbook_memory + pathway_memory (ADR-021 semantic-correctness layer)</td><td>crates/vectord/</td></tr>
<tr><td>vectord-lance</td><td>Firewall crate — Lance 4.0 + Arrow 57 isolated from main Arrow 55</td><td>crates/vectord-lance/</td></tr>
<tr><td>journald</td><td>Append-only mutation event log for time-travel &amp; audit</td><td>crates/journald/</td></tr>
<tr><td>aibridge</td><td>Rust↔Python sidecar, Ollama HTTP client, VRAM introspection</td><td>crates/aibridge/</td></tr>
<tr><td>gateway</td><td>Axum HTTP :3100 + gRPC :3101, middleware, tools registry</td><td>crates/gateway/</td></tr>
<tr><td>ui</td><td>Dioxus WASM internal developer UI</td><td>crates/ui/</td></tr>
<tr><td>mcp-server</td><td>Bun TypeScript recruiter-facing app (this server)</td><td>mcp-server/</td></tr>
<tr><td>journald</td><td>Append-only mutation event log for time-travel + audit</td><td>crates/journald/</td></tr>
<tr><td>truth</td><td>File-backed rule store; <code>evaluate(task_class, ctx) → Vec&lt;RuleOutcome&gt;</code> (ADR-021)</td><td>crates/truth/</td></tr>
<tr><td>aibridge</td><td>Rust↔Python sidecar, Ollama client, ProviderAdapter trait, /v1/chat router</td><td>crates/aibridge/</td></tr>
<tr><td>gateway</td><td>Axum HTTP :3100 + gRPC :3101, OpenAI-compat /v1/*, mode runner, validator, iterate loop, cost telemetry, Langfuse + observer fan-out</td><td>crates/gateway/</td></tr>
<tr><td>validator</td><td>Phase 43 — schema / completeness / consistency / policy gates over LLM outputs (FillValidator, EmailValidator, ParquetWorkerLookup)</td><td>crates/validator/</td></tr>
<tr><td>ui</td><td>Dioxus WASM internal developer UI (separate from this Bun-served public UI)</td><td>crates/ui/</td></tr>
<tr><td>mcp-server</td><td>Bun TypeScript public-facing app + MCP tool surface — what you're reading right now</td><td>mcp-server/</td></tr>
<tr><td>auditor</td><td>External claim-vs-diff verifier on PRs · Kimi K2.6 ↔ Haiku 4.5 cross-lineage alternation, Opus 4.7 auto-promote on diffs &gt;100k chars</td><td>auditor/</td></tr>
</tbody>
</table>
<div class="ref"><strong>Source:</strong> git.agentview.dev/profit/lakehouse &nbsp;·&nbsp; <strong>ADRs:</strong> docs/DECISIONS.md (currently 20 records)</div>
<div class="ref"><strong>Source:</strong> git.agentview.dev/profit/lakehouse · branch <code>scrum/auto-apply-19814</code> · tag <code>distillation-v1.0.0</code> at commit <code>e7636f2</code> (frozen substrate) · <strong>ADRs:</strong> docs/DECISIONS.md (currently 21 records)</div>
</div>
<div class="chapter">
<div class="num">Chapter 3</div>
<h2>Dual-agent recursive consensus loop</h2>
<div class="lede">The system we use to execute staffing fills is a dual-agent recursive protocol. Two agents with distinct roles iterate against a shared log until one of three terminal states is reached. It is deterministic in structure, stochastic in content, and verifiable through the per-run log artifact.</div>
<h3>Agents and protocol</h3>
<div class="card accent-a">
<pre> task in
┌───────────────────────────────────────────────────────────┐
│ EXECUTOR (mistral:latest) │
│ ──────────────────────────────────────────────────────── │
│ input: task spec + shared log + seen-candidates ledger │
│ output: one JSON action per turn │
│ · {kind:"plan",steps:[…]} │
│ · {kind:"tool_call",tool,args,rationale} │
│ · {kind:"propose_done",fills:[N of N]} │
└───────────┬───────────────────────────────┬───────────────┘
│ tool_call │ propose_done
▼ │
┌──────────────────────────┐ │
│ TOOL DISPATCH │ │
│ hybrid_search / sql │ │
│ (against live gateway) │ │
└──────────┬───────────────┘ │
│ result (trimmed, exclusions) │
▼ ▼
┌───────────────────────────────────────────────────────────┐
│ REVIEWER (qwen2.5:latest) │
│ ──────────────────────────────────────────────────────── │
│ input: task spec + shared log (including tool result) │
│ output: {kind:"critique",verdict:"continue|drift| │
│ approve_done",notes} │
└───────────┬───────────────────────────────────────────────┘
┌─────┴─────┐
▼ ▼ ▼
continue drift approve_done + propose_done ⟹ SEAL
(next turn) (cap ≈ 3 →
hard abort)
</pre>
</div>
<div class="ref"><strong>Code:</strong> tests/multi-agent/agent.ts (protocol + prompts) &nbsp;·&nbsp; tests/multi-agent/orchestrator.ts (run loop) &nbsp;·&nbsp; tests/multi-agent/scenario.ts (5-event warehouse week)</div>
<h2>The model fleet — 9-rung ladder, N=3 consensus, cross-lineage audit</h2>
<div class="lede">No single model owns the answer. Every consequential call is structured: the right tier picks up first, fallback rungs catch what fails, parallel runs vote, and an independent auditor of a different model lineage checks the result against the diff. The protocol is deterministic; the inference is stochastic; every step writes a receipt.</div>
<h3>Why "dual" — role specialization</h3>
<div class="narr">
<strong>The executor is an optimist.</strong> Its job is to produce progress: pull candidates, verify SQL, propose consensus. It's instructed to be decisive.
<br><br>
<strong>The reviewer is a pessimist.</strong> Its job is to catch drift: proposals that don't match the task's geography, fill count, or role. It's authorized to stop the loop.
<br><br>
This adversarial separation is cheaper and more deterministic than asking a single model to self-critique. The reviewer has a hard rule: on the turn after a <code>propose_done</code>, it MUST emit either <code>approve_done</code> or <code>drift</code> — it cannot stall with <code>continue</code>.
<h3>The 9-rung cloud-first ladder</h3>
<div class="card accent-b">
<pre> request in
┌───────────────────────────────────────────────────────────────────┐
│ attempt 1 ollama_cloud / kimi-k2:1t 1T params · flagship │
│ attempt 2 ollama_cloud / qwen3-coder:480b coding specialist │
│ attempt 3 ollama_cloud / deepseek-v3.1:671b reasoning │
│ attempt 4 ollama_cloud / mistral-large-3:675b deep analysis │
│ attempt 5 ollama_cloud / gpt-oss:120b reliable workhorse │
│ attempt 6 ollama_cloud / qwen3.5:397b dense final thinker │
│ attempt 7 openrouter / openai/gpt-oss-120b:free rescue tier │
│ attempt 8 openrouter / google/gemma-3-27b-it:free fastest rescue │
│ attempt 9 ollama / qwen3.5:latest last-resort local │
└───────────────┬───────────────────────────────────────────────────┘
│ isAcceptable() = chars ≥ 3800 ∧ not malformed JSON
sealed result OR next-rung learning preamble</pre>
</div>
<div class="narr">Every rung sees a learning preamble carrying the prior rejection reason. The ladder is the standard scrum/auditor path; for individual <code>/v1/chat</code> calls the caller picks the model directly (or lets the smart-routing default fire).</div>
<div class="ref"><strong>Code:</strong> tests/real-world/scrum_master_pipeline.ts <code>const LADDER</code> · config/routing.toml · crates/gateway/src/v1/mode.rs (mode runner)</div>
<h3>Why "parallel" — orchestrator can fan out</h3>
<div class="narr">
<strong>Independent pairs run concurrently.</strong> <code>tests/multi-agent/run_e2e_rated.ts</code> runs two task-specific agent pairs via <code>Promise.all</code>. Ollama serializes inference at the model level, so "parallel" is concurrent orchestration — but the substrate (gateway, queryd, vectord) handles concurrent requests cleanly. Verified in the scenario harness: two contracts sealing simultaneously.
</div>
<h3>Why "recursive" — each seal feeds the next</h3>
<div class="narr">
<strong>Consensus does not end at the sealed playbook.</strong> Every sealed playbook is persisted to <code>playbook_memory</code> via <code>POST /vectors/playbook_memory/seed</code>. The next hybrid search for a semantically similar operation consults that memory via <code>compute_boost_for(query_embedding, top_k, base_weight)</code> and re-ranks the candidate pool. The system builds on itself turn over turn, playbook over playbook.
</div>
<h3>Termination guarantees</h3>
<h3>N=3 consensus + tie-breaker (auditor inference)</h3>
<div class="math">
<span class="c">// three paths out, every run has one of these:</span><br>
sealed = executor.propose_done ∧ reviewer.approve_done ∧ fills.count == target<br>
abort = consecutive_tool_errors ≥ MAX_TOOL_ERRORS (3) &nbsp;&nbsp;<span class="c">// executor can't form a valid call</span><br>
abort = consecutive_drifts ≥ MAX_CONSECUTIVE_DRIFTS (3) &nbsp;<span class="c">// reviewer keeps flagging</span><br>
abort = turn &gt; MAX_TURNS (12) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="c">// no consensus reached in window</span>
<span class="c">// auditor/checks/inference.ts — every claim audit runs this:</span><br>
1. Fire the primary reviewer N=3 times in PARALLEL (Promise.all) — wall-clock = single call<br>
2. Aggregate votes per claim_idx · majority wins<br>
3. On 1-1-1 split → tie-breaker model with <strong>different architecture</strong> (qwen3-coder:480b vs primary gpt-oss/kimi)<br>
4. Every disagreement (even when majority resolves) → <code>data/_kb/audit_discrepancies.jsonl</code><br>
<br>
<span class="c">// Closes the cloud-non-determinism gap: temp=0 isn't actually deterministic in practice</span><br>
<span class="c">// across hours; consensus + cross-architecture tie-break stabilizes verdicts.</span>
</div>
<div class="narr">Every abort dumps the full log to <code>tests/multi-agent/playbooks/&lt;id&gt;-FAILED.json</code> for forensic review. No consensus is ever implicit.</div>
<h3>Auditor cross-lineage — Kimi ↔ Haiku ↔ Opus</h3>
<div class="narr">Every push to PR #11 triggers <code>auditor/audit.ts</code> within ~90s. To prevent a single model lineage's blind spots from becoming the system's blind spots, audits alternate between Kimi K2.6 (Moonshot) and Haiku 4.5 (Anthropic) by SHA. Diffs over 100k chars auto-promote to Claude Opus 4.7. Per-PR cap of 3 audits with auto-reset on each new head SHA prevents infinite-loop spend. <strong>100% grounding-verified rate</strong> on Haiku 4.5 across the latest 10 findings — pairing different lineages + forcing per-finding grounding kills confabulation.</div>
<div class="ref"><strong>Code:</strong> auditor/audit.ts · auditor/checks/inference.ts (N=3) · auditor/checks/kimi_architect.ts · <strong>Verdicts:</strong> data/_auditor/kimi_verdicts/ — read any 11-&lt;sha&gt;.json to inspect a real audit</div>
<h3>Distillation v1.0.0 — the frozen substrate</h3>
<div class="narr">The substrate the auditor and mode runner sit on is tagged at <code>distillation-v1.0.0</code> / commit <code>e7636f2</code>. <strong>145 unit tests pass · 22/22 acceptance invariants · 16/16 audit-full checks · bit-identical reproducibility verified.</strong> The distillation phase exports clean SFT / RAG / preference samples with a multi-layer contamination firewall; the auditor consumes the substrate. The frozen tag means: any future "the system regressed" question has a baseline to bisect against, byte-for-byte.</div>
<div class="ref"><strong>Tag:</strong> distillation-v1.0.0 · <strong>Commit:</strong> e7636f2 · <strong>Substrate code:</strong> scripts/distillation/ · auditor/schemas/distillation/ · <strong>Output:</strong> data/_kb/distilled_{facts,procedures,config_hints}.jsonl</div>
</div>
<div class="chapter">
<div class="num">Chapter 4</div>
<h2>Playbook memory — the compounding feedback loop</h2>
<div class="lede">A CRM stores events. This system turns events into re-ranking signal. Every sealed playbook endorses specific (worker, city, state) tuples. Every failure penalizes them. Every similar future query inherits the signal through cosine similarity.</div>
<h2>Two memory layers — playbook (worker signal) + pathway (system signal)</h2>
<div class="lede">A CRM stores events. This system turns events into re-ranking signal at two layers. <strong>Playbook memory</strong> compounds worker-level outcomes (who got endorsed, where, when) into per-query boost. <strong>Pathway memory</strong> compounds system-level outcomes (which model + corpus + framing actually solved similar problems) into per-task hot-swap. Both are queryable. Both are auditable. Both compound.</div>
<h3>Layer 1 — playbook memory (worker + geo signal)</h3>
<h3>Seed shape</h3>
<div class="math">
@ -289,10 +289,82 @@ pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 1
<strong>Beyond "who was endorsed."</strong> <code>POST /vectors/playbook_memory/patterns</code> takes a query, finds top-K similar past playbooks, pulls each endorsed worker's full workers_500k profile, and aggregates shared traits: recurring certifications, skill frequencies, modal archetype, reliability distribution. Returns a <code>discovered_pattern</code> string showing operator-actionable signal the user didn't explicitly query for.
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/playbook_memory.rs::discover_patterns &nbsp;·&nbsp; <strong>Surfaces:</strong> /vectors/playbook_memory/patterns endpoint, /intelligence/chat response, /intelligence/permit_contracts cards</div>
<h3>Layer 2 — pathway memory (system-level hot-swap, ADR-021)</h3>
<div class="narr">
<strong>Pathway memory remembers which approach worked, not just which worker.</strong> Every accepted scrum review writes a <code>PathwayTrace</code> with the full backtrack: file fingerprint, model used, signal class, KB chunks consulted, observer events, semantic flags, bug fingerprints. A new query that fingerprints to the same trace can hot-swap to the prior result without re-running the 9-rung escalation. The hot-swap gate is strict — every condition must hold: narrow fingerprint match AND audit consensus pass AND replay_count ≥ 3 (probation) AND success_rate ≥ 0.80 AND NOT retired AND vector cosine ≥ 0.90.
</div>
<div class="math">
<span class="c">// Live pathway state (refresh page to recompute):</span><br>
<span id="pwm-traces">— traces</span> · <span id="pwm-replays"></span> successful replays · <span id="pwm-rate"></span> reuse rate<br>
<span class="c">// 88 traces / 11/11 successful replays / 100% reuse rate as of 2026-04-27 — probation gate crossed</span>
</div>
<div class="ref"><strong>Code:</strong> crates/vectord/src/pathway_memory.rs · <strong>Endpoints:</strong> /vectors/pathway/insert · /query · /record_replay · /stats · /bug_fingerprints · <strong>Spec:</strong> docs/DECISIONS.md ADR-021 — Semantic-correctness matrix layer</div>
<h3>What both memory layers feed (besides search)</h3>
<div class="narr">
Both layers also feed the <strong>per-staffer hot-swap index</strong> (Chapter 5) and the <strong>Construction Activity Signal Engine</strong> (Chapter 6). One memory model, surfaced three different ways at the request boundary depending on who's asking.
</div>
</div>
<div class="chapter">
<div class="num">Chapter 5</div>
<h2>Per-staffer hot-swap — same corpus, different relevance gradient</h2>
<div class="lede">Maria runs Chicago. Devon runs Indianapolis. Aisha runs Wisconsin/Michigan. They share one corpus, but the search results, the recurring-skill patterns, and the playbook context all reshape to whoever is acting. Same query "forklift operators" returns 89 IN workers when Devon's acting, 16 WI when Aisha's, 167 IL when Maria's. The MEMORY panel relabels itself with the active coordinator's name.</div>
<h3>What scopes per staffer</h3>
<div class="math">
<span class="c">// On every /intelligence/chat call:</span><br>
if (b.staffer_id) {<br>
&nbsp;&nbsp;const staffer = lookupStaffer(b.staffer_id);<br>
&nbsp;&nbsp;<span class="c">// 1. Default state filter to staffer territory unless caller pinned one</span><br>
&nbsp;&nbsp;if (!explicitState) filters.push(`state = '${staffer.territory.state}'`);<br>
&nbsp;&nbsp;<span class="c">// 2. Default playbook-pattern geo to staffer's primary city/state</span><br>
&nbsp;&nbsp;cityForPatterns = staffer.territory.cities[0];<br>
&nbsp;&nbsp;stateForPatterns = staffer.territory.state;<br>
&nbsp;&nbsp;<span class="c">// 3. Surface staffer.name back so the UI can relabel MEMORY → MARIA'S MEMORY</span><br>
&nbsp;&nbsp;response.staffer = { id, name, territory };<br>
}
</div>
<div class="narr">
The corpus stays intact. The relevance gradient is per coordinator. As each accumulates fills, their slice of the playbook compounds independently. The architecture generalizes — every new metro adds territories, not code paths.
</div>
<div class="ref"><strong>Code:</strong> mcp-server/index.ts <code>STAFFERS</code> roster + <code>lookupStaffer()</code> · <code>/staffers</code> endpoint · <code>/intelligence/chat</code> smart_search route · <strong>UI:</strong> staffer dropdown in mcp-server/search.html</div>
</div>
<div class="chapter">
<div class="num">Chapter 6</div>
<h2>Construction Activity Signal Engine — the corpus is also a market signal</h2>
<div class="lede">Every contractor in this corpus is also a forward indicator on the public equities they touch. Permits filed today predict construction starts ~45 days out, staffing ~30, revenue recognition months later. The associated-ticker network surfaces this signal <em>before</em> any 10-Q. The architecture is metro-agnostic — Chicago is Phase 1; NYC DOB, LA County, Houston BCD, Boston ISD ship as Socrata-shaped adapters.</div>
<h3>Three flavors of attribution</h3>
<div class="math">
<span class="c">// per contractor in /intelligence/profiler_index:</span><br>
direct <span class="c">// contractor IS a public issuer → SEC tickers index match</span><br>
parent <span class="c">// curated KNOWN_PARENT_MAP — Turner → HOC.DE via Hochtief AG</span><br>
associated <span class="c">// co-permit network — Bob's Electric appears with TARGET CORPORATION</span><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="c">// 3+ times → inherits TGT as an associated indicator</span>
</div>
<div class="narr">
The associated path is the moat. A staffing-permit dataset that maps contractor-to-public-issuer is not commercially available; we synthesize it from the Socrata co-occurrence graph. Every additional metro multiplies edges.
</div>
<h3>Building Activity Index (BAI)</h3>
<div class="math">
<span class="c">// BAI = attribution-weighted average day-change across surfaced issuers:</span><br>
BAI = Σ (day_change_pct × attribution_count) / Σ attribution_count<br>
<br>
<span class="c">// Indexed build value = total $ of permits attributable to ANY public issuer</span><br>
<span class="c">// Network depth = issuers / total attribution edges</span>
</div>
<div class="narr">
Run BAI daily, save the series, and you've got a backtestable thesis in months. Today's surface is Chicago-only with ~9 issuers; the curve scales linearly with metros added — and the marginal cost of a new metro is one Socrata adapter.
</div>
<div class="ref"><strong>Code:</strong> mcp-server/index.ts <code>/intelligence/profiler_index</code> + <code>/intelligence/ticker_quotes</code> · entity.ts <code>lookupTickerLite()</code> · <code>fetchStooqQuote()</code> · <strong>UI:</strong> /profiler · <strong>Data sources:</strong> SEC company_tickers.json (in-memory index) + Stooq CSV API + curated parent-link map</div>
</div>
<div class="chapter">
<div class="num">Chapter 7</div>
<h2>Key architectural choices — what was picked and why</h2>
<div class="lede">Each choice is documented in <code>docs/DECISIONS.md</code> (Architecture Decision Records). If you dispute any of these, the ADR names the alternatives we rejected and the measurement that drove the call.</div>
<div class="card">
@ -314,62 +386,95 @@ pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 1
<div class="row accent-r">
<div style="flex:1"><div class="title">ADR-020 · Idempotent register() with schema-fingerprint gate</div><div class="meta">Same (name, fingerprint) reuses manifest. Different fingerprint = 409 Conflict. Prevents silent duplicate manifests. Cleanup run collapsed 374 → 31 datasets.</div></div>
</div>
<div class="row accent-r">
<div style="flex:1"><div class="title">ADR-021 · Semantic-correctness matrix layer</div><div class="meta">Pathway memory carries semantic flags (UnitMismatch, TypeConfusion, OffByOne, StaleReference, DeadCode, BoundaryViolation, …) on every trace. New reviews see prior bug fingerprints as a preamble; recurrent classes get caught on first read. Compounds across files in the same crate.</div></div>
</div>
<div class="row accent-l">
<div style="flex:1"><div class="title">Phase 19 design note · Statistical + semantic, not neural</div><div class="meta">Meta-index is cosine similarity + endorsement aggregation. No model training. Rebuildable from <code>successful_playbooks</code> alone. Neural re-ranker deferred to Phase 20+ only if statistical floor plateaus.</div></div>
</div>
<div class="row accent-l">
<div style="flex:1"><div class="title">Distillation freeze · v1.0.0 at e7636f2</div><div class="meta">145 tests · 22/22 acceptance · 16/16 audit-full · bit-identical reproducibility. Multi-layer contamination firewall on SFT exports. Substrate the auditor + mode runner sit on; "the system regressed" questions bisect against this anchor.</div></div>
</div>
</div>
</div>
<div class="chapter">
<div class="num">Chapter 6</div>
<div class="num">Chapter 8</div>
<h2>Measured at scale, on this machine</h2>
<div class="lede">Hardware: i9 + 128GB RAM + Nvidia A4000 16GB VRAM. Numbers below are from <em>this</em> running instance. Refresh the page and they'll recompute.</div>
<div class="lede">Hardware: i9 + 128GB RAM + Nvidia A4000 16GB VRAM + 2.5 Gbps symmetric network. Numbers below are from <em>this</em> running instance. Refresh the page and they'll recompute.</div>
<div class="grid" id="ch6-scale"><div class="loading">Loading scale data…</div></div>
<div id="ch6-recall" style="margin-top:10px"></div>
</div>
<div class="chapter">
<div class="num">Chapter 7</div>
<div class="num">Chapter 9</div>
<h2>Verify or dispute — reproduce it yourself</h2>
<div class="lede">Every claim below is a curl away from falsification.</div>
<div class="lede">Every claim above is a curl away from falsification.</div>
<div class="card">
<div class="narr"><strong>Health.</strong> Should return <code>lakehouse ok</code>.</div>
<pre>curl http://localhost:3100/health</pre>
<div class="narr"><strong>Gateway health.</strong> Returns provider matrix + worker count.</div>
<pre>curl -s http://localhost:3100/v1/health | jq</pre>
<div class="narr"><strong>Any SQL on multi-million-row Parquet.</strong> Sub-100ms typical.</div>
<pre>curl -s -X POST http://localhost:3100/query/sql \
-H 'Content-Type: application/json' \
-d '{"sql":"SELECT role, COUNT(*) FROM workers_500k WHERE state=\"IL\" GROUP BY role LIMIT 5"}'</pre>
<div class="narr"><strong>Hybrid search with playbook boost.</strong> The whole Phase 19 feedback loop in one request.</div>
<div class="narr"><strong>Hybrid search with playbook boost.</strong> SQL filter + vector rerank + playbook memory in one call.</div>
<pre>curl -s -X POST http://localhost:3100/vectors/hybrid \
-H 'Content-Type: application/json' \
-d '{"index_name":"workers_500k_v1",
"sql_filter":"role = '\''Forklift Operator'\'' AND city = '\''Chicago'\'' AND CAST(availability AS DOUBLE) > 0.5",
"question":"reliable forklift operator",
"top_k":5,"use_playbook_memory":true,"playbook_memory_k":200}'</pre>
<div class="narr"><strong>Playbook memory stats.</strong> Count + endorsed names + sample.</div>
<pre>curl http://localhost:3100/vectors/playbook_memory/stats</pre>
<div class="narr"><strong>Pattern discovery.</strong> What do past similar fills have in common?</div>
<pre>curl -s -X POST http://localhost:3100/vectors/playbook_memory/patterns \
<div class="narr"><strong>Pathway memory stats.</strong> System-level hot-swap signal — should show 88 traces / 11 replays / 100% reuse rate (probation gate crossed).</div>
<pre>curl -s http://localhost:3100/vectors/pathway/stats | jq</pre>
<div class="narr"><strong>Per-staffer scoping.</strong> Same query, different rosters per coordinator.</div>
<pre>for s in maria devon aisha; do
curl -s -X POST http://localhost:3700/intelligence/chat \
-H 'Content-Type: application/json' \
-d "{\"message\":\"forklift operators\",\"staffer_id\":\"$s\"}" \
| jq -r ".staffer.name + \": \" + (.sql_results | length | tostring) + \" workers, top: \" + (.sql_results[0].name + \" in \" + .sql_results[0].city + \", \" + .sql_results[0].state)"
done
# Maria: 167 workers, top: ... in Chicago, IL
# Devon: 89 workers, top: ... in Fort Wayne, IN
# Aisha: 16 workers, top: ... in Milwaukee, WI</pre>
<div class="narr"><strong>Late-worker triage in one shot.</strong> Pulls profile + 5 backfills + drafts SMS. Should respond in under 300ms.</div>
<pre>curl -s -X POST http://localhost:3700/intelligence/chat \
-H 'Content-Type: application/json' \
-d '{"query":"Forklift Operator in Chicago, IL","top_k_playbooks":25,"min_trait_frequency":0.3}'</pre>
<div class="narr"><strong>Run the dual-agent scenario yourself.</strong> All 5 events, real fills, real artifacts.</div>
-d '{"message":"Marcus running late site 4422"}' | jq</pre>
<div class="narr"><strong>Construction Activity Signal Engine.</strong> Profiler index with attribution, cost, last filed.</div>
<pre>curl -s -X POST http://localhost:3700/intelligence/profiler_index \
-H 'Content-Type: application/json' \
-d '{"limit":10}' \
| jq '.contractors[] | {name, permits, total_cost, direct: (.tickers.direct | map(.ticker)), associated: (.tickers.associated | map(.ticker + " ←via " + .partner_name))}'</pre>
<div class="narr"><strong>Live ticker quotes.</strong> Batch Stooq pull for the basket.</div>
<pre>curl -s -X POST http://localhost:3700/intelligence/ticker_quotes \
-H 'Content-Type: application/json' \
-d '{"tickers":["TGT","JPM","BALY","WBA","MCD"]}' | jq .quotes</pre>
<div class="narr"><strong>Audit trail — read any verdict on PR #11.</strong> Independent claim-vs-diff verifier output.</div>
<pre>ls /home/profit/lakehouse/data/_auditor/kimi_verdicts/
# 11-c3c9c2174a91.json 11-ca7375ea2b17.json 11-2d9cb128bf42.json …
jq '.findings[0:3]' /home/profit/lakehouse/data/_auditor/kimi_verdicts/11-c3c9c2174a91.json</pre>
<div class="narr"><strong>Distillation acceptance gate.</strong> 22/22 invariants must pass for any commit that touches the substrate.</div>
<pre>cd /home/profit/lakehouse
bun run tests/multi-agent/scenario.ts
# Output: tests/multi-agent/playbooks/scenario-&lt;timestamp&gt;/report.md</pre>
bun test auditor/schemas/distillation/ tests/distillation/
# Expect: 145 pass · 0 fail · 372 expect() calls</pre>
</div>
</div>
<div class="chapter">
<div class="num">Chapter 10</div>
<h2>What we are <em>not</em> claiming</h2>
<div class="lede">Every impressive-sounding number comes with a footnote. Here are the honest limits as of 2026-04-27.</div>
<div class="card">
<div class="row accent-a"><div style="flex:1"><div class="title">workers_500k is synthetic.</div><div class="meta">Real client ATS export replaces this table. Schema is deliberately identical to a production ATS so the swap is config, not code.</div></div></div>
<div class="row accent-a"><div style="flex:1"><div class="title">candidates table is light at 1,000 rows.</div><div class="meta">Intentionally small. Live PII-safe view layer is built; replacing the small table with a 100K+ ATS is a one-line config flip.</div></div></div>
<div class="row accent-b"><div style="flex:1"><div class="title">Chicago permit data is real.</div><div class="meta">Pulled live from data.cityofchicago.org/resource/ydr8-5enu.json (Socrata). Not synthetic. Not cached. Verifiable address-by-address.</div></div></div>
<div class="row accent-l"><div style="flex:1"><div class="title">Playbook memory is seeded from demo runs.</div><div class="meta">Same code path that seeds in production: every /log from the recruiter UI triggers seed → persist_sql. Demo seeds use the same shape as live operations.</div></div></div>
<div class="row accent-l"><div style="flex:1"><div class="title">Pathway memory probation gate is crossed.</div><div class="meta">88 traces, 11 replays, 11 successful, 100% reuse rate. Any pathway that fails to clear ≥0.80 success_rate after ≥3 replays gets retired automatically (sticky flag prevents oscillation).</div></div></div>
<div class="row accent-w"><div style="flex:1"><div class="title">SEC name-to-ticker fuzzy matcher has rare false positives.</div><div class="meta">For names with no clean SEC match the matcher occasionally surfaces a same-keyword small-cap (saw FLG attach to a PNC-adjacent contractor once). Kept conservative — minimum 2 non-stopword overlap. Tightenable to require explicit allow-list for production trading use.</div></div></div>
<div class="row accent-r"><div style="flex:1"><div class="title">12 awaiting public-data sources are placeholders.</div><div class="meta">DOL Wage &amp; Hour, EPA ECHO, MSHA, BBB, PACER, UCC liens, D&amp;B, etc. — listed by name on every contractor profile with a one-line "would show:" sample. Not yet wired. Each ships as a Socrata-style adapter; engineering scope is concrete.</div></div></div>
<div class="row accent-r"><div style="flex:1"><div class="title">No rate/margin awareness yet.</div><div class="meta">Worker pay expectations vs contract bill rates are not modeled. Flagged as a Phase 20 item; no architectural blocker.</div></div></div>
<div class="row accent-r"><div style="flex:1"><div class="title">BAI is a thesis, not a backtested signal.</div><div class="meta">The Building Activity Index is computed live from current attribution + day-change. To have a backtestable thesis we need the daily series saved over months. Architectural support is there (data/_kb/audit_baselines.jsonl pattern); just hasn't been running long enough.</div></div></div>
<div class="row accent-r"><div style="flex:1"><div class="title">Single-metro today.</div><div class="meta">Chicago via Socrata. NYC DOB, LA County, Houston BCD, Boston ISD, DC DCRA all use Socrata-equivalent APIs — adapters are config-only. Each new metro multiplies the network without multiplying the codebase.</div></div></div>
</div>
</div>
@ -394,8 +499,72 @@ function apiPost(path, body){
// Kick off every live-data section once the page has fully loaded.
window.addEventListener('load', function () {
  var loaders = [loadLiveSections, loadPathwayLive, loadSignalLive];
  for (var i = 0; i < loaders.length; i++) loaders[i]();
});
// Pathway memory live counters in Chapter 4 — small inline spans.
function loadPathwayLive(){
  fetch(A + '/api/vectors/pathway/stats')
    .then(function(resp){ return resp.json(); })
    .then(function(stats){
      if(!stats) return;
      // Write text into a span by id; spans absent from this page are skipped.
      function put(id, text){
        var node = document.getElementById(id);
        if(node) node.textContent = text;
      }
      put('pwm-traces', (stats.total_pathways || 0) + ' traces');
      put('pwm-replays', (stats.successful_replays || 0) + '/' + (stats.total_replays || 0));
      put('pwm-rate', Math.round((stats.replay_success_rate || 0) * 100) + '%');
    })
    .catch(function(){});
}
// Live tile under Chapter 1 — what the signal engine sees in this view.
// Fetches the contractor index from /intelligence/profiler_index, aggregates
// the ticker basket and attributed build value client-side, and renders a
// summary card into #ch1-live. Silently no-ops if the host element is missing,
// the request fails, or no contractors / tickers come back.
function loadSignalLive(){
  apiPost('/intelligence/profiler_index',{limit:200}).then(function(d){
    var host=document.getElementById('ch1-live');if(!host) return;
    host.textContent='';
    var rows=d.contractors||[];
    if(!rows.length) return;
    // All direct + associated ticker attachments for one contractor row.
    function tickersOf(r){
      return (r.tickers&&r.tickers.direct?r.tickers.direct:[])
        .concat(r.tickers&&r.tickers.associated?r.tickers.associated:[]);
    }
    // Basket: one entry per distinct ticker symbol, counting attribution edges.
    // (The previous version also collected a `kinds` list per ticker from t.via,
    // but nothing read it — dropped as dead code.)
    var byTk={};
    rows.forEach(function(r){
      tickersOf(r).forEach(function(t){
        if(!t||!t.ticker) return;
        if(!byTk[t.ticker]) byTk[t.ticker]={count:0};
        byTk[t.ticker].count++;
      });
    });
    var basket=Object.values(byTk);
    // Sum permit cost only over contractors with at least one ticker attached.
    var attribCost=rows.reduce(function(s,r){
      return s + (tickersOf(r).length>0 ? (r.total_cost||0) : 0);
    },0);
    if(!basket.length) return;
    var card=el('div','card accent-l');
    var hdr=el('div',null,'LIVE — Construction Activity Signal Engine');
    hdr.style.cssText='font-size:10px;color:#3fb950;text-transform:uppercase;letter-spacing:1.4px;font-weight:700;margin-bottom:8px';
    card.appendChild(hdr);
    var line=document.createElement('div');
    line.style.cssText='display:flex;gap:24px;flex-wrap:wrap;font-size:13px';
    // One big-number/small-label stat block for the summary line.
    function block(num,lab){
      var b=document.createElement('div');
      var n=document.createElement('div');n.style.cssText='font-size:18px;font-weight:700;color:#e6edf3;font-family:ui-monospace,monospace';n.textContent=num;
      var l=document.createElement('div');l.style.cssText='font-size:10px;color:#545d68;text-transform:uppercase;letter-spacing:1.2px;font-weight:600';l.textContent=lab;
      b.appendChild(n);b.appendChild(l);return b;
    }
    // Human-readable build value: $X.XXB / $XM / $XK depending on magnitude.
    var bav = attribCost>=1e9?'$'+(attribCost/1e9).toFixed(2)+'B':attribCost>=1e6?'$'+(attribCost/1e6).toFixed(0)+'M':'$'+Math.round(attribCost/1e3)+'K';
    line.appendChild(block(basket.length+'', 'Public issuers in scope'));
    line.appendChild(block(bav, 'Attributed build value'));
    line.appendChild(block(rows.length+'', 'Contractors indexed'));
    line.appendChild(block(basket.reduce(function(s,b){return s+b.count},0)+'', 'Attribution edges'));
    card.appendChild(line);
    var note=el('div',null,'Computed live from /intelligence/profiler_index in '+(d.duration_ms||0)+'ms · click any of the chapter-9 curl lines to verify');
    note.style.cssText='font-size:11px;color:#545d68;margin-top:10px;font-family:ui-monospace,monospace';
    card.appendChild(note);
    host.appendChild(card);
  }).catch(function(){});
}
function loadLiveSections(){
apiPost('/proof.json',{}).then(function(r){
var host1=document.getElementById('ch1-tests');host1.textContent='';