Merge pull request 'Post-PR-#11 polish: demo UI, staffer console, face pool, icons, contractor profile (24 commits)' (#12) from demo/post-pr11-polish-2026-04-28 into main

sanitize: drop over-broad path-missing branch + UTF-8-safe redaction
Re-scrum of yesterday's sanitizer fix surfaced 2 more real bugs in the fix itself (opus, both WARN, neither caught by kimi/qwen): W1 (service.rs:1949) — `mentions_path_missing` standalone branch was too aggressive. A registry-internal error like "/root/.cargo/.../x.rs: no such file or directory" would 404 because it triggers without dataset context. That's a real 500. Dropped the standalone branch; require dataset context AND missing-shape phrase. Lance's actual "Dataset at path X was not found" still satisfies it. W2 (service.rs:2018) — `out.push(bytes[i] as char)` corrupted multi-byte UTF-8 by casting raw bytes to char (only sound for ASCII < 128). A path containing user-supplied non-ASCII names produced Latin-1 mojibake. Rewrote redact_paths to track byte indices and emit unmatched runs as &str slices via push_str(&s[range]) — preserves multi-byte sequences verbatim. Step advance is now per-char, not per-byte, via small utf8_char_len helper. Two new regression tests: - is_not_found_does_not_match_unrelated_path_missing - redact_preserves_multibyte_utf8 (uses 工作 + café in input) 12/12 sanitize tests PASS. Smoke 10/10 PASS. Loop closure for opus re-scrum on the 2026-05-02 fix bundle. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 05:16:15 +00:00 · 2026-05-03 00:15:23 -05:00 · 2026-05-03 00:11:59 -05:00 · 2026-05-02 23:34:54 -05:00 · 2026-05-02 22:40:03 -05:00 · 2026-05-03 03:39:52 +00:00
1026 changed files with 154775 additions and 1520 deletions
--- a/.archon/workflows/lakehouse-architect-review.yaml
+++ b/.archon/workflows/lakehouse-architect-review.yaml
@ -0,0 +1,42 @@
+# Real Archon workflow on the Lakehouse repo, fully via our gateway.
+# Three Pi nodes, each fires LLM → /v1/chat/completions → OpenRouter,
+# every call lands a Langfuse trace + observer event.
+#
+# Read-only (allowed_tools: [read]). Don't pass --branch / leave
+# --no-worktree at runtime so Archon doesn't try to create a worktree.
+name: lakehouse-architect-review
+description: 'Pi reviews Lakehouse architecture in 3 turns through our gateway.'
+provider: pi
+model: openrouter/x-ai/grok-4.1-fast
+
+nodes:
+  - id: shape
+    prompt: |
+      Read these files and answer in 3 short bullets describing the
+      architectural shape of Lakehouse:
+      - /home/profit/lakehouse/Cargo.toml
+      - /home/profit/lakehouse/lakehouse.toml
+      - /home/profit/lakehouse/docs/MODE_RUNNER_TUNING_PLAN.md
+      Be terse. No preamble.
+    allowed_tools: ["read"]
+    effort: low
+    idle_timeout: 90000
+
+  - id: weakness
+    prompt: |
+      Read /home/profit/lakehouse/crates/gateway/src/v1/mod.rs and
+      identify ONE real weakness or risk. Cite file:line. One paragraph.
+    allowed_tools: ["read"]
+    effort: low
+    idle_timeout: 90000
+    depends_on: [shape]
+
+  - id: improvement
+    prompt: |
+      Based on the prior weakness ($weakness.output), propose ONE
+      surgical improvement (≤6 lines of Rust). Show the patch as
+      `old_string` and `new_string` in markdown code blocks.
+    allowed_tools: []
+    effort: low
+    idle_timeout: 90000
+    depends_on: [weakness]
--- a/.gitignore
+++ b/.gitignore
@ -4,3 +4,49 @@
 .env
 __pycache__/
 *.pyc
+
+# Headshot pool — binary face JPGs are fetched by scripts/staffing/fetch_face_pool.py
+# (synthetic StyleGAN, ~580MB for 1000 faces). Manifest + fetch script are tracked.
+data/headshots/face_*.jpg
+data/headshots/_thumbs/
+# ComfyUI on-demand generated portraits (per-worker unique). Cached on first
+# request; fully regeneratable via /headshots/generate/:key.
+data/headshots_gen/
+
+# Runtime data — all regeneratable from inputs or accumulated by daemons.
+# Anything under data/_<name>/ is internal state (auditor outputs, KB caches,
+# pathway memory snapshots, HNSW trial results, etc.). Anything under
+# data/datasets/ or data/vectors/ is generated by ingest/index pipelines.
+data/_*/
+data/lance/
+data/datasets/
+data/vectors/
+data/demo/
+data/evidence/
+data/face_test/
+data/headshots_role_pool/
+data/icons_pool/
+data/scored-runs/
+data/workspaces/
+data/catalog/
+data/**/*.bak-*
+data/**/*.pre-*-bak
+
+# Logs
+logs/
+
+# Build artifacts
+node_modules/
+exports/
+mcp-server/data/
+
+# Per-run distillation reports (timestamp-named); keep the parent dir tracked
+# via .gitkeep if needed but don't carry every batch's report set.
+reports/distillation/[0-9]*/
+reports/distillation/*-*-*-*-*/
+
+# Test scratch — scratchpads, traces, sessions are regenerated each run.
+# PRD/scenario fixtures stay tracked (they ARE the test).
+tests/agent_test/_*
+tests/agent_test/sessions/
+tests/real-world/runs/
--- a/.mcp.json
+++ b/.mcp.json
@ -7,6 +7,14 @@
        "LAKEHOUSE_URL": "http://localhost:3100",
        "MCP_TRANSPORT": "stdio"
      }
+    },
+    "gitea": {
+      "command": "bunx",
+      "args": ["gitea-mcp"],
+      "env": {
+        "GITEA_HOST": "https://git.agentview.dev",
+        "GITEA_ACCESS_TOKEN": "SET_ME_FROM_GITEA_UI_USER_SETTINGS_APPLICATIONS"
+      }
    }
  }
 }
--- a/Cargo.lock
+++ b/Cargo.lock
@ -46,7 +46,9 @@ dependencies = [
 name = "aibridge"
 version = "0.1.0"
 dependencies = [
+ "async-trait",
 "axum",
+ "lru",
 "reqwest",
 "serde",
 "serde_json",
@ -4085,11 +4087,14 @@ dependencies = [
 "shared",
 "storaged",
 "tokio",
+ "toml",
 "tonic",
 "tower-http",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",
+ "truth",
+ "validator",
 "vectord",
 ]

@ -4678,6 +4683,7 @@ dependencies = [
 "chrono",
 "croner",
 "csv",
+ "journald",
 "lopdf",
 "mysql_async",
 "object_store",
@ -6895,6 +6901,7 @@ dependencies = [
 "storaged",
 "tokio",
 "tracing",
+ "truth",
 "url",
 ]

@ -8726,6 +8733,17 @@ dependencies = [
 "wasm-bindgen",
 ]

+[[package]]
+name = "truth"
+version = "0.1.0"
+dependencies = [
+ "serde",
+ "serde_json",
+ "tokio",
+ "toml",
+ "tracing",
+]
+
 [[package]]
 name = "try-lock"
 version = "0.2.5"
@ -8892,6 +8910,19 @@ dependencies = [
 "wasm-bindgen",
 ]

+[[package]]
+name = "validator"
+version = "0.1.0"
+dependencies = [
+ "arrow 55.2.0",
+ "parquet 55.2.0",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.1"
@ -8919,6 +8950,7 @@ dependencies = [
 "object_store",
 "parquet 55.2.0",
 "queryd",
+ "reqwest",
 "serde",
 "serde_json",
 "sha2",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -14,6 +14,8 @@ members = [
    "crates/ui",
    "crates/lance-bench",
    "crates/vectord-lance",
+    "crates/truth",
+    "crates/validator",
 ]

 [workspace.dependencies]
--- a/STATE_OF_PLAY.md
+++ b/STATE_OF_PLAY.md
@ -0,0 +1,269 @@
+# STATE OF PLAY — Lakehouse
+
+**Last verified:** 2026-05-02 evening CDT
+**Verified by:** live probe (smoke 9/9, parity 32/32, gateway restarted), not memory.
+
+> **Read this FIRST.** When the user says "we're working on lakehouse," they mean the working code captured below — NOT what `git log` framed as "the cutover" or what memory snapshots from 2 days ago suggest. If memory contradicts this file, this file wins. Update it when something is verified working — not when a phase finishes.
+
+---
+
+## WHAT LANDED 2026-05-01 → 2026-05-02 (12 commits this wave)
+
+| Commit | What | Verified |
+|---|---|---|
+| `5d30b3d` | lance: auto-build doc_id btree in `lance_migrate` handler | doc-fetch ~5ms (was ~100ms full scan) on scale_test_10m |
+| `044650a` | lance-bench: same scalar build post-IVF (matches gateway) | cargo check clean |
+| `7594725` | lance: 4-pack — `sanitize_lance_err` + 7 unit tests + 9-probe smoke + 10M re-bench | smoke 9/9 PASS, tests 7/7 PASS |
+| `98b6647` | gateway: `IterateResponse.trace_id` echoed; session_log_path enabled | parity probes see one unified JSONL |
+| `57bde63` | gateway: trace-id propagation + coordinator session JSONL (Rust parity with Go wave) | session_log_parity 4/4 |
+| `ba928b1` | aibridge: drop Python sidecar from hot path; AiClient → direct Ollama | aibridge tests 32/32 PASS, /ai/embed live 768d |
+| `654797a` | gateway: pub `extract_json` + `parity_extract_json` bin | extract_json_parity 12/12 |
+| `c5654d4` | docs: pointer to `golangLAKEHOUSE/docs/ARCHITECTURE_COMPARISON.md` | — |
+| `150cc3b` | aibridge: LRU embed cache, 236× RPS warm (78ms → 129us p50) | load test |
+| `9eed982` | mcp-server: /_go/* pass-through for G5 cutover slice | — |
+| `6e34ef7` | gitignore: stop tracking 100+ runtime ephemera (data/_*, lance, logs, node_modules) | untracked dropped 100+ → 0 |
+| `41b0a99` | chore: add 33 real items that were sitting untracked (scripts, scenarios, kimi reports, dev UIs) | clean working tree |
+
+**Cross-runtime parity (post-this-wave):** 32/32 across 5 probes — `validator(6/6) + extract_json(12/12) + session_log(4/4) + materializer(2/2) + embed(8/8)`. Run `cd /home/profit/golangLAKEHOUSE && for p in scripts/cutover/parity/*.sh; do bash "$p"; done` to re-verify.
+
+**Lance backend (was untested 5 days ago, now gauntlet-ready):**
+- `cargo test -p vectord-lance --release` → 7/7 PASS
+- `./scripts/lance_smoke.sh` → 9/9 PASS against live gateway
+- `reports/lance_10m_rebench_2026-05-02.md` — search warm ~20ms / cold ~46ms median, doc-fetch ~5ms post-btree
+
+---
+
+## VERIFIED WORKING RIGHT NOW
+
+### The client demo (Staffing Co-Pilot)
+
+**Public URL:** `https://devop.live/lakehouse/` — 200, "Staffing Co-Pilot" (159 KB SPA, leaflet maps, dark theme).
+**Local URL:** `http://localhost:3700/` — same page, served by `mcp-server/index.ts` (PID 1271, started 09:48 CDT today).
+
+**The staffers console** (the one the client was thoroughly impressed with):
+- `https://devop.live/lakehouse/console` — 200, "Lakehouse — What Your Staffing System Would Do" (26 KB)
+- Pulls project index via `/api/catalog/datasets` (36 datasets) + playbook memory via `/api/vectors/playbook_memory/stats` (4,701 entries with embeddings, real ops like *"fill: Maintenance Tech x2 in Milwaukee, WI"*)
+
+Client-visible flow that works end-to-end on the public URL:
+
+| Endpoint | Sample output |
+|---|---|
+| `GET /api/catalog/datasets` | 36 datasets indexed: timesheets 1M, call_log 800K, workers_500k 500K, email_log 500K, workers_100k 100K, candidates 100K, placements 50K, job_orders 15K, successful_playbooks_live 2,077 |
+| `GET /api/vectors/playbook_memory/stats` | 4,701 fill operations with embeddings |
+| `GET /system/summary` | 36 datasets, 2.98M rows, 60 indexes, 500K workers loaded, 1K candidates |
+| `POST /intelligence/staffing_forecast` | 744 Production Workers needed in 30d, 11,281 bench (4,687 reliable), coverage 1,444%, risk=ok. Same for Electrician (need 32, bench 2,440) and Maintenance Tech (need 17, bench 5,004). |
+| `POST /intelligence/permit_contracts` | permit `3442956` $500K → 3 Production Workers, 886-candidate pool, 95% fill, $36K gross. 5 more Chicago permits with 8 workers each, same pool, 95% fill, $96K each. |
+| `POST /intelligence/market` | major Chicago permits ranked: $730M O'Hare, $615M 307 N Michigan, $580M casino, $445M Loop transit (real geo coords). |
+| `POST /intelligence/permit_entities` | architects + contractors from permit contacts (e.g. "KACPRZYNSKI, ANDY", "SLS ELECTRICAL SERVICE"). |
+| `POST /intelligence/activity` + `/intelligence/arch_signals` + `/intelligence/chat` | all 200 |
+
+The demo tells the story: *"upcoming Chicago contracts → workers needed → coverage from the bench → architects/contractors involved → revenue and margin."* That's the "live data + anticipating contracts + complete workflow" pitch — working as of right now.
+
+### Backend, verified live this session
+
+| Surface | State |
+|---|---|
+| Gateway `:3100` | up, 4 providers configured, `/v1/health` 200 with 500K workers loaded |
+| MCP server `:3700` (Co-Pilot demo) | up, all `/intelligence/*` endpoints respond |
+| VCP UI `:3950` | started this session, `/data/*` 200, real numbers |
+| Observer `:3800` | ring full (2,000/2,000) — older events evicted, query Langfuse for 24h-ago state |
+| Sidecar `:3200` | up |
+| Langfuse `:3001` | recording, `gw:/log` + `v1.chat:openrouter` traces visible |
+| LLM Team UI `:5000` | up, only `extract` mode registered |
+| OpenCode fleet | **40 models reachable through one `sk-*` key** (verified live `GET https://opencode.ai/zen/v1/models`) |
+
+OpenCode catalog (live):
+- Claude: opus-4-7, opus-4-6, opus-4-5, opus-4-1, sonnet-4-6, sonnet-4-5, sonnet-4, haiku-4-5
+- GPT-5: 5.5-pro, 5.5, 5.4-pro, 5.4, 5.4-mini, 5.4-nano, 5.3-codex-spark, 5.3-codex, 5.2, 5.2-codex, 5.1-codex-max, 5.1-codex, 5.1-codex-mini, 5.1, 5-codex, 5-nano, 5
+- Gemini: 3.1-pro, 3-flash
+- GLM: 5.1, 5
+- Minimax: m2.7, m2.5
+- Kimi: k2.6, k2.5
+- Qwen: 3.6-plus, 3.5-plus
+- Other: BIG-PKL (was a typo-prone name in the catalog, model id starts with "big-pkl-something")
+- Free tier: minimax-m2.5-free, hy3-preview-free, ling-2.6-flash-free, trinity-large-preview-free
+
+### The substrate (frozen — do not re-architect)
+
+- Distillation v1.0.0 at tag `e7636f2` — **145/145 bun tests pass, 22/22 acceptance, 16/16 audit-full**
+- Output: `data/_kb/distilled_{facts,procedures,config_hints}.jsonl` + `data/vectors/distilled_{factual,procedural,config_hint}_v20260423102847.parquet`
+- Auditor cross-lineage: Kimi K2.6 ↔ Haiku 4.5 alternation, Opus auto-promote on diffs >100k chars, **per-PR cap=3 with auto-reset on new head SHA**
+- Pathway memory: 88 traces, 11/11 successful replays (probation gate crossed)
+- Mode runner: 5 native modes; `codereview_isolation` is default; composed-corpus auto-downgrade verified Apr 26 (composed lost 5/5 vs isolation, p=0.031)
+
+### Matrix indexer
+
+30+ live corpora including:
+- 5 versions within the `workers_500k_v1..v9` range (50K embedded chunks each)
+- 11 batched 2K-row shards `w500k_b3..b17`
+- `chicago_permits_v1` (3,420), `resumes_100k_v2` (100K candidates), `ethereal_workers_v1` (10K)
+- `lakehouse_arch_v1` (2,119), `lakehouse_symbols_v1` (2,470), `lakehouse_answers_v1` (1,269), `scrum_findings_v1` (1,260)
+- `kb_team_runs_v1` (12,693) + `kb_team_runs_agent` (4,407) — LLM-team play history embedded
+- `distilled_factual_v20260423102507` (8) — distillation output
+
+### Code health
+
+- `cargo check --workspace` → **0 warnings, 0 errors**
+- `bun test auditor + tests/distillation` → **145/145 pass**
+- `ui/server.ts` + `auditor.ts` bundle clean
+
+---
+
+## DO NOT RELITIGATE
+
+- **PR #11 is merged into `origin/main` as `ed57eda`** — do not "still need to merge PR #11."
+- **Distillation tag `distillation-v1.0.0` at `e7636f2` is FROZEN** — do not re-architect schemas, scorer rules, audit fixtures.
+- **Kimi forensic HOLD verdict (2026-04-27) was 2/8 false + 6/8 latent** — do not re-debate, see `reports/kimi/audit-last-week-full.md`.
+- **`candidates_safe` `vertical` column bug** — fixed at catalog metadata layer in commit `c3c9c21`. Do not "discover" it again.
+- **Decisions A/B/C/D from `synthetic-data-gap-report.md`** — all four scripts shipped today (`d56f08e`, `940737d`, `c3c9c21`). Do not "ask J for approval."
+- **`workers_500k.phone` type fixup** — already string. The fixup script is idempotent; running it is a no-op.
+- **`client_workerskjkk` typo dataset** — was breaking every SQL query (catalog had it registered, file didn't exist). Removed via `DELETE /catalog/datasets/by-name/client_workerskjkk` this session. Do not re-add. Adding a startup gate that errors on unrecognized parquet names is the long-term fix per now.md Step 2C.
+- **Python sidecar dropped from hot path 2026-05-02 (`ba928b1`)** — AiClient calls Ollama directly. Do not "wire python embedding back in." `lab_ui.py` + `pipeline_lab.py` keep running as dev-only UIs (not on the runtime path).
+- **Lance backend gauntlet (2026-05-02)** — sanitizer over all 5 routes, 7 unit tests, 9-probe smoke, 10M re-bench. The `doc_id` btree auto-builds inside `lance_migrate` AND `lance-bench`. Do not "discover" the missing scalar index again or the leaked filesystem paths in error bodies.
+- **Cross-runtime parity = 32/32** across 5 probes in `golangLAKEHOUSE/scripts/cutover/parity/`. Do not "build a parity probe for X" without checking — validator, extract_json, session_log, materializer, and embed are all already covered.
+- **Decisions tracker is `golangLAKEHOUSE/docs/ARCHITECTURE_COMPARISON.md`** — single living source of truth for cross-runtime decisions. As of 2026-05-02 it has 0 `_open_` code work items; only 2 strategic items left (Lance vs Parquet+HNSW-with-spilling, Go-vs-Rust primary cutover).
+
+---
+
+## FIXES MADE THIS SESSION (2026-04-27 evening)
+
+1. **`crates/gateway/src/v1/iterate.rs:93`** — `state` → `_state` (cleared the one cargo warning).
+2. **`lakehouse-ui.service` (Dioxus)** — disabled. Was failing 7,242 times against a missing `target/dx/ui/debug/web/public` build dir. `systemctl stop && disable`.
+3. **VCP UI on `:3950`** — started `bun run ui/server.ts` (PID 1162212, log `/tmp/lakehouse_ui.log`). `/data/*` endpoints now 200 with real data.
+4. **`client_workerskjkk` catalog entry** — `DELETE /catalog/datasets/by-name/client_workerskjkk` removed the dead manifest. **This was the actual root cause** of `/system/summary` reporting `workers_500k_rows: 0` and the demo showing zero bench. Every SQL query was failing schema inference on the missing file before reaching its target table. Fixed → `workers_500k_rows: 500000`, `candidates_rows: 1000`, demo coverage flipped from "critical 0%" to actual percentages on devop.live/lakehouse.
+
+## FIXES MADE THIS SESSION (2026-04-28 early — face pool)
+
+5. **Synthetic StyleGAN face pool — 1000 faces, gender+race+age tagged.** `scripts/staffing/fetch_face_pool.py` fetches from thispersondoesnotexist.com; `scripts/staffing/tag_face_pool.py --min-age 22` runs deepface and excludes minors. `data/headshots/manifest.jsonl` now has gender (494 men / 458 women), race (caucasian 662 · east_asian 128 · hispanic 86 · middle_eastern 59 · black 14 · south_asian 3), age, and 48 minor exclusions. Server pool = 952 servable faces.
+6. **`mcp-server/index.ts:1308` `/headshots/:key` route** — gender×race×age intersection bucketing with graceful fallback (gender-only → all). Same key always returns same face; different keys spread evenly.
+7. **`/headshots/_thumbs/` pre-resized 384×384 webp** (60× smaller: 587KB → ~11KB). Without this, 40-card grids overran Chrome's parallel-connection budget and ~75% of tiles never finished decoding. Generated via parallel ffmpeg (`xargs -P 8`); `.gitignore`d.
+8. **`mcp-server/search.html` + `console.html`** — dropped `img.loading='lazy'`. With 11KB thumbs, eager load is cheap (~500KB for 50 cards) and avoids the off-screen race that lazy decode produced.
+9. **ComfyUI on-demand uniqueness — `serve_imagegen.py:32`** added `seed` to `_cache_key()` (was caching by prompt only — 3 different worker seeds collapsed to 1 cached image). Verified: seed=839185194/195/196 → 3 distinct md5s.
+10. **`mcp-server/index.ts:1234` `/headshots/generate/:key`** — ComfyUI hot-path that derives a deterministic-per-worker seed via djb2-style hash; cold ~1.5s, cached ~1ms. Worker prompt format: `professional corporate headshot portrait of a {age}-year-old {race} {gender}, {role}, neutral expression, plain studio background, soft natural lighting, sharp focus, photorealistic, dslr`. Cache at `data/headshots_gen/` (gitignored, regeneratable).
+11. **Confidence-default name resolution** in `search.html` — `genderFor()` and `guessEthnicityFromFirstName()` lookup tables (FEMALE_NAMES, MALE_NAMES, NAMES_HISPANIC, NAMES_BLACK, NAMES_SOUTH_ASIAN, NAMES_EAST_ASIAN, NAMES_MIDDLE_EASTERN). Xavier → man+hispanic, Aisha → woman+black, etc. Every worker resolves to a face-pool bucket.
+
+End-to-end verified: playwright run on `https://devop.live/lakehouse/?q=forklift+operators+IL` → 21/21 cards loaded, 0 broken, all 384×384 webp thumbs.
+
+---
+
+## OPEN — but not blocking the demo
+
+| Item | What | When to act |
+|---|---|---|
+| `modes.toml` `staffing_inference.matrix_corpus` | still says `workers_500k_v8`. v9 in vector index is from Apr 17 (raw-sourced, not safe-view). The new `build_workers_v9.sh` rebuilds from `workers_safe`. | Run when you have 30+ min for the rebuild. |
+| Open PRs #6, #7, #10 | sitting since Apr 22-24, auditor verdicts on disk at `data/_auditor/kimi_verdicts/{6,7,10}-*.json` | Read verdicts, decide reconcile/close. |
+| `test/enrich-prd-pipeline` branch | 35 unmerged commits, includes more-evolved auditor/inference.ts (666 vs main's 580 lines), curation+fact-extractor wiring | Reconcile or formally archive — see `memory/project_unmerged_architecture_work.md`. |
+| `federation-hnsw-trials` stash | Lance + S3/MinIO prototype, `aws-config` crate added, 708 insertions | Phase B from EXECUTION_PLAN.md — revisit when Parquet vector ceiling actually hurts. |
+| `candidates` manifest drift | manifest 100K vs SQL 1K. Cosmetic. | Run a metadata resync if it matters. |
+
+---
+
+## RUNTIME CHEATSHEET
+
+```bash
+# Verify the demo (public + local both work)
+curl -sS https://devop.live/lakehouse/                   # Co-Pilot HTML
+curl -sS https://devop.live/lakehouse/console            # staffers console
+curl -sS -X POST https://devop.live/lakehouse/intelligence/staffing_forecast \
+  -d '{}' -H 'content-type: application/json' \
+  | jq '.forecast[] | {role, demand_workers, bench_total, coverage_pct, risk}'
+
+# Restart sequence (after Rust changes)
+sudo systemctl restart lakehouse.service                 # gateway :3100
+sudo systemctl restart lakehouse-auditor                 # auditor daemon
+sudo systemctl restart lakehouse-observer                # observer :3800
+# UI bun on :3950 is NOT systemd-managed (lakehouse-ui.service is disabled).
+# Restart manually:  kill <pid>; nohup bun run ui/server.ts > /tmp/lakehouse_ui.log 2>&1 &
+
+# Health checks
+curl -sS http://localhost:3100/v1/health | jq            # workers_count, providers
+curl -sS http://localhost:3100/vectors/pathway/stats | jq
+curl -sS http://localhost:3100/v1/usage | jq             # since-restart cost
+curl -sS http://localhost:3700/system/summary | jq       # dataset counts
+```
+
+---
+
+## VISION — what we're actually building (not what's done)
+
+J's framing for the legacy staffing company:
+
+- Pull live data, anticipate contracts based on Chicago permits → real architect/contractor associations, headcount, time period, money, scope.
+- Hybrid + memory index → search large corpora cheaply.
+- Email comes in → verify against contract; SMS comes in → alert when index changes.
+- Real-time.
+- Invent metrics nobody else has using the hybrid index.
+- Next stage: workers download an app → geolocation clock-in → automatic responsiveness measurement, no user effort, with incentives for using it.
+- Find people getting certificates (passive cert tracking).
+- Pull union data → bring contracts that work for **employees**, not just employers.
+- All metrics visible, nothing hidden, value-aligned with what each side actually needs.
+
+If a future session is shaving away from this vision toward "fix the cutover" or "land Phase X," the vision wins. Phases are scaffolding for the vision, not the goal.
+
+---
+
+## CURRENT PLAN — fix the demo for the legacy staffing client
+
+Built from playwright audit of the live demo (2026-04-27 evening). Each item ends in something the client can SEE, not internal cleanups.
+
+**Demo state is anchored by git tag `demo-2026-04-27`** (commit `ed57eda`, the merge of PR #11). To restore code state: `git checkout demo-2026-04-27`. To restore runtime state: `DELETE /catalog/datasets/by-name/client_workerskjkk` (catalog hot-fix is not in git).
+
+### P1 — Search box that actually filters (highest visible impact)
+
+**Problem:** typing in `#sq` and pressing Enter fires `POST /intelligence/chat` with body `{"message":"<query>"}`. The state (`#sst`) and role (`#srl`) selects are ignored — never sent in the body. So every search returns a generic chat completion, never a SQL+vector hybrid filter against `workers_500k`. That is the "cached/generic response" the client sees.
+
+**Fix:** in `mcp-server/search.html`, change the search-submit handler to call the real worker search endpoint with `{query, state, role, top_k}`. The MCP `search_workers` tool surface already exists; route the form there. Render returned worker rows in the existing card grid.
+
+**Done when:** typing "forklift" + state IL + role "Forklift Operator" returns ≤ top_k IL Forklift Operators, and changing state to WI returns different workers.
+
+### P2 — Contractor-name click → `/contractor` profile page
+
+**Problem:** clicking a contractor name in any rendered card stays on `/lakehouse/`. URL doesn't change.
+
+**Fix:** wrap contractor names in `<a href="/contractor?name=<encoded>">`. The page `mcp-server/contractor.html` (14.8 KB, "Contractor Profile · Staffing Co-Pilot") already exists at `/contractor` and the data endpoint `/intelligence/contractor_profile` already returns rich data.
+
+**Then check contractor.html actually shows:** full history of every record the database has on that contractor + heat map of locations underneath + relevant info (per J 2026-04-27). If the page is incomplete, finish it. Otherwise just wire the link.
+
+**Done when:** clicking "KACPRZYNSKI, ANDY" opens a profile with: every Chicago permit they're contact_1 or contact_2 on, a leaflet map with markers for each address, and any matched workers from prior placements at their sites.
+
+### P3 — Substrate signal at the bottom shows the right numbers
+
+**Problem:** J reports the bottom panel says "playbook memory empty, 80 traces 0 replies." Reality from the live endpoints: `/api/vectors/playbook_memory/stats` = 4,701 entries with embeddings; `/vectors/pathway/stats` = 88 traces, 11/11 replays.
+
+**Fix:** find the renderer in search.html that builds the substrate signal panel; verify it's hitting the right endpoints and reading the right keys; fix shape mismatches.
+
+**Done when:** bottom panel shows real numbers (4,701 playbooks, 88 traces, 11/11 replays) and references at least one specific recent operation from the playbook stats sample.
+
+### P4 — Top nav reflects today's architecture
+
+**Problem:** Walkthrough/Architecture/Spec/Onboard/Alerts/Workspaces tabs all return 200 but content is from old architecture. Doesn't mention: gateway scratchpad, memory indexer, ranker, mode runner, OpenCode 40-model fleet, distillation substrate, auditor cross-lineage.
+
+**Fix:** rewrite `mcp-server/proof.html` (or add a single new page "What's running" that replaces Architecture+Spec) to describe what's actually shipped as of `demo-2026-04-27`. Keep one architecture page, drop redundancy. Either complete or hide Onboard/Alerts/Workspaces — J's call which.
+
+**Done when:** the architecture page tells a non-technical reader, in 2 minutes, what each piece does in coordinator-relatable terms ("intern that read every email", not "3-stage adversarial inference pipeline").
+
+### P5 — Caching for the project-index build_signal (J flagged unfinished)
+
+**Problem:** "we never finished our caching for project index build signal it's not pulling new information." We still need to find what `build_signal` refers to. It is likely a scrum/auditor signal that should rebuild the `lakehouse_arch_v1` corpus on commit but isn't wired up to do so.
+
+**Fix:** identify the build-signal pipeline (likely in `auditor/` or `crates/vectord/`), wire its emit to a corpus rebuild, verify by making a test commit and watching the new chunk appear in `/vectors/indexes` for `lakehouse_arch_v1`.
+
+**Done when:** committing a new file to `crates/` causes `lakehouse_arch_v1` chunk_count to increase within N minutes.
+
+### P0 — Anchor the demo state (DONE)
+
+Tagged `ed57eda` as `demo-2026-04-27`. Future sessions: `git checkout demo-2026-04-27` to land in this exact code state.
+
+---
+
+## EXECUTION ORDER
+
+1. **P1 first** — biggest visible bug, ~30-60 min
+2. **P2 next** — contractor click is the second-biggest "doesn't work" the client sees, ~20 min if profile is mostly done
+3. **P3** — small fix, big "looks alive" win
+4. **P4** — biggest scope; might split across sessions
+5. **P5** — feature work, only after the visible bugs are fixed
+
+Each item commits independently with the format `demo: P<n> — <one-line>` so the commit log doubles as a progress journal. After each merge to main, re-tag `demo-latest` to point at the new HEAD.
+
+Stop here and let J pick which item to start with. Do not silently extend scope.
--- a/auditor/audit.ts
+++ b/auditor/audit.ts
@ -23,6 +23,7 @@ import { runStaticCheck } from "./checks/static.ts";
 import { runDynamicCheck } from "./checks/dynamic.ts";
 import { runInferenceCheck } from "./checks/inference.ts";
 import { runKbCheck } from "./checks/kb_query.ts";
+import { runKimiArchitectCheck } from "./checks/kimi_architect.ts";

 const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts";
 // Playbook for audit findings — one row per block/warn finding from a
@ -67,6 +68,29 @@ export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<
    ...kbFindings,
  ];

+  // Kimi-architect second-pass review. Off by default; enabled with
+  // LH_AUDITOR_KIMI=1. Sequential (not in the parallel block above)
+  // because it consumes the prior findings as context — Kimi sees what
+  // deepseek already flagged and is asked "what did everyone miss?"
+  // Failure-isolated by design: any error returns a single info-level
+  // skip finding so the existing audit pipeline never blocks on Kimi.
+  if (process.env.LH_AUDITOR_KIMI === "1") {
+    try {
+      const kimiFindings = await runKimiArchitectCheck(diff, allFindings, {
+        pr_number: pr.number,
+        head_sha: pr.head_sha,
+      });
+      allFindings.push(...kimiFindings);
+    } catch (e) {
+      allFindings.push({
+        check: "kimi_architect",
+        severity: "info",
+        summary: `kimi_architect outer error — ${(e as Error).message.slice(0, 160)}`,
+        evidence: [(e as Error).stack?.slice(0, 360) ?? ""],
+      });
+    }
+  }
+
  const duration_ms = Date.now() - t0;
  const metrics = {
    audit_duration_ms: duration_ms,
@ -184,7 +208,7 @@ function formatReviewBody(v: Verdict): string {
  lines.push("");

  // Per-check sections, only if the check produced findings.
-  const checkOrder = ["static", "dynamic", "inference", "kb_query"] as const;
+  const checkOrder = ["static", "dynamic", "inference", "kb_query", "kimi_architect"] as const;
  for (const check of checkOrder) {
    const fs = byCheck[check] ?? [];
    if (fs.length === 0) continue;
@ -217,6 +241,6 @@ function formatReviewBody(v: Verdict): string {
  return lines.join("\n");
 }

-function stubFinding(check: "dynamic" | "inference", why: string): Finding[] {
+function stubFinding(check: "dynamic" | "inference" | "kimi_architect", why: string): Finding[] {
  return [{ check, severity: "info", summary: `${check} check skipped — ${why}`, evidence: [why] }];
 }
--- a/auditor/checks/inference.ts
+++ b/auditor/checks/inference.ts
@ -18,36 +18,37 @@ import { readFile, mkdir, appendFile } from "node:fs/promises";
 import { extractFacts } from "../fact_extractor.ts";

 const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
-const MODEL = process.env.LH_AUDITOR_REVIEW_MODEL ?? "gpt-oss:120b";
-// Tie-breaker for claims where the N=3 consensus produces a 1-1-1
-// split (genuinely borderline). Different architecture from the
-// primary reviewer (gpt-oss) so the tie-break isn't correlated with
-// the original disagreement. qwen3-coder:480b is a newer coding
-// specialist at 480B params, well-suited to PR-diff claim verification
-// and distinct in training lineage from gpt-oss.
-const TIEBREAKER_MODEL = process.env.LH_AUDITOR_TIEBREAKER_MODEL ?? "qwen3-coder:480b";
+// Rebuild 2026-04-26: route claim verification through /v1/mode/execute
+// (task_class=pr_audit) so we get pathway memory + lakehouse_answers_v1
+// + JSON-shaped framing molded into ONE prompt. The hand-rolled
+// systemMsg/userMsg path was reinventing the mode runner badly.
+//
+// 2026-04-27 update: original default kimi-k2:1t hit a sustained
+// upstream outage on Ollama Cloud (consistent 500 ISE across hours of
+// retries — verified with trivial 8-token probes). Swapped default to
+// deepseek-v3.1:671b which is proven working end-to-end through the
+// pr_audit mode runner during Phase 5 distillation acceptance testing.
+// kimi-k2:1t can be re-selected via LH_AUDITOR_REVIEW_MODEL env when
+// the upstream returns. Tie-breaker stays grok-4.1-fast (different
+// vendor lineage so consensus + tie-break won't fail-correlate).
+const MODEL = process.env.LH_AUDITOR_REVIEW_MODEL ?? "deepseek-v3.1:671b";
+const TIEBREAKER_MODEL = process.env.LH_AUDITOR_TIEBREAKER_MODEL ?? "x-ai/grok-4.1-fast";
 const N_CONSENSUS = Number(process.env.LH_AUDITOR_CONSENSUS_N ?? 3);
 const AUDIT_DISCREPANCIES_JSONL = "/home/profit/lakehouse/data/_kb/audit_discrepancies.jsonl";
-// 40KB comfortably fits gpt-oss:120b's context. PR #1 (~39KB) was
-// previously truncated at 15KB causing the reviewer to miss later
-// files (gitea.ts, policy.ts) and flag "no Gitea client present" as a
-// block finding when the file was simply outside the truncation window.
-//
-// Above this threshold we curate via tree-split rather than truncate,
-// following the scrum_master pattern: shard the diff, summarize each
-// shard against the claim-verification task, merge into a compact
-// scratchpad, then ask the cloud to verify claims against the
-// scratchpad. This gives the cloud full-PR fidelity without bursting
-// its context window (observed failure mode: empty response or
-// unparseable output when prompt exceeds model's comfortable range).
+// 40KB comfortably fits the consensus models' context windows
+// (deepseek-v3.1 64K, gpt-oss-120b 128K). When the raw PR diff
+// exceeds this, we truncate and signal it via curationNote — the
+// pr_audit mode runner's matrix retrieval (lakehouse_answers_v1 +
+// arch + symbols) supplies the cross-PR context that tree-split
+// used to synthesize from scratch. Tree-split itself was retired
+// 2026-04-27 (see commit deleting treeSplitDiff/callCloud/SHARD_*).
 const MAX_DIFF_CHARS = 40000;
-// Tree-split kicks in above this. 30KB is below MAX_DIFF_CHARS so we
-// curate BEFORE truncation would happen — never lose signal to a hard
-// cut. Shard size is chosen so ~10 shards cover PR #8-size diffs in a
-// reasonable round-trip budget.
-const CURATION_THRESHOLD = 30000;
-const DIFF_SHARD_SIZE = 4500;
 const CALL_TIMEOUT_MS = 120_000;
+// Mode runner can take longer than a raw /v1/chat call because it does
+// pathway-fingerprint lookup + matrix retrieval + relevance filter
+// before the LLM call. Budget extra time so we don't trip on a slow
+// answers-corpus search.
+const MODE_RUNNER_TIMEOUT_MS = 240_000;
 const REPO_ROOT = "/home/profit/lakehouse";

 export interface InferenceContext {
@ -86,26 +87,23 @@ export async function runInferenceCheck(
    }];
  }

-  // Diff source for the cloud prompt — either the raw diff (small
-  // enough to fit), or a tree-split scratchpad (curation layer). We
-  // prefer curation to truncation: truncation silently drops files
-  // past the window; curation summarizes them so the cloud still sees
-  // what changed, just densified.
-  let diffForPrompt: string;
-  let curationNote = "";
-  if (diff.length > CURATION_THRESHOLD) {
-    const ts = await treeSplitDiff(diff, verifiable);
-    diffForPrompt = ts.scratchpad;
-    curationNote = ` (curated: ${diff.length} chars → ${ts.shards} shards → scratchpad ${ts.scratchpad.length} chars)`;
-  } else {
-    diffForPrompt = diff;
-  }
-  // Belt-and-suspenders truncation — even a tree-split scratchpad
-  // shouldn't exceed MAX_DIFF_CHARS in practice, but guard anyway so
-  // pathological inputs can't burst the prompt.
-  const truncated = diffForPrompt.length > MAX_DIFF_CHARS
-    ? diffForPrompt.slice(0, MAX_DIFF_CHARS) + `\n...[${diffForPrompt.length - MAX_DIFF_CHARS} more chars truncated]`
-    : diffForPrompt;
+  // 2026-04-27 architecture simplification: dropped the tree-split
+  // scratchpad layer. Rationale: the mode runner's pr_audit pipeline
+  // pulls from lakehouse_answers_v1 (gold-standard prior audits) +
+  // lakehouse_arch_v1 + lakehouse_symbols_v1 via matrix retrieval. That
+  // corpus IS the cross-PR context the tree-split was synthesizing
+  // from scratch on every audit run. With the distillation substrate
+  // shipped (commits 27b1d27..1b433a9), per-shard fact extraction is
+  // redundant — and gpt-oss:120b at 168 calls/audit was the dominant
+  // cost. Now: truncate diff to MAX_DIFF_CHARS, hand straight to the
+  // mode runner, let retrieval supply context. ONE strong-model call
+  // per consensus rep × N=3 reps = 3 calls per audit (+1 per tied claim).
+  const truncated = diff.length > MAX_DIFF_CHARS
+    ? diff.slice(0, MAX_DIFF_CHARS) + `\n...[${diff.length - MAX_DIFF_CHARS} more chars truncated — the pr_audit mode runner has matrix retrieval against lakehouse_answers_v1 + arch + symbols for cross-PR context]`
+    : diff;
+  const curationNote = diff.length > MAX_DIFF_CHARS
+    ? ` (truncated ${diff.length}→${MAX_DIFF_CHARS} chars; matrix retrieval supplies cross-PR context)`
+    : "";

  // Build the reviewer prompt in the same shape as run_codereview's
  // review stage (llm_team_ui.py:10950), adapted for claim verification:
@ -114,79 +112,20 @@ export async function runInferenceCheck(
  //   "Review: bugs/security/perf/style/edge. Provide corrected code."
  // We add: claim list upfront + ask for structured JSON verdict.
  //
-  // When the diff was curated (tree-split scratchpad), we add an
-  // explicit anti-false-positive instruction: the scratchpad is a
-  // distillation, not the full source, so absence-from-scratchpad is
-  // NOT evidence of absence-from-diff. Mirrors the fix we made in
-  // scrum_master's review prompt for the same class of error.
+  // Curation flag is now just a truncation flag — when the diff was
+  // cut, tell the reviewer it didn't see the full picture so it doesn't
+  // confidently mark a claim NOT BACKED based on absence in the
+  // (potentially incomplete) input.
  const isCurated = curationNote.length > 0;
-  const curationGuard = isCurated
-    ? [
-        "",
-        "CRITICAL: the 'Diff' below is a curated multi-shard scratchpad,",
-        "NOT the full raw diff. The scratchpad distills each shard down",
-        "to facts useful for claim verification and drops the rest.",
-        "DO NOT flag a function/field/feature as 'missing' or 'not",
-        "implemented' based solely on its absence from the scratchpad —",
-        "absence in a distillation is NOT evidence of absence in the",
-        "actual diff. Only judge a claim NOT BACKED when the scratchpad",
-        "DIRECTLY contradicts it (e.g. scratchpad shows the function was",
-        "added empty, or shows the claimed code path is a stub).",
-        "Skip the unflagged_gaps section entirely when operating on a",
-        "curated scratchpad — you can't reliably detect gaps from a",
-        "distillation, and false positives there are worse than misses.",
-      ].join("\n")
-    : "";
-  const systemMsg = [
-    "You review pull-request diffs against the author's own ship-claims.",
-    "For each claim, decide: is it backed by actual code in the diff, or is",
-    "it placeholder / aspirational / unwired?",
-    "",
-    "A claim is BACKED when the diff contains a real code path that delivers",
-    "the claimed behavior. A claim is NOT BACKED when:",
-    "  - the claim asserts functionality but the diff only adds types/fields",
-    "    with no consumer",
-    "  - the claim mentions tests but no test function was added",
-    "  - the claim claims integration but the integration point is a stub",
-    "  - the diff contains unimplemented!() / todo!() / TODO comments",
-    "  - the claim says 'works end-to-end' but the diff has no end-to-end test",
-    curationGuard,
-    "",
-    "Respond with strict JSON only. No prose before or after. Shape:",
-    "{",
-    '  "claim_verdicts": [',
-    '    {"claim_idx": 0, "backed": false, "evidence": "short reason"}',
-    "  ],",
-    '  "unflagged_gaps": [',
-    '    {"location": "file:line", "summary": "short description"}',
-    "  ]",
-    "}",
-  ].join("\n");
+  const prNumber = ctx?.pr_number ?? 0;

-  const userMsg = [
-    `Ship-claims the author made (numbered 0..N-1):`,
-    verifiable.map((c, i) => `  ${i}. [${c.strength}] "${c.text}" at ${c.location}`).join("\n"),
-    "",
-    `Diff:`,
-    "```",
-    truncated,
-    "```",
-    "",
-    `For each numbered claim above, emit a claim_verdicts entry. For gaps the`,
-    `author DIDN'T claim but that look like placeholder code, emit unflagged_gaps.`,
-    `Strict JSON only, matching the shape described. No prose outside JSON.`,
-  ].join("\n");
-
-  // N=3 consensus — run the primary reviewer in parallel, collect
-  // all three parsed responses, majority-vote per claim. Parallel
-  // (Promise.all) because each call is ~20-30s and they're independent;
-  // wall-clock stays ~same as single call, cost 3x tokens. Empirical
-  // justification: in 3-run determinism tests, 7/8 findings were
-  // stable but 1 flipped across runs — majority vote stabilizes the
-  // flipping class without losing the stable signal.
+  // N=3 consensus — fire the mode runner three times in parallel.
+  // Each /v1/mode/execute call composes pathway memory + answers corpus
+  // + JSON-shaped pr_audit framing internally, so the auditor's only
+  // job here is to vote-aggregate. Wall-clock ~= single call.
  const primaryRuns = await Promise.all(
    Array.from({ length: N_CONSENSUS }, () =>
-      runCloudInference(systemMsg, userMsg, MODEL)),
+      runModeRunnerInference(truncated, verifiable, prNumber, isCurated, MODEL)),
  );

  const parsedRuns = primaryRuns.filter(r => r.parsed !== null);
@ -209,9 +148,19 @@ export async function runInferenceCheck(
  interface Votes { trues: number; falses: number; evidences: string[] }
  const votesByClaim = new Map<number, Votes>();
  const unflaggedByRun: any[][] = [];
-  let totalTokens = 0;
+  // The N=3 consensus calls run via Promise.all — wall-clock is
+  // bounded by the SLOWEST call, not the sum. Pre-2026-04-27 we
+  // summed and reported "Xms total" which double/triple-counted
+  // (Opus self-audit caught it). Use max for accurate wall-clock.
+  let maxLatencyMs = 0;
+  let totalEnrichedChars = 0;
+  let bugFingerprintsSeen = 0;
+  let matrixKeptSeen = 0;
  for (const run of parsedRuns) {
-    totalTokens += run.tokens;
+    maxLatencyMs = Math.max(maxLatencyMs, run.latency_ms ?? 0);
+    totalEnrichedChars += run.enriched_chars ?? 0;
+    bugFingerprintsSeen = Math.max(bugFingerprintsSeen, run.bug_fingerprints ?? 0);
+    matrixKeptSeen = Math.max(matrixKeptSeen, run.matrix_kept ?? 0);
    unflaggedByRun.push(Array.isArray(run.parsed?.unflagged_gaps) ? run.parsed.unflagged_gaps : []);
    for (const v of run.parsed?.claim_verdicts ?? []) {
      const idx = Number(v?.claim_idx);
@ -233,10 +182,11 @@ export async function runInferenceCheck(
  findings.push({
    check: "inference",
    severity: "info",
-    summary: `cloud review completed (model=${MODEL}, consensus=${parsedRuns.length}/${N_CONSENSUS}, tokens=${totalTokens})${curationNote}`,
+    summary: `pr_audit mode runner completed (model=${MODEL}, consensus=${parsedRuns.length}/${N_CONSENSUS}, ${maxLatencyMs}ms wall-clock)${curationNote}`,
    evidence: [
      `claims voted: ${votesByClaim.size}`,
      `parsed runs: ${parsedRuns.length} / ${N_CONSENSUS}`,
+      `enrichment: ${bugFingerprintsSeen} bug fingerprints, ${matrixKeptSeen} answers-corpus chunks, prompt avg ${Math.round(totalEnrichedChars / Math.max(parsedRuns.length, 1))} chars`,
    ],
  });

@ -266,8 +216,9 @@ export async function runInferenceCheck(
      notBacked = false;
      resolution = "majority_backed";
    } else {
-      // Tie. Run tie-breaker with a different-architecture model.
-      const tb = await runCloudInference(systemMsg, userMsg, TIEBREAKER_MODEL);
+      // Tie. Run tie-breaker with a different-architecture model
+      // through the same mode runner so framing/enrichment match.
+      const tb = await runModeRunnerInference(truncated, verifiable, prNumber, isCurated, TIEBREAKER_MODEL);
      if (tb.parsed) {
        const tv = (tb.parsed.claim_verdicts ?? []).find((v: any) => Number(v?.claim_idx) === idx);
        if (tv?.backed === false) {
@ -335,9 +286,13 @@ export async function runInferenceCheck(
  // don't exit before extraction lands; the systemd poller has plenty
  // of headroom (90s cycle vs ~15s extraction). A failure inside
  // extractAndPersistFacts is caught + logged but never throws.
+  // Post-2026-04-27: extraction now runs against the truncated diff
+  // (no scratchpad to extract from since tree-split was retired).
+  // Fact extraction is still useful for surfacing entities/symbols
+  // into audit_facts.jsonl even from truncated input.
  if (isCurated && ctx && process.env.LH_AUDITOR_SKIP_EXTRACT !== "1") {
    try {
-      await extractAndPersistFacts(diffForPrompt, ctx);
+      await extractAndPersistFacts(truncated, ctx);
    } catch (e) {
      console.error(`[inference] fact extraction failed: ${(e as Error).message}`);
    }
@ -394,60 +349,106 @@ export async function runInferenceCheck(
  return findings;
 }

-// Single cloud call — the consensus loop calls this N times in
-// parallel. Returns the parsed JSON shape + token usage + any error
-// diagnostic. NEVER throws; the consensus aggregator handles partial
-// failures by dropping non-parsed runs from the vote.
+// Single mode-runner call — consensus + tie-breaker dispatch through
+// here. Returns parsed JSON shape + telemetry from /v1/mode/execute
+// (latency, enrichment metrics) + any error diagnostic. NEVER throws.
+// The consensus aggregator handles partial failures by dropping
+// non-parsed runs from the vote.
 interface CloudRunResult {
  parsed: any | null;
-  tokens: number;
+  latency_ms: number;        // body.latency_ms from /v1/mode/execute; 0 on failure or non-numeric value
+  enriched_chars: number;    // body.enriched_prompt_chars; 0 on failure or non-numeric value
+  bug_fingerprints: number;  // body.sources.bug_fingerprints_count; 0 on failure or non-numeric value
+  matrix_kept: number;       // body.sources.matrix_chunks_kept; 0 on failure or non-numeric value
  error?: string;       // "unreachable" | "non_200" | "unparseable"
  diagnostic?: string;  // first 200 chars for debugging
  model: string;
 }

-async function runCloudInference(systemMsg: string, userMsg: string, model: string): Promise<CloudRunResult> {
+async function runModeRunnerInference(
+  diffOrScratchpad: string,
+  claims: Claim[],
+  prNumber: number,
+  isCurated: boolean,
+  model: string,
+): Promise<CloudRunResult> {
+  // Builds user_question (claim list + a truncation note when isCurated) and POSTs it with
+  // diffOrScratchpad — now always the raw, possibly truncated diff — as file_content. The JSON
+  // shape + strict-output rules live in pr_audit's framing (mode.rs FRAMING_PR_AUDIT), not here.
+  const claimDigest = claims
+    .map((c, i) => `  ${i}. [${c.strength}] "${c.text}" at ${c.location}`)
+    .join("\n");
+  const curationNote = isCurated
+    ? "\n\nNOTE: the FILE below is a TRUNCATED prefix of the raw diff, not the full diff. Absence from the visible portion is NOT evidence of absence in the actual diff. Only mark backed=false on direct contradiction (e.g. the visible diff shows the function is empty / a stub). Skip unflagged_gaps entirely when the diff is truncated."
+    : "";
+  const userQuestion = [
+    "Verify each ship-claim against the diff.",
+    "",
+    "Ship-claims (numbered 0..N-1):",
+    claimDigest,
+    curationNote,
+    "",
+    "Every claim above must produce exactly one claim_verdicts entry. Output strict JSON only — no prose outside the JSON object.",
+  ].join("\n");
+
  let resp: Response;
  try {
-    resp = await fetch(`${GATEWAY}/v1/chat`, {
+    resp = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        provider: "ollama_cloud",
-        model,
-        messages: [
-          { role: "system", content: systemMsg },
-          { role: "user", content: userMsg },
-        ],
-        // temp=0 (greedy) + think=true. think=true is required for
-        // gpt-oss:120b — without it the model returns empty content
-        // on large prompts. Variance from the think trace is observed
-        // in practice, which is why we use N=3 consensus, not single-
-        // call determinism.
-        max_tokens: 3000,
-        temperature: 0,
-        think: true,
+        task_class: "pr_audit",
+        file_path: `pr-${prNumber}.diff`,
+        file_content: diffOrScratchpad,
+        user_question: userQuestion,
+        force_model: model,
+        force_temperature: 0,
      }),
-      signal: AbortSignal.timeout(CALL_TIMEOUT_MS),
+      signal: AbortSignal.timeout(MODE_RUNNER_TIMEOUT_MS),
    });
  } catch (e) {
-    return { parsed: null, tokens: 0, error: "unreachable", diagnostic: (e as Error).message.slice(0, 200), model };
+    return {
+      parsed: null, latency_ms: 0, enriched_chars: 0, bug_fingerprints: 0, matrix_kept: 0,
+      error: "unreachable", diagnostic: (e as Error).message.slice(0, 200), model,
+    };
  }
  if (!resp.ok) {
-    return { parsed: null, tokens: 0, error: "non_200", diagnostic: `${resp.status}: ${(await resp.text()).slice(0, 160)}`, model };
+    return {
+      parsed: null, latency_ms: 0, enriched_chars: 0, bug_fingerprints: 0, matrix_kept: 0,
+      error: "non_200", diagnostic: `${resp.status}: ${(await resp.text().catch(() => "")).slice(0, 160)}`, model,
+    };
  }
  let body: any;
  try { body = await resp.json(); }
-  catch (e) { return { parsed: null, tokens: 0, error: "unparseable", diagnostic: (e as Error).message, model }; }
-  const content: string = body?.choices?.[0]?.message?.content ?? "";
-  const tokens: number = body?.usage?.total_tokens ?? 0;
-  const parsed = extractJson(content);
-  if (!parsed) {
-    return { parsed: null, tokens, error: "unparseable", diagnostic: content.slice(0, 200), model };
+  catch (e) {
+    return {
+      parsed: null, latency_ms: 0, enriched_chars: 0, bug_fingerprints: 0, matrix_kept: 0,
+      error: "unparseable", diagnostic: (e as Error).message.slice(0, 200), model,
+    };
  }
-  return { parsed, tokens, model };
+  const content: string = typeof body?.response === "string" ? body.response : "";
+  const parsed = extractJson(content);
+  // Number-coerced extractors: numeric strings are converted, anything
+  // else (non-numeric string, undefined, NaN, ±Infinity) collapses to 0
+  // instead of poisoning downstream arithmetic. Caught 2026-04-27 by
+  // kimi_architect self-audit — ?. and ?? only catch null/undefined.
+  const num = (v: unknown): number => {
+    const n = typeof v === "number" ? v : Number(v);
+    return Number.isFinite(n) ? n : 0;
+  };
+  return {
+    parsed,
+    latency_ms: num(body?.latency_ms),
+    enriched_chars: num(body?.enriched_prompt_chars),
+    bug_fingerprints: num(body?.sources?.bug_fingerprints_count),
+    matrix_kept: num(body?.sources?.matrix_chunks_kept),
+    error: parsed ? undefined : "unparseable",
+    diagnostic: parsed ? undefined : content.slice(0, 200),
+    model,
+  };
 }

+
 async function persistDiscrepancies(ctx: InferenceContext, discrepancies: any[]): Promise<void> {
  await mkdir("/home/profit/lakehouse/data/_kb", { recursive: true });
  const rows = discrepancies.map(d => JSON.stringify({
@ -490,94 +491,7 @@ async function extractAndPersistFacts(scratchpad: string, ctx: InferenceContext)
  await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(row) + "\n");
 }

-// Curation via tree-split — ports the scrum_master pattern into the
-// inference check. Shards the raw diff into DIFF_SHARD_SIZE chunks,
-// summarizes each shard *against the claim-verification task* so the
-// summary preserves exactly what the cloud needs to judge claims
-// (function signatures, struct fields, deletions, new files), drops
-// everything else. Merges into a compact scratchpad.
-//
-// Cost: N cloud calls for the shard summaries + 1 cloud call for the
-// final verification = N+1 calls instead of 1. Mitigation: shards run
-// serially (not parallel) to keep gateway load bounded; summary calls
-// use max_tokens=400 so they're fast (~2s each on gpt-oss:120b).
-//
-// Determinism: each shard summary call uses temp=0 + think=true (same
-// as the top-level inference call), so identical input yields
-// identical scratchpad. The final verification call then sees a
-// stable scratchpad, giving stable verdicts.
-async function treeSplitDiff(
-  fullDiff: string,
-  claims: Claim[],
-): Promise<{ scratchpad: string; shards: number }> {
-  const shards: Array<{ from: number; to: number; text: string }> = [];
-  for (let i = 0; i < fullDiff.length; i += DIFF_SHARD_SIZE) {
-    const end = Math.min(i + DIFF_SHARD_SIZE, fullDiff.length);
-    shards.push({ from: i, to: end, text: fullDiff.slice(i, end) });
-  }
-  // Curate the claim list into a short form the summary prompt can
-  // use to bias extraction toward relevant facts.
-  const claimDigest = claims.map((c, i) =>
-    `${i}. [${c.strength}] "${c.text.slice(0, 100)}"`
-  ).join("\n");

-  let scratchpad = "";
-  for (const [si, shard] of shards.entries()) {
-    const prompt = [
-      `You are summarizing shard ${si + 1}/${shards.length} (chars ${shard.from}..${shard.to}) of a PR diff.`,
-      `The downstream task will verify these ship-claims against the full-PR summary. Extract ONLY facts that could confirm or refute these claims:`,
-      "",
-      claimDigest,
-      "",
-      "Extract: new function/method signatures, struct fields, deletions, new files, wiring (function X calls Y), absence-of-implementation markers, TODO comments on added lines.",
-      "Skip: comment-only edits, whitespace, import reordering, unrelated cosmetic changes.",
-      "",
-      "─────── shard diff ───────",
-      shard.text,
-      "─────── end shard ───────",
-      "",
-      "Output: up to 180 words of facts in bullet form. No prose preamble, no claim verdicts (that's for the downstream step).",
-    ].join("\n");
-
-    const r = await callCloud(prompt, 400);
-    if (r.content) {
-      scratchpad += `\n--- shard ${si + 1} (chars ${shard.from}..${shard.to}) ---\n${r.content.trim()}\n`;
-    }
-  }
-  return { scratchpad: scratchpad.trim(), shards: shards.length };
-}
-
-// Minimal cloud caller used only by treeSplitDiff — same gateway +
-// model as the top-level call, but think=false. Shards are small
-// (≤DIFF_SHARD_SIZE ~4500 chars) and the task is pure fact
-// extraction, not reasoning. think=true on the shards introduced
-// variance in reasoning traces that compounded across 23 calls into
-// a non-deterministic scratchpad (observed during curation
-// validation: same-SHA runs produced 5/7/8 final findings).
-// think=false on small prompts is stable — only breaks at the main
-// call's 10K+ prompt size, which keeps think=true.
-async function callCloud(prompt: string, maxTokens: number): Promise<{ content: string }> {
-  try {
-    const r = await fetch(`${GATEWAY}/v1/chat`, {
-      method: "POST",
-      headers: { "content-type": "application/json" },
-      body: JSON.stringify({
-        provider: "ollama_cloud",
-        model: MODEL,
-        messages: [{ role: "user", content: prompt }],
-        max_tokens: maxTokens,
-        temperature: 0,
-        think: false,
-      }),
-      signal: AbortSignal.timeout(CALL_TIMEOUT_MS),
-    });
-    if (!r.ok) return { content: "" };
-    const j: any = await r.json();
-    return { content: j?.choices?.[0]?.message?.content ?? "" };
-  } catch {
-    return { content: "" };
-  }
-}

 // Pull out plausible code-symbol names from a summary string.
 // Matches:
--- a/auditor/checks/kimi_architect.ts
+++ b/auditor/checks/kimi_architect.ts
@ -0,0 +1,461 @@
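+// Kimi-architect check — second-pass senior architectural review using
+// Kimi K2.6 via the gateway's /v1/chat endpoint. The default provider is
+// ollama_cloud; the direct kimi-for-coding path (provider=kimi) remains
+// wired in the gateway as a fallback (see KIMI_PROVIDER below).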
+//
+// Runs AFTER the deepseek inference check (N=3 consensus) and the
+// static/kb_query checks. Reads their findings as context and asks Kimi
+// "what did everyone else miss?" — complementing the cheap-consensus
+// voting with a sparse senior pass that catches load-bearing issues
+// (compile errors, false telemetry, schema bypasses, etc.) which the
+// voting structure can't see.
+//
+// Why Kimi here and not in the inner inference loop:
+// - Cost: ~3min wall-clock per call vs ~30s for deepseek consensus.
+// - TOS: api.kimi.com is User-Agent-gated (see crates/gateway/src/v1/
+//   kimi.rs); cost-bounded calls only.
+// - Value: experiment 2026-04-27 showed 7/7 grounding rate with full
+//   files vs ~50% on truncated input. Best as a sparse complement, not
+//   a replacement.
+//
+// Failure-isolated: any Kimi error returns a single info-level Finding
+// "kimi_architect skipped — <reason>" so the existing audit pipeline
+// is never blocked by a Kimi outage / TOS revocation / 429.
+//
+// Cost cap: if a kimi_verdicts/<pr>-<sha>.json file exists less than 24h
+// old, return cached findings without calling upstream. New commits
+// produce new SHAs so this is per-head, not per-day.
+//
+// Off by default: caller checks LH_AUDITOR_KIMI=1 before invoking.
+
+import { readFile, writeFile, mkdir, appendFile, stat, realpath } from "node:fs/promises";
+import { existsSync, realpathSync } from "node:fs";
+import { dirname, join, resolve } from "node:path";
+import type { Finding, CheckKind } from "../types.ts";
+
+const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
+const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts";
+const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl";
+const REPO_ROOT = "/home/profit/lakehouse";
+// Canonicalize at module load — REPO_ROOT itself may be a symlink in
+// some environments (e.g. /home/profit is a bind-mount). Computing
+// once at startup means the per-finding grounding loop can compare
+// realpath(target) against this stable anchor.
+const REPO_ROOT_REAL = (() => {
+  try { return realpathSync(REPO_ROOT); }
+  catch { return REPO_ROOT; }
+})();
+// 15 min budget. Bun's fetch has an intrinsic ~300s limit that our
+// AbortController + setTimeout combo could not override; we use curl
+// via Bun.spawn instead (callKimi below). Curl honors -m for max
+// transfer time without a hard intrinsic ceiling.
+const CALL_TIMEOUT_MS = 900_000;
+const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
+const MAX_DIFF_CHARS = 180_000;
+const MAX_PRIOR_FINDINGS = 50;
+// Default provider/model = ollama_cloud/kimi-k2.6. Pre-2026-04-27 we
+// went direct to api.kimi.com, but Ollama Cloud Pro now exposes the
+// same model legitimately, so we route there to avoid User-Agent
+// gating. The api.kimi.com path (provider=kimi) remains wired in the
+// gateway as a fallback for when Ollama Cloud is upstream-broken.
+const KIMI_PROVIDER = process.env.LH_AUDITOR_KIMI_PROVIDER ?? "ollama_cloud";
+const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-k2.6";
+// Cross-lineage alternation. 2026-04-27 J's call: Opus is too
+// expensive to auto-fire (~$0.30/audit). Kimi K2.6 via Go-sub is
+// effectively free; Haiku 4.5 via Zen is ~$0.04. Alternate between
+// them so we get cross-lineage signal (Moonshot vs Anthropic) on
+// every PR's audit history without burning the budget.
+//
+// Default: Kimi K2.6 on even audits, Haiku 4.5 on odd. Each PR's
+// audits flip between vendors as new SHAs come in.
+//
+// Frontier models (Opus 4.7, GPT-5.5, Gemini 3.1) are NOT in the
+// auto path. Operator hands distilled findings to a frontier model
+// manually when high-leverage decisions need it. Removing Opus from
+// auto-promotion saves ~$1-3/day on the daemon at our cadence.
+//
+// Override the alternation entirely with LH_AUDITOR_KIMI_MODEL
+// (forces one model regardless of audit count); set
+// LH_AUDITOR_KIMI_ALT_MODEL to the alternate.
+const ALT_MODEL = process.env.LH_AUDITOR_KIMI_ALT_MODEL ?? "claude-haiku-4-5";
+const ALT_PROVIDER = process.env.LH_AUDITOR_KIMI_ALT_PROVIDER ?? "opencode";
+const FORCE_DEFAULT = process.env.LH_AUDITOR_KIMI_MODEL !== undefined && process.env.LH_AUDITOR_KIMI_MODEL !== "";
+
+// Choose which provider/model pair handles this audit.
+//   - An env-pinned model (FORCE_DEFAULT) always wins and disables
+//     alternation.
+//   - Otherwise odd audit indices go to the alternate pair and even
+//     indices to the default pair.
+// `promoted` is true only when the alternate pair is returned.
+function selectModel(diffLen: number, auditIndex: number = 0): { provider: string; model: string; promoted: boolean } {
+  // diffLen is accepted but intentionally unused for now; it is kept in
+  // the signature so a size-based routing rule can be added later.
+  void diffLen;
+  const useAlternate = !FORCE_DEFAULT && auditIndex % 2 === 1;
+  return useAlternate
+    ? { provider: ALT_PROVIDER, model: ALT_MODEL, promoted: true }
+    : { provider: KIMI_PROVIDER, model: KIMI_MODEL, promoted: false };
+}
+// Model-aware max_tokens. Different upstream APIs cap at different
+// limits and reject requests that exceed them:
+//   - Anthropic Opus 4.x: 32K output (with extended-output header)
+//   - Anthropic Haiku 4.5: 8K output
+//   - Kimi K2.6 (reasoning): 128K — needs headroom because
+//     reasoning_content counts against the budget
+//   - Default: 16K, conservative middle ground
+//
+// 2026-04-27 BLOCK from Opus self-audit: the prior single-default of
+// 128K worked silently (Anthropic clamps server-side) but was
+// technically invalid. Per-model caps make it explicit. Override via
+// LH_AUDITOR_KIMI_MAX_TOKENS to force a value (also fixes the empty-
+// env Number("") -> 0 trap by using `||` not `??`).
+const MAX_TOKENS_OVERRIDE = Number(process.env.LH_AUDITOR_KIMI_MAX_TOKENS) || 0;
+
+// Resolve the max_tokens value to send for `model`.
+// A positive LH_AUDITOR_KIMI_MAX_TOKENS override wins outright;
+// otherwise the first matching model-name prefix decides, and anything
+// unrecognized gets the 16K default.
+function maxTokensFor(model: string): number {
+  if (MAX_TOKENS_OVERRIDE > 0) return MAX_TOKENS_OVERRIDE;
+  const capsByPrefix: Array<[string[], number]> = [
+    [["claude-opus"], 32_000],
+    [["claude-haiku", "claude-sonnet"], 8_192],
+    [["kimi-"], 128_000],
+    [["gpt-5", "o1", "o3", "o4"], 32_000],
+  ];
+  for (const [prefixes, cap] of capsByPrefix) {
+    if (prefixes.some((prefix) => model.startsWith(prefix))) return cap;
+  }
+  return 16_000;
+}
+
+// Identifies the PR head being audited. Both fields feed the verdict
+// cache filename (`<pr_number>-<first 12 chars of head_sha>.json`) and
+// are quoted in the review prompt.
+export interface KimiArchitectContext {
+  // Pull-request number.
+  pr_number: number;
+  // Commit SHA of the PR head under review.
+  head_sha: string;
+}
+
+// On-disk shape of a cached verdict: written as JSON by persistVerdict
+// and read back by loadCachedVerdict. A subset is also appended to the
+// metrics JSONL by appendMetrics.
+interface KimiVerdictFile {
+  pr_number: number;
+  head_sha: string;
+  // ISO-8601 timestamp taken when the verdict object was built.
+  cached_at: string;
+  // Model name that produced this verdict (the one picked by selectModel).
+  model: string;
+  // Wall-clock duration of the upstream call as measured in callKimi.
+  latency_ms: number;
+  // finish_reason of the first choice; "unknown" when upstream omits it.
+  finish_reason: string;
+  // Token counts from the response; each defaults to 0 when absent.
+  usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
+  // Unparsed message content as returned by the gateway.
+  raw_content: string;
+  // Findings lifted from raw_content by parseFindings.
+  findings: Finding[];
+  // Citation check summary: total findings, how many verified, and
+  // verified / total (0 when there are no findings).
+  grounding: { total: number; verified: number; rate: number };
+}
+
+// Run the second-pass architectural review for one PR head.
+//
+// Flow:
+//   1. If a fresh outage sentinel exists, return a "skipped" finding
+//      without calling upstream.
+//   2. If a verdict for this head SHA is cached within the TTL, return it.
+//   3. Otherwise pick a model (alternating per PR), call the gateway,
+//      parse and ground the findings, append metrics, and cache the
+//      verdict when it contains at least one finding.
+//
+// Parameters:
+//   diff           unified diff of the PR (truncated inside buildPrompt)
+//   priorFindings  findings from earlier checks, shown to the model as
+//                  "do not repeat" context
+//   ctx            PR number + head SHA; keys the cache file
+//
+// Returns the parsed findings, or a single info-level finding when the
+// call was skipped, failed, or produced nothing parseable. An upstream
+// failure never rejects; it is reported as a "skipped" finding.
+export async function runKimiArchitectCheck(
+  diff: string,
+  priorFindings: Finding[],
+  ctx: KimiArchitectContext,
+): Promise<Finding[]> {
+  const cachePath = join(KIMI_VERDICTS_DIR, `${ctx.pr_number}-${ctx.head_sha.slice(0, 12)}.json`);
+  const outageSentinel = `${cachePath}.outage`;
+  const OUTAGE_TTL_MS = 10 * 60 * 1000;
+
+  // Outage negative-cache — if upstream failed within OUTAGE_TTL_MS,
+  // skip this audit and return immediately. Prevents the daemon from
+  // hammering a downed Kimi/Anthropic upstream every 90s.
+  if (existsSync(outageSentinel)) {
+    try {
+      const s = await stat(outageSentinel);
+      if (Date.now() - s.mtimeMs < OUTAGE_TTL_MS) {
+        const note = JSON.parse(await readFile(outageSentinel, "utf8"));
+        return [skipFinding(`upstream still down (cached ${Math.round((Date.now() - s.mtimeMs) / 1000)}s ago): ${String(note.reason).slice(0, 160)}`)];
+      }
+    } catch { /* malformed sentinel — fall through to fresh call */ }
+  }
+
+  // Cost cap — return cached findings if a verdict for this exact head
+  // SHA was generated within the TTL.
+  const cached = await loadCachedVerdict(cachePath);
+  if (cached) {
+    return cached.findings.length > 0
+      ? cached.findings
+      : [{ check: "kimi_architect" as CheckKind, severity: "info", summary: "kimi_architect cached — 0 findings", evidence: [`cache: ${cachePath}`] }];
+  }
+
+  // Alternate model based on how many audits this PR has had — gives
+  // cross-lineage signal (Kimi/Moonshot ↔ Haiku/Anthropic) on every
+  // PR's audit history. Count is derived from existing verdict files
+  // for this PR. Only `.json` verdicts count: `.outage` sentinels share
+  // the `<pr>-` prefix but represent failed calls, and counting them
+  // would flip the alternation after every outage.
+  let auditIndex = 0;
+  try {
+    if (existsSync(KIMI_VERDICTS_DIR)) {
+      const all = require("node:fs").readdirSync(KIMI_VERDICTS_DIR) as string[];
+      const prefix = `${ctx.pr_number}-`;
+      auditIndex = all.filter((f) => f.startsWith(prefix) && f.endsWith(".json")).length;
+    }
+  } catch { /* default 0 — Kimi */ }
+
+  const selected = selectModel(diff.length, auditIndex);
+  let response: { content: string; usage: any; finish_reason: string; latency_ms: number };
+  try {
+    response = await callKimi(buildPrompt(diff, priorFindings, ctx), selected.provider, selected.model);
+  } catch (e) {
+    // Negative-cache for 10 min on outage: without this, every audit
+    // cycle re-calls upstream while it's still down. Use a sentinel
+    // file with mtime check rather than persisting a verdict so the
+    // happy-path cache reader doesn't have to special-case it.
+    //
+    // The reason is normalized once so a non-Error throw can't make
+    // this handler itself throw.
+    const reason = (e instanceof Error ? e.message : String(e)).slice(0, 200);
+    try {
+      // The verdicts directory may not exist yet (it is otherwise only
+      // created on the first successful persist); without it the
+      // sentinel write fails and the negative cache never engages.
+      await mkdir(KIMI_VERDICTS_DIR, { recursive: true });
+      await writeFile(outageSentinel, JSON.stringify({ at: new Date().toISOString(), reason }));
+    } catch { /* best-effort — the skip finding below is still returned */ }
+    return [skipFinding(`kimi call failed (${selected.model}): ${reason}`)];
+  }
+
+  const findings = parseFindings(response.content);
+  const grounding = await computeGrounding(findings);
+
+  const verdict: KimiVerdictFile = {
+    pr_number: ctx.pr_number,
+    head_sha: ctx.head_sha,
+    cached_at: new Date().toISOString(),
+    model: selected.model,
+    latency_ms: response.latency_ms,
+    finish_reason: response.finish_reason,
+    usage: {
+      prompt_tokens: response.usage?.prompt_tokens ?? 0,
+      completion_tokens: response.usage?.completion_tokens ?? 0,
+      total_tokens: response.usage?.total_tokens ?? 0,
+    },
+    raw_content: response.content,
+    findings,
+    grounding,
+  };
+
+  // Cache-poisoning guard: when parseFindings returns 0 findings,
+  // persisting the empty verdict would short-circuit all future audits
+  // in the TTL window with a useless cached "0 findings" result. Better
+  // to leave no cache and re-call upstream next time. Metrics are
+  // always appended — observability shouldn't depend on whether
+  // findings parsed.
+  await appendMetrics(verdict);
+  if (findings.length > 0) {
+    await persistVerdict(cachePath, verdict);
+    return findings;
+  }
+  return [{
+    check: "kimi_architect" as CheckKind,
+    severity: "info",
+    summary: `kimi_architect produced 0 ranked findings (${response.finish_reason}, ${verdict.usage.completion_tokens} tokens) — not cached`,
+    evidence: [`raw saved (no cache): see kimi_audits.jsonl ${verdict.cached_at}`],
+  }];
+}
+
+// Read a cached verdict from `path`. Yields null when the file is
+// absent, older than CACHE_TTL_MS (by mtime), unreadable, or not valid
+// JSON. The parsed value is cast, not validated.
+async function loadCachedVerdict(path: string): Promise<KimiVerdictFile | null> {
+  try {
+    if (!existsSync(path)) return null;
+    const { mtimeMs } = await stat(path);
+    const ageMs = Date.now() - mtimeMs;
+    if (ageMs > CACHE_TTL_MS) return null;
+    const raw = await readFile(path, "utf8");
+    return JSON.parse(raw) as KimiVerdictFile;
+  } catch {
+    return null;
+  }
+}
+
+// Assemble the review prompt sent to the model.
+//   diff           PR diff; cut to MAX_DIFF_CHARS with a trailing
+//                  truncation marker when longer
+//   priorFindings  earlier findings; info-level ones are dropped and at
+//                  most MAX_PRIOR_FINDINGS are listed, one bullet each
+//   ctx            PR number and head SHA quoted in the opening line
+// The OUTPUT FORMAT section of the prompt is the contract that
+// parseFindings relies on — keep the two in sync.
+function buildPrompt(diff: string, priorFindings: Finding[], ctx: KimiArchitectContext): string {
+  // Cap the diff and tell the model how much was cut.
+  const truncatedDiff = diff.length > MAX_DIFF_CHARS
+    ? diff.slice(0, MAX_DIFF_CHARS) + `\n\n... [truncated; original diff was ${diff.length} chars]`
+    : diff;
+
+  // One bullet per prior finding: "[check/severity] summary — first
+  // evidence entry (max 160 chars)".
+  const priorBlock = priorFindings
+    .filter(f => f.severity !== "info")
+    .slice(0, MAX_PRIOR_FINDINGS)
+    .map(f => `- [${f.check}/${f.severity}] ${f.summary}${f.evidence?.[0] ? ` — ${f.evidence[0].slice(0, 160)}` : ""}`)
+    .join("\n");
+
+  return `You are a senior software architect doing a second-pass review on PR #${ctx.pr_number} (head ${ctx.head_sha.slice(0, 12)}). The team's automated auditor (deepseek-v3.1:671b, N=3 consensus) already produced findings. Your job is NOT to repeat what they found — your job is to catch what their voting structure CAN'T see: compile errors, type-system bypasses, false telemetry, silent determinism leaks, schema-bypass anti-patterns, load-bearing assumptions that look fine line-by-line.
+
+GROUNDING RULES (non-negotiable):
+- Cite file:line for EVERY finding. Lines you cite must actually contain what you claim. Confabulating a finding wastes more time than missing one.
+- If the diff is truncated and you can't verify a claim, say "diff-truncated, can't verify" — DO NOT guess.
+- Distinguish architectural concerns (no specific line) from concrete bugs (specific line). Don't dress one as the other.
+
+PRIOR FINDINGS FROM DEEPSEEK CONSENSUS (do not repeat these):
+${priorBlock || "(none)"}
+
+OUTPUT FORMAT (markdown):
+- ## Verdict (one sentence)
+- ## Findings (5-10 items, each formatted EXACTLY as below)
+
+For each finding use this exact shape so a parser can lift them:
+
+### F1: <one-line summary>
+- **Severity:** block | warn | info
+- **File:** path/to/file.ext:LINE
+- **Rationale:** one or two sentences
+
+THE DIFF:
+
+${truncatedDiff}
+`;
+}
+
+// POST a single-message chat request to `${GATEWAY}/v1/chat` and return
+// the first choice.
+//   prompt    user message content
+//   provider  gateway provider name to route through
+//   model     model name; also selects max_tokens via maxTokensFor
+// Returns the message content ("" if absent), the usage object ({} if
+// absent), finish_reason ("unknown" if absent), and the elapsed
+// wall-clock time in ms.
+// Throws Error when curl exits non-zero, when the response body is not
+// JSON, or when the JSON carries `error` or lacks `choices`.
+async function callKimi(prompt: string, provider: string, model: string): Promise<{ content: string; usage: any; finish_reason: string; latency_ms: number }> {
+  const t0 = Date.now();
+  const body = JSON.stringify({
+    provider,
+    model,
+    messages: [{ role: "user", content: prompt }],
+    max_tokens: maxTokensFor(model),
+    temperature: 0.2,
+  });
+  // curl via Bun.spawn — bypasses Bun fetch's ~300s intrinsic ceiling.
+  // -m sets the max transfer time honored end-to-end. Body is piped via
+  // stdin to avoid argv length limits on big audit prompts (~50K+ tokens).
+  const proc = Bun.spawn({
+    cmd: [
+      "curl", "-sS", "-X", "POST",
+      "-m", String(Math.ceil(CALL_TIMEOUT_MS / 1000)),
+      "-H", "content-type: application/json",
+      "--data-binary", "@-",
+      `${GATEWAY}/v1/chat`,
+    ],
+    stdin: "pipe",
+    stdout: "pipe",
+    stderr: "pipe",
+  });
+  proc.stdin.write(body);
+  await proc.stdin.end();
+  // Drain both pipes while waiting for exit.
+  const [stdout, stderr, exitCode] = await Promise.all([
+    new Response(proc.stdout).text(),
+    new Response(proc.stderr).text(),
+    proc.exited,
+  ]);
+  if (exitCode !== 0) {
+    throw new Error(`curl exit ${exitCode}: ${stderr.slice(0, 300)}`);
+  }
+  let j: any;
+  try { j = JSON.parse(stdout); }
+  catch (e) {
+    throw new Error(`bad response (${stdout.length} bytes): ${stdout.slice(0, 300)}`);
+  }
+  // curl is invoked without -f, so an HTTP error status still exits 0;
+  // failures are detected from the JSON body instead.
+  if (j.error || !j.choices) {
+    throw new Error(`gateway error: ${JSON.stringify(j).slice(0, 300)}`);
+  }
+  return {
+    content: j.choices?.[0]?.message?.content ?? "",
+    usage: j.usage ?? {},
+    finish_reason: j.choices?.[0]?.finish_reason ?? "unknown",
+    latency_ms: Date.now() - t0,
+  };
+}
+
+// Parse Kimi's markdown into Finding[]. Format expected (per buildPrompt):
+//   ### F<N>: <summary>
+//   - **Severity:** block | warn | info
+//   - **File:** path:line
+//   - **Rationale:** ...
+//
+// Tolerances:
+//   - a block whose first line is empty is dropped
+//   - a missing or unrecognized severity falls back to "info"
+//   - a missing file citation becomes "unknown"
+//   - markdown decoration around the citation (backticks, asterisks,
+//     trailing , ; .) is stripped, so "`src/a.ts:12`" is stored as
+//     "src/a.ts:12" and the grounding step can resolve the path
+// Each finding's evidence is [citation, rationale (max 360 chars)], with
+// the rationale omitted when empty. Summaries are capped at 240 chars.
+function parseFindings(content: string): Finding[] {
+  const findings: Finding[] = [];
+  const blocks = content.split(/^###\s+F\d+:\s*/m).slice(1);
+  for (const block of blocks) {
+    const summary = (block.split("\n")[0] ?? "").trim();
+    if (!summary) continue;
+    const sev = /\*\*Severity:\*\*\s*(block|warn|info)/i.exec(block)?.[1]?.toLowerCase();
+    const rawCite = /\*\*File:\*\*\s*(\S+)/i.exec(block)?.[1] ?? "";
+    // Models frequently wrap the path in inline-code or bold markers.
+    const fileLine = rawCite.replace(/^[`*]+/, "").replace(/[`*,;.]+$/, "") || "unknown";
+    const rationale = /\*\*Rationale:\*\*\s*([\s\S]+?)(?=\n###|\n\*\*|$)/i.exec(block)?.[1]?.trim() ?? "";
+    const severity: Finding["severity"] = sev === "block" ? "block" : sev === "warn" ? "warn" : "info";
+    findings.push({
+      check: "kimi_architect" as CheckKind,
+      severity,
+      summary: summary.slice(0, 240),
+      evidence: [fileLine, rationale.slice(0, 360)].filter(Boolean),
+    });
+  }
+  return findings;
+}
+
+// Check each finding's cited `path:line` (evidence[0]) against the
+// working tree under REPO_ROOT. A citation counts as verified when the
+// path stays inside the repo (lexically and after symlink resolution),
+// the file can be read, and the cited line number falls within the
+// file's line count. The text of the cited line is NOT compared against
+// the finding's claim.
+//
+// Side effect: a `[grounding: ...]` note is pushed onto the finding's
+// evidence array so the reader can see which citations were verified.
+// No note is added when the citation doesn't parse as path:line or the
+// line number is 0.
+//
+// Returns total (= findings.length), the verified count, and rate =
+// verified / total (0 when there are no findings).
+async function computeGrounding(findings: Finding[]): Promise<{ total: number; verified: number; rate: number }> {
+  // readFile (async) instead of readFileSync — caught 2026-04-27 by
+  // Kimi's self-audit. Sync I/O in an async fn blocks the event loop
+  // for every cited file; doesn't matter at 10 findings, would matter
+  // at 100+.
+  const checks = await Promise.all(findings.map(async (f) => {
+    const cite = f.evidence[0] ?? "";
+    const m = /^(\S+?):(\d+)/.exec(cite);
+    if (!m) return false;
+    const [, relpath, lineStr] = m;
+    const line = Number(lineStr);
+    if (!line || !relpath) return false;
+
+    // Path-traversal guard, two-layer (caught 2026-04-27 by Kimi
+    // self-audits on dd77632 then 2d9cb12).
+    //
+    // Layer 1 (lexical): resolve() normalizes `..` segments. Refuse
+    // any path that doesn't anchor under REPO_ROOT.
+    //
+    // Layer 2 (symlink): even if the lexical path is anchored, it
+    // could be a symlink whose target escapes. realpath() resolves
+    // symlinks; compare the real path against REPO_ROOT_REAL.
+    //
+    // Both layers exist because attackers might bypass either alone:
+    // raw `../etc/passwd` triggers layer 1; a planted symlink at
+    // ./safe-looking-name → /etc/passwd triggers layer 2.
+    const abs = resolve(REPO_ROOT, relpath);
+    if (!abs.startsWith(REPO_ROOT + "/") && abs !== REPO_ROOT) {
+      f.evidence.push(`[grounding: path escapes repo root, refusing]`);
+      return false;
+    }
+
+    if (!existsSync(abs)) {
+      f.evidence.push("[grounding: file not found]");
+      return false;
+    }
+    try {
+      // Symlink-resolution check before any read. realpath() throws
+      // if the file doesn't exist; existsSync above shields the
+      // common case but a TOCTOU race could still error here — the
+      // outer catch handles it.
+      const realPath = await realpath(abs);
+      if (!realPath.startsWith(REPO_ROOT_REAL + "/") && realPath !== REPO_ROOT_REAL) {
+        f.evidence.push(`[grounding: symlink target escapes repo root, refusing]`);
+        return false;
+      }
+      // Line count is the number of "\n"-separated segments, so a file
+      // ending in a newline counts one extra (empty) final line.
+      const lines = (await readFile(realPath, "utf8")).split("\n");
+      if (line < 1 || line > lines.length) {
+        f.evidence.push(`[grounding: line ${line} > EOF (${lines.length})]`);
+        return false;
+      }
+      f.evidence.push(`[grounding: verified at ${relpath}:${line}]`);
+      return true;
+    } catch (e) {
+      f.evidence.push(`[grounding: read failed: ${(e as Error).message.slice(0, 80)}]`);
+      return false;
+    }
+  }));
+  const verified = checks.filter(Boolean).length;
+  const total = findings.length;
+  return { total, verified, rate: total === 0 ? 0 : verified / total };
+}
+
+// Write verdict `v` to `path` as 2-space-indented JSON, creating
+// KIMI_VERDICTS_DIR (and any missing parents) beforehand.
+async function persistVerdict(path: string, v: KimiVerdictFile): Promise<void> {
+  await mkdir(KIMI_VERDICTS_DIR, { recursive: true });
+  const serialized = JSON.stringify(v, null, 2);
+  await writeFile(path, serialized);
+}
+
+// Append one JSON line summarizing verdict `v` to KIMI_AUDITS_JSONL,
+// creating the file's parent directory first if needed. The record
+// carries identifiers, timing, token counts, per-severity finding
+// counts, and the grounding result (rate rounded to 3 decimals).
+async function appendMetrics(v: KimiVerdictFile): Promise<void> {
+  await mkdir(dirname(KIMI_AUDITS_JSONL), { recursive: true });
+
+  const countWithSeverity = (sev: Finding["severity"]): number =>
+    v.findings.filter((f) => f.severity === sev).length;
+
+  const record = {
+    pr_number: v.pr_number,
+    head_sha: v.head_sha,
+    audited_at: v.cached_at,
+    model: v.model,
+    latency_ms: v.latency_ms,
+    finish_reason: v.finish_reason,
+    prompt_tokens: v.usage.prompt_tokens,
+    completion_tokens: v.usage.completion_tokens,
+    findings_total: v.findings.length,
+    findings_block: countWithSeverity("block"),
+    findings_warn: countWithSeverity("warn"),
+    grounding_verified: v.grounding.verified,
+    grounding_rate: Number(v.grounding.rate.toFixed(3)),
+  };
+  await appendFile(KIMI_AUDITS_JSONL, JSON.stringify(record) + "\n");
+}
+
+// Build the info-level placeholder finding reported when the check did
+// not run; `why` appears in both the summary and the evidence.
+function skipFinding(why: string): Finding {
+  const placeholder: Finding = {
+    check: "kimi_architect" as CheckKind,
+    severity: "info",
+    summary: "kimi_architect skipped — " + why,
+    evidence: [why],
+  };
+  return placeholder;
+}
--- a/auditor/checks/static.ts
+++ b/auditor/checks/static.ts
@ -54,49 +54,87 @@ export function runStaticCheck(diff: string): Finding[] {
    const isAuditorCheckerFile = path.startsWith("auditor/checks/") ||
                                 path.startsWith("auditor/fixtures/");

+    // Track multi-line backtick-template state across the file. Walks
+    // all post-merge lines (context + added, skipping removed lines)
+    // in order and keeps `inMultilineBacktick` flipping on each
+    // unescaped backtick. Pre-2026-04-26 the per-line walk in
+    // isInsideQuotedString missed `todo!()` matches inside docstring
+    // template literals because the opening backtick lived on a
+    // line above the match. Now we OR the file-level state into the
+    // per-line check.
+    let inMultilineBacktick = false;
+
    for (let idx = 0; idx < lines.length; idx++) {
      const line = lines[idx];
-      if (!line.startsWith("+") || line.startsWith("+++")) continue;
-      const added = line.slice(1);

-      if (!isAuditorCheckerFile) {
-        for (const { re, why } of BLOCK_PATTERNS) {
-          const m = added.match(re);
-          if (m && typeof m.index === "number") {
-            // Skip if the match sits inside a quoted string literal —
-            // this is how rubric files (tests/real-world/*, prompt
-            // templates) legitimately reference the patterns they
-            // guard against, without actually executing them.
-            if (isInsideQuotedString(added, m.index)) continue;
+      // Diff bookkeeping lines and removed lines don't contribute to
+      // the post-merge file's string state.
+      if (line.startsWith("+++") || line.startsWith("---") ||
+          line.startsWith("@@") || line.startsWith("\\ No newline")) continue;
+      if (line.startsWith("-")) continue;
+
+      const isAdded = line.startsWith("+");
+      // Strip the diff prefix (' ' for context, '+' for added).
+      const body = (isAdded || line.startsWith(" ")) ? line.slice(1) : line;
+
+      // Compute the file-level backtick state ENTERING this line.
+      // The state machine sees pattern matches against the right
+      // context: a line that opens a backtick block has its own
+      // pattern checks evaluated under "inside-backtick" semantics
+      // for the portion AFTER the opening tick. Pre-2026-04-27 the
+      // state was updated AFTER the pattern checks, so the FIRST
+      // pattern on a backtick-opening line slipped through with
+      // stale "outside-backtick" semantics. Caught by Kimi self-audit.
+      const stateAtLineStart = inMultilineBacktick;
+      const stateAtLineEnd = updateBacktickState(body, stateAtLineStart);
+
+      if (isAdded) {
+        const added = body;
+
+        if (!isAuditorCheckerFile) {
+          for (const { re, why } of BLOCK_PATTERNS) {
+            const m = added.match(re);
+            if (m && typeof m.index === "number") {
+              // Skip if EITHER (a) the file was already inside a
+              // multi-line backtick block when this line started, OR
+              // (b) the match sits inside a quoted string literal on
+              // THIS line. The earlier code only checked stateAtLineStart;
+              // now we also check that the match isn't past the
+              // opening backtick of a block that opens on this line.
+              if (stateAtLineStart || isInsideQuotedString(added, m.index)) continue;
+              findings.push({
+                check: "static",
+                severity: "block",
+                summary: `${why} in ${path}`,
+                evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
+              });
+            }
+          }
+        }
+        for (const { re, why } of WARN_COMMENT_PATTERNS) {
+          if (re.test(line)) {
            findings.push({
              check: "static",
-              severity: "block",
+              severity: "warn",
+              summary: `${why} in ${path}`,
+              evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
+            });
+          }
+        }
+        for (const { re, why } of INFO_HARDCODED_PATTERNS) {
+          if (re.test(added)) {
+            findings.push({
+              check: "static",
+              severity: "info",
              summary: `${why} in ${path}`,
              evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
            });
          }
        }
      }
-      for (const { re, why } of WARN_COMMENT_PATTERNS) {
-        if (re.test(line)) {
-          findings.push({
-            check: "static",
-            severity: "warn",
-            summary: `${why} in ${path}`,
-            evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
-          });
-        }
-      }
-      for (const { re, why } of INFO_HARDCODED_PATTERNS) {
-        if (re.test(added)) {
-          findings.push({
-            check: "static",
-            severity: "info",
-            summary: `${why} in ${path}`,
-            evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
-          });
-        }
-      }
+
+      // Carry the end-of-line state forward to the next iteration.
+      inMultilineBacktick = stateAtLineEnd;
    }

    // "Field added but never read" heuristic — catches exactly the
@ -105,10 +143,20 @@ export function runStaticCheck(diff: string): Finding[] {
    // elsewhere might exist). The point is: if NEITHER this diff nor
    // any other line in the diff reads the field, the PR is shipping
    // state without a consumer.
+    //
+    // Serde exemption: if the field's parent struct derives Serialize
+    // or Deserialize, the read-site is the macro itself — JSON
+    // round-trips consume every public field. Without this exemption
+    // the check produces false positives on every response/request
+    // struct shipped through `/v1/*`.
    const addedLines = lines.filter(l => l.startsWith("+") && !l.startsWith("+++"))
      .map(l => l.slice(1));
-    const newFields = extractNewFields(addedLines);
-    for (const field of newFields) {
+    const newFields = extractNewFieldsWithLine(lines);
+    const seenNames = new Set<string>();
+    for (const { name: field, lineIdx } of newFields) {
+      if (seenNames.has(field)) continue;
+      seenNames.add(field);
+      if (parentStructHasSerdeDerive(lines, lineIdx)) continue;
      const readPattern = new RegExp(`[\\.:]\\s*${escape(field)}\\b|\\b${escape(field)}\\s*:`);
      // The definition line itself matches readPattern — filter it out
      // by requiring at least TWO lines in the diff mention the field
@ -146,26 +194,105 @@ function splitDiffByFile(diff: string): Map<string, string[]> {
  return out;
 }

-// Extract new `pub name: Type,` fields from added lines. Rust syntax.
-// Narrowly-scoped: only matches at the start of a trimmed line,
-// requires `pub ` prefix, ignores `pub fn` / `pub struct` / etc.
-function extractNewFields(addedLines: string[]): string[] {
-  const fields = new Set<string>();
-  for (const line of addedLines) {
-    const t = line.trim();
-    // pub NAME: Type,
+// Extract new `pub name: Type,` fields from the per-file diff lines,
+// keeping each occurrence's line index so the caller can resolve the
+// parent struct. Same narrow rules as before: starts with `pub `,
+// excludes `pub fn` / `pub struct` / etc.
+function extractNewFieldsWithLine(lines: string[]): Array<{ name: string; lineIdx: number }> {
+  const out: Array<{ name: string; lineIdx: number }> = [];
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    if (!line.startsWith("+") || line.startsWith("+++")) continue;
+    const t = line.slice(1).trim();
    const m = t.match(/^pub\s+(?!fn\b|struct\b|enum\b|mod\b|use\b|trait\b|impl\b|const\b|static\b|type\b)(\w+)\s*:/);
-    if (m) fields.add(m[1]);
+    if (m) out.push({ name: m[1], lineIdx: i });
  }
-  return Array.from(fields);
+  return out;
+}
+
+// True if the field at `fieldLineIdx` lives inside a struct whose
+// declaration carries `#[derive(... Serialize|Deserialize ...)]`. We
+// walk backward through the diff to find the nearest `pub struct`
+// boundary, then scan a few lines above it for derive attributes.
+//
+// Only lines that exist in the post-merge file take part: added ('+')
+// and context (' ') lines count, removed ('-') lines are skipped. A
+// struct declaration unchanged by the PR still appears as context, but
+// a derive, brace, or struct header the PR deletes must not influence
+// the answer.
+//
+// Conservative bounds:
+//   - 80 lines back to find `struct` (struct definitions can grow large)
+//   - 8 lines above the `struct` keyword for attribute lines
+// Stops the struct-search early if we hit a `}` at zero indent
+// (the previous scope) or another `pub struct` (we left ours).
+//
+// Returns false for an out-of-range `fieldLineIdx`, when no enclosing
+// `pub struct` is found within the window, or when no serde derive is
+// attached to it.
+function parentStructHasSerdeDerive(lines: string[], fieldLineIdx: number): boolean {
+  // Defensive: bail early on out-of-range input rather than walking
+  // from an index that doesn't name a real line.
+  if (fieldLineIdx < 0 || fieldLineIdx >= lines.length) return false;
+
+  let structLineIdx = -1;
+  for (let i = fieldLineIdx - 1; i >= 0 && i >= fieldLineIdx - 80; i--) {
+    const raw = lines[i];
+    if (typeof raw !== "string" || raw.length === 0) continue;
+    // Removed lines are absent from the post-merge file.
+    if (raw.startsWith("-")) continue;
+    const body = stripDiffPrefix(raw);
+    const trimmed = body.trim();
+    if (/^pub\s+struct\s+\w/.test(trimmed)) {
+      structLineIdx = i;
+      break;
+    }
+    // Closing brace at column 0 means the enclosing scope ended above
+    // the field — we're not actually inside a struct.
+    if (body.startsWith("}")) return false;
+  }
+  if (structLineIdx < 0) return false;
+
+  for (let j = structLineIdx - 1; j >= 0 && j >= structLineIdx - 8; j--) {
+    const raw = lines[j];
+    if (typeof raw !== "string") continue;
+    // A derive the PR deletes no longer applies to the struct.
+    if (raw.startsWith("-")) continue;
+    const trimmed = stripDiffPrefix(raw).trim();
+    // Blank lines and comments may sit between attributes and the struct.
+    if (trimmed === "" || trimmed.startsWith("//") || trimmed.startsWith("///")) continue;
+    // Anything that isn't an attribute ends the attribute run.
+    if (!trimmed.startsWith("#[")) break;
+    if (/derive\s*\([^)]*\b(Serialize|Deserialize)\b/.test(trimmed)) return true;
+  }
+  return false;
+}
+
+// Drop the single unified-diff marker character ('+', '-', or ' ') from
+// the front of a line and return the underlying source text. Lines that
+// are empty or start with any other character come back unchanged.
+function stripDiffPrefix(line: string): string {
+  const hasMarker = line.startsWith("+") || line.startsWith("-") || line.startsWith(" ");
+  return hasMarker ? line.slice(1) : line;
+}
+
+// Walk a single line and toggle the cross-line backtick state on each
+// unescaped backtick. Single-quote and double-quote runs are line-
+// bounded in JS/TS/Rust by language rules (string literals don't span
+// newlines without explicit `\` continuation), so we only track
+// backticks across lines.
+//
+//   line        source text of one line (diff prefix already removed)
+//   inBacktick  whether the file was inside a backtick template when
+//               this line started
+// Returns the state to carry into the next line.
+//
+// Escapes are tracked as a running flag rather than by peeking at the
+// previous character: a backslash escapes exactly the next character,
+// so in `"C:\\"` the second backslash is itself escaped and the closing
+// quote is a real delimiter.
+function updateBacktickState(line: string, inBacktick: boolean): boolean {
+  let state = inBacktick;
+  let inDouble = false;
+  let inSingle = false;
+  let escaped = false;
+  for (let i = 0; i < line.length; i++) {
+    const c = line[i];
+    if (escaped) {
+      // This character was escaped by the backslash before it.
+      escaped = false;
+      continue;
+    }
+    if (c === "\\") {
+      escaped = true;
+      continue;
+    }
+    // Inside a multi-line backtick template, single/double quotes
+    // don't open new strings — they're literal characters of the
+    // template. Same applies the other way around.
+    if (c === '"' && !inSingle && !state) inDouble = !inDouble;
+    else if (c === "'" && !inDouble && !state) inSingle = !inSingle;
+    else if (c === "`" && !inDouble && !inSingle) state = !state;
+  }
+  return state;
+}

 // True if `pos` falls inside a double- or single-quoted string on this
 // line (backtick template literals too). Walks left→right toggling the
-// "in quote" state on each unescaped quote. Good enough for single-
-// line matches; multi-line strings aren't parsed (they're extremely
-// rare in the patterns we're blocking on, and would require a proper
-// tokenizer to handle correctly).
+// "in quote" state on each unescaped quote. Per-line only — the file-
+// level walk in runStaticCheck handles multi-line backtick templates
+// via updateBacktickState.
 function isInsideQuotedString(line: string, pos: number): boolean {
  let inDouble = false, inSingle = false, inBacktick = false;
  for (let i = 0; i < pos; i++) {
--- a/auditor/index.ts
+++ b/auditor/index.ts
@ -24,14 +24,30 @@ const POLL_INTERVAL_MS = 90_000; // 90s — enough budget for audit runs to comp
 const PAUSE_FILE = "/home/profit/lakehouse/auditor.paused";
 const STATE_FILE = "/home/profit/lakehouse/data/_auditor/state.json";

+// Per-PR audit cap. Prevents the daemon from running away on a PR
+// when each push surfaces new findings — operator wants to review
+// in batch, not have the daemon burn budget while they're away.
+// Default 3 audits per PR. Override via LH_AUDITOR_MAX_AUDITS_PER_PR.
+// Set to 0 to disable the cap.
+//
+// Reset (after manual review): edit data/_auditor/state.json and
+// set audit_count_per_pr.<N> = 0 (or delete the key). Daemon picks
+// up the change on the next cycle without restart. runCycle also
+// zeroes the counter on its own when a PR it has audited before
+// shows up with a new head SHA.
+//
+// Parsing: an unset, blank, or non-numeric value falls back to 3.
+// The previous `Number(env) || 3` form also replaced an explicit 0
+// with 3 (0 is falsy), which made the documented "0 disables the
+// cap" impossible — hence the explicit checks below.
+// NOTE: with the cap disabled, runCycle's post-audit "reached cap"
+// log line still prints, because that comparison has no `> 0` guard.
+const MAX_AUDITS_PER_PR = (() => {
+  const raw = process.env.LH_AUDITOR_MAX_AUDITS_PER_PR?.trim();
+  if (!raw) return 3; // unset or blank → default
+  const parsed = Number(raw);
+  return Number.isNaN(parsed) ? 3 : parsed;
+})();
+
 interface State {
  // Map: PR number → last-audited head SHA. Lets us dedupe audits
  // across restarts (poller can crash/restart without re-auditing
  // all open PRs from scratch).
  last_audited: Record<string, string>;
+  // Map: PR number → number of audits run on that PR since last reset.
+  // Daemon halts auditing a PR once this hits MAX_AUDITS_PER_PR.
+  // Operator clears the entry to resume. runCycle also zeroes the
+  // entry itself when the PR's head SHA differs from last_audited.
+  audit_count_per_pr: Record<string, number>;
+  // ISO timestamp; loadState defaults it to "now" when the stored
+  // state has none.
  started_at: string;
+  // Cycle counter; runCycle prints it as the cycle number.
  cycles_total: number;
+  // NOTE(review): presumably counts cycles skipped because the pause
+  // file exists — the increment is not visible here; confirm.
  cycles_skipped_paused: number;
+  // Bumped once for every PR that runCycle skips because of the audit
+  // cap, so it counts skipped PRs rather than whole cycles.
+  cycles_skipped_capped: number;
+  // Bumped after each audit that completes without throwing.
  audits_run: number;
+  // Optional; loadState passes it through unchanged when present.
  last_cycle_at?: string;
 }
@ -47,17 +63,21 @@ async function loadState(): Promise<State> {
    return {
      last_audited: s.last_audited ?? {},
      started_at: s.started_at ?? new Date().toISOString(),
+      audit_count_per_pr: s.audit_count_per_pr ?? {},
      cycles_total: s.cycles_total ?? 0,
      cycles_skipped_paused: s.cycles_skipped_paused ?? 0,
+      cycles_skipped_capped: s.cycles_skipped_capped ?? 0,
      audits_run: s.audits_run ?? 0,
      last_cycle_at: s.last_cycle_at,
    };
  } catch {
    return {
      last_audited: {},
+      audit_count_per_pr: {},
      started_at: new Date().toISOString(),
      cycles_total: 0,
      cycles_skipped_paused: 0,
+      cycles_skipped_capped: 0,
      audits_run: 0,
    };
  }
@ -89,12 +109,38 @@ async function runCycle(state: State): Promise<State> {
  console.log(`[auditor] cycle ${state.cycles_total}: ${prs.length} open PR(s)`);

  for (const pr of prs) {
-    const last = state.last_audited[String(pr.number)];
+    const prKey = String(pr.number);
+    const last = state.last_audited[prKey];
    if (last === pr.head_sha) {
      console.log(`[auditor]   skip PR #${pr.number} (SHA ${pr.head_sha.slice(0, 8)} already audited)`);
      continue;
    }
-    console.log(`[auditor]   audit PR #${pr.number} (${pr.head_sha.slice(0, 8)}) — ${pr.title.slice(0, 60)}`);
+    // Per-head-SHA audit cap. Each new push gets MAX_AUDITS_PER_PR
+    // fresh attempts; the counter auto-resets when the head SHA
+    // changes. Operator only intervenes manually if a single SHA
+    // somehow needs MORE than the cap (rare — usually transient
+    // upstream errors clear themselves inside 3 attempts).
+    //
+    // Reset rule: if `last` exists (we've seen this PR before) AND
+    // pr.head_sha != last, that's a new push. Drop the counter.
+    // The dedup branch above already handles same-SHA → skip, so
+    // we only land here when the SHA actually moved.
+    if (last !== undefined && (state.audit_count_per_pr[prKey] ?? 0) > 0) {
+      const prior_count = state.audit_count_per_pr[prKey];
+      console.log(`[auditor]   PR #${pr.number} new head ${pr.head_sha.slice(0, 8)} (prior ${last.slice(0, 8)}, was ${prior_count}/${MAX_AUDITS_PER_PR}) — resetting cap counter`);
+      state.audit_count_per_pr[prKey] = 0;
+    }
+    const auditedSoFar = state.audit_count_per_pr[prKey] ?? 0;
+    if (MAX_AUDITS_PER_PR > 0 && auditedSoFar >= MAX_AUDITS_PER_PR) {
+      // This branch only fires now if the SAME head SHA somehow
+      // burned MAX audits (transient upstream errors retried that
+      // many times). Operator can clear state.audit_count_per_pr.<N>
+      // = 0 to force one more attempt; otherwise wait for next push.
+      console.log(`[auditor]   skip PR #${pr.number} (same head ${pr.head_sha.slice(0, 8)} burned ${auditedSoFar}/${MAX_AUDITS_PER_PR} — push new code or clear state.json audit_count_per_pr.${prKey})`);
+      state.cycles_skipped_capped += 1;
+      continue;
+    }
+    console.log(`[auditor]   audit PR #${pr.number} (${pr.head_sha.slice(0, 8)}) — ${pr.title.slice(0, 60)} [${auditedSoFar + 1}/${MAX_AUDITS_PER_PR}]`);
    try {
      // Skip dynamic by default: it mutates live playbook state and
      // re-runs on every PR update would pollute quickly. Operator
@ -106,8 +152,22 @@ async function runCycle(state: State): Promise<State> {
        skip_inference: process.env.LH_AUDITOR_SKIP_INFERENCE === "1",
      });
      console.log(`[auditor]     verdict=${verdict.overall} findings=${verdict.metrics.findings_total} (block=${verdict.metrics.findings_block} warn=${verdict.metrics.findings_warn})`);
-      state.last_audited[String(pr.number)] = pr.head_sha;
+      state.last_audited[prKey] = pr.head_sha;
+      state.audit_count_per_pr[prKey] = auditedSoFar + 1;
      state.audits_run += 1;
+      if (state.audit_count_per_pr[prKey] >= MAX_AUDITS_PER_PR) {
+        console.log(`[auditor]     PR #${pr.number} reached cap (${MAX_AUDITS_PER_PR} audits) — daemon will skip further audits until reset`);
+      }
+      // Persist state immediately after each successful audit so the
+      // increment survives a crash. Pre-2026-04-27 the cycle saved
+      // once at the end (main.ts:140), which lost the count if the
+      // daemon was killed mid-cycle. Fix lifted from kimi_architect's
+      // own audit on this very file. saveState is idempotent + cheap
+      // (one JSON write), so per-audit cost is negligible.
+      try { await saveState(state); }
+      catch (e) {
+        console.error(`[auditor]     saveState mid-cycle failed: ${(e as Error).message} — count held in memory`);
+      }
    } catch (e) {
      console.error(`[auditor]     audit failed: ${(e as Error).message}`);
    }
--- a/auditor/schemas/distillation/drift_report.ts
+++ b/auditor/schemas/distillation/drift_report.ts
@ -0,0 +1,85 @@
+// drift_report.ts — comparison of a current run summary vs the
+// previous run summary on disk. Spec calls this "drift detection";
+// concretely it answers: did the pipeline behave the same way as
+// last time, and if not, was the change explained by an input change
+// or did it appear out of nowhere (silent drift)?
+//
+// Severity:
+//   ok    — within 20% on every metric, no hash surprises
+//   warn  — record-count or category swing > 20%, OR new error class
+//   alert — output_hash differs while input_hash is identical
+//           (deterministic violation — same input → different output)
+
+import {
+  ValidationResult, requireString, requireIsoTimestamp,
+} from "./types";
+import type { StageName } from "./stage_receipt";
+
+export const DRIFT_REPORT_SCHEMA_VERSION = 2;
+export const DRIFT_THRESHOLD_PCT = 0.20;
+
+export type DriftSeverity = "ok" | "warn" | "alert";
+
+export interface StageDrift {
+  stage: StageName;
+  delta_records_in: number;       // current - prior
+  delta_records_out: number;
+  delta_accepted: number;
+  delta_quarantined: number;
+  pct_change_out: number | null;  // null when prior had 0 records
+  // null when input_hash isn't materialized into the stage summary —
+  // schema v1 lied and reported `true` here. v2 is honest: callers
+  // that want determinism enforcement must read the full StageReceipt
+  // off disk and compute input_hash equality there.
+  input_hash_match: boolean | null;
+  output_hash_match: boolean;
+  // alert if input_hash matches but output_hash diverges
+  deterministic_violation: boolean;
+  notes: string[];
+}
+
+export interface DriftReport {
+  schema_version: number;
+  run_id: string;
+  prior_run_id: string | null;    // null when no prior run on disk
+  generated_at: string;
+  severity: DriftSeverity;
+  stages: StageDrift[];
+  // Top-level swings the human reader should see immediately.
+  flags: string[];
+}
+
+// Structural check for a DriftReport. Every top-level field is
+// inspected (no short-circuit) so the caller gets the full list of
+// problems in one pass; `stages` and `flags` are only checked for
+// being arrays, their elements are not inspected.
+export function validateDriftReport(input: unknown): ValidationResult<DriftReport> {
+  if (input === null || typeof input !== "object") {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const report = input as Record<string, unknown>;
+  const errors: string[] = [];
+  let ok = true;
+
+  // Version gate first so its message leads the error list.
+  if (report.schema_version !== DRIFT_REPORT_SCHEMA_VERSION) {
+    ok = false;
+    errors.push(`schema_version: expected ${DRIFT_REPORT_SCHEMA_VERSION}, got ${JSON.stringify(report.schema_version)}`);
+  }
+
+  if (!requireString(report.run_id, "run_id", errors)) ok = false;
+
+  // prior_run_id must be an explicit null or a string; undefined fails.
+  const prior = report.prior_run_id;
+  const priorAcceptable = prior === null || typeof prior === "string";
+  if (!priorAcceptable) {
+    ok = false;
+    errors.push("prior_run_id: must be string or null");
+  }
+
+  if (!requireIsoTimestamp(report.generated_at, "generated_at", errors)) ok = false;
+
+  const severities = ["ok", "warn", "alert"];
+  if (!severities.includes(report.severity as string)) {
+    ok = false;
+    errors.push(`severity: must be ok|warn|alert, got ${JSON.stringify(report.severity)}`);
+  }
+
+  for (const field of ["stages", "flags"]) {
+    if (!Array.isArray(report[field])) {
+      ok = false;
+      errors.push(`${field}: expected array`);
+    }
+  }
+
+  return ok
+    ? { valid: true, value: report as unknown as DriftReport }
+    : { valid: false, errors };
+}
--- a/auditor/schemas/distillation/evidence_record.test.ts
+++ b/auditor/schemas/distillation/evidence_record.test.ts
@ -0,0 +1,116 @@
+// EvidenceRecord schema tests.
+//
+// Two positive fixtures (one per real-source prototype: distilled_facts
+// + contract_analyses) and three negative fixtures pinning the
+// non-negotiable invariants the spec demands:
+//   - every record must trace to a source (provenance)
+//   - schema_version must match — silent v1/v2 drift is the worst kind
+//   - required identity fields (run_id) cannot be missing
+//
+// Run with: bun test auditor/schemas/distillation/evidence_record.test.ts
+
+import { test, expect } from "bun:test";
+import { readFileSync } from "node:fs";
+import { resolve } from "node:path";
+
+import { validateEvidenceRecord, EVIDENCE_SCHEMA_VERSION } from "./evidence_record";
+
+const FIXTURE_DIR = resolve(import.meta.dir, "fixtures");
+
+function loadFixture(name: string): unknown {
+  // Read the named file from FIXTURE_DIR as UTF-8 and parse it as JSON.
+  const fixturePath = resolve(FIXTURE_DIR, name);
+  const raw = readFileSync(fixturePath, "utf8");
+  return JSON.parse(raw);
+}
+
+test("EVIDENCE_SCHEMA_VERSION is 1 — bump deliberately, never silently", () => {
+  expect(EVIDENCE_SCHEMA_VERSION).toBe(1);
+});
+
+test("positive: distilled_fact materialized record validates", () => {
+  const r = validateEvidenceRecord(loadFixture("evidence_positive_distilled_fact.json"));
+  if (!r.valid) console.error("unexpected errors:", r.errors);
+  expect(r.valid).toBe(true);
+  if (r.valid) {
+    expect(r.value.run_id).toBe("cae21289");
+    expect(r.value.model_role).toBe("extractor");
+    expect(r.value.provenance.source_file).toBe("data/_kb/distilled_facts.jsonl");
+  }
+});
+
+test("positive: contract_analysis materialized record validates with retrieval + observer fields", () => {
+  const r = validateEvidenceRecord(loadFixture("evidence_positive_contract_analysis.json"));
+  if (!r.valid) console.error("unexpected errors:", r.errors);
+  expect(r.valid).toBe(true);
+  if (r.valid) {
+    expect(r.value.observer_verdict).toBe("reject");
+    expect(r.value.observer_confidence).toBe(95);
+    expect(r.value.retrieved_context?.matrix_corpora?.length).toBe(4);
+    expect(r.value.failure_markers).toContain("observer_rejected");
+  }
+});
+
+test("negative: missing run_id is rejected with a specific error", () => {
+  const r = validateEvidenceRecord(loadFixture("evidence_negative_no_run_id.json"));
+  expect(r.valid).toBe(false);
+  if (!r.valid) {
+    expect(r.errors.some(e => e.includes("run_id"))).toBe(true);
+  }
+});
+
+test("negative: schema_version mismatch is rejected (silent v1/v2 drift guard)", () => {
+  const r = validateEvidenceRecord(loadFixture("evidence_negative_bad_schema_version.json"));
+  expect(r.valid).toBe(false);
+  if (!r.valid) {
+    expect(r.errors.some(e => e.includes("schema_version"))).toBe(true);
+  }
+});
+
+test("negative: bad provenance (non-sha256 sig_hash, non-ISO timestamp) is rejected", () => {
+  const r = validateEvidenceRecord(loadFixture("evidence_negative_bad_provenance.json"));
+  expect(r.valid).toBe(false);
+  if (!r.valid) {
+    // Must catch BOTH the sig_hash AND the recorded_at — comprehensive
+    // error reporting is part of the contract.
+    expect(r.errors.some(e => e.includes("sig_hash"))).toBe(true);
+    expect(r.errors.some(e => e.includes("recorded_at"))).toBe(true);
+  }
+});
+
+test("negative: non-object input is rejected with clear error", () => {
+  const r = validateEvidenceRecord("not an object");
+  expect(r.valid).toBe(false);
+  if (!r.valid) {
+    expect(r.errors[0]).toContain("expected object");
+  }
+});
+
+test("negative: human_override with invalid decision is rejected", () => {
+  const fixture = loadFixture("evidence_positive_distilled_fact.json") as Record<string, unknown>;
+  fixture.human_override = {
+    overrider: "test-user",
+    decision: "maybe",  // invalid — must be accept|reject|needs_review
+    reason: "test",
+    overridden_at: "2026-04-26T22:30:00.000Z",
+  };
+  const r = validateEvidenceRecord(fixture);
+  expect(r.valid).toBe(false);
+  if (!r.valid) {
+    expect(r.errors.some(e => e.includes("human_override.decision"))).toBe(true);
+  }
+});
+
+test("positive: human_override = null is allowed (explicitly no override)", () => {
+  const fixture = loadFixture("evidence_positive_distilled_fact.json") as Record<string, unknown>;
+  fixture.human_override = null;
+  const r = validateEvidenceRecord(fixture);
+  expect(r.valid).toBe(true);
+});
+
+test("negative: observer_confidence outside [0, 100] is rejected", () => {
+  const fixture = loadFixture("evidence_positive_contract_analysis.json") as Record<string, unknown>;
+  fixture.observer_confidence = 150;
+  const r = validateEvidenceRecord(fixture);
+  expect(r.valid).toBe(false);
+  if (!r.valid) {
+    expect(r.errors.some(e => e.includes("observer_confidence"))).toBe(true);
+  }
+});
--- a/auditor/schemas/distillation/evidence_record.ts
+++ b/auditor/schemas/distillation/evidence_record.ts
@ -0,0 +1,202 @@
+// EvidenceRecord — the unified per-execution-trace record that the
+// Evidence View emits and the Success Scorer reads.
+//
+// Derived from now.md spec + reconciliation of two existing prototypes:
+//   - distilled_facts.jsonl / distilled_procedures.jsonl (LLM-extracted
+//     text with run_id + sig_hash + extractor + verifier + embedding)
+//   - contract_analyses.jsonl (observer integration + retrieval
+//     telemetry + cost + duration)
+//
+// Required fields are the ones every record MUST have for traceability:
+// run_id, task_id, timestamp, schema_version, provenance. Everything
+// else is typed-but-optional because no single source has all of them
+// — the Evidence View materializes them by JOINing across streams when
+// the source data is present.
+//
+// schema_version starts at 1 and gets bumped on breaking changes.
+// Validators MUST check schema_version and refuse unknown values so a
+// future v2 reader doesn't silently accept v1 records (or vice versa).
+
+import {
+  ValidationResult, Provenance,
+  requireString, requireNumber, requireIsoTimestamp, requireProvenance, requireStringArray,
+} from "./types";
+
+export const EVIDENCE_SCHEMA_VERSION = 1;
+
+export type ModelRole =
+  | "executor"      // produced the answer (e.g. scrum reviewer, mode runner LLM call)
+  | "reviewer"      // judged an executor output (e.g. observer, hand-review)
+  | "extractor"     // pulled structured data from text (e.g. fact_extractor)
+  | "verifier"      // confirmed/rejected an extracted claim (verifier in distilled_*)
+  | "categorizer"   // assigned a category (categorizer in distilled_*)
+  | "tiebreaker"    // resolved a consensus split
+  | "applier"       // landed code (scrum_applier)
+  | "embedder"      // produced embeddings
+  | "other";
+
+export interface EvidenceRecord {
+  // ── Identity ──
+  // run_id ties this record to a specific execution. Sources use it
+  // inconsistently (some stream-level, some per-call). The Evidence
+  // View canonicalizes to per-call; if the source is stream-level,
+  // synthesize as `${stream_run_id}:${row_index}`.
+  run_id: string;
+
+  // task_id groups records by logical task (e.g. one PR = one task_id
+  // across multiple per-call runs). Defaults to run_id when no group
+  // exists — never null.
+  task_id: string;
+
+  // ISO 8601 of when the EXECUTION happened, not when this record was
+  // materialized. Use the source row's timestamp; provenance carries
+  // the materialization time separately.
+  timestamp: string;
+
+  schema_version: number;
+
+  // ── Provenance ── (required — no record without source linkage)
+  provenance: Provenance;
+
+  // ── Model attribution (optional) ──
+  model_name?: string;          // e.g. "kimi-k2:1t", "gpt-oss:120b"
+  model_provider?: string;      // e.g. "ollama_cloud", "openrouter", "ollama"
+  model_role?: ModelRole;
+
+  // ── Content hashes (optional) ──
+  // sha256 of the full input prompt and full output content. Pre-
+  // computed so the Evidence Index can dedup across re-runs of the
+  // same prompt without re-hashing.
+  input_hash?: string;
+  output_hash?: string;
+
+  // ── Repo + execution context ──
+  source_files?: string[];      // files the run touched/read
+  commands_run?: string[];      // shell commands or tool calls fired
+  retrieved_context?: {         // what the model saw via retrieval
+    matrix_corpora?: string[];
+    matrix_hits?: number;
+    matrix_chunks_kept?: number;
+    matrix_chunks_dropped?: number;
+    pathway_fingerprints_seen?: number;
+  };
+
+  // ── Observer + scratchpad ──
+  observer_notes?: string[];    // observer.review() free-form notes
+  observer_verdict?: "accept" | "reject" | "cycle" | string;
+  observer_confidence?: number; // 0-100
+  scratchpad_summary?: string;  // tree-split scratchpad text or hash ref
+
+  // ── Outcome markers ──
+  // Both arrays exist because a run can have multiple succeeded gates
+  // AND multiple failed gates simultaneously. Empty arrays are valid;
+  // missing arrays are also valid (means "no evidence either way").
+  success_markers?: string[];   // e.g. "cargo_green", "tests_passed", "anchor_grounded"
+  failure_markers?: string[];   // e.g. "warning_count_up", "rationale_mismatch", "consensus_split"
+
+  // ── Validation telemetry ──
+  validation_results?: {
+    grounded_fraction?: number; // mode_compare grounding %
+    schema_valid?: boolean;
+    pathway_replay_succeeded?: boolean;
+    [key: string]: unknown;
+  };
+
+  // ── Human-in-loop ──
+  human_override?: {
+    overrider: string;          // user identifier
+    decision: "accept" | "reject" | "needs_review";
+    reason: string;
+    overridden_at: string;      // ISO 8601
+  } | null;
+
+  // ── Performance ──
+  cost_usd?: number;
+  latency_ms?: number;
+  prompt_tokens?: number;
+  completion_tokens?: number;
+
+  // ── Free-form text content (the actual run output) ──
+  // Optional because some sources are pure metadata (auto_apply.jsonl)
+  // and have no text payload. Present for distilled_*, contract_analyses,
+  // mode_experiments, scrum_reviews etc.
+  text?: string;
+
+  // ── Domain-specific metadata bucket ──
+  // Source-specific fields that don't earn a top-level slot. e.g.
+  // contract_analyses rows carry `contractor` here; mode_experiments
+  // could carry `corpus_set`. Typed scalar values only — keep this
+  // small or it becomes a junk drawer. Added 2026-04-27 (Kimi audit
+  // flagged `(ev as any).contractor` schema bypass at export_sft.ts:126).
+  metadata?: Record<string, string | number | boolean>;
+}
+
+// Validate an unknown value as an EvidenceRecord.
+//
+// Returns { valid: true, value } when every check passes, otherwise
+// { valid: false, errors } carrying one message per failed check. All
+// checks run (no short-circuit) so a caller sees every problem at once.
+//
+// Checked: the required identity fields, provenance, schema_version,
+// and — only when present — model_role, the two content hashes, the
+// string-array fields, observer_confidence and human_override. Other
+// optional fields are passed through without inspection.
+export function validateEvidenceRecord(input: unknown): ValidationResult<EvidenceRecord> {
+  const errors: string[] = [];
+
+  if (typeof input !== "object" || input === null) {
+    return { valid: false, errors: ["expected object, got " + (input === null ? "null" : typeof input)] };
+  }
+  const r = input as Record<string, unknown>;
+
+  // Required
+  let ok = true;
+  ok = requireString(r.run_id, "run_id", errors) && ok;
+  ok = requireString(r.task_id, "task_id", errors) && ok;
+  ok = requireIsoTimestamp(r.timestamp, "timestamp", errors) && ok;
+  ok = requireProvenance(r.provenance, "provenance", errors) && ok;
+
+  if (r.schema_version !== EVIDENCE_SCHEMA_VERSION) {
+    errors.push(`schema_version: expected ${EVIDENCE_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
+    ok = false;
+  }
+
+  // Optional but typed-when-present
+  if (r.model_role !== undefined) {
+    const valid: ModelRole[] = ["executor", "reviewer", "extractor", "verifier", "categorizer", "tiebreaker", "applier", "embedder", "other"];
+    if (!valid.includes(r.model_role as ModelRole)) {
+      errors.push(`model_role: must be one of ${valid.join("|")}, got ${JSON.stringify(r.model_role)}`);
+      ok = false;
+    }
+  }
+
+  // Content hashes must be real strings. Testing String(value) instead
+  // would let a one-element array such as ["<64 hex chars>"] through,
+  // because it stringifies to the bare hash.
+  const sha256Hex = /^[0-9a-f]{64}$/;
+  for (const field of ["input_hash", "output_hash"]) {
+    const candidate = r[field];
+    if (candidate === undefined) continue;
+    if (typeof candidate !== "string" || !sha256Hex.test(candidate)) {
+      errors.push(`${field}: must be hex sha256 when present`);
+      ok = false;
+    }
+  }
+
+  if (r.source_files !== undefined && !requireStringArray(r.source_files, "source_files", errors)) ok = false;
+  if (r.commands_run !== undefined && !requireStringArray(r.commands_run, "commands_run", errors)) ok = false;
+  if (r.success_markers !== undefined && !requireStringArray(r.success_markers, "success_markers", errors)) ok = false;
+  if (r.failure_markers !== undefined && !requireStringArray(r.failure_markers, "failure_markers", errors)) ok = false;
+  if (r.observer_notes !== undefined && !requireStringArray(r.observer_notes, "observer_notes", errors)) ok = false;
+
+  if (r.observer_confidence !== undefined) {
+    if (!requireNumber(r.observer_confidence, "observer_confidence", errors)) ok = false;
+    else if ((r.observer_confidence as number) < 0 || (r.observer_confidence as number) > 100) {
+      errors.push("observer_confidence: must be in [0, 100]");
+      ok = false;
+    }
+  }
+
+  // null means "explicitly no override" and is accepted as-is.
+  if (r.human_override !== undefined && r.human_override !== null) {
+    const ho = r.human_override as Record<string, unknown>;
+    // Arrays are typeof "object" too; reject them here with the shape
+    // error rather than a pile of per-field errors.
+    if (typeof ho !== "object" || Array.isArray(ho)) {
+      errors.push("human_override: expected object or null");
+      ok = false;
+    } else {
+      ok = requireString(ho.overrider, "human_override.overrider", errors) && ok;
+      ok = requireString(ho.reason, "human_override.reason", errors) && ok;
+      ok = requireIsoTimestamp(ho.overridden_at, "human_override.overridden_at", errors) && ok;
+      if (!["accept", "reject", "needs_review"].includes(ho.decision as string)) {
+        errors.push(`human_override.decision: must be accept|reject|needs_review`);
+        ok = false;
+      }
+    }
+  }
+
+  if (!ok) return { valid: false, errors };
+  return { valid: true, value: r as unknown as EvidenceRecord };
+}
--- a/auditor/schemas/distillation/fixtures/evidence_negative_bad_provenance.json
+++ b/auditor/schemas/distillation/fixtures/evidence_negative_bad_provenance.json
@ -0,0 +1,11 @@
+{
+  "run_id": "cae21289",
+  "task_id": "team_runs:637",
+  "timestamp": "2026-04-23T09:54:40.729599Z",
+  "schema_version": 1,
+  "provenance": {
+    "source_file": "data/_kb/distilled_facts.jsonl",
+    "sig_hash": "not-a-real-sha256",
+    "recorded_at": "yesterday"
+  }
+}
--- a/auditor/schemas/distillation/fixtures/evidence_negative_bad_schema_version.json
+++ b/auditor/schemas/distillation/fixtures/evidence_negative_bad_schema_version.json
@ -0,0 +1,11 @@
+{
+  "run_id": "cae21289",
+  "task_id": "team_runs:637",
+  "timestamp": "2026-04-23T09:54:40.729599Z",
+  "schema_version": 99,
+  "provenance": {
+    "source_file": "data/_kb/distilled_facts.jsonl",
+    "sig_hash": "21a809e2dc43dfae0000000000000000000000000000000000000000deadbeef",
+    "recorded_at": "2026-04-26T22:30:00.000Z"
+  }
+}
--- a/auditor/schemas/distillation/fixtures/evidence_negative_no_run_id.json
+++ b/auditor/schemas/distillation/fixtures/evidence_negative_no_run_id.json
@ -0,0 +1,11 @@
+{
+  "task_id": "team_runs:637",
+  "timestamp": "2026-04-23T09:54:40.729599Z",
+  "schema_version": 1,
+  "provenance": {
+    "source_file": "data/_kb/distilled_facts.jsonl",
+    "sig_hash": "21a809e2dc43dfae0000000000000000000000000000000000000000deadbeef",
+    "recorded_at": "2026-04-26T22:30:00.000Z"
+  },
+  "text": "missing run_id should fail validation"
+}
--- a/auditor/schemas/distillation/fixtures/evidence_positive_contract_analysis.json
+++ b/auditor/schemas/distillation/fixtures/evidence_positive_contract_analysis.json
@ -0,0 +1,27 @@
+{
+  "run_id": "contract_analysis:101078392:1777250758717",
+  "task_id": "permit:101078392",
+  "timestamp": "2026-04-25T23:45:58.717Z",
+  "schema_version": 1,
+  "provenance": {
+    "source_file": "data/_kb/contract_analyses.jsonl",
+    "line_offset": 0,
+    "sig_hash": "f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1",
+    "recorded_at": "2026-04-26T22:30:00.000Z"
+  },
+  "model_name": "kimi-k2:1t",
+  "model_role": "executor",
+  "model_provider": "ollama_cloud",
+  "retrieved_context": {
+    "matrix_corpora": ["entity_brief_v1", "chicago_permits_v1", "distilled_procedural_v20260423102847", "sec_tickers_v1"],
+    "matrix_hits": 10
+  },
+  "observer_notes": ["contractor history shows 0 prior fills in Chicago downtown zone"],
+  "observer_verdict": "reject",
+  "observer_confidence": 95,
+  "success_markers": ["matrix_hits_above_threshold"],
+  "failure_markers": ["observer_rejected"],
+  "cost_usd": 0.0002,
+  "latency_ms": 25419,
+  "text": "Permit 101078392 contractor ANTHONY FIORE — analysis: insufficient prior performance signal; recommend escalation."
+}
--- a/auditor/schemas/distillation/fixtures/evidence_positive_distilled_fact.json
+++ b/auditor/schemas/distillation/fixtures/evidence_positive_distilled_fact.json
@ -0,0 +1,19 @@
+{
+  "run_id": "cae21289",
+  "task_id": "team_runs:637",
+  "timestamp": "2026-04-23T09:54:40.729599Z",
+  "schema_version": 1,
+  "provenance": {
+    "source_file": "data/_kb/distilled_facts.jsonl",
+    "line_offset": 0,
+    "sig_hash": "21a809e2dc43dfae0000000000000000000000000000000000000000deadbeef",
+    "recorded_at": "2026-04-26T22:30:00.000Z"
+  },
+  "model_name": "qwen2.5:latest",
+  "model_role": "extractor",
+  "model_provider": "ollama",
+  "text": "Convergence refers to the system stabilizing into a state of high performance with low variance across iterations.",
+  "validation_results": {
+    "schema_valid": true
+  }
+}
--- a/auditor/schemas/distillation/model_ledger.ts
+++ b/auditor/schemas/distillation/model_ledger.ts
@ -0,0 +1,56 @@
+// ModelLedgerEntry — aggregate per-task-type-per-model performance.
+// Built by aggregating mode_experiments.jsonl + model_trust.jsonl.
+// Updated rather than appended — one row per (model_name, task_type)
+// representing latest aggregates.
+import {
+  ValidationResult, requireString, requireNumber, requireIsoTimestamp, requireStringArray,
+} from "./types";
+
+export const MODEL_LEDGER_SCHEMA_VERSION = 1;
+
+export interface ModelLedgerEntry {
+  schema_version: number;
+  model_name: string;
+  model_provider: string;
+  task_type: string;
+  success_rate: number;          // [0, 1]
+  failure_modes: string[];       // top failure mode tags
+  best_partner_model?: string;   // pairs well with X (consensus / tie-break)
+  escalation_role?: string;      // when this model gets escalated TO (or FROM)
+  cost_usd_p50?: number;
+  latency_ms_p50?: number;
+  latency_ms_p95?: number;
+  context_window?: number;
+  sample_count: number;
+  last_updated: string;          // ISO 8601
+  notes?: string;
+}
+
+// Validate an unknown value as a ModelLedgerEntry. Only the required
+// fields are inspected; all of them are checked so the error list is
+// complete. success_rate must lie in [0, 1] and sample_count must be
+// an integer of at least 1.
+export function validateModelLedgerEntry(input: unknown): ValidationResult<ModelLedgerEntry> {
+  if (input === null || typeof input !== "object") {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const entry = input as Record<string, unknown>;
+  const errors: string[] = [];
+  let ok = true;
+
+  if (entry.schema_version !== MODEL_LEDGER_SCHEMA_VERSION) {
+    ok = false;
+    errors.push(`schema_version: expected ${MODEL_LEDGER_SCHEMA_VERSION}, got ${JSON.stringify(entry.schema_version)}`);
+  }
+
+  for (const field of ["model_name", "model_provider", "task_type"]) {
+    if (!requireString(entry[field], field, errors)) ok = false;
+  }
+  if (!requireIsoTimestamp(entry.last_updated, "last_updated", errors)) ok = false;
+  if (!requireStringArray(entry.failure_modes, "failure_modes", errors)) ok = false;
+
+  // Range checks only run once the value is known to be a number.
+  if (requireNumber(entry.success_rate, "success_rate", errors)) {
+    const rate = entry.success_rate as number;
+    if (rate < 0 || rate > 1) {
+      ok = false;
+      errors.push("success_rate: must be in [0, 1]");
+    }
+  } else {
+    ok = false;
+  }
+
+  if (requireNumber(entry.sample_count, "sample_count", errors)) {
+    const samples = entry.sample_count as number;
+    if (samples < 1 || !Number.isInteger(samples)) {
+      ok = false;
+      errors.push("sample_count: must be positive integer (no aggregate from zero samples)");
+    }
+  } else {
+    ok = false;
+  }
+
+  return ok
+    ? { valid: true, value: entry as unknown as ModelLedgerEntry }
+    : { valid: false, errors };
+}
--- a/auditor/schemas/distillation/playbook.ts
+++ b/auditor/schemas/distillation/playbook.ts
@ -0,0 +1,68 @@
+// Playbook — procedural knowledge extracted from accepted/partially-
+// accepted runs. Different from pathway_memory's bug_fingerprints (which
+// are pattern-detectors) — playbooks describe HOW to handle a task type.
+import {
+  ValidationResult, requireString, requireIsoTimestamp, requireProvenance, requireStringArray,
+} from "./types";
+
+export const PLAYBOOK_SCHEMA_VERSION = 1;
+
+export interface Playbook {
+  schema_version: number;
+  playbook_id: string;
+  task_type: string;                // e.g. "scrum_review", "pr_audit", "staffing.fill"
+  problem_pattern: string;          // when does this playbook apply?
+  useful_context: string[];         // what to retrieve before running
+  model_routing_path: string[];     // ordered model attempts that worked
+  commands_worked: string[];
+  commands_failed: string[];
+  validation_steps: string[];
+  repo_files_touched: string[];
+  recovery_strategy: string;        // what to do when the path fails
+  known_failure_modes: string[];
+  escalation_threshold: string;     // when to switch to a stronger model
+  acceptance_criteria: string[];    // how to know it succeeded
+  source_run_ids: string[];         // FK to EvidenceRecord.run_id (provenance — every playbook traces to source)
+  created_at: string;
+  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
+}
+
+// Validate an unknown value as a Playbook. Every field is checked (no
+// short-circuit). Beyond type checks, source_run_ids and
+// acceptance_criteria must each hold at least one entry.
+export function validatePlaybook(input: unknown): ValidationResult<Playbook> {
+  if (input === null || typeof input !== "object") {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const pb = input as Record<string, unknown>;
+  const errors: string[] = [];
+  let ok = true;
+
+  if (pb.schema_version !== PLAYBOOK_SCHEMA_VERSION) {
+    ok = false;
+    errors.push(`schema_version: expected ${PLAYBOOK_SCHEMA_VERSION}, got ${JSON.stringify(pb.schema_version)}`);
+  }
+
+  // Field lists are walked in a fixed order so error messages come out
+  // in a stable sequence.
+  const stringFields = [
+    "playbook_id", "task_type", "problem_pattern",
+    "recovery_strategy", "escalation_threshold",
+  ];
+  for (const field of stringFields) {
+    if (!requireString(pb[field], field, errors)) ok = false;
+  }
+
+  if (!requireIsoTimestamp(pb.created_at, "created_at", errors)) ok = false;
+
+  const stringArrayFields = [
+    "useful_context", "model_routing_path", "commands_worked",
+    "commands_failed", "validation_steps", "repo_files_touched",
+    "known_failure_modes", "acceptance_criteria", "source_run_ids",
+  ];
+  for (const field of stringArrayFields) {
+    if (!requireStringArray(pb[field], field, errors)) ok = false;
+  }
+
+  // Non-empty guards: only meaningful when the value is an array at all.
+  const runIds = pb.source_run_ids;
+  if (Array.isArray(runIds) && runIds.length === 0) {
+    ok = false;
+    errors.push("source_run_ids: must be non-empty — every playbook traces to source evidence (spec non-negotiable)");
+  }
+  const criteria = pb.acceptance_criteria;
+  if (Array.isArray(criteria) && criteria.length === 0) {
+    ok = false;
+    errors.push("acceptance_criteria: must be non-empty — every playbook needs success criteria (spec non-negotiable)");
+  }
+
+  if (!requireProvenance(pb.provenance, "provenance", errors)) ok = false;
+
+  return ok
+    ? { valid: true, value: pb as unknown as Playbook }
+    : { valid: false, errors };
+}
--- a/auditor/schemas/distillation/preference_sample.ts
+++ b/auditor/schemas/distillation/preference_sample.ts
@ -0,0 +1,60 @@
+// PreferenceSample — entry in exports/preference/chosen_rejected.jsonl.
+// Source: real disagreements (audit_discrepancies, scrum ladder retries).
+// Validator pins: chosen != rejected, chosen_run_id != rejected_run_id
+// (both present), reason is non-whitespace. No synthesized preferences.
+import {
+  ValidationResult, requireString, requireIsoTimestamp, requireProvenance,
+} from "./types";
+
+export const PREFERENCE_SAMPLE_SCHEMA_VERSION = 1;   // validatePreferenceSample rejects any other schema_version
+
+export interface PreferenceSample {
+  schema_version: number;          // must equal PREFERENCE_SAMPLE_SCHEMA_VERSION
+  id: string;
+  prompt: string;
+  chosen: string;                  // must differ from rejected
+  rejected: string;
+  reason: string;                  // why chosen > rejected — must be non-empty
+  chosen_run_id: string;           // must differ from rejected_run_id
+  rejected_run_id: string;
+  created_at: string;              // checked with requireIsoTimestamp
+  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
+}
+
+// Validates an untrusted value against the PreferenceSample contract.
+// Every check runs (no short-circuit), so `errors` reports all problems in
+// one pass. Returns { valid: true, value } only when nothing failed.
+export function validatePreferenceSample(input: unknown): ValidationResult<PreferenceSample> {
+  if (input === null || typeof input !== "object") {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const rec = input as Record<string, unknown>;
+  const errors: string[] = [];
+  let allPassed = true;
+
+  if (rec.schema_version !== PREFERENCE_SAMPLE_SCHEMA_VERSION) {
+    errors.push(`schema_version: expected ${PREFERENCE_SAMPLE_SCHEMA_VERSION}, got ${JSON.stringify(rec.schema_version)}`);
+    allPassed = false;
+  }
+
+  // Required strings, visited in a fixed order so error order stays stable.
+  const requiredStrings = ["id", "prompt", "chosen", "rejected", "reason", "chosen_run_id", "rejected_run_id"];
+  for (const name of requiredStrings) {
+    if (!requireString(rec[name], name, errors)) allPassed = false;
+  }
+  if (!requireIsoTimestamp(rec.created_at, "created_at", errors)) allPassed = false;
+  if (!requireProvenance(rec.provenance, "provenance", errors)) allPassed = false;
+
+  // Self-pairing guards: a sample must contrast two different outputs
+  // coming from two different runs.
+  if (typeof rec.chosen === "string" && rec.chosen === rec.rejected) {
+    errors.push("chosen and rejected must differ — preference data needs a real disagreement");
+    allPassed = false;
+  }
+  if (typeof rec.chosen_run_id === "string" && rec.chosen_run_id === rec.rejected_run_id) {
+    errors.push("chosen_run_id and rejected_run_id must differ — same run can't disagree with itself");
+    allPassed = false;
+  }
+
+  // A reason made only of whitespace carries no explanation.
+  if (typeof rec.reason === "string" && rec.reason.trim().length === 0) {
+    errors.push("reason: must be non-whitespace (every preference needs WHY chosen > rejected)");
+    allPassed = false;
+  }
+
+  return allPassed
+    ? { valid: true, value: rec as unknown as PreferenceSample }
+    : { valid: false, errors };
+}
--- a/auditor/schemas/distillation/rag_sample.ts
+++ b/auditor/schemas/distillation/rag_sample.ts
@ -0,0 +1,72 @@
+// RagSample — entry in exports/rag/playbooks.jsonl. Spec shape exactly,
+// plus provenance + success_score (so the index can re-rank by quality).
+import {
+  ValidationResult, requireString, requireNumber, requireIsoTimestamp, requireProvenance, requireStringArray,
+} from "./types";
+
+export const RAG_SAMPLE_SCHEMA_VERSION = 1;   // validateRagSample rejects any other schema_version
+
+// Allowed source_category values. RAG accepts accepted/partial freely;
+// needs_human_review is opt-in (must be tagged so consumers can filter
+// it out for SFT).
+export const RAG_ALLOWED_CATEGORIES = ["accepted", "partially_accepted", "needs_human_review"] as const;
+export type RagSourceCategory = (typeof RAG_ALLOWED_CATEGORIES)[number];   // union of the literals above
+
+export interface RagSample {
+  schema_version: number;
+  id: string;
+  title: string;
+  content: string;                                   // must contain non-whitespace text
+  tags: string[];                                    // checked with requireStringArray
+  source_run_id: string;
+  // Snapshot of the score the source carried at export time. Lets a
+  // consumer see "this was partial" without re-reading scored-runs.
+  success_score: RagSourceCategory;
+  // Same value as success_score by spec (now.md asks for both fields).
+  // Kept distinct so future schemas can diverge them (e.g. an
+  // "is_review_material" flag) without breaking old consumers.
+  source_category: RagSourceCategory;                // validator requires it to equal success_score
+  embedding_text: string;                            // the text to embed (often == content but can be shorter)
+  created_at: string;                                // checked with requireIsoTimestamp
+  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
+}
+
+// Validates an untrusted value against the RagSample contract.
+// All checks run so the caller gets the complete error list; the value is
+// returned (cast to RagSample) only when every check passed.
+export function validateRagSample(input: unknown): ValidationResult<RagSample> {
+  if (input === null || typeof input !== "object") {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const rec = input as Record<string, unknown>;
+  const errors: string[] = [];
+  let allPassed = true;
+
+  if (rec.schema_version !== RAG_SAMPLE_SCHEMA_VERSION) {
+    errors.push(`schema_version: expected ${RAG_SAMPLE_SCHEMA_VERSION}, got ${JSON.stringify(rec.schema_version)}`);
+    allPassed = false;
+  }
+
+  // Required strings, visited in a fixed order so error order stays stable.
+  const requiredStrings = ["id", "title", "content", "embedding_text", "source_run_id"];
+  for (const name of requiredStrings) {
+    if (!requireString(rec[name], name, errors)) allPassed = false;
+  }
+  if (!requireIsoTimestamp(rec.created_at, "created_at", errors)) allPassed = false;
+  if (!requireStringArray(rec.tags, "tags", errors)) allPassed = false;
+  if (!requireProvenance(rec.provenance, "provenance", errors)) allPassed = false;
+
+  // Category fields: each must be an allowed value, and the two must agree.
+  const isAllowed = (candidate: unknown): boolean =>
+    RAG_ALLOWED_CATEGORIES.includes(candidate as RagSourceCategory);
+  const allowedList = RAG_ALLOWED_CATEGORIES.join("|");
+
+  if (!isAllowed(rec.success_score)) {
+    errors.push(`success_score: must be one of ${allowedList} (rejected never enters RAG)`);
+    allPassed = false;
+  }
+  if (!isAllowed(rec.source_category)) {
+    errors.push(`source_category: must be one of ${allowedList}`);
+    allPassed = false;
+  }
+  if (rec.success_score !== rec.source_category) {
+    errors.push("success_score and source_category must match (mirrored fields per spec)");
+    allPassed = false;
+  }
+
+  // Whitespace-only content has nothing to retrieve.
+  if (typeof rec.content === "string" && rec.content.trim().length === 0) {
+    errors.push("content: must be non-whitespace");
+    allPassed = false;
+  }
+
+  return allPassed
+    ? { valid: true, value: rec as unknown as RagSample }
+    : { valid: false, errors };
+}
--- a/auditor/schemas/distillation/realdata.test.ts
+++ b/auditor/schemas/distillation/realdata.test.ts
@ -0,0 +1,286 @@
+// Real-data validation test — proves the EvidenceRecord schema fits
+// what we ALREADY produce, with the minimum transformation each source
+// stream requires. Doubles as the stale-extraction probe: if
+// distilled_facts.jsonl rows can't materialize, we know that stream
+// has rotted and Phase 2 sources from elsewhere.
+//
+// Strategy:
+//   1. Read first N rows from each source jsonl (skip if missing)
+//   2. Apply minimal transformer: add schema_version + provenance,
+//      synthesize run_id/task_id when source doesn't carry them
+//   3. Validate each materialized record
+//   4. Tally pass/fail per source + collect failure reasons
+//
+// This file is allowed to skip when source files don't exist (fresh
+// clone), so it acts as both a CI guard and a real-environment probe.
+
+import { test, expect } from "bun:test";
+import { existsSync, readFileSync } from "node:fs";
+import { resolve } from "node:path";
+
+import {
+  validateEvidenceRecord, EVIDENCE_SCHEMA_VERSION, EvidenceRecord, ModelRole,
+} from "./evidence_record";
+
+// Repo root the probes read from. Defaults to the original checkout location;
+// set LAKEHOUSE_ROOT to point the probe at a different checkout. An empty
+// value is treated as unset.
+const ROOT = process.env.LAKEHOUSE_ROOT || "/home/profit/lakehouse";
+// Maximum number of rows sampled from the head of each source file.
+const SAMPLE_PER_SOURCE = 10;
+
+interface SourceProbe {
+  source_file: string;   // path of the source jsonl, built from ROOT
+  transform: (row: any, lineNo: number) => Partial<EvidenceRecord> | null;   // a null result makes the probe loop skip the row
+}
+
+// Canonical 64-char synthetic sha256 for tests where the source row
+// lacks one. Pretends the materializer would compute it via
+// canonicalSha256(orderedKeys(row)) at Phase 2 time. We use a fixed
+// value here to keep the test deterministic; real materialization
+// re-hashes per row.
+const PLACEHOLDER_SHA = "0000000000000000000000000000000000000000000000000000000000000000";
+const RECORDED = "2026-04-26T22:30:00.000Z";   // fixed recorded_at stamped on every probe provenance
+
+// Builds the provenance block attached to a probed row.
+//   source_file — path of the source jsonl; the `${ROOT}/` prefix is stripped
+//   lineNo      — stored as line_offset
+//   sigHashRaw  — optional hash carried by the source row
+function provFor(source_file: string, lineNo: number, sigHashRaw?: string): EvidenceRecord["provenance"] {
+  // Rows are untyped JSON, so sig_hash can reach us as a non-string (e.g. a
+  // number). RegExp.test would coerce it to a string and padEnd would then
+  // throw, so only real strings are eligible; anything else gets the
+  // placeholder.
+  // Pad shorter hashes (distilled_* uses 16-char) to 64 — mimics
+  // canonical recompute.
+  const sig = typeof sigHashRaw === "string" && /^[0-9a-f]+$/.test(sigHashRaw)
+    ? sigHashRaw.padEnd(64, "0").slice(0, 64)
+    : PLACEHOLDER_SHA;
+  return {
+    source_file: source_file.replace(`${ROOT}/`, ""),
+    line_offset: lineNo,
+    sig_hash: sig,
+    recorded_at: RECORDED,
+  };
+}
+
+const PROBES: SourceProbe[] = [   // one entry per source stream; each transform maps a raw row to a partial EvidenceRecord
+  {
+    source_file: `${ROOT}/data/_kb/distilled_facts.jsonl`,
+    transform: (row: any, lineNo: number) => ({
+      run_id: String(row.run_id ?? `distilled_facts:${lineNo}`),   // synthesized when the row has no run_id
+      task_id: String(row.source_label ?? `distilled_facts:${lineNo}`),
+      timestamp: row.created_at,
+      schema_version: EVIDENCE_SCHEMA_VERSION,
+      provenance: provFor(`${ROOT}/data/_kb/distilled_facts.jsonl`, lineNo, row.sig_hash),
+      model_name: row.extractor,
+      model_role: "extractor" as ModelRole,
+      model_provider: "ollama",
+      text: row.text,
+    }),
+  },
+  {
+    source_file: `${ROOT}/data/_kb/distilled_procedures.jsonl`,
+    transform: (row: any, lineNo: number) => ({
+      run_id: String(row.run_id ?? `distilled_procedures:${lineNo}`),   // synthesized when the row has no run_id
+      task_id: String(row.source_label ?? `distilled_procedures:${lineNo}`),
+      timestamp: row.created_at,
+      schema_version: EVIDENCE_SCHEMA_VERSION,
+      provenance: provFor(`${ROOT}/data/_kb/distilled_procedures.jsonl`, lineNo, row.sig_hash),
+      model_name: row.extractor,
+      model_role: "extractor" as ModelRole,
+      model_provider: "ollama",
+      text: row.text,
+    }),
+  },
+  {
+    source_file: `${ROOT}/data/_kb/contract_analyses.jsonl`,
+    transform: (row: any, lineNo: number) => ({
+      run_id: `contract_analysis:${row.permit_id}:${new Date(row.ts).getTime()}`,   // getTime() is NaN when row.ts is missing or unparseable
+      task_id: `permit:${row.permit_id}`,
+      timestamp: row.ts,
+      schema_version: EVIDENCE_SCHEMA_VERSION,
+      provenance: provFor(`${ROOT}/data/_kb/contract_analyses.jsonl`, lineNo),
+      model_role: "executor" as ModelRole,
+      retrieved_context: {
+        matrix_corpora: Object.keys(row.matrix_corpora ?? {}),
+        matrix_hits: row.matrix_hits,
+      },
+      observer_notes: row.observer_notes ? [row.observer_notes].flat() : undefined,   // accepts a single note or an array of notes
+      observer_verdict: row.observer_verdict,
+      observer_confidence: row.observer_conf,
+      success_markers: row.ok ? ["matrix_hits_above_threshold"] : undefined,
+      failure_markers: !row.ok || row.observer_verdict === "reject" ? ["observer_rejected"] : undefined,   // NOTE(review): also tags "observer_rejected" when row.ok is falsy without a reject verdict — confirm intent
+      cost_usd: typeof row.cost === "number" ? row.cost / 1_000_000 : undefined,   // NOTE(review): presumably row.cost is in millionths of a USD — confirm
+      latency_ms: row.duration_ms,
+      text: row.analysis,
+    }),
+  },
+  {
+    source_file: `${ROOT}/data/_kb/mode_experiments.jsonl`,
+    transform: (row: any, lineNo: number) => ({
+      run_id: `mode_exec:${new Date(row.ts).getTime()}:${row.file_path ?? "?"}`,
+      task_id: row.task_class,
+      timestamp: row.ts,
+      schema_version: EVIDENCE_SCHEMA_VERSION,
+      provenance: provFor(`${ROOT}/data/_kb/mode_experiments.jsonl`, lineNo),
+      model_name: row.model,
+      model_role: "executor" as ModelRole,
+      model_provider: row.model?.includes("/") ? "openrouter" : "ollama_cloud",   // a "/" in the model name selects openrouter; a missing model falls to ollama_cloud
+      retrieved_context: {
+        matrix_corpora: row.sources?.matrix_corpus,
+        matrix_chunks_kept: row.sources?.matrix_chunks_kept,
+        matrix_chunks_dropped: row.sources?.matrix_chunks_dropped,
+        pathway_fingerprints_seen: row.sources?.bug_fingerprints_count,
+      },
+      latency_ms: row.latency_ms,
+      text: row.response,
+      source_files: row.file_path ? [row.file_path] : undefined,
+    }),
+  },
+  {
+    source_file: `${ROOT}/data/_kb/scrum_reviews.jsonl`,
+    transform: (row: any, lineNo: number) => ({
+      run_id: `scrum:${new Date(row.reviewed_at).getTime()}:${row.file}`,
+      task_id: `scrum_review:${row.file}`,
+      timestamp: row.reviewed_at,
+      schema_version: EVIDENCE_SCHEMA_VERSION,
+      provenance: provFor(`${ROOT}/data/_kb/scrum_reviews.jsonl`, lineNo),
+      model_name: row.accepted_model,
+      model_role: "executor" as ModelRole,
+      source_files: [row.file],
+      success_markers: row.accepted_on_attempt ? [`accepted_on_attempt_${row.accepted_on_attempt}`] : undefined,
+      text: row.suggestions_preview,
+    }),
+  },
+  {
+    source_file: `${ROOT}/data/_kb/observer_escalations.jsonl`,
+    transform: (row: any, lineNo: number) => ({
+      run_id: `obs_esc:${new Date(row.ts).getTime()}:${row.sig_hash}`,
+      task_id: `observer_escalation:${row.cluster_endpoint ?? "?"}`,
+      timestamp: row.ts,
+      schema_version: EVIDENCE_SCHEMA_VERSION,
+      provenance: provFor(`${ROOT}/data/_kb/observer_escalations.jsonl`, lineNo, row.sig_hash),
+      model_role: "reviewer" as ModelRole,
+      prompt_tokens: row.prompt_tokens,
+      completion_tokens: row.completion_tokens,
+      text: row.analysis,
+    }),
+  },
+  {
+    source_file: `${ROOT}/data/_kb/audit_facts.jsonl`,
+    transform: (row: any, lineNo: number) => ({
+      run_id: `audit_facts:${row.head_sha}:${lineNo}`,
+      task_id: `pr:${row.pr_number}`,
+      timestamp: row.extracted_at,
+      schema_version: EVIDENCE_SCHEMA_VERSION,
+      provenance: provFor(`${ROOT}/data/_kb/audit_facts.jsonl`, lineNo),
+      model_name: row.extractor,
+      model_role: "extractor" as ModelRole,
+      // Only the COUNTS of facts/entities/relationships go into text (as a
+      // JSON dump) for now; structured handling lives in Phase 2 where we
+      // map to specific EvidenceRecord substructures.
+      text: JSON.stringify({
+        facts: row.facts?.length ?? 0,
+        entities: row.entities?.length ?? 0,
+        relationships: row.relationships?.length ?? 0,
+      }),
+    }),
+  },
+];
+
+interface ProbeResult {
+  source_file: string;         // source path with the `${ROOT}/` prefix removed
+  rows_attempted: number;      // rows sampled, including ones that could not be parsed
+  rows_present: boolean;       // true when the source file exists on disk
+  passed: number;              // rows whose materialized record validated
+  failed: number;
+  failure_reasons: string[];   // unique error strings, top 5
+}
+
+const RESULTS: ProbeResult[] = [];   // filled by the per-source tests, read by the report test
+
+// Registers one bun test per probe. Each test samples the head of its source
+// file, materializes every row through the probe's transform, validates the
+// result and pushes a ProbeResult onto RESULTS for the report test to read.
+for (const probe of PROBES) {
+  const sourceLabel = probe.source_file.replace(`${ROOT}/`, "");
+
+  test(`real-data: ${sourceLabel}`, () => {
+    const result: ProbeResult = {
+      source_file: sourceLabel,
+      rows_attempted: 0,
+      rows_present: false,
+      passed: 0,
+      failed: 0,
+      failure_reasons: [],
+    };
+
+    if (!existsSync(probe.source_file)) {
+      RESULTS.push(result);
+      // Skip silently — fresh clones won't have these files
+      return;
+    }
+
+    result.rows_present = true;
+    const lines = readFileSync(probe.source_file, "utf8").split("\n").filter(Boolean).slice(0, SAMPLE_PER_SOURCE);
+    const reasons = new Set<string>();
+
+    for (let i = 0; i < lines.length; i++) {
+      result.rows_attempted++;
+
+      // A row that is not valid JSON is a materialization failure. Count it
+      // and record why, instead of dropping it with no trace in the report.
+      let row: unknown;
+      try { row = JSON.parse(lines[i]); }
+      catch {
+        result.failed++;
+        reasons.add("row is not valid JSON");
+        continue;
+      }
+
+      // Transforms dereference the row directly, so a row that parses to a
+      // non-object (e.g. `null`) makes them throw. Record that as a failure
+      // so the ProbeResult still reaches RESULTS.
+      let transformed: Partial<EvidenceRecord> | null;
+      try { transformed = probe.transform(row, i); }
+      catch (err) {
+        result.failed++;
+        reasons.add(`transform threw: ${err instanceof Error ? err.message : String(err)}`);
+        continue;
+      }
+      if (!transformed) continue;   // transform opted out of this row
+
+      const v = validateEvidenceRecord(transformed);
+      if (v.valid) result.passed++;
+      else {
+        result.failed++;
+        for (const e of v.errors) reasons.add(e);
+      }
+    }
+    result.failure_reasons = Array.from(reasons).slice(0, 5);
+    RESULTS.push(result);
+
+    // The probe never fails on per-row outcomes — those are reported in the
+    // markdown writeup. It only asserts the tally is self-consistent: every
+    // sampled row lands in at most one of passed/failed.
+    expect(result.passed + result.failed).toBeLessThanOrEqual(result.rows_attempted);
+  });
+}
+
+// Renders RESULTS (filled by the per-source tests) as a markdown report,
+// prints it, and writes it to data/_kb/realdata_validation_report.md.
+// The callback is async so the file write is awaited: a failed write then
+// surfaces as a test failure instead of a floating promise.
+test("real-data: emit markdown report", async () => {
+  const md: string[] = [];
+  md.push("# Real-data validation report");
+  md.push("");
+  md.push("Schema = EvidenceRecord v" + EVIDENCE_SCHEMA_VERSION + ". Sample = first " + SAMPLE_PER_SOURCE + " rows per source.");
+  md.push("");
+  md.push("| Source | Present | Rows | Pass | Fail | Pass% |");
+  md.push("|---|---|---|---|---|---|");
+  for (const r of RESULTS) {
+    const pct = r.rows_attempted > 0 ? Math.round(100 * r.passed / r.rows_attempted) + "%" : "—";
+    md.push(`| ${r.source_file} | ${r.rows_present ? "✓" : "—"} | ${r.rows_attempted} | ${r.passed} | ${r.failed} | ${pct} |`);
+  }
+  md.push("");
+  let hasFailures = false;
+  for (const r of RESULTS) {
+    if (r.failed > 0) {
+      hasFailures = true;
+      md.push(`## Failures in ${r.source_file}`);
+      for (const reason of r.failure_reasons) md.push(`- \`${reason}\``);
+      md.push("");
+    }
+  }
+  if (!hasFailures) {
+    md.push("**No failures across all probed sources.** Every materialized record validates against EvidenceRecord v1.");
+    md.push("");
+  }
+  // Stale extraction probe: explicit pass/fail
+  const distilledFacts = RESULTS.find(r => r.source_file.endsWith("distilled_facts.jsonl"));
+  const distilledProc = RESULTS.find(r => r.source_file.endsWith("distilled_procedures.jsonl"));
+  md.push("## Stale-extraction probe");
+  md.push("");
+  if (!distilledFacts) {
+    // The per-source test did not run (e.g. filtered out), so there is no
+    // evidence either way — do not report it as a materialization failure.
+    md.push(`- **distilled_facts.jsonl:** not probed in this run.`);
+  } else if (distilledFacts.rows_present && distilledFacts.passed > 0) {
+    md.push(`- **distilled_facts.jsonl:** ${distilledFacts.passed}/${distilledFacts.rows_attempted} materialize cleanly. Stream is alive at the schema level.`);
+  } else if (!distilledFacts.rows_present) {
+    md.push(`- **distilled_facts.jsonl:** missing — stale or never produced. Phase 2 sources from live streams instead.`);
+  } else {
+    md.push(`- **distilled_facts.jsonl:** present but materialization failures; treat as suspect, prefer mode_experiments + scrum_reviews.`);
+  }
+  if (distilledProc && distilledProc.rows_present && distilledProc.passed > 0) {
+    md.push(`- **distilled_procedures.jsonl:** ${distilledProc.passed}/${distilledProc.rows_attempted} materialize cleanly.`);
+  }
+  md.push("");
+
+  // Print first so the report is visible even if the write below fails,
+  // then write the markdown to a stable path.
+  const out = md.join("\n");
+  console.log("\n" + out);
+  await Bun.write(`${ROOT}/data/_kb/realdata_validation_report.md`, out);
+});
--- a/auditor/schemas/distillation/receipt.ts
+++ b/auditor/schemas/distillation/receipt.ts
@ -0,0 +1,111 @@
+// Receipt — per-pipeline-stage record with everything needed to
+// reproduce the run. Spec non-negotiable: substantive receipts, not
+// "ran successfully". Every field below has a deterministic source so
+// the receipt schema validator catches "I forgot to fill it in" the
+// same way it catches type errors.
+import {
+  ValidationResult, requireString, requireNumber, requireIsoTimestamp,
+} from "./types";
+
+export const RECEIPT_SCHEMA_VERSION = 1;   // validateReceipt rejects any other schema_version
+
+export interface FileReference {
+  path: string;     // relative to repo root
+  sha256: string;   // hex — validator requires exactly 64 lowercase hex chars
+  bytes?: number;   // optional but recommended
+}
+
+export interface Receipt {
+  schema_version: number;
+  command: string;             // shell-line or script identifier
+  git_sha: string;             // 40-char hex (full SHA1)
+  git_branch?: string;
+  git_dirty?: boolean;         // true if working tree had uncommitted changes
+  started_at: string;          // ISO 8601
+  ended_at: string;            // ISO 8601
+  duration_ms: number;         // checked with requireNumber
+  input_files: FileReference[];
+  output_files: FileReference[];
+  record_counts: {
+    in: number;
+    out: number;
+    [key: string]: number;     // per-stage extras (filtered, dropped, etc.)
+  };
+  validation_pass: boolean;    // explicit — never inferred
+  errors: string[];
+  warnings: string[];
+}
+
+// Checks a single FileReference entry. `field` is the label used as the
+// prefix of every message (e.g. "input_files[0]"). A message is appended to
+// `errors` for each problem; returns true when the entry is acceptable.
+function validateFileRef(v: unknown, field: string, errors: string[]): boolean {
+  if (v === null || typeof v !== "object") {
+    errors.push(`${field}: expected object`);
+    return false;
+  }
+  const ref = v as Record<string, unknown>;
+
+  const pathOk = requireString(ref.path, `${field}.path`, errors);
+
+  // sha256 must be exactly 64 lowercase hex characters.
+  const shaOk = typeof ref.sha256 === "string" && /^[0-9a-f]{64}$/.test(ref.sha256);
+  if (!shaOk) errors.push(`${field}.sha256: must be hex sha256`);
+
+  // bytes is optional; when supplied it has to be a number.
+  const bytesOk = ref.bytes === undefined || typeof ref.bytes === "number";
+  if (!bytesOk) errors.push(`${field}.bytes: expected number when present`);
+
+  return pathOk && shaOk && bytesOk;
+}
+
+// Validates an untrusted value against the Receipt contract.
+// Every check runs so `errors` lists all problems in one pass. On success the
+// input is returned cast to Receipt, so every field the interface types must
+// be checked here — including the optional git fields, the per-stage extras
+// in record_counts, and the element types of errors/warnings.
+export function validateReceipt(input: unknown): ValidationResult<Receipt> {
+  const errors: string[] = [];
+  if (typeof input !== "object" || input === null) {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const r = input as Record<string, unknown>;
+  let ok = true;
+
+  if (r.schema_version !== RECEIPT_SCHEMA_VERSION) {
+    errors.push(`schema_version: expected ${RECEIPT_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
+    ok = false;
+  }
+  ok = requireString(r.command, "command", errors) && ok;
+  if (typeof r.git_sha !== "string" || !/^[0-9a-f]{40}$/.test(r.git_sha as string)) {
+    errors.push("git_sha: must be 40-char hex");
+    ok = false;
+  }
+  // Optional fields: absent is fine, but a present value must have the
+  // declared type or the returned Receipt would lie about it.
+  if (r.git_branch !== undefined && typeof r.git_branch !== "string") {
+    errors.push("git_branch: expected string when present");
+    ok = false;
+  }
+  if (r.git_dirty !== undefined && typeof r.git_dirty !== "boolean") {
+    errors.push("git_dirty: expected boolean when present");
+    ok = false;
+  }
+  ok = requireIsoTimestamp(r.started_at, "started_at", errors) && ok;
+  ok = requireIsoTimestamp(r.ended_at, "ended_at", errors) && ok;
+  ok = requireNumber(r.duration_ms, "duration_ms", errors) && ok;
+  if (typeof r.validation_pass !== "boolean") {
+    errors.push("validation_pass: must be boolean (explicit, never inferred)");
+    ok = false;
+  }
+  if (!Array.isArray(r.input_files)) {
+    errors.push("input_files: expected array");
+    ok = false;
+  } else {
+    for (let i = 0; i < r.input_files.length; i++) {
+      if (!validateFileRef(r.input_files[i], `input_files[${i}]`, errors)) ok = false;
+    }
+  }
+  if (!Array.isArray(r.output_files)) {
+    errors.push("output_files: expected array");
+    ok = false;
+  } else {
+    for (let i = 0; i < r.output_files.length; i++) {
+      if (!validateFileRef(r.output_files[i], `output_files[${i}]`, errors)) ok = false;
+    }
+  }
+  if (typeof r.record_counts !== "object" || r.record_counts === null) {
+    errors.push("record_counts: expected object");
+    ok = false;
+  } else {
+    const rc = r.record_counts as Record<string, unknown>;
+    if (typeof rc.in !== "number") { errors.push("record_counts.in: expected number"); ok = false; }
+    if (typeof rc.out !== "number") { errors.push("record_counts.out: expected number"); ok = false; }
+    // Per-stage extras are typed `number` on Receipt, so they get the same check.
+    for (const [key, count] of Object.entries(rc)) {
+      if (key === "in" || key === "out") continue;
+      if (typeof count !== "number") {
+        errors.push(`record_counts.${key}: expected number`);
+        ok = false;
+      }
+    }
+  }
+  // errors / warnings are string[] on Receipt: check the container and its elements.
+  for (const field of ["errors", "warnings"]) {
+    const list = r[field];
+    if (!Array.isArray(list)) {
+      errors.push(`${field}: expected array`);
+      ok = false;
+    } else if (!list.every((item) => typeof item === "string")) {
+      errors.push(`${field}: expected array of strings`);
+      ok = false;
+    }
+  }
+
+  if (!ok) return { valid: false, errors };
+  return { valid: true, value: r as unknown as Receipt };
+}
--- a/auditor/schemas/distillation/run_summary.ts
+++ b/auditor/schemas/distillation/run_summary.ts
@ -0,0 +1,90 @@
+// run_summary.ts — aggregates StageReceipt rows for one run_id.
+// Spec field set: total records processed, total accepted/rejected/
+// quarantined, dataset sizes, validation status, overall hash of run.
+
+import {
+  ValidationResult, requireString, requireNumber, requireIsoTimestamp, requireSha256,
+} from "./types";
+import type { StageName } from "./stage_receipt";
+
+export const RUN_SUMMARY_SCHEMA_VERSION = 1;   // validateRunSummary rejects any other schema_version
+
+export interface RunStageSummary {
+  stage: StageName;
+  records_in: number;
+  records_out: number;
+  accepted: number;
+  rejected: number;
+  quarantined: number;
+  skipped: number;
+  passed: boolean;
+  duration_ms: number;
+  output_hash: string;
+}
+
+export interface RunSummary {
+  schema_version: number;
+  run_id: string;
+  started_at: string;        // earliest stage timestamp
+  ended_at: string;          // latest stage timestamp + duration
+  git_commit: string;        // validator requires 40 lowercase hex chars
+  stages: RunStageSummary[]; // NOTE(review): validateRunSummary only checks this is an array, not the entries
+  // Aggregates across stages
+  total_records_in: number;
+  total_records_out: number;
+  total_accepted: number;
+  total_rejected: number;
+  total_quarantined: number;
+  total_skipped: number;
+  // Dataset sizes — final outputs of each export stage
+  rag_records: number;
+  sft_records: number;
+  preference_pairs: number;
+  // Pipeline-wide pass = AND of every stage validation.passed
+  overall_passed: boolean;
+  // Run-wide hash: sha256 over each stage's output hash, sorted by stage name.
+  // Detects ANY change in any stage output across runs.
+  run_hash: string;          // checked with requireSha256
+  total_duration_ms: number;
+}
+
+// Validates an untrusted value against the RunSummary contract.
+// All checks run so the caller receives the full error list. `stages` is
+// only checked for being an array; its entries are not inspected here.
+export function validateRunSummary(input: unknown): ValidationResult<RunSummary> {
+  if (input === null || typeof input !== "object") {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const rec = input as Record<string, unknown>;
+  const errors: string[] = [];
+  let allPassed = true;
+
+  if (rec.schema_version !== RUN_SUMMARY_SCHEMA_VERSION) {
+    errors.push(`schema_version: expected ${RUN_SUMMARY_SCHEMA_VERSION}, got ${JSON.stringify(rec.schema_version)}`);
+    allPassed = false;
+  }
+  if (!requireString(rec.run_id, "run_id", errors)) allPassed = false;
+  if (!requireIsoTimestamp(rec.started_at, "started_at", errors)) allPassed = false;
+  if (!requireIsoTimestamp(rec.ended_at, "ended_at", errors)) allPassed = false;
+
+  // git_commit must be a 40-character lowercase hex string.
+  const commitOk = typeof rec.git_commit === "string" && /^[0-9a-f]{40}$/.test(rec.git_commit);
+  if (!commitOk) {
+    errors.push("git_commit: must be 40-char hex");
+    allPassed = false;
+  }
+  if (typeof rec.overall_passed !== "boolean") {
+    errors.push("overall_passed: must be boolean");
+    allPassed = false;
+  }
+  if (!requireSha256(rec.run_hash, "run_hash", errors)) allPassed = false;
+
+  // Aggregate counters, dataset sizes and total duration: all plain numbers.
+  const numericFields = [
+    "total_records_in", "total_records_out", "total_accepted", "total_rejected",
+    "total_quarantined", "total_skipped", "rag_records", "sft_records",
+    "preference_pairs", "total_duration_ms",
+  ];
+  for (const name of numericFields) {
+    if (typeof rec[name] !== "number") {
+      errors.push(`${name}: expected number`);
+      allPassed = false;
+    }
+  }
+
+  if (!Array.isArray(rec.stages)) {
+    errors.push("stages: expected array");
+    allPassed = false;
+  }
+
+  return allPassed
+    ? { valid: true, value: rec as unknown as RunSummary }
+    : { valid: false, errors };
+}
--- a/auditor/schemas/distillation/schemas.test.ts
+++ b/auditor/schemas/distillation/schemas.test.ts
@ -0,0 +1,367 @@
+// Combined schema tests for ScoredRun, Receipt, Playbook,
+// ScratchpadSummary, ModelLedgerEntry, RagSample, SftSample,
+// PreferenceSample. EvidenceRecord lives in its own file because it's
+// the foundational schema and warrants the JSON-fixture round-trip
+// pattern; the rest use inline fixture makers since they're simpler.
+//
+// Each schema: 1 positive fixture + 2-5 negative cases pinning the
+// non-negotiable invariants from now.md.
+//
+// Run: bun test auditor/schemas/distillation/schemas.test.ts
+
+import { test, expect } from "bun:test";
+
+import { validateScoredRun, SCORED_RUN_SCHEMA_VERSION } from "./scored_run";
+import { validateReceipt, RECEIPT_SCHEMA_VERSION } from "./receipt";
+import { validatePlaybook, PLAYBOOK_SCHEMA_VERSION } from "./playbook";
+import { validateScratchpadSummary, SCRATCHPAD_SCHEMA_VERSION } from "./scratchpad_summary";
+import { validateModelLedgerEntry, MODEL_LEDGER_SCHEMA_VERSION } from "./model_ledger";
+import { validateRagSample, RAG_SAMPLE_SCHEMA_VERSION } from "./rag_sample";
+import { validateSftSample, SFT_SAMPLE_SCHEMA_VERSION } from "./sft_sample";
+import { validatePreferenceSample, PREFERENCE_SAMPLE_SCHEMA_VERSION } from "./preference_sample";
+
+// Shared literals reused by every fixture below.
+// NOW: a fixed timestamp string so fixtures are deterministic.
+const NOW = "2026-04-26T22:30:00.000Z";
+// SHA: 64 lowercase hex chars, used wherever a sha256 digest is required.
+const SHA = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
+// GIT_SHA: 40 lowercase hex chars, used wherever a git commit id is required.
+const GIT_SHA = "f753e11157eef753e11157eef753e11157eef753";
+
+// Provenance object embedded in each fixture that carries a `provenance` field.
+const PROVENANCE = {
+  source_file: "data/_kb/scored_runs.jsonl",
+  line_offset: 0,
+  sig_hash: SHA,
+  recorded_at: NOW,
+};
+
+// ─── ScoredRun ───────────────────────────────────────────────────────
+
+// Baseline ScoredRun expected to validate; each negative case overrides one field.
+const SCORED_RUN_OK = {
+  schema_version: SCORED_RUN_SCHEMA_VERSION,
+  evidence_run_id: "run-abc",
+  evidence_task_id: "task-abc",
+  category: "accepted",
+  reasons: ["cargo_green=true", "anchor_grounding=0.95"],
+  scored_at: NOW,
+  scorer_version: "v1.0.0",
+  sub_scores: { cargo_green: true, anchor_grounding: 0.95 },
+  provenance: PROVENANCE,
+};
+
+test("ScoredRun: positive validates", () => {
+  const outcome = validateScoredRun(SCORED_RUN_OK);
+  // Surface the validator's complaints when the baseline unexpectedly fails.
+  if (!outcome.valid) console.error(outcome.errors);
+  expect(outcome.valid).toBe(true);
+});
+
+test("ScoredRun: empty reasons rejected (every score needs a reason)", () => {
+  const candidate = { ...SCORED_RUN_OK, reasons: [] };
+  const outcome = validateScoredRun(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("reasons"))).toBe(true);
+});
+
+test("ScoredRun: invalid category rejected", () => {
+  const candidate = { ...SCORED_RUN_OK, category: "maybe_ok" };
+  const outcome = validateScoredRun(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("category"))).toBe(true);
+});
+
+test("ScoredRun: anchor_grounding > 1 rejected (must be in [0, 1])", () => {
+  const outOfRange = { ...SCORED_RUN_OK.sub_scores, anchor_grounding: 1.5 };
+  const outcome = validateScoredRun({ ...SCORED_RUN_OK, sub_scores: outOfRange });
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("anchor_grounding"))).toBe(true);
+});
+
+// ─── Receipt ─────────────────────────────────────────────────────────
+
+// Baseline Receipt expected to validate; each negative case overrides one field.
+const RECEIPT_OK = {
+  schema_version: RECEIPT_SCHEMA_VERSION,
+  command: "bun run scripts/build_evidence_index.ts",
+  git_sha: GIT_SHA,
+  git_branch: "scrum/auto-apply-19814",
+  git_dirty: false,
+  started_at: NOW,
+  ended_at: NOW,
+  duration_ms: 1234,
+  input_files: [{ path: "data/_kb/scrum_reviews.jsonl", sha256: SHA, bytes: 448000 }],
+  output_files: [{ path: "data/evidence/2026/04/26/run.jsonl", sha256: SHA }],
+  record_counts: { in: 100, out: 95, filtered: 5 },
+  validation_pass: true,
+  errors: [],
+  warnings: [],
+};
+
+test("Receipt: positive validates", () => {
+  const outcome = validateReceipt(RECEIPT_OK);
+  // Surface the validator's complaints when the baseline unexpectedly fails.
+  if (!outcome.valid) console.error(outcome.errors);
+  expect(outcome.valid).toBe(true);
+});
+
+test("Receipt: bad git_sha rejected (must be 40-char hex)", () => {
+  const candidate = { ...RECEIPT_OK, git_sha: "abc123" };
+  const outcome = validateReceipt(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("git_sha"))).toBe(true);
+});
+
+test("Receipt: validation_pass must be boolean (never inferred)", () => {
+  const candidate = { ...RECEIPT_OK, validation_pass: "yes" };
+  const outcome = validateReceipt(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("validation_pass"))).toBe(true);
+});
+
+test("Receipt: file refs without proper sha256 rejected", () => {
+  const badRef = { path: "x", sha256: "short" };
+  const outcome = validateReceipt({ ...RECEIPT_OK, output_files: [badRef] });
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("sha256"))).toBe(true);
+});
+
+// ─── Playbook ────────────────────────────────────────────────────────
+
+// Baseline Playbook expected to validate; each negative case overrides one field.
+const PLAYBOOK_OK = {
+  schema_version: PLAYBOOK_SCHEMA_VERSION,
+  playbook_id: "pb-scrum-review-001",
+  task_type: "scrum_review",
+  problem_pattern: "Cargo workspace warning escalation after applier patch",
+  useful_context: ["pathway memory bug fingerprints for the file area"],
+  model_routing_path: ["x-ai/grok-4.1-fast"],
+  commands_worked: ["cargo check --workspace"],
+  commands_failed: [],
+  validation_steps: ["warning count must not increase"],
+  repo_files_touched: ["crates/queryd/src/service.rs"],
+  recovery_strategy: "git checkout -- file when cargo red",
+  known_failure_modes: ["unused import noise"],
+  escalation_threshold: "use kimi-k2:1t when isolation mode rejects 2 attempts",
+  acceptance_criteria: ["cargo green", "warning count stable", "rationale-diff aligned"],
+  source_run_ids: ["run-xyz", "run-abc"],
+  created_at: NOW,
+  provenance: PROVENANCE,
+};
+
+test("Playbook: positive validates", () => {
+  const outcome = validatePlaybook(PLAYBOOK_OK);
+  // Surface the validator's complaints when the baseline unexpectedly fails.
+  if (!outcome.valid) console.error(outcome.errors);
+  expect(outcome.valid).toBe(true);
+});
+
+test("Playbook: empty source_run_ids rejected (every playbook traces to source — spec)", () => {
+  const candidate = { ...PLAYBOOK_OK, source_run_ids: [] };
+  const outcome = validatePlaybook(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("source_run_ids"))).toBe(true);
+});
+
+test("Playbook: empty acceptance_criteria rejected (every playbook needs success criteria — spec)", () => {
+  const candidate = { ...PLAYBOOK_OK, acceptance_criteria: [] };
+  const outcome = validatePlaybook(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("acceptance_criteria"))).toBe(true);
+});
+
+// ─── ScratchpadSummary ───────────────────────────────────────────────
+
+// Baseline ScratchpadSummary expected to validate; negatives override one field.
+const SCRATCHPAD_OK = {
+  schema_version: SCRATCHPAD_SCHEMA_VERSION,
+  run_id: "run-abc",
+  current_objective: "verify pr_audit mode end-to-end",
+  completed_steps: ["restart gateway"],
+  failed_steps: ["cloud chat returned 500"],
+  pending_steps: ["swap default model"],
+  important_paths: ["auditor/checks/inference.ts"],
+  decisions: ["defer kimi-k2 swap until upstream returns"],
+  unresolved_questions: ["does deepseek match kimi quality?"],
+  validation_status: "partial",
+  next_command: "bun run auditor/audit_one.ts 11",
+  source_scratchpad_hash: SHA,
+  summarized_at: NOW,
+  provenance: PROVENANCE,
+};
+
+test("ScratchpadSummary: positive validates", () => {
+  const outcome = validateScratchpadSummary(SCRATCHPAD_OK);
+  // Surface the validator's complaints when the baseline unexpectedly fails.
+  if (!outcome.valid) console.error(outcome.errors);
+  expect(outcome.valid).toBe(true);
+});
+
+test("ScratchpadSummary: invalid validation_status rejected", () => {
+  const candidate = { ...SCRATCHPAD_OK, validation_status: "tbd" };
+  const outcome = validateScratchpadSummary(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("validation_status"))).toBe(true);
+});
+
+test("ScratchpadSummary: short scratchpad_hash rejected", () => {
+  const candidate = { ...SCRATCHPAD_OK, source_scratchpad_hash: "short" };
+  const outcome = validateScratchpadSummary(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("source_scratchpad_hash"))).toBe(true);
+});
+
+// ─── ModelLedgerEntry ────────────────────────────────────────────────
+
+// Baseline ModelLedgerEntry expected to validate; negatives override one field.
+const LEDGER_OK = {
+  schema_version: MODEL_LEDGER_SCHEMA_VERSION,
+  model_name: "kimi-k2:1t",
+  model_provider: "ollama_cloud",
+  task_type: "pr_audit",
+  success_rate: 0.85,
+  failure_modes: ["upstream_500", "context_truncation"],
+  best_partner_model: "x-ai/grok-4.1-fast",
+  escalation_role: "primary",
+  cost_usd_p50: 0.0002,
+  latency_ms_p50: 50000,
+  latency_ms_p95: 90000,
+  context_window: 200000,
+  sample_count: 47,
+  last_updated: NOW,
+};
+
+test("ModelLedgerEntry: positive validates", () => {
+  const outcome = validateModelLedgerEntry(LEDGER_OK);
+  // Surface the validator's complaints when the baseline unexpectedly fails.
+  if (!outcome.valid) console.error(outcome.errors);
+  expect(outcome.valid).toBe(true);
+});
+
+test("ModelLedgerEntry: success_rate > 1 rejected", () => {
+  const candidate = { ...LEDGER_OK, success_rate: 1.5 };
+  const outcome = validateModelLedgerEntry(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("success_rate"))).toBe(true);
+});
+
+test("ModelLedgerEntry: zero sample_count rejected (no aggregate from zero)", () => {
+  const candidate = { ...LEDGER_OK, sample_count: 0 };
+  const outcome = validateModelLedgerEntry(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("sample_count"))).toBe(true);
+});
+
+// ─── RagSample ───────────────────────────────────────────────────────
+
+// Baseline RagSample expected to validate; negatives override one or two fields.
+const RAG_OK = {
+  schema_version: RAG_SAMPLE_SCHEMA_VERSION,
+  id: "rag-pb-001",
+  title: "Scrum applier rationale-diff alignment",
+  content: "When the applier emits a patch with rationale claiming X but the diff shows Y, the rationale-token alignment gate catches it...",
+  tags: ["scrum_review", "applier"],
+  source_run_id: "run-xyz",
+  success_score: "accepted",
+  source_category: "accepted",
+  embedding_text: "applier rationale-diff alignment guard scrum",
+  created_at: NOW,
+  provenance: PROVENANCE,
+};
+
+test("RagSample: positive validates", () => {
+  const outcome = validateRagSample(RAG_OK);
+  // Surface the validator's complaints when the baseline unexpectedly fails.
+  if (!outcome.valid) console.error(outcome.errors);
+  expect(outcome.valid).toBe(true);
+});
+
+test("RagSample: success_score=rejected forbidden (RAG never takes rejected)", () => {
+  const candidate = { ...RAG_OK, success_score: "rejected", source_category: "rejected" };
+  const outcome = validateRagSample(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("success_score"))).toBe(true);
+});
+
+test("RagSample: success_score and source_category must match", () => {
+  const candidate = { ...RAG_OK, success_score: "accepted", source_category: "partially_accepted" };
+  const outcome = validateRagSample(candidate);
+  expect(outcome.valid).toBe(false);
+});
+
+test("RagSample: whitespace-only content rejected", () => {
+  const candidate = { ...RAG_OK, content: "   \n  " };
+  const outcome = validateRagSample(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("content"))).toBe(true);
+});
+
+// ─── SftSample (the strict one) ──────────────────────────────────────
+
+// Baseline SftSample expected to validate; later cases override one field.
+const SFT_OK = {
+  schema_version: SFT_SAMPLE_SCHEMA_VERSION,
+  id: "sft-pr11-001",
+  instruction: "Audit this PR diff against ship-claims.",
+  context: "claims: 3 strong, 2 moderate",
+  response: "{\"claim_verdicts\": [...]}",
+  source_run_id: "run-pr11",
+  quality_score: "accepted",
+  created_at: NOW,
+  provenance: PROVENANCE,
+};
+
+test("SftSample: positive validates", () => {
+  const outcome = validateSftSample(SFT_OK);
+  // Surface the validator's complaints when the baseline unexpectedly fails.
+  if (!outcome.valid) console.error(outcome.errors);
+  expect(outcome.valid).toBe(true);
+});
+
+test("SftSample: quality_score=partially_accepted ACCEPTED (--include-partial path)", () => {
+  // Phase 4 update: partial allowed at schema layer; CLI gate decides.
+  const candidate = { ...SFT_OK, quality_score: "partially_accepted" };
+  const outcome = validateSftSample(candidate);
+  expect(outcome.valid).toBe(true);
+});
+
+test("SftSample: quality_score=rejected REJECTED (spec non-negotiable, no leak)", () => {
+  const candidate = { ...SFT_OK, quality_score: "rejected" };
+  const outcome = validateSftSample(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("quality_score"))).toBe(true);
+});
+
+test("SftSample: quality_score=needs_human_review REJECTED (no leak)", () => {
+  const r = validateSftSample({ ...SFT_OK, quality_score: "needs_human_review" });
+  expect(r.valid).toBe(false);
+  // Pin the failing field so this test cannot pass because of an unrelated
+  // validation error (same pattern as the quality_score=rejected case).
+  if (!r.valid) expect(r.errors.some(e => e.includes("quality_score"))).toBe(true);
+});
+
+test("SftSample: missing context rejected (must be string, even if empty)", () => {
+  // Copy the baseline without its `context` key.
+  const { context: _omitted, ...withoutContext } = SFT_OK;
+  const outcome = validateSftSample(withoutContext);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("context"))).toBe(true);
+});
+
+test("SftSample: empty-string context allowed", () => {
+  const candidate = { ...SFT_OK, context: "" };
+  const outcome = validateSftSample(candidate);
+  expect(outcome.valid).toBe(true);
+});
+
+test("SftSample: empty response rejected (no empty pairs)", () => {
+  const candidate = { ...SFT_OK, response: "" };
+  const outcome = validateSftSample(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("response"))).toBe(true);
+});
+
+test("SftSample: whitespace-only instruction rejected", () => {
+  const candidate = { ...SFT_OK, instruction: "  \t\n " };
+  const outcome = validateSftSample(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("instruction"))).toBe(true);
+});
+
+// ─── PreferenceSample ────────────────────────────────────────────────
+
+// Baseline PreferenceSample expected to validate; negatives override fields.
+const PREF_OK = {
+  schema_version: PREFERENCE_SAMPLE_SCHEMA_VERSION,
+  id: "pref-task-x-001",
+  prompt: "Verify claim: 'all 3 services running on matrix-test'",
+  chosen: "{\"backed\": true, \"evidence\": \"systemctl status confirms 3 active\"}",
+  rejected: "{\"backed\": true, \"evidence\": \"the README says so\"}",
+  reason: "chosen cites runtime evidence, rejected cites doc claim only",
+  chosen_run_id: "run-A",
+  rejected_run_id: "run-B",
+  created_at: NOW,
+  provenance: PROVENANCE,
+};
+
+test("PreferenceSample: positive validates", () => {
+  const outcome = validatePreferenceSample(PREF_OK);
+  // Surface the validator's complaints when the baseline unexpectedly fails.
+  if (!outcome.valid) console.error(outcome.errors);
+  expect(outcome.valid).toBe(true);
+});
+
+test("PreferenceSample: chosen == rejected rejected (no self-pairing)", () => {
+  const candidate = { ...PREF_OK, chosen: "x", rejected: "x" };
+  const outcome = validatePreferenceSample(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("chosen and rejected"))).toBe(true);
+});
+
+test("PreferenceSample: chosen_run_id == rejected_run_id rejected (no self-disagreement)", () => {
+  const candidate = { ...PREF_OK, chosen_run_id: "run-A", rejected_run_id: "run-A" };
+  const outcome = validatePreferenceSample(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("chosen_run_id"))).toBe(true);
+});
+
+test("PreferenceSample: empty reason rejected (every preference needs WHY)", () => {
+  const candidate = { ...PREF_OK, reason: "  " };
+  const outcome = validatePreferenceSample(candidate);
+  expect(outcome.valid).toBe(false);
+  if (outcome.valid) return;
+  expect(outcome.errors.some(msg => msg.includes("reason"))).toBe(true);
+});
--- a/auditor/schemas/distillation/scored_run.ts
+++ b/auditor/schemas/distillation/scored_run.ts
@ -0,0 +1,86 @@
+// ScoredRun — output of the deterministic Success Scorer (Phase 3).
+// Spec mandates 4 categories with explicit reasons; we add scorer
+// versioning so a future scorer change is detectable in historical data.
+import {
+  ValidationResult, requireString, requireIsoTimestamp, requireProvenance, requireStringArray, requireNumber,
+} from "./types";
+
+// Version stamp that validateScoredRun requires in `schema_version`.
+export const SCORED_RUN_SCHEMA_VERSION = 1;
+// The closed set of categories a run can be scored into.
+export const SCORE_CATEGORIES = ["accepted", "partially_accepted", "rejected", "needs_human_review"] as const;
+// Union of the literal strings in SCORE_CATEGORIES.
+export type ScoreCategory = (typeof SCORE_CATEGORIES)[number];
+
+// One scored run: the category assigned to an evidence record plus the
+// reasons and scorer version that produced it.
+export interface ScoredRun {
+  schema_version: number;
+  evidence_run_id: string;     // FK to EvidenceRecord.run_id
+  evidence_task_id: string;    // FK to EvidenceRecord.task_id
+  category: ScoreCategory;
+  reasons: string[];           // human-readable, e.g. ["cargo_green=true", "anchor_grounding<0.7"]
+  scored_at: string;           // ISO 8601
+  scorer_version: string;      // e.g. "v1.0.0" — bumped on scorer code change
+  // Sub-scores that the scorer collapsed into the category. Persisted
+  // so a downstream UI can show "why" without re-running the scorer.
+  // The index signature lets a scorer attach extra keys beyond the
+  // named ones.
+  sub_scores?: {
+    cargo_green?: boolean;
+    anchor_grounding?: number;
+    schema_valid?: boolean;
+    pathway_replay_succeeded?: boolean;
+    observer_verdict?: "accept" | "reject" | "cycle";
+    [key: string]: unknown;
+  };
+  // Where the scored row came from; validated by requireProvenance.
+  provenance: {
+    source_file: string;
+    line_offset?: number;
+    sig_hash: string;
+    recorded_at: string;
+  };
+}
+
+// Validate an untrusted value as a ScoredRun.
+//
+// Every violation is appended to `errors` (not just the first). Returns
+// `{ valid: true, value }` when all checks pass, else `{ valid: false, errors }`.
+// `sub_scores` is optional; when present it must be a plain (non-array)
+// object, and each named sub-score that is present must have the type the
+// ScoredRun interface declares. Extra keys in `sub_scores` are not inspected.
+export function validateScoredRun(input: unknown): ValidationResult<ScoredRun> {
+  const errors: string[] = [];
+  if (typeof input !== "object" || input === null) {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const r = input as Record<string, unknown>;
+  let ok = true;
+  if (r.schema_version !== SCORED_RUN_SCHEMA_VERSION) {
+    errors.push(`schema_version: expected ${SCORED_RUN_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
+    ok = false;
+  }
+  ok = requireString(r.evidence_run_id, "evidence_run_id", errors) && ok;
+  ok = requireString(r.evidence_task_id, "evidence_task_id", errors) && ok;
+  ok = requireIsoTimestamp(r.scored_at, "scored_at", errors) && ok;
+  ok = requireString(r.scorer_version, "scorer_version", errors) && ok;
+  ok = requireStringArray(r.reasons, "reasons", errors) && ok;
+  if (Array.isArray(r.reasons) && r.reasons.length === 0) {
+    errors.push("reasons: must be non-empty (every score must have at least one reason)");
+    ok = false;
+  }
+  if (!SCORE_CATEGORIES.includes(r.category as ScoreCategory)) {
+    errors.push(`category: must be one of ${SCORE_CATEGORIES.join("|")}, got ${JSON.stringify(r.category)}`);
+    ok = false;
+  }
+  ok = requireProvenance(r.provenance, "provenance", errors) && ok;
+
+  if (r.sub_scores !== undefined) {
+    // Arrays are `typeof "object"` too, so exclude them explicitly.
+    if (typeof r.sub_scores !== "object" || r.sub_scores === null || Array.isArray(r.sub_scores)) {
+      errors.push("sub_scores: expected object when present");
+      ok = false;
+    } else {
+      const ss = r.sub_scores as Record<string, unknown>;
+      // The result is cast to ScoredRun, so the boolean sub-scores the
+      // interface declares must actually be booleans when supplied.
+      for (const flag of ["cargo_green", "schema_valid", "pathway_replay_succeeded"]) {
+        if (ss[flag] !== undefined && typeof ss[flag] !== "boolean") {
+          errors.push(`sub_scores.${flag}: expected boolean when present`);
+          ok = false;
+        }
+      }
+      if (ss.anchor_grounding !== undefined) {
+        if (!requireNumber(ss.anchor_grounding, "sub_scores.anchor_grounding", errors)) ok = false;
+        else if ((ss.anchor_grounding as number) < 0 || (ss.anchor_grounding as number) > 1) {
+          errors.push("sub_scores.anchor_grounding: must be in [0, 1]");
+          ok = false;
+        }
+      }
+      if (ss.observer_verdict !== undefined && !["accept", "reject", "cycle"].includes(ss.observer_verdict as string)) {
+        errors.push("sub_scores.observer_verdict: must be accept|reject|cycle");
+        ok = false;
+      }
+    }
+  }
+
+  if (!ok) return { valid: false, errors };
+  return { valid: true, value: r as unknown as ScoredRun };
+}
--- a/auditor/schemas/distillation/scratchpad_summary.ts
+++ b/auditor/schemas/distillation/scratchpad_summary.ts
@ -0,0 +1,65 @@
+// ScratchpadSummary — structured normalization of a tree-split or
+// long-running scratchpad. Distinct from EvidenceRecord because a
+// scratchpad accumulates across many calls; this schema captures the
+// state at a checkpoint moment.
+import {
+  ValidationResult, requireString, requireIsoTimestamp, requireProvenance, requireStringArray,
+} from "./types";
+
+// Version stamp that validateScratchpadSummary requires in `schema_version`.
+export const SCRATCHPAD_SCHEMA_VERSION = 1;
+
+// Checkpoint snapshot of a scratchpad: objective, step lists, decisions,
+// open questions, and a hash of the source text it was summarized from.
+export interface ScratchpadSummary {
+  schema_version: number;
+  run_id: string;
+  current_objective: string;
+  completed_steps: string[];
+  failed_steps: string[];
+  pending_steps: string[];
+  important_paths: string[];        // file paths the scratchpad references
+  decisions: string[];              // architectural/scope decisions made
+  unresolved_questions: string[];
+  validation_status: "pass" | "fail" | "partial" | "pending";
+  next_command?: string;            // recommendation for next action
+  source_scratchpad_hash: string;   // sha256 of the full source scratchpad text — diff detection
+  summarized_at: string;            // ISO 8601
+  // Where the summary row came from; validated by requireProvenance.
+  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
+}
+
+// Legal values for ScratchpadSummary.validation_status.
+const STATUS = ["pass", "fail", "partial", "pending"];
+
+// Checks an unknown value against the ScratchpadSummary shape, gathering
+// every violation. Yields `{ valid: true, value }` on success and
+// `{ valid: false, errors }` otherwise.
+export function validateScratchpadSummary(input: unknown): ValidationResult<ScratchpadSummary> {
+  if (typeof input !== "object" || input === null) return { valid: false, errors: ["expected object"] };
+  const errors: string[] = [];
+  const r = input as Record<string, unknown>;
+  let ok = true;
+
+  if (r.schema_version !== SCRATCHPAD_SCHEMA_VERSION) {
+    ok = false;
+    errors.push(`schema_version: expected ${SCRATCHPAD_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
+  }
+  if (!requireString(r.run_id, "run_id", errors)) ok = false;
+  if (!requireString(r.current_objective, "current_objective", errors)) ok = false;
+  if (!requireIsoTimestamp(r.summarized_at, "summarized_at", errors)) ok = false;
+
+  // 64 lowercase hex chars.
+  const digest = r.source_scratchpad_hash;
+  const digestLooksValid = typeof digest === "string" && /^[0-9a-f]{64}$/.test(digest);
+  if (!digestLooksValid) {
+    ok = false;
+    errors.push("source_scratchpad_hash: must be hex sha256");
+  }
+
+  // All list-valued fields share the same string-array check, in this order.
+  const listFields = [
+    "completed_steps", "failed_steps", "pending_steps",
+    "important_paths", "decisions", "unresolved_questions",
+  ];
+  for (const name of listFields) {
+    if (!requireStringArray(r[name], name, errors)) ok = false;
+  }
+
+  if (!STATUS.includes(r.validation_status as string)) {
+    ok = false;
+    errors.push(`validation_status: must be one of ${STATUS.join("|")}`);
+  }
+  // next_command is optional, but must be a string when supplied.
+  const nextCommandOk = r.next_command === undefined || typeof r.next_command === "string";
+  if (!nextCommandOk) {
+    ok = false;
+    errors.push("next_command: expected string when present");
+  }
+  if (!requireProvenance(r.provenance, "provenance", errors)) ok = false;
+
+  return ok
+    ? { valid: true, value: r as unknown as ScratchpadSummary }
+    : { valid: false, errors };
+}
--- a/auditor/schemas/distillation/sft_sample.ts
+++ b/auditor/schemas/distillation/sft_sample.ts
@ -0,0 +1,69 @@
+// SftSample — entry in exports/sft/instruction_response.jsonl. Spec
+// non-negotiable: rejected and needs_human_review runs never ship;
+// partially_accepted is schema-legal but gated by --include-partial.
+// Validator enforces that allow-list — exporters can't bypass.
+import {
+  ValidationResult, requireString, requireIsoTimestamp, requireProvenance, requireNumber,
+} from "./types";
+
+// Version stamp that validateSftSample requires in `schema_version`.
+export const SFT_SAMPLE_SCHEMA_VERSION = 1;
+
+// SFT default: only `accepted` ships. With --include-partial CLI flag,
+// `partially_accepted` becomes legal. `rejected` and `needs_human_review`
+// NEVER ship to SFT — that's the contamination firewall.
+export const SFT_QUALITY_SCORES = ["accepted", "partially_accepted"] as const;
+// Union of the literal strings in SFT_QUALITY_SCORES.
+export type SftQualityScore = (typeof SFT_QUALITY_SCORES)[number];
+
+// One instruction/response training pair together with the run it was
+// drawn from and the quality category that admitted it.
+export interface SftSample {
+  schema_version: number;
+  id: string;
+  instruction: string;             // the prompt / user message
+  context: string;                 // retrieved context that was visible (empty string allowed; null/undefined not)
+  response: string;                // the model output that was accepted
+  source_run_id: string;
+  quality_score: SftQualityScore;
+  created_at: string;
+  // Where the sample row came from; validated by requireProvenance.
+  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
+}
+
+// Checks an unknown value against the SftSample shape, gathering every
+// violation. Yields `{ valid: true, value }` on success and
+// `{ valid: false, errors }` otherwise.
+export function validateSftSample(input: unknown): ValidationResult<SftSample> {
+  if (typeof input !== "object" || input === null) return { valid: false, errors: ["expected object"] };
+  const errors: string[] = [];
+  const r = input as Record<string, unknown>;
+  let ok = true;
+
+  if (r.schema_version !== SFT_SAMPLE_SCHEMA_VERSION) {
+    ok = false;
+    errors.push(`schema_version: expected ${SFT_SAMPLE_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
+  }
+  if (!requireString(r.id, "id", errors)) ok = false;
+  if (!requireString(r.instruction, "instruction", errors)) ok = false;
+  if (!requireString(r.response, "response", errors)) ok = false;
+  if (!requireString(r.source_run_id, "source_run_id", errors)) ok = false;
+  if (!requireIsoTimestamp(r.created_at, "created_at", errors)) ok = false;
+  if (!requireProvenance(r.provenance, "provenance", errors)) ok = false;
+
+  // Empty pair guard: a string that trims to nothing is not a usable half.
+  const isBlankString = (v: unknown): boolean => typeof v === "string" && v.trim().length === 0;
+  if (isBlankString(r.instruction)) {
+    ok = false;
+    errors.push("instruction: must be non-whitespace (no empty pairs)");
+  }
+  if (isBlankString(r.response)) {
+    ok = false;
+    errors.push("response: must be non-whitespace (no empty pairs)");
+  }
+
+  // Context must be a string, but "" is fine (some SFT samples are pure
+  // instruction→response with no retrieval context).
+  const contextIsString = typeof r.context === "string";
+  if (!contextIsString) {
+    ok = false;
+    errors.push("context: expected string (use empty string for no-context samples)");
+  }
+
+  // The non-negotiable: quality_score must be in SFT_QUALITY_SCORES.
+  // Anything else is a leak.
+  const scoreAllowed = SFT_QUALITY_SCORES.includes(r.quality_score as SftQualityScore);
+  if (!scoreAllowed) {
+    ok = false;
+    errors.push(`quality_score: must be one of ${SFT_QUALITY_SCORES.join("|")} (no rejected/needs_human leak into SFT — spec non-negotiable). Got ${JSON.stringify(r.quality_score)}`);
+  }
+
+  return ok
+    ? { valid: true, value: r as unknown as SftSample }
+    : { valid: false, errors };
+}
--- a/auditor/schemas/distillation/stage_receipt.ts
+++ b/auditor/schemas/distillation/stage_receipt.ts
@ -0,0 +1,190 @@
+// stage_receipt.ts — forensic-grade per-stage receipt.
+//
+// Distinct from auditor/schemas/distillation/receipt.ts (Phase 1):
+// - Phase 1 Receipt is per-script invocation, format inherited from
+//   the early auditor wiring
+// - StageReceipt (THIS file) matches the now.md Phase 5 spec exactly
+//   and is the canonical artifact for pipeline observability
+//
+// Every pipeline stage (collect, score, export-rag, export-sft,
+// export-preference, future extract-playbooks/index) emits ONE
+// StageReceipt per run. Receipts are joined by `run_id` (shared
+// across all stages of a single `run-all` invocation) so a future
+// query can aggregate across the whole pipeline.
+
+import {
+  ValidationResult, requireString, requireNumber, requireIsoTimestamp, requireSha256,
+  requireStringArray,
+} from "./types";
+
+// Version stamp that validateStageReceipt requires in `schema_version`.
+export const STAGE_RECEIPT_SCHEMA_VERSION = 1;
+
+// Every stage name a StageReceipt may carry in its `stage` field.
+export const STAGE_NAMES = [
+  "collect",          // build_evidence_index — materialize source jsonls → EvidenceRecord
+  "score",            // score_runs — EvidenceRecord → ScoredRun
+  "export-rag",       // exports/rag/playbooks.jsonl
+  "export-sft",       // exports/sft/instruction_response.jsonl
+  "export-preference",// exports/preference/chosen_rejected.jsonl
+  // Reserved for future stages — accept them in the schema so a stage
+  // can be added without bumping schema_version.
+  "extract-playbooks",
+  "index",
+] as const;
+// Union of the literal strings in STAGE_NAMES.
+export type StageName = (typeof STAGE_NAMES)[number];
+
+// Reference to one file a stage read or wrote.
+export interface StageFileRef {
+  path: string;          // relative to repo root
+  sha256: string;        // 64-char hex
+  bytes?: number;
+  record_count?: number; // line count for jsonl, when meaningful
+}
+
+// One side (inputs or outputs) of a stage: its files plus aggregates.
+export interface StageIO {
+  files: StageFileRef[];
+  record_count: number;
+  hash: string;          // 64-char hex — aggregate over all file hashes (sorted)
+}
+
+// Per-stage row counters.
+export interface StageStats {
+  accepted: number;       // rows that ended up in the stage's output
+  rejected: number;       // explicit category=rejected (Score), invalid pairs (Preference), etc.
+  quarantined: number;    // routed to exports/quarantine/* with structured reason
+  skipped: number;        // parse failures, schema violations at write time
+}
+
+// Outcome of the stage's own validation pass.
+export interface StageValidation {
+  passed: boolean;        // explicit boolean — never inferred (spec non-negotiable)
+  errors: string[];
+  warnings: string[];
+}
+
+// The receipt one pipeline stage emits for one run.
+export interface StageReceipt {
+  schema_version: number;
+  run_id: string;         // shared across all stages of one pipeline run
+  stage: StageName;
+  timestamp: string;      // ISO 8601 — stage start
+  git_commit: string;     // 40-char hex
+  inputs: StageIO;
+  outputs: StageIO;
+  stats: StageStats;
+  validation: StageValidation;
+  duration_ms: number;
+}
+
+// Checks one StageIO value (`inputs` or `outputs`). `field` prefixes every
+// message appended to `errors`. Returns true only when every check passed.
+function validateStageIO(v: unknown, field: string, errors: string[]): boolean {
+  if (typeof v !== "object" || v === null) {
+    errors.push(`${field}: expected object`);
+    return false;
+  }
+  const io = v as Record<string, unknown>;
+  let ok = true;
+
+  const fileList = io.files;
+  if (Array.isArray(fileList)) {
+    for (const [idx, raw] of fileList.entries()) {
+      const where = `${field}.files[${idx}]`;
+      if (typeof raw !== "object" || raw === null) {
+        errors.push(`${where}: expected object`);
+        ok = false;
+        continue;
+      }
+      const ref = raw as Record<string, unknown>;
+      if (!requireString(ref.path, `${where}.path`, errors)) ok = false;
+      if (!requireSha256(ref.sha256, `${where}.sha256`, errors)) ok = false;
+      // bytes and record_count are optional; only their type is checked.
+      const bytesOk = ref.bytes === undefined || typeof ref.bytes === "number";
+      if (!bytesOk) {
+        errors.push(`${where}.bytes: expected number when present`);
+        ok = false;
+      }
+      const countOk = ref.record_count === undefined || typeof ref.record_count === "number";
+      if (!countOk) {
+        errors.push(`${where}.record_count: expected number when present`);
+        ok = false;
+      }
+    }
+  } else {
+    errors.push(`${field}.files: expected array`);
+    ok = false;
+  }
+
+  if (!requireNumber(io.record_count, `${field}.record_count`, errors)) ok = false;
+  if (!requireSha256(io.hash, `${field}.hash`, errors)) ok = false;
+  return ok;
+}
+
+// Validate an untrusted value as a StageReceipt.
+//
+// Every violation is appended to `errors` (not just the first). Returns
+// `{ valid: true, value }` when all checks pass, else `{ valid: false, errors }`.
+// Numeric fields checked here (`duration_ms`, `stats.*`) must be finite.
+export function validateStageReceipt(input: unknown): ValidationResult<StageReceipt> {
+  const errors: string[] = [];
+  if (typeof input !== "object" || input === null) {
+    return { valid: false, errors: ["expected object"] };
+  }
+  const r = input as Record<string, unknown>;
+  let ok = true;
+  // `typeof` alone admits NaN and ±Infinity. JSON.stringify writes those as
+  // null, so a receipt that validated in memory would fail validation once
+  // persisted and re-read.
+  const isFiniteNumber = (v: unknown): boolean => typeof v === "number" && Number.isFinite(v);
+
+  if (r.schema_version !== STAGE_RECEIPT_SCHEMA_VERSION) {
+    errors.push(`schema_version: expected ${STAGE_RECEIPT_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
+    ok = false;
+  }
+  ok = requireString(r.run_id, "run_id", errors) && ok;
+  if (typeof r.run_id === "string" && r.run_id.length < 8) {
+    errors.push("run_id: too short — expect uuid-like");
+    ok = false;
+  }
+  if (typeof r.stage !== "string" || !STAGE_NAMES.includes(r.stage as StageName)) {
+    errors.push(`stage: must be one of ${STAGE_NAMES.join("|")}`);
+    ok = false;
+  }
+  ok = requireIsoTimestamp(r.timestamp, "timestamp", errors) && ok;
+  if (typeof r.git_commit !== "string" || !/^[0-9a-f]{40}$/.test(r.git_commit as string)) {
+    errors.push("git_commit: must be 40-char hex");
+    ok = false;
+  }
+  if (!isFiniteNumber(r.duration_ms)) {
+    errors.push("duration_ms: expected number");
+    ok = false;
+  }
+  // validateStageIO itself reports "<field>: expected object" for
+  // non-objects, so no separate pre-check is needed here.
+  ok = validateStageIO(r.inputs, "inputs", errors) && ok;
+  ok = validateStageIO(r.outputs, "outputs", errors) && ok;
+  if (typeof r.stats !== "object" || r.stats === null) {
+    errors.push("stats: expected object");
+    ok = false;
+  } else {
+    const s = r.stats as Record<string, unknown>;
+    for (const k of ["accepted", "rejected", "quarantined", "skipped"]) {
+      if (!isFiniteNumber(s[k])) { errors.push(`stats.${k}: expected number`); ok = false; }
+    }
+  }
+  if (typeof r.validation !== "object" || r.validation === null) {
+    errors.push("validation: expected object");
+    ok = false;
+  } else {
+    const v = r.validation as Record<string, unknown>;
+    if (typeof v.passed !== "boolean") {
+      errors.push("validation.passed: must be boolean (explicit, never inferred)");
+      ok = false;
+    }
+    // Shape first (must be an array), then element type via the shared helper.
+    if (!Array.isArray(v.errors)) { errors.push("validation.errors: expected array"); ok = false; }
+    else ok = requireStringArray(v.errors, "validation.errors", errors) && ok;
+    if (!Array.isArray(v.warnings)) { errors.push("validation.warnings: expected array"); ok = false; }
+    else ok = requireStringArray(v.warnings, "validation.warnings", errors) && ok;
+  }
+
+  if (!ok) return { valid: false, errors };
+  return { valid: true, value: r as unknown as StageReceipt };
+}
+
+// Compute the canonical aggregate hash over a list of file refs.
+// Sorted by path so order-of-iteration doesn't drift the hash.
+// Each entry contributes "<path>|<sha256>|<record_count>" so two
+// files with identical content but different paths produce distinct
+// digests (real difference = real hash difference). A missing
+// record_count contributes 0.
+//
+// Paths are ordered by plain code-unit comparison rather than
+// localeCompare: locale collation depends on the runtime's locale/ICU
+// data, so the same file set could hash differently on another machine.
+// NOTE(review): for path sets where locale order and code-unit order
+// disagree (e.g. mixed case), digests differ from ones computed with the
+// previous localeCompare ordering.
+//
+// Returns the sha256 digest as lowercase hex. The input array is not mutated.
+export async function aggregateIoHash(files: StageFileRef[]): Promise<string> {
+  const sorted = [...files].sort((a, b) => (a.path < b.path ? -1 : a.path > b.path ? 1 : 0));
+  const parts = sorted.map(f => `${f.path}|${f.sha256}|${f.record_count ?? 0}`);
+  const h = new Bun.CryptoHasher("sha256");
+  h.update(parts.join("\n"));
+  return h.digest("hex");
+}
--- a/auditor/schemas/distillation/types.ts
+++ b/auditor/schemas/distillation/types.ts
@ -0,0 +1,141 @@
+// Shared types for distillation schemas. Hand-rolled validators (no Zod
+// dependency) — bun:test runs them; runtime cost is one tiny function
+// per record. Pattern: each schema exports `validate(x): ValidationResult`
+// returning `{valid: true, value}` or `{valid: false, errors}`.
+//
+// Why hand-rolled: the auditor + scrum + observer pipelines emit JSONL
+// rows in shapes that already work; we want to ENFORCE those shapes
+// without adding a 100KB dependency or rewriting producers. The
+// validators codify what we already produce.
+//
+// Naming: schemas live as nouns (`EvidenceRecord`), validators as
+// `validate<Noun>`. Each schema file exports both the type and the
+// validator.
+
+// Link from a distilled row back to the source row it was derived from.
+export interface Provenance {
+  // Path to the JSONL or other source where this row came from. Always
+  // relative to /home/profit/lakehouse so receipts are reproducible
+  // across deploys with the same repo layout.
+  source_file: string;
+
+  // Optional byte offset / line number into the source file. Lets a
+  // future "open the source row" UI jump directly to the line. Some
+  // sources (single-row JSON files like _playbook_lessons/*.json) don't
+  // need this.
+  // NOTE(review): the unit (byte offset vs. line number) is not pinned
+  // down here — confirm against the producers before relying on it.
+  line_offset?: number;
+
+  // SHA-256 of the canonical JSON of the source row (sorted keys, no
+  // whitespace). This is the dedup key — running distillation twice on
+  // the same source produces identical sig_hash, so duplicates are
+  // detectable without full row comparison.
+  sig_hash: string;
+
+  // ISO 8601 of when this provenance link was recorded — usually the
+  // moment the unified Evidence Index ran. Distinct from the source
+  // row's own timestamp, which lives on the EvidenceRecord itself.
+  recorded_at: string;
+}
+
+// Returned by every schema validator. The shape is `{valid: true, value}`
+// for success (so callers can use `value` with the right type narrowed)
+// or `{valid: false, errors}` for failure (so callers can surface
+// every error at once, not just the first).
+// `valid` is the discriminant: check it first, then read `value` or
+// `errors` — only one of the two exists on each variant.
+export type ValidationResult<T> =
+  | { valid: true; value: T }
+  | { valid: false; errors: string[] };
+
+// Standard helpers used by every schema. Centralized so naming +
+// error message format stay consistent across schemas.
+
+// Type guard: passes only for a string with at least one character.
+// On failure, appends one message prefixed with `field` to `errors`.
+export function requireString(v: unknown, field: string, errors: string[]): v is string {
+  if (typeof v === "string") {
+    if (v.length > 0) return true;
+    errors.push(`${field}: must be non-empty`);
+    return false;
+  }
+  errors.push(`${field}: expected string, got ${typeof v}`);
+  return false;
+}
+
+// Type guard: passes only for finite numbers (NaN and ±Infinity fail).
+// On failure, appends one message prefixed with `field` to `errors`.
+export function requireNumber(v: unknown, field: string, errors: string[]): v is number {
+  const finite = typeof v === "number" && Number.isFinite(v);
+  if (!finite) {
+    errors.push(`${field}: expected finite number, got ${typeof v}`);
+  }
+  return finite;
+}
+
+// Type guard for timestamp strings. Shape check only:
+// YYYY-MM-DDTHH:MM:SS, optional fractional seconds, optional zone
+// suffix (Z, ±HH:MM or ±HHMM). Field ranges (month 13, hour 99, ...)
+// are NOT validated. Non-string / empty input is reported by
+// requireString; a shape mismatch adds a message quoting at most the
+// first 60 characters of the value.
+export function requireIsoTimestamp(v: unknown, field: string, errors: string[]): v is string {
+  if (!requireString(v, field, errors)) return false;
+  const text = v as string;
+  const isoShape = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?$/;
+  if (isoShape.test(text)) return true;
+  errors.push(`${field}: not a valid ISO 8601 timestamp: ${text.slice(0, 60)}`);
+  return false;
+}
+
+// Type guard for a SHA-256 digest: exactly 64 lowercase hex characters.
+// Uppercase hex is rejected. Non-string / empty input is reported by
+// requireString; a malformed digest adds a message quoting at most the
+// first 80 characters of the value.
+export function requireSha256(v: unknown, field: string, errors: string[]): v is string {
+  if (!requireString(v, field, errors)) return false;
+  const hex = v as string;
+  const wellFormed = /^[0-9a-f]{64}$/.test(hex);
+  if (!wellFormed) {
+    errors.push(`${field}: not a valid hex sha256: ${hex.slice(0, 80)}`);
+  }
+  return wellFormed;
+}
+
+// Type guard for a Provenance object. Every field is checked even after
+// an earlier one fails, so `errors` receives all problems in one pass.
+// `line_offset` is optional, but when present it must be a finite
+// number — same rule as requireNumber, so NaN / Infinity are rejected.
+export function requireProvenance(v: unknown, field: string, errors: string[]): v is Provenance {
+  if (typeof v !== "object" || v === null) {
+    errors.push(`${field}: expected object, got ${v === null ? "null" : typeof v}`);
+    return false;
+  }
+  const p = v as Record<string, unknown>;
+  let ok = true;
+  // `check(...) && ok` (not `ok && check(...)`) so each validator always
+  // runs and records its own error.
+  ok = requireString(p.source_file, `${field}.source_file`, errors) && ok;
+  ok = requireSha256(p.sig_hash, `${field}.sig_hash`, errors) && ok;
+  ok = requireIsoTimestamp(p.recorded_at, `${field}.recorded_at`, errors) && ok;
+  const offset = p.line_offset;
+  if (offset !== undefined && (typeof offset !== "number" || !Number.isFinite(offset))) {
+    errors.push(`${field}.line_offset: expected number when present`);
+    ok = false;
+  }
+  return ok;
+}
+
+// Type guard for string[]. Stops at the FIRST non-string element and
+// reports only that index; an empty array passes. Empty strings inside
+// the array are accepted (only the element type is checked).
+export function requireStringArray(v: unknown, field: string, errors: string[]): v is string[] {
+  if (!Array.isArray(v)) {
+    errors.push(`${field}: expected array, got ${typeof v}`);
+    return false;
+  }
+  const firstBad = v.findIndex(item => typeof item !== "string");
+  if (firstBad === -1) return true;
+  errors.push(`${field}[${firstBad}]: expected string, got ${typeof v[firstBad]}`);
+  return false;
+}
+
+// Canonical sha256 used for sig_hash: the hex digest of the value's
+// JSON text after object keys have been sorted recursively (orderKeys),
+// so the result does not depend on the producer's key order. Hashing
+// goes through Bun.CryptoHasher rather than node:crypto — matches the
+// rest of the auditor. The work is synchronous; the function is async
+// only by signature.
+export async function canonicalSha256(obj: unknown): Promise<string> {
+  const canonicalJson = JSON.stringify(orderKeys(obj));
+  const sha = new Bun.CryptoHasher("sha256");
+  sha.update(canonicalJson);
+  return sha.digest("hex");
+}
+
+// Recursively rebuilds `v` with object keys in sorted (code-unit)
+// order. Primitives and null are returned as-is; arrays keep their
+// element order and have each element normalized.
+function orderKeys(v: unknown): unknown {
+  if (v === null || typeof v !== "object") return v;
+  if (Array.isArray(v)) return v.map(orderKeys);
+  const src = v as Record<string, unknown>;
+  // Null-prototype target: on a plain `{}` the assignment
+  // `out["__proto__"] = x` hits the inherited prototype setter instead
+  // of creating an own property, so a source key literally named
+  // "__proto__" (JSON.parse produces these as own keys) would vanish
+  // from the canonical JSON and two different rows could hash the same.
+  const out: Record<string, unknown> = Object.create(null);
+  for (const k of Object.keys(src).sort()) {
+    out[k] = orderKeys(src[k]);
+  }
+  return out;
+}
--- a/auditor/types.ts
+++ b/auditor/types.ts
@ -2,7 +2,7 @@
 // if something can't be verified from a check, it goes into `evidence`
 // so the verdict is inspectable, not a black box.

-export type CheckKind = "static" | "dynamic" | "inference" | "kb_query";
+export type CheckKind = "static" | "dynamic" | "inference" | "kb_query" | "kimi_architect";

 export type Severity = "info" | "warn" | "block";

--- a/bot/propose.ts
+++ b/bot/propose.ts
@ -13,10 +13,17 @@ import { readFile } from "node:fs/promises";
 import { createHash } from "node:crypto";
 import type { Gap, Proposal } from "./types.ts";

-const SIDECAR_URL = process.env.LH_SIDECAR_URL ?? "http://localhost:3200";
+// Phase 44 migration (2026-04-27): bot/propose.ts now flows through
+// the gateway's /v1/chat instead of hitting the sidecar's /generate
+// directly. /v1/usage tracks the call, Langfuse traces it, observer
+// sees it. Gateway owns the routing.
+//
+// 2026-04-28: gpt-oss:120b → deepseek-v3.2 via Ollama Pro. Newer
+// DeepSeek revision, faster, still on the same OLLAMA_CLOUD_KEY.
+const GATEWAY_URL = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
 const REPO_ROOT = "/home/profit/lakehouse";
 const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`;
-const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "gpt-oss:120b";
+const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "deepseek-v3.2";
 const MAX_TOKENS = 6000;

 export async function findGaps(): Promise<Gap[]> {
@ -72,13 +79,16 @@ export async function generateProposal(gap: Gap, historySummary: string = ""): P
  sections.push("Propose a small change that addresses this gap. Respond with the JSON object only.");
  const userPrompt = sections.join("\n");

-  const r = await fetch(`${SIDECAR_URL}/generate`, {
+  const r = await fetch(`${GATEWAY_URL}/v1/chat`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      model: CLOUD_MODEL,
-      system: SYSTEM_PROMPT,
-      prompt: userPrompt,
+      provider: "ollama_cloud",
+      messages: [
+        { role: "system", content: SYSTEM_PROMPT },
+        { role: "user", content: userPrompt },
+      ],
      temperature: 0.2,
      max_tokens: MAX_TOKENS,
      think: false,
@ -86,10 +96,10 @@ export async function generateProposal(gap: Gap, historySummary: string = ""): P
    signal: AbortSignal.timeout(180000), // cloud T3 can be slow — 3 min
  });
  if (!r.ok) {
-    throw new Error(`sidecar ${r.status}: ${await r.text()}`);
+    throw new Error(`gateway /v1/chat ${r.status}: ${await r.text()}`);
  }
  const j = await r.json() as any;
-  const raw: string = j.text ?? j.response ?? "";
+  const raw: string = j?.choices?.[0]?.message?.content ?? "";
  const usage = j.usage ?? {};
  const tokens = (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0);

--- a/config/modes.toml
+++ b/config/modes.toml
@ -0,0 +1,91 @@
+# Mode router config — task_class → mode mapping
+#
+# `preferred_mode` is the first choice for a task class; `fallback_modes`
+# get tried in order if the preferred one isn't available (LLM Team can
+# return Unknown mode for some, OR the matrix has stronger signal for a
+# fallback). `default_model` seeds the mode runner's model field if the
+# caller doesn't override.
+#
+# Modes are dispatched against LLM Team UI (localhost:5000/api/run) for
+# now; future Rust-native runners will short-circuit before the proxy.
+# See crates/gateway/src/v1/mode.rs for the dispatch path.
+
+[[task_class]]
+name = "scrum_review"
+# 2026-04-26 pass5 variance test (5 reps × 4 conditions, grok-4.1-fast,
+# pathway_memory.rs): composed corpus LOST 5/5 vs isolation (Δ −1.8
+# grounded findings, p=0.031). See docs/MODE_RUNNER_TUNING_PLAN.md.
+# Default is now isolation — bug fingerprints + adversarial framing +
+# file content carries strong models without matrix noise. The
+# `codereview_lakehouse` matrix path remains available via force_mode
+# (auto-downgrades to isolation on strong models — see the
+# is_strong_model gate in crates/gateway/src/v1/mode.rs).
+preferred_mode = "codereview_isolation"
+fallback_modes = ["codereview_lakehouse", "codereview", "consensus", "ladder"]
+default_model = "qwen3-coder:480b"
+# Corpora kept defined so experimental modes (codereview_matrix_only,
+# pass2/pass5 sweeps) and weak-model rescue rungs can still pull them.
+# scrum_findings_v1 is built but EXCLUDED — bake-off showed 24% OOB
+# line citations from cross-file drift, only safe with same-file gating.
+matrix_corpus = ["lakehouse_arch_v1", "lakehouse_symbols_v1"]
+
+[[task_class]]
+name = "contract_analysis"
+preferred_mode = "deep_analysis"
+fallback_modes = ["research", "extract"]
+# NOTE(review): config/providers.toml flags kimi-k2:1t as upstream-broken
+# (HTTP 500 on Ollama Cloud) and says to use kimi-k2.6 instead — confirm
+# whether this default should be switched as well.
+default_model = "kimi-k2:1t"
+matrix_corpus = "chicago_permits_v1"
+
+[[task_class]]
+name = "staffing_inference"
+# Staffing-domain native enrichment runner — Pass 4 (2026-04-26).
+# Same composer architecture as codereview_lakehouse but with staffing
+# framing + workers corpus. Validates that the modes-as-prompt-molders
+# pattern generalizes beyond code review.
+preferred_mode = "staffing_inference_lakehouse"
+fallback_modes = ["ladder", "consensus", "pipeline"]
+# 2026-04-28: gpt-oss-120b:free → kimi-k2.6 via Ollama Pro. Coding-
+# specialized, faster than gpt-oss, on the same OLLAMA_CLOUD_KEY so
+# no extra provider hop.
+default_model = "kimi-k2.6"
+matrix_corpus = "workers_500k_v8"
+
+[[task_class]]
+name = "fact_extract"
+preferred_mode = "extract"
+fallback_modes = ["distill"]
+default_model = "qwen2.5"
+matrix_corpus = "kb_team_runs_v1"
+
+[[task_class]]
+name = "doc_drift_check"
+preferred_mode = "drift"
+fallback_modes = ["validator"]
+# 2026-04-28: gpt-oss:120b → gemini-3-flash-preview via Ollama Pro.
+# Speed leader on factual checking, same OLLAMA_CLOUD_KEY.
+default_model = "gemini-3-flash-preview"
+matrix_corpus = "distilled_factual_v20260423095819"
+
+[[task_class]]
+name = "pr_audit"
+# Auditor's claim-vs-diff verification mode (2026-04-26 rebuild).
+# Replaces the auditor's hand-rolled inference check with the mode-runner
+# composer: pathway memory (PR-level patterns) + lakehouse_answers_v1
+# corpus (prior accepted reviews + observer escalations) + adversarial
+# JSON-shaped framing. The default model was originally paid Ollama Cloud
+# kimi-k2:1t for strong claim-grounding (currently substituted — see the
+# note above default_model); tie-breaker via auditor-side env override.
+preferred_mode = "pr_audit"
+fallback_modes = ["consensus", "ladder"]
+# kimi-k2:1t broken upstream 2026-04-27 (Ollama Cloud 500 ISE, multi-hour
+# sustained outage verified by repeated probes). deepseek-v3.1:671b is
+# the drop-in substitute — proven working end-to-end through pr_audit
+# during Phase 5 distillation acceptance testing.
+default_model = "deepseek-v3.1:671b"
+matrix_corpus = "lakehouse_answers_v1"
+
+# Fallback when task_class isn't in the table — useful for ad-hoc calls
+# during development that don't yet have a mapped mode.
+[default]
+preferred_mode = "pipeline"
+fallback_modes = ["consensus", "ladder"]
+default_model = "qwen3.5:latest"
--- a/config/providers.toml
+++ b/config/providers.toml
@ -0,0 +1,106 @@
+# Phase 39: Provider Registry
+#
+# Per-provider base_url, auth scheme, and default model. The gateway's
+# /v1/chat dispatcher reads this file at boot to populate its provider
+# table. Secrets (API keys) come from /etc/lakehouse/secrets.toml or
+# environment variables — NEVER inline a key here.
+#
+# Adding a new provider:
+#   1. New [[provider]] block with name, base_url, auth, default_model
+#   2. Matching adapter at crates/aibridge/src/providers/<name>.rs
+#      implementing the ProviderAdapter trait (chat + embed + unload)
+#   3. Route arm in crates/gateway/src/v1/mod.rs matching on `name`
+#   4. Model-prefix routing hint in resolve_provider() if the provider
+#      uses an "<name>/..." model prefix (e.g. "openrouter/...")
+
+[[provider]]
+name = "ollama"
+base_url = "http://localhost:11434"
+auth = "none"
+default_model = "qwen3.5:latest"
+# Hot-path local inference. No bearer needed — direct to Ollama as of
+# 2026-05-02 (Python sidecar's pass-through wrapper retired). Model
+# names are bare (e.g. "qwen3.5:latest", not "ollama/qwen3.5:latest").
+
+[[provider]]
+name = "ollama_cloud"
+base_url = "https://ollama.com"
+auth = "bearer"
+auth_env = "OLLAMA_CLOUD_KEY"
+default_model = "deepseek-v3.2"
+# Cloud-tier Ollama (Pro plan as of 2026-04-28). Key resolved from
+# OLLAMA_CLOUD_KEY at gateway boot; Pro tier upgraded the account so
+# rate limits + model access widen without a key change. Model-prefix
+# routing: "cloud/<model>" auto-routes here. 39-model fleet now
+# includes deepseek-v3.2, deepseek-v4-{flash,pro}, gemini-3-flash-
+# preview, glm-{5,5.1}, kimi-k2.6, qwen3-coder-next.
+# 2026-04-28: default upgraded gpt-oss:120b → deepseek-v3.2 (newest
+# DeepSeek revision). NOTE: kimi-k2:1t is upstream-broken (HTTP 500
+# on Ollama Pro probe 2026-04-28) — do not route to it. Use kimi-k2.6
+# instead, which is what staffing_inference points at.
+
+[[provider]]
+name = "openrouter"
+base_url = "https://openrouter.ai/api/v1"
+auth = "bearer"
+auth_env = "OPENROUTER_API_KEY"
+auth_fallback_files = ["/home/profit/.env", "/root/llm_team_config.json"]
+default_model = "x-ai/grok-4.1-fast"
+# Multi-provider gateway. Covers Anthropic, Google, OpenAI, MiniMax,
+# Qwen, Gemma, etc. Key resolved via crates/gateway/src/v1/openrouter.rs
+# resolve_openrouter_key() — env first, then fallback files.
+# Model-prefix routing: "openrouter/<vendor>/<model>" auto-routes here,
+# prefix stripped before upstream call.
+
+[[provider]]
+name = "opencode"
+base_url = "https://opencode.ai/zen/v1"
+# Unified endpoint — covers BOTH Zen (pay-per-token Anthropic/OpenAI/
+# Gemini frontier) AND Go (flat-sub Kimi/GLM/DeepSeek/Qwen/Minimax).
+# Upstream bills per-model: Zen models hit Zen balance, Go models hit
+# Go subscription cap. /zen/go/v1 is the Go-only sub-path (rejects
+# Zen models), kept for reference but not used by this provider.
+auth = "bearer"
+auth_env = "OPENCODE_API_KEY"
+default_model = "claude-opus-4-7"
+# OpenCode (Zen + GO unified endpoint). One sk-* key reaches Claude
+# Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
+# Qwen, plus 4 free-tier models. OpenAI-compatible Chat Completions
+# at /v1/chat/completions. Model-prefix routing: "opencode/<name>"
+# auto-routes here, prefix stripped before upstream call.
+# Key file: /etc/lakehouse/opencode.env (loaded via systemd EnvironmentFile).
+# Model catalog: curl -H "Authorization: Bearer ..." https://opencode.ai/zen/v1/models
+# Note: /zen/go/v1 is the GO-only sub-path (Kimi/GLM/DeepSeek tier);
+# /zen/v1 covers everything including Anthropic (which /zen/go/v1 rejects).
+
+[[provider]]
+name = "kimi"
+base_url = "https://api.kimi.com/coding/v1"
+auth = "bearer"
+auth_env = "KIMI_API_KEY"
+default_model = "kimi-for-coding"
+# Direct Kimi For Coding provider. `api.kimi.com` is a SEPARATE account
+# system from `api.moonshot.ai` and `api.moonshot.cn` — keys are NOT
+# interchangeable. Used as a fallback when Ollama Cloud's kimi-k2.6 is
+# unavailable and OpenRouter's `moonshotai/kimi-k2.6` is rate-limited.
+# (Was `kimi-k2:1t` here pre-2026-05-03 — that model is upstream-broken
+# and removed from operator guidance.)
+# Model id: `kimi-for-coding` (kimi-k2.6 underneath).
+# Key file: /etc/lakehouse/kimi.env (loaded via systemd EnvironmentFile).
+# Model-prefix routing: "kimi/<model>" auto-routes here, prefix stripped.
+
+# Planned (Phase 40 long-horizon — adapters not yet shipped):
+#
+# [[provider]]
+# name = "gemini"
+# base_url = "https://generativelanguage.googleapis.com/v1beta"
+# auth = "api_key_query"
+# auth_env = "GEMINI_API_KEY"
+# default_model = "gemini-2.0-flash"
+#
+# [[provider]]
+# name = "claude"
+# base_url = "https://api.anthropic.com/v1"
+# auth = "x_api_key"
+# auth_env = "ANTHROPIC_API_KEY"
+# default_model = "claude-3-5-sonnet-latest"
--- a/config/routing.toml
+++ b/config/routing.toml
@ -0,0 +1,61 @@
+# Phase 40: Routing Engine Configuration
+#
+# Human-editable rules for model → provider routing.
+# Matching order: first match wins.
+
+# Fallback chain: if primary fails, try these in order.
+# Keep this ABOVE the first [[rule]] header. In TOML a bare key that
+# follows a table header belongs to that table, so placing it after the
+# rules makes it a field of the last [[rule]] entry instead of a
+# top-level key.
+fallback = ["ollama", "openrouter"]
+
+[[rule]]
+model_pattern = "gpt-4*"
+provider = "openrouter"
+max_tokens = 4096
+temperature = 0.7
+
+# NOTE(review): config/providers.toml lists "claude" only as a planned
+# provider (adapter not yet shipped) — confirm this rule can resolve.
+[[rule]]
+model_pattern = "claude*"
+provider = "claude"
+max_tokens = 4096
+temperature = 0.7
+
+# NOTE(review): config/providers.toml lists "gemini" only as a planned
+# provider (adapter not yet shipped) — confirm this rule can resolve.
+[[rule]]
+model_pattern = "gemini*"
+provider = "gemini"
+max_tokens = 8192
+temperature = 0.9
+
+# Must stay before the broader "qwen3*" rule (first match wins).
+[[rule]]
+model_pattern = "qwen3.5*"
+provider = "ollama"
+max_tokens = 4096
+temperature = 0.3
+
+[[rule]]
+model_pattern = "qwen3*"
+provider = "ollama"
+max_tokens = 2048
+temperature = 0.3
+
+[[rule]]
+model_pattern = "gpt-oss*"
+provider = "ollama"
+temperature = 0.1
+
+# Catch-all — must remain the last rule.
+[[rule]]
+model_pattern = "*"
+provider = "ollama"
+temperature = 0.5
+
+# Cost gating (values = cents per 1M tokens)
+[cost]
+ollama = 0
+openrouter = 15
+claude = 15
+gemini = 0
+
+# Daily budget per provider (cents)
+[daily_budget]
+ollama = 0
+openrouter = 1000
+claude = 500
+gemini = 0
--- a/crates/aibridge/Cargo.toml
+++ b/crates/aibridge/Cargo.toml
@ -11,3 +11,5 @@ serde = { workspace = true }
 serde_json = { workspace = true }
 tracing = { workspace = true }
 reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+async-trait = "0.1"
+lru = "0.12"
--- a/crates/aibridge/src/client.rs
+++ b/crates/aibridge/src/client.rs
@ -1,12 +1,74 @@
+use lru::LruCache;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
+use std::num::NonZeroUsize;
+use std::sync::Mutex;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::Arc;
 use std::time::Duration;

-/// HTTP client for the Python AI sidecar.
+/// HTTP client for Ollama (post-2026-05-02 — sidecar dropped).
+///
+/// `base_url` was historically the Python sidecar at `:3200`, which
+/// pass-through-proxied to Ollama at `:11434`. The sidecar added zero
+/// logic on the hot path (embed.py + generate.py + rerank.py +
+/// admin.py = ~120 LOC of pure Ollama wrappers), so this client now
+/// talks to Ollama directly and the sidecar process can be retired.
+///
+/// What stayed Python: `lab_ui.py` + `pipeline_lab.py` (~888 LOC of
+/// dev-mode Streamlit-shape UIs) — those aren't on the runtime hot
+/// path and continue running for prompt experimentation.
+///
+/// `generate()` has two transport modes:
+/// - When `gateway_url` is None (default), posts directly to Ollama's
+///   `${base_url}/api/generate`.
+/// - When `gateway_url` is `Some(url)`, posts to `${url}/v1/chat`
+///   with `provider="ollama"` so the call appears in `/v1/usage` and
+///   Langfuse traces.
+///
+/// `embed()`, `rerank()`, and admin methods always go direct to
+/// Ollama — no `/v1` equivalent for those surfaces yet.
+///
+/// Phase 44 part 2 (2026-04-27): the gateway URL is wired in by
+/// callers that want observability (vectord modules); it's left
+/// unset by callers that ARE the gateway internals (avoids self-loops
+/// + redundant hops).
+/// Per-text embed cache key. We key on (model, text) so different
+/// model selections produce distinct cache lines — a query embedded
+/// under nomic-embed-text-v2-moe must NOT collide with the same
+/// query under nomic-embed-text v1.
+#[derive(Eq, PartialEq, Hash, Clone)]
+struct EmbedCacheKey {
+    model: String,
+    text: String,
+}
+
+/// Default LRU cache size — 4096 entries × ~6KB per 768-d f64
+/// vector ≈ 24MB. Sized for typical staffing-domain repetition
+/// (coordinator workflows have query repetition rates around 70-90%
+/// per session). Tunable via [aibridge].embed_cache_size in the
+/// config; 0 disables the cache entirely.
+const DEFAULT_EMBED_CACHE_SIZE: usize = 4096;
+
 #[derive(Clone)]
 pub struct AiClient {
    client: Client,
    base_url: String,
+    gateway_url: Option<String>,
+    /// Closes the 63× perf gap with Go side. Mirrors the shape of
+    /// Go's internal/embed/cached.go::CachedProvider — same
+    /// (model, text) → vector caching, same nil-disable semantics.
+    /// None = caching disabled (cache_size=0); Some = bounded LRU.
+    embed_cache: Option<Arc<Mutex<LruCache<EmbedCacheKey, Vec<f64>>>>>,
+    /// Hit / miss counters for /admin observability + load-test
+    /// validation. Atomic so Clone'd AiClients share the same counts.
+    embed_cache_hits: Arc<AtomicU64>,
+    embed_cache_misses: Arc<AtomicU64>,
+    /// Pinned at construction time so the EmbedResponse can carry
+    /// dimension consistently even when every text was a cache hit
+    /// (no fresh upstream call to learn the dim from). Set on first
+    /// successful Ollama embed; checked on every cache hit.
+    cached_dim: Arc<AtomicU64>,
 }

 // -- Request/Response types --
@ -79,68 +141,386 @@ pub struct RerankResponse {

 impl AiClient {
+    /// Constructs a client with the default embed cache size
+    /// (`DEFAULT_EMBED_CACHE_SIZE`); delegates to `with_embed_cache`.
     pub fn new(base_url: &str) -> Self {
+        Self::with_embed_cache(base_url, DEFAULT_EMBED_CACHE_SIZE)
+    }
+
+    /// Constructs an AiClient with an explicit embed-cache size.
+    /// Pass 0 to disable the cache entirely (matches Go-side
+    /// CachedProvider's nil-cache semantics). A trailing `/` on
+    /// `base_url` is stripped; `gateway_url` starts unset.
+    ///
+    /// # Panics
+    /// Panics if the underlying HTTP client cannot be built.
+    pub fn with_embed_cache(base_url: &str, cache_size: usize) -> Self {
         let client = Client::builder()
             .timeout(Duration::from_secs(120))
             .build()
             .expect("failed to build HTTP client");
+        // `NonZeroUsize::new` returns `None` for 0, which is exactly the
+        // "cache disabled" state — no separate branch or unwrap needed.
+        let embed_cache = NonZeroUsize::new(cache_size)
+            .map(|cap| Arc::new(Mutex::new(LruCache::new(cap))));
         Self {
             client,
             base_url: base_url.trim_end_matches('/').to_string(),
+            gateway_url: None,
+            embed_cache,
+            embed_cache_hits: Arc::new(AtomicU64::new(0)),
+            embed_cache_misses: Arc::new(AtomicU64::new(0)),
+            cached_dim: Arc::new(AtomicU64::new(0)),
         }
     }

+    /// Snapshot of the embed cache as `(hits, misses, entries)`.
+    /// Handy for /admin endpoints + load-test validation ("did the
+    /// cache fire as expected?"). `entries` is 0 when the cache is
+    /// disabled, and also when the cache lock is poisoned.
+    pub fn embed_cache_stats(&self) -> (u64, u64, usize) {
+        let hit_count = self.embed_cache_hits.load(Ordering::Relaxed);
+        let miss_count = self.embed_cache_misses.load(Ordering::Relaxed);
+        let entries = match &self.embed_cache {
+            Some(cache) => cache.lock().map(|guard| guard.len()).unwrap_or(0),
+            None => 0,
+        };
+        (hit_count, miss_count, entries)
+    }
+
+    /// Like `new`, except that every `generate()` call goes through
+    /// `${gateway_url}/v1/chat` (provider=ollama) for observability.
+    /// Intended for callers OUTSIDE the gateway. Inside the gateway
+    /// itself, prefer `new()` — calling /v1/chat from /v1/chat works
+    /// (no infinite loop, ollama_arm doesn't use AiClient) but adds
+    /// a wasted localhost hop. A trailing `/` on `gateway_url` is
+    /// stripped.
+    pub fn new_with_gateway(base_url: &str, gateway_url: &str) -> Self {
+        Self {
+            gateway_url: Some(gateway_url.trim_end_matches('/').to_string()),
+            ..Self::new(base_url)
+        }
+    }
+
+    /// Reachability + version check. Hits Ollama's `/api/version`,
+    /// returns a sidecar-shaped envelope so callers reading
+    /// `.status` / `.ollama_url` don't break across the
+    /// pre-/post-2026-05-02 cutover.
+    ///
+    /// # Errors
+    /// Returns `Err` when the request cannot be sent, when Ollama
+    /// answers with a non-success HTTP status, or when the body is not
+    /// valid JSON.
     pub async fn health(&self) -> Result<serde_json::Value, String> {
         let resp = self.client
-            .get(format!("{}/health", self.base_url))
+            .get(format!("{}/api/version", self.base_url))
             .send()
             .await
-            .map_err(|e| format!("sidecar unreachable: {e}"))?;
-        resp.json().await.map_err(|e| format!("invalid response: {e}"))
+            .map_err(|e| format!("ollama unreachable: {e}"))?;
+        // Without this check a non-2xx reply that happens to carry a
+        // JSON body would still be reported as "status": "ok".
+        let status = resp.status();
+        if !status.is_success() {
+            return Err(format!("ollama health check failed: HTTP {status}"));
+        }
+        let body: serde_json::Value = resp.json().await
+            .map_err(|e| format!("invalid response: {e}"))?;
+        Ok(serde_json::json!({
+            "status": "ok",
+            "ollama_url": &self.base_url,
+            "ollama_version": body.get("version"),
+        }))
     }

+    /// Embed with per-text LRU caching. Mirrors Go-side
+    /// CachedProvider behavior: cache key is (model, text);
+    /// cache-hit texts skip the upstream call; cache-miss texts are
+    /// fetched through a single `embed_uncached` call; results are
+    /// interleaved in the caller's input order.
+    ///
+    /// Closes ~95% of the load-test perf gap vs Go side (loadgen
+    /// 2026-05-01: Rust 128 RPS → with cache ≥ 7000 RPS expected
+    /// for warm-cache workloads). Cold-cache behavior unchanged
+    /// (every text is a miss → single `embed_uncached` call,
+    /// identical to pre-cache).
+    ///
+    /// # Errors
+    /// Returns `Err` when the cache mutex is poisoned, when the
+    /// upstream call fails, or when upstream returns a different
+    /// number of embeddings than texts requested.
     pub async fn embed(&self, req: EmbedRequest) -> Result<EmbedResponse, String> {
-        let resp = self.client
-            .post(format!("{}/embed", self.base_url))
-            .json(&req)
-            .send()
-            .await
-            .map_err(|e| format!("embed request failed: {e}"))?;
+        let model_key = req.model.clone().unwrap_or_default();

-        if !resp.status().is_success() {
-            let text = resp.text().await.unwrap_or_default();
-            return Err(format!("embed error ({}): {text}", text.len()));
+        // Fast path: cache disabled → original behavior.
+        let Some(cache) = self.embed_cache.as_ref() else {
+            return self.embed_uncached(&req).await;
+        };
+        if req.texts.is_empty() {
+            return self.embed_uncached(&req).await;
         }
-        resp.json().await.map_err(|e| format!("embed parse error: {e}"))
+
+        // First pass: check cache for each text. Track which positions
+        // need an upstream fetch. The guard lives only inside this
+        // block, so the mutex is never held across an `.await`.
+        let mut embeddings: Vec<Option<Vec<f64>>> = vec![None; req.texts.len()];
+        let mut miss_indices: Vec<usize> = Vec::new();
+        let mut miss_texts: Vec<String> = Vec::new();
+        {
+            let mut guard = cache.lock().map_err(|e| format!("cache lock poisoned: {e}"))?;
+            for (i, text) in req.texts.iter().enumerate() {
+                let key = EmbedCacheKey { model: model_key.clone(), text: text.clone() };
+                if let Some(vec) = guard.get(&key) {
+                    embeddings[i] = Some(vec.clone());
+                    self.embed_cache_hits.fetch_add(1, Ordering::Relaxed);
+                } else {
+                    miss_indices.push(i);
+                    miss_texts.push(text.clone());
+                    self.embed_cache_misses.fetch_add(1, Ordering::Relaxed);
+                }
+            }
+        }
+
+        // All hit? Return immediately. The dimension is taken from the
+        // returned vectors themselves: `cached_dim` is one value shared
+        // by every model this client serves, so it can be wrong when
+        // models of different widths go through the same client.
+        if miss_indices.is_empty() {
+            let embeddings: Vec<Vec<f64>> = embeddings
+                .into_iter()
+                .map(|slot| slot.expect("filled"))
+                .collect();
+            let dimensions = embeddings.first().map_or(0, Vec::len);
+            return Ok(EmbedResponse {
+                embeddings,
+                model: req.model.unwrap_or_else(|| "nomic-embed-text".to_string()),
+                dimensions,
+            });
+        }
+
+        // Second pass: fetch the misses in one upstream call.
+        let miss_req = EmbedRequest { texts: miss_texts.clone(), model: req.model.clone() };
+        let resp = self.embed_uncached(&miss_req).await?;
+        if resp.embeddings.len() != miss_texts.len() {
+            return Err(format!(
+                "embed cache: sidecar returned {} embeddings for {} texts",
+                resp.embeddings.len(),
+                miss_texts.len()
+            ));
+        }
+
+        // Record the most recent upstream dimension.
+        if resp.dimensions > 0 {
+            self.cached_dim.store(resp.dimensions as u64, Ordering::Relaxed);
+        }
+
+        // Insert misses into cache + fill response slots.
+        {
+            let mut guard = cache.lock().map_err(|e| format!("cache lock poisoned: {e}"))?;
+            for (j, idx) in miss_indices.iter().enumerate() {
+                let key = EmbedCacheKey {
+                    model: model_key.clone(),
+                    text: miss_texts[j].clone(),
+                };
+                let vec = resp.embeddings[j].clone();
+                guard.put(key, vec.clone());
+                embeddings[*idx] = Some(vec);
+            }
+        }
+
+        Ok(EmbedResponse {
+            embeddings: embeddings.into_iter().map(|opt| opt.expect("filled")).collect(),
+            model: resp.model,
+            dimensions: resp.dimensions,
+        })
+    }
+
+    /// Direct Ollama call — used internally by embed() for cache-miss
+    /// batches and as the transparent fallback when the cache is
+    /// disabled. Loops per-text against `${base_url}/api/embed`,
+    /// matching the sidecar's pre-2026-05-02 behavior. Ollama 0.4+
+    /// supports batch input but per-text keeps compatibility broader
+    /// + lets cache-miss-only batches share the loop with cold runs.
+    ///
+    /// The model defaults to `nomic-embed-text` when `req.model` is
+    /// `None`. `dimensions` in the response is the length of the first
+    /// embedding (0 for an empty `texts`).
+    ///
+    /// # Errors
+    /// Fails on the first text whose request cannot be sent, gets a
+    /// non-success status, or yields a body without a non-empty,
+    /// all-numeric `embeddings[0]` array.
+    async fn embed_uncached(&self, req: &EmbedRequest) -> Result<EmbedResponse, String> {
+        let model = req.model.clone().unwrap_or_else(|| "nomic-embed-text".to_string());
+        // Loop-invariant: build the endpoint URL once.
+        let url = format!("{}/api/embed", self.base_url);
+        let mut embeddings: Vec<Vec<f64>> = Vec::with_capacity(req.texts.len());
+
+        for text in &req.texts {
+            let resp = self.client
+                .post(url.as_str())
+                .json(&serde_json::json!({
+                    "model": &model,
+                    "input": text,
+                }))
+                .send()
+                .await
+                .map_err(|e| format!("embed request failed: {e}"))?;
+
+            if !resp.status().is_success() {
+                let body = resp.text().await.unwrap_or_default();
+                return Err(format!("ollama embed error: {body}"));
+            }
+            // Ollama returns {"embeddings": [[...]], "model": "...", ...}.
+            // The outer `embeddings` is always a list; for a scalar input
+            // we get a single inner vector.
+            let parsed: serde_json::Value = resp.json().await
+                .map_err(|e| format!("embed parse error: {e}"))?;
+            let arr = parsed.get("embeddings")
+                .and_then(|v| v.as_array())
+                .ok_or_else(|| format!("ollama embed: missing 'embeddings' field in {parsed}"))?;
+            if arr.is_empty() {
+                return Err("ollama embed: empty embeddings array".to_string());
+            }
+            let first = arr[0].as_array()
+                .ok_or_else(|| "ollama embed: embeddings[0] not an array".to_string())?;
+            // Fail on any non-numeric component instead of dropping it:
+            // silently skipping entries would yield a vector of the wrong
+            // dimension that then gets cached and returned to callers.
+            let vec = first
+                .iter()
+                .map(|n| {
+                    n.as_f64().ok_or_else(|| {
+                        format!("ollama embed: non-numeric embedding component: {n}")
+                    })
+                })
+                .collect::<Result<Vec<f64>, String>>()?;
+            if vec.is_empty() {
+                return Err("ollama embed: numeric coercion produced empty vector".to_string());
+            }
+            embeddings.push(vec);
+        }
+
+        let dimensions = embeddings.first().map(|v| v.len()).unwrap_or(0);
+        Ok(EmbedResponse {
+            embeddings,
+            model,
+            dimensions,
+        })
     }

    pub async fn generate(&self, req: GenerateRequest) -> Result<GenerateResponse, String> {
+        if let Some(gw) = self.gateway_url.as_deref() { // gateway configured: delegate the whole call to /v1/chat
+            return self.generate_via_gateway(gw, req).await;
+        }
+        // Direct Ollama path. Used by gateway internals (so the ollama
+        // provider can call upstream without a self-loop through
+        // /v1/chat) and by any consumer that wants raw transport
+        // without /v1/usage accounting.
+        let model = req.model.clone().unwrap_or_else(|| "qwen3.5:latest".to_string()); // default model when the request names none
+        let mut body = serde_json::json!({
+            "model": &model,
+            "prompt": &req.prompt,
+            "stream": false,
+        });
+        let mut options = serde_json::Map::new(); // collects the optional sampling settings
+        if let Some(t) = req.temperature {
+            options.insert("temperature".to_string(), serde_json::json!(t));
+        }
+        if let Some(mt) = req.max_tokens {
+            options.insert("num_predict".to_string(), serde_json::json!(mt)); // max_tokens is sent under the "num_predict" key
+        }
+        if !options.is_empty() { // "options" is only attached when at least one setting was given
+            body["options"] = serde_json::Value::Object(options);
+        }
+        if let Some(sys) = &req.system {
+            body["system"] = serde_json::json!(sys);
+        }
+        if let Some(th) = req.think {
+            body["think"] = serde_json::json!(th);
+        }
+
        let resp = self.client
-            .post(format!("{}/generate", self.base_url))
-            .json(&req)
+            .post(format!("{}/api/generate", self.base_url))
+            .json(&body)
            .send()
            .await
            .map_err(|e| format!("generate request failed: {e}"))?;

        if !resp.status().is_success() {
            let text = resp.text().await.unwrap_or_default();
-            return Err(format!("generate error: {text}"));
+            return Err(format!("ollama generate error: {text}")); // error carries the response body (empty if it could not be read)
        }
-        resp.json().await.map_err(|e| format!("generate parse error: {e}"))
+        let parsed: serde_json::Value = resp.json().await
+            .map_err(|e| format!("generate parse error: {e}"))?;
+
+        Ok(GenerateResponse {
+            text: parsed.get("response").and_then(|v| v.as_str()).unwrap_or("").to_string(), // missing or non-string "response" becomes ""
+            model, // the model name that was sent, not one read back from the response
+            tokens_evaluated: parsed.get("prompt_eval_count").and_then(|v| v.as_u64()), // None when the field is absent
+            tokens_generated: parsed.get("eval_count").and_then(|v| v.as_u64()), // None when the field is absent
+        })
    }

-    pub async fn rerank(&self, req: RerankRequest) -> Result<RerankResponse, String> {
+    /// Phase 44 part 2: route generate() through the gateway's
+    /// /v1/chat with provider="ollama" so the call lands in
+    /// /v1/usage + Langfuse. Translates between the sidecar
+    /// GenerateRequest/Response shape and the OpenAI-compat
+    /// chat shape on the wire.
+    async fn generate_via_gateway(&self, gateway_url: &str, req: GenerateRequest) -> Result<GenerateResponse, String> {
+        let mut messages = Vec::with_capacity(2); // at most two entries: optional system message + the user prompt
+        if let Some(sys) = &req.system {
+            messages.push(serde_json::json!({"role": "system", "content": sys}));
+        }
+        messages.push(serde_json::json!({"role": "user", "content": req.prompt}));
+        let mut body = serde_json::json!({
+            "messages": messages,
+            "provider": "ollama",
+        });
+        if let Some(m) = &req.model { body["model"] = serde_json::json!(m); } // optional fields are sent only when the caller set them
+        if let Some(t) = req.temperature { body["temperature"] = serde_json::json!(t); }
+        if let Some(mt) = req.max_tokens { body["max_tokens"] = serde_json::json!(mt); }
+        if let Some(th) = req.think { body["think"] = serde_json::json!(th); }
+
        let resp = self.client
-            .post(format!("{}/rerank", self.base_url))
-            .json(&req)
+            .post(format!("{}/v1/chat", gateway_url))
+            .json(&body)
            .send()
            .await
-            .map_err(|e| format!("rerank request failed: {e}"))?;
-
+            .map_err(|e| format!("/v1/chat request failed: {e}"))?;
        if !resp.status().is_success() {
            let text = resp.text().await.unwrap_or_default();
-            return Err(format!("rerank error: {text}"));
+            return Err(format!("/v1/chat error: {text}")); // error carries the response body (empty if it could not be read)
        }
-        resp.json().await.map_err(|e| format!("rerank parse error: {e}"))
+        let parsed: serde_json::Value = resp.json().await
+            .map_err(|e| format!("/v1/chat parse error: {e}"))?;
+
+        let text = parsed
+            .pointer("/choices/0/message/content") // first choice only
+            .and_then(|v| v.as_str())
+            .unwrap_or("") // missing or non-string content becomes ""
+            .to_string();
+        let model = parsed.get("model")
+            .and_then(|v| v.as_str())
+            .unwrap_or_else(|| req.model.as_deref().unwrap_or("")) // fall back to the requested model, then to ""
+            .to_string();
+        let prompt_tokens = parsed.pointer("/usage/prompt_tokens").and_then(|v| v.as_u64()); // None when usage is absent
+        let completion_tokens = parsed.pointer("/usage/completion_tokens").and_then(|v| v.as_u64()); // None when usage is absent
+
+        Ok(GenerateResponse {
+            text,
+            model,
+            tokens_evaluated: prompt_tokens,
+            tokens_generated: completion_tokens,
+        })
+    }
+
+    /// Cross-encoder reranking via Ollama generate. Asks the model to
+    /// rate each document's relevance to the query 0-10, then sorts
+    /// descending. Mirrors the sidecar's pre-2026-05-02 algorithm
+    /// so callers see the same scores.
+    ///
+    /// One `/api/generate` call is made per document, sequentially.
+    /// `req.model` defaults to "qwen3.5:latest". A reply whose first
+    /// whitespace-separated token is not a number (or is NaN) scores
+    /// 0; scores are clamped to 0..=10. Ties keep input order. When
+    /// `req.top_k` is set the sorted list is truncated to that length.
+    ///
+    /// # Errors
+    /// Returns a message on the first request failure, non-success
+    /// status, or unparseable JSON body.
+    pub async fn rerank(&self, req: RerankRequest) -> Result<RerankResponse, String> {
+        let model = req.model.clone().unwrap_or_else(|| "qwen3.5:latest".to_string());
+        // Loop-invariant: build the endpoint once instead of per document.
+        let endpoint = format!("{}/api/generate", self.base_url);
+        let mut scored: Vec<ScoredDocument> = Vec::with_capacity(req.documents.len());
+
+        for (i, doc) in req.documents.iter().enumerate() {
+            let prompt = format!(
+                "Rate the relevance of the following document to the query on a scale of 0 to 10. \
+                 Respond with ONLY a number.\n\n\
+                 Query: {}\n\n\
+                 Document: {}\n\n\
+                 Score:",
+                req.query, doc,
+            );
+            let resp = self.client
+                .post(endpoint.as_str())
+                .json(&serde_json::json!({
+                    "model": &model,
+                    "prompt": prompt,
+                    "stream": false,
+                    "options": {"temperature": 0.0, "num_predict": 8},
+                }))
+                .send()
+                .await
+                .map_err(|e| format!("rerank request failed: {e}"))?;
+
+            if !resp.status().is_success() {
+                let body = resp.text().await.unwrap_or_default();
+                return Err(format!("ollama rerank error: {body}"));
+            }
+            let parsed: serde_json::Value = resp.json().await
+                .map_err(|e| format!("rerank parse error: {e}"))?;
+            let text = parsed.get("response").and_then(|v| v.as_str()).unwrap_or("").trim();
+            // Parse the leading number; tolerate "7", "7.5", "7 — strong match".
+            // `"nan".parse::<f64>()` succeeds and NaN passes through clamp
+            // unchanged, which would leave the comparator below without a
+            // consistent order — treat NaN like any other unparseable reply.
+            let score = text.split_whitespace().next()
+                .and_then(|t| t.parse::<f64>().ok())
+                .filter(|s| !s.is_nan())
+                .unwrap_or(0.0)
+                .clamp(0.0, 10.0);
+
+            scored.push(ScoredDocument {
+                index: i,
+                text: doc.clone(),
+                score,
+            });
+        }
+
+        // No NaN can reach this point, so partial_cmp always yields Some;
+        // the Equal fallback is kept only as a belt-and-braces default.
+        scored.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+        if let Some(k) = req.top_k {
+            scored.truncate(k);
+        }
+        Ok(RerankResponse { results: scored, model })
    }

    /// Force Ollama to unload the named model from VRAM (keep_alive=0).
@ -149,40 +529,116 @@ impl AiClient {
    /// profile's model can linger in VRAM next to the new one.
    pub async fn unload_model(&self, model: &str) -> Result<serde_json::Value, String> {
        let resp = self.client
-            .post(format!("{}/admin/unload", self.base_url))
-            .json(&serde_json::json!({ "model": model }))
+            .post(format!("{}/api/generate", self.base_url))
+            .json(&serde_json::json!({
+                "model": model,
+                "prompt": "",
+                "keep_alive": 0, // keep_alive=0 is what asks Ollama to drop the model
+                "stream": false,
+            }))
            .send().await
            .map_err(|e| format!("unload request failed: {e}"))?;
        if !resp.status().is_success() {
            let text = resp.text().await.unwrap_or_default();
-            return Err(format!("unload error: {text}"));
+            return Err(format!("ollama unload error: {text}")); // error carries the response body (empty if it could not be read)
        }
-        resp.json().await.map_err(|e| format!("unload parse error: {e}"))
+        // Ollama returns 200 with the empty-prompt response shape.
+        // Fold into the legacy {"unloaded": "<model>"} envelope so
+        // callers' parsing doesn't break.
+        Ok(serde_json::json!({ "unloaded": model })) // the response body itself is never read
    }

    /// Ask Ollama to load the named model into VRAM proactively. Makes
    /// the first real request after profile activation fast (no cold-load
-    /// latency).
+    /// latency). Empty prompts confuse some models, so we send a single
+    /// space + cap num_predict=1 (matches the sidecar's prior behavior).
    pub async fn preload_model(&self, model: &str) -> Result<serde_json::Value, String> {
        let resp = self.client
-            .post(format!("{}/admin/preload", self.base_url))
-            .json(&serde_json::json!({ "model": model }))
+            .post(format!("{}/api/generate", self.base_url))
+            .json(&serde_json::json!({
+                "model": model,
+                "prompt": " ",
+                "keep_alive": "5m", // NOTE(review): presumably keeps the model resident for 5 minutes — confirm against the Ollama API docs
+                "stream": false,
+                "options": {"num_predict": 1},
+            }))
            .send().await
            .map_err(|e| format!("preload request failed: {e}"))?;
        if !resp.status().is_success() {
            let text = resp.text().await.unwrap_or_default();
-            return Err(format!("preload error: {text}"));
+            return Err(format!("ollama preload error: {text}")); // error carries the response body (empty if it could not be read)
        }
-        resp.json().await.map_err(|e| format!("preload parse error: {e}"))
+        let parsed: serde_json::Value = resp.json().await
+            .map_err(|e| format!("preload parse error: {e}"))?;
+        Ok(serde_json::json!({
+            "preloaded": model,
+            "load_duration_ns": parsed.get("load_duration"), // null when the response has no "load_duration"
+            "total_duration_ns": parsed.get("total_duration"), // null when the response has no "total_duration"
+        }))
    }

-    /// GPU + loaded-model snapshot from the sidecar. Combines nvidia-smi
-    /// output (if available) with Ollama's /api/ps.
+    /// GPU + loaded-model snapshot. Combines nvidia-smi output (when
+    /// available) with Ollama's /api/ps. Same shape as the prior
+    /// sidecar /admin/vram endpoint so callers don't need updating.
    pub async fn vram_snapshot(&self) -> Result<serde_json::Value, String> {
        let resp = self.client
-            .get(format!("{}/admin/vram", self.base_url))
+            .get(format!("{}/api/ps", self.base_url))
            .send().await
-            .map_err(|e| format!("vram request failed: {e}"))?;
-        resp.json().await.map_err(|e| format!("vram parse error: {e}"))
+            .map_err(|e| format!("ollama ps request failed: {e}"))?; // only a transport failure is returned as Err
+        let loaded: Vec<serde_json::Value> = if resp.status().is_success() {
+            let parsed: serde_json::Value = resp.json().await.unwrap_or(serde_json::Value::Null); // unparseable body is treated as Null, i.e. no models
+            parsed.get("models")
+                .and_then(|v| v.as_array())
+                .map(|arr| arr.iter().map(|m| serde_json::json!({
+                    "name": m.get("name"),
+                    "size_vram_mib": m.get("size_vram").and_then(|v| v.as_u64()).map(|n| n / (1024 * 1024)), // integer division by 2^20; null when absent or not a u64
+                    "expires_at": m.get("expires_at"),
+                })).collect())
+                .unwrap_or_default() // missing or non-array "models" yields an empty list
+        } else {
+            Vec::new() // non-success status: report no loaded models instead of failing
+        };
+
+        let gpu = nvidia_smi_snapshot(); // NOTE(review): runs a blocking std::process::Command inside this async fn — consider moving it to a blocking task
+
+        Ok(serde_json::json!({
+            "gpu": gpu,
+            "ollama_loaded": loaded,
+        }))
    }
 }
+
+/// One-shot nvidia-smi poll. Returns Null if the tool isn't on PATH,
+/// the call fails, or the output has no parseable row. Mirrors the
+/// sidecar's `_nvidia_smi_snapshot` shape so callers reading
+/// vram_snapshot don't break:
+/// `{"name", "used_mib", "total_mib", "utilization_pct"}`.
+///
+/// nvidia-smi prints one CSV row per GPU; only the first non-empty
+/// row is reported. Numeric columns that fail to parse become 0.
+///
+/// This spawns a process and waits for it, so it blocks the calling
+/// thread until nvidia-smi exits.
+fn nvidia_smi_snapshot() -> serde_json::Value {
+    use std::process::Command;
+    let out = Command::new("nvidia-smi")
+        .args([
+            "--query-gpu=memory.used,memory.total,utilization.gpu,name",
+            "--format=csv,noheader,nounits",
+        ])
+        .output();
+    let stdout = match out {
+        Ok(o) if o.status.success() => o.stdout,
+        _ => return serde_json::Value::Null,
+    };
+    let text = String::from_utf8_lossy(&stdout);
+    // Take the first row only. Splitting the whole output on ',' would
+    // glue the next GPU's first column onto this GPU's name on
+    // multi-GPU hosts ("NVIDIA A\n300").
+    let row = match text.lines().map(str::trim).find(|l| !l.is_empty()) {
+        Some(r) => r,
+        None => return serde_json::Value::Null,
+    };
+    // `name` is the last requested column; splitn(4) keeps any commas
+    // inside it instead of cutting the name short.
+    let parts: Vec<&str> = row.splitn(4, ',').map(str::trim).collect();
+    if parts.len() < 4 {
+        return serde_json::Value::Null;
+    }
+    let used = parts[0].parse::<u64>().unwrap_or(0);
+    let total = parts[1].parse::<u64>().unwrap_or(0);
+    let util = parts[2].parse::<u64>().unwrap_or(0);
+    serde_json::json!({
+        "name": parts[3],
+        "used_mib": used,
+        "total_mib": total,
+        "utilization_pct": util,
+    })
+}
--- a/crates/aibridge/src/context.rs
+++ b/crates/aibridge/src/context.rs
@ -13,11 +13,9 @@
 use std::collections::HashMap;
 use std::sync::OnceLock;

-/// Rough token count. `chars / 4` ceiling. See module docs for why
-/// this heuristic is sufficient.
-pub fn estimate_tokens(text: &str) -> usize {
-    (text.chars().count() + 3) / 4
-}
+// `estimate_tokens` moved to `shared::model_matrix::ModelMatrix::estimate_tokens`
+// (cdc24d8). All callers migrated; the deprecated wrapper that stood in its
+// place has been removed since it had zero external consumers.

 /// Phase 21 — per-model context windows, mirroring the TS table in
 /// `tests/multi-agent/agent.ts`. Anchored on each model's documented
@ -84,8 +82,8 @@ pub fn assert_context_budget(
    let window = context_window_for(model);
    let safety = opts.safety_margin.unwrap_or(DEFAULT_SAFETY_MARGIN);
    let max_tokens = opts.max_tokens.unwrap_or(DEFAULT_MAX_TOKENS);
-    let sys_tokens = opts.system.map(estimate_tokens).unwrap_or(0);
-    let estimated = estimate_tokens(prompt) + sys_tokens + max_tokens;
+    let sys_tokens = opts.system.map(shared::model_matrix::ModelMatrix::estimate_tokens).unwrap_or(0);
+    let estimated = shared::model_matrix::ModelMatrix::estimate_tokens(prompt) + sys_tokens + max_tokens;
    let remaining = window as i64 - estimated as i64 - safety as i64;
    let check = BudgetCheck { estimated, window, remaining };
    if remaining < 0 && !opts.bypass {
@ -109,14 +107,10 @@ pub fn overflow_message(model: &str, check: &BudgetCheck, over_by: usize, safety
 mod tests {
    use super::*;

-    #[test]
-    fn estimate_tokens_ceiling_divides_by_four() {
-        assert_eq!(estimate_tokens(""), 0);
-        assert_eq!(estimate_tokens("abc"), 1);    // 3 → ceil(3/4) = 1
-        assert_eq!(estimate_tokens("abcd"), 1);   // 4 → ceil(4/4) = 1
-        assert_eq!(estimate_tokens("abcde"), 2);  // 5 → ceil(5/4) = 2
-        assert_eq!(estimate_tokens(&"x".repeat(400)), 100);
-    }
+    // `estimate_tokens` behavior is now canonically tested in
+    // crates/shared/src/model_matrix.rs. The legacy pin test that
+    // lived here was deleted together with the deprecated wrapper,
+    // so this module no longer pins the estimator's rounding.

    #[test]
    fn context_window_known_and_fallback() {
@ -179,7 +173,7 @@ mod tests {
        ).unwrap();
        assert!(with_sys.estimated > without_sys.estimated,
            "system prompt should raise estimate");
-        assert_eq!(with_sys.estimated - without_sys.estimated, estimate_tokens(&sys));
+        assert_eq!(with_sys.estimated - without_sys.estimated, shared::model_matrix::ModelMatrix::estimate_tokens(&sys));
    }

    #[test]
--- a/crates/aibridge/src/continuation.rs
+++ b/crates/aibridge/src/continuation.rs
@ -138,6 +138,17 @@ pub struct ContinuableOutcome {
    pub empty_retries: usize,
    pub continuations: usize,
    pub final_complete: bool,
+    /// Sum of `prompt_tokens` across every generator call made to
+    /// produce this outcome — including empty retries and continuations.
+    /// Lets callers (gateway execution loop, observability) stamp
+    /// accurate per-task usage without second-guessing the retry fan-out.
+    pub prompt_tokens: u32,
+    /// Sum of `completion_tokens` across every generator call.
+    pub completion_tokens: u32,
+    /// Total number of generator calls. `1 + empty_retries +
+    /// continuations` in the normal case; the field is explicit so
+    /// callers don't have to re-derive it.
+    pub calls: u32,
 }

 fn make_request(opts: &ContinuableOpts, prompt: String, current_max: u32) -> GenerateRequest {
@ -175,11 +186,20 @@ pub async fn generate_continuable<G: TextGenerator>(
    let mut combined = String::new();
    let mut empty_retries = 0usize;
    let mut continuations = 0usize;
+    let mut prompt_tokens: u32 = 0;
+    let mut completion_tokens: u32 = 0;
+    let mut calls: u32 = 0;

    // Phase 21(a) — empty-response backoff loop.
    for retry in 0..opts.max_empty_retries {
        let req = make_request(opts, prompt.to_string(), current_max);
        let resp = generator.generate_text(req).await?;
+        calls += 1;
+        // u32::try_from(..).unwrap_or(u32::MAX) saturates at u32::MAX
+        // instead of silently truncating bits when tokens_evaluated/
+        // _generated comes back as a u64 above u32::MAX (~4.29 billion).
+        // Caught 2026-04-27 by Opus self-audit.
+        prompt_tokens = prompt_tokens.saturating_add(u32::try_from(resp.tokens_evaluated.unwrap_or(0)).unwrap_or(u32::MAX));
+        completion_tokens = completion_tokens.saturating_add(u32::try_from(resp.tokens_generated.unwrap_or(0)).unwrap_or(u32::MAX));
        if !resp.text.trim().is_empty() {
            combined = resp.text;
            break;
@ -188,9 +208,7 @@ pub async fn generate_continuable<G: TextGenerator>(
        current_max = (current_max.saturating_mul(2)).min(opts.budget_cap);
    }

-    // Phase 21(b) — structural-completion continuation loop. Runs on
-    // the truncated-non-empty case; empty + exhausted retries falls
-    // through with empty combined and final_complete=false.
+    // Phase 21(b) — structural-completion continuation loop.
    for _ in 0..opts.max_continuations {
        if is_structurally_complete(&combined, opts.shape) {
            return Ok(ContinuableOutcome {
@ -198,17 +216,22 @@ pub async fn generate_continuable<G: TextGenerator>(
                empty_retries,
                continuations,
                final_complete: true,
+                prompt_tokens,
+                completion_tokens,
+                calls,
            });
        }
        if combined.trim().is_empty() {
            // Nothing to continue from — continuing "" is identical to
-            // the initial call and would loop. Bail so the caller sees
-            // the failure rather than burning N extra calls.
+            // the initial call and would loop.
            break;
        }
        let cont_prompt = continuation_prompt(prompt, &combined);
        let req = make_request(opts, cont_prompt, current_max.min(opts.budget_cap));
        let resp = generator.generate_text(req).await?;
+        calls += 1;
+        prompt_tokens = prompt_tokens.saturating_add(u32::try_from(resp.tokens_evaluated.unwrap_or(0)).unwrap_or(u32::MAX));
+        completion_tokens = completion_tokens.saturating_add(u32::try_from(resp.tokens_generated.unwrap_or(0)).unwrap_or(u32::MAX));
        combined.push_str(&resp.text);
        continuations += 1;
    }
@ -219,6 +242,9 @@ pub async fn generate_continuable<G: TextGenerator>(
        empty_retries,
        continuations,
        final_complete,
+        prompt_tokens,
+        completion_tokens,
+        calls,
    })
 }

--- a/crates/aibridge/src/lib.rs
+++ b/crates/aibridge/src/lib.rs
@ -1,5 +1,8 @@
 pub mod client;
 pub mod context;
 pub mod continuation;
+pub mod provider;
+pub mod providers;
+pub mod routing;
 pub mod service;
 pub mod tree_split;
--- a/crates/aibridge/src/provider.rs
+++ b/crates/aibridge/src/provider.rs
@ -0,0 +1,39 @@
+use async_trait::async_trait;
+use crate::client::{GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
+
+#[async_trait]
+pub trait ProviderAdapter: Send + Sync { // uniform async interface implemented once per model backend
+    /// Provider name used for routing, e.g. "ollama" or "openrouter".
+    fn name(&self) -> &str;
+
+    /// Chat completion. Ok carries the generated text, model name and token counts; Err carries an error message.
+    async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String>;
+
+    /// Embeddings. Ok carries the vectors, model name and dimensions; Err carries an error message.
+    async fn embed(&self, req: EmbedRequest) -> Result<EmbedResponse, String>;
+
+    /// Unload a model from VRAM. Optional: the default implementation does nothing and returns Ok(()).
+    async fn unload(&self, _model: &str) -> Result<(), String> {
+        Ok(())
+    }
+
+    /// Health check. Ok carries a provider-defined JSON document; Err carries an error message.
+    async fn health(&self) -> Result<serde_json::Value, String>;
+}
+
+/// Maps a model name to the key of the provider that should serve it.
+///
+/// The name is lowercased and tested against known prefixes in order:
+/// - "openrouter/anthropic/claude-3.5-sonnet" → "openrouter"
+/// - "gemini…" → "gemini"
+/// - "claude…" → "claude"
+/// - anything else, e.g. "qwen3.5:latest" or "gpt-4o" → "ollama"
+pub fn provider_key(model: &str) -> &'static str {
+    // (prefix, provider) pairs; the first matching prefix wins.
+    const PREFIXES: [(&str, &str); 3] = [
+        ("openrouter/", "openrouter"),
+        ("gemini", "gemini"),
+        ("claude", "claude"),
+    ];
+    let needle = model.to_lowercase();
+    PREFIXES
+        .iter()
+        .find(|(prefix, _)| needle.starts_with(*prefix))
+        .map(|&(_, provider)| provider)
+        .unwrap_or("ollama") // default: local Ollama
+}
--- a/crates/aibridge/src/providers/mod.rs
+++ b/crates/aibridge/src/providers/mod.rs
@ -0,0 +1,2 @@
+pub mod ollama;
+pub mod openrouter;
--- a/crates/aibridge/src/providers/ollama.rs
+++ b/crates/aibridge/src/providers/ollama.rs
@ -0,0 +1,37 @@
+use async_trait::async_trait;
+use crate::client::{AiClient, GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
+use crate::provider::ProviderAdapter;
+
+/// Adapter that forwards `ProviderAdapter` calls to a wrapped `AiClient`.
+pub struct OllamaAdapter {
+    client: AiClient,
+}
+
+impl OllamaAdapter {
+    /// Wraps an already-configured `AiClient`.
+    pub fn new(client: AiClient) -> Self {
+        OllamaAdapter { client }
+    }
+}
+
+#[async_trait]
+impl ProviderAdapter for OllamaAdapter {
+    /// Routing name of this adapter.
+    fn name(&self) -> &str {
+        "ollama"
+    }
+
+    /// Delegates to `AiClient::generate`.
+    async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String> {
+        let outcome = self.client.generate(req).await;
+        outcome
+    }
+
+    /// Delegates to `AiClient::embed`.
+    async fn embed(&self, req: EmbedRequest) -> Result<EmbedResponse, String> {
+        let outcome = self.client.embed(req).await;
+        outcome
+    }
+
+    /// Delegates to `AiClient::unload_model` and discards its JSON reply.
+    async fn unload(&self, model: &str) -> Result<(), String> {
+        self.client
+            .unload_model(model)
+            .await
+            .map(|_reply: serde_json::Value| ())
+    }
+
+    /// Delegates to `AiClient::health`.
+    async fn health(&self) -> Result<serde_json::Value, String> {
+        let outcome = self.client.health().await;
+        outcome
+    }
+}
--- a/crates/aibridge/src/providers/openrouter.rs
+++ b/crates/aibridge/src/providers/openrouter.rs
@ -0,0 +1,152 @@
+use async_trait::async_trait;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+use crate::client::{GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
+use crate::provider::ProviderAdapter;
+
+pub struct OpenRouterAdapter { // ProviderAdapter implementation that posts chat requests to `{base_url}/chat/completions`
+    client: Client, // reqwest client; `new` builds it with a 180 s timeout
+    base_url: String, // stored without trailing '/' (trimmed in `new`)
+    api_key: String, // sent by `chat` as `Authorization: Bearer <key>`
+    default_model: String, // used by `chat_model` when the requested model is empty or lacks the "openrouter/" prefix
+}
+
+/// JSON body posted to `{base_url}/chat/completions`.
+#[derive(Serialize)]
+struct OpenRouterChatRequest {
+    model: String,
+    messages: Vec<OpenRouterMessage>,
+    temperature: Option<f64>,
+    max_tokens: Option<u32>,
+}
+
+/// One outgoing chat message (`role` is "system" or "user" here).
+#[derive(Serialize)]
+struct OpenRouterMessage {
+    role: String,
+    content: String,
+}
+
+/// Subset of the chat-completions response that the adapter reads.
+#[derive(Deserialize)]
+struct OpenRouterChatResponse {
+    choices: Vec<OpenRouterChoice>,
+    // Every usage field is already optional; default the block itself
+    // so a response without `usage` still parses (token counts then
+    // come back as None) instead of failing the whole call.
+    #[serde(default)]
+    usage: OpenRouterUsage,
+    model: String,
+}
+
+/// One completion choice; only its message is read.
+#[derive(Deserialize)]
+struct OpenRouterChoice {
+    message: OpenRouterMessageOut,
+}
+
+/// Message returned inside a choice.
+#[derive(Deserialize)]
+#[allow(dead_code)] // `role` is deserialized but never read
+struct OpenRouterMessageOut {
+    role: String,
+    content: String,
+}
+
+/// Token accounting reported by the provider; all fields optional.
+#[derive(Deserialize, Default)]
+#[allow(dead_code)] // `total_tokens` is deserialized but never read
+struct OpenRouterUsage {
+    prompt_tokens: Option<u32>,
+    completion_tokens: Option<u32>,
+    total_tokens: Option<u32>,
+}
+
+impl OpenRouterAdapter {
+    /// Builds an adapter for the API rooted at `base_url` (a trailing
+    /// '/' is trimmed). `default_model` is used whenever a request
+    /// does not name an "openrouter/…" model. Requests time out after
+    /// 180 seconds.
+    ///
+    /// # Panics
+    /// Panics if the underlying HTTP client cannot be constructed.
+    pub fn new(base_url: &str, api_key: String, default_model: &str) -> Self {
+        let client = Client::builder()
+            .timeout(Duration::from_secs(180))
+            .build()
+            .expect("failed to build HTTP client");
+        Self {
+            client,
+            base_url: base_url.trim_end_matches('/').to_string(),
+            api_key,
+            default_model: default_model.to_string(),
+        }
+    }
+
+    /// Resolves the model name to send upstream.
+    ///
+    /// Exactly one leading "openrouter/" routing prefix is removed and
+    /// the remainder is used; a name that is empty, lacks the prefix,
+    /// or consists of the prefix alone falls back to `default_model`.
+    fn chat_model(&self, model: &str) -> String {
+        // strip_prefix removes the prefix once. trim_start_matches
+        // removed every repetition, so "openrouter/openrouter/auto"
+        // collapsed to "auto" instead of the upstream id
+        // "openrouter/auto".
+        match model.strip_prefix("openrouter/") {
+            Some(rest) if !rest.is_empty() => rest.to_string(),
+            _ => self.default_model.clone(),
+        }
+    }
+
+    /// Converts a request into chat messages: the optional system
+    /// prompt first, then the prompt as a single user message.
+    fn to_openrouter_messages(req: &GenerateRequest) -> Vec<OpenRouterMessage> {
+        let mut out = Vec::with_capacity(2);
+        if let Some(sys) = &req.system {
+            out.push(OpenRouterMessage { role: "system".into(), content: sys.clone() });
+        }
+        out.push(OpenRouterMessage {
+            role: "user".into(),
+            content: req.prompt.clone(),
+        });
+        out
+    }
+}
+
+#[async_trait]
+impl ProviderAdapter for OpenRouterAdapter {
+    /// Routing name of this adapter.
+    fn name(&self) -> &str {
+        "openrouter"
+    }
+
+    /// Sends the request to `{base_url}/chat/completions` and returns
+    /// the first choice's text together with the reported model name
+    /// and token usage.
+    ///
+    /// Fails when the request cannot be sent, the status is not a
+    /// success, the body does not parse, or no choice is returned.
+    async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String> {
+        let requested = req.model.as_deref().unwrap_or("");
+        let payload = OpenRouterChatRequest {
+            model: self.chat_model(requested),
+            messages: Self::to_openrouter_messages(&req),
+            temperature: req.temperature,
+            max_tokens: req.max_tokens,
+        };
+
+        let url = format!("{}/chat/completions", self.base_url);
+        let auth = format!("Bearer {}", self.api_key);
+        let resp = self.client
+            .post(url)
+            .header("Authorization", auth)
+            .header("Content-Type", "application/json")
+            .json(&payload)
+            .send()
+            .await
+            .map_err(|e| format!("openrouter request failed: {e}"))?;
+
+        // Capture the status before consuming the response for its body.
+        let status = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        if !status.is_success() {
+            return Err(format!("openrouter error ({}): {}", status, body));
+        }
+
+        let parsed: OpenRouterChatResponse = serde_json::from_str(&body)
+            .map_err(|e| format!("openrouter parse error: {e}"))?;
+        let OpenRouterChatResponse { choices, usage, model } = parsed;
+        let first = choices
+            .into_iter()
+            .next()
+            .ok_or("no completion choice returned")?;
+
+        Ok(GenerateResponse {
+            text: first.message.content,
+            model,
+            tokens_evaluated: usage.prompt_tokens.map(u64::from),
+            tokens_generated: usage.completion_tokens.map(u64::from),
+        })
+    }
+
+    /// Embeddings are not supported by this adapter; always errors.
+    async fn embed(&self, _req: EmbedRequest) -> Result<EmbedResponse, String> {
+        Err("openrouter: embed not implemented".into())
+    }
+
+    /// Static health report: no request is made, the reply is always
+    /// `{"status": "ok", "provider": "openrouter"}`.
+    async fn health(&self) -> Result<serde_json::Value, String> {
+        let report = serde_json::json!({
+            "status": "ok",
+            "provider": "openrouter",
+        });
+        Ok(report)
+    }
+}
--- a/crates/aibridge/src/routing.rs
+++ b/crates/aibridge/src/routing.rs
@ -0,0 +1,119 @@
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct RoutingRule { // one routing-table entry: models matching `model_pattern` are sent to `provider`
+    pub model_pattern: String, // glob where '*' is a wildcard; RoutingEngine::route lowercases it before matching
+    pub provider: String, // provider name copied into the RouteDecision when this rule matches
+    pub max_tokens: Option<u32>, // copied into the RouteDecision when this rule matches
+    pub temperature: Option<f64>, // copied into the RouteDecision when this rule matches
+}
+
+/// Ordered rule table plus a fallback provider chain.
+#[derive(Clone, Default)]
+pub struct RoutingEngine {
+    rules: Vec<RoutingRule>,
+    fallback_chain: Vec<String>,
+}
+
+impl RoutingEngine {
+    /// Engine with no rules and no fallback chain.
+    pub fn new() -> Self {
+        RoutingEngine {
+            rules: Vec::new(),
+            fallback_chain: Vec::new(),
+        }
+    }
+
+    /// Replaces the rule table; rules are tried in the given order.
+    pub fn with_rules(self, rules: Vec<RoutingRule>) -> Self {
+        Self { rules, ..self }
+    }
+
+    /// Replaces the fallback chain; only its first entry is consulted
+    /// by `route`.
+    pub fn with_fallback(self, chain: Vec<String>) -> Self {
+        Self { fallback_chain: chain, ..self }
+    }
+
+    /// Picks a provider for `model`.
+    ///
+    /// The first rule whose pattern matches (case-insensitively) wins
+    /// and contributes its `max_tokens` / `temperature`. With no
+    /// matching rule the provider is the head of the fallback chain,
+    /// or "ollama" when the chain is empty, and both overrides are
+    /// `None`. The returned `model` is always the input, unchanged.
+    pub fn route(&self, model: &str) -> RouteDecision {
+        let needle = model.to_lowercase();
+        let matched = self
+            .rules
+            .iter()
+            .find(|rule| glob_match(&rule.model_pattern.to_lowercase(), &needle));
+
+        match matched {
+            Some(rule) => RouteDecision {
+                provider: rule.provider.clone(),
+                model: model.to_string(),
+                max_tokens: rule.max_tokens,
+                temperature: rule.temperature,
+            },
+            None => RouteDecision {
+                provider: self
+                    .fallback_chain
+                    .first()
+                    .cloned()
+                    .unwrap_or_else(|| "ollama".to_string()),
+                model: model.to_string(),
+                max_tokens: None,
+                temperature: None,
+            },
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RouteDecision { // result of RoutingEngine::route
+    pub provider: String, // matched rule's provider, else first fallback entry, else "ollama"
+    pub model: String, // the model name passed to route(), unchanged
+    pub max_tokens: Option<u32>, // taken from the matched rule; None when no rule matched
+    pub temperature: Option<f64>, // taken from the matched rule; None when no rule matched
+}
+
+/// Matches `name` against `pattern`, where each `*` in the pattern
+/// stands for any run of characters (including none). Without a `*`
+/// the two strings must be equal. Matching is case-sensitive; callers
+/// lowercase both sides when they want case-insensitive behavior.
+fn glob_match(pattern: &str, name: &str) -> bool {
+    if !pattern.contains('*') {
+        return pattern == name;
+    }
+    // split('*') on a pattern containing '*' always yields ≥ 2 pieces:
+    // a (possibly empty) prefix, a (possibly empty) suffix, and any
+    // interior pieces that must appear between them in order.
+    let parts: Vec<&str> = pattern.split('*').collect();
+    let prefix = parts[0];
+    let suffix = parts[parts.len() - 1];
+
+    // The length guard stops prefix and suffix from sharing characters:
+    // without it "ab*ba" matched "aba".
+    if name.len() < prefix.len() + suffix.len()
+        || !name.starts_with(prefix)
+        || !name.ends_with(suffix)
+    {
+        return false;
+    }
+
+    // Interior pieces are searched only in the text between prefix and
+    // suffix, so they cannot consume the suffix ("a*b*b" vs "ab").
+    // Both cut points are char boundaries because starts_with /
+    // ends_with matched whole substrings there.
+    let mut rest = &name[prefix.len()..name.len() - suffix.len()];
+    for mid in &parts[1..parts.len() - 1] {
+        match rest.find(*mid) {
+            Some(pos) => rest = &rest[pos + mid.len()..],
+            None => return false,
+        }
+    }
+    true
+}
+
+impl Default for RoutingRule {
+    /// Catch-all rule: pattern "*" routed to "ollama" with no
+    /// `max_tokens` or `temperature` override.
+    fn default() -> Self {
+        let model_pattern = String::from("*");
+        let provider = String::from("ollama");
+        RoutingRule {
+            model_pattern,
+            provider,
+            max_tokens: None,
+            temperature: None,
+        }
+    }
+}
+
+#[cfg(test)]
+mod glob_match_tests {
+    use super::glob_match;
+
+    #[test]
+    fn exact_match() {
+        assert!(glob_match("gpt-oss:120b", "gpt-oss:120b"));
+    }
+
+    #[test]
+    fn exact_mismatch() {
+        assert!(!glob_match("a", "b"));
+    }
+
+    #[test]
+    fn leading_wildcard() {
+        assert!(glob_match("*:120b", "gpt-oss:120b"));
+    }
+
+    #[test]
+    fn trailing_wildcard() {
+        assert!(glob_match("gpt-oss:*", "gpt-oss:120b"));
+    }
+
+    #[test]
+    fn bare_wildcard() {
+        assert!(glob_match("*", "anything"));
+    }
+
+    #[test]
+    fn multi_wildcard_in_order() {
+        assert!(glob_match("gpt-*-oss-*", "gpt-4-oss-120b"));
+    }
+
+    #[test]
+    fn multi_wildcard_wrong_order() {
+        assert!(!glob_match("b*a*", "abba"));
+    }
+
+    // Regression: an earlier implementation carried an unreachable
+    // `parts.len() == 1` branch that indexed parts[1] and would have
+    // panicked if reached. split('*') guarantees ≥ 2 parts whenever
+    // the pattern contains '*', and every N-part case is handled.
+    #[test]
+    fn multi_wildcard_panic_safety() {
+        assert!(glob_match("a*b*c", "abc"));
+        assert!(glob_match("a*b*c", "axxxbxxxc"));
+        assert!(!glob_match("a*b*c", "xxxbxxx"));
+    }
+}
--- a/crates/aibridge/src/tree_split.rs
+++ b/crates/aibridge/src/tree_split.rs
@ -19,8 +19,9 @@
 //! we bubble the error up rather than silently truncating. That's the
 //! whole point of Phase 21.

-use crate::context::{assert_context_budget, BudgetOpts, estimate_tokens, overflow_message,
+use crate::context::{assert_context_budget, BudgetOpts, overflow_message,
                     DEFAULT_MAX_TOKENS, DEFAULT_SAFETY_MARGIN};
+use shared::model_matrix::ModelMatrix;
 use crate::continuation::{generate_continuable, ContinuableOpts, ResponseShape, TextGenerator};

 /// Callback signatures — caller supplies closures that stitch the
@ -80,12 +81,12 @@ pub struct TreeSplitResult {
 /// by `\n— shard N/M digest —\n` so we can find the first one and
 /// chop everything before its successor.
 fn truncate_scratchpad(scratchpad: &mut String, budget_tokens: usize) -> bool {
-    if estimate_tokens(scratchpad) <= budget_tokens { return false; }
+    if ModelMatrix::estimate_tokens(scratchpad) <= budget_tokens { return false; }
    // Find the second delimiter — everything before it gets dropped.
    const DELIM_PREFIX: &str = "\n— shard ";
    let mut cursor = 0;
    let mut truncated = false;
-    while estimate_tokens(&scratchpad[cursor..]) > budget_tokens {
+    while ModelMatrix::estimate_tokens(&scratchpad[cursor..]) > budget_tokens {
        // Skip past a leading delimiter (if we're sitting on one from
        // a previous iteration), then find the next.
        let search_from = cursor + if scratchpad[cursor..].starts_with(DELIM_PREFIX) {
@ -278,7 +279,7 @@ mod tests {
        // Scratchpad should still fit roughly within the budget
        // (post-truncation); the estimator uses chars/4 so the bound
        // is ~budget*4 chars. Give some slack for the delimiter.
-        let scratchpad_tokens = estimate_tokens(&result.scratchpad);
+        let scratchpad_tokens = ModelMatrix::estimate_tokens(&result.scratchpad);
        assert!(scratchpad_tokens <= opts.scratchpad_budget * 2,
            "scratchpad {} tokens vs budget {}", scratchpad_tokens, opts.scratchpad_budget);
    }
--- a/crates/catalogd/src/service.rs
+++ b/crates/catalogd/src/service.rs
@ -31,6 +31,10 @@ pub fn router(registry: Registry) -> Router {
        // Phase 17: model profiles
        .route("/profiles", post(create_profile).get(list_profiles))
        .route("/profiles/{id}", get(get_profile).delete(delete_profile))
+        // Phase 41: profile-type routes
+        .route("/profiles/retrieval", get(list_retrieval_profiles))
+        .route("/profiles/memory", get(list_memory_profiles))
+        .route("/profiles/observer", get(list_observer_profiles))
        .with_state(registry)
 }

@ -398,6 +402,9 @@ struct CreateProfileRequest {
    /// indexes to. Defaults to Parquet+HNSW.
    #[serde(default)]
    vector_backend: shared::types::VectorBackend,
+    /// Phase 41: Profile type for routing + activation behavior.
+    #[serde(default)]
+    profile_type: shared::types::ProfileType,
 }

 fn default_embed_model_req() -> String { "nomic-embed-text".to_string() }
@ -417,6 +424,7 @@ async fn create_profile(
        created_by: req.created_by,
        bucket: req.bucket,
        vector_backend: req.vector_backend,
+        profile_type: req.profile_type,
    };
    match registry.put_profile(profile).await {
        Ok(p) => Ok((StatusCode::CREATED, Json(p))),
@ -428,6 +436,24 @@ async fn list_profiles(State(registry): State<Registry>) -> impl IntoResponse {
    Json(registry.list_profiles().await)
 }

+/// Shared body of the Phase 41 per-type listing routes: fetches every
+/// profile from the registry, keeps those whose `profile_type` equals
+/// `wanted`, and returns them as a JSON array.
+async fn list_profiles_of_type(
+    registry: Registry,
+    wanted: shared::types::ProfileType,
+) -> impl IntoResponse {
+    let matching: Vec<_> = registry
+        .list_profiles()
+        .await
+        .into_iter()
+        .filter(|p| p.profile_type == wanted)
+        .collect();
+    Json(matching)
+}
+
+/// Handler for the `/profiles/retrieval` route: profiles typed `Retrieval`.
+async fn list_retrieval_profiles(State(registry): State<Registry>) -> impl IntoResponse {
+    list_profiles_of_type(registry, shared::types::ProfileType::Retrieval).await
+}
+
+/// Handler for the `/profiles/memory` route: profiles typed `Memory`.
+async fn list_memory_profiles(State(registry): State<Registry>) -> impl IntoResponse {
+    list_profiles_of_type(registry, shared::types::ProfileType::Memory).await
+}
+
+/// Handler for the `/profiles/observer` route: profiles typed `Observer`.
+async fn list_observer_profiles(State(registry): State<Registry>) -> impl IntoResponse {
+    list_profiles_of_type(registry, shared::types::ProfileType::Observer).await
+}
+
 async fn get_profile(
    State(registry): State<Registry>,
    Path(id): Path<String>,
--- a/crates/gateway/Cargo.toml
+++ b/crates/gateway/Cargo.toml
@ -12,6 +12,8 @@ aibridge = { path = "../aibridge" }
 ingestd = { path = "../ingestd" }
 vectord = { path = "../vectord" }
 journald = { path = "../journald" }
+truth = { path = "../truth" }
+validator = { path = "../validator" }
 tokio = { workspace = true }
 axum = { workspace = true }
 serde = { workspace = true }
@ -29,3 +31,4 @@ tracing-opentelemetry = { workspace = true }
 arrow = { workspace = true }
 chrono = { workspace = true }
 reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+toml = { workspace = true }
--- a/crates/gateway/src/access.rs
+++ b/crates/gateway/src/access.rs
@ -93,7 +93,7 @@ impl AccessControl {
        self.roles.write().await.insert(role.agent_name.clone(), role);
    }

-    /// Get an agent's role.
+    /// Get an agent's role. Called by `GET /access/roles/{agent}`.
    pub async fn get_role(&self, agent: &str) -> Option<AgentRole> {
        self.roles.read().await.get(agent).cloned()
    }
@ -113,6 +113,7 @@ impl AccessControl {
    }

    /// Determine which fields should be masked for an agent.
+    #[allow(dead_code)]
    pub async fn masked_fields(
        &self,
        agent: &str,
@ -138,6 +139,7 @@ impl AccessControl {
    }

    /// Log a query for audit.
+    #[allow(dead_code)]
    pub async fn log_query(&self, audit: QueryAudit) {
        self.audit_log.write().await.push(audit);
    }
@ -149,6 +151,9 @@ impl AccessControl {
        log[start..].iter().rev().cloned().collect()
    }

+    /// Reports whether access-control enforcement is active.
+    /// Called by `GET /access/enabled` — ops tooling / dashboards poll
+    /// this to confirm the auth posture of the running gateway.
    pub fn is_enabled(&self) -> bool {
        self.enabled
    }
--- a/crates/gateway/src/access_service.rs
+++ b/crates/gateway/src/access_service.rs
@ -1,6 +1,6 @@
 use axum::{
    Json, Router,
-    extract::{Query, State},
+    extract::{Path, Query, State},
    http::StatusCode,
    response::IntoResponse,
    routing::{get, post},
@ -13,6 +13,12 @@ pub fn router(ac: AccessControl) -> Router {
    Router::new()
        .route("/roles", get(list_roles))
        .route("/roles", post(set_role))
+        // Scrum iter 11 / P13-001 finding: get_role was #[allow(dead_code)]
+        // because nothing called it — dead until exposed. Route activates it.
+        // Returns 404 when the agent isn't registered so clients can
+        // distinguish "missing role" from "access denied."
+        .route("/roles/{agent}", get(get_role))
+        .route("/enabled", get(enabled_status))
        .route("/audit", get(query_audit))
        .route("/check", post(check_access))
        .with_state(ac)
@ -60,3 +66,17 @@ async fn check_access(
        "allowed": allowed,
    }))
 }
+
+/// Handler for the `/roles/{agent}` route. Responds with the agent's
+/// role as JSON, or 404 plus a plain-text message when no role is
+/// registered under that name.
+async fn get_role(
+    State(ac): State<AccessControl>,
+    Path(agent): Path<String>,
+) -> impl IntoResponse {
+    let Some(role) = ac.get_role(&agent).await else {
+        let message = format!("no role registered for agent '{agent}'");
+        return Err((StatusCode::NOT_FOUND, message));
+    };
+    Ok(Json(role))
+}
+
+/// Handler for the `/enabled` route: reports the access-control flag
+/// as `{"enabled": <bool>}`.
+async fn enabled_status(State(ac): State<AccessControl>) -> impl IntoResponse {
+    let enabled = ac.is_enabled();
+    Json(serde_json::json!({ "enabled": enabled }))
+}
--- a/crates/gateway/src/auth.rs
+++ b/crates/gateway/src/auth.rs
@ -5,30 +5,51 @@ use axum::{
    response::Response,
 };

-/// API key auth middleware. Checks X-API-Key header against configured key.
+// API key auth middleware. Checks X-API-Key header against configured key.
+// Fixed P5-001 (2026-04-23): previously #[allow(dead_code)] — the function
+// existed but was never layered onto the router, so [auth] enabled=true
+// silently enforced nothing. Now wired via from_fn_with_state in main.rs.
 pub async fn api_key_auth(
+    axum::extract::State(expected): axum::extract::State<ApiKey>,
    request: Request,
    next: Next,
 ) -> Result<Response, StatusCode> {
-    // Get the expected key from the request extensions (set by the layer)
-    let expected_key = request.extensions().get::<ApiKey>().cloned();
-
-    if let Some(expected) = expected_key {
-        let provided = request
-            .headers()
-            .get("x-api-key")
-            .and_then(|v| v.to_str().ok());
-
-        match provided {
-            Some(key) if key == expected.0 => {}
-            _ => {
-                tracing::warn!("unauthorized request: missing or invalid API key");
-                return Err(StatusCode::UNAUTHORIZED);
-            }
-        }
+    // /health stays public (LB/systemd probes). Every other route is gated.
+    if request.uri().path() == "/health" {
+        return Ok(next.run(request).await);
    }

-    Ok(next.run(request).await)
+    let provided = request
+        .headers()
+        .get("x-api-key")
+        .and_then(|v| v.to_str().ok());
+
+    // Constant-time-ish eq on the raw bytes; good enough for a shared-secret
+    // X-API-Key. Timing-attack resistance here matters less than the
+    // equivalent HMAC check would; adopt subtle crate if key-space grows.
+    match provided {
+        Some(key) if eq_ct(key.as_bytes(), expected.0.as_bytes()) => {
+            Ok(next.run(request).await)
+        }
+        _ => {
+            tracing::warn!(
+                path = %request.uri().path(),
+                "unauthorized request: missing or invalid API key",
+            );
+            Err(StatusCode::UNAUTHORIZED)
+        }
+    }
+}
+
+/// Byte-slice equality for the API-key check. Slices of different
+/// length are rejected immediately; for equal lengths the XOR of every
+/// byte pair is OR-ed into one accumulator rather than returning at the
+/// first mismatch.
+fn eq_ct(a: &[u8], b: &[u8]) -> bool {
+    a.len() == b.len()
+        && a.iter().zip(b).fold(0u8, |acc, (lhs, rhs)| acc | (lhs ^ rhs)) == 0
 }

 /// Wrapper type for the API key, stored in request extensions.
--- a/crates/gateway/src/bin/parity_extract_json.rs
+++ b/crates/gateway/src/bin/parity_extract_json.rs
@ -0,0 +1,37 @@
+//! Cross-runtime parity helper for `extract_json`.
+//!
+//! Reads a single model-output string from stdin, runs the Rust
+//! extract_json, prints `{"matched": bool, "value": <object|null>}`
+//! to stdout as JSON. Exit 0 on success, exit 1 on internal error.
+//!
+//! The Go counterpart lives at
+//! `golangLAKEHOUSE/internal/validator/iterate.go::ExtractJSON`. The
+//! parity probe at
+//! `golangLAKEHOUSE/scripts/cutover/parity/extract_json_parity.sh`
+//! feeds the same fixtures through both and diffs the outputs.
+//!
+//! Usage:
+//!   echo '<raw model output>' | parity_extract_json
+//!   parity_extract_json <<< '...'
+
+use std::io::Read;
+
+/// Reads all of stdin, runs `extract_json` over it, and prints one JSON
+/// line of the form `{"matched": <bool>, "value": <value|null>}`. Exits
+/// with status 1 if stdin cannot be read or the result cannot be
+/// serialized.
+fn main() {
+    let mut raw = String::new();
+    match std::io::stdin().read_to_string(&mut raw) {
+        Ok(_) => {}
+        Err(e) => {
+            eprintln!("read stdin: {e}");
+            std::process::exit(1);
+        }
+    }
+
+    let extracted = gateway::v1::iterate::extract_json(&raw);
+    let matched = extracted.is_some();
+    let value = extracted.unwrap_or(serde_json::Value::Null);
+    let body = serde_json::json!({
+        "matched": matched,
+        "value": value,
+    });
+
+    let line = serde_json::to_string(&body).unwrap_or_else(|e| {
+        eprintln!("serialize result: {e}");
+        std::process::exit(1)
+    });
+    println!("{line}");
+}
--- a/crates/gateway/src/bin/parity_session_log.rs
+++ b/crates/gateway/src/bin/parity_session_log.rs
@ -0,0 +1,71 @@
+//! Cross-runtime parity helper for `SessionRecord` JSON shape.
+//!
+//! Reads a fixture JSON on stdin, builds a `SessionRecord`, emits
+//! one JSONL row on stdout. Used by
+//! `golangLAKEHOUSE/scripts/cutover/parity/session_log_parity.sh`
+//! to verify the Rust gateway's session log shape stays byte-equal
+//! to the Go-side validatord's `validator.SessionRecord` (commit
+//! 1a3a82a in golangLAKEHOUSE).
+
+use gateway::v1::session_log::{SessionAttemptRecord, SessionRecord, SESSION_RECORD_SCHEMA};
+use serde::Deserialize;
+use std::io::Read;
+
+/// Shape of the JSON fixture read from stdin. It carries the
+/// `SessionRecord` fields that `main` copies over verbatim; `schema`,
+/// `timestamp` and `daemon` are not part of the fixture because `main`
+/// sets them itself.
+#[derive(Deserialize)]
+struct FixtureInput {
+    session_id: String,
+    kind: String,
+    model: String,
+    provider: String,
+    prompt: String,
+    iterations: u32,
+    max_iterations: u32,
+    final_verdict: String,
+    attempts: Vec<SessionAttemptRecord>,
+    // Optional in the fixture: a missing key deserializes to `None`.
+    #[serde(default)]
+    artifact: Option<serde_json::Value>,
+    // Optional in the fixture: a missing key deserializes to `None`.
+    #[serde(default)]
+    grounded_in_roster: Option<bool>,
+    duration_ms: u64,
+}
+
+/// Parses the stdin fixture, wraps it in a `SessionRecord` with fixed
+/// `schema` / `timestamp` / `daemon` values, and prints the record as
+/// one JSON line. Exits with status 1 on a read, parse, or
+/// serialization failure.
+fn main() {
+    let mut raw = String::new();
+    match std::io::stdin().read_to_string(&mut raw) {
+        Ok(_) => {}
+        Err(e) => {
+            eprintln!("read stdin: {e}");
+            std::process::exit(1);
+        }
+    }
+
+    let fixture: FixtureInput = serde_json::from_str(&raw).unwrap_or_else(|e| {
+        eprintln!("parse stdin: {e}");
+        std::process::exit(1)
+    });
+    let FixtureInput {
+        session_id,
+        kind,
+        model,
+        provider,
+        prompt,
+        iterations,
+        max_iterations,
+        final_verdict,
+        attempts,
+        artifact,
+        grounded_in_roster,
+        duration_ms,
+    } = fixture;
+
+    let record = SessionRecord {
+        schema: SESSION_RECORD_SCHEMA.to_string(),
+        session_id,
+        // Pinned timestamp so both runtimes' rows compare byte-equal
+        // when the test wrapper normalizes on `daemon` only.
+        timestamp: "2026-01-01T00:00:00+00:00".to_string(),
+        daemon: "gateway".to_string(),
+        kind,
+        model,
+        provider,
+        prompt,
+        iterations,
+        max_iterations,
+        final_verdict,
+        attempts,
+        artifact,
+        grounded_in_roster,
+        duration_ms,
+    };
+
+    let line = serde_json::to_string(&record).unwrap_or_else(|e| {
+        eprintln!("marshal: {e}");
+        std::process::exit(1)
+    });
+    println!("{line}");
+}
--- a/crates/gateway/src/execution_loop/kb_context.rs
+++ b/crates/gateway/src/execution_loop/kb_context.rs
@ -0,0 +1,388 @@
+//! KB context loader — reads recent signal from `data/_kb/*.jsonl` for
+//! a given sig_hash + task_class and returns a compact summary.
+//!
+//! This is the "pipe to the overviewer" from the 2026-04-23 session:
+//! the overseer tier (T3, gpt-oss:120b) consumes this context before
+//! generating a correction, so its suggestions are informed by
+//! historical cost / latency / outcome / prior-correction patterns
+//! across ALL profiles that have run this task class — not just the
+//! single current loop.
+//!
+//! Hot-swap profiles read the SAME pool. When a profile activates and
+//! starts iterating, its KB context is the shared surface — one
+//! profile's learning becomes every profile's starting point.
+//!
+//! Best-effort throughout: missing files, corrupt rows, empty
+//! directories all produce an empty KbContext. The overseer works
+//! fine with no history; we just can't seed it then.
+
+use serde::Serialize;
+use std::path::Path;
+use tokio::io::AsyncBufReadExt;
+
+/// Compact summary returned to the overseer. Bounded size — recent
+/// outcomes + corrections plus rolled-up rates. Goal is to fit in a
+/// prompt without eating the overseer's context budget.
+#[derive(Debug, Clone, Default, Serialize)]
+pub struct KbContext {
+    /// Signature hash the context was loaded for (copied from the caller).
+    pub sig_hash: String,
+    /// Task class the context was loaded for (copied from the caller).
+    pub task_class: String,
+    /// Up to `RECENT_OUTCOME_LIMIT` outcomes: exact `sig_hash` matches
+    /// first (newest first), then `task_class`-only matches to fill up.
+    pub recent_outcomes: Vec<OutcomeSummary>,
+    /// Up to `RECENT_CORRECTION_LIMIT` corrections, ordered the same way
+    /// as `recent_outcomes`.
+    pub recent_corrections: Vec<CorrectionSummary>,
+    /// Fraction (0.0–1.0) of aggregated rows with `ok == true`; `None`
+    /// when no rows matched.
+    pub success_rate: Option<f64>,
+    /// Mean of the rows' `turns` values; `None` when no rows matched.
+    pub avg_turns: Option<f64>,
+    /// Mean of the rows' `usage.latency_ms` values (integer division);
+    /// `None` when no rows matched.
+    pub avg_latency_ms: Option<u64>,
+    /// Number of rows the three aggregates above were computed over
+    /// (at most `AGGREGATE_WINDOW`); 0 when no rows matched.
+    pub total_observed: u32,
+}
+
+/// One outcomes-file row reduced to the fields the prompt needs. Built
+/// by `summarize_outcome`, which substitutes an empty string / `false` /
+/// 0 for any field that is missing or has the wrong JSON type.
+#[derive(Debug, Clone, Serialize)]
+pub struct OutcomeSummary {
+    /// Row's `created_at` string, copied verbatim.
+    pub created_at: String,
+    /// Row's `ok` flag.
+    pub ok: bool,
+    /// Row's `polarity` string, copied verbatim.
+    pub polarity: String,
+    /// Row's `turns` count.
+    pub turns: u32,
+    /// Row's `usage.latency_ms`.
+    pub latency_ms: u64,
+    /// Row's `usage.total_tokens`.
+    pub total_tokens: u64,
+    /// Row's `error` string, when present.
+    pub error: Option<String>,
+}
+
+/// One overseer-corrections row reduced to the fields the prompt
+/// needs. Built by `summarize_correction`.
+#[derive(Debug, Clone, Serialize)]
+pub struct CorrectionSummary {
+    /// Row's `created_at` string, copied verbatim.
+    pub created_at: String,
+    /// Row's `reason` string, copied verbatim.
+    pub reason: String,
+    pub correction_preview: String,   // row's `correction`, cut to at most 300 bytes plus a trailing "…" when longer
+    /// Row's `applied_at_turn` value.
+    pub applied_at_turn: u32,
+}
+
+// Default KB file locations used by `KbContext::load_for`. Both are
+// relative paths.
+const OUTCOMES_PATH: &str = "data/_kb/outcomes.jsonl";
+const CORRECTIONS_PATH: &str = "data/_kb/overseer_corrections.jsonl";
+// Caps on how many entries end up in `recent_outcomes` /
+// `recent_corrections`.
+const RECENT_OUTCOME_LIMIT: usize = 5;
+const RECENT_CORRECTION_LIMIT: usize = 3;
+// Maximum number of most-recent matched outcome rows that feed the
+// success-rate / average-turns / average-latency aggregates.
+const AGGREGATE_WINDOW: usize = 50;
+
+impl KbContext {
+    /// Build context from the default KB paths (`OUTCOMES_PATH` and
+    /// `CORRECTIONS_PATH`).
+    pub async fn load_for(sig_hash: &str, task_class: &str) -> Self {
+        Self::load_from(
+            sig_hash, task_class,
+            Path::new(OUTCOMES_PATH), Path::new(CORRECTIONS_PATH),
+        ).await
+    }
+
+    /// Path-taking variant — tests inject tmp files without touching
+    /// the real KB directory (same pattern as append_outcomes_row_at).
+    ///
+    /// Never fails: a file that cannot be opened contributes no rows,
+    /// and rows with missing fields fall back to empty / zero values.
+    pub async fn load_from(
+        sig_hash: &str,
+        task_class: &str,
+        outcomes_path: &Path,
+        corrections_path: &Path,
+    ) -> Self {
+        let mut ctx = KbContext {
+            sig_hash: sig_hash.to_string(),
+            task_class: task_class.to_string(),
+            ..Default::default()
+        };
+
+        // Scan outcomes — matches on sig_hash primary, task_class
+        // secondary (so different geos for the same task_class still
+        // contribute to aggregate rates even though they won't make
+        // the top-5 recent). The file is scanned front to back; the
+        // limit only caps how many matched rows are kept in memory.
+        let outcome_rows = tail_matching(
+            outcomes_path, AGGREGATE_WINDOW * 4,
+            |row| {
+                let row_sig = row.get("sig_hash").and_then(|v| v.as_str()).unwrap_or("");
+                let row_tc = row.get("task_class").and_then(|v| v.as_str()).unwrap_or("");
+                row_sig == sig_hash || row_tc == task_class
+            },
+        ).await;
+
+        // Recent outcomes: exact sig_hash match first (strongest
+        // signal), then task_class fallback up to the limit.
+        let mut exact: Vec<OutcomeSummary> = Vec::new();
+        let mut loose: Vec<OutcomeSummary> = Vec::new();
+        for row in &outcome_rows {
+            let row_sig = row.get("sig_hash").and_then(|v| v.as_str()).unwrap_or("");
+            let summary = summarize_outcome(row);
+            if row_sig == sig_hash { exact.push(summary); }
+            else { loose.push(summary); }
+        }
+        ctx.recent_outcomes = exact.into_iter().rev().take(RECENT_OUTCOME_LIMIT).collect();
+        if ctx.recent_outcomes.len() < RECENT_OUTCOME_LIMIT {
+            let need = RECENT_OUTCOME_LIMIT - ctx.recent_outcomes.len();
+            ctx.recent_outcomes.extend(loose.into_iter().rev().take(need));
+        }
+
+        // Aggregate rates across the full matched window (both
+        // sig_hash and task_class matches — gives a stable rate even
+        // on sparse sig_hash history).
+        let window = outcome_rows.iter().rev().take(AGGREGATE_WINDOW);
+        let mut ok_count = 0u32;
+        let mut total = 0u32;
+        // The sums are u64 with saturating adds: row values come from an
+        // on-disk file, so an absurd `turns` / `latency_ms` must neither
+        // be truncated by a narrowing cast nor overflow the accumulator.
+        let mut turn_sum = 0u64;
+        let mut latency_sum = 0u64;
+        for row in window {
+            total += 1;
+            if row.get("ok").and_then(|v| v.as_bool()).unwrap_or(false) { ok_count += 1; }
+            let turns = row.get("turns").and_then(|v| v.as_u64()).unwrap_or(0);
+            turn_sum = turn_sum.saturating_add(turns);
+            let latency = row.get("usage")
+                .and_then(|u| u.get("latency_ms"))
+                .and_then(|v| v.as_u64()).unwrap_or(0);
+            latency_sum = latency_sum.saturating_add(latency);
+        }
+        if total > 0 {
+            ctx.total_observed = total;
+            ctx.success_rate = Some(ok_count as f64 / total as f64);
+            ctx.avg_turns = Some(turn_sum as f64 / total as f64);
+            ctx.avg_latency_ms = Some(latency_sum / u64::from(total));
+        }
+
+        // Overseer corrections. Prefer sig_hash match; fall back to
+        // task_class. The overseer reading its OWN prior corrections
+        // is the main point — if the last 3 attempts produced
+        // corrections X, Y, Z, the new correction should acknowledge
+        // those patterns rather than suggest X for the fourth time.
+        let correction_rows = tail_matching(
+            corrections_path, RECENT_CORRECTION_LIMIT * 4,
+            |row| {
+                let row_sig = row.get("sig_hash").and_then(|v| v.as_str()).unwrap_or("");
+                let row_tc = row.get("task_class").and_then(|v| v.as_str()).unwrap_or("");
+                row_sig == sig_hash || row_tc == task_class
+            },
+        ).await;
+        let mut c_exact: Vec<CorrectionSummary> = Vec::new();
+        let mut c_loose: Vec<CorrectionSummary> = Vec::new();
+        for row in &correction_rows {
+            let row_sig = row.get("sig_hash").and_then(|v| v.as_str()).unwrap_or("");
+            let summary = summarize_correction(row);
+            if row_sig == sig_hash { c_exact.push(summary); }
+            else { c_loose.push(summary); }
+        }
+        ctx.recent_corrections = c_exact.into_iter().rev().take(RECENT_CORRECTION_LIMIT).collect();
+        if ctx.recent_corrections.len() < RECENT_CORRECTION_LIMIT {
+            let need = RECENT_CORRECTION_LIMIT - ctx.recent_corrections.len();
+            ctx.recent_corrections.extend(c_loose.into_iter().rev().take(need));
+        }
+
+        ctx
+    }
+
+    /// Compact string form for the overseer prompt. Deterministic
+    /// ordering + bounded length so prompt caching stays stable
+    /// across iterations on the same task.
+    ///
+    /// Timestamps are shown as their first 19 bytes (the
+    /// `YYYY-MM-DDTHH:MM:SS` part of an ISO-8601 string). A timestamp
+    /// that is shorter, or that cannot be cut at byte 19 on a UTF-8
+    /// character boundary, is shown in full instead of panicking.
+    pub fn to_prompt_section(&self) -> String {
+        let mut s = String::new();
+        s.push_str("## Knowledge Base Context\n");
+        if let (Some(rate), Some(turns), Some(lat)) = (self.success_rate, self.avg_turns, self.avg_latency_ms) {
+            s.push_str(&format!(
+                "Across {} prior similar runs: success_rate={:.1}%, avg_turns={:.1}, avg_latency_ms={}\n",
+                self.total_observed, rate * 100.0, turns, lat,
+            ));
+        } else {
+            s.push_str("No prior similar runs recorded.\n");
+        }
+
+        if !self.recent_outcomes.is_empty() {
+            s.push_str(&format!("\nRecent {} outcomes:\n", self.recent_outcomes.len()));
+            for o in &self.recent_outcomes {
+                let err = o.error.as_deref().map(|e| format!(" — {}", truncate(e, 80))).unwrap_or_default();
+                // `str::get` returns None for an out-of-range or
+                // mid-character cut, where plain slicing would panic.
+                let stamp = o.created_at.get(..19).unwrap_or(o.created_at.as_str());
+                s.push_str(&format!(
+                    "  [{}] ok={} turns={} tokens={} lat={}ms{}\n",
+                    stamp,
+                    o.ok, o.turns, o.total_tokens, o.latency_ms, err,
+                ));
+            }
+        }
+
+        if !self.recent_corrections.is_empty() {
+            s.push_str(&format!("\nRecent {} overseer corrections (yours — don't repeat):\n", self.recent_corrections.len()));
+            for c in &self.recent_corrections {
+                let stamp = c.created_at.get(..19).unwrap_or(c.created_at.as_str());
+                s.push_str(&format!(
+                    "  [{}] turn={} reason={} correction={}\n",
+                    stamp,
+                    c.applied_at_turn,
+                    truncate(&c.reason, 40),
+                    truncate(&c.correction_preview, 200),
+                ));
+            }
+        }
+
+        s
+    }
+}
+
+/// Reduces one outcomes-file row to an `OutcomeSummary`.
+///
+/// Missing or wrongly-typed fields fall back to `""` / `false` / `0`
+/// (`error` to `None`). A `turns` value too large for `u32` saturates
+/// at `u32::MAX` rather than wrapping, which an `as u32` cast would do.
+fn summarize_outcome(row: &serde_json::Value) -> OutcomeSummary {
+    OutcomeSummary {
+        created_at: row.get("created_at").and_then(|v| v.as_str()).unwrap_or("").to_string(),
+        ok: row.get("ok").and_then(|v| v.as_bool()).unwrap_or(false),
+        polarity: row.get("polarity").and_then(|v| v.as_str()).unwrap_or("").to_string(),
+        turns: row.get("turns").and_then(|v| v.as_u64())
+            .map_or(0, |t| u32::try_from(t).unwrap_or(u32::MAX)),
+        latency_ms: row.get("usage").and_then(|u| u.get("latency_ms"))
+            .and_then(|v| v.as_u64()).unwrap_or(0),
+        total_tokens: row.get("usage").and_then(|u| u.get("total_tokens"))
+            .and_then(|v| v.as_u64()).unwrap_or(0),
+        error: row.get("error").and_then(|v| v.as_str()).map(String::from),
+    }
+}
+
+/// Reduces one overseer-corrections row to a `CorrectionSummary`.
+///
+/// Missing or wrongly-typed fields fall back to `""` / `0`. The
+/// `correction` text is shortened with `truncate(.., 300)`. An
+/// `applied_at_turn` too large for `u32` saturates at `u32::MAX`
+/// rather than wrapping, which an `as u32` cast would do.
+fn summarize_correction(row: &serde_json::Value) -> CorrectionSummary {
+    let preview = row.get("correction").and_then(|v| v.as_str()).unwrap_or("");
+    CorrectionSummary {
+        created_at: row.get("created_at").and_then(|v| v.as_str()).unwrap_or("").to_string(),
+        reason: row.get("reason").and_then(|v| v.as_str()).unwrap_or("").to_string(),
+        correction_preview: truncate(preview, 300),
+        applied_at_turn: row.get("applied_at_turn").and_then(|v| v.as_u64())
+            .map_or(0, |t| u32::try_from(t).unwrap_or(u32::MAX)),
+    }
+}
+
+/// Shortens `s` to at most `n` bytes of its own text.
+///
+/// Returns `s` unchanged when it is `n` bytes or shorter. Otherwise
+/// returns the longest prefix of at most `n` bytes that ends on a UTF-8
+/// character boundary, followed by `…`.
+fn truncate(s: &str, n: usize) -> String {
+    if s.len() <= n {
+        return s.to_string();
+    }
+    // `&s[..n]` panics when byte `n` falls inside a multi-byte
+    // character, so back up to the nearest boundary. Index 0 is always
+    // a boundary, which bounds the loop.
+    let mut end = n;
+    while !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    format!("{}…", &s[..end])
+}
+
+/// Scan a JSONL file and return the LAST `limit` rows (in file order)
+/// for which `filter` returns true.
+///
+/// The whole file is read front to back, but only a sliding window of
+/// at most `limit` matched rows is kept in memory. Reverse-seek line
+/// iteration is a real engineering task and the file is bounded by
+/// ingest rate; revisit when it bites.
+///
+/// Best-effort: a file that cannot be opened yields an empty vector.
+/// Lines are read as raw bytes, so a line that is not valid JSON —
+/// including one that is not valid UTF-8 — is skipped without ending
+/// the scan. A genuine read error stops the scan and returns whatever
+/// matched so far.
+async fn tail_matching<F>(
+    path: &Path,
+    limit: usize,
+    filter: F,
+) -> Vec<serde_json::Value>
+where
+    F: Fn(&serde_json::Value) -> bool,
+{
+    let Ok(file) = tokio::fs::File::open(path).await else { return Vec::new(); };
+    let mut reader = tokio::io::BufReader::new(file);
+    // A deque gives an O(1) front-pop for the sliding window.
+    let mut window: std::collections::VecDeque<serde_json::Value> =
+        std::collections::VecDeque::new();
+    // One line buffer, reused across iterations.
+    let mut line: Vec<u8> = Vec::new();
+    loop {
+        line.clear();
+        match reader.read_until(b'\n', &mut line).await {
+            Ok(0) | Err(_) => break, // end of file, or unreadable from here on
+            Ok(_) => {}
+        }
+        // A trailing "\n" / "\r\n" is JSON whitespace, so the line can
+        // be parsed without trimming.
+        let Ok(v) = serde_json::from_slice::<serde_json::Value>(&line) else { continue };
+        if !filter(&v) {
+            continue;
+        }
+        window.push_back(v);
+        if window.len() > limit {
+            // Keep the most-recent window only.
+            window.pop_front();
+        }
+    }
+    window.into()
+}
+
+// Tests for the KB context loader. Each async test builds its own
+// uniquely-named paths under the system temp directory via `tmp_path`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tokio::io::AsyncWriteExt;
+
+    /// Writes `rows` to `path` as JSONL (one serialized value per line),
+    /// creating the parent directory if needed and truncating any
+    /// existing file. An empty `rows` leaves an empty file.
+    async fn write_fixture(path: &Path, rows: Vec<serde_json::Value>) {
+        if let Some(dir) = path.parent() {
+            tokio::fs::create_dir_all(dir).await.unwrap();
+        }
+        let mut f = tokio::fs::OpenOptions::new()
+            .create(true).write(true).truncate(true).open(path).await.unwrap();
+        for r in rows {
+            let mut line = serde_json::to_string(&r).unwrap();
+            line.push('\n');
+            f.write_all(line.as_bytes()).await.unwrap();
+        }
+    }
+
+    /// Builds (but does not create) a temp-dir path made unique by the
+    /// process id and the current time in nanoseconds.
+    fn tmp_path(name: &str) -> std::path::PathBuf {
+        let nanos = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_nanos();
+        std::env::temp_dir().join(format!("lh_kb_ctx_{}_{}_{}", std::process::id(), nanos, name))
+    }
+
+    // Neither path is ever written here, so despite the name this
+    // exercises the missing-file case.
+    #[tokio::test]
+    async fn empty_files_produce_empty_context() {
+        let op = tmp_path("outcomes.jsonl");
+        let cp = tmp_path("corrections.jsonl");
+        let ctx = KbContext::load_from("sig123", "staffing.fill", &op, &cp).await;
+        assert!(ctx.recent_outcomes.is_empty());
+        assert!(ctx.recent_corrections.is_empty());
+        assert!(ctx.success_rate.is_none());
+        assert_eq!(ctx.total_observed, 0);
+    }
+
+    // The fixture puts the loose (task_class-only) row first in the
+    // file; the exact sig_hash row must still come first in the result,
+    // while both rows count toward the aggregates.
+    #[tokio::test]
+    async fn exact_sig_hash_matches_take_priority() {
+        let op = tmp_path("outcomes.jsonl");
+        let cp = tmp_path("corrections.jsonl");
+        write_fixture(&op, vec![
+            // Other sig_hash, same task_class — loose match
+            serde_json::json!({
+                "sig_hash": "other", "task_class": "staffing.fill",
+                "ok": false, "polarity": "failure_pattern", "turns": 1,
+                "usage": {"latency_ms": 1000, "total_tokens": 100},
+                "created_at": "2026-04-22T10:00:00Z",
+            }),
+            // Exact sig_hash — should lead
+            serde_json::json!({
+                "sig_hash": "sig123", "task_class": "staffing.fill",
+                "ok": true, "polarity": "success_confirmation", "turns": 3,
+                "usage": {"latency_ms": 2000, "total_tokens": 500},
+                "created_at": "2026-04-23T10:00:00Z",
+            }),
+        ]).await;
+        write_fixture(&cp, vec![]).await;
+
+        let ctx = KbContext::load_from("sig123", "staffing.fill", &op, &cp).await;
+        assert_eq!(ctx.recent_outcomes.len(), 2);
+        assert_eq!(ctx.recent_outcomes[0].created_at, "2026-04-23T10:00:00Z");
+        assert_eq!(ctx.recent_outcomes[0].ok, true);
+        assert_eq!(ctx.total_observed, 2);
+        assert!((ctx.success_rate.unwrap() - 0.5).abs() < 0.001);
+    }
+
+    // One valid row sandwiched between two non-JSON lines.
+    #[tokio::test]
+    async fn corrupt_rows_are_skipped() {
+        let op = tmp_path("outcomes.jsonl");
+        let cp = tmp_path("corrections.jsonl");
+        // Mix valid + invalid — invalid should be silently skipped.
+        if let Some(dir) = op.parent() { tokio::fs::create_dir_all(dir).await.unwrap(); }
+        tokio::fs::write(&op, "not json\n{\"sig_hash\":\"sig1\",\"task_class\":\"tc\",\"ok\":true,\"turns\":1,\"usage\":{}}\ngarbage\n").await.unwrap();
+        write_fixture(&cp, vec![]).await;
+        let ctx = KbContext::load_from("sig1", "tc", &op, &cp).await;
+        assert_eq!(ctx.recent_outcomes.len(), 1);
+    }
+
+    // A 500-byte ASCII correction must come back shortened.
+    #[tokio::test]
+    async fn corrections_preview_is_truncated() {
+        let op = tmp_path("outcomes.jsonl");
+        let cp = tmp_path("corrections.jsonl");
+        let long = "x".repeat(500);
+        write_fixture(&op, vec![]).await;
+        write_fixture(&cp, vec![serde_json::json!({
+            "sig_hash": "sig1", "task_class": "tc",
+            "reason": "abort", "correction": long, "applied_at_turn": 3,
+            "created_at": "2026-04-23T10:00:00Z",
+        })]).await;
+        let ctx = KbContext::load_from("sig1", "tc", &op, &cp).await;
+        assert_eq!(ctx.recent_corrections.len(), 1);
+        // 300-char cap + 3-byte UTF-8 ellipsis character = 303-byte worst case.
+        assert!(ctx.recent_corrections[0].correction_preview.len() <= 303);
+    }
+
+    // A default context has no aggregates, so the "no prior runs" line
+    // is what gets rendered.
+    #[test]
+    fn prompt_section_is_stable_for_empty_context() {
+        let ctx = KbContext::default();
+        let s = ctx.to_prompt_section();
+        assert!(s.contains("No prior similar runs recorded"));
+    }
+
+    // Pins the formatting of the aggregate line: one decimal for the
+    // rate and the turn average, a plain integer for the latency.
+    #[test]
+    fn prompt_section_reports_aggregate_rates() {
+        let ctx = KbContext {
+            total_observed: 10,
+            success_rate: Some(0.7),
+            avg_turns: Some(4.2),
+            avg_latency_ms: Some(45000),
+            ..Default::default()
+        };
+        let s = ctx.to_prompt_section();
+        assert!(s.contains("success_rate=70.0%"));
+        assert!(s.contains("avg_turns=4.2"));
+        assert!(s.contains("avg_latency_ms=45000"));
+    }
+}
--- a/crates/gateway/src/execution_loop/mod.rs
+++ b/crates/gateway/src/execution_loop/mod.rs
--- a/crates/gateway/src/lib.rs
+++ b/crates/gateway/src/lib.rs
@ -0,0 +1,19 @@
+//! Library facade for the gateway crate so sub-binaries (e.g.
+//! `parity_extract_json`) can reuse the same modules the gateway
+//! binary uses.
+//!
+//! Added 2026-05-02 to support the cross-runtime parity probe at
+//! `golangLAKEHOUSE/scripts/cutover/parity/extract_json_parity.sh`.
+//! `extract_json` is the load-bearing public surface for that probe.
+//!
+//! main.rs still uses local `mod foo;` declarations independently —
+//! adding this file is purely additive (the binary's module tree is
+//! unchanged).
+
+pub mod access;
+pub mod access_service;
+pub mod auth;
+pub mod execution_loop;
+pub mod observability;
+pub mod tools;
+pub mod v1;
--- a/crates/gateway/src/main.rs
+++ b/crates/gateway/src/main.rs
@ -1,6 +1,7 @@
 mod access;
 mod access_service;
 mod auth;
+mod execution_loop;
 mod observability;
 mod tools;
 mod v1;
@ -67,14 +68,62 @@ async fn main() {
    let access = access::AccessControl::new(config.auth.enabled);
    access.register_defaults().await;

+    // Phase 42 — file-backed truth rules. Probes the `truth/` directory
+    // at repo root (or $LAKEHOUSE_TRUTH_DIR override) and logs how many
+    // rules load. Current request paths still build their own stores
+    // via truth::default_truth_store() / truth::sql_query_guard_store();
+    // the composed-at-boot store gets plumbed through V1State in a
+    // follow-up. This boot probe catches parse errors + duplicate-ID
+    // collisions early rather than at first request.
+    {
+        let truth_dir = std::env::var("LAKEHOUSE_TRUTH_DIR")
+            .unwrap_or_else(|_| "/home/profit/lakehouse/truth".to_string());
+        if std::path::Path::new(&truth_dir).exists() {
+            let mut probe_store = truth::default_truth_store();
+            match truth::loader::load_from_dir(&mut probe_store, &truth_dir) {
+                Ok(n) => tracing::info!("truth: loaded {n} file-backed rule(s) from {truth_dir}"),
+                Err(e) => tracing::warn!("truth: failed to load rules from {truth_dir}: {e}"),
+            }
+        } else {
+            tracing::debug!("truth: no rule dir at {truth_dir}, skipping file-backed load");
+        }
+    }
+
    // Workspace manager for agent-specific overlays
    let workspace_mgr = queryd::workspace::WorkspaceManager::new(store.clone());
    if let Err(e) = workspace_mgr.rebuild().await {
        tracing::warn!("workspace rebuild: {e}");
    }

-    // AI sidecar client
-    let ai_client = aibridge::client::AiClient::new(&config.sidecar.url);
+    // AI sidecar clients — Phase 44 part 3 (2026-04-27).
+    //
+    // Two flavors of the same client:
+    // - `ai_client_direct` posts directly to ${sidecar}/generate. Used
+    //   inside the gateway by V1State + the legacy /ai proxy. These
+    //   call sites are themselves the implementation of /v1/chat
+    //   (or its sidecar shim), so routing them through /v1/chat
+    //   would self-loop.
+    // - `ai_client_observable` posts via ${gateway}/v1/chat with
+    //   provider="ollama". Used by vectord modules (autotune agent,
+    //   /vectors service) so their LLM calls land in /v1/usage and
+    //   Langfuse traces. Adds one localhost HTTP hop per call (~ms);
+    //   accepted for the observability gain.
+    //
+    // The gateway can call its own /v1/chat over localhost during
+    // boot's transient period because we don't fire any LLM calls
+    // until the listener is up — the observable client is just
+    // configured here, not exercised.
+    let ai_client_direct = aibridge::client::AiClient::new(&config.sidecar.url);
+    let gateway_self_url = format!("http://{}:{}", config.gateway.host, config.gateway.port);
+    let ai_client_observable = aibridge::client::AiClient::new_with_gateway(
+        &config.sidecar.url,
+        &gateway_self_url,
+    );
+    // Backwards-compat alias for the (many) existing references in this file.
+    // Defaults to direct so the existing wiring (V1State, /ai proxy)
+    // keeps its non-self-loop transport. New vectord wiring below
+    // explicitly uses ai_client_observable.
+    let ai_client = ai_client_direct.clone();

    // Vector service components — built before the router because both the
    // /vectors service AND ingestd need the agent handle to enqueue triggers.
@ -92,6 +141,12 @@ async fn main() {
    // operators call POST /vectors/playbook_memory/rebuild to populate.
    let pbm = vectord::playbook_memory::PlaybookMemory::new(store.clone());
    let _ = pbm.load_from_storage().await;
+    // Pathway memory — consensus-designed sidecar for full-context
+    // backtracking + hot-swap of successful review pathways. Same
+    // load-on-boot pattern as playbook_memory: empty state is fine,
+    // operators start populating via scrum_master_pipeline.ts.
+    let pwm = vectord::pathway_memory::PathwayMemory::new(store.clone());
+    let _ = pwm.load_from_storage().await;

    // Phase 16.2: spawn the autotune agent. When config.agent.enabled=false
    // this returns a handle that drops triggers silently — no surprise load.
@ -106,7 +161,9 @@ async fn main() {
        agent_cfg,
        vectord::agent::AgentDeps {
            store: store.clone(),
-            ai_client: ai_client.clone(),
+            // Observable: autotune agent's LLM calls go through
+            // /v1/chat for /v1/usage + Langfuse visibility.
+            ai_client: ai_client_observable.clone(),
            catalog: registry.clone(),
            index_registry: index_reg.clone(),
            hnsw_store: hnsw.clone(),
@ -153,10 +210,17 @@ async fn main() {
            agent_handle: agent_handle.clone(),
            index_registry: index_reg.clone(),
            schedules: sched_store,
+            // P9-001 fix 2026-04-23: journal reference flows into ingest so
+            // successful uploads emit a record_ingest event. Journal is Clone
+            // (Arc<RwLock> inside) so the /journal nest below still sees the
+            // same buffer + persistence.
+            journal: Some(journal.clone()),
        }))
        .nest("/vectors", vectord::service::router(vectord::service::VectorState {
            store: store.clone(),
-            ai_client: ai_client.clone(),
+            // Observable: /vectors service's LLM calls (RAG, summary,
+            // playbook synthesis, etc.) flow through /v1/chat.
+            ai_client: ai_client_observable.clone(),
            job_tracker: vectord::jobs::JobTracker::new(),
            index_registry: index_reg.clone(),
            hnsw_store: hnsw,
@ -172,17 +236,19 @@ async fn main() {
                bucket_registry.clone(), index_reg.clone(),
            ),
            playbook_memory: pbm,
+            pathway_memory: pwm,
            embed_semaphore: std::sync::Arc::new(tokio::sync::Semaphore::new(1)),
        }))
        .nest("/workspaces", queryd::workspace_service::router(workspace_mgr))
        .nest("/journal", journald::service::router(journal))
        .nest("/access", access_service::router(access))
        .nest("/tools", tools::service::router({
-            let tool_reg = tools::registry::ToolRegistry::new_with_defaults();
+            let tool_reg = tools::registry::ToolRegistry::new();
            tool_reg.register_defaults().await;
            tools::ToolState {
                registry: tool_reg,
                query_fn: tools::QueryExecutor::new(engine.clone()),
+                truth: std::sync::Arc::new(truth::sql_query_guard_store()),
            }
        }))
        // Phase 38 — Universal API skeleton. Thin OpenAI-compatible
@ -204,6 +270,86 @@ async fn main() {
                }
                k
            },
+            openrouter_key: {
+                // 2026-04-24 free-tier rescue rung for iter 5+. Shares
+                // the LLM Team UI's OPENROUTER_API_KEY so both systems
+                // draw from one quota.
+                let k = v1::openrouter::resolve_openrouter_key();
+                if k.is_some() {
+                    tracing::info!("v1: OpenRouter key loaded — /v1/chat provider=openrouter enabled");
+                } else {
+                    tracing::warn!("v1: no OpenRouter key — openrouter rescue rung will 503");
+                }
+                k
+            },
+            gemini_key: {
+                // Phase 40 provider. GEMINI_API_KEY in env or .env.
+                let k = v1::gemini::resolve_gemini_key();
+                if k.is_some() {
+                    tracing::info!("v1: Gemini key loaded — /v1/chat provider=gemini enabled");
+                } else {
+                    tracing::debug!("v1: no Gemini key — provider=gemini will 503");
+                }
+                k
+            },
+            claude_key: {
+                // Phase 40 provider. ANTHROPIC_API_KEY in env or .env.
+                let k = v1::claude::resolve_claude_key();
+                if k.is_some() {
+                    tracing::info!("v1: Claude key loaded — /v1/chat provider=claude enabled");
+                } else {
+                    tracing::debug!("v1: no Claude key — provider=claude will 503");
+                }
+                k
+            },
+            kimi_key: {
+                // Direct Kimi For Coding (api.kimi.com) — bypasses the
+                // broken-upstream kimi-k2:1t and OpenRouter rate caps.
+                // Key from /etc/lakehouse/kimi.env (KIMI_API_KEY=sk-kimi-…).
+                let k = v1::kimi::resolve_kimi_key();
+                if k.is_some() {
+                    tracing::info!("v1: Kimi key loaded — /v1/chat provider=kimi enabled (model=kimi-for-coding)");
+                } else {
+                    tracing::debug!("v1: no Kimi key — provider=kimi will 503");
+                }
+                k
+            },
+            opencode_key: {
+                // OpenCode GO multi-vendor gateway — Claude Opus 4.7,
+                // GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
+                // Qwen + free-tier. Key from /etc/lakehouse/opencode.env.
+                let k = v1::opencode::resolve_opencode_key();
+                if k.is_some() {
+                    tracing::info!("v1: OpenCode key loaded — /v1/chat provider=opencode enabled (40 models)");
+                } else {
+                    tracing::debug!("v1: no OpenCode key — provider=opencode will 503");
+                }
+                k
+            },
+            validate_workers: {
+                // Load workers_500k.parquet snapshot for /v1/validate.
+                // Path overridable via LH_WORKERS_PARQUET env. Missing
+                // file is non-fatal — validators run schema/PII checks
+                // unaffected; only worker-existence checks fail clean.
+                let path_str = std::env::var("LH_WORKERS_PARQUET")
+                    .unwrap_or_else(|_| "/home/profit/lakehouse/data/datasets/workers_500k.parquet".into());
+                let path = std::path::Path::new(&path_str);
+                if path.exists() {
+                    match validator::staffing::parquet_lookup::load_workers_parquet(path) {
+                        Ok(lookup) => {
+                            tracing::info!("v1: workers parquet loaded from {} — /v1/validate worker-existence checks enabled", path_str);
+                            lookup
+                        }
+                        Err(e) => {
+                            tracing::warn!("v1: workers parquet at {} unreadable ({e}) — /v1/validate worker-existence checks will fail Consistency", path_str);
+                            std::sync::Arc::new(validator::InMemoryWorkerLookup::new())
+                        }
+                    }
+                } else {
+                    tracing::warn!("v1: workers parquet at {} not found — /v1/validate worker-existence checks will fail Consistency", path_str);
+                    std::sync::Arc::new(validator::InMemoryWorkerLookup::new())
+                }
+            },
            // Phase 40 early deliverable — Langfuse trace emitter.
            // Defaults match mcp-server/tracing.ts conventions so
            // gateway traces land in the same staffing project.
@ -216,16 +362,37 @@ async fn main() {
                }
                c
            },
+            // Coordinator session JSONL — one row per /v1/iterate
+            // session for offline DuckDB analysis. Cross-runtime
+            // parity with Go-side validatord (commit 1a3a82a).
+            session_log: {
+                let path = &config.gateway.session_log_path;
+                let s = v1::session_log::SessionLogger::from_path(path);
+                if s.is_some() {
+                    tracing::info!(
+                        "v1: session log enabled — coordinator sessions written to {}",
+                        path
+                    );
+                } else {
+                    tracing::info!("v1: session log disabled (set [gateway].session_log_path to enable)");
+                }
+                s
+            },
        }));

-    // Auth middleware (if enabled)
+    // Auth middleware (if enabled) — P5-001 fix 2026-04-23:
+    // previously only inserted the ApiKey as an extension and never layered
+    // the middleware, so auth.enabled=true enforced nothing. Now wraps the
+    // router with from_fn_with_state, which calls api_key_auth on every
+    // request. /health is exempted inside the middleware (LB probes).
    if config.auth.enabled {
        if let Some(ref key) = config.auth.api_key {
-            tracing::info!("API key auth enabled");
+            tracing::info!("API key auth enabled — enforcing on all routes except /health");
            let api_key = auth::ApiKey(key.clone());
-            app = app.layer(axum::Extension(api_key));
-            // Note: auth middleware applied per-route in production
-            // For now, the ApiKey extension is available for handlers to check
+            app = app.layer(axum::middleware::from_fn_with_state(
+                api_key,
+                auth::api_key_auth,
+            ));
        } else {
            tracing::warn!("auth enabled but no api_key set — all requests allowed");
        }
--- a/crates/gateway/src/tools/mod.rs
+++ b/crates/gateway/src/tools/mod.rs
@ -3,12 +3,18 @@ pub mod service;

 use queryd::context::QueryEngine;
 use arrow::json::writer::{JsonArray, Writer as JsonWriter};
+use std::sync::Arc;
+use truth::TruthStore;

 /// State for the tool system.
 #[derive(Clone)]
 pub struct ToolState {
    pub registry: registry::ToolRegistry,
    pub query_fn: QueryExecutor,
+    /// SQL guard (shared with queryd). Mirrors the queryd /sql truth
+    /// gate from P42-002 (9cc0ceb) — tools also execute model-
+    /// originated SQL, need the same destructive-verb block.
+    pub truth: Arc<TruthStore>,
 }

 /// Wraps QueryEngine to provide a simple execute interface for tools.
--- a/crates/gateway/src/tools/registry.rs
+++ b/crates/gateway/src/tools/registry.rs
@ -67,24 +67,14 @@ pub struct ToolRegistry {
 }

 impl ToolRegistry {
+    /// Build an empty registry. Callers in an async context should follow
+    /// this with `.register_defaults().await` if they want the built-in
+    /// staffing tools pre-installed — main.rs does exactly that.
    pub fn new() -> Self {
-        let registry = Self {
+        Self {
            tools: Arc::new(RwLock::new(HashMap::new())),
            audit_log: Arc::new(RwLock::new(Vec::new())),
-        };
-        // Register built-in staffing tools
-        tokio::task::block_in_place(|| {
-            tokio::runtime::Handle::current().block_on(registry.register_defaults())
-        });
-        registry
-    }
-
-    pub fn new_with_defaults() -> Self {
-        let registry = Self {
-            tools: Arc::new(RwLock::new(HashMap::new())),
-            audit_log: Arc::new(RwLock::new(Vec::new())),
-        };
-        registry
+        }
    }

    /// Register default staffing tools.
--- a/crates/gateway/src/tools/service.rs
+++ b/crates/gateway/src/tools/service.rs
@ -7,7 +7,7 @@ use axum::{
 };
 use serde::Deserialize;

-use super::registry::{Permission, ToolInvocation, ToolRegistry};
+use super::registry::{ToolInvocation, ToolRegistry};
 use crate::tools::ToolState;

 pub fn router(state: ToolState) -> Router {
@ -92,6 +92,32 @@ async fn call_tool(
        }
    };

+    // Truth gate — same contract as queryd /sql (P42-002). Rejects
+    // destructive verbs + empty SQL. Scrum iter 11 CF-1 + CF-2 on this
+    // file: tools executed model-provided SQL parameters without any
+    // validation. Close the gap here so the parallel surface has the
+    // same safety floor as queryd.
+    let ctx = serde_json::json!({ "sql": sql });
+    for outcome in state.truth.evaluate("sql_query", &ctx) {
+        if outcome.passed {
+            if let truth::RuleAction::Reject { message } | truth::RuleAction::Block { message } = &outcome.action {
+                tracing::warn!("tool {name}: SQL blocked by truth gate ({}): {message}", outcome.rule_id);
+                state.registry.log_invocation(ToolInvocation {
+                    id: format!("inv-{}", chrono::Utc::now().timestamp_millis()),
+                    tool_name: name.clone(),
+                    agent: req.agent.clone(),
+                    params: req.params.clone(),
+                    permission: tool.permission.clone(),
+                    timestamp: chrono::Utc::now(),
+                    success: false,
+                    error: Some(format!("truth gate: {message}")),
+                    rows_returned: None,
+                }).await;
+                return Err((StatusCode::FORBIDDEN, message.clone()));
+            }
+        }
+    }
+
    // Execute via query engine
    let result = state.query_fn.execute(&sql).await;

--- a/crates/gateway/src/v1/claude.rs
+++ b/crates/gateway/src/v1/claude.rs
@ -0,0 +1,222 @@
+//! Claude (Anthropic) adapter.
+//!
+//! POST `https://api.anthropic.com/v1/messages`. Auth via `x-api-key`
+//! header (not bearer) + required `anthropic-version` header. Payload
+//! is NOT OpenAI-compatible — response text lives at
+//! `content[0].text`. Phase 40 deliverable. System prompts travel in
+//! a top-level `system` field, separate from the `messages` array.
+
+use std::time::Duration;
+use serde::{Deserialize, Serialize};
+
+use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
+
+const CLAUDE_BASE_URL: &str = "https://api.anthropic.com/v1";
+const CLAUDE_API_VERSION: &str = "2023-06-01";
+const CLAUDE_TIMEOUT_SECS: u64 = 180;
+
+/// Locate the Anthropic API key.
+///
+/// Lookup order:
+/// 1. the `ANTHROPIC_API_KEY` environment variable (trimmed; ignored
+///    when blank or unreadable),
+/// 2. the first `ANTHROPIC_API_KEY=` line with a non-empty value in
+///    `/home/profit/.env`, then in `/root/.env`. File values are
+///    trimmed and stripped of surrounding double quotes, then single
+///    quotes. Files that cannot be read are skipped.
+///
+/// Returns `None` when no source yields a non-empty key.
+pub fn resolve_claude_key() -> Option<String> {
+    const VAR: &str = "ANTHROPIC_API_KEY";
+    const LINE_PREFIX: &str = "ANTHROPIC_API_KEY=";
+
+    let env_key = std::env::var(VAR)
+        .ok()
+        .map(|raw| raw.trim().to_string())
+        .filter(|k| !k.is_empty());
+    if env_key.is_some() {
+        return env_key;
+    }
+
+    // The chain is lazy: the second file is only read when the first
+    // one produced no usable key.
+    ["/home/profit/.env", "/root/.env"]
+        .iter()
+        .filter_map(|path| std::fs::read_to_string(path).ok())
+        .find_map(|raw| {
+            raw.lines()
+                .filter_map(|line| line.strip_prefix(LINE_PREFIX))
+                .map(|rest| rest.trim().trim_matches('"').trim_matches('\''))
+                .find(|k| !k.is_empty())
+                .map(str::to_string)
+        })
+}
+
+/// Send `req` to Anthropic's Messages API and translate the reply into
+/// the gateway's `ChatResponse` shape.
+///
+/// * `key` — sent in the `x-api-key` header.
+/// * `req` — `model` may carry a `claude/` prefix, which is removed;
+///   `max_tokens` defaults to 800 and `temperature` to 0.3 when unset.
+///
+/// Returns `Err(String)` when the HTTP client cannot be built, the
+/// request cannot be sent, the status is not a success (the message
+/// carries the status and response body), or the body does not parse.
+pub async fn chat(
+    key: &str,
+    req: &ChatRequest,
+) -> Result<ChatResponse, String> {
+    // Accept both "claude/<model>" and the bare model name.
+    let bare_model: &str = req.model.strip_prefix("claude/").unwrap_or(&req.model);
+    let model = bare_model.to_string();
+
+    // The Messages API takes the system prompt as a top-level field, so
+    // system-role messages are pulled out of the conversation. Every
+    // role other than "assistant" is sent as "user".
+    let mut system_chunks: Vec<String> = Vec::new();
+    let mut turns: Vec<AnMessage> = Vec::new();
+    for m in &req.messages {
+        match m.role.as_str() {
+            "system" => system_chunks.push(m.text()),
+            "assistant" => turns.push(AnMessage {
+                role: "assistant".to_string(),
+                content: m.text(),
+            }),
+            _ => turns.push(AnMessage {
+                role: "user".to_string(),
+                content: m.text(),
+            }),
+        }
+    }
+    // Multiple system messages are merged, separated by a blank line.
+    let system = (!system_chunks.is_empty()).then(|| system_chunks.join("\n\n"));
+
+    let payload = AnChatBody {
+        model: model.clone(),
+        messages: turns,
+        max_tokens: req.max_tokens.unwrap_or(800),
+        temperature: req.temperature.unwrap_or(0.3),
+        system,
+    };
+
+    let http = reqwest::Client::builder()
+        .timeout(Duration::from_secs(CLAUDE_TIMEOUT_SECS))
+        .build()
+        .map_err(|e| format!("build client: {e}"))?;
+
+    // Latency covers the round trip plus response-body decoding.
+    let started = std::time::Instant::now();
+    let endpoint = format!("{}/messages", CLAUDE_BASE_URL);
+    let resp = http
+        .post(endpoint)
+        .header("x-api-key", key)
+        .header("anthropic-version", CLAUDE_API_VERSION)
+        .json(&payload)
+        .send()
+        .await
+        .map_err(|e| format!("api.anthropic.com unreachable: {e}"))?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        let detail = resp.text().await.unwrap_or_else(|_| "?".into());
+        return Err(format!("claude {}: {}", status, detail));
+    }
+
+    let AnChatResponse { content, stop_reason, usage } = resp
+        .json::<AnChatResponse>()
+        .await
+        .map_err(|e| format!("invalid claude response: {e}"))?;
+
+    let elapsed_ms = started.elapsed().as_millis();
+
+    // Use the first content block of type "text"; empty string if the
+    // reply has none.
+    let mut text = String::new();
+    for block in content {
+        if block.block_type == "text" {
+            text = block.text;
+            break;
+        }
+    }
+
+    // Prefer the counts reported by the API; otherwise estimate at
+    // roughly four characters per token, rounding up.
+    let (prompt_tokens, completion_tokens) = match usage {
+        Some(u) => (u.input_tokens, u.output_tokens),
+        None => {
+            let prompt_chars: usize = req
+                .messages
+                .iter()
+                .map(|m| m.text().chars().count())
+                .sum();
+            (
+                ((prompt_chars + 3) / 4) as u32,
+                ((text.chars().count() + 3) / 4) as u32,
+            )
+        }
+    };
+
+    tracing::info!(
+        target: "v1.chat",
+        provider = "claude",
+        model = %model,
+        prompt_tokens,
+        completion_tokens,
+        latency_ms = elapsed_ms as u64,
+        "claude chat completed",
+    );
+
+    let id = format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0));
+    let created = chrono::Utc::now().timestamp();
+    let choice = Choice {
+        index: 0,
+        message: Message::new_text("assistant", text),
+        finish_reason: stop_reason.unwrap_or_else(|| "stop".into()),
+    };
+
+    Ok(ChatResponse {
+        id,
+        object: "chat.completion",
+        created,
+        model,
+        choices: vec![choice],
+        usage: UsageBlock {
+            prompt_tokens,
+            completion_tokens,
+            total_tokens: prompt_tokens + completion_tokens,
+        },
+    })
+}
+
+// -- Anthropic Messages API wire shapes --
+
+/// JSON request body that `chat` posts to `{CLAUDE_BASE_URL}/messages`.
+#[derive(Serialize)]
+struct AnChatBody {
+    /// Model name with any `claude/` prefix already removed by `chat`.
+    model: String,
+    /// Conversation turns; `chat` only ever emits the roles "user" and
+    /// "assistant" here.
+    messages: Vec<AnMessage>,
+    max_tokens: u32,
+    temperature: f64,
+    /// Merged system prompt. Left out of the JSON entirely when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    system: Option<String>,
+}
+
+/// One conversation turn inside `AnChatBody::messages`.
+#[derive(Serialize)]
+struct AnMessage { role: String, content: String }
+
+/// Subset of the Messages API response that `chat` reads.
+#[derive(Deserialize)]
+struct AnChatResponse {
+    /// Required: a response without `content` fails to deserialize.
+    content: Vec<AnContentBlock>,
+    /// `None` when absent; `chat` then reports "stop". The `rename`
+    /// matches the field's own name, so it has no effect.
+    #[serde(default, rename = "stop_reason")]
+    stop_reason: Option<String>,
+    /// `None` when absent; `chat` then estimates token counts from
+    /// character counts.
+    #[serde(default)]
+    usage: Option<AnUsage>,
+}
+
+/// One entry of the response `content` array.
+#[derive(Deserialize)]
+struct AnContentBlock {
+    /// Wire field `type`; `chat` only consumes blocks where this is
+    /// "text".
+    #[serde(rename = "type")]
+    block_type: String,
+    /// Defaults to the empty string when the block has no `text` field.
+    #[serde(default)]
+    text: String,
+}
+
+/// Token counts as reported by the API; both fields are required when
+/// the `usage` object is present.
+#[derive(Deserialize)]
+struct AnUsage { input_tokens: u32, output_tokens: u32 }
+
+// Unit tests for the Claude adapter. None of them perform a network
+// call; they cover key resolution and request-body serialization only.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Smoke test: key resolution must complete without panicking
+    /// whatever the environment holds. The result itself is ignored.
+    #[test]
+    fn resolve_claude_key_does_not_panic() {
+        let _ = resolve_claude_key();
+    }
+
+    /// The system prompt must serialize as a top-level `system` field
+    /// next to `messages`, and `max_tokens` must be emitted as a number.
+    #[test]
+    fn chat_body_serializes_with_separate_system() {
+        let body = AnChatBody {
+            model: "claude-3-5-sonnet-latest".into(),
+            messages: vec![
+                AnMessage { role: "user".into(), content: "hi".into() },
+            ],
+            max_tokens: 800,
+            temperature: 0.3,
+            system: Some("You are helpful.".into()),
+        };
+        let json = serde_json::to_string(&body).unwrap();
+        assert!(json.contains("\"system\":\"You are helpful.\""));
+        assert!(json.contains("\"messages\""));
+        assert!(json.contains("\"max_tokens\":800"));
+    }
+
+    /// With `system: None` the key must be absent from the JSON, not
+    /// serialized as `null`.
+    #[test]
+    fn body_omits_system_when_none() {
+        let body = AnChatBody {
+            model: "claude-3-5-sonnet-latest".into(),
+            messages: vec![AnMessage { role: "user".into(), content: "hi".into() }],
+            max_tokens: 800,
+            temperature: 0.3,
+            system: None,
+        };
+        let json = serde_json::to_string(&body).unwrap();
+        assert!(!json.contains("\"system\""), "system field should be skipped when None: {json}");
+    }
+
+    /// Checks the prefix-stripping expression for both the namespaced
+    /// and the bare model name. This repeats the `strip_prefix` /
+    /// `unwrap_or` expression locally; it does not call `chat`.
+    #[test]
+    fn model_prefix_strip_preserves_bare_names() {
+        let cases = [
+            ("claude/claude-3-5-sonnet-latest", "claude-3-5-sonnet-latest"),
+            ("claude-3-5-sonnet-latest", "claude-3-5-sonnet-latest"),
+        ];
+        for (input, expected) in cases {
+            let out = input.strip_prefix("claude/").unwrap_or(input);
+            assert_eq!(out, expected);
+        }
+    }
+}
--- a/crates/gateway/src/v1/gemini.rs
+++ b/crates/gateway/src/v1/gemini.rs
@ -0,0 +1,230 @@
+//! Gemini adapter — Google's Generative Language API.
+//!
+//! POST `https://generativelanguage.googleapis.com/v1beta/models/
+//! {model}:generateContent?key=<API_KEY>`. Auth via query-string key
+//! (not bearer). Payload shape is NOT OpenAI-compatible — we map
+//! messages → contents + parts, extract response from `candidates[0]
+//! .content.parts[0].text`. Phase 40 deliverable; gate: `/v1/chat`
+//! with a prefixed or explicit gemini model returns normally.
+
+use std::time::Duration;
+use serde::{Deserialize, Serialize};
+
+use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
+
+const GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
+const GEMINI_TIMEOUT_SECS: u64 = 180;
+
+/// Locate the Gemini API key.
+///
+/// The `GEMINI_API_KEY` environment variable wins when it is set to a
+/// non-blank value (returned trimmed). Otherwise `/home/profit/.env`
+/// and then `/root/.env` are scanned for the first `GEMINI_API_KEY=`
+/// line whose value is non-empty after trimming and removing
+/// surrounding double quotes, then single quotes. Files that cannot be
+/// read are skipped. Returns `None` when nothing usable is found.
+pub fn resolve_gemini_key() -> Option<String> {
+    // An unset or non-UTF-8 variable collapses to "" and falls through
+    // to the file lookup.
+    let env_value = std::env::var("GEMINI_API_KEY").unwrap_or_default();
+    let env_value = env_value.trim();
+    if !env_value.is_empty() {
+        return Some(env_value.to_string());
+    }
+
+    for path in ["/home/profit/.env", "/root/.env"] {
+        let raw = match std::fs::read_to_string(path) {
+            Ok(contents) => contents,
+            Err(_) => continue,
+        };
+        let found = raw
+            .lines()
+            .filter_map(|line| line.strip_prefix("GEMINI_API_KEY="))
+            .map(|rest| rest.trim().trim_matches('"').trim_matches('\''))
+            .find(|candidate| !candidate.is_empty());
+        if let Some(k) = found {
+            return Some(k.to_string());
+        }
+    }
+    None
+}
+
+/// Send `req` to Gemini's `generateContent` endpoint and translate the
+/// reply into the gateway's `ChatResponse` shape.
+///
+/// * `key` — API key, passed as the `key` query parameter.
+/// * `req` — `model` may carry a `gemini/` prefix, which is removed;
+///   `temperature` defaults to 0.3 and `max_tokens` to 800 when unset.
+///
+/// Returns `Err(String)` when the HTTP client cannot be built, the
+/// request cannot be sent, the status is not a success (the message
+/// carries the status and response body), the body does not parse, or
+/// the response holds no candidates.
+///
+/// Transport and decode errors are formatted with their URL removed:
+/// the request URL embeds the API key, and these error strings are
+/// returned to the caller.
+pub async fn chat(
+    key: &str,
+    req: &ChatRequest,
+) -> Result<ChatResponse, String> {
+    // Strip the "gemini/" prefix if the caller used the namespaced form.
+    let model = req.model.strip_prefix("gemini/").unwrap_or(&req.model).to_string();
+
+    // Role mapping: every message becomes its own `contents` entry, in
+    // order. "system" and "user" are both sent with role "user"; any
+    // other role is sent as "model". System text is NOT merged into a
+    // single turn — it simply travels as an additional user entry.
+    let mut contents: Vec<GmContent> = Vec::new();
+    for m in &req.messages {
+        let role = match m.role.as_str() {
+            "system" | "user" => "user",
+            _ => "model",
+        };
+        contents.push(GmContent {
+            role: role.to_string(),
+            parts: vec![GmPart { text: m.text() }],
+        });
+    }
+
+    let body = GmChatBody {
+        contents,
+        generation_config: GmGenerationConfig {
+            temperature: req.temperature.unwrap_or(0.3),
+            max_output_tokens: req.max_tokens.unwrap_or(800),
+        },
+    };
+
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(GEMINI_TIMEOUT_SECS))
+        .build()
+        .map_err(|e| format!("build client: {e}"))?;
+
+    // The key lives in the query string, so this URL must never be
+    // logged or echoed back in an error.
+    let url = format!("{}/models/{}:generateContent?key={}", GEMINI_BASE_URL, model, key);
+    let t0 = std::time::Instant::now();
+    let resp = client
+        .post(&url)
+        .json(&body)
+        .send()
+        .await
+        // reqwest includes the request URL in an error's Display output;
+        // drop it so the key cannot leak through the error string.
+        .map_err(|e| {
+            format!(
+                "generativelanguage.googleapis.com unreachable: {}",
+                e.without_url()
+            )
+        })?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        let body = resp.text().await.unwrap_or_else(|_| "?".into());
+        return Err(format!("gemini {}: {}", status, body));
+    }
+
+    let parsed: GmChatResponse = resp.json().await
+        .map_err(|e| format!("invalid gemini response: {}", e.without_url()))?;
+
+    // Latency covers the round trip plus response-body decoding.
+    let latency_ms = t0.elapsed().as_millis();
+    let candidate = parsed.candidates.into_iter().next()
+        .ok_or_else(|| "gemini returned no candidates".to_string())?;
+    // Only the first part of the first candidate is used; empty string
+    // when the candidate has no parts.
+    let text = candidate.content.parts.into_iter()
+        .next()
+        .map(|p| p.text)
+        .unwrap_or_default();
+
+    // Prefer the counts reported by the API; otherwise estimate at
+    // roughly four characters per token, rounding up.
+    let prompt_tokens = parsed.usage_metadata.as_ref()
+        .map(|u| u.prompt_token_count)
+        .unwrap_or_else(|| {
+            let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
+            ((chars + 3) / 4) as u32
+        });
+    let completion_tokens = parsed.usage_metadata.as_ref()
+        .map(|u| u.candidates_token_count)
+        .unwrap_or_else(|| ((text.chars().count() + 3) / 4) as u32);
+
+    tracing::info!(
+        target: "v1.chat",
+        provider = "gemini",
+        model = %model,
+        prompt_tokens,
+        completion_tokens,
+        latency_ms = latency_ms as u64,
+        "gemini chat completed",
+    );
+
+    Ok(ChatResponse {
+        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
+        object: "chat.completion",
+        created: chrono::Utc::now().timestamp(),
+        model,
+        choices: vec![Choice {
+            index: 0,
+            message: Message::new_text("assistant", text),
+            finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
+        }],
+        usage: UsageBlock {
+            prompt_tokens,
+            completion_tokens,
+            total_tokens: prompt_tokens + completion_tokens,
+        },
+    })
+}
+
+// -- Gemini wire shapes --
+
+/// JSON request body that `chat` posts to the `generateContent`
+/// endpoint.
+#[derive(Serialize)]
+struct GmChatBody {
+    /// One entry per input message, in the caller's order.
+    contents: Vec<GmContent>,
+    /// Serialized under the camelCase key `generationConfig`.
+    #[serde(rename = "generationConfig")]
+    generation_config: GmGenerationConfig,
+}
+
+/// One conversation turn; `chat` sets `role` to "user" or "model".
+#[derive(Serialize)]
+struct GmContent {
+    role: String,
+    parts: Vec<GmPart>,
+}
+
+/// A single text fragment of a turn. `chat` sends exactly one part per
+/// turn.
+#[derive(Serialize)]
+struct GmPart { text: String }
+
+/// Sampling settings; field names are emitted in camelCase
+/// (`temperature`, `maxOutputTokens`).
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct GmGenerationConfig {
+    temperature: f64,
+    max_output_tokens: u32,
+}
+
+/// Subset of the `generateContent` response that `chat` reads.
+#[derive(Deserialize)]
+struct GmChatResponse {
+    /// Required: a response without `candidates` fails to deserialize.
+    /// `chat` uses only the first element.
+    candidates: Vec<GmCandidate>,
+    /// Wire field `usageMetadata`. `None` when absent; `chat` then
+    /// estimates token counts from character counts.
+    #[serde(default, rename = "usageMetadata")]
+    usage_metadata: Option<GmUsage>,
+}
+
+/// One generated candidate.
+#[derive(Deserialize)]
+struct GmCandidate {
+    // NOTE(review): `content` is required here, so a candidate object
+    // without it makes the whole response fail to parse ("invalid
+    // gemini response"). Confirm the API always includes it, e.g. for
+    // blocked or filtered candidates.
+    content: GmContentResp,
+    /// Wire field `finishReason`. `None` when absent; `chat` then
+    /// reports "stop".
+    #[serde(default, rename = "finishReason")]
+    finish_reason: Option<String>,
+}
+
+/// Content of a candidate; `parts` is required.
+#[derive(Deserialize)]
+struct GmContentResp { parts: Vec<GmPartResp> }
+
+/// One response part; `text` defaults to the empty string when the
+/// part carries no `text` field.
+#[derive(Deserialize)]
+struct GmPartResp { #[serde(default)] text: String }
+
+/// Token counts read from `usageMetadata` (camelCase on the wire:
+/// `promptTokenCount`, `candidatesTokenCount`). Both are required when
+/// the object is present.
+#[derive(Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct GmUsage {
+    prompt_token_count: u32,
+    candidates_token_count: u32,
+}
+
+// Unit tests for the Gemini adapter. None of them perform a network
+// call; they cover key resolution and request-body serialization only.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Smoke test: key resolution must complete without panicking
+    /// whatever the environment holds. The result itself is ignored.
+    #[test]
+    fn resolve_gemini_key_does_not_panic() {
+        let _ = resolve_gemini_key();
+    }
+
+    /// The request body must use Gemini's key names: `contents`,
+    /// `parts`, and the camelCase `generationConfig` / `maxOutputTokens`.
+    #[test]
+    fn chat_body_serializes_to_gemini_shape() {
+        let body = GmChatBody {
+            contents: vec![
+                GmContent {
+                    role: "user".into(),
+                    parts: vec![GmPart { text: "hello".into() }],
+                },
+            ],
+            generation_config: GmGenerationConfig {
+                temperature: 0.3,
+                max_output_tokens: 800,
+            },
+        };
+        let json = serde_json::to_string(&body).unwrap();
+        assert!(json.contains("\"contents\""));
+        assert!(json.contains("\"parts\""));
+        // camelCase per Gemini API
+        assert!(json.contains("\"generationConfig\""));
+        assert!(json.contains("\"maxOutputTokens\":800"));
+    }
+
+    /// Checks the prefix-stripping expression for both the namespaced
+    /// and the bare model name. This repeats the `strip_prefix` /
+    /// `unwrap_or` expression locally; it does not call `chat`.
+    #[test]
+    fn model_prefix_strip_preserves_bare_names() {
+        let cases = [
+            ("gemini/gemini-2.0-flash", "gemini-2.0-flash"),
+            ("gemini-2.0-flash", "gemini-2.0-flash"),
+        ];
+        for (input, expected) in cases {
+            let out = input.strip_prefix("gemini/").unwrap_or(input);
+            assert_eq!(out, expected);
+        }
+    }
+}
--- a/crates/gateway/src/v1/iterate.rs
+++ b/crates/gateway/src/v1/iterate.rs
@ -0,0 +1,543 @@
+//! /v1/iterate — the Phase 43 PRD's "generate → validate → correct → retry" loop.
+//!
+//! Closes the "0→85% with iteration" thesis structurally. A caller
+//! posts a prompt + artifact kind + validation context; the gateway:
+//!   1. Generates a JSON artifact via /v1/chat (any provider/model)
+//!   2. Extracts the JSON object from the model output
+//!   3. Validates via /v1/validate (FillValidator / EmailValidator /
+//!      PlaybookValidator with the shared WorkerLookup)
+//!   4. On ValidationError, appends the error to the prompt and
+//!      retries up to `max_iterations` (default 3)
+//!   5. Returns the accepted artifact + Report on success, OR the
+//!      attempt history + final error on max-iter exhaustion
+//!
+//! Internal calls go via HTTP loopback to the gateway itself (the
+//! address is currently hard-coded to 127.0.0.1:3100 in `iterate`) so
+//! the same /v1/usage tracking and Langfuse traces apply. This costs a
+//! small amount of latency (~1-3ms per loopback hop) in exchange for
+//! clean separation of concerns and observability.
+//!
+//! 2026-04-27 Phase 43 v3 part 3: this endpoint makes the iteration
+//! loop a first-class lakehouse capability rather than a per-caller
+//! re-implementation. Staffing executors, agent loops, and future
+//! validators all reach the same code path.
+
+use axum::{extract::State, http::{HeaderMap, StatusCode}, response::IntoResponse, Json};
+use serde::{Deserialize, Serialize};
+
+const DEFAULT_MAX_ITERATIONS: u32 = 3;
+const LOOPBACK_TIMEOUT_SECS: u64 = 240;
+
+/// Header name used to propagate a Langfuse parent trace id across
+/// daemon boundaries. Matches Go's `shared.TraceIDHeader` constant
+/// byte-for-byte (commit d6d2fdf in golangLAKEHOUSE) — same wire
+/// format means a Go caller can hit Rust's /v1/iterate (or vice
+/// versa) and the resulting Langfuse trees nest correctly.
+pub const TRACE_ID_HEADER: &str = "x-lakehouse-trace-id";
+
+/// JSON request body accepted by the `iterate` handler.
+///
+/// `kind`, `prompt`, `provider` and `model` are required; every other
+/// field is optional and falls back to the default noted on it.
+#[derive(Deserialize)]
+pub struct IterateRequest {
+    /// "fill" | "email" | "playbook" — picks which validator runs.
+    pub kind: String,
+    /// The prompt to seed generation. Validation errors from prior
+    /// attempts are appended on retry.
+    pub prompt: String,
+    /// Provider/model passed through to /v1/chat. e.g. "ollama_cloud"
+    /// + "kimi-k2.6", or "opencode" + "claude-haiku-4-5".
+    pub provider: String,
+    pub model: String,
+    /// Optional system prompt — sent to /v1/chat as the system message.
+    #[serde(default)]
+    pub system: Option<String>,
+    /// Validation context (target_count, city, state, role, client_id
+    /// for fills; candidate_id for emails). Forwarded to /v1/validate.
+    /// Sent as JSON `null` when absent.
+    #[serde(default)]
+    pub context: Option<serde_json::Value>,
+    /// Cap on iteration count. Defaults to 3 per the Phase 43 PRD.
+    /// Values below 1 are raised to 1 by the handler.
+    #[serde(default)]
+    pub max_iterations: Option<u32>,
+    /// Forwarded to /v1/chat. Defaults to 0.2 if unset.
+    #[serde(default)]
+    pub temperature: Option<f64>,
+    /// Forwarded to /v1/chat. Defaults to 4096 if unset.
+    #[serde(default)]
+    pub max_tokens: Option<u32>,
+}
+
+/// One entry of the per-request attempt history returned to the caller.
+#[derive(Serialize)]
+pub struct IterateAttempt {
+    /// Zero-based index of the attempt within the loop.
+    pub iteration: u32,
+    /// Model output for this attempt; the handler stores at most the
+    /// first 2000 characters.
+    pub raw: String,
+    /// Outcome of the attempt.
+    pub status: AttemptStatus,
+}
+
+/// Outcome of a single attempt. Serialized as an internally tagged
+/// object whose `kind` field holds the snake_case variant name.
+#[derive(Serialize)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum AttemptStatus {
+    /// Model output didn't contain extractable JSON.
+    NoJson,
+    /// JSON extracted but failed validation; carries the error.
+    /// The handler stores the error summary as a JSON string here.
+    ValidationFailed { error: serde_json::Value },
+    /// Validation passed (last attempt's terminal status).
+    Accepted,
+}
+
+/// Success body of the `iterate` handler (sent with HTTP 200).
+#[derive(Serialize)]
+pub struct IterateResponse {
+    /// The JSON object that passed validation.
+    pub artifact: serde_json::Value,
+    /// The body returned by /v1/validate for the accepted artifact.
+    pub validation: serde_json::Value,
+    /// Number of attempts used, counting the accepted one (1-based).
+    pub iterations: u32,
+    /// Every attempt in order, ending with the accepted one.
+    pub history: Vec<IterateAttempt>,
+    /// Echoes the resolved trace id (caller-forwarded header, body
+    /// field, langfuse-middleware mint, or local fallback). Operators
+    /// pivot from this id straight into Langfuse + the
+    /// coordinator_sessions.jsonl join key. Cross-runtime parity with
+    /// Go's `validator.IterateResponse` (commit 6847bbc in
+    /// golangLAKEHOUSE).
+    ///
+    /// NOTE(review): the `iterate` handler in this module only resolves
+    /// the id from the forwarded header or a locally minted fallback;
+    /// the body-field and middleware sources listed above are not
+    /// visible here — confirm or trim the list.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub trace_id: Option<String>,
+}
+
+/// Failure body of the `iterate` handler, sent with HTTP 422 when no
+/// attempt passed validation within the iteration budget.
+#[derive(Serialize)]
+pub struct IterateFailure {
+    /// Human-readable reason the loop gave up.
+    pub error: String,
+    /// Number of iterations reported; the handler sets this to the
+    /// iteration cap.
+    pub iterations: u32,
+    /// Every attempt recorded in history, in order.
+    pub history: Vec<IterateAttempt>,
+    /// Trace id for the session; omitted from the JSON when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub trace_id: Option<String>,
+}
+
+/// `POST /v1/iterate` handler: run the generate → extract → validate loop.
+///
+/// Each iteration posts the current prompt to `/v1/chat`, pulls the first
+/// JSON object out of the reply with `extract_json`, and submits it to
+/// `/v1/validate`. A reply with no JSON, or a rejected artifact, is fed
+/// back into the next prompt (always rebuilt from the original prompt);
+/// the loop stops at the first accepted artifact or after the iteration
+/// cap is reached.
+///
+/// Responses:
+/// * `200` + `IterateResponse` — an artifact passed validation.
+/// * `422` + `IterateFailure` — every attempt was rejected.
+/// * `502` (plain text) — the `/v1/chat` hop failed.
+/// * `500` (plain text) — the loopback HTTP client could not be built.
+pub async fn iterate(
+    State(state): State<super::V1State>,
+    headers: HeaderMap,
+    Json(req): Json<IterateRequest>,
+) -> impl IntoResponse {
+    // `max_iterations` is caller-controlled, so it must never size an
+    // allocation directly: `Vec::with_capacity(u32::MAX as usize)` asks
+    // for an enormous buffer up front and can abort the process. Pre-size
+    // for the common case only; the vectors still grow on demand if a
+    // caller really asks for more attempts.
+    const HISTORY_PREALLOC_CAP: usize = 16;
+
+    let max_iter = req.max_iterations.unwrap_or(DEFAULT_MAX_ITERATIONS).max(1);
+    let temperature = req.temperature.unwrap_or(0.2);
+    let max_tokens = req.max_tokens.unwrap_or(4096);
+    let prealloc = (max_iter as usize).min(HISTORY_PREALLOC_CAP);
+    let mut history: Vec<IterateAttempt> = Vec::with_capacity(prealloc);
+    let mut attempt_records: Vec<super::session_log::SessionAttemptRecord> = Vec::with_capacity(prealloc);
+    let mut current_prompt = req.prompt.clone();
+
+    // Resolve the parent Langfuse trace id. Caller-forwarded header
+    // wins (cross-daemon tree linkage); otherwise mint a fresh id so
+    // the iterate session is its own tree. Same shape as the Go-side
+    // validatord trace propagation.
+    let trace_id: String = headers
+        .get(TRACE_ID_HEADER)
+        .and_then(|v| v.to_str().ok())
+        .filter(|s| !s.is_empty())
+        .map(|s| s.to_string())
+        .unwrap_or_else(new_trace_id);
+
+    let client = match reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(LOOPBACK_TIMEOUT_SECS))
+        .build() {
+        Ok(c) => c,
+        Err(e) => {
+            // Even infrastructure failures get a session row so a
+            // missing /v1/iterate event never silently disappears
+            // from the longitudinal log.
+            write_infra_error(&state, &req, &trace_id, max_iter, 0, format!("client build: {e}")).await;
+            return (StatusCode::INTERNAL_SERVER_ERROR, format!("client build: {e}")).into_response();
+        }
+    };
+    // Self-loopback to the gateway port. Carries gateway internal
+    // calls through /v1/chat + /v1/validate so /v1/usage tracks them.
+    let gateway = "http://127.0.0.1:3100";
+    let t0 = std::time::Instant::now();
+
+    for iteration in 0..max_iter {
+        let attempt_started = chrono::Utc::now();
+        // ── Generate ──
+        let mut messages = Vec::with_capacity(2);
+        if let Some(sys) = &req.system {
+            messages.push(serde_json::json!({"role": "system", "content": sys}));
+        }
+        messages.push(serde_json::json!({"role": "user", "content": current_prompt}));
+        let chat_body = serde_json::json!({
+            "messages": messages,
+            "provider": req.provider,
+            "model": req.model,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+        });
+        let raw = match call_chat(&client, gateway, &chat_body, &trace_id).await {
+            Ok(r) => r,
+            Err(e) => {
+                write_infra_error(&state, &req, &trace_id, max_iter, t0.elapsed().as_millis() as u64, format!("/v1/chat hop failed at iter {iteration}: {e}")).await;
+                return (StatusCode::BAD_GATEWAY, format!("/v1/chat hop failed at iter {iteration}: {e}")).into_response();
+            }
+        };
+
+        // ── Extract JSON ──
+        let artifact = match extract_json(&raw) {
+            Some(a) => a,
+            None => {
+                let span_id = emit_attempt_span(
+                    &state, &trace_id, iteration, &req, &current_prompt, &raw, "no_json", None,
+                    attempt_started, chrono::Utc::now(),
+                );
+                history.push(IterateAttempt {
+                    iteration,
+                    raw: raw.chars().take(2000).collect(),
+                    status: AttemptStatus::NoJson,
+                });
+                attempt_records.push(super::session_log::SessionAttemptRecord {
+                    iteration,
+                    verdict_kind: "no_json".to_string(),
+                    error: None,
+                    span_id,
+                });
+                current_prompt = format!(
+                    "{}\n\nYour previous attempt did not contain a JSON object. Reply with ONLY a valid JSON object matching the requested artifact shape.",
+                    req.prompt,
+                );
+                continue;
+            }
+        };
+
+        // ── Validate ──
+        let validate_body = serde_json::json!({
+            "kind": req.kind,
+            "artifact": artifact,
+            "context": req.context.clone().unwrap_or(serde_json::Value::Null),
+        });
+        match call_validate(&client, gateway, &validate_body, &trace_id).await {
+            Ok(report) => {
+                let span_id = emit_attempt_span(
+                    &state, &trace_id, iteration, &req, &current_prompt, &raw, "accepted", None,
+                    attempt_started, chrono::Utc::now(),
+                );
+                history.push(IterateAttempt {
+                    iteration,
+                    raw: raw.chars().take(2000).collect(),
+                    status: AttemptStatus::Accepted,
+                });
+                attempt_records.push(super::session_log::SessionAttemptRecord {
+                    iteration,
+                    verdict_kind: "accepted".to_string(),
+                    error: None,
+                    span_id,
+                });
+                let duration_ms = t0.elapsed().as_millis() as u64;
+                let grounded = grounded_in_roster(&state, &req.kind, &artifact);
+                write_session_accepted(&state, &req, &trace_id, iteration + 1, max_iter, attempt_records, &artifact, grounded, duration_ms).await;
+                return (StatusCode::OK, Json(IterateResponse {
+                    artifact,
+                    validation: report,
+                    iterations: iteration + 1,
+                    history,
+                    trace_id: Some(trace_id.clone()),
+                })).into_response();
+            }
+            Err(err) => {
+                // NOTE: `call_validate` reports transport/parse failures
+                // through the same `Err(String)` channel as a real
+                // validation rejection, so both are retried alike here.
+                let err_summary = err.to_string();
+                let span_id = emit_attempt_span(
+                    &state, &trace_id, iteration, &req, &current_prompt, &raw, "validation_failed",
+                    Some(err_summary.clone()),
+                    attempt_started, chrono::Utc::now(),
+                );
+                history.push(IterateAttempt {
+                    iteration,
+                    raw: raw.chars().take(2000).collect(),
+                    status: AttemptStatus::ValidationFailed {
+                        error: serde_json::to_value(&err_summary).unwrap_or(serde_json::Value::Null),
+                    },
+                });
+                attempt_records.push(super::session_log::SessionAttemptRecord {
+                    iteration,
+                    verdict_kind: "validation_failed".to_string(),
+                    error: Some(err_summary.clone()),
+                    span_id,
+                });
+                // Append validation feedback to prompt for next iter.
+                // The model sees concrete failure mode + retries with
+                // corrective context. This is the "observer correction"
+                // in Phase 43 PRD shape, simplified — the validator
+                // itself IS the observer for now.
+                current_prompt = format!(
+                    "{}\n\nPrior attempt failed validation:\n{}\n\nFix the specific issue above and respond with a corrected JSON object.",
+                    req.prompt, err_summary,
+                );
+                continue;
+            }
+        }
+    }
+
+    let duration_ms = t0.elapsed().as_millis() as u64;
+    write_session_failure(&state, &req, &trace_id, max_iter, max_iter, attempt_records, duration_ms).await;
+    (StatusCode::UNPROCESSABLE_ENTITY, Json(IterateFailure {
+        error: format!("max iterations reached ({max_iter}) without passing validation"),
+        iterations: max_iter,
+        history,
+        trace_id: Some(trace_id.clone()),
+    })).into_response()
+}
+
+// ─── Helpers — Langfuse spans + session log + roster check ─────────
+
+/// Emit one Langfuse span describing a single iterate attempt.
+///
+/// Returns the span id handed back by the Langfuse client, or `None`
+/// when `state.langfuse` is not set (in which case nothing is built or
+/// emitted).
+fn emit_attempt_span(
+    state: &super::V1State,
+    trace_id: &str,
+    iteration: u32,
+    req: &IterateRequest,
+    prompt: &str,
+    raw: &str,
+    verdict: &str,
+    error: Option<String>,
+    started: chrono::DateTime<chrono::Utc>,
+    ended: chrono::DateTime<chrono::Utc>,
+) -> Option<String> {
+    state.langfuse.as_ref().map(|client| {
+        let span = super::langfuse_trace::AttemptSpan {
+            trace_id: trace_id.to_string(),
+            iteration,
+            model: req.model.clone(),
+            provider: req.provider.clone(),
+            prompt: prompt.to_string(),
+            raw: raw.to_string(),
+            verdict: verdict.to_string(),
+            error,
+            start_time: started.to_rfc3339(),
+            end_time: ended.to_rfc3339(),
+        };
+        client.emit_attempt_span(span)
+    })
+}
+
+/// Roster grounding check for accepted artifacts.
+///
+/// Only the "fill" kind is checked; any other kind yields `None`, as
+/// does a fill artifact with no `fills` array. Otherwise the result is
+/// `Some(true)` when every entry carries a non-empty string
+/// `candidate_id` that `state.validate_workers` can find, and
+/// `Some(false)` as soon as one entry does not. Same semantics as
+/// Go's `handlers.rosterCheckFor("fill")`.
+fn grounded_in_roster(
+    state: &super::V1State,
+    kind: &str,
+    artifact: &serde_json::Value,
+) -> Option<bool> {
+    if kind != "fill" {
+        return None;
+    }
+    let entries = artifact.get("fills")?.as_array()?;
+    let every_id_known = entries.iter().all(|entry| {
+        entry
+            .get("candidate_id")
+            .and_then(|v| v.as_str())
+            .filter(|id| !id.is_empty())
+            .map_or(false, |id| state.validate_workers.find(id).is_some())
+    });
+    Some(every_id_known)
+}
+
+/// Append an "accepted" session record to the session log.
+/// Does nothing when `state.session_log` is not set.
+async fn write_session_accepted(
+    state: &super::V1State,
+    req: &IterateRequest,
+    trace_id: &str,
+    iterations: u32,
+    max_iter: u32,
+    attempts: Vec<super::session_log::SessionAttemptRecord>,
+    artifact: &serde_json::Value,
+    grounded: Option<bool>,
+    duration_ms: u64,
+) {
+    if let Some(sink) = state.session_log.as_ref() {
+        let record = build_session_record(
+            req,
+            trace_id,
+            "accepted",
+            iterations,
+            max_iter,
+            attempts,
+            Some(artifact.clone()),
+            grounded,
+            duration_ms,
+        );
+        sink.append(record).await;
+    }
+}
+
+/// Append a "max_iter_exhausted" session record (no artifact, no
+/// grounding verdict) to the session log.
+/// Does nothing when `state.session_log` is not set.
+async fn write_session_failure(
+    state: &super::V1State,
+    req: &IterateRequest,
+    trace_id: &str,
+    iterations: u32,
+    max_iter: u32,
+    attempts: Vec<super::session_log::SessionAttemptRecord>,
+    duration_ms: u64,
+) {
+    if let Some(sink) = state.session_log.as_ref() {
+        let record = build_session_record(
+            req,
+            trace_id,
+            "max_iter_exhausted",
+            iterations,
+            max_iter,
+            attempts,
+            None,
+            None,
+            duration_ms,
+        );
+        sink.append(record).await;
+    }
+}
+
+/// Append an "infra_error" session record carrying a single synthetic
+/// attempt (iteration 0, no span id) whose error is `error`. The record
+/// itself reports 0 iterations.
+/// Does nothing when `state.session_log` is not set.
+async fn write_infra_error(
+    state: &super::V1State,
+    req: &IterateRequest,
+    trace_id: &str,
+    max_iter: u32,
+    duration_ms: u64,
+    error: String,
+) {
+    if let Some(sink) = state.session_log.as_ref() {
+        let sole_attempt = super::session_log::SessionAttemptRecord {
+            iteration: 0,
+            verdict_kind: "infra_error".to_string(),
+            error: Some(error),
+            span_id: None,
+        };
+        let record = build_session_record(
+            req,
+            trace_id,
+            "infra_error",
+            0,
+            max_iter,
+            vec![sole_attempt],
+            None,
+            None,
+            duration_ms,
+        );
+        sink.append(record).await;
+    }
+}
+
+/// Assemble the `SessionRecord` written to the session log for one
+/// /v1/iterate call.
+///
+/// The trace id doubles as the record's `session_id`, the timestamp is
+/// taken at call time, and the stored prompt is the original request
+/// prompt passed through `session_log::truncate` with a limit of 4000.
+fn build_session_record(
+    req: &IterateRequest,
+    trace_id: &str,
+    final_verdict: &str,
+    iterations: u32,
+    max_iter: u32,
+    attempts: Vec<super::session_log::SessionAttemptRecord>,
+    artifact: Option<serde_json::Value>,
+    grounded: Option<bool>,
+    duration_ms: u64,
+) -> super::session_log::SessionRecord {
+    super::session_log::SessionRecord {
+        schema: super::session_log::SESSION_RECORD_SCHEMA.to_string(),
+        session_id: trace_id.to_string(),
+        timestamp: chrono::Utc::now().to_rfc3339(),
+        daemon: "gateway".to_string(),
+        kind: req.kind.clone(),
+        model: req.model.clone(),
+        provider: req.provider.clone(),
+        prompt: super::session_log::truncate(&req.prompt, 4000),
+        iterations,
+        max_iterations: max_iter,
+        final_verdict: final_verdict.to_string(),
+        attempts,
+        artifact,
+        grounded_in_roster: grounded,
+        duration_ms,
+    }
+}
+
+/// Generate a fresh trace id when no parent was forwarded. Same
+/// time-ordered hex shape Langfuse already accepts elsewhere in this
+/// crate (see `langfuse_trace::uuid_v7_like`).
+///
+/// Layout: `<16 hex digits: ns since Unix epoch>-<8 hex digits: entropy>`.
+/// The suffix previously reused the sub-second part of the same clock
+/// reading, which added no information beyond the timestamp; it now
+/// comes from a randomly keyed hasher so two ids minted in the same
+/// instant still differ.
+fn new_trace_id() -> String {
+    use std::hash::{BuildHasher, Hasher};
+
+    // Timestamp half; 0 if the clock reports a time before the epoch.
+    let ts = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .map(|d| d.as_nanos() as u64)
+        .unwrap_or(0);
+    // Entropy half: each `RandomState::new()` carries fresh random keys,
+    // so hashing nothing still yields a different value per call.
+    // Truncated to 32 bits to keep the original `{:08x}` width.
+    let entropy = std::collections::hash_map::RandomState::new()
+        .build_hasher()
+        .finish() as u32;
+    format!("{:016x}-{:08x}", ts, entropy)
+}
+
+/// POST `body` to `{gateway}/v1/chat` and return the assistant text.
+///
+/// The trace id header is attached only when `trace_id` is non-empty.
+/// On a non-2xx status the error carries the status plus the first 300
+/// characters of the response body. On success the text is read from
+/// `/choices/0/message/content`; a missing or non-string value yields
+/// an empty string rather than an error.
+async fn call_chat(client: &reqwest::Client, gateway: &str, body: &serde_json::Value, trace_id: &str) -> Result<String, String> {
+    let builder = client.post(format!("{gateway}/v1/chat")).json(body);
+    let builder = if trace_id.is_empty() {
+        builder
+    } else {
+        builder.header(TRACE_ID_HEADER, trace_id)
+    };
+    let resp = builder.send().await.map_err(|e| format!("chat hop: {e}"))?;
+    let status = resp.status();
+    if status.is_success() {
+        let payload: serde_json::Value = resp.json().await.map_err(|e| format!("chat parse: {e}"))?;
+        let content = payload
+            .pointer("/choices/0/message/content")
+            .and_then(|v| v.as_str())
+            .unwrap_or_default();
+        Ok(content.to_owned())
+    } else {
+        let text = resp.text().await.unwrap_or_default();
+        let snippet: String = text.chars().take(300).collect();
+        Err(format!("chat {}: {}", status, snippet))
+    }
+}
+
+/// POST `body` to `{gateway}/v1/validate`.
+///
+/// Returns the parsed JSON report on a 2xx status. On any other status
+/// with a JSON body, the body is returned re-serialized as the error
+/// string so the caller can feed the validator's own detail back into
+/// the prompt. When the body is not JSON at all (e.g. a plain-text 5xx),
+/// the error names the HTTP status and includes the first 300 characters
+/// of the body, instead of a bare parse error that hides what the
+/// endpoint actually answered.
+///
+/// The trace id header is attached only when `trace_id` is non-empty.
+async fn call_validate(client: &reqwest::Client, gateway: &str, body: &serde_json::Value, trace_id: &str) -> Result<serde_json::Value, String> {
+    let mut req = client.post(format!("{gateway}/v1/validate")).json(body);
+    if !trace_id.is_empty() {
+        req = req.header(TRACE_ID_HEADER, trace_id);
+    }
+    let resp = req.send().await.map_err(|e| format!("validate hop: {e}"))?;
+    // Capture the status before consuming the response body.
+    let status = resp.status();
+    let text = resp.text().await.map_err(|e| format!("validate {status} body read: {e}"))?;
+    let parsed: serde_json::Value = match serde_json::from_str(&text) {
+        Ok(v) => v,
+        Err(e) => {
+            let snippet: String = text.chars().take(300).collect();
+            return Err(format!("validate parse ({status}): {e}; body: {snippet}"));
+        }
+    };
+    if status.is_success() {
+        Ok(parsed)
+    } else {
+        // The /v1/validate endpoint returns a ValidationError JSON
+        // on 422; surface its structure verbatim so the prompt-
+        // appending step gets specific failure detail.
+        Err(serde_json::to_string(&parsed).unwrap_or_else(|_| format!("validation {} (unparseable body)", status)))
+    }
+}
+
+/// Extract the first JSON object from a model's output. Handles
+/// fenced code blocks (```json ... ```), bare braces, and stray
+/// prose around the JSON. Returns None on no extractable object.
+///
+/// Strategy:
+///   1. Collect the body of every ``` fenced block and return the first
+///      one that parses as a JSON object.
+///   2. Otherwise scan the raw text for balanced `{ ... }` spans and
+///      return the first span that parses as a JSON object.
+///
+/// The brace scan is not string-aware: an unbalanced brace inside a JSON
+/// string literal can still defeat it.
+///
+/// Made `pub` 2026-05-02 to support the cross-runtime parity probe
+/// at `golangLAKEHOUSE/scripts/cutover/parity/extract_json_parity.sh`.
+/// The Go counterpart lives at `internal/validator/iterate.go::ExtractJSON`;
+/// when either runtime's algorithm changes the parity probe surfaces
+/// the divergence.
+/// NOTE(review): the stray-`}` guard below changes results for inputs
+/// that start with an unmatched closing brace — confirm the Go side
+/// handles that case the same way.
+pub fn extract_json(raw: &str) -> Option<serde_json::Value> {
+    // Try fenced first.
+    let candidates: Vec<String> = {
+        let mut out = vec![];
+        let mut s = raw;
+        while let Some(start) = s.find("```") {
+            let after = &s[start + 3..];
+            // Skip optional language tag (json, etc.)
+            let body_start = after.find('\n').map(|n| n + 1).unwrap_or(0);
+            let body = &after[body_start..];
+            if let Some(end) = body.find("```") {
+                out.push(body[..end].trim().to_string());
+                s = &body[end + 3..];
+            } else { break; }
+        }
+        out
+    };
+    for c in &candidates {
+        if let Ok(v) = serde_json::from_str::<serde_json::Value>(c) {
+            if v.is_object() { return Some(v); }
+        }
+    }
+    // Fall back to outermost {...} balance.
+    let bytes = raw.as_bytes();
+    let mut depth = 0usize;
+    let mut start: Option<usize> = None;
+    for (i, &b) in bytes.iter().enumerate() {
+        match b {
+            b'{' => { if start.is_none() { start = Some(i); } depth += 1; }
+            b'}' => {
+                // A closing brace with nothing open (e.g. prose like
+                // "} {...}") must be ignored. Letting it push the depth
+                // below zero would stop every later object from ever
+                // balancing back to 0, hiding valid JSON that follows.
+                if depth == 0 { continue; }
+                depth -= 1;
+                if depth == 0 {
+                    if let Some(s) = start {
+                        // `{` and `}` are ASCII, so both ends of the
+                        // slice fall on char boundaries.
+                        let slice = &raw[s..=i];
+                        if let Ok(v) = serde_json::from_str::<serde_json::Value>(slice) {
+                            if v.is_object() { return Some(v); }
+                        }
+                        // Not valid JSON: drop the span and keep scanning.
+                        start = None;
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // A ```json fenced block surrounded by prose is found and parsed.
+    #[test]
+    fn extract_json_from_fenced_block() {
+        let raw = "Here's my answer:\n```json\n{\"fills\": [{\"candidate_id\": \"W-1\"}]}\n```\nDone.";
+        let v = extract_json(raw).unwrap();
+        assert!(v.get("fills").is_some());
+    }
+
+    // With no fence, the brace-balancing fallback finds the object.
+    #[test]
+    fn extract_json_from_bare_braces() {
+        let raw = "Here you go: {\"fills\": [{\"candidate_id\": \"W-2\"}]}";
+        let v = extract_json(raw).unwrap();
+        assert!(v.get("fills").is_some());
+    }
+
+    // Text without any braces yields None.
+    #[test]
+    fn extract_json_returns_none_on_no_object() {
+        assert!(extract_json("just prose, no json").is_none());
+    }
+
+    // When several objects are present, the first balanced one wins.
+    #[test]
+    fn extract_json_picks_first_balanced() {
+        let raw = "{\"a\":1} then {\"b\":2}";
+        let v = extract_json(raw).unwrap();
+        assert_eq!(v.get("a").and_then(|v| v.as_i64()), Some(1));
+    }
+}
--- a/crates/gateway/src/v1/kimi.rs
+++ b/crates/gateway/src/v1/kimi.rs
@ -0,0 +1,227 @@
+//! Kimi For Coding adapter — direct provider for `kimi-for-coding`
+//! (kimi-k2.6 underneath). Used when Ollama Cloud's `kimi-k2:1t` is
+//! returning sustained 5xx (broken upstream) and OpenRouter's
+//! `moonshotai/kimi-k2.6` is rate-limited.
+//!
+//! Endpoint per `kimi.com/code/docs` and `moonshotai.github.io/kimi-cli`:
+//!   base_url:  https://api.kimi.com/coding/v1
+//!   model id:  kimi-for-coding
+//!   auth:      Bearer sk-kimi-…
+//!   protocol:  OpenAI Chat Completions compatible
+//!
+//! IMPORTANT: `api.kimi.com` is a separate account system from
+//! `api.moonshot.ai` and `api.moonshot.cn`. Keys are NOT interchangeable.
+//! This adapter is for `sk-kimi-*` keys provisioned via the Kimi
+//! membership console only.
+//!
+//! Key sourcing priority:
+//!   1. Env var `KIMI_API_KEY` (loaded from /etc/lakehouse/kimi.env via
+//!      systemd EnvironmentFile=)
+//!   2. /etc/lakehouse/kimi.env directly (rescue path if env not loaded)
+//!
+//! First hit wins. Resolved once at gateway startup, stored on
+//! `V1State.kimi_key`.
+
+use std::time::Duration;
+use serde::{Deserialize, Serialize};
+
+use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
+
+const KIMI_BASE_URL: &str = "https://api.kimi.com/coding/v1";
+// Default 600s — kimi-for-coding is a reasoning model; on large
+// code-audit prompts (~50KB+ input + 8K output) it routinely needs
+// 3-8 min to think + emit. Override with KIMI_TIMEOUT_SECS env var.
+const KIMI_TIMEOUT_SECS_DEFAULT: u64 = 600;
+
+/// Request timeout in seconds for the Kimi endpoint.
+///
+/// Reads `KIMI_TIMEOUT_SECS`; the value is used only when it parses
+/// (after trimming) as a positive integer. Unset, unparsable, or zero
+/// values fall back to `KIMI_TIMEOUT_SECS_DEFAULT`.
+fn kimi_timeout_secs() -> u64 {
+    match std::env::var("KIMI_TIMEOUT_SECS") {
+        Ok(raw) => match raw.trim().parse::<u64>() {
+            Ok(secs) if secs > 0 => secs,
+            _ => KIMI_TIMEOUT_SECS_DEFAULT,
+        },
+        Err(_) => KIMI_TIMEOUT_SECS_DEFAULT,
+    }
+}
+
+/// Locate the Kimi API key.
+///
+/// Order of precedence:
+///   1. The `KIMI_API_KEY` environment variable, trimmed, if non-empty.
+///   2. The first `KIMI_API_KEY=` line in `/etc/lakehouse/kimi.env` whose
+///      value is non-empty after trimming whitespace and surrounding
+///      double, then single, quotes.
+///
+/// Returns `None` when neither source yields a key.
+pub fn resolve_kimi_key() -> Option<String> {
+    let from_env = std::env::var("KIMI_API_KEY")
+        .ok()
+        .map(|value| value.trim().to_string())
+        .filter(|value| !value.is_empty());
+    if from_env.is_some() {
+        return from_env;
+    }
+
+    let contents = std::fs::read_to_string("/etc/lakehouse/kimi.env").ok()?;
+    contents
+        .lines()
+        .filter_map(|line| line.strip_prefix("KIMI_API_KEY="))
+        .map(|rest| rest.trim().trim_matches('"').trim_matches('\''))
+        .find(|key| !key.is_empty())
+        .map(str::to_string)
+}
+
+/// Send one non-streaming chat completion to the Kimi coding endpoint.
+///
+/// `key` is sent as the bearer token. `req.max_tokens` defaults to 800
+/// and `req.temperature` to 0.3 when unset. A fresh HTTP client is built
+/// on every call, with the timeout from `kimi_timeout_secs()`.
+///
+/// Returns the first choice wrapped in a `ChatResponse`. Errors are
+/// plain strings covering: client build failure, transport failure,
+/// a non-2xx status (status plus full response body), an undecodable
+/// response, and a response with no choices.
+pub async fn chat(
+    key: &str,
+    req: &ChatRequest,
+) -> Result<ChatResponse, String> {
+    // Strip the "kimi/" namespace prefix if the caller used it so the
+    // upstream API sees the bare model id (e.g. "kimi-for-coding").
+    let model = req.model.strip_prefix("kimi/").unwrap_or(&req.model).to_string();
+
+    // Flatten content to a plain String. api.kimi.com is text-only on
+    // the coding endpoint; the OpenAI multimodal array shape
+    // ([{type:"text",text:"..."},{type:"image_url",...}]) returns 400.
+    // Message::text() concats text-parts and drops non-text. Caught
+    // 2026-04-27 by Kimi's self-audit (kimi.rs:137 — content as raw
+    // serde_json::Value risked upstream rejection).
+    let body = KimiChatBody {
+        model: model.clone(),
+        messages: req.messages.iter().map(|m| KimiMessage {
+            role: m.role.clone(),
+            content: serde_json::Value::String(m.text()),
+        }).collect(),
+        max_tokens: req.max_tokens.unwrap_or(800),
+        temperature: req.temperature.unwrap_or(0.3),
+        stream: false,
+    };
+
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(kimi_timeout_secs()))
+        .build()
+        .map_err(|e| format!("build client: {e}"))?;
+
+    // Latency clock covers the request plus response decoding.
+    let t0 = std::time::Instant::now();
+    let resp = client
+        .post(format!("{}/chat/completions", KIMI_BASE_URL))
+        .bearer_auth(key)
+        // api.kimi.com gates this endpoint by User-Agent — only sanctioned
+        // coding agents (Claude Code, Kimi CLI, Roo Code, Kilo Code) get
+        // through. Generic clients receive 403 access_terminated_error.
+        // J accepted the TOS risk on 2026-04-27; revisit if Moonshot
+        // tightens enforcement.
+        .header("User-Agent", "claude-code/1.0.0")
+        .json(&body)
+        .send()
+        .await
+        .map_err(|e| format!("api.kimi.com unreachable: {e}"))?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        let body = resp.text().await.unwrap_or_else(|_| "?".into());
+        return Err(format!("api.kimi.com {}: {}", status, body));
+    }
+
+    let parsed: KimiChatResponse = resp.json().await
+        .map_err(|e| format!("invalid kimi response: {e}"))?;
+
+    let latency_ms = t0.elapsed().as_millis();
+    // Only the first choice is used; any others are discarded.
+    let choice = parsed.choices.into_iter().next()
+        .ok_or_else(|| "kimi returned no choices".to_string())?;
+    let text = choice.message.content;
+
+    // When the response carries no usage block, estimate token counts
+    // as ceil(chars / 4) over the prompt messages and the reply text.
+    let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
+        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
+        ((chars + 3) / 4) as u32
+    });
+    let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
+        ((text.chars().count() + 3) / 4) as u32
+    });
+
+    tracing::info!(
+        target: "v1.chat",
+        provider = "kimi",
+        model = %model,
+        prompt_tokens,
+        completion_tokens,
+        latency_ms = latency_ms as u64,
+        "kimi chat completed",
+    );
+
+    Ok(ChatResponse {
+        // Response id is synthesized locally from the current time.
+        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
+        object: "chat.completion",
+        created: chrono::Utc::now().timestamp(),
+        model,
+        choices: vec![Choice {
+            index: 0,
+            message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
+            // A missing finish reason is reported as "stop".
+            finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
+        }],
+        usage: UsageBlock {
+            prompt_tokens,
+            completion_tokens,
+            total_tokens: prompt_tokens + completion_tokens,
+        },
+    })
+}
+
+// -- Kimi wire shapes (OpenAI-compatible) --
+
+/// Request body posted to the chat completions endpoint.
+#[derive(Serialize)]
+struct KimiChatBody {
+    model: String,
+    messages: Vec<KimiMessage>,
+    max_tokens: u32,
+    temperature: f64,
+    stream: bool,
+}
+
+/// One outgoing message; `chat` always fills `content` with a JSON string.
+#[derive(Serialize)]
+struct KimiMessage { role: String, content: serde_json::Value }
+
+/// Response envelope; `usage` is optional and defaults to `None`.
+#[derive(Deserialize)]
+struct KimiChatResponse {
+    choices: Vec<KimiChoice>,
+    #[serde(default)]
+    usage: Option<KimiUsage>,
+}
+
+/// One returned choice; `finish_reason` defaults to `None` when absent.
+#[derive(Deserialize)]
+struct KimiChoice {
+    message: KimiMessageResp,
+    #[serde(default)]
+    finish_reason: Option<String>,
+}
+
+/// Assistant message of a choice. `content` is required here, so a
+/// response without a string `content` fails to deserialize.
+#[derive(Deserialize)]
+struct KimiMessageResp { content: String }
+
+/// Token counts reported by the upstream API.
+#[derive(Deserialize)]
+struct KimiUsage { prompt_tokens: u32, completion_tokens: u32 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Smoke test: key resolution must never panic; the result is ignored.
+    #[test]
+    fn resolve_kimi_key_does_not_panic() {
+        let _ = resolve_kimi_key();
+    }
+
+    // Pins the serialized request body to the field names asserted below.
+    #[test]
+    fn chat_body_serializes_to_openai_shape() {
+        let body = KimiChatBody {
+            model: "kimi-for-coding".into(),
+            messages: vec![
+                KimiMessage { role: "user".into(), content: "review this".into() },
+            ],
+            max_tokens: 800,
+            temperature: 0.3,
+            stream: false,
+        };
+        let json = serde_json::to_string(&body).unwrap();
+        assert!(json.contains("\"model\":\"kimi-for-coding\""));
+        assert!(json.contains("\"messages\""));
+        assert!(json.contains("\"max_tokens\":800"));
+        assert!(json.contains("\"stream\":false"));
+    }
+
+    // Checks the `strip_prefix("kimi/")` expression directly: prefixed
+    // ids lose the namespace, bare ids pass through unchanged.
+    #[test]
+    fn model_prefix_strip() {
+        let cases = [
+            ("kimi/kimi-for-coding", "kimi-for-coding"),
+            ("kimi-for-coding", "kimi-for-coding"),
+            ("kimi/kimi-k2.6", "kimi-k2.6"),
+        ];
+        for (input, expected) in cases {
+            let out = input.strip_prefix("kimi/").unwrap_or(input);
+            assert_eq!(out, expected, "{input} should become {expected}");
+        }
+    }
+}
--- a/crates/gateway/src/v1/langfuse_trace.rs
+++ b/crates/gateway/src/v1/langfuse_trace.rs
@ -76,63 +76,54 @@ impl LangfuseClient {
        });
    }

-    async fn emit_chat_inner(&self, ev: ChatTrace) -> Result<(), String> {
-        let trace_id = uuid_v7_like();
-        let gen_id = uuid_v7_like();
-        let trace_ts = ev.start_time.clone();
+    /// Queue one `/v1/iterate` attempt span for delivery to Langfuse
+    /// and hand its id back immediately.
+    ///
+    /// The id is minted before the background task is spawned, so the
+    /// caller can record it (e.g. on `IterateAttempt.span_id`) without
+    /// waiting for the network round-trip. A failed delivery is logged
+    /// at WARN under the `v1.langfuse` target and otherwise dropped.
+    /// Mirrors Go's `validator.Tracer` callback shape.
+    pub fn emit_attempt_span(&self, sp: AttemptSpan) -> String {
+        let span_id = uuid_v7_like();
+        let client = self.clone();
+        let task_span_id = span_id.clone();
+        tokio::spawn(async move {
+            let outcome = client.emit_attempt_span_inner(task_span_id, sp).await;
+            if let Err(e) = outcome {
+                tracing::warn!(target: "v1.langfuse", "iterate span drop: {e}");
+            }
+        });
+        span_id
+    }

+    /// Build a single `span-create` ingestion event for one iterate
+    /// attempt and send it through `post_batch`.
+    ///
+    /// The span is attached to `sp.trace_id`, named
+    /// `iterate.attempt[<iteration>]`, and timestamped with the
+    /// attempt's end time. Its level is "DEFAULT" when the verdict is
+    /// "accepted" and "WARNING" for every other verdict. The prompt
+    /// and raw model output are each truncated to 4000 chars before
+    /// being embedded. Returns whatever `post_batch` returns.
+    async fn emit_attempt_span_inner(&self, span_id: String, sp: AttemptSpan) -> Result<(), String> {
+        let level = if sp.verdict == "accepted" { "DEFAULT" } else { "WARNING" };
         let batch = IngestionBatch {
-            batch: vec![
-                IngestionEvent {
-                    id: uuid_v7_like(),
-                    timestamp: trace_ts.clone(),
-                    kind: "trace-create",
-                    body: serde_json::json!({
-                        "id": trace_id,
-                        "name": format!("v1.chat:{}", ev.provider),
-                        "input": serde_json::json!({
-                            "model": ev.model,
-                            "messages": ev.input,
-                        }),
-                        "metadata": serde_json::json!({
-                            "provider": ev.provider,
-                            "think": ev.think,
-                        }),
+            batch: vec![IngestionEvent {
+                id: uuid_v7_like(),
+                timestamp: sp.end_time.clone(),
+                kind: "span-create",
+                body: serde_json::json!({
+                    "id": span_id,
+                    "traceId": sp.trace_id,
+                    "name": format!("iterate.attempt[{}]", sp.iteration),
+                    "input": serde_json::json!({
+                        "iteration": sp.iteration,
+                        "model": sp.model,
+                        "provider": sp.provider,
+                        "prompt": truncate(&sp.prompt, 4000),
                     }),
-                },
-                IngestionEvent {
-                    id: uuid_v7_like(),
-                    timestamp: ev.end_time.clone(),
-                    kind: "generation-create",
-                    body: serde_json::json!({
-                        "id": gen_id,
-                        "traceId": trace_id,
-                        "name": "chat",
-                        "model": ev.model,
-                        "modelParameters": serde_json::json!({
-                            "temperature": ev.temperature,
-                            "max_tokens": ev.max_tokens,
-                            "think": ev.think,
-                        }),
-                        "input": ev.input,
-                        "output": ev.output,
-                        "usage": serde_json::json!({
-                            "input": ev.prompt_tokens,
-                            "output": ev.completion_tokens,
-                            "total": ev.prompt_tokens + ev.completion_tokens,
-                            "unit": "TOKENS",
-                        }),
-                        "startTime": ev.start_time,
-                        "endTime": ev.end_time,
-                        "metadata": serde_json::json!({
-                            "provider": ev.provider,
-                            "latency_ms": ev.latency_ms,
-                        }),
+                    "output": serde_json::json!({
+                        "verdict": sp.verdict,
+                        "error": sp.error,
+                        "raw": truncate(&sp.raw, 4000),
                     }),
-                },
-            ],
+                    "level": level,
+                    "startTime": sp.start_time,
+                    "endTime": sp.end_time,
+                }),
+            }],
         };
+        self.post_batch(batch).await
+    }

+    async fn post_batch(&self, batch: IngestionBatch) -> Result<(), String> {
        let url = format!("{}{}", self.inner.base_url.trim_end_matches('/'), INGESTION_PATH);
        let resp = self.inner.http
            .post(url)
@ -146,6 +137,81 @@ impl LangfuseClient {
        }
        Ok(())
    }
+
+    /// Send one completed chat call to Langfuse as a
+    /// `generation-create` event.
+    ///
+    /// When `ev.parent_trace_id` is set (forwarded by the caller via
+    /// the X-Lakehouse-Trace-Id header → V1State plumbing), the
+    /// generation is attached to that existing trace and no
+    /// `trace-create` event is sent — reusing the parent id without
+    /// re-creating it is what lets an iterate session show up as one
+    /// tree. Without a parent, a fresh trace id is minted and a
+    /// `trace-create` event precedes the generation in the same batch
+    /// (Phase 40 default).
+    async fn emit_chat_inner(&self, ev: ChatTrace) -> Result<(), String> {
+        let (trace_id, has_parent) = match ev.parent_trace_id.clone() {
+            Some(parent) => (parent, true),
+            None => (uuid_v7_like(), false),
+        };
+        let gen_id = uuid_v7_like();
+
+        let mut events = Vec::with_capacity(2);
+
+        // Top-level call: open a new trace, timestamped at call start.
+        if !has_parent {
+            let trace_event = IngestionEvent {
+                id: uuid_v7_like(),
+                timestamp: ev.start_time.clone(),
+                kind: "trace-create",
+                body: serde_json::json!({
+                    "id": trace_id,
+                    "name": format!("v1.chat:{}", ev.provider),
+                    "input": serde_json::json!({
+                        "model": ev.model,
+                        "messages": ev.input,
+                    }),
+                    "metadata": serde_json::json!({
+                        "provider": ev.provider,
+                        "think": ev.think,
+                    }),
+                }),
+            };
+            events.push(trace_event);
+        }
+
+        // The generation itself, timestamped at call end.
+        let generation_event = IngestionEvent {
+            id: uuid_v7_like(),
+            timestamp: ev.end_time.clone(),
+            kind: "generation-create",
+            body: serde_json::json!({
+                "id": gen_id,
+                "traceId": trace_id,
+                "name": "chat",
+                "model": ev.model,
+                "modelParameters": serde_json::json!({
+                    "temperature": ev.temperature,
+                    "max_tokens": ev.max_tokens,
+                    "think": ev.think,
+                }),
+                "input": ev.input,
+                "output": ev.output,
+                "usage": serde_json::json!({
+                    "input": ev.prompt_tokens,
+                    "output": ev.completion_tokens,
+                    "total": ev.prompt_tokens + ev.completion_tokens,
+                    "unit": "TOKENS",
+                }),
+                "startTime": ev.start_time,
+                "endTime": ev.end_time,
+                "metadata": serde_json::json!({
+                    "provider": ev.provider,
+                    "latency_ms": ev.latency_ms,
+                }),
+            }),
+        };
+        events.push(generation_event);
+
+        self.post_batch(IngestionBatch { batch: events }).await
+    }
+}
+
+/// Return the first `n` characters of `s` as an owned string, or all
+/// of `s` when it holds `n` or fewer. The limit counts chars, NOT
+/// bytes, so a multi-byte sequence is never split. Kept in step with
+/// the Go `trim` helper used for session log + attempt-span emission
+/// so cross-runtime traces cut at the same boundary.
+fn truncate(s: &str, n: usize) -> String {
+    // Byte offset where the (n+1)-th char starts, if there is one.
+    match s.char_indices().nth(n) {
+        Some((cut, _)) => s[..cut].to_owned(),
+        None => s.to_owned(),
+    }
 }

 /// Everything the v1.chat handler collects for one completed call.
@ -162,6 +228,32 @@ pub struct ChatTrace {
    pub start_time: String,
    pub end_time: String,
    pub latency_ms: u64,
+    /// When set, attach this chat trace as a child of the named
+    /// Langfuse trace instead of starting a new top-level trace. Used
+    /// by `/v1/iterate` to nest its inner /v1/chat hops under the
+    /// iterate-session trace so a multi-call session shows in
+    /// Langfuse as ONE trace tree, not N+1 disconnected traces.
+    /// Matches the Go-side `X-Lakehouse-Trace-Id` propagation
+    /// (commit d6d2fdf in golangLAKEHOUSE).
+    pub parent_trace_id: Option<String>,
+}
+
+/// One iteration attempt inside `/v1/iterate`'s loop. Becomes one
+/// span on the parent trace when emitted via `emit_attempt_span`.
+/// Matches Go's `validator.AttemptSpan` shape so the cross-runtime
+/// observability surface is consistent.
+pub struct AttemptSpan {
+    /// Id of the trace the span is attached to (sent as `traceId`).
+    pub trace_id: String,
+    /// Attempt number; also embedded in the span name
+    /// `iterate.attempt[<iteration>]`.
+    pub iteration: u32,
+    /// Model name, recorded in the span input.
+    pub model: String,
+    /// Provider name, recorded in the span input.
+    pub provider: String,
+    /// Prompt sent for this attempt; truncated to 4000 chars on emit.
+    pub prompt: String,
+    /// Raw model output; truncated to 4000 chars on emit.
+    pub raw: String,
+    /// Verdict kind: "no_json" | "validation_failed" | "accepted".
+    /// Only "accepted" yields span level "DEFAULT"; anything else is
+    /// emitted as "WARNING".
+    pub verdict: String,
+    /// Error detail, recorded in the span output when present.
+    pub error: Option<String>,
+    /// Span start timestamp (sent as `startTime`) — presumably
+    /// RFC 3339 like the chat trace timestamps; confirm with caller.
+    pub start_time: String,
+    /// Span end timestamp (sent as `endTime`); also used as the
+    /// ingestion event timestamp.
+    pub end_time: String,
 }

 #[derive(Serialize)]
--- a/crates/gateway/src/v1/mod.rs
+++ b/crates/gateway/src/v1/mod.rs
@ -13,7 +13,18 @@

 pub mod ollama;
 pub mod ollama_cloud;
+pub mod openrouter;
+pub mod gemini;
+pub mod claude;
+pub mod kimi;
+pub mod opencode;
+pub mod validate;
+pub mod iterate;
 pub mod langfuse_trace;
+pub mod session_log;
+pub mod mode;
+pub mod respond;
+pub mod truth;

 use axum::{
    Router,
@ -34,10 +45,52 @@ pub struct V1State {
    /// Ollama Cloud bearer token. Loaded at startup via
    /// `ollama_cloud::resolve_cloud_key()`. None = cloud routes 503.
    pub ollama_cloud_key: Option<String>,
+    /// OpenRouter bearer token — free-tier rescue rung. Loaded at
+    /// startup via `openrouter::resolve_openrouter_key()`. None means
+    /// provider="openrouter" calls 503 rather than attempt. Same key
+    /// sourcing as LLM Team UI so the two share one API quota.
+    pub openrouter_key: Option<String>,
+    /// Gemini API key (Google Generative Language). Loaded at startup
+    /// via `gemini::resolve_gemini_key()`. None = provider="gemini"
+    /// calls 503. Phase 40 deliverable.
+    pub gemini_key: Option<String>,
+    /// Anthropic Claude API key. Loaded at startup via
+    /// `claude::resolve_claude_key()`. None = provider="claude" calls
+    /// 503. Phase 40 deliverable.
+    pub claude_key: Option<String>,
+    /// Kimi For Coding (api.kimi.com) bearer token — direct provider
+    /// for `kimi-for-coding`. Used when Ollama Cloud's `kimi-k2:1t` is
+    /// upstream-broken. Loaded at startup via `kimi::resolve_kimi_key()`
+    /// from `KIMI_API_KEY` env or `/etc/lakehouse/kimi.env`. None =
+    /// provider="kimi" calls 503.
+    pub kimi_key: Option<String>,
+    /// OpenCode GO (opencode.ai) bearer token — multi-vendor curated
+    /// gateway. One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro,
+    /// Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM, Qwen + free-tier.
+    /// Loaded at startup via `opencode::resolve_opencode_key()` from
+    /// `OPENCODE_API_KEY` env or `/etc/lakehouse/opencode.env`. None =
+    /// provider="opencode" calls 503.
+    pub opencode_key: Option<String>,
+    /// Shared WorkerLookup loaded once at startup from
+    /// workers_500k.parquet (path: LH_WORKERS_PARQUET env, default
+    /// data/datasets/workers_500k.parquet). Used by /v1/validate to
+    /// run FillValidator/EmailValidator with worker-existence checks.
+    /// Falls back to an empty InMemoryWorkerLookup if the file is
+    /// missing — validators still run schema/PII checks but every
+    /// worker-existence check fails (Consistency error), which is
+    /// the correct behavior when the roster isn't configured.
+    pub validate_workers: std::sync::Arc<dyn validator::WorkerLookup>,
    /// Phase 40 early deliverable — Langfuse client. None = tracing
    /// disabled (keys missing or container unreachable). Traces are
    /// fire-and-forget: never block the response path.
    pub langfuse: Option<langfuse_trace::LangfuseClient>,
+    /// Coordinator session JSONL writer (path from
+    /// `[gateway].session_log_path`). One row per `/v1/iterate`
+    /// session for offline DuckDB analysis. None = disabled.
+    /// Cross-runtime parity with the Go-side `validatord`
+    /// `[validatord].session_log_path` (commit 1a3a82a in
+    /// golangLAKEHOUSE).
+    pub session_log: Option<session_log::SessionLogger>,
 }

 #[derive(Default, Clone, Serialize)]
@ -46,25 +99,88 @@ pub struct Usage {
    pub prompt_tokens: u64,
    pub completion_tokens: u64,
    pub total_tokens: u64,
+    #[serde(default)]
+    pub by_provider: std::collections::HashMap<String, ProviderUsage>,
+}
+
+/// Usage counters for a single provider — one value per key of
+/// `Usage::by_provider`. Carries request and token counters under the
+/// same names the aggregate `Usage` uses for its totals.
+#[derive(Default, Clone, Serialize)]
+pub struct ProviderUsage {
+    /// Number of chat calls served by this provider.
+    pub requests: u64,
+    /// Sum of prompt tokens reported for this provider.
+    pub prompt_tokens: u64,
+    /// Sum of completion tokens reported for this provider.
+    pub completion_tokens: u64,
+    /// Sum of total tokens reported for this provider.
+    pub total_tokens: u64,
 }

+/// Build the router for this module's endpoints and bind `state` to
+/// it. Paths are relative to wherever the caller mounts the router
+/// (presumably `/v1`, going by the handler docs — confirm at the
+/// mount site).
 pub fn router(state: V1State) -> Router {
     Router::new()
         .route("/chat", post(chat))
+        // Canonical OpenAI path alias — lets any client built on the
+        // openai SDK (pi-ai, langchain-js, etc.) treat the gateway as
+        // a drop-in middleware via OPENAI_BASE_URL=http://gw/v1 alone.
+        // Same handler as /chat; same OpenAI-compatible request shape.
+        .route("/chat/completions", post(chat))
+        .route("/respond", post(respond::respond))
         .route("/usage", get(usage))
         .route("/sessions", get(sessions))
+        .route("/context", get(truth::context))
+        .route("/mode", post(mode::route))
+        .route("/mode/list", get(mode::list))
+        .route("/mode/execute", post(mode::execute))
+        .route("/validate", post(validate::validate))
+        .route("/iterate", post(iterate::iterate))
+        .route("/health", get(health))
         .with_state(state)
 }

 // -- Shared types (OpenAI-compatible) --

+/// OpenAI-compatible message. `content` accepts either a plain string or
+/// an array of content parts (the modern multimodal shape:
+/// `[{type:"text", text:"..."}, {type:"image_url", ...}]`). We store as
+/// `serde_json::Value` to preserve client shape on forward; downstream
+/// providers can take it verbatim. `Message::text()` flattens for
+/// places that need a plain string (Ollama prompt assembly, char
+/// counts, the assistant's own response synthesis).
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct Message {
+    /// Speaker role as sent by the client; stored as a free-form
+    /// string and not validated by this type.
     pub role: String,
-    pub content: String,
+    /// Message body exactly as received: any JSON value is accepted
+    /// at deserialization time.
+    pub content: serde_json::Value,
 }

-#[derive(Deserialize, Debug)]
+impl Message {
+    /// Construct a plain text message — the common shape for callers
+    /// that don't need multimodal content. Wraps the body in
+    /// `serde_json::Value::String` so downstream serializers see the
+    /// canonical OpenAI shape.
+    pub fn new_text(role: impl Into<String>, body: impl Into<String>) -> Self {
+        Self {
+            role: role.into(),
+            content: serde_json::Value::String(body.into()),
+        }
+    }
+
+    /// Flatten content to a plain string.
+    ///
+    /// * String content passes through unchanged.
+    /// * Content-part arrays concatenate the `text` fields, separated
+    ///   by newlines, and skip non-text parts (images etc.) —
+    ///   Phase 38/39 callers are text-only, real multimodal
+    ///   forwarding is queued.
+    /// * `null` content flattens to the empty string.
+    /// * Any other JSON value is rendered as its compact JSON text.
+    pub fn text(&self) -> String {
+        match &self.content {
+            serde_json::Value::String(s) => s.clone(),
+            serde_json::Value::Array(parts) => {
+                let mut out = String::new();
+                for p in parts {
+                    if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
+                        if !out.is_empty() { out.push('\n'); }
+                        out.push_str(t);
+                    }
+                }
+                out
+            }
+            // OpenAI-shaped messages may carry `content: null` (e.g. an
+            // assistant turn that holds only tool calls). Rendering that
+            // through `to_string()` would inject the literal word "null"
+            // into prompts, traces and char counts.
+            serde_json::Value::Null => String::new(),
+            other => other.to_string(),
+        }
+    }
+}
+
+#[derive(Deserialize, Debug, Clone)]
 pub struct ChatRequest {
    pub model: String,
    pub messages: Vec<Message>,
@ -120,8 +236,140 @@ pub struct UsageBlock {

 // -- Handlers --

+/// Phase 39: resolve (provider, effective_model) from a ChatRequest.
+///
+/// Resolution order:
+///
+/// 1. An explicit `req.provider` wins. It is lowercased and the model
+///    is passed through untouched, even if it carries a prefix.
+/// 2. A routing prefix on the model selects the provider and is
+///    stripped, because the upstream adapter expects the bare name
+///    (OpenRouter wants "openai/gpt-4o-mini", not
+///    "openrouter/openai/gpt-4o-mini"): `openrouter/`, `cloud/`
+///    (→ ollama_cloud), `gemini/`, `claude/`, `kimi/`, `opencode/`.
+/// 3. Any other `vendor/model` name (e.g. `x-ai/grok-4.1-fast`,
+///    `openai/gpt-oss-120b:free`) goes to OpenRouter unchanged. This
+///    is what makes the gateway a drop-in for clients that only set
+///    OPENAI_BASE_URL + a model name. Ollama models in J's stack use
+///    `model:tag` with NO slash, so a slash unambiguously means a
+///    namespaced model.
+/// 4. A vendor-bare name with no slash and no colon (`gpt-4o-mini`,
+///    `claude-3-5-sonnet-20241022`, `grok-…`) — the form tools like
+///    pi-ai send — is mapped to OpenRouter's namespaced form by
+///    inferring the vendor from the leading token.
+/// 5. Everything else defaults to "ollama".
+fn resolve_provider(req: &ChatRequest) -> (String, String) {
+    // (model prefix, provider). The prefixes are mutually exclusive,
+    // so lookup order does not affect the result.
+    const PREFIX_ROUTES: [(&str, &str); 6] = [
+        ("openrouter/", "openrouter"),
+        ("cloud/", "ollama_cloud"),
+        ("gemini/", "gemini"),
+        ("claude/", "claude"),
+        ("kimi/", "kimi"),
+        ("opencode/", "opencode"),
+    ];
+    // Leading tokens of OpenAI's vendor-bare model names.
+    const OPENAI_STEMS: [&str; 4] = ["gpt-", "o1-", "o3-", "o4-"];
+
+    let model = req.model.as_str();
+
+    if let Some(explicit) = req.provider.as_deref() {
+        return (explicit.to_ascii_lowercase(), model.to_owned());
+    }
+
+    let prefixed = PREFIX_ROUTES.iter().find_map(|&(prefix, provider)| {
+        model
+            .strip_prefix(prefix)
+            .map(|rest| (provider.to_string(), rest.to_string()))
+    });
+    if let Some(route) = prefixed {
+        return route;
+    }
+
+    if model.contains('/') {
+        return ("openrouter".to_string(), model.to_owned());
+    }
+
+    // From here on the model has no slash; a colon marks an Ollama
+    // `model:tag` name, which must fall through to the default.
+    if !model.contains(':') {
+        let looks_openai = OPENAI_STEMS.iter().any(|&stem| model.starts_with(stem))
+            || matches!(model, "o1" | "o3");
+        let vendor = if looks_openai {
+            Some("openai")
+        } else if model.starts_with("claude-") {
+            Some("anthropic")
+        } else if model.starts_with("grok-") {
+            Some("x-ai")
+        } else {
+            None
+        };
+        if let Some(vendor) = vendor {
+            return ("openrouter".to_string(), format!("{}/{}", vendor, model));
+        }
+    }
+
+    ("ollama".to_string(), model.to_owned())
+}
+
+/// Routing table tests for `resolve_provider`: explicit provider,
+/// every stripped prefix, the bare `vendor/model` fallback, the
+/// vendor-bare heuristics, and the Ollama default.
+#[cfg(test)]
+mod resolve_provider_tests {
+    use super::*;
+
+    /// Minimal request carrying only the two fields routing looks at.
+    fn mk_req(provider: Option<&str>, model: &str) -> ChatRequest {
+        ChatRequest {
+            model: model.to_string(),
+            messages: vec![],
+            temperature: None,
+            max_tokens: None,
+            stream: None,
+            think: None,
+            provider: provider.map(|s| s.to_string()),
+        }
+    }
+
+    #[test]
+    fn explicit_provider_wins() {
+        let r = mk_req(Some("openrouter"), "qwen3.5:latest");
+        assert_eq!(resolve_provider(&r), ("openrouter".into(), "qwen3.5:latest".into()));
+    }
+
+    #[test]
+    fn explicit_provider_is_lowercased() {
+        let r = mk_req(Some("OpenRouter"), "qwen3.5:latest");
+        assert_eq!(resolve_provider(&r), ("openrouter".into(), "qwen3.5:latest".into()));
+    }
+
+    #[test]
+    fn bare_model_defaults_to_ollama() {
+        let r = mk_req(None, "qwen3.5:latest");
+        assert_eq!(resolve_provider(&r), ("ollama".into(), "qwen3.5:latest".into()));
+    }
+
+    #[test]
+    fn openrouter_prefix_infers_and_strips() {
+        let r = mk_req(None, "openrouter/openai/gpt-4o-mini");
+        assert_eq!(resolve_provider(&r), ("openrouter".into(), "openai/gpt-4o-mini".into()));
+    }
+
+    #[test]
+    fn cloud_prefix_infers_and_strips() {
+        let r = mk_req(None, "cloud/kimi-k2:1t");
+        assert_eq!(resolve_provider(&r), ("ollama_cloud".into(), "kimi-k2:1t".into()));
+    }
+
+    #[test]
+    fn explicit_provider_preserves_full_model_even_with_prefix() {
+        // If caller provides both provider and a model with a prefix,
+        // trust them — don't strip. The adapter will get the full model
+        // string as-is.
+        let r = mk_req(Some("openrouter"), "openrouter/openai/gpt-4o-mini");
+        assert_eq!(resolve_provider(&r), ("openrouter".into(), "openrouter/openai/gpt-4o-mini".into()));
+    }
+
+    #[test]
+    fn gemini_prefix_infers_and_strips() {
+        let r = mk_req(None, "gemini/gemini-2.0-flash");
+        assert_eq!(resolve_provider(&r), ("gemini".into(), "gemini-2.0-flash".into()));
+    }
+
+    #[test]
+    fn claude_prefix_infers_and_strips() {
+        let r = mk_req(None, "claude/claude-3-5-sonnet-latest");
+        assert_eq!(resolve_provider(&r), ("claude".into(), "claude-3-5-sonnet-latest".into()));
+    }
+
+    #[test]
+    fn kimi_prefix_infers_and_strips() {
+        let r = mk_req(None, "kimi/kimi-for-coding");
+        assert_eq!(resolve_provider(&r), ("kimi".into(), "kimi-for-coding".into()));
+    }
+
+    #[test]
+    fn opencode_prefix_infers_and_strips() {
+        let r = mk_req(None, "opencode/kimi-k2.6");
+        assert_eq!(resolve_provider(&r), ("opencode".into(), "kimi-k2.6".into()));
+    }
+
+    #[test]
+    fn bare_vendor_slash_model_routes_to_openrouter_unstripped() {
+        // A slash with no known routing prefix means a namespaced
+        // OpenRouter model; the name must reach the adapter intact,
+        // including any `:tag` suffix.
+        for model in ["x-ai/grok-4.1-fast", "moonshotai/kimi-k2", "openai/gpt-oss-120b:free"] {
+            let r = mk_req(None, model);
+            assert_eq!(resolve_provider(&r), ("openrouter".into(), model.to_string()));
+        }
+    }
+
+    #[test]
+    fn vendor_bare_names_map_to_openrouter_namespaces() {
+        let cases = [
+            ("gpt-4o-mini", "openai/gpt-4o-mini"),
+            ("o1", "openai/o1"),
+            ("o3-mini", "openai/o3-mini"),
+            ("o4-mini", "openai/o4-mini"),
+            ("claude-3-5-sonnet-20241022", "anthropic/claude-3-5-sonnet-20241022"),
+            ("grok-4", "x-ai/grok-4"),
+        ];
+        for (input, expected) in cases {
+            let r = mk_req(None, input);
+            assert_eq!(
+                resolve_provider(&r),
+                ("openrouter".into(), expected.to_string()),
+                "{input} should become {expected}"
+            );
+        }
+    }
+
+    #[test]
+    fn tagged_or_unrecognised_bare_names_stay_on_ollama() {
+        // A colon marks an Ollama `model:tag` name even when the stem
+        // looks like a vendor model; unknown stems also fall through.
+        for model in ["gpt-oss:20b", "llama3", "kimi-k2:1t"] {
+            let r = mk_req(None, model);
+            assert_eq!(resolve_provider(&r), ("ollama".into(), model.to_string()));
+        }
+    }
+}
+
 async fn chat(
    State(state): State<V1State>,
+    headers: axum::http::HeaderMap,
    Json(req): Json<ChatRequest>,
 ) -> Result<Json<ChatResponse>, (StatusCode, String)> {
    if req.messages.is_empty() {
@ -131,27 +379,111 @@ async fn chat(
        tracing::warn!("/v1/chat: stream=true requested but Phase 38 returns non-streaming");
    }

-    let provider = req.provider.as_deref().unwrap_or("ollama").to_ascii_lowercase();
+    // Provider resolution: explicit `req.provider` wins; otherwise
+    // infer from a model-name prefix. Phase 39 PRD gate example:
+    // `model: "openrouter/openai/gpt-4o-mini"` → provider "openrouter",
+    // adapter gets the stripped "openai/gpt-4o-mini".
+    let (provider, effective_model) = resolve_provider(&req);
    let start_time = chrono::Utc::now();
    let start_instant = std::time::Instant::now();

-    let resp = match provider.as_str() {
-        "ollama" | "local" | "" => ollama::chat(&state.ai_client, &req)
-            .await
-            .map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama local: {e}")))?,
+    // If we stripped a prefix, clone req with the effective model so
+    // the adapter sees what the upstream provider expects (OpenRouter
+    // wants "openai/gpt-4o-mini", not "openrouter/openai/gpt-4o-mini").
+    let req_for_adapter: std::borrow::Cow<'_, ChatRequest> =
+        if effective_model == req.model {
+            std::borrow::Cow::Borrowed(&req)
+        } else {
+            let mut cloned = req.clone();
+            cloned.model = effective_model.clone();
+            std::borrow::Cow::Owned(cloned)
+        };
+
+    let (resp, used_provider) = match provider.as_str() {
+        "ollama" | "local" | "" => {
+            let r = ollama::chat(&state.ai_client, &*req_for_adapter)
+                .await
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama local: {e}")))?;
+            (r, "ollama".to_string())
+        }
        "ollama_cloud" | "cloud" => {
            let key = state.ollama_cloud_key.as_deref().ok_or((
                StatusCode::SERVICE_UNAVAILABLE,
-                "OLLAMA_CLOUD_KEY not configured — set env or populate /root/llm_team_config.json".to_string(),
+                "OLLAMA_CLOUD_KEY not configured".to_string(),
            ))?;
-            ollama_cloud::chat(key, &req)
+            let r = ollama_cloud::chat(key, &*req_for_adapter)
                .await
-                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama cloud: {e}")))?
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama cloud: {e}")))?;
+            (r, "ollama_cloud".to_string())
+        }
+        "openrouter" | "openrouter_free" => {
+            // Free-tier rescue rung. Added 2026-04-24 after iter 5
+            // repeated Ollama Cloud 502s on kimi-k2:1t — OpenRouter
+            // gives a different provider backbone as fallback.
+            let key = state.openrouter_key.as_deref().ok_or((
+                StatusCode::SERVICE_UNAVAILABLE,
+                "OPENROUTER_API_KEY not configured".to_string(),
+            ))?;
+            let r = openrouter::chat(key, &*req_for_adapter)
+                .await
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?;
+            (r, "openrouter".to_string())
+        }
+        "gemini" => {
+            // Phase 40 provider adapter. Google Generative Language
+            // API via query-string key auth (not bearer).
+            let key = state.gemini_key.as_deref().ok_or((
+                StatusCode::SERVICE_UNAVAILABLE,
+                "GEMINI_API_KEY not configured".to_string(),
+            ))?;
+            let r = gemini::chat(key, &*req_for_adapter)
+                .await
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("gemini: {e}")))?;
+            (r, "gemini".to_string())
+        }
+        "claude" | "anthropic" => {
+            // Phase 40 provider adapter. Anthropic Messages API via
+            // x-api-key header + anthropic-version:2023-06-01.
+            let key = state.claude_key.as_deref().ok_or((
+                StatusCode::SERVICE_UNAVAILABLE,
+                "ANTHROPIC_API_KEY not configured".to_string(),
+            ))?;
+            let r = claude::chat(key, &*req_for_adapter)
+                .await
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("claude: {e}")))?;
+            (r, "claude".to_string())
+        }
+        "kimi" => {
+            // Direct Kimi For Coding provider — bypasses Ollama Cloud's
+            // upstream-broken kimi-k2:1t and OpenRouter's rate-limited
+            // moonshotai/kimi-k2.6. Uses sk-kimi-* keys from the Kimi
+            // membership console.
+            let key = state.kimi_key.as_deref().ok_or((
+                StatusCode::SERVICE_UNAVAILABLE,
+                "KIMI_API_KEY not configured".to_string(),
+            ))?;
+            let r = kimi::chat(key, &*req_for_adapter)
+                .await
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("kimi: {e}")))?;
+            (r, "kimi".to_string())
+        }
+        "opencode" => {
+            // OpenCode GO multi-vendor gateway — Claude Opus 4.7,
+            // GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
+            // Qwen, free-tier. OpenAI-compat at opencode.ai/zen/go/v1.
+            let key = state.opencode_key.as_deref().ok_or((
+                StatusCode::SERVICE_UNAVAILABLE,
+                "OPENCODE_API_KEY not configured".to_string(),
+            ))?;
+            let r = opencode::chat(key, &*req_for_adapter)
+                .await
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("opencode: {e}")))?;
+            (r, "opencode".to_string())
        }
        other => {
            return Err((
                StatusCode::BAD_REQUEST,
-                format!("unknown provider '{other}' — supported: ollama, ollama_cloud"),
+                format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi, opencode"),
            ));
        }
    };
@ -165,10 +497,21 @@ async fn chat(
    // untouched.
    if let Some(lf) = &state.langfuse {
        let output = resp.choices.first()
-            .map(|c| c.message.content.clone())
+            .map(|c| c.message.text())
            .unwrap_or_default();
+        // Cross-runtime trace linkage. When a caller (validatord on
+        // Go side, /v1/iterate on Rust side) forwards a parent trace
+        // id via X-Lakehouse-Trace-Id, attach this generation to that
+        // trace so the iterate session and its inner chat hops show
+        // up as ONE trace tree in Langfuse. Header name matches the
+        // Go-side `shared.TraceIDHeader` constant byte-for-byte.
+        let parent_trace_id = headers
+            .get(crate::v1::iterate::TRACE_ID_HEADER)
+            .and_then(|v| v.to_str().ok())
+            .map(|s| s.to_string())
+            .filter(|s| !s.is_empty());
        lf.emit_chat(langfuse_trace::ChatTrace {
-            provider: provider.clone(),
+            provider: used_provider.clone(),
            model: resp.model.clone(),
            input: req.messages.clone(),
            output,
@ -180,15 +523,63 @@ async fn chat(
            start_time: start_time.to_rfc3339(),
            end_time: end_time.to_rfc3339(),
            latency_ms,
+            parent_trace_id,
        });
    }

+    // Phase 40 part 2 — fire-and-forget /event to observer at :3800.
+    // Same ring-buffer that scrum + scenario events land in, so any
+    // tool-routed-through-our-gateway (Pi, Archon, openai SDK clients)
+    // shows up alongside scrum_master events for KB consolidation +
+    // pathway-memory + bug-fingerprint compounding. Best-effort:
+    // observer being down doesn't block the chat response.
+    {
+        let provider = used_provider.clone();
+        let model = resp.model.clone();
+        let prompt_tokens = resp.usage.prompt_tokens;
+        let completion_tokens = resp.usage.completion_tokens;
+        let success = true;
+        tokio::spawn(async move {
+            let body = serde_json::json!({
+                "endpoint": "/v1/chat",
+                "source": "v1.chat",
+                "event_kind": "chat_completion",
+                "input_summary": format!(
+                    "{} {} prompt={}t",
+                    provider, model, prompt_tokens
+                ),
+                "output_summary": format!(
+                    "completion={}t {}ms",
+                    completion_tokens, latency_ms
+                ),
+                "success": success,
+                "duration_ms": latency_ms,
+            });
+            let client = reqwest::Client::builder()
+                .timeout(std::time::Duration::from_secs(2))
+                .build()
+                .unwrap_or_else(|_| reqwest::Client::new());
+            let _ = client
+                .post("http://localhost:3800/event")
+                .json(&body)
+                .send()
+                .await;
+        });
+    }
+
+    // Phase 40: per-provider usage tracking
    {
        let mut u = state.usage.write().await;
        u.requests += 1;
        u.prompt_tokens += resp.usage.prompt_tokens as u64;
        u.completion_tokens += resp.usage.completion_tokens as u64;
        u.total_tokens += resp.usage.total_tokens as u64;
+
+        let provider_usage = u.by_provider.entry(used_provider).or_default();
+        provider_usage.requests += 1;
+        provider_usage.prompt_tokens += resp.usage.prompt_tokens as u64;
+        provider_usage.completion_tokens += resp.usage.completion_tokens as u64;
+        provider_usage.total_tokens += resp.usage.total_tokens as u64;
    }

    Ok(Json(resp))
@ -199,6 +590,43 @@ async fn usage(State(state): State<V1State>) -> impl IntoResponse {
    Json(snapshot)
 }

+/// Production operational health endpoint.
+///
+/// `/v1/health` reports per-subsystem status as a JSON object so an
+/// operator (or the lakehouse-auditor service, or a load balancer)
+/// can verify the gateway is fully booted, has its provider keys
+/// loaded, the worker roster is hot, and Langfuse is configured.
+/// `langfuse_configured` only reflects that a Langfuse handle is
+/// present on the state — this handler does not probe reachability.
+/// Returns 200 always — fields are observed-state, not pass/fail
+/// gates. A monitoring tool should evaluate the booleans + counts
+/// against its own thresholds.
+async fn health(State(state): State<V1State>) -> impl IntoResponse {
+    // Honest worker count via WorkerLookup::len. Production switchover
+    // verification: after swapping workers_500k.parquet → real Chicago
+    // data and restarting, this number should match the row count of
+    // the new file. 0 means the file was missing / unreadable / had a
+    // schema mismatch and the gateway booted with the empty fallback.
+    let workers_count = state.validate_workers.len();
+    // Presence checks only: `true` means a key was resolved, not that
+    // the upstream provider accepts it.
+    let providers_configured = serde_json::json!({
+        "ollama_cloud": state.ollama_cloud_key.is_some(),
+        "openrouter": state.openrouter_key.is_some(),
+        "kimi": state.kimi_key.is_some(),
+        "opencode": state.opencode_key.is_some(),
+        "gemini": state.gemini_key.is_some(),
+        "claude": state.claude_key.is_some(),
+    });
+    let langfuse_configured = state.langfuse.is_some();
+    // Clone the counters so the read guard is released at the end of
+    // this statement rather than held while the response is built.
+    let usage_snapshot = state.usage.read().await.clone();
+    Json(serde_json::json!({
+        "status": "ok",
+        "workers_count": workers_count,
+        "workers_loaded": workers_count > 0,
+        "providers_configured": providers_configured,
+        "langfuse_configured": langfuse_configured,
+        "usage_total_requests": usage_snapshot.requests,
+        "usage_by_provider": usage_snapshot.by_provider.keys().collect::<Vec<_>>(),
+    }))
+}
+
 // Phase 38 is stateless — no session persistence yet. Return an empty
 // list in OpenAI-ish shape so clients that probe this endpoint don't
 // 404. Real session state lands in Phase 41 with the profile-system
@ -230,7 +658,7 @@ mod tests {
        assert_eq!(r.model, "qwen3.5:latest");
        assert_eq!(r.messages.len(), 2);
        assert_eq!(r.messages[0].role, "system");
-        assert_eq!(r.messages[1].content, "Hi");
+        assert_eq!(r.messages[1].text(), "Hi");
        assert_eq!(r.temperature, Some(0.2));
        assert_eq!(r.max_tokens, Some(100));
    }
--- a/crates/gateway/src/v1/mode.rs
+++ b/crates/gateway/src/v1/mode.rs
--- a/crates/gateway/src/v1/ollama.rs
+++ b/crates/gateway/src/v1/ollama.rs
@ -60,10 +60,7 @@ pub async fn chat(client: &AiClient, req: &ChatRequest) -> Result<ChatResponse,
        model: resp.model,
        choices: vec![Choice {
            index: 0,
-            message: Message {
-                role: "assistant".into(),
-                content: resp.text,
-            },
+            message: Message::new_text("assistant", resp.text),
            finish_reason: "stop".into(),
        }],
        usage: UsageBlock {
@ -89,13 +86,14 @@ fn flatten_messages(messages: &[Message]) -> (String, String) {
    let mut system = String::new();
    let mut prompt = String::new();
    for m in messages {
+        let body = m.text();
        if m.role == "system" {
            if !system.is_empty() { system.push('\n'); }
-            system.push_str(&m.content);
+            system.push_str(&body);
        } else {
            prompt.push_str(&m.role);
            prompt.push_str(": ");
-            prompt.push_str(&m.content);
+            prompt.push_str(&body);
            prompt.push_str("\n\n");
        }
    }
@ -104,7 +102,7 @@ fn flatten_messages(messages: &[Message]) -> (String, String) {
 }

 fn estimate_prompt_tokens(messages: &[Message]) -> u32 {
-    let chars: usize = messages.iter().map(|m| m.content.chars().count()).sum();
+    let chars: usize = messages.iter().map(|m| m.text().chars().count()).sum();
    ((chars + 3) / 4) as u32
 }

--- a/crates/gateway/src/v1/ollama_cloud.rs
+++ b/crates/gateway/src/v1/ollama_cloud.rs
@ -88,7 +88,7 @@ pub async fn chat(
    let text = parsed.response.unwrap_or_default();

    let prompt_tokens = parsed.prompt_eval_count.unwrap_or_else(|| {
-        let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
+        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.eval_count.unwrap_or_else(|| {
@ -112,7 +112,7 @@ pub async fn chat(
        model: parsed.model.unwrap_or_else(|| req.model.clone()),
        choices: vec![Choice {
            index: 0,
-            message: Message { role: "assistant".into(), content: text },
+            message: Message::new_text("assistant", text),
            finish_reason: "stop".into(),
        }],
        usage: UsageBlock {
--- a/crates/gateway/src/v1/opencode.rs
+++ b/crates/gateway/src/v1/opencode.rs
@ -0,0 +1,228 @@
+//! OpenCode GO adapter — multi-vendor curated gateway via opencode.ai/zen/v1.
+//!
+//! One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro,
+//! Kimi K2.6, DeepSeek, GLM, Qwen, plus 4 free-tier models.
+//! OpenAI-compatible Chat Completions; auth via Bearer.
+//!
+//! Why a separate adapter (vs reusing openrouter.rs):
+//! - Different account, different key, different base_url
+//! - No HTTP-Referer / X-Title headers (those are OpenRouter-specific)
+//! - Future-proof for any opencode-only request shaping
+//!
+//! Key sourcing priority:
+//!   1. Env var `OPENCODE_API_KEY` (loaded from /etc/lakehouse/opencode.env
+//!      via systemd EnvironmentFile=)
+//!   2. /etc/lakehouse/opencode.env directly (rescue path if env missing)
+//!
+//! Resolved once at gateway startup, stored on `V1State.opencode_key`.
+//! Model-prefix routing: "opencode/<model>" auto-routes here, prefix
+//! stripped before upstream call.
+
+use std::time::Duration;
+use serde::{Deserialize, Serialize};
+
+use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
+
+// /zen/v1 is the unified OpenCode endpoint that covers BOTH the
+// Zen pay-per-token tier (Claude/GPT/Gemini frontier) AND the Go
+// subscription tier (Kimi/GLM/DeepSeek/Qwen/Minimax/mimo). When the
+// caller has both, opencode bills per-model: Zen models charge Zen
+// balance, Go models charge against the Go subscription cap.
+//
+// /zen/go/v1 exists as a Go-only sub-path (rejects Zen models with
+// "Model not supported"); we use the unified /zen/v1 since the same
+// key works for both with correct billing routing upstream.
+const OPENCODE_BASE_URL: &str = "https://opencode.ai/zen/v1";
+// 600s default — opencode upstream models include reasoning-heavy
+// variants (Claude Opus, Kimi K2.6, GLM-5.1) that legitimately take
+// 3-5 min on big audit prompts. Override via OPENCODE_TIMEOUT_SECS.
+const OPENCODE_TIMEOUT_SECS_DEFAULT: u64 = 600;
+
+/// Request timeout for OpenCode calls, in seconds.
+///
+/// Uses `OPENCODE_TIMEOUT_SECS` when it is set and parses (after
+/// trimming) to a positive integer; an unset, unparsable, or zero
+/// value yields `OPENCODE_TIMEOUT_SECS_DEFAULT`.
+fn opencode_timeout_secs() -> u64 {
+    let configured = std::env::var("OPENCODE_TIMEOUT_SECS").ok();
+    match configured.and_then(|raw| raw.trim().parse::<u64>().ok()) {
+        Some(secs) if secs > 0 => secs,
+        _ => OPENCODE_TIMEOUT_SECS_DEFAULT,
+    }
+}
+
+/// Looks up the OpenCode API key, returning the first non-empty hit.
+///
+/// Sources, in order: the `OPENCODE_API_KEY` environment variable, then
+/// an `OPENCODE_API_KEY=` line in `/etc/lakehouse/opencode.env`. Values
+/// are whitespace-trimmed; file values additionally lose surrounding
+/// double or single quotes. Returns `None` when neither source yields
+/// a key.
+pub fn resolve_opencode_key() -> Option<String> {
+    let from_env = std::env::var("OPENCODE_API_KEY")
+        .ok()
+        .map(|value| value.trim().to_string())
+        .filter(|value| !value.is_empty());
+    if from_env.is_some() {
+        return from_env;
+    }
+
+    // Rescue path: read the env file directly when the process
+    // environment does not carry the key.
+    let contents = std::fs::read_to_string("/etc/lakehouse/opencode.env").ok()?;
+    contents
+        .lines()
+        .filter_map(|line| line.strip_prefix("OPENCODE_API_KEY="))
+        .map(|rest| rest.trim().trim_matches('"').trim_matches('\''))
+        .find(|candidate| !candidate.is_empty())
+        .map(str::to_string)
+}
+
+/// Sends `req` to OpenCode's OpenAI-compatible `/chat/completions`
+/// endpoint and converts the first returned choice into a
+/// [`ChatResponse`].
+///
+/// `key` is sent as a Bearer token. An `opencode/` prefix on
+/// `req.model` is stripped before the upstream call. `max_tokens`
+/// falls back to 800 when absent or zero; `temperature` falls back to
+/// 0.3, or is omitted entirely for model families that reject it.
+/// Token counts come from the upstream `usage` block when present and
+/// are otherwise estimated at roughly four characters per token.
+///
+/// # Errors
+///
+/// Returns a human-readable `String` when the HTTP client cannot be
+/// built, the request fails to complete, the upstream status is not
+/// 2xx (status and body are included), the body does not parse as a
+/// chat-completion response, or the response carries no choices.
+pub async fn chat(
+    key: &str,
+    req: &ChatRequest,
+) -> Result<ChatResponse, String> {
+    // Strip the "opencode/" namespace prefix so the upstream sees the
+    // bare model id (e.g. "claude-opus-4-7", "kimi-k2.6").
+    let model = req.model.strip_prefix("opencode/").unwrap_or(&req.model).to_string();
+
+    // Anthropic models on opencode reject `temperature` with a 400
+    // "temperature is deprecated for this model" error. Strip the
+    // field for claude-* and the new gpt-5.x reasoning lineages
+    // (Anthropic/OpenAI's reasoning models all moved away from temp).
+    // Other models keep the caller's value or default to 0.3.
+    let drop_temp = model.starts_with("claude-")
+        || model.starts_with("gpt-5")
+        || model.starts_with("o1")
+        || model.starts_with("o3")
+        || model.starts_with("o4");
+    let body = OCChatBody {
+        model: model.clone(),
+        // Content is forwarded verbatim as a JSON value so string and
+        // content-parts payloads both reach upstream unchanged.
+        messages: req.messages.iter().map(|m| OCMessage {
+            role: m.role.clone(),
+            content: m.content.clone(),
+        }).collect(),
+        // filter(|&n| n > 0) catches Some(0) — same trap that bit the
+        // Kimi adapter when callers passed empty-env-parsed-to-0.
+        max_tokens: req.max_tokens.filter(|&n| n > 0).unwrap_or(800),
+        temperature: if drop_temp { None } else { Some(req.temperature.unwrap_or(0.3)) },
+        stream: false,
+    };
+
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(opencode_timeout_secs()))
+        .build()
+        .map_err(|e| format!("build client: {e}"))?;
+
+    let t0 = std::time::Instant::now();
+    let resp = client
+        .post(format!("{}/chat/completions", OPENCODE_BASE_URL))
+        .bearer_auth(key)
+        .json(&body)
+        .send()
+        .await
+        .map_err(|e| format!("opencode.ai unreachable: {e}"))?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        // Surface the upstream body so the caller can see why the
+        // request was rejected; "?" stands in if it cannot be read.
+        let body = resp.text().await.unwrap_or_else(|_| "?".into());
+        return Err(format!("opencode.ai {}: {}", status, body));
+    }
+
+    let parsed: OCChatResponse = resp.json().await
+        .map_err(|e| format!("invalid opencode response: {e}"))?;
+
+    let latency_ms = t0.elapsed().as_millis();
+    let choice = parsed.choices.into_iter().next()
+        .ok_or_else(|| "opencode returned no choices".to_string())?;
+    let finish_reason = choice.finish_reason.unwrap_or_else(|| "stop".into());
+    let text = choice.message.content;
+
+    // Prefer upstream token accounting; otherwise estimate at ~4
+    // characters per token, rounding up.
+    let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
+        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
+        ((chars + 3) / 4) as u32
+    });
+    let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
+        ((text.chars().count() + 3) / 4) as u32
+    });
+
+    tracing::info!(
+        target: "v1.chat",
+        provider = "opencode",
+        model = %model,
+        prompt_tokens,
+        completion_tokens,
+        latency_ms = latency_ms as u64,
+        "opencode chat completed",
+    );
+
+    Ok(ChatResponse {
+        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
+        object: "chat.completion",
+        created: chrono::Utc::now().timestamp(),
+        model,
+        choices: vec![Choice {
+            index: 0,
+            // Same constructor the Ollama adapters use for assistant
+            // replies, so every provider builds messages one way.
+            message: Message::new_text("assistant", text),
+            finish_reason,
+        }],
+        usage: UsageBlock {
+            prompt_tokens,
+            completion_tokens,
+            total_tokens: prompt_tokens + completion_tokens,
+        },
+    })
+}
+
+// -- OpenCode wire shapes (OpenAI-compatible) --
+
+/// Request payload serialized to the upstream `/chat/completions` call.
+#[derive(Serialize)]
+struct OCChatBody {
+    model: String,
+    messages: Vec<OCMessage>,
+    max_tokens: u32,
+    // Left out of the JSON entirely when `None`, so models that reject
+    // the field never see it.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    temperature: Option<f64>,
+    stream: bool,
+}
+
+/// One outbound chat message; `content` is an arbitrary JSON value so
+/// whatever shape the caller supplied is forwarded as-is.
+#[derive(Serialize)]
+struct OCMessage { role: String, content: serde_json::Value }
+
+/// The subset of the upstream response this adapter reads.
+#[derive(Deserialize)]
+struct OCChatResponse {
+    choices: Vec<OCChoice>,
+    // Absent `usage` deserializes to `None`.
+    #[serde(default)]
+    usage: Option<OCUsage>,
+}
+
+/// A single completion choice from the upstream response.
+#[derive(Deserialize)]
+struct OCChoice {
+    message: OCMessageResp,
+    // Absent `finish_reason` deserializes to `None`.
+    #[serde(default)]
+    finish_reason: Option<String>,
+}
+
+/// Assistant message in a choice.
+// NOTE(review): `content` is a plain `String`, so a response carrying
+// `"content": null` (or no `content`) would fail to parse — confirm
+// upstream never sends that for the models routed here.
+#[derive(Deserialize)]
+struct OCMessageResp { content: String }
+
+/// Token accounting reported by upstream; both fields are required
+/// whenever a `usage` object is present.
+#[derive(Deserialize)]
+struct OCUsage { prompt_tokens: u32, completion_tokens: u32 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Smoke check: key resolution must return cleanly whether or not
+    /// any key source exists on this machine.
+    #[test]
+    fn resolve_opencode_key_does_not_panic() {
+        let _resolved = resolve_opencode_key();
+    }
+
+    /// The `opencode/` namespace is removed; bare ids pass through.
+    #[test]
+    fn model_prefix_strip() {
+        let strip = |id: &'static str| id.strip_prefix("opencode/").unwrap_or(id);
+        assert_eq!(strip("opencode/claude-opus-4-7"), "claude-opus-4-7");
+        assert_eq!(strip("opencode/kimi-k2.6"), "kimi-k2.6");
+        assert_eq!(strip("claude-opus-4-7"), "claude-opus-4-7");
+    }
+
+    /// The trap: empty env -> Number("") -> 0 -> Some(0). The adapter
+    /// must never send 0 upstream; it falls back to 800 instead.
+    #[test]
+    fn max_tokens_filters_zero() {
+        let effective = |requested: Option<u32>| requested.filter(|&n| n > 0).unwrap_or(800);
+        assert_eq!(effective(Some(0)), 800);
+        assert_eq!(effective(Some(4096)), 4096);
+        assert_eq!(effective(None), 800);
+    }
+}
--- a/crates/gateway/src/v1/openrouter.rs
+++ b/crates/gateway/src/v1/openrouter.rs
@ -0,0 +1,220 @@
+//! OpenRouter adapter — free-tier rescue rung for /v1/chat.
+//!
+//! Direct HTTPS call to `https://openrouter.ai/api/v1/chat/completions`
+//! with Bearer auth. Mirrors the OpenAI-compatible shape so the model
+//! list can be expanded without code changes. Added 2026-04-24 after
+//! iter 5 hit repeated Ollama Cloud 502s on kimi-k2:1t — OpenRouter
+//! free-tier models give us a different provider backbone as fallback.
+//!
+//! Key sourcing priority:
+//!   1. Env var `OPENROUTER_API_KEY`
+//!   2. `/home/profit/.env`, then `/root/.env` (LLM Team convention)
+//!   3. `/root/llm_team_config.json` → providers.openrouter.api_key
+//!
+//! First hit wins. Key is resolved once at gateway startup and stored
+//! on `V1State.openrouter_key`.
+
+use std::time::Duration;
+use serde::{Deserialize, Serialize};
+
+use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
+
+const OR_BASE_URL: &str = "https://openrouter.ai/api/v1";
+const OR_TIMEOUT_SECS: u64 = 180;
+
+/// Looks up the OpenRouter API key, returning the first non-empty hit.
+///
+/// Sources, in order: the `OPENROUTER_API_KEY` environment variable; an
+/// `OPENROUTER_API_KEY=` line in `/home/profit/.env` or `/root/.env`;
+/// the `providers.openrouter.api_key` string in
+/// `/root/llm_team_config.json`. Returns `None` when every source is
+/// missing, unreadable, or empty.
+pub fn resolve_openrouter_key() -> Option<String> {
+    let from_env = std::env::var("OPENROUTER_API_KEY")
+        .ok()
+        .map(|value| value.trim().to_string())
+        .filter(|value| !value.is_empty());
+    if from_env.is_some() {
+        return from_env;
+    }
+
+    // LLM Team UI writes its key to ~/.env on the host user — pick it up
+    // from the same source so the free-tier rescue path works without
+    // an explicit systemd Environment= line. The chain is lazy: the
+    // second file is only read when the first yields no key.
+    let from_dotenv = ["/home/profit/.env", "/root/.env"]
+        .iter()
+        .filter_map(|path| std::fs::read_to_string(path).ok())
+        .find_map(|contents| {
+            contents
+                .lines()
+                .filter_map(|line| line.strip_prefix("OPENROUTER_API_KEY="))
+                .map(|rest| rest.trim().trim_matches('"').trim_matches('\''))
+                .find(|candidate| !candidate.is_empty())
+                .map(str::to_string)
+        });
+    if from_dotenv.is_some() {
+        return from_dotenv;
+    }
+
+    // Last resort: the LLM Team JSON config.
+    let raw = std::fs::read_to_string("/root/llm_team_config.json").ok()?;
+    let config: serde_json::Value = serde_json::from_str(&raw).ok()?;
+    let candidate = config.pointer("/providers/openrouter/api_key")?.as_str()?.trim();
+    if candidate.is_empty() {
+        None
+    } else {
+        Some(candidate.to_string())
+    }
+}
+
+/// Sends `req` to OpenRouter's OpenAI-compatible `/chat/completions`
+/// endpoint and converts the first returned choice into a
+/// [`ChatResponse`].
+///
+/// `key` is sent as a Bearer token. An `openrouter/` prefix on
+/// `req.model` is stripped before the upstream call. `max_tokens`
+/// falls back to 800 when absent or zero and `temperature` to 0.3.
+/// Token counts come from the upstream `usage` block when present and
+/// are otherwise estimated at roughly four characters per token.
+///
+/// # Errors
+///
+/// Returns a human-readable `String` when the HTTP client cannot be
+/// built, the request fails to complete, the upstream status is not
+/// 2xx (status and body are included), the body does not parse as a
+/// chat-completion response, or the response carries no choices.
+pub async fn chat(
+    key: &str,
+    req: &ChatRequest,
+) -> Result<ChatResponse, String> {
+    // Strip the "openrouter/" prefix if the caller used the namespaced
+    // form so OpenRouter sees the raw model id (e.g. "openai/gpt-oss-120b:free").
+    let model = req.model.strip_prefix("openrouter/").unwrap_or(&req.model).to_string();
+
+    let body = ORChatBody {
+        model: model.clone(),
+        // Pass content through verbatim — preserves OpenAI's multimodal
+        // content-parts shape (`[{type:"text",text:"..."}, ...]`) so the
+        // upstream provider sees exactly what the client sent.
+        messages: req.messages.iter().map(|m| ORMessage {
+            role: m.role.clone(),
+            content: m.content.clone(),
+        }).collect(),
+        // filter(|&n| n > 0) rejects Some(0): an explicit zero (e.g. an
+        // empty env value the caller parsed to 0) must not be sent
+        // upstream. Mirrors the guard in the OpenCode adapter.
+        max_tokens: req.max_tokens.filter(|&n| n > 0).unwrap_or(800),
+        temperature: req.temperature.unwrap_or(0.3),
+        stream: false,
+    };
+
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(OR_TIMEOUT_SECS))
+        .build()
+        .map_err(|e| format!("build client: {e}"))?;
+
+    let t0 = std::time::Instant::now();
+    let resp = client
+        .post(format!("{}/chat/completions", OR_BASE_URL))
+        .bearer_auth(key)
+        // OpenRouter recommends Referer + Title for attribution; absent
+        // headers do not fail the call but help us see our traffic in
+        // their dashboard.
+        .header("HTTP-Referer", "https://vcp.devop.live")
+        .header("X-Title", "Lakehouse Scrum")
+        .json(&body)
+        .send()
+        .await
+        .map_err(|e| format!("openrouter.ai unreachable: {e}"))?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        // Surface the upstream body so the caller can see why the
+        // request was rejected; "?" stands in if it cannot be read.
+        let body = resp.text().await.unwrap_or_else(|_| "?".into());
+        return Err(format!("openrouter.ai {}: {}", status, body));
+    }
+
+    let parsed: ORChatResponse = resp.json().await
+        .map_err(|e| format!("invalid openrouter response: {e}"))?;
+
+    let latency_ms = t0.elapsed().as_millis();
+    let choice = parsed.choices.into_iter().next()
+        .ok_or_else(|| "openrouter returned no choices".to_string())?;
+    let finish_reason = choice.finish_reason.unwrap_or_else(|| "stop".into());
+    let text = choice.message.content;
+
+    // Prefer upstream token accounting; otherwise estimate at ~4
+    // characters per token, rounding up.
+    let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
+        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
+        ((chars + 3) / 4) as u32
+    });
+    let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
+        ((text.chars().count() + 3) / 4) as u32
+    });
+
+    tracing::info!(
+        target: "v1.chat",
+        provider = "openrouter",
+        model = %model,
+        prompt_tokens,
+        completion_tokens,
+        latency_ms = latency_ms as u64,
+        "openrouter chat completed",
+    );
+
+    Ok(ChatResponse {
+        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
+        object: "chat.completion",
+        created: chrono::Utc::now().timestamp(),
+        model,
+        choices: vec![Choice {
+            index: 0,
+            // Same constructor the Ollama adapters use for assistant
+            // replies, so every provider builds messages one way.
+            message: Message::new_text("assistant", text),
+            finish_reason,
+        }],
+        usage: UsageBlock {
+            prompt_tokens,
+            completion_tokens,
+            total_tokens: prompt_tokens + completion_tokens,
+        },
+    })
+}
+
+// -- OpenRouter wire shapes (OpenAI-compatible) --
+
+/// Request payload serialized to the upstream `/chat/completions` call.
+#[derive(Serialize)]
+struct ORChatBody {
+    model: String,
+    messages: Vec<ORMessage>,
+    max_tokens: u32,
+    // Always serialized — unlike the OpenCode body there is no
+    // skip-when-absent handling for this field.
+    temperature: f64,
+    stream: bool,
+}
+
+/// One outbound chat message; `content` is an arbitrary JSON value so
+/// whatever shape the caller supplied is forwarded as-is.
+#[derive(Serialize)]
+struct ORMessage { role: String, content: serde_json::Value }
+
+/// The subset of the upstream response this adapter reads.
+#[derive(Deserialize)]
+struct ORChatResponse {
+    choices: Vec<ORChoice>,
+    // Absent `usage` deserializes to `None`.
+    #[serde(default)]
+    usage: Option<ORUsage>,
+}
+
+/// A single completion choice from the upstream response.
+#[derive(Deserialize)]
+struct ORChoice {
+    message: ORMessageResp,
+    // Absent `finish_reason` deserializes to `None`.
+    #[serde(default)]
+    finish_reason: Option<String>,
+}
+
+/// Assistant message in a choice.
+// NOTE(review): `content` is a plain `String`, so a response carrying
+// `"content": null` (or no `content`) would fail to parse — confirm
+// upstream never sends that for the models routed here.
+#[derive(Deserialize)]
+struct ORMessageResp { content: String }
+
+/// Token accounting reported by upstream; both fields are required
+/// whenever a `usage` object is present.
+#[derive(Deserialize)]
+struct ORUsage { prompt_tokens: u32, completion_tokens: u32 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Smoke test — any of the key sources may or may not exist in the
+    /// test environment; the call only has to return cleanly.
+    #[test]
+    fn resolve_openrouter_key_does_not_panic() {
+        let _resolved = resolve_openrouter_key();
+    }
+
+    /// The serialized request must carry the OpenAI-style field names.
+    #[test]
+    fn chat_body_serializes_to_openai_shape() {
+        let message = ORMessage { role: "user".into(), content: "review this".into() };
+        let body = ORChatBody {
+            model: "openai/gpt-oss-120b:free".into(),
+            messages: vec![message],
+            max_tokens: 800,
+            temperature: 0.3,
+            stream: false,
+        };
+        let json = serde_json::to_string(&body).unwrap();
+        let expected_fragments = [
+            "\"model\":\"openai/gpt-oss-120b:free\"",
+            "\"messages\"",
+            "\"max_tokens\":800",
+            "\"stream\":false",
+        ];
+        for fragment in expected_fragments {
+            assert!(json.contains(fragment));
+        }
+    }
+
+    /// "openrouter/openai/gpt-oss-120b:free" loses the namespace;
+    /// ids without the prefix come back unchanged.
+    #[test]
+    fn model_prefix_strip_preserves_unprefixed() {
+        let strip = |id: &'static str| id.strip_prefix("openrouter/").unwrap_or(id);
+        let cases = [
+            ("openrouter/openai/gpt-oss-120b:free", "openai/gpt-oss-120b:free"),
+            ("openai/gpt-oss-120b:free", "openai/gpt-oss-120b:free"),
+            ("google/gemma-3-27b-it:free", "google/gemma-3-27b-it:free"),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(strip(input), expected, "{input} should become {expected}");
+        }
+    }
+}
--- a/crates/gateway/src/v1/respond.rs
+++ b/crates/gateway/src/v1/respond.rs
@ -0,0 +1,150 @@
+//! `/v1/respond` — the **execution** API (distinct from `/v1/chat`, the
+//! completion API).
+//!
+//! This is the consolidation move called out in the 2026-04-23 session:
+//! lift the proven pipeline from `tests/multi-agent/orchestrator.ts`
+//! (executor → reviewer → escalate → validate → seal playbook →
+//! write-through to KB) into the gateway, so the production path has
+//! the intelligence the tests already proved.
+//!
+//! `/v1/chat` stays a naive completion proxy for callers that want one.
+//! `/v1/respond` is where the loop lives. Every orchestrator-style
+//! caller migrates here and the TS harnesses become thin clients.
+//!
+//! This file holds the HTTP surface + request/response shapes. The loop
+//! itself lives in `execution_loop::ExecutionLoop`.
+
+use axum::{extract::State, http::StatusCode, Json};
+use serde::{Deserialize, Serialize};
+
+use super::V1State;
+use crate::execution_loop::{ExecutionLoop, LogEntry, RespondOutcome};
+
+/// A structured task — mirrors `TaskSpec` in `tests/multi-agent/agent.ts`.
+/// Kept deliberately open so non-staffing task classes (code-gen,
+/// DevOps-long-horizon) can land without a schema fight.
+///
+/// On the wire, `task_class` and `operation` are required; every other
+/// field carries `#[serde(default)]` and may be omitted.
+#[derive(Deserialize, Debug, Clone)]
+pub struct RespondRequest {
+    /// Task class — routes to the right truth rules + validator. For the
+    /// staffing substrate: `staffing.fill`, `staffing.rescue`,
+    /// `staffing.sms_draft`. Truth-layer lookup is a no-op until a rule
+    /// set is registered for the class.
+    pub task_class: String,
+
+    /// Human-readable operation description — becomes the playbook
+    /// `operation` field on seal, and the primary signal for
+    /// playbook_memory embedding.
+    pub operation: String,
+
+    /// Free-form structured context. Passed to the executor prompt and
+    /// to the playbook seeder. Staffing tasks expect
+    /// `{target_role, target_count, target_city, target_state, approach_hint}`
+    /// but nothing here validates that — the validator crate will (Phase 43).
+    /// Deserializes to JSON `null` when omitted.
+    #[serde(default)]
+    pub spec: serde_json::Value,
+
+    /// Executor model. Defaults to the hot-path local model if omitted.
+    /// See orchestrator.ts:28 (`EXECUTOR_MODEL = "qwen3.5:latest"`).
+    /// `None` here means "not supplied"; the default is chosen by the
+    /// consumer of this request, not by this struct.
+    #[serde(default)]
+    pub executor_model: Option<String>,
+
+    /// Reviewer model. Defaults to the hot-path local reviewer.
+    /// See orchestrator.ts:29 (`REVIEWER_MODEL = "qwen3:latest"`).
+    #[serde(default)]
+    pub reviewer_model: Option<String>,
+
+    /// Hard cap on executor turns. Default matches orchestrator.ts:30
+    /// (`MAX_TURNS = 12`). Cloud escalation counts as a turn.
+    #[serde(default)]
+    pub max_turns: Option<u32>,
+}
+
+/// JSON body returned by the `/v1/respond` handler.
+#[derive(Serialize)]
+pub struct RespondResponse {
+    /// `ok` = consensus reached, playbook sealed. `failed` = loop ran
+    /// out of turns or hit the drift cap. `blocked` = truth-layer
+    /// veto (Phase 42 rule citation in `error`).
+    pub status: &'static str,
+
+    /// The final artifact — for staffing fills, `{fills: [{candidate_id, name}]}`.
+    /// Empty on failure / block.
+    pub artifact: serde_json::Value,
+
+    /// Structured cross-turn log. Same shape as orchestrator.ts LogEntry
+    /// so existing tooling (kb extractors, fact_extractor.ts) reads it
+    /// without change.
+    pub log: Vec<LogEntry>,
+
+    /// Iteration count actually used. ≤ max_turns. Stamped on
+    /// outcomes.jsonl per the indicator audit (2026-04-23).
+    pub iterations: u32,
+
+    /// Error message on non-ok status. Truth-rule citations land here
+    /// when `status == "blocked"`. Omitted from the serialized JSON
+    /// when `None`.
+    // NOTE(review): `default` only affects deserialization and this
+    // struct derives `Serialize` alone — presumably harmless; confirm.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+}
+
+/// Handler for `/v1/respond`: validates the request, drives one
+/// [`ExecutionLoop`] run, and reports the outcome.
+///
+/// Loop outcomes (`ok`, `failed`, `blocked`) are all returned as a
+/// successful JSON body with the matching `status`; for `failed` and
+/// `blocked` the outcome's reason is copied into `error`.
+///
+/// # Errors
+///
+/// - `400 Bad Request` when `operation` or `task_class` is empty or
+///   contains only whitespace.
+/// - `500 Internal Server Error` when the execution loop itself
+///   returns an error.
+pub async fn respond(
+    State(state): State<V1State>,
+    Json(req): Json<RespondRequest>,
+) -> Result<Json<RespondResponse>, (StatusCode, String)> {
+    // A whitespace-only value is as unusable as an empty one (it would
+    // otherwise be handed to the loop as the operation / task class),
+    // so reject it at the door.
+    if req.operation.trim().is_empty() {
+        return Err((StatusCode::BAD_REQUEST, "operation must be non-empty".into()));
+    }
+    if req.task_class.trim().is_empty() {
+        return Err((StatusCode::BAD_REQUEST, "task_class must be non-empty".into()));
+    }
+
+    let mut loop_runner = ExecutionLoop::new(state, req);
+    let outcome = loop_runner.run().await.map_err(|e| {
+        (StatusCode::INTERNAL_SERVER_ERROR, format!("execution loop: {e}"))
+    })?;
+
+    let (status, error) = match &outcome {
+        RespondOutcome::Ok { .. } => ("ok", None),
+        RespondOutcome::Failed { reason, .. } => ("failed", Some(reason.clone())),
+        RespondOutcome::Blocked { reason, .. } => ("blocked", Some(reason.clone())),
+    };
+
+    // `artifact()` borrows the outcome and `into_log()` consumes it, so
+    // the artifact must be taken first.
+    let artifact = outcome.artifact();
+    let log = outcome.into_log();
+
+    Ok(Json(RespondResponse {
+        status,
+        artifact,
+        log,
+        iterations: loop_runner.turns_used(),
+        error,
+    }))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Only the two required fields are supplied; optional ones must
+    /// come back unset.
+    #[test]
+    fn respond_request_parses_minimal() {
+        let parsed = serde_json::from_str::<RespondRequest>(
+            r#"{
+            "task_class": "staffing.fill",
+            "operation": "fill: Welder x2 in Toledo, OH"
+        }"#,
+        )
+        .unwrap();
+        assert_eq!(parsed.task_class, "staffing.fill");
+        assert_eq!(parsed.executor_model, None);
+        assert_eq!(parsed.max_turns, None);
+    }
+
+    /// Every field supplied; optional values must round-trip.
+    #[test]
+    fn respond_request_parses_full() {
+        let parsed = serde_json::from_str::<RespondRequest>(
+            r#"{
+            "task_class": "staffing.fill",
+            "operation": "fill: Welder x2 in Toledo, OH",
+            "spec": {"target_role": "Welder", "target_count": 2, "target_city": "Toledo", "target_state": "OH"},
+            "executor_model": "qwen3.5:latest",
+            "reviewer_model": "qwen3:latest",
+            "max_turns": 12
+        }"#,
+        )
+        .unwrap();
+        assert_eq!(parsed.executor_model.as_deref(), Some("qwen3.5:latest"));
+        assert_eq!(parsed.max_turns, Some(12));
+        assert_eq!(parsed.spec["target_count"], 2);
+    }
+}
--- a/crates/gateway/src/v1/session_log.rs
+++ b/crates/gateway/src/v1/session_log.rs
@ -0,0 +1,235 @@
+//! Coordinator session JSONL writer — Rust parity with the Go-side
+//! `internal/validator/session_log.go` (commit 1a3a82a in
+//! golangLAKEHOUSE). Same schema, same field names, same producer
+//! semantics, so a unified longitudinal log can pull from either
+//! runtime via DuckDB.
+//!
+//! Schema: `session.iterate.v1`. One row per `/v1/iterate` session.
+//! Append-only. Best-effort posture: errors warn and the iterate
+//! response always ships.
+//!
+//! See `golangLAKEHOUSE/docs/SESSION_LOG.md` for the full schema
+//! reference + DuckDB query examples. This module produces rows
+//! with `daemon: "gateway"`; the Go side produces `daemon:
+//! "validatord"`. Operators who want a unified stream can point both
+//! to the same path (the OS write-append is atomic for the row sizes
+//! we produce) or query both files together via duckdb's `read_json`
+//! glob support.
+
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+use tokio::sync::Mutex;
+
+pub const SESSION_RECORD_SCHEMA: &str = "session.iterate.v1";
+
+/// One row in coordinator_sessions.jsonl. Field names are the on-wire
+/// names — must stay byte-equal to the Go side's
+/// `validator.SessionRecord` (proven by the cross-runtime parity
+/// probe at golangLAKEHOUSE/scripts/cutover/parity/).
+// Deserialize is supported so the parity helper binary can round-trip
+// fixture inputs through serde without hand-rolling a parser. Production
+// emit path uses Serialize only; SessionRecord rows are written by the
+// gateway and consumed by DuckDB / external tooling, never re-read by us.
+//
+// NOTE(review): serde emits fields in declaration order, so reordering
+// the fields below changes the bytes of every row — keep the order in
+// step with the Go struct.
+#[derive(Serialize, Deserialize)]
+pub struct SessionRecord {
+    /// Schema tag; the test fixture fills it from `SESSION_RECORD_SCHEMA`.
+    pub schema: String,
+    pub session_id: String,
+    /// Fixture uses an RFC 3339 style UTC string ("2026-05-02T08:00:00Z");
+    /// presumably all producers do — confirm against the Go side.
+    pub timestamp: String,
+    /// Producing runtime. Per the module docs this side writes "gateway".
+    pub daemon: String,
+    pub kind: String,
+    pub model: String,
+    pub provider: String,
+    pub prompt: String,
+    pub iterations: u32,
+    pub max_iterations: u32,
+    pub final_verdict: String, // "accepted" | "max_iter_exhausted" | "infra_error"
+    /// Per-attempt detail rows (see `SessionAttemptRecord`).
+    pub attempts: Vec<SessionAttemptRecord>,
+    /// Left out of the serialized row entirely when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub artifact: Option<serde_json::Value>,
+    /// Left out of the serialized row entirely when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub grounded_in_roster: Option<bool>,
+    pub duration_ms: u64,
+}
+
+/// One element of `SessionRecord::attempts`. Both optional fields are
+/// dropped from the serialized row when they are `None`.
+#[derive(Serialize, Deserialize)]
+pub struct SessionAttemptRecord {
+    /// Attempt index; the test fixture numbers its single attempt 0, so
+    /// this is presumably zero-based — confirm against the producer.
+    pub iteration: u32,
+    pub verdict_kind: String, // "no_json" | "validation_failed" | "accepted" | "infra_error"
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub span_id: Option<String>,
+}
+
+/// Append-only writer. Cloneable handle — internal state is Arc'd so
+/// V1State can keep its own clone and per-request clones are cheap.
+#[derive(Clone)]
+pub struct SessionLogger {
+    // Shared by every clone: cloning the logger only clones this Arc, so
+    // all handles point at the same path and the same mutex.
+    inner: Arc<Inner>,
+}
+
+/// State shared behind the `Arc` in every `SessionLogger` clone.
+struct Inner {
+    /// Destination file path, stored exactly as passed to `from_path`.
+    path: String,
+    /// tokio::Mutex (not std) because we hold it across the async
+    /// fs write. Contention is low (one row per /v1/iterate session).
+    mu: Mutex<()>,
+}
+
+impl SessionLogger {
+    /// Construct a logger writing to `path`. Empty path → None
+    /// (skip the wiring in the iterate handler entirely).
+    pub fn from_path(path: &str) -> Option<Self> {
+        if path.is_empty() {
+            return None;
+        }
+        Some(Self {
+            inner: Arc::new(Inner {
+                path: path.to_string(),
+                mu: Mutex::new(()),
+            }),
+        })
+    }
+
+    /// Append one record as a single JSONL row. Best-effort: a marshal
+    /// or I/O failure lands in `tracing::warn!` and is otherwise
+    /// swallowed — observability is a witness, never a gate. The return
+    /// type is `()`, so there is nothing for the caller to check.
+    pub async fn append(&self, rec: SessionRecord) {
+        let mut row = match serde_json::to_string(&rec) {
+            Ok(s) => s,
+            Err(e) => {
+                tracing::warn!(target: "v1.session_log", "marshal: {e}");
+                return;
+            }
+        };
+        // Terminate the row before writing so body + newline go out in
+        // one write_all call. Writing them separately leaves a window in
+        // which another appender to the same file (e.g. a second daemon
+        // pointed at the same path) could land between a row and its
+        // terminator.
+        row.push('\n');
+        let _guard = self.inner.mu.lock().await;
+        if let Err(e) = self.write(&row).await {
+            tracing::warn!(target: "v1.session_log", "write {}: {e}", self.inner.path);
+        }
+    }
+
+    /// Open the log file in append mode (creating it, and any missing
+    /// parent directory, on demand) and write `row`, which must already
+    /// end in `\n`. `append` calls this while holding `inner.mu`.
+    ///
+    /// Returns the first I/O error from mkdir, open, write or flush.
+    async fn write(&self, row: &str) -> std::io::Result<()> {
+        use tokio::fs::OpenOptions;
+        use tokio::io::AsyncWriteExt;
+
+        // Lazy mkdir on first write so a not-yet-mounted volume at
+        // startup doesn't kill the daemon.
+        if let Some(parent) = std::path::Path::new(&self.inner.path).parent() {
+            if !parent.as_os_str().is_empty() {
+                tokio::fs::create_dir_all(parent).await?;
+            }
+        }
+        let mut f = OpenOptions::new()
+            .append(true)
+            .create(true)
+            .open(&self.inner.path)
+            .await?;
+        f.write_all(row.as_bytes()).await?;
+        // tokio's File performs writes on a background blocking task, so
+        // write_all can return before the bytes have reached the OS.
+        // Flushing waits for that task: without it a failed write is
+        // never reported here and the caller's mutex could be released
+        // while the write is still in flight.
+        f.flush().await?;
+        Ok(())
+    }
+}
+
+/// Keep at most the first `n` characters (Unicode scalar values, not
+/// bytes) of `s`, always cutting on a char boundary. Mirrors Go's
+/// `trim` helper so both runtimes cap fields at the same boundaries.
+pub fn truncate(s: &str, n: usize) -> String {
+    // The byte offset of the char at index `n` is exactly where the
+    // first `n` chars end; if there is no such char, `s` already fits.
+    match s.char_indices().nth(n) {
+        Some((cut, _)) => s[..cut].to_owned(),
+        None => s.to_owned(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+    use tokio::fs;
+
+    /// Fully-populated record (both optional fields set) carrying the
+    /// given session id and one accepted attempt.
+    fn fixture_record(session_id: &str) -> SessionRecord {
+        let only_attempt = SessionAttemptRecord {
+            iteration: 0,
+            verdict_kind: "accepted".to_string(),
+            error: None,
+            span_id: Some("span-0".to_string()),
+        };
+        SessionRecord {
+            schema: SESSION_RECORD_SCHEMA.to_string(),
+            session_id: session_id.to_string(),
+            timestamp: "2026-05-02T08:00:00Z".to_string(),
+            daemon: "gateway".to_string(),
+            kind: "fill".to_string(),
+            model: "qwen3.5:latest".to_string(),
+            provider: "ollama".to_string(),
+            prompt: "produce a fill artifact".to_string(),
+            iterations: 1,
+            max_iterations: 3,
+            final_verdict: "accepted".to_string(),
+            attempts: vec![only_attempt],
+            artifact: Some(serde_json::json!({"fills":[{"candidate_id":"W-1"}]})),
+            grounded_in_roster: Some(true),
+            duration_ms: 50,
+        }
+    }
+
+    /// An empty path means "logging disabled": no logger is built.
+    #[tokio::test]
+    async fn from_path_empty_returns_none() {
+        assert!(SessionLogger::from_path("").is_none());
+    }
+
+    /// A single append produces a newline-terminated row containing the
+    /// schema tag, the session id and the optional roster flag.
+    #[tokio::test]
+    async fn append_writes_jsonl_row_with_schema_field() {
+        let log_path = tempdir().join("sessions.jsonl");
+        let logger = SessionLogger::from_path(&log_path.to_string_lossy()).unwrap();
+        logger.append(fixture_record("trace-a")).await;
+
+        let written = fs::read_to_string(&log_path).await.unwrap();
+        assert!(written.contains("\"schema\":\"session.iterate.v1\""));
+        assert!(written.contains("\"session_id\":\"trace-a\""));
+        assert!(written.contains("\"grounded_in_roster\":true"));
+        assert!(written.ends_with('\n'));
+    }
+
+    /// Many tasks appending through clones of one logger must yield one
+    /// intact JSON row each.
+    #[tokio::test]
+    async fn append_concurrent_safe() {
+        let log_path = tempdir().join("sessions.jsonl");
+        let logger = SessionLogger::from_path(&log_path.to_string_lossy()).unwrap();
+
+        let n = 32;
+        let tasks: Vec<_> = (0..n)
+            .map(|i| {
+                let handle = logger.clone();
+                tokio::spawn(async move {
+                    handle.append(fixture_record(&format!("trace-{i}"))).await;
+                })
+            })
+            .collect();
+        for task in tasks {
+            task.await.unwrap();
+        }
+
+        let written = fs::read_to_string(&log_path).await.unwrap();
+        let lines: Vec<_> = written.lines().filter(|row| !row.is_empty()).collect();
+        assert_eq!(lines.len(), n, "expected {n} rows, got {}", lines.len());
+        // A torn write would show up here as a row that fails to parse.
+        for row in lines {
+            serde_json::from_str::<serde_json::Value>(row).expect("valid json per row");
+        }
+    }
+
+    /// Create and return a fresh directory under the system temp dir.
+    /// The name combines pid, a nanosecond timestamp and a counter that
+    /// is shared across the whole test binary, so neither earlier runs
+    /// nor sibling tests collide with it.
+    fn tempdir() -> PathBuf {
+        use std::sync::atomic::{AtomicU64, Ordering};
+
+        static COUNTER: AtomicU64 = AtomicU64::new(0);
+        let seq = COUNTER.fetch_add(1, Ordering::Relaxed);
+        let pid = std::process::id();
+        let nanos = chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0);
+        let dir = std::env::temp_dir().join(format!(
+            "session_log_test_{}_{}_{}",
+            pid, nanos, seq,
+        ));
+        std::fs::create_dir_all(&dir).unwrap();
+        dir
+    }
+}
--- a/crates/gateway/src/v1/truth.rs
+++ b/crates/gateway/src/v1/truth.rs
@ -0,0 +1,47 @@
+use serde::Serialize;
+use truth::default_truth_store;
+
+// Note: truth_router() was a stub wrapper around a single /context route
+// that nothing called — v1/mod.rs wires get(truth::context) directly
+// onto its own router. Removed 2026-04-24 along with its #[allow(dead_code)]
+// attribute; the handler below is the real surface.
+
+/// JSON body returned by the `context` handler below.
+#[derive(Serialize)]
+pub struct ContextResponse {
+    /// Task-class identifiers, in the order the handler lists them.
+    pub task_classes: Vec<String>,
+    /// Every rule the truth store returned for those task classes,
+    /// collected class by class in the same order.
+    pub rules: Vec<RuleInfo>,
+}
+
+/// Serializable summary of one truth-store rule: the handler copies
+/// these three fields from each rule and exposes nothing else.
+#[derive(Serialize)]
+pub struct RuleInfo {
+    pub id: String,
+    pub task_class: String,
+    pub description: String,
+}
+
+/// Handler that reports the fixed list of staffing task classes along
+/// with the id / task class / description of every rule the default
+/// truth store returns for each of them. The store is built afresh on
+/// every call.
+pub async fn context() -> axum::Json<ContextResponse> {
+    let store = default_truth_store();
+
+    let task_classes: Vec<String> = [
+        "staffing.fill",
+        "staffing.rescue",
+        "staffing.sms_draft",
+        "staffing.any",
+    ]
+    .iter()
+    .map(|name| name.to_string())
+    .collect();
+
+    // Walk the classes in listed order and flatten each class's rules
+    // into one vector of summaries.
+    let rules: Vec<RuleInfo> = task_classes
+        .iter()
+        .flat_map(|tc| store.get_rules(tc))
+        .map(|rule| RuleInfo {
+            id: rule.id.clone(),
+            task_class: rule.task_class.clone(),
+            description: rule.description.clone(),
+        })
+        .collect();
+
+    axum::Json(ContextResponse { task_classes, rules })
+}
--- a/crates/gateway/src/v1/validate.rs
+++ b/crates/gateway/src/v1/validate.rs
@ -0,0 +1,82 @@
+//! /v1/validate — gateway-side artifact validation endpoint.
+//!
+//! Phase 43 v3 part 2: makes the validator crate network-callable.
+//! Any caller (scrum loop, test harness, future agent) can POST a
+//! generated artifact and get back a Report (success) or
+//! ValidationError (failure with structured field/reason).
+//!
+//! Request shape:
+//!   POST /v1/validate
+//!   {
+//!     "kind": "fill" | "email" | "playbook",
+//!     "artifact": { ... },
+//!     "context": { ... }   // optional — folded into artifact._context
+//!   }
+//!
+//! Response on success: 200 + Report JSON
+//! Response on failure: 422 + ValidationError JSON
+//! Response on bad request: 400 + plain-text error
+//!
+//! The shared WorkerLookup is loaded once at gateway startup from
+//! workers_500k.parquet (path configurable via LH_WORKERS_PARQUET
+//! env, defaults to data/datasets/workers_500k.parquet). Falls back
+//! to an empty InMemoryWorkerLookup if the file is missing — the
+//! validators will still run schema/length/PII checks but worker-
+//! existence checks will all fail (Consistency error), which is the
+//! correct behavior when the roster isn't configured.
+
+use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};
+use serde::Deserialize;
+use validator::{
+    Artifact, Validator, ValidationError,
+    staffing::{
+        fill::FillValidator,
+        email::EmailValidator,
+        playbook::PlaybookValidator,
+    },
+};
+
+/// Request body for `POST /v1/validate`.
+#[derive(Deserialize)]
+pub struct ValidateRequest {
+    /// `"fill" | "email" | "playbook"` — picks which validator runs.
+    /// Any other value is answered with 400 by the handler.
+    pub kind: String,
+    /// The artifact JSON (free-form; shape depends on `kind`).
+    pub artifact: serde_json::Value,
+    /// Optional context bag — merged into `artifact._context` so the
+    /// validator can read fields like `target_count`, `city`,
+    /// `client_id`, `candidate_id` without callers having to embed
+    /// `_context` in the artifact themselves.
+    ///
+    /// The merge only happens when `artifact` is a JSON object; for any
+    /// other artifact shape the handler leaves the artifact untouched
+    /// and the context is not used. An existing `_context` key in the
+    /// artifact is replaced by this value.
+    #[serde(default)]
+    pub context: Option<serde_json::Value>,
+}
+
+/// `POST /v1/validate` handler: folds the optional context into the
+/// artifact, runs the validator selected by `kind`, and maps the
+/// outcome to 200 + report, 422 + validation error, or 400 for an
+/// unrecognised `kind`.
+pub async fn validate(
+    State(state): State<super::V1State>,
+    Json(req): Json<ValidateRequest>,
+) -> impl IntoResponse {
+    let ValidateRequest {
+        kind,
+        artifact: mut artifact_value,
+        context,
+    } = req;
+
+    // Validators read contract metadata from `_context`; it can only be
+    // attached when a context was sent AND the artifact is an object.
+    if let (Some(ctx), Some(obj)) = (context, artifact_value.as_object_mut()) {
+        obj.insert("_context".to_string(), ctx);
+    }
+
+    // Pick the validator by kind; anything unknown is a caller error.
+    let workers = state.validate_workers.clone();
+    let result: Result<validator::Report, ValidationError> = match kind.as_str() {
+        "fill" => FillValidator::new(workers).validate(&Artifact::FillProposal(artifact_value)),
+        "email" => EmailValidator::new(workers).validate(&Artifact::EmailDraft(artifact_value)),
+        "playbook" => PlaybookValidator.validate(&Artifact::Playbook(artifact_value)),
+        other => {
+            let msg = format!("unknown kind '{other}' — expected fill | email | playbook");
+            return (StatusCode::BAD_REQUEST, msg).into_response();
+        }
+    };
+
+    result.map_or_else(
+        |e| (StatusCode::UNPROCESSABLE_ENTITY, Json(e)).into_response(),
+        |report| (StatusCode::OK, Json(report)).into_response(),
+    )
+}
--- a/crates/ingestd/Cargo.toml
+++ b/crates/ingestd/Cargo.toml
@ -8,6 +8,7 @@ shared = { path = "../shared" }
 storaged = { path = "../storaged" }
 catalogd = { path = "../catalogd" }
 vectord = { path = "../vectord" }
+journald = { path = "../journald" }
 tokio = { workspace = true }
 axum = { workspace = true, features = ["multipart"] }
 lopdf = { workspace = true }
--- a/crates/ingestd/src/schema_evolution.rs
+++ b/crates/ingestd/src/schema_evolution.rs
@ -2,10 +2,9 @@
 /// When a source changes format (columns renamed, added, removed, type changed),
 /// the system detects the diff and can auto-map using AI or heuristic matching.

-use arrow::datatypes::{DataType, Schema, SchemaRef};
+use arrow::datatypes::{DataType, SchemaRef};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
-use std::sync::Arc;

 /// A detected change between two schema versions.
 #[derive(Debug, Clone, Serialize, Deserialize)]
@ -223,7 +222,8 @@ fn find_similar_column<'a>(
 #[cfg(test)]
 mod tests {
    use super::*;
-    use arrow::datatypes::Field;
+    use arrow::datatypes::{Field, Schema};
+    use std::sync::Arc;

    fn schema(fields: Vec<(&str, DataType)>) -> SchemaRef {
        Arc::new(Schema::new(
--- a/crates/ingestd/src/service.rs
+++ b/crates/ingestd/src/service.rs
@ -3,7 +3,7 @@ use axum::{
    extract::{Multipart, Path, Query, State},
    http::{HeaderMap, StatusCode},
    response::IntoResponse,
-    routing::{delete, get, patch, post},
+    routing::{get, post},
 };
 use bytes::Bytes;
 use object_store::ObjectStore;
@ -33,6 +33,11 @@ pub struct IngestState {
    /// Scheduled-ingest registry. The scheduler task runs against this
    /// store; HTTP CRUD endpoints write through it.
    pub schedules: schedule::ScheduleStore,
+    /// Event journal for ADR-012 mutation history. Optional for back-compat
+    /// with callers (like scheduled ingest tests) that don't wire it yet.
+    /// When present, successful ingests emit a record_ingest event — closes
+    /// P9-001 on the file-upload path. (2026-04-23)
+    pub journal: Option<journald::journal::Journal>,
 }

 /// Push `DatasetAppended` triggers for every HNSW index bound to this
@ -136,6 +141,22 @@ async fn ingest_file(
        Ok(result) => {
            if !result.deduplicated {
                notify_agent_on_append(&state, &result.dataset_name).await;
+                // P9-001 fix (2026-04-23): emit a mutation event on every
+                // non-deduplicated ingest. Dedup no-ops don't need events
+                // (ADR-020 register() is already idempotent on same fingerprint).
+                if let Some(ref journal) = state.journal {
+                    if let Err(e) = journal.record_ingest(
+                        &result.dataset_name,
+                        result.rows as usize,
+                        "ingest_api",
+                        &filename,
+                    ).await {
+                        tracing::warn!(
+                            "journal record_ingest failed for '{}': {}",
+                            result.dataset_name, e,
+                        );
+                    }
+                }
            }
            if result.deduplicated {
                Ok((StatusCode::OK, Json(result)))
@ -630,3 +651,108 @@ async fn run_schedule_now(
    }
    Ok(Json(outcome))
 }
+
+// ─── Tests ───
+
+#[cfg(test)]
+mod journal_integration_tests {
+    //! P9-001 integration test: prove that a successful ingest produces a
+    //! journal.record_ingest event. Block 2 on PR #10 was "journal event
+    //! verified live" being unbacked by the diff. This test makes the
+    //! verification committed and reproducible.
+
+    use journald::journal::{Event, Journal};
+    use object_store::memory::InMemory;
+    use std::sync::Arc;
+
+    // Helper: build a bare Journal against an in-memory object store.
+    // Flush threshold 1 so every recorded event is persisted immediately.
+    fn test_journal() -> Journal {
+        let store: Arc<dyn object_store::ObjectStore> = Arc::new(InMemory::new());
+        Journal::new(store, 1)
+    }
+
+    #[tokio::test]
+    async fn journal_record_ingest_increments_counter() {
+        // Arrange — fresh journal, counter starts at zero.
+        let journal = test_journal();
+        let stats0 = journal.stats().await;
+        assert_eq!(stats0.total_events_created, 0);
+        assert_eq!(stats0.buffer_events, 0);
+
+        // Act — simulate what the /ingest/file success path does.
+        journal
+            .record_ingest("test_dataset", 42, "ingest_api", "probe.csv")
+            .await
+            .expect("record_ingest should succeed");
+
+        // Assert — counter advanced, event exists. With threshold=1 the
+        // event flushed to store; with threshold>N it would be in-buffer.
+        let stats1 = journal.stats().await;
+        assert_eq!(stats1.total_events_created, 1, "counter should reflect one recorded event");
+
+        // Assert — the event is retrievable by entity.
+        let history = journal
+            .get_entity_history("batch:42")
+            .await
+            .expect("history lookup");
+        assert_eq!(history.len(), 1, "one event should be visible in history");
+        let ev = &history[0];
+        assert_eq!(ev.action, "ingest");
+        assert_eq!(ev.entity_type, "test_dataset");
+        assert_eq!(ev.actor, "ingest_api");
+        assert!(
+            ev.new_value.contains("probe.csv"),
+            "new_value should carry source filename, got: {}",
+            ev.new_value
+        );
+    }
+
+    #[tokio::test]
+    async fn optional_journal_field_none_is_valid_back_compat() {
+        // IngestState.journal is Option<Journal>. Back-compat path: when
+        // the field is None, the ingest handler MUST still succeed — the
+        // journal call is fire-and-forget, never load-bearing.
+        //
+        // Pin the type shape against the REAL field. Asserting on a
+        // free-standing Option<Journal> local would keep passing even if
+        // IngestState.journal became mandatory; this accessor stops
+        // compiling instead, forcing an explicit re-consideration.
+        fn journal_of(state: &super::IngestState) -> &Option<Journal> {
+            &state.journal
+        }
+        let _shape_pin: fn(&super::IngestState) -> &Option<Journal> = journal_of;
+
+        // Both variants of the optional wiring are constructible.
+        let none_journal: Option<Journal> = None;
+        assert!(none_journal.is_none());
+
+        let some_journal: Option<Journal> = Some(test_journal());
+        assert!(some_journal.is_some());
+    }
+
+    #[tokio::test]
+    async fn journal_record_event_fields_match_adr_012_schema() {
+        // ADR-012 locks the event schema: entity_type, entity_id, field,
+        // action, old_value, new_value, actor, source, workspace_id plus
+        // the auto-assigned event_id + timestamp. This test pins the
+        // field names so a future refactor can't silently drop one.
+        let journal = test_journal();
+        let base = Event {
+            event_id: String::new(),
+            timestamp: chrono::Utc::now(),
+            entity_type: "candidate".into(),
+            entity_id: "CAND-0001".into(),
+            field: "phone".into(),
+            action: "update".into(),
+            old_value: "555-0000".into(),
+            new_value: "555-9999".into(),
+            actor: "recruiter".into(),
+            source: "api".into(),
+            workspace_id: "ws-x".into(),
+        };
+        journal.record(base).await.expect("record should accept full-schema event");
+        let h = journal
+            .get_entity_history("CAND-0001")
+            .await
+            .expect("lookup");
+        assert_eq!(h.len(), 1);
+        assert_eq!(h[0].field, "phone");
+        assert_eq!(h[0].old_value, "555-0000");
+        assert_eq!(h[0].new_value, "555-9999");
+        assert_eq!(h[0].workspace_id, "ws-x");
+    }
+}
--- a/crates/ingestd/src/watcher.rs
+++ b/crates/ingestd/src/watcher.rs
@ -72,12 +72,14 @@ async fn process_inbox(

        let path = entry.path();

-        // Skip directories and hidden files
-        if path.is_dir() || path.file_name().map_or(true, |n| n.to_string_lossy().starts_with('.')) {
-            continue;
-        }
-
-        let filename = path.file_name().unwrap().to_string_lossy().to_string();
+        // Skip directories and hidden files. Bind filename once via
+        // let-else so the subsequent use is unwrap-free — previous
+        // version relied on a map_or guard above + an .unwrap() here
+        // being consistent, which is a fragile invariant.
+        if path.is_dir() { continue; }
+        let Some(fn_os) = path.file_name() else { continue; };
+        let filename = fn_os.to_string_lossy().to_string();
+        if filename.starts_with('.') { continue; }
        tracing::info!("watcher: found new file '{}'", filename);

        // Read file
--- a/crates/journald/src/journal.rs
+++ b/crates/journald/src/journal.rs
@ -5,7 +5,7 @@
 /// Storage: events buffer in memory, flush to Parquet periodically.
 /// Query: load Parquet files, filter by entity/field/actor/time.

-use arrow::array::{ArrayRef, RecordBatch, StringArray, UInt64Array};
+use arrow::array::{ArrayRef, RecordBatch, StringArray};
 use arrow::datatypes::{DataType, Field, Schema};
 use chrono::{DateTime, Utc};
 use object_store::ObjectStore;
--- a/crates/lance-bench/src/main.rs
+++ b/crates/lance-bench/src/main.rs
@ -456,6 +456,26 @@ async fn build_lance_vector_index(path: &str, _dims: usize) -> Result<()> {
        .await
        .context("create_index")?;

+    // Also build the scalar btree on doc_id. This bench's
+    // measure_random_access_lance uses take(row_position) which doesn't
+    // need the btree, but the dataset this bench writes is also queried
+    // downstream by /vectors/lance/doc/<name>/<doc_id> (the production
+    // lookup path) — without this index that path falls back to a full
+    // table scan. Cheap to build (~1.2s on 10M rows) and matches the
+    // gateway's lance_migrate handler behavior so bench-produced datasets
+    // are immediately production-shape.
+    use lance_index::scalar::ScalarIndexParams;
+    dataset
+        .create_index(
+            &["doc_id"],
+            IndexType::Scalar,
+            Some("doc_id_btree".into()),
+            &ScalarIndexParams::default(),
+            true,
+        )
+        .await
+        .context("create_index doc_id btree")?;
+
    Ok(())
 }

--- a/crates/queryd/Cargo.toml
+++ b/crates/queryd/Cargo.toml
@ -7,6 +7,7 @@ edition = "2024"
 shared = { path = "../shared" }
 catalogd = { path = "../catalogd" }
 storaged = { path = "../storaged" }
+truth = { path = "../truth" }
 tokio = { workspace = true }
 axum = { workspace = true }
 serde = { workspace = true }
--- a/crates/queryd/src/delta.rs
+++ b/crates/queryd/src/delta.rs
@ -84,14 +84,17 @@ pub async fn compact(
    // Load deltas
    let delta_batches = load_deltas(store, dataset_name).await?;
    let delta_count = delta_batches.len();
+    // Row counts captured before extend; previously base_rows subtracted delta_count (files) from rows — unit mismatch.
+    let base_row_count: usize = base_batches.iter().map(|b| b.num_rows()).sum();
+    let delta_row_count: usize = delta_batches.iter().map(|b| b.num_rows()).sum();

    let has_tombstones = !tombstones.is_empty();
    let nothing_to_do = delta_batches.is_empty() && !has_tombstones;
    if nothing_to_do {
        return Ok(CompactResult {
-            base_rows: base_batches.iter().map(|b| b.num_rows()).sum(),
+            base_rows: base_row_count,
            delta_rows: 0,
-            final_rows: base_batches.iter().map(|b| b.num_rows()).sum(),
+            final_rows: base_row_count,
            deltas_merged: 0,
            tombstones_applied: 0,
            rows_dropped_by_tombstones: 0,
@ -99,7 +102,7 @@ pub async fn compact(
    }

    base_batches.extend(delta_batches);
-    let pre_filter_rows: usize = base_batches.iter().map(|b| b.num_rows()).sum();
+    let pre_filter_rows: usize = base_row_count + delta_row_count;

    // If primary key specified, deduplicate (keep last occurrence)
    let merged_batches = if let Some(_pk) = primary_key_col {
@ -183,8 +186,8 @@ pub async fn compact(
    );

    Ok(CompactResult {
-        base_rows: pre_filter_rows - delta_count,  // rough base-before-deltas
-        delta_rows: delta_count,
+        base_rows: base_row_count,
+        delta_rows: delta_row_count,
        final_rows,
        deltas_merged: delta_count,
        tombstones_applied: tombstones.len(),
--- a/crates/queryd/src/service.rs
+++ b/crates/queryd/src/service.rs
@ -9,7 +9,9 @@ use axum::{
 };
 use serde::{Deserialize, Serialize};

-use crate::cache::CacheStats;
+use std::sync::Arc;
+use truth::{RuleAction, TruthStore};
+
 use crate::context::QueryEngine;
 use crate::delta;
 use crate::paged::ResultStore;
@ -18,12 +20,26 @@ use crate::paged::ResultStore;
 pub struct QueryState {
    pub engine: QueryEngine,
    pub result_store: ResultStore,
+    // Policy gate for incoming SQL. Every /sql and /paged request is
+    // evaluated against this store before hitting DataFusion. Added for
+    // P42-002 ("raw SQL forwarded without schema or policy gate") after
+    // the scrum master's queryd/service.rs finding looped across iters
+    // 3-5 without ever being reachable by the 6-line auto-applier.
+    pub truth: Arc<TruthStore>,
 }

 pub fn router(engine: QueryEngine) -> Router {
+    router_with_truth(engine, Arc::new(truth::sql_query_guard_store()))
+}
+
+/// Test/integration hook: construct the router with a caller-supplied
+/// TruthStore so tests can assert reject/pass behavior deterministically
+/// without depending on the default needle list.
+pub fn router_with_truth(engine: QueryEngine, truth: Arc<TruthStore>) -> Router {
    let state = QueryState {
        engine: engine.clone(),
        result_store: ResultStore::new(100, 50), // 100 rows/page, keep 50 results
+        truth,
    };
    Router::new()
        .route("/health", get(health))
@ -53,6 +69,11 @@ struct QueryResponse {
    columns: Vec<ColumnInfo>,
    rows: serde_json::Value,
    row_count: usize,
+    // Elapsed wall time from handler entry to response. Required for
+    // audit-log parity — gateway's audit row previously stored null here.
+    // Scrum iter 9 finding, populated from std::time::Instant captured
+    // at the top of execute_query / paged_query.
+    latency_ms: u64,
 }

 #[derive(Serialize)]
@ -72,12 +93,41 @@ fn batches_to_json(batches: &[RecordBatch]) -> Result<serde_json::Value, String>
    serde_json::from_slice(&buf).map_err(|e| format!("JSON parse error: {e}"))
 }

+/// Run `sql` through the `"sql_query"` rules of `truth`, passing it to
+/// the store as `{ "sql": <sql> }`.
+///
+/// Convention used here: an outcome with `passed == true` means the
+/// rule's condition HELD for this SQL, which is when its action is
+/// enforced; `passed == false` means the condition did not hold and
+/// the rule is ignored. Returns the message of the first enforced
+/// outcome whose action is `Reject` or `Block`, so the handler can
+/// short-circuit. Returns `None` when no such outcome exists (any
+/// other action kind never rejects).
+fn sql_policy_check(truth: &TruthStore, sql: &str) -> Option<String> {
+    let ctx = serde_json::json!({ "sql": sql });
+    truth
+        .evaluate("sql_query", &ctx)
+        .into_iter()
+        // Only rules whose condition held are candidates for enforcement.
+        .filter(|outcome| outcome.passed)
+        .find_map(|outcome| match &outcome.action {
+            RuleAction::Reject { message } | RuleAction::Block { message } => {
+                Some(message.clone())
+            }
+            _ => None,
+        })
+}
+
 async fn execute_query(
    State(state): State<QueryState>,
    Json(req): Json<QueryRequest>,
 ) -> impl IntoResponse {
+    let started = std::time::Instant::now();
    tracing::info!("executing query: {}", req.sql);

+    if let Some(reason) = sql_policy_check(&state.truth, &req.sql) {
+        tracing::warn!("sql rejected by truth gate: {reason}");
+        return Err((StatusCode::FORBIDDEN, reason));
+    }
+
    match state.engine.query(&req.sql).await {
        Ok(batches) => {
            if batches.is_empty() {
@ -85,6 +135,7 @@ async fn execute_query(
                    columns: vec![],
                    rows: serde_json::Value::Array(vec![]),
                    row_count: 0,
+                    latency_ms: started.elapsed().as_millis() as u64,
                }));
            }

@ -103,6 +154,7 @@ async fn execute_query(
                columns,
                rows,
                row_count,
+                latency_ms: started.elapsed().as_millis() as u64,
            }))
        }
        Err(e) => Err((StatusCode::BAD_REQUEST, e)),
@ -116,6 +168,10 @@ async fn paged_query(
    Json(req): Json<QueryRequest>,
 ) -> impl IntoResponse {
    tracing::info!("paged query: {}", req.sql);
+    if let Some(reason) = sql_policy_check(&state.truth, &req.sql) {
+        tracing::warn!("paged sql rejected by truth gate: {reason}");
+        return Err((StatusCode::FORBIDDEN, reason));
+    }
    match state.result_store.execute_and_store(&state.engine, &req.sql).await {
        Ok(handle) => Ok(Json(handle)),
        Err(e) => Err((StatusCode::BAD_REQUEST, e)),
@ -212,3 +268,65 @@ async fn compact_dataset(
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
 }
+
+#[cfg(test)]
+mod sql_policy_tests {
+    use super::*;
+    use truth::sql_query_guard_store;
+
+    // The policy gate only needs a `TruthStore`, so none of these tests
+    // start a DataFusion engine. They are the regression guard for the
+    // P42-002 enforcement point (the queryd/service.rs finding that
+    // looped across scrum iters 3-5): destructive SQL must be rejected.
+
+    /// Runs `sql` through the gate backed by a fresh guard store and
+    /// returns the rejection reason, if any.
+    fn gate(sql: &str) -> Option<String> {
+        let guard = sql_query_guard_store();
+        sql_policy_check(&guard, sql)
+    }
+
+    #[test]
+    fn blocks_drop_table() {
+        let reason = gate("DROP TABLE users").expect("must reject");
+        assert!(reason.contains("destructive"), "reason: {reason}");
+    }
+
+    #[test]
+    fn blocks_delete_from() {
+        let verdict = gate("delete from t where 1=1");
+        assert!(verdict.is_some());
+    }
+
+    #[test]
+    fn blocks_truncate() {
+        let verdict = gate("TRUNCATE workers");
+        assert!(verdict.is_some());
+    }
+
+    #[test]
+    fn blocks_empty_sql() {
+        let verdict = gate("");
+        assert!(verdict.is_some());
+    }
+
+    #[test]
+    fn allows_benign_select() {
+        let verdict = gate("SELECT count(*) FROM workers");
+        assert!(verdict.is_none());
+    }
+
+    #[test]
+    fn allows_select_with_deleted_word_in_column() {
+        // The needle is "delete from", not "delete", so a column such
+        // as `deleted_at` must pass. A gate that trips on benign
+        // queries would be unusable in practice.
+        let verdict = gate("SELECT deleted_at FROM t");
+        assert!(
+            verdict.is_none(),
+            "column names containing 'delete' must not be rejected"
+        );
+    }
+
+    #[test]
+    fn case_insensitive_match_catches_mixed_case() {
+        let verdict = gate("Drop Table X");
+        assert!(verdict.is_some());
+    }
+}
--- a/crates/queryd/src/workspace.rs
+++ b/crates/queryd/src/workspace.rs
@ -2,15 +2,12 @@
 /// Each workspace tracks an agent's activity on a specific contract or search,
 /// with daily/weekly/monthly tiers and instant handoff capability.

-use arrow::array::{ArrayRef, RecordBatch, StringArray, Int64Array};
-use arrow::datatypes::{DataType, Field, Schema};
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::RwLock;

-use crate::delta;
 use object_store::ObjectStore;

 /// Retention tier for workspace data.
--- a/crates/shared/src/config.rs
+++ b/crates/shared/src/config.rs
@ -62,6 +62,15 @@ pub struct GatewayConfig {
    pub host: String,
    #[serde(default = "default_gateway_port")]
    pub port: u16,
+    /// Coordinator session JSONL output path. One row per
+    /// `/v1/iterate` session, schema=`session.iterate.v1`. Empty =
+    /// disabled. Cross-runtime parity with the Go side's
+    /// `[validatord].session_log_path` (added 2026-05-02). Default
+    /// empty so existing deployments aren't perturbed; production
+    /// sets `/var/lib/lakehouse/gateway/sessions.jsonl`. See
+    /// `golangLAKEHOUSE/docs/SESSION_LOG.md` for query examples.
+    #[serde(default)]
+    pub session_log_path: String,
 }

 #[derive(Debug, Clone, Deserialize)]
@ -149,7 +158,13 @@ fn default_gateway_port() -> u16 { 3100 }
 fn default_storage_root() -> String { "./data".to_string() }
 fn default_profile_root() -> String { "./data/_profiles".to_string() }
 fn default_manifest_prefix() -> String { "_catalog/manifests".to_string() }
-fn default_sidecar_url() -> String { "http://localhost:3200".to_string() }
+// Post-2026-05-02: AiClient talks directly to Ollama; the Python
+// sidecar's hot-path role was retired. The config field name
+// `[sidecar].url` is preserved for migration compatibility (operators
+// with existing TOMLs don't need to rename anything), but the value
+// now points at Ollama. Lab UI / pipeline_lab Python remains as a
+// dev-only tool; not on this URL.
+fn default_sidecar_url() -> String { "http://localhost:11434".to_string() }
 fn default_embed_model() -> String { "nomic-embed-text".to_string() }
 fn default_gen_model() -> String { "qwen2.5".to_string() }
 fn default_rerank_model() -> String { "qwen2.5".to_string() }
@ -184,7 +199,11 @@ impl Config {
 impl Default for Config {
    fn default() -> Self {
        Self {
-            gateway: GatewayConfig { host: default_host(), port: default_gateway_port() },
+            gateway: GatewayConfig {
+                host: default_host(),
+                port: default_gateway_port(),
+                session_log_path: String::new(),
+            },
            storage: StorageConfig {
                root: default_storage_root(),
                profile_root: default_profile_root(),
--- a/crates/shared/src/lib.rs
+++ b/crates/shared/src/lib.rs
@ -4,3 +4,5 @@ pub mod arrow_helpers;
 pub mod config;
 pub mod pii;
 pub mod secrets;
+pub mod model_matrix;
+pub mod profiles;
--- a/crates/shared/src/model_matrix.rs
+++ b/crates/shared/src/model_matrix.rs
@ -0,0 +1,69 @@
+//! Per-model token accounting. Entry point for the ModelMatrix work
+//! the aibridge `context::estimate_tokens` deprecation has been pointing
+//! at. Starts minimal — just `estimate_tokens` — so call sites can
+//! migrate off the deprecated helper. Extend with per-model context
+//! windows, max_tokens defaults, provider hints, etc. as we move the
+//! rest of `aibridge::context::known_windows` over.
+
+/// Namespace for per-model token + context accounting. Methods are
+/// associated functions — no instance required — because the underlying
+/// estimates are deterministic and stateless.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct ModelMatrix;
+
+impl ModelMatrix {
+    /// Rough token count — char count divided by 4, rounded up. This
+    /// is the same heuristic OpenAI's cookbook uses for English text;
+    /// it's within ±15% of BPE tokenizers for code + prose and doesn't
+    /// require a tokenizer lookup. Good enough for budget math where
+    /// the goal is "don't blow the context window" rather than exact
+    /// billing.
+    ///
+    /// Counts Unicode scalar values (`char`s), not bytes, so multi-byte
+    /// UTF-8 text is not over-counted.
+    ///
+    /// Moved from `aibridge::context::estimate_tokens` (still there with
+    /// a `#[deprecated]` pointer — callers should migrate here). Empty
+    /// string → 0; one char → 1 (ceiling of 1/4 = 1).
+    #[must_use]
+    pub fn estimate_tokens(text: &str) -> usize {
+        // `div_ceil` is the std spelling of `(n + 3) / 4` and has no
+        // intermediate addition that could overflow.
+        text.chars().count().div_ceil(4)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ModelMatrix;
+
+    /// Shorthand for the function under test.
+    fn tokens(text: &str) -> usize {
+        ModelMatrix::estimate_tokens(text)
+    }
+
+    #[test]
+    fn empty_string_is_zero_tokens() {
+        let estimate = tokens("");
+        assert_eq!(estimate, 0);
+    }
+
+    #[test]
+    fn three_chars_is_one_token() {
+        // ceil(3/4) = 1, the same answer the deprecated helper gave,
+        // so migrating call sites is a drop-in swap.
+        let estimate = tokens("abc");
+        assert_eq!(estimate, 1);
+    }
+
+    #[test]
+    fn four_chars_is_one_token() {
+        let estimate = tokens("abcd");
+        assert_eq!(estimate, 1);
+    }
+
+    #[test]
+    fn five_chars_is_two_tokens() {
+        let estimate = tokens("abcde");
+        assert_eq!(estimate, 2);
+    }
+
+    #[test]
+    fn counts_chars_not_bytes() {
+        // Each multi-byte UTF-8 char counts once, which matters for
+        // prompts with emoji or non-ASCII text: "héllo" is 5 chars, so
+        // ceil(5/4) = 2 tokens, same as "hello".
+        let accented = tokens("héllo");
+        let emoji = tokens("📚📚📚📚"); // 4 chars
+        assert_eq!(accented, 2);
+        assert_eq!(emoji, 1);
+    }
+
+    #[test]
+    fn large_text_scales_linearly() {
+        let exact_multiple = "x".repeat(400);
+        let one_over = "x".repeat(401);
+        assert_eq!(tokens(&exact_multiple), 100);
+        assert_eq!(tokens(&one_over), 101);
+    }
+}
--- a/crates/shared/src/profiles/execution.rs
+++ b/crates/shared/src/profiles/execution.rs
@ -0,0 +1,14 @@
+//! ExecutionProfile — the Phase 41 rename of Phase 17's ModelProfile.
+//!
+//! Carries what's needed to RUN inference: model tag, dataset bindings,
+//! HNSW config, embed model, bucket binding. Today this is a type
+//! alias over `crate::types::ModelProfile` — the PRD's
+//! "Backward compat: ModelProfile still loads, aliased to
+//! ExecutionProfile" line, honored literally.
+//!
+//! When the migration off the old name finishes, this file can either
+//! absorb the full struct definition or continue as an alias. Callers
+//! should reference `ExecutionProfile` going forward; `ModelProfile`
+//! stays exported from `types` for on-disk schema compat.
+
+pub use crate::types::ModelProfile as ExecutionProfile;
--- a/crates/shared/src/profiles/memory.rs
+++ b/crates/shared/src/profiles/memory.rs
@ -0,0 +1,38 @@
+//! MemoryProfile — how the agent's execution memory is kept.
+//!
+//! Phase 41 decomposition: the Phase 19 playbook_memory + Phase 26
+//! successful_playbooks + Phase 45 doc_refs all need per-profile
+//! tuning. Rather than bolt those onto ExecutionProfile, they live
+//! here so a "thin" execution profile can reuse a "fat" memory
+//! profile and vice versa.
+
+use serde::{Deserialize, Serialize};
+
+/// Per-profile tuning for how execution memory is kept.
+///
+/// `id` and `created_at` have no serde default and must be present in
+/// a serialized profile; every other field falls back to the default
+/// noted on it when omitted.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct MemoryProfile {
+    /// Profile identifier. Required.
+    pub id: String,
+    /// Free-text description. Empty when omitted.
+    #[serde(default)]
+    pub description: String,
+    /// Phase 19: ceiling for playbook_memory boost on retrieval. 0
+    /// disables the boost entirely. Defaults to 0.35.
+    #[serde(default = "default_boost_ceiling")]
+    pub playbook_boost_ceiling: f32,
+    /// Phase 26: max history entries retained before rotation.
+    /// Defaults to 1000.
+    #[serde(default = "default_history_cap")]
+    pub history_cap: usize,
+    /// Phase 45: stale threshold for doc_refs before drift check
+    /// fires (hours). Defaults to 168 (one week).
+    #[serde(default = "default_stale_hours")]
+    pub doc_stale_hours: u32,
+    /// Phase 28: auto-retire playbooks that fail 3+ consecutive runs.
+    /// Defaults to `true`.
+    #[serde(default = "default_true")]
+    pub auto_retire_on_failure: bool,
+    /// UTC timestamp. Required.
+    /// NOTE(review): presumably the profile's creation time, per the
+    /// field name — confirm with whatever writes these records.
+    pub created_at: chrono::DateTime<chrono::Utc>,
+    /// Empty when omitted.
+    /// NOTE(review): presumably the operator or agent that created the
+    /// profile — confirm with the writer.
+    #[serde(default)]
+    pub created_by: String,
+}
+
+// Serde default providers referenced by the `#[serde(default = "...")]`
+// attributes on `MemoryProfile`.
+fn default_boost_ceiling() -> f32 { 0.35 }
+fn default_history_cap() -> usize { 1000 }
+fn default_stale_hours() -> u32 { 168 } // one week
+fn default_true() -> bool { true }
--- a/crates/shared/src/profiles/mod.rs
+++ b/crates/shared/src/profiles/mod.rs
@ -0,0 +1,28 @@
+//! Phase 41 profile types.
+//!
+//! The existing `ModelProfile` (Phase 17) is aliased as
+//! `ExecutionProfile` here — it continues to carry the model +
+//! bindings + HNSW config needed to run inference. Three new profile
+//! types land alongside: `RetrievalProfile`, `MemoryProfile`,
+//! `ObserverProfile` — each owns a distinct slice of what used to be
+//! bundled.
+//!
+//! Backward-compat rule (PRD Phase 41): existing `ModelProfile` on
+//! disk continues to deserialize unchanged. On the three new profile
+//! types every field except `id` and `created_at` carries a serde
+//! default (`#[serde(default)]` or a `default = "..."` function), so
+//! payloads that omit those fields still load.
+//!
+//! These are the canonical shapes. `ExecutionProfile` is currently a
+//! type alias for `ModelProfile` (the same struct under a different
+//! name), so a `ModelProfile` value is usable as an `ExecutionProfile`
+//! without any conversion; downstream code constructs the other three
+//! as needed.
+
+pub mod execution;
+pub mod retrieval;
+pub mod memory;
+pub mod observer;
+
+pub use execution::ExecutionProfile;
+pub use memory::MemoryProfile;
+pub use observer::ObserverProfile;
+pub use retrieval::RetrievalProfile;
--- a/crates/shared/src/profiles/observer.rs
+++ b/crates/shared/src/profiles/observer.rs
@ -0,0 +1,38 @@
+//! ObserverProfile — how loudly the observer logs this workload.
+//!
+//! Phase 41 decomposition: the observer's alert thresholds, escalation
+//! cadence, and log retention need per-workload tuning. Hot-path
+//! staffing workflows want aggressive alerting; batch backfills want
+//! quieter. This profile is read by mcp-server/observer.ts at
+//! activation-time.
+
+use serde::{Deserialize, Serialize};
+
+/// Per-workload tuning for observer alerting and logging.
+///
+/// `id` and `created_at` have no serde default and must be present in
+/// a serialized profile; every other field falls back to the default
+/// noted on it when omitted.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ObserverProfile {
+    /// Profile identifier. Required.
+    pub id: String,
+    /// Free-text description. Empty when omitted.
+    #[serde(default)]
+    pub description: String,
+    /// How many consecutive failures trigger a cluster escalation to
+    /// LLM Team `/v1/chat` (qwen3-coder:480b). Defaults to 3.
+    #[serde(default = "default_failure_cluster_size")]
+    pub failure_cluster_size: u32,
+    /// Minimum seconds between alert emails for the same sig_hash.
+    /// Prevents alert storms during a regression. Defaults to 300.
+    #[serde(default = "default_alert_cooldown")]
+    pub alert_cooldown_secs: u32,
+    /// Observer ring buffer size. Older events fall off when full.
+    /// Defaults to 2000.
+    #[serde(default = "default_ring_size")]
+    pub ring_size: usize,
+    /// Whether to forward events to external Langfuse
+    /// (/v1/langfuse_trace). Off by default.
+    #[serde(default)]
+    pub forward_to_langfuse: bool,
+    /// UTC timestamp. Required.
+    /// NOTE(review): presumably the profile's creation time, per the
+    /// field name — confirm with whatever writes these records.
+    pub created_at: chrono::DateTime<chrono::Utc>,
+    /// Empty when omitted.
+    /// NOTE(review): presumably the operator or agent that created the
+    /// profile — confirm with the writer.
+    #[serde(default)]
+    pub created_by: String,
+}
+
+// Serde default providers referenced by the `#[serde(default = "...")]`
+// attributes on `ObserverProfile`.
+fn default_failure_cluster_size() -> u32 { 3 }
+fn default_alert_cooldown() -> u32 { 300 } // 5 minutes
+fn default_ring_size() -> usize { 2000 }
--- a/crates/shared/src/profiles/retrieval.rs
+++ b/crates/shared/src/profiles/retrieval.rs
@ -0,0 +1,52 @@
+//! RetrievalProfile — what + how the agent reaches into memory.
+//!
+//! Phase 41 decomposition: the old ModelProfile bundled "what dataset
+//! can I read" (bound_datasets) AND "how do I rank results"
+//! (hnsw_config) with the model tag. Retrieval concerns split out here
+//! so a profile can swap its retrieval strategy without re-activating
+//! the model.
+//!
+//! Fields chosen for what's actually varied per-workload today:
+//!   - `top_k` / `rerank_top_k` — how many hits to fetch + rerank
+//!   - `freshness_cutoff_days` — Phase 45 doc-drift uses this
+//!   - `boost_playbook_memory` — Phase 19 meta-index feedback
+//!   - `enforce_sensitivity_gates` — Phase 13 access-control integration
+//!
+//! Every field except `id` and `created_at` has a serde default, so a
+//! profile file that omits the tuning fields loads without migration.
+
+use serde::{Deserialize, Serialize};
+
+/// Per-profile retrieval tuning: how many hits to fetch and rerank,
+/// freshness filtering, playbook boosting and sensitivity gating.
+///
+/// `id` and `created_at` have no serde default and must be present in
+/// a serialized profile; every other field falls back to the default
+/// noted on it when omitted.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct RetrievalProfile {
+    /// Unique id — slug form, separate namespace from ExecutionProfile.
+    pub id: String,
+    /// Free-text operator description.
+    #[serde(default)]
+    pub description: String,
+    /// Default top-K for /vectors/search + /vectors/hybrid.
+    /// Defaults to 10.
+    #[serde(default = "default_top_k")]
+    pub top_k: u32,
+    /// How many of the top-K to pass through the reranker. 0 disables
+    /// reranking for this profile. Defaults to 5.
+    #[serde(default = "default_rerank_top_k")]
+    pub rerank_top_k: u32,
+    /// Don't consider playbooks / docs older than this (days). 0 or
+    /// absent = no freshness filter.
+    #[serde(default)]
+    pub freshness_cutoff_days: u32,
+    /// Phase 19: boost workers/results by playbook_memory similarity.
+    /// Off when omitted.
+    #[serde(default)]
+    pub boost_playbook_memory: bool,
+    /// Phase 13: apply access-control masking on sensitive columns.
+    /// Default on — safety-first.
+    #[serde(default = "default_true")]
+    pub enforce_sensitivity_gates: bool,
+    /// UTC timestamp. Required.
+    /// NOTE(review): presumably the profile's creation time, per the
+    /// field name — confirm with whatever writes these records.
+    pub created_at: chrono::DateTime<chrono::Utc>,
+    /// Empty when omitted.
+    /// NOTE(review): presumably the operator or agent that created the
+    /// profile — confirm with the writer.
+    #[serde(default)]
+    pub created_by: String,
+}
+
+// Serde default providers referenced by the `#[serde(default = "...")]`
+// attributes on `RetrievalProfile`.
+fn default_top_k() -> u32 { 10 }
+fn default_rerank_top_k() -> u32 { 5 }
+fn default_true() -> bool { true }
--- a/crates/shared/src/types.rs
+++ b/crates/shared/src/types.rs
@ -269,6 +269,20 @@ pub struct ModelProfile {
    /// / 5M+ vector workloads.
    #[serde(default)]
    pub vector_backend: VectorBackend,
+    /// Phase 41: Profile type for routing + activation behavior.
+    /// Default Execution for backward compatibility.
+    #[serde(default)]
+    pub profile_type: ProfileType,
+}
+
+/// Which Phase 41 profile family a stored profile belongs to.
+///
+/// Serialized in `snake_case` (`"execution"`, `"retrieval"`, ...).
+/// `Execution` is the `Default`, so a `ModelProfile` payload written
+/// before this field existed deserializes as an execution profile.
+///
+/// `Eq` and `Hash` are derived alongside `PartialEq` so the tag can be
+/// used as a map/set key and in exhaustive equality checks.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum ProfileType {
+    #[default]
+    Execution,
+    Retrieval,
+    Memory,
+    Observer,
 }

 fn default_embed_model() -> String { "nomic-embed-text".to_string() }
--- a/crates/truth/Cargo.toml
+++ b/crates/truth/Cargo.toml
@ -0,0 +1,11 @@
+[package]
+name = "truth"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+serde = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true }
+tracing = { workspace = true }
+toml = { workspace = true }
--- a/crates/truth/src/devops.rs
+++ b/crates/truth/src/devops.rs
@ -0,0 +1,49 @@
+//! DevOps task-class rules — scaffold for the long-horizon phase.
+//!
+//! Phase 42 PRD: "Terraform/Ansible rule shapes are scaffolded but
+//! unpopulated until the long-horizon phase. Keeps the dispatcher
+//! signature stable so no refactor needed later."
+//!
+//! This module is intentionally minimal. It registers no rules yet.
+//! The `devops_rules` function exists so callers can compose it onto
+//! a store (e.g. `devops_rules(staffing_rules(TruthStore::new()))`)
+//! without branching on whether the DevOps phase has landed.
+//!
+//! When the long-horizon phase fleshes out the DevOps rule set, the
+//! implementations drop in here — same `RuleCondition` primitives, same
+//! `TruthStore::evaluate` contract, zero upstream refactor.
+
+use crate::TruthStore;
+
+/// Register DevOps rules on the store. Currently a no-op scaffold —
+/// no rules are added. Safe to compose with other rule-set functions.
+///
+/// The store is taken and returned by value, so the result must be
+/// used: discarding it drops the store that was passed in.
+///
+/// Planned task classes (not yet populated):
+///   - `devops.terraform_plan` — `terraform validate` + pre-plan
+///     sanity checks (no destroys without confirm flag, etc.)
+///   - `devops.ansible_playbook` — `ansible-lint` + privileged-task
+///     gates (no `become: true` on untagged hosts)
+///   - `devops.shell_command` — whitelist / blocklist for
+///     AI-generated shell invocations (covers what Phase 42
+///     queryd SQL gate does for SQL — same idea, shell surface)
+#[must_use = "the store is passed by value; ignoring the result discards it"]
+pub fn devops_rules(store: TruthStore) -> TruthStore {
+    // Intentionally empty. See module-level doc for the phased rollout.
+    store
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn devops_rules_is_a_noop_for_now() {
+        // Scaffold guarantee: layering devops_rules over an empty
+        // store registers nothing. Once the long-horizon work fills
+        // the rule set in, this check becomes a count of the expected
+        // additions.
+        let composed = devops_rules(TruthStore::new());
+        let planned_classes = [
+            "devops.terraform_plan",
+            "devops.ansible_playbook",
+            "devops.shell_command",
+        ];
+        for task_class in planned_classes {
+            assert_eq!(composed.get_rules(task_class).len(), 0);
+        }
+    }
+}
--- a/crates/truth/src/lib.rs
+++ b/crates/truth/src/lib.rs
@ -0,0 +1,610 @@
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+pub mod staffing;
+pub mod devops;
+pub mod loader;
+
+/// One policy rule: a condition evaluated against a JSON context plus
+/// the action declared for it.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct TruthRule {
+    /// Rule identifier. `TruthStore::all_rule_ids` collects these so
+    /// the file loader can detect duplicate-ID collisions.
+    pub id: String,
+    /// Key the rule is grouped under inside `TruthStore`; lookups and
+    /// evaluation are always per task class.
+    pub task_class: String,
+    /// Human-readable summary of the rule.
+    pub description: String,
+    /// Predicate walked against the context by `TruthStore::evaluate`.
+    pub condition: RuleCondition,
+    /// Action attached to the rule's outcome. The store only reports
+    /// it; applying it is left to the caller.
+    pub action: RuleAction,
+}
+
+/// Predicate over a JSON context. `field` is a dot-separated path
+/// (`"worker.status"` → `ctx["worker"]["status"]`). Serialized with an
+/// internal `"type"` tag naming the variant.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type")]
+pub enum RuleCondition {
+    /// Holds for every context.
+    Always,
+    /// Holds when the field is a string, bool or number whose string
+    /// form equals `value`. A missing field, or one of another JSON
+    /// type, does not hold.
+    FieldEquals { field: String, value: String },
+    /// Holds when the field is a string, bool or number whose string
+    /// form differs from `value`. A missing field does NOT hold.
+    FieldMismatch { field: String, value: String },
+    /// Holds when the field is missing, `null`, or an empty string.
+    FieldEmpty { field: String },
+    /// Holds when the field is a JSON number greater than `threshold`.
+    /// A missing or non-numeric field does not hold.
+    FieldGreater { field: String, threshold: i64 },
+    /// Case-insensitive (ASCII) substring scan — holds if the field
+    /// value contains ANY of `needles`. Added for SQL/command guards,
+    /// where a rule such as "sql must not contain DROP/DELETE/TRUNCATE"
+    /// is written as a condition that detects the forbidden text; the
+    /// caller enforces the rule's action when the condition holds.
+    FieldContainsAny { field: String, needles: Vec<String> },
+}
+
+/// Action declared by a rule. The store never applies actions itself;
+/// it hands them back to the caller. Serialized with an internal
+/// `"type"` tag naming the variant.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type")]
+pub enum RuleAction {
+    /// No enforcement.
+    Pass,
+    /// Refuse the request; `message` is the reason given to the caller.
+    Reject { message: String },
+    /// Names the fields to redact.
+    /// NOTE(review): no consumer of `Redact` is visible here — confirm
+    /// how callers apply it.
+    Redact { fields: Vec<String> },
+    /// Carries a `message` like `Reject`; the queryd SQL gate treats
+    /// `Reject` and `Block` identically.
+    Block { message: String },
+}
+
+/// In-memory registry of [`TruthRule`]s, grouped by `task_class`.
+#[derive(Clone, Debug, Default)]
+pub struct TruthStore {
+    rules: HashMap<String, Vec<TruthRule>>,
+}
+
+impl TruthStore {
+    /// Creates a store with no rules registered.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Registers `rule` under its own `task_class`, after any rules
+    /// already registered for that class. No duplicate-ID check is
+    /// made here; see [`TruthStore::all_rule_ids`].
+    pub fn add_rule(&mut self, rule: TruthRule) {
+        self.rules
+            .entry(rule.task_class.clone())
+            .or_default()
+            .push(rule);
+    }
+
+    /// All rule IDs across every task class. Used by the file loader
+    /// to detect duplicate-ID collisions before registering new rules.
+    pub fn all_rule_ids(&self) -> std::collections::HashSet<String> {
+        self.rules
+            .values()
+            .flatten()
+            .map(|rule| rule.id.clone())
+            .collect()
+    }
+
+    /// Rules registered for `task_class`, in registration order.
+    /// Returns an empty vector for an unknown task class.
+    pub fn get_rules(&self, task_class: &str) -> Vec<&TruthRule> {
+        self.rules
+            .get(task_class)
+            .map(|registered| registered.iter().collect())
+            .unwrap_or_default()
+    }
+
+    /// Legacy API: returns the list of actions registered for a task class
+    /// without evaluating conditions. Retained for backward compatibility
+    /// with callers that only want the action catalog. New callers should
+    /// prefer `evaluate()`, which actually walks `RuleCondition` against
+    /// a context and reports per-rule pass/fail.
+    pub fn check(&self, task_class: &str) -> Vec<RuleAction> {
+        self.get_rules(task_class)
+            .into_iter()
+            .map(|rule| rule.action.clone())
+            .collect()
+    }
+
+    /// Evaluate every rule registered for `task_class` against `ctx`,
+    /// returning one `RuleOutcome` per rule in registration order.
+    ///
+    /// `passed` reports only whether the rule's `condition` held for
+    /// `ctx`; the rule's `action` is attached unchanged either way.
+    /// Which value of `passed` triggers enforcement depends on how the
+    /// rule set is written, and the caller must apply that convention:
+    ///
+    /// - requirement-style rules, whose condition describes the state
+    ///   that must hold (e.g. a `FieldEquals` on a required status),
+    ///   are enforced when `passed == false`;
+    /// - violation-style rules, whose condition detects forbidden
+    ///   input (e.g. the `FieldContainsAny` needles registered by
+    ///   `sql_query_guard_store`), are enforced when `passed == true`.
+    ///
+    /// Fixed P42-001 (2026-04-23): previously `check()` returned all actions
+    /// unconditionally — the `RuleCondition` field was ignored. Now every
+    /// rule is actually walked against the provided context.
+    pub fn evaluate(&self, task_class: &str, ctx: &serde_json::Value) -> Vec<RuleOutcome> {
+        self.get_rules(task_class)
+            .into_iter()
+            .map(|rule| RuleOutcome {
+                rule_id: rule.id.clone(),
+                passed: evaluate_condition(&rule.condition, ctx),
+                action: rule.action.clone(),
+            })
+            .collect()
+    }
+}
+
+/// Result of evaluating one rule against a context. `passed` reports
+/// whether the condition held; `action` is the rule's declared action
+/// regardless (callers decide how to apply it based on `passed`).
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct RuleOutcome {
+    /// `id` of the rule that produced this outcome.
+    pub rule_id: String,
+    /// `true` when the rule's condition held for the context.
+    pub passed: bool,
+    /// Copy of the rule's declared action; not filtered by `passed`.
+    pub action: RuleAction,
+}
+
+/// Decides whether `cond` holds for `ctx`.
+///
+/// Field paths are resolved with `lookup`; `FieldEquals`,
+/// `FieldMismatch` and `FieldContainsAny` compare against the string
+/// form produced by `field_as_string`, so they never hold for a missing
+/// field or for arrays, objects and `null`.
+fn evaluate_condition(cond: &RuleCondition, ctx: &serde_json::Value) -> bool {
+    match cond {
+        RuleCondition::Always => true,
+        RuleCondition::FieldEquals { field, value } => {
+            field_as_string(ctx, field).is_some_and(|s| s == *value)
+        }
+        // A missing field is "unknown", not "different": it does not hold.
+        RuleCondition::FieldMismatch { field, value } => {
+            field_as_string(ctx, field).is_some_and(|s| s != *value)
+        }
+        RuleCondition::FieldEmpty { field } => match lookup(ctx, field) {
+            None => true,
+            Some(v) => v.is_null() || v.as_str().is_some_and(str::is_empty),
+        },
+        RuleCondition::FieldGreater { field, threshold } => match lookup(ctx, field) {
+            None => false,
+            Some(v) => match v.as_i64() {
+                // Integers that fit in i64 are compared exactly.
+                Some(n) => n > *threshold,
+                // Everything else numeric (floats, u64 above i64::MAX)
+                // is compared as f64. Truncating to i64 first would
+                // make 10.5 > 10 evaluate to false.
+                None => v.as_f64().is_some_and(|f| f > *threshold as f64),
+            },
+        },
+        RuleCondition::FieldContainsAny { field, needles } => {
+            match field_as_string(ctx, field) {
+                None => false,
+                Some(s) => {
+                    // ASCII-only case folding on both sides; the match
+                    // itself is a literal substring test.
+                    let haystack = s.to_ascii_lowercase();
+                    needles
+                        .iter()
+                        .any(|n| haystack.contains(&n.to_ascii_lowercase()))
+                }
+            }
+        }
+    }
+}
+
+/// Resolves a dot-separated `path` inside `ctx`, one key per segment:
+/// `"worker.status"` reads `ctx["worker"]["status"]`. Yields `None` as
+/// soon as a segment is absent or the current node is not an object.
+fn lookup<'a>(ctx: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
+    path.split('.').try_fold(ctx, |node, key| node.get(key))
+}
+
+/// Reads the value at `path` and renders it as a `String`. Strings are
+/// copied as-is; bools and numbers use their `to_string` form. A
+/// missing path, `null`, an array or an object all yield `None`.
+fn field_as_string(ctx: &serde_json::Value, path: &str) -> Option<String> {
+    use serde_json::Value;
+
+    let rendered = match lookup(ctx, path)? {
+        Value::String(text) => text.clone(),
+        Value::Bool(flag) => flag.to_string(),
+        Value::Number(num) => num.to_string(),
+        _ => return None,
+    };
+    Some(rendered)
+}
+
+/// Builds the minimal SQL guard: a store holding two `sql_query` rules,
+/// one detecting destructive verbs (DROP/TRUNCATE/DELETE) and one
+/// detecting empty SQL. queryd/src/service.rs loads this into its
+/// `QueryState` and evaluates every `/sql` request against it before
+/// hitting the DataFusion engine. This is the P42-002 enforcement
+/// point flagged across scrum iters 3-5 ("raw SQL forwarded without
+/// schema or policy gate").
+///
+/// Both rules are written as violation detectors: the caller rejects
+/// the request when a rule's condition HOLDS.
+///
+/// Intentionally narrow: it's a safety net, not a full SQL parser. If
+/// callers need richer AST-aware enforcement they should extend this
+/// with structured rules rather than new needles.
+///
+/// NOTE(review): the needles are matched as literal, ASCII
+/// case-insensitive substrings. Two-word needles therefore only match
+/// when the words are separated by exactly one space, and `truncate`
+/// also matches inside longer identifiers.
+pub fn sql_query_guard_store() -> TruthStore {
+    const TASK_CLASS: &str = "sql_query";
+    const SQL_FIELD: &str = "sql";
+
+    let destructive_needles: Vec<String> = [
+        "drop table",
+        "drop schema",
+        "drop database",
+        "truncate",
+        "delete from",
+    ]
+    .into_iter()
+    .map(String::from)
+    .collect();
+
+    let no_destructive_sql = TruthRule {
+        id: String::from("no-destructive-sql"),
+        task_class: String::from(TASK_CLASS),
+        description: String::from("SQL must not contain destructive verbs"),
+        condition: RuleCondition::FieldContainsAny {
+            field: String::from(SQL_FIELD),
+            needles: destructive_needles,
+        },
+        action: RuleAction::Reject {
+            message: String::from("destructive SQL rejected by truth.sql_query_guard"),
+        },
+    };
+
+    let sql_not_empty = TruthRule {
+        id: String::from("sql-not-empty"),
+        task_class: String::from(TASK_CLASS),
+        description: String::from("SQL must not be empty"),
+        condition: RuleCondition::FieldEmpty {
+            field: String::from(SQL_FIELD),
+        },
+        action: RuleAction::Reject {
+            message: String::from("empty SQL rejected"),
+        },
+    };
+
+    // Registration order is preserved by the store: destructive-verb
+    // rule first, empty-SQL rule second.
+    let mut guard = TruthStore::new();
+    guard.add_rule(no_destructive_sql);
+    guard.add_rule(sql_not_empty);
+    guard
+}
+
+/// Phase 42 default store: an empty `TruthStore` with the staffing
+/// rules applied first and the DevOps scaffold layered on top. Per the
+/// PRD: "Staffing rules ship first; Terraform/Ansible rule shapes are
+/// scaffolded but unpopulated until the long-horizon phase." Order is
+/// irrelevant today because the DevOps set is empty, but it is kept so
+/// the shape matches the PRD's "compose on top" pattern.
+///
+/// The rules used to be registered inline in this function; they were
+/// moved out (2026-04-24) to land the Phase 42 module split, with
+/// `staffing.rs` and `devops.rs` each owning their task-class rule
+/// sets. Behavior is unchanged for existing callers.
+pub fn default_truth_store() -> TruthStore {
+    let empty = TruthStore::new();
+    let with_staffing = staffing::staffing_rules(empty);
+    devops::devops_rules(with_staffing)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn truth_store_new_is_empty() {
+        let store = TruthStore::new();
+        assert!(store.rules.is_empty());
+    }
+
+    #[test]
+    fn add_rule_inserts_into_correct_task_class() {
+        let mut store = TruthStore::new();
+        store.add_rule(TruthRule {
+            id: "test-rule".to_string(),
+            task_class: "test.task".to_string(),
+            description: "Test rule".to_string(),
+            condition: RuleCondition::Always,
+            action: RuleAction::Pass,
+        });
+        let rules = store.get_rules("test.task");
+        assert_eq!(rules.len(), 1);
+        assert_eq!(rules[0].id, "test-rule");
+    }
+
+    #[test]
+    fn get_rules_returns_empty_for_unknown_class() {
+        let store = TruthStore::new();
+        let rules = store.get_rules("unknown.class");
+        assert!(rules.is_empty());
+    }
+
+    #[test]
+    fn check_returns_actions_for_task_class() {
+        let mut store = TruthStore::new();
+        store.add_rule(TruthRule {
+            id: "a1".to_string(),
+            task_class: "test".to_string(),
+            description: "A1".to_string(),
+            condition: RuleCondition::Always,
+            action: RuleAction::Pass,
+        });
+        store.add_rule(TruthRule {
+            id: "a2".to_string(),
+            task_class: "test".to_string(),
+            description: "A2".to_string(),
+            condition: RuleCondition::Always,
+            action: RuleAction::Reject {
+                message: "test reject".to_string(),
+            },
+        });
+        let actions = store.check("test");
+        assert_eq!(actions.len(), 2);
+    }
+
+    #[test]
+    fn rule_condition_serialize_always() {
+        let cond = RuleCondition::Always;
+        let json = serde_json::to_string(&cond).unwrap();
+        assert!(json.contains(r#""type":"Always"#));
+    }
+
+    #[test]
+    fn rule_condition_serialize_field_equals() {
+        let cond = RuleCondition::FieldEquals {
+            field: "foo".to_string(),
+            value: "bar".to_string(),
+        };
+        let json = serde_json::to_string(&cond).unwrap();
+        assert!(json.contains(r#""type":"FieldEquals""#));
+        assert!(json.contains(r#""field":"foo""#));
+        assert!(json.contains(r#""value":"bar""#));
+    }
+
+    #[test]
+    fn rule_action_serialize_redact() {
+        let action = RuleAction::Redact {
+            fields: vec!["ssn".to_string()],
+        };
+        let json = serde_json::to_string(&action).unwrap();
+        assert!(json.contains(r#""type":"Redact""#));
+        assert!(json.contains("ssn"));
+    }
+
+    #[test]
+    fn rule_action_serialize_reject() {
+        let action = RuleAction::Reject {
+            message: "test".to_string(),
+        };
+        let json = serde_json::to_string(&action).unwrap();
+        assert!(json.contains(r#""type":"Reject""#));
+    }
+
+    #[test]
+    fn default_truth_store_has_staffing_rules() {
+        let store = default_truth_store();
+        let fill_rules = store.get_rules("staffing.fill");
+        assert!(!fill_rules.is_empty());
+        let any_rules = store.get_rules("staffing.any");
+        assert!(!any_rules.is_empty());
+    }
+
+    #[test]
+    fn multiple_rules_same_task_class() {
+        let mut store = TruthStore::new();
+        for i in 0..5 {
+            store.add_rule(TruthRule {
+                id: format!("rule-{}", i),
+                task_class: "test".to_string(),
+                description: format!("Rule {}", i),
+                condition: RuleCondition::Always,
+                action: RuleAction::Pass,
+            });
+        }
+        let rules = store.get_rules("test");
+        assert_eq!(rules.len(), 5);
+    }
+
+    #[test]
+    fn truth_rule_clone_preserves_data() {
+        // Cloning a rule must keep its id, condition and action equal to
+        // the source rule's.
+        let condition = RuleCondition::FieldEquals {
+            field: String::from("x"),
+            value: String::from("y"),
+        };
+        let action = RuleAction::Block {
+            message: String::from("blocked"),
+        };
+        let original = TruthRule {
+            id: String::from("clone-test"),
+            task_class: String::from("clone.task"),
+            description: String::from("Clone test"),
+            condition,
+            action,
+        };
+        let copy = original.clone();
+        assert_eq!(copy.id, original.id);
+        assert_eq!(copy.condition, original.condition);
+        assert_eq!(copy.action, original.action);
+    }
+
+    #[test]
+    fn field_greater_condition_parse() {
+        // The tagged JSON form must deserialize into the FieldGreater
+        // variant with both payload fields intact.
+        let raw = r#"{"type":"FieldGreater","field":"count","threshold":10}"#;
+        let parsed: RuleCondition = serde_json::from_str(raw).unwrap();
+        if let RuleCondition::FieldGreater { field, threshold } = parsed {
+            assert_eq!(field, "count");
+            assert_eq!(threshold, 10);
+        } else {
+            panic!("Expected FieldGreater");
+        }
+    }
+
+    #[test]
+    fn block_action_blocks_with_message() {
+        // The Block message must survive serialization.
+        let blocked = RuleAction::Block {
+            message: String::from("Rate limited"),
+        };
+        let serialized = serde_json::to_string(&blocked).unwrap();
+        assert!(serialized.contains("Rate limited"));
+    }
+
+    #[test]
+    fn empty_store_check_returns_empty() {
+        // A store with no registered rules yields no actions.
+        let empty = TruthStore::new();
+        assert!(empty.check("empty.class").is_empty());
+    }
+
+    // ── P42-001 evaluate() tests — actually walk RuleCondition ──
+
+    /// Fixture: a store with three rules under task class "t", in this
+    /// registration order — "active" (FieldEquals), "deadline" (FieldEmpty)
+    /// and "budget" (FieldGreater).
+    fn fill_store() -> TruthStore {
+        let rules = vec![
+            TruthRule {
+                id: String::from("active"),
+                task_class: String::from("t"),
+                description: String::from("must be active"),
+                condition: RuleCondition::FieldEquals {
+                    field: String::from("worker.status"),
+                    value: String::from("active"),
+                },
+                action: RuleAction::Reject {
+                    message: String::from("worker not active"),
+                },
+            },
+            TruthRule {
+                id: String::from("deadline"),
+                task_class: String::from("t"),
+                description: String::from("deadline required"),
+                condition: RuleCondition::FieldEmpty {
+                    field: String::from("contract.deadline"),
+                },
+                action: RuleAction::Reject {
+                    message: String::from("missing deadline"),
+                },
+            },
+            TruthRule {
+                id: String::from("budget"),
+                task_class: String::from("t"),
+                description: String::from("budget positive"),
+                condition: RuleCondition::FieldGreater {
+                    field: String::from("contract.budget"),
+                    threshold: 0,
+                },
+                action: RuleAction::Block {
+                    message: String::from("budget must be positive"),
+                },
+            },
+        ];
+
+        let mut store = TruthStore::new();
+        for rule in rules {
+            store.add_rule(rule);
+        }
+        store
+    }
+
+    #[test]
+    fn evaluate_field_equals_pass_on_match() {
+        // worker.status equals the expected value, so the condition holds.
+        let store = fill_store();
+        let context = serde_json::json!({"worker": {"status": "active"}});
+        let outcomes = store.evaluate("t", &context);
+        let passed = outcomes
+            .iter()
+            .find(|outcome| outcome.rule_id == "active")
+            .map(|outcome| outcome.passed)
+            .unwrap();
+        assert!(passed, "active condition should hold");
+    }
+
+    #[test]
+    fn evaluate_field_equals_fail_on_mismatch() {
+        // A different worker.status value must not satisfy FieldEquals.
+        let store = fill_store();
+        let context = serde_json::json!({"worker": {"status": "terminated"}});
+        let outcomes = store.evaluate("t", &context);
+        let passed = outcomes
+            .iter()
+            .find(|outcome| outcome.rule_id == "active")
+            .map(|outcome| outcome.passed)
+            .unwrap();
+        assert!(!passed, "terminated should fail active condition");
+    }
+
+    #[test]
+    fn evaluate_field_equals_fail_on_missing() {
+        // An empty context has no worker.status, so FieldEquals cannot hold.
+        let store = fill_store();
+        let context = serde_json::json!({});
+        let outcomes = store.evaluate("t", &context);
+        let passed = outcomes
+            .iter()
+            .find(|outcome| outcome.rule_id == "active")
+            .map(|outcome| outcome.passed)
+            .unwrap();
+        assert!(!passed, "missing worker.status should fail");
+    }
+
+    #[test]
+    fn evaluate_field_empty_pass_when_absent() {
+        // The context carries no contract.deadline, so the FieldEmpty
+        // condition holds. `passed == true` reports that the condition held
+        // (the deadline IS empty), which is when the rule's action applies.
+        let store = fill_store();
+        let context = serde_json::json!({});
+        let outcomes = store.evaluate("t", &context);
+        let passed = outcomes
+            .iter()
+            .find(|outcome| outcome.rule_id == "deadline")
+            .map(|outcome| outcome.passed)
+            .unwrap();
+        assert!(passed);
+    }
+
+    #[test]
+    fn evaluate_field_empty_fail_when_present() {
+        // A populated contract.deadline means FieldEmpty does not hold.
+        let store = fill_store();
+        let context = serde_json::json!({"contract": {"deadline": "2026-05-01"}});
+        let outcomes = store.evaluate("t", &context);
+        let passed = outcomes
+            .iter()
+            .find(|outcome| outcome.rule_id == "deadline")
+            .map(|outcome| outcome.passed)
+            .unwrap();
+        assert!(!passed, "non-empty deadline should fail FieldEmpty check");
+    }
+
+    #[test]
+    fn evaluate_field_greater_pass_and_fail() {
+        // Threshold is 0: a budget of 100 satisfies FieldGreater, a budget
+        // of exactly 0 does not.
+        let store = fill_store();
+        let above = serde_json::json!({"contract": {"budget": 100}});
+        let at_threshold = serde_json::json!({"contract": {"budget": 0}});
+        let above_outcomes = store.evaluate("t", &above);
+        let threshold_outcomes = store.evaluate("t", &at_threshold);
+
+        let above_budget = above_outcomes
+            .iter()
+            .find(|outcome| outcome.rule_id == "budget")
+            .unwrap();
+        assert!(above_budget.passed);
+
+        let threshold_budget = threshold_outcomes
+            .iter()
+            .find(|outcome| outcome.rule_id == "budget")
+            .unwrap();
+        assert!(!threshold_budget.passed);
+    }
+
+    #[test]
+    fn evaluate_always_condition_passes_unconditionally() {
+        // Even a null context must satisfy an Always condition.
+        let rule = TruthRule {
+            id: String::from("always"),
+            task_class: String::from("x"),
+            description: String::new(),
+            condition: RuleCondition::Always,
+            action: RuleAction::Pass,
+        };
+        let mut store = TruthStore::new();
+        store.add_rule(rule);
+
+        let context = serde_json::json!(null);
+        let outcomes = store.evaluate("x", &context);
+        assert!(outcomes[0].passed);
+    }
+
+    #[test]
+    fn evaluate_preserves_action_regardless_of_outcome() {
+        // The outcome carries the rule's action whether or not the
+        // condition held; what to do with it is left to the consumer.
+        let expected = RuleAction::Reject {
+            message: String::from("worker not active"),
+        };
+        let store = fill_store();
+        let context = serde_json::json!({"worker": {"status": "active"}});
+        let outcomes = store.evaluate("t", &context);
+        let active = outcomes
+            .iter()
+            .find(|outcome| outcome.rule_id == "active")
+            .unwrap();
+        assert_eq!(active.action, expected);
+    }
+
+    #[test]
+    fn evaluate_on_unknown_task_class_returns_empty() {
+        // No rule is registered under this task class, so nothing is evaluated.
+        let store = fill_store();
+        let context = serde_json::json!({});
+        let outcomes = store.evaluate("nonexistent", &context);
+        assert!(outcomes.is_empty());
+    }
+
+    #[test]
+    fn check_still_returns_actions_unconditionally_for_back_compat() {
+        // The legacy `check` API does not walk conditions: the fixture has
+        // three rules under "t", so three actions come back.
+        let store = fill_store();
+        let returned = store.check("t").len();
+        assert_eq!(returned, 3, "check returns one action per rule regardless of condition");
+    }
+
+    /// Fixture: a store with a single "sql_query" rule whose condition holds
+    /// when the `sql` field contains any of the destructive phrases below.
+    fn sql_guard_store() -> TruthStore {
+        let destructive = ["drop table", "drop schema", "truncate", "delete from"];
+        let guard = TruthRule {
+            id: String::from("no-destructive"),
+            task_class: String::from("sql_query"),
+            description: String::from("SQL must not contain destructive verbs"),
+            condition: RuleCondition::FieldContainsAny {
+                field: String::from("sql"),
+                needles: destructive.iter().map(|phrase| (*phrase).into()).collect(),
+            },
+            action: RuleAction::Reject {
+                message: String::from("destructive SQL rejected"),
+            },
+        };
+
+        let mut store = TruthStore::new();
+        store.add_rule(guard);
+        store
+    }
+
+    #[test]
+    fn field_contains_any_matches_case_insensitively() {
+        // The needle is lowercase "drop table"; the input spells it in
+        // uppercase and must still match.
+        let store = sql_guard_store();
+        let context = serde_json::json!({"sql": "SELECT * FROM t; DROP TABLE users;"});
+        let outcomes = store.evaluate("sql_query", &context);
+        let first = &outcomes[0];
+        assert!(first.passed, "condition holds when needle present (case-insensitive)");
+    }
+
+    #[test]
+    fn field_contains_any_is_false_when_no_needle_matches() {
+        // A plain SELECT contains none of the destructive phrases.
+        let store = sql_guard_store();
+        let context = serde_json::json!({"sql": "SELECT count(*) FROM workers"});
+        let outcomes = store.evaluate("sql_query", &context);
+        let first = &outcomes[0];
+        assert!(!first.passed, "benign SELECT should not match destructive needles");
+    }
+
+    #[test]
+    fn field_contains_any_false_when_field_missing() {
+        // With no `sql` field in the context there is nothing to search.
+        let store = sql_guard_store();
+        let context = serde_json::json!({});
+        let outcomes = store.evaluate("sql_query", &context);
+        let first = &outcomes[0];
+        assert!(!first.passed, "missing field → condition cannot hold");
+    }
+
+    #[test]
+    fn field_contains_any_empty_needles_list_never_matches() {
+        // An empty needle list can never match, whatever the field holds.
+        let rule = TruthRule {
+            id: String::from("empty"),
+            task_class: String::from("x"),
+            description: String::new(),
+            condition: RuleCondition::FieldContainsAny {
+                field: String::from("sql"),
+                needles: Vec::new(),
+            },
+            action: RuleAction::Pass,
+        };
+        let mut store = TruthStore::new();
+        store.add_rule(rule);
+
+        let context = serde_json::json!({"sql": "anything"});
+        let outcomes = store.evaluate("x", &context);
+        assert!(!outcomes[0].passed, "no needles → any::<bool> is false");
+    }
+}
--- a/crates/truth/src/loader.rs
+++ b/crates/truth/src/loader.rs
@ -0,0 +1,187 @@
+//! File-backed TruthRule loader (Phase 42 PRD).
+//!
+//! PRD: "truth/ dir at repo root — rule files, versioned in git."
+//! This module walks a directory, parses every `*.toml` file it finds,
+//! and registers the rules into a caller-supplied store. Rule IDs must
+//! be unique across the combined set — duplicate-ID collisions are
+//! load-time errors.
+//!
+//! The TOML format matches the shape at `truth/README.md`. The same
+//! `RuleCondition` + `RuleAction` enums used by the in-code registrars
+//! deserialize directly from `condition = { type = "FieldEquals", ... }`
+//! thanks to `#[serde(tag = "type")]`.
+
+use std::fs;
+use std::path::Path;
+use serde::Deserialize;
+
+use crate::{TruthRule, TruthStore};
+
+/// Deserialization wrapper — a TOML file is a list of [[rule]] blocks.
+///
+/// Private to this module: it is only the parse target of `toml::from_str`
+/// in `load_from_dir`, which then moves the rules out of `rule`.
+#[derive(Deserialize)]
+struct RuleFile {
+    // `default` makes the key optional: a file without any `[[rule]]`
+    // table deserializes to an empty list instead of failing.
+    #[serde(default)]
+    rule: Vec<TruthRule>,
+}
+
+/// Load every `*.toml` file in `dir` and add its rules to `store`.
+/// Returns the number of rules loaded across all files.
+///
+/// Files are processed in sorted path order so that the first error
+/// reported is the same on every run.
+///
+/// The load is all-or-nothing: every file is read, parsed and checked for
+/// ID collisions before any rule is registered, so an `Err` leaves `store`
+/// exactly as it was.
+///
+/// # Errors
+///   - directory doesn't exist or can't be read (including a failure
+///     while iterating its entries)
+///   - any `.toml` file can't be read or fails to parse
+///   - any rule ID collides with an existing rule (same ID already
+///     registered in the store, or seen earlier in this load)
+///
+/// Non-goals: recursive walk (flat dir only), hot reload (one-shot load).
+pub fn load_from_dir(store: &mut TruthStore, dir: impl AsRef<Path>) -> Result<usize, String> {
+    let dir = dir.as_ref();
+    let entries = fs::read_dir(dir)
+        .map_err(|e| format!("read_dir {}: {e}", dir.display()))?;
+
+    // Surface per-entry I/O errors instead of dropping them: a silently
+    // skipped entry could be a rule file that then never gets enforced.
+    let mut paths = Vec::new();
+    for entry in entries {
+        let entry = entry.map_err(|e| format!("read_dir {}: {e}", dir.display()))?;
+        let path = entry.path();
+        if path.extension().and_then(|s| s.to_str()) == Some("toml") {
+            paths.push(path);
+        }
+    }
+    // Deterministic order. On a cross-file ID collision neither file
+    // "wins" (the load fails), but the file named in the error message is
+    // always the later one in sort order.
+    paths.sort();
+
+    // Phase 1: parse and validate everything without touching the store.
+    let mut seen_ids = store.all_rule_ids();
+    let mut staged: Vec<TruthRule> = Vec::new();
+    for path in &paths {
+        let raw = fs::read_to_string(path)
+            .map_err(|e| format!("read {}: {e}", path.display()))?;
+        let file: RuleFile = toml::from_str(&raw)
+            .map_err(|e| format!("parse {}: {e}", path.display()))?;
+        for rule in file.rule {
+            // `insert` returning false means the ID was already present,
+            // either in the store or in a file handled earlier.
+            if !seen_ids.insert(rule.id.clone()) {
+                return Err(format!(
+                    "duplicate rule id '{}' from {}",
+                    rule.id,
+                    path.display()
+                ));
+            }
+            staged.push(rule);
+        }
+    }
+
+    // Phase 2: commit. Nothing below can fail, so the store is never left
+    // partially populated.
+    let count = staged.len();
+    for rule in staged {
+        store.add_rule(rule);
+    }
+
+    Ok(count)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+
+    /// Scratch directory that removes itself on drop, so cleanup also
+    /// happens when an assertion panics part-way through a test.
+    struct ScratchDir(PathBuf);
+
+    impl ScratchDir {
+        fn path(&self) -> &Path {
+            &self.0
+        }
+    }
+
+    impl Drop for ScratchDir {
+        fn drop(&mut self) {
+            // Best effort: a failed cleanup must not mask the test result.
+            let _ = fs::remove_dir_all(&self.0);
+        }
+    }
+
+    /// Create an empty per-test directory under the system temp dir.
+    /// `tag` separates the tests of this module from each other; the
+    /// process id separates concurrently running test processes.
+    fn tempdir_for(tag: &str) -> ScratchDir {
+        let dir = std::env::temp_dir().join(format!("truth_loader_{}_{}", tag,
+            std::process::id()));
+        // Clear leftovers from an earlier run that got the same PID.
+        let _ = fs::remove_dir_all(&dir);
+        fs::create_dir_all(&dir).unwrap();
+        ScratchDir(dir)
+    }
+
+    /// Write `content` to `dir/name`, replacing any existing file.
+    fn write_file(dir: &Path, name: &str, content: &str) {
+        fs::write(dir.join(name), content).unwrap();
+    }
+
+    /// TOML for one rule with an `Always` condition and a `Pass` action.
+    fn always_pass_rule(id: &str, task_class: &str, description: &str) -> String {
+        format!(r#"
+[[rule]]
+id = "{id}"
+task_class = "{task_class}"
+description = "{description}"
+action = {{ type = "Pass" }}
+
+[rule.condition]
+type = "Always"
+"#)
+    }
+
+    #[test]
+    fn loads_rules_from_toml_files() {
+        let tmp = tempdir_for("loader_test");
+        write_file(tmp.path(), "a.toml", &always_pass_rule("a-rule", "test", "test rule"));
+        let mut store = TruthStore::new();
+        let n = load_from_dir(&mut store, tmp.path()).unwrap();
+        assert_eq!(n, 1);
+        assert_eq!(store.get_rules("test").len(), 1);
+    }
+
+    #[test]
+    fn rejects_duplicate_rule_ids() {
+        let tmp = tempdir_for("dup_ids");
+        write_file(tmp.path(), "a.toml", &always_pass_rule("same", "t", ""));
+        write_file(tmp.path(), "b.toml", &always_pass_rule("same", "t", ""));
+        let mut store = TruthStore::new();
+        let err = load_from_dir(&mut store, tmp.path()).unwrap_err();
+        assert!(err.contains("duplicate"), "got: {err}");
+    }
+
+    #[test]
+    fn duplicate_with_in_code_rule_is_rejected() {
+        // Existing in-store IDs count as "already registered." Operator
+        // can't shadow an in-code rule by file without changing the ID.
+        let tmp = tempdir_for("dup_in_code");
+        write_file(
+            tmp.path(),
+            "conflict.toml",
+            &always_pass_rule("worker-active", "staffing.fill", "file attempt"),
+        );
+        // staffing_rules registers "worker-active"
+        let mut store = crate::staffing::staffing_rules(TruthStore::new());
+        let err = load_from_dir(&mut store, tmp.path()).unwrap_err();
+        assert!(err.contains("duplicate") && err.contains("worker-active"));
+    }
+
+    #[test]
+    fn skips_non_toml_files() {
+        let tmp = tempdir_for("skip_non_toml");
+        write_file(tmp.path(), "a.toml", &always_pass_rule("x", "t", ""));
+        write_file(tmp.path(), "README.md", "not a toml file");
+        let mut store = TruthStore::new();
+        let n = load_from_dir(&mut store, tmp.path()).unwrap();
+        assert_eq!(n, 1); // README.md ignored
+    }
+
+    #[test]
+    fn missing_dir_returns_error() {
+        let mut store = TruthStore::new();
+        let err = load_from_dir(&mut store, "/nonexistent/path/here").unwrap_err();
+        assert!(err.contains("read_dir"));
+    }
+}
--- a/Show More
+++ b/Show More