From 25b7e6c3a782c11b22216baa7833628394d6383f Mon Sep 17 00:00:00 2001
From: root <root@island37.com>
Date: Mon, 20 Apr 2026 06:21:13 -0500
Subject: [PATCH] Phase 19 wiring + Path 1/2 work + chain integrity fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backend:
- crates/vectord/src/playbook_memory.rs (new): Phase 19 in-memory boost
  store with seed/rebuild/snapshot, plus temporal decay (e^(-age/30) per
  playbook), persist_to_sql endpoint backing successful_playbooks_live,
  and discover_patterns endpoint for meta-index pattern aggregation
  (recurring certs/skills/archetype/reliability across similar past fills).
- DEFAULT_TOP_K_PLAYBOOKS bumped 5 → 25; old default silently missed
  most boosts when memory had > 25 entries.
- service.rs: new routes /vectors/playbook_memory/{seed,rebuild,stats,
  persist_sql,patterns}.

Bun staffing co-pilot (mcp-server/):
- /search, /match, /verify, /proof, /simulation/run, MCP tools all
  forward use_playbook_memory:true and playbook_memory_k:25 to the
  hybrid endpoint. Boost was previously dark across the entire app.
- /log no longer POSTs to /ingest/file — that endpoint REPLACES the
  dataset's object list, so single-row CSV writes were wiping all prior
  rows in successful_playbooks (sp_rows went 33→1 in one /log call).
  /log now seeds playbook_memory with canonical short text and calls
  /persist_sql to keep successful_playbooks_live in sync.
- /simulation/run cumulative end-of-week CSV write removed for the same
  reason. Per-day per-contract /seed (added in this session) is the
  accumulating feedback path now.
- search.html addWorkerInsight renders a green "Endorsed · N playbooks"
  chip with playbook citations when boost > 0.

Internal Dioxus UI (crates/ui/):
- Dashboard phase list rewritten through Phase 19 (was stuck at "Phase
  16: File Watcher" / "Phase 17: DB Connector" — both wrong).
- Removed fabricated "27ms" stat label.
- Ask tab examples + SQL default replaced with real staffing prompts
  against candidates/clients/job_orders (was referencing nonexistent
  employees/products/events).
- New Playbook tab exposes /vectors/playbook_memory/{stats,rebuild} and
  side-by-side hybrid search (boost OFF vs ON) with citations.

Tests (tests/multi-agent/):
- run_e2e_rated.ts: parallel two-agent (mistral + qwen2.5) build phase
  + verifier rating (geo, auth, persist, boost, speed → /10).
- network_proving.ts: continuous build → verify → repeat with
  staffing-recruiter profile hot-swap; geo-discrimination check.
- chain_of_custody.ts: single recruiter operation traced through every
  layer (Bun /search, direct /vectors/hybrid parity, /log, SQL,
  playbook_memory growth, profile activation, post-op boost lift).
---
 crates/ui/assets/style.css            |  10 +
 crates/ui/src/main.rs                 | 461 +++++++++++++-
 crates/vectord/src/lib.rs             |   1 +
 crates/vectord/src/playbook_memory.rs | 825 ++++++++++++++++++++++++++
 crates/vectord/src/service.rs         | 519 +++++++++++++++-
 mcp-server/index.ts                   | 140 ++++-
 mcp-server/search.html                |  15 +-
 tests/multi-agent/agent.ts            | 351 +++++++++++
 tests/multi-agent/chain_of_custody.ts | 335 +++++++++++
 tests/multi-agent/network_proving.ts  | 469 +++++++++++++++
 tests/multi-agent/orchestrator.ts     | 302 ++++++++++
 tests/multi-agent/run_e2e_rated.ts    | 400 +++++++++++++
 tests/multi-agent/scenario.ts         | 822 +++++++++++++++++++++++++
 13 files changed, 4566 insertions(+), 84 deletions(-)
 create mode 100644 crates/vectord/src/playbook_memory.rs
 create mode 100644 tests/multi-agent/agent.ts
 create mode 100644 tests/multi-agent/chain_of_custody.ts
 create mode 100644 tests/multi-agent/network_proving.ts
 create mode 100644 tests/multi-agent/orchestrator.ts
 create mode 100644 tests/multi-agent/run_e2e_rated.ts
 create mode 100644 tests/multi-agent/scenario.ts

diff --git a/crates/ui/assets/style.css b/crates/ui/assets/style.css
index 1764800..85bac9c 100644
--- a/crates/ui/assets/style.css
+++ b/crates/ui/assets/style.css
@@ -205,3 +205,13 @@ tr:hover td { background: var(--accent-glow); }
     padding: 8px 12px; border-bottom: 1px solid var(--border); font-size: 13px;
 }
 .table-item:hover { background: var(--accent-glow); }
+
+/* Phase 19 — Playbook panel */
+.boosted-row { background: rgba(120, 200, 120, 0.10); }
+.boosted-row td { border-top: 1px solid rgba(120, 200, 120, 0.30); }
+.mono-cell {
+    font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
+    font-size: 11px; color: var(--text-dim);
+    max-width: 220px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
+}
+.panel-section + .panel-section { margin-top: 18px; }
diff --git a/crates/ui/src/main.rs b/crates/ui/src/main.rs
index 24d8c35..53c9ad2 100644
--- a/crates/ui/src/main.rs
+++ b/crates/ui/src/main.rs
@@ -178,9 +178,116 @@ enum Tab {
     Explore,
     Sql,
     Ingest,
+    Playbook,
     Status,
 }
 
+// --- Playbook memory types (Phase 19) ---
+
+#[derive(Debug, Clone, Deserialize, PartialEq)]
+struct PlaybookStats {
+    entries: usize,
+    entries_with_embeddings: usize,
+    #[serde(default)]
+    total_names_endorsed: usize,
+    #[serde(default)]
+    sample: Vec<PlaybookSample>,
+}
+
+#[derive(Debug, Clone, Deserialize, PartialEq)]
+struct PlaybookSample {
+    id: String,
+    operation: String,
+    #[serde(default)]
+    city: Option<String>,
+    #[serde(default)]
+    state: Option<String>,
+    #[serde(default)]
+    endorsed: Vec<String>,
+}
+
+#[derive(Debug, Clone, Deserialize, PartialEq)]
+struct HybridResp {
+    #[serde(default)]
+    sql_matches: usize,
+    #[serde(default)]
+    vector_reranked: usize,
+    #[serde(default)]
+    method: String,
+    #[serde(default)]
+    duration_ms: u64,
+    #[serde(default)]
+    answer: Option<String>,
+    #[serde(default)]
+    sources: Vec<HybridSource>,
+}
+
+#[derive(Debug, Clone, Deserialize, PartialEq)]
+struct HybridSource {
+    doc_id: String,
+    chunk_text: String,
+    score: f32,
+    #[serde(default)]
+    sql_verified: bool,
+    #[serde(default)]
+    playbook_boost: f32,
+    #[serde(default)]
+    playbook_citations: Vec<String>,
+}
+
+#[derive(Debug, Clone, Deserialize, PartialEq)]
+struct IndexInfo {
+    index_name: String,
+    source: String,
+    #[serde(default)]
+    chunk_count: usize,
+    #[serde(default)]
+    vector_backend: String,
+}
+
+async fn fetch_playbook_stats() -> Result<PlaybookStats, String> {
+    let resp = reqwest::get(&format!("{}/vectors/playbook_memory/stats", api_base()))
+        .await.map_err(|e| e.to_string())?;
+    if !resp.status().is_success() {
+        return Err(format!("HTTP {}: {}", resp.status(), resp.text().await.unwrap_or_default()));
+    }
+    resp.json().await.map_err(|e| e.to_string())
+}
+
+async fn rebuild_playbook_memory() -> Result<serde_json::Value, String> {
+    let client = reqwest::Client::new();
+    let resp = client.post(&format!("{}/vectors/playbook_memory/rebuild", api_base()))
+        .json(&serde_json::json!({}))
+        .send().await.map_err(|e| e.to_string())?;
+    if !resp.status().is_success() {
+        return Err(format!("HTTP {}: {}", resp.status(), resp.text().await.unwrap_or_default()));
+    }
+    resp.json().await.map_err(|e| e.to_string())
+}
+
+/// GET `/vectors/indexes`; transport errors, non-2xx statuses and bad JSON all become `Err`.
+async fn fetch_indexes() -> Result<Vec<IndexInfo>, String> {
+    let resp = reqwest::get(&format!("{}/vectors/indexes", api_base())).await.and_then(|r| r.error_for_status());
+    resp.map_err(|e| e.to_string())?.json().await.map_err(|e| e.to_string())
+}
+
+async fn hybrid_search(index_name: &str, question: &str, use_playbook: bool, top_k: usize) -> Result<HybridResp, String> {
+    let client = reqwest::Client::new();
+    let resp = client.post(&format!("{}/vectors/hybrid", api_base()))
+        .json(&serde_json::json!({
+            "index_name": index_name,
+            "question": question,
+            "top_k": top_k,
+            "generate": false,
+            "use_playbook_memory": use_playbook,
+        }))
+        .send().await.map_err(|e| e.to_string())?;
+    if !resp.status().is_success() {
+        return Err(format!("HTTP {}: {}", resp.status(), resp.text().await.unwrap_or_default()));
+    }
+    resp.json().await.map_err(|e| e.to_string())
+}
+
 // --- App ---
 
 #[component]
@@ -239,6 +346,11 @@ fn App() -> Element {
                         onclick: move |_| active_tab.set(Tab::Ingest),
                         "Ingest"
                     }
+                    button {
+                        class: if *active_tab.read() == Tab::Playbook { "tab active" } else { "tab" },
+                        onclick: move |_| active_tab.set(Tab::Playbook),
+                        "Playbook"
+                    }
                     button {
                         class: if *active_tab.read() == Tab::Status { "tab active" } else { "tab" },
                         onclick: move |_| active_tab.set(Tab::Status),
@@ -260,6 +372,7 @@ fn App() -> Element {
                     Tab::Explore => rsx! { ExplorePanel { datasets: datasets.read().clone() } },
                     Tab::Sql => rsx! { SqlPanel {} },
                     Tab::Ingest => rsx! { IngestPanel {} },
+                    Tab::Playbook => rsx! { PlaybookPanel {} },
                     Tab::Status => rsx! { StatusPanel {} },
                 }
             }
@@ -354,14 +467,14 @@ fn AskPanel(datasets: Vec<Dataset>) -> Element {
         div { class: "panel ask-panel",
             div { class: "ask-hero",
                 h2 { "Ask your data anything" }
-                p { class: "subtitle", "Natural language → SQL → Results. Powered by local AI." }
+                p { class: "subtitle", "Natural language → SQL → Results. Powered by local AI over the staffing dataset." }
             }
 
             div { class: "ask-input-row",
                 input {
                     class: "ask-input",
                     value: "{question}",
-                    placeholder: "e.g. Which department has the highest average salary?",
+                    placeholder: "e.g. Which clients placed the most candidates last quarter?",
                     oninput: move |e| question.set(e.value()),
                     onkeydown: move |e| {
                         if e.key() == Key::Enter {
@@ -432,10 +545,12 @@ fn AskPanel(datasets: Vec<Dataset>) -> Element {
 
             div { class: "ask-examples",
                 "Try: "
-                button { class: "example-btn", onclick: move |_| question.set("Which department has the highest average salary?".into()), "highest avg salary by dept" }
-                button { class: "example-btn", onclick: move |_| question.set("Show me the top 3 most expensive products".into()), "top 3 expensive products" }
-                button { class: "example-btn", onclick: move |_| question.set("How many events per action type?".into()), "events by action" }
-                button { class: "example-btn", onclick: move |_| question.set("List all employees who earn more than 90000".into()), "employees > 90k" }
+                button { class: "example-btn", onclick: move |_| question.set("How many candidates do we have by city?".into()), "candidates by city" }
+                button { class: "example-btn", onclick: move |_| question.set("Top 10 clients by total placements".into()), "top clients by placements" }
+                button { class: "example-btn", onclick: move |_| question.set("Open job orders ordered by bill rate descending".into()), "open jobs by rate" }
+                button { class: "example-btn", onclick: move |_| question.set("Recruiters with the highest placement count".into()), "top recruiters" }
+                button { class: "example-btn", onclick: move |_| question.set("Total billed hours per client last month".into()), "hours per client" }
+                button { class: "example-btn", onclick: move |_| question.set("Cold leads: candidates we called more than 5 times but never placed".into()), "cold leads" }
             }
 
             if let Some(sql) = generated_sql.read().as_ref() {
@@ -578,7 +693,7 @@ fn ExplorePanel(datasets: Vec<Dataset>) -> Element {
 
 #[component]
 fn SqlPanel() -> Element {
-    let mut query_text = use_signal(|| String::from("SELECT * FROM employees LIMIT 10"));
+    let mut query_text = use_signal(|| String::from("SELECT candidate_id, first_name, last_name, city, status FROM candidates LIMIT 10"));
     let mut result = use_signal(|| None::<Result<QueryResponse, String>>);
     let mut loading = use_signal(|| false);
 
@@ -727,7 +842,7 @@ fn DashboardPanel() -> Element {
                     }
                     div { class: "stat-card accent",
                         div { class: "stat-value", "{s[\"hnsw_loaded\"]}" }
-                        div { class: "stat-label", "HNSW Indexes (27ms)" }
+                        div { class: "stat-label", "HNSW Indexes Loaded" }
                     }
                     div { class: "stat-card",
                         div { class: "stat-value", "{s[\"tools\"]}" }
@@ -750,27 +865,27 @@ fn DashboardPanel() -> Element {
                     div { class: "arch-grid",
                         div { class: "arch-card",
                             div { class: "arch-title", "Ingest" }
-                            div { class: "arch-items", "CSV, JSON, PDF, Text, PostgreSQL, File Watcher" }
+                            div { class: "arch-items", "CSV · JSON · PDF (+OCR) · Text · Postgres · MySQL · Inbox watcher · Cron schedules" }
                         }
                         div { class: "arch-card",
                             div { class: "arch-title", "Storage" }
-                            div { class: "arch-items", "Parquet on Object Storage, Delta Writes, Compaction" }
+                            div { class: "arch-items", "Parquet on Object Storage · Delta writes · Compaction · Tombstones · Multi-bucket federation + rescue" }
                         }
                         div { class: "arch-card",
                             div { class: "arch-title", "Query" }
-                            div { class: "arch-items", "DataFusion SQL, MemCache (9.8x), Hot/Cold" }
+                            div { class: "arch-items", "DataFusion SQL · MemCache (9.8× hot) · Merge-on-read · AI-safe views" }
                         }
                         div { class: "arch-card",
-                            div { class: "arch-title", "AI" }
-                            div { class: "arch-items", "Ollama (local), Embed, Generate, RAG, HNSW" }
+                            div { class: "arch-title", "AI / Vector" }
+                            div { class: "arch-items", "Ollama (local) · Embed/Generate/RAG · HNSW (Parquet) · Lance IVF_PQ · Hybrid SQL+vector · Profile-scoped" }
+                        }
+                        div { class: "arch-card",
+                            div { class: "arch-title", "Learning loop" }
+                            div { class: "arch-items", "Playbook memory · Endorsement boost · Multi-agent orchestrator · Autotune agent (Pareto-promote)" }
                         }
                         div { class: "arch-card",
                             div { class: "arch-title", "Governance" }
-                            div { class: "arch-items", "Event Journal, PII Detection, Tool Registry, Access Control" }
-                        }
-                        div { class: "arch-card",
-                            div { class: "arch-title", "Agents" }
-                            div { class: "arch-items", "Workspaces, Handoff, Shortlists, Activity Logs" }
+                            div { class: "arch-items", "Event journal · PII detection · Tool registry · Access control · Audit log · Catalog v2 metadata" }
                         }
                     }
                 }
@@ -779,20 +894,23 @@ fn DashboardPanel() -> Element {
                     h3 { "Build Progression" }
                     div { class: "phase-list",
                         {rsx! {
-                            PhaseItem { num: "0-5", name: "Foundation", detail: "Storage, Catalog, DataFusion, AI, UI, gRPC" }
-                            PhaseItem { num: "6", name: "Ingest Pipeline", detail: "CSV/JSON/PDF/Text auto-schema" }
-                            PhaseItem { num: "7", name: "Vector + RAG", detail: "Embed, Search, LLM Answers" }
-                            PhaseItem { num: "8", name: "Hot Cache", detail: "9.8x speedup, Delta Writes" }
-                            PhaseItem { num: "8.5", name: "Agent Workspaces", detail: "Per-contract, Instant Handoff" }
-                            PhaseItem { num: "9", name: "Event Journal", detail: "Append-only Mutation History" }
-                            PhaseItem { num: "10", name: "Rich Catalog", detail: "PII Detection, Lineage" }
-                            PhaseItem { num: "11", name: "Embedding Versioning", detail: "Model-proof Vectors" }
-                            PhaseItem { num: "12", name: "Tool Registry", detail: "6 Governed Actions + Audit" }
-                            PhaseItem { num: "13", name: "Access Control", detail: "Role-based, Field-level" }
-                            PhaseItem { num: "14", name: "Schema Evolution", detail: "Diff Detection, AI Migration" }
-                            PhaseItem { num: "15", name: "HNSW Index", detail: "100K Search in 27ms" }
-                            PhaseItem { num: "16", name: "File Watcher", detail: "Auto-ingest from Inbox" }
-                            PhaseItem { num: "17", name: "DB Connector", detail: "PostgreSQL Import" }
+                            PhaseItem { num: "0-5", name: "Foundation", detail: "Storage · Catalog · DataFusion · Ollama · UI · gRPC" }
+                            PhaseItem { num: "6", name: "Ingest Pipeline", detail: "CSV · JSON · PDF · Text · auto-schema · dedupe" }
+                            PhaseItem { num: "7", name: "Vector + RAG", detail: "Embed · brute-force cosine · LLM grounded answers" }
+                            PhaseItem { num: "8", name: "Hot Cache + Deltas", detail: "MemTable LRU · 9.8× speedup · merge-on-read · compaction" }
+                            PhaseItem { num: "8.5", name: "Agent Workspaces", detail: "Per-contract · daily/weekly/monthly tiers · zero-copy handoff" }
+                            PhaseItem { num: "9", name: "Event Journal", detail: "Append-only mutation log · time-travel · audit" }
+                            PhaseItem { num: "10", name: "Rich Catalog v2", detail: "PII auto-detection · lineage · freshness SLA · sensitivity" }
+                            PhaseItem { num: "11", name: "Embedding Versioning", detail: "Per-index model+version · A/B · incremental re-embed" }
+                            PhaseItem { num: "12", name: "Tool Registry", detail: "Governed actions · param validation · audit · MCP-ready" }
+                            PhaseItem { num: "13", name: "Access Control", detail: "Roles · field-level sensitivity · column masking · query audit" }
+                            PhaseItem { num: "14", name: "Schema Evolution", detail: "Diff detection · AI migration prompts · versioned schemas" }
+                            PhaseItem { num: "15", name: "HNSW + Trials", detail: "100K vectors · p50 873µs · trial journal · eval harness" }
+                            PhaseItem { num: "16", name: "Hot-swap + Autotune", detail: "Promotion registry · rollback · ε-greedy agent · Pareto winner" }
+                            PhaseItem { num: "17", name: "Model Profiles + VRAM", detail: "ModelProfile manifests · scoped search · sequential model swap" }
+                            PhaseItem { num: "18", name: "Lance hybrid backend", detail: "IVF_PQ build 14× faster · random fetch 112× · S3-native · per-profile routing" }
+                            PhaseItem { num: "19", name: "Playbook memory", detail: "Feedback loop · endorsement boost (cap 0.25) · orchestrator write-through · citations" }
+                            PhaseItem { num: "+", name: "Federation + Schedules", detail: "Multi-bucket · rescue fallback · error journal · MySQL · PDF OCR · cron ingest · catalog dedupe" }
                         }}
                     }
                 }
@@ -816,6 +934,285 @@ fn PhaseItem(num: String, name: String, detail: String) -> Element {
     }
 }
 
+// === PLAYBOOK — Phase 19 meta-index feedback loop ===
+
+#[component]
+fn PlaybookPanel() -> Element {
+    let mut stats = use_signal(|| None::<Result<PlaybookStats, String>>);
+    let mut indexes = use_signal(Vec::<IndexInfo>::new);
+    let mut rebuild_status = use_signal(|| None::<Result<String, String>>);
+    let mut rebuilding = use_signal(|| false);
+    let mut loaded = use_signal(|| false);
+
+    // Comparison state
+    let mut selected_index = use_signal(|| String::new());
+    let mut question = use_signal(|| String::from("reliable assembler in Detroit"));
+    let mut top_k = use_signal(|| 10usize);
+    let mut compare_loading = use_signal(|| false);
+    let mut hits_off = use_signal(|| None::<Result<HybridResp, String>>);
+    let mut hits_on = use_signal(|| None::<Result<HybridResp, String>>);
+
+    let load_all = move || {
+        spawn(async move {
+            stats.set(Some(fetch_playbook_stats().await));
+            if let Ok(ix) = fetch_indexes().await {
+                if selected_index.read().is_empty() {
+                    if let Some(default) = ix.iter().find(|i| i.source == "workers_500k").or_else(|| ix.first()) {
+                        selected_index.set(default.index_name.clone());
+                    }
+                }
+                indexes.set(ix);
+            }
+        });
+    };
+
+    use_effect(move || {
+        if !*loaded.read() {
+            loaded.set(true);
+            load_all();
+        }
+    });
+
+    let do_rebuild = move |_| {
+        spawn(async move {
+            rebuilding.set(true);
+            rebuild_status.set(None);
+            match rebuild_playbook_memory().await {
+                Ok(v) => rebuild_status.set(Some(Ok(format!("rebuild ok — {}", v)))),
+                Err(e) => rebuild_status.set(Some(Err(e))),
+            }
+            // Refresh stats afterward
+            stats.set(Some(fetch_playbook_stats().await));
+            rebuilding.set(false);
+        });
+    };
+
+    let do_compare = move |_| {
+        let idx = selected_index.read().clone();
+        let q = question.read().clone();
+        let k = *top_k.read();
+        if idx.is_empty() || q.trim().is_empty() { return; }
+        spawn(async move {
+            compare_loading.set(true);
+            hits_off.set(None);
+            hits_on.set(None);
+            // Run both sequentially so the embedding cache is shared
+            hits_off.set(Some(hybrid_search(&idx, &q, false, k).await));
+            hits_on.set(Some(hybrid_search(&idx, &q, true, k).await));
+            compare_loading.set(false);
+        });
+    };
+
+    rsx! {
+        div { class: "panel",
+            div { class: "ask-hero",
+                h2 { "Playbook Memory" }
+                p { class: "subtitle",
+                    "Phase 19 feedback loop: past successful playbooks boost future search rankings. \
+                     Endorsed workers from semantically similar past operations re-rank toward the top, \
+                     with citations back to the playbook that endorsed them."
+                }
+            }
+
+            // Stats card
+            div { class: "panel-section",
+                match stats.read().as_ref() {
+                    None => rsx! { div { class: "loading", "loading playbook stats..." } },
+                    Some(Err(e)) => rsx! { div { class: "error", "stats: {e}" } },
+                    Some(Ok(s)) => rsx! {
+                        div { class: "stat-grid",
+                            div { class: "stat-card",
+                                div { class: "stat-value", "{s.entries}" }
+                                div { class: "stat-label", "Playbooks in Memory" }
+                            }
+                            div { class: "stat-card",
+                                div { class: "stat-value", "{s.entries_with_embeddings}" }
+                                div { class: "stat-label", "Embedded" }
+                            }
+                            div { class: "stat-card accent",
+                                div { class: "stat-value", "{s.total_names_endorsed}" }
+                                div { class: "stat-label", "Endorsed Worker-Tags" }
+                            }
+                        }
+                    }
+                }
+                div { class: "sql-actions",
+                    button {
+                        class: "btn",
+                        disabled: *rebuilding.read(),
+                        onclick: do_rebuild,
+                        if *rebuilding.read() { "rebuilding from successful_playbooks..." } else { "Rebuild from successful_playbooks" }
+                    }
+                }
+                if let Some(s) = rebuild_status.read().as_ref() {
+                    match s {
+                        Ok(msg) => rsx! { div { class: "result-box", "{msg}" } },
+                        Err(e) => rsx! { div { class: "error", "{e}" } },
+                    }
+                }
+            }
+
+            // Sample playbooks
+            if let Some(Ok(s)) = stats.read().as_ref() {
+                if !s.sample.is_empty() {
+                    div { class: "panel-section",
+                        h3 { "Sample playbooks" }
+                        div { class: "table-wrap",
+                            table {
+                                thead { tr {
+                                    th { "ID" }
+                                    th { "Operation" }
+                                    th { "Location" }
+                                    th { "Endorsed" }
+                                } }
+                                tbody {
+                                    for pb in s.sample.iter() {
+                                        {
+                                            let loc = match (&pb.city, &pb.state) {
+                                                (Some(c), Some(st)) => format!("{c}, {st}"),
+                                                _ => "—".into(),
+                                            };
+                                            let endorsed = if pb.endorsed.is_empty() {
+                                                "—".to_string()
+                                            } else {
+                                                pb.endorsed.join(", ")
+                                            };
+                                            let pid = pb.id.clone();
+                                            let op = pb.operation.clone();
+                                            rsx! {
+                                                tr {
+                                                    td { class: "mono-cell", title: "{pid}", "{pid}" }
+                                                    td { "{op}" }
+                                                    td { "{loc}" }
+                                                    td { "{endorsed}" }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Side-by-side comparison: boost OFF vs ON
+            div { class: "panel-section",
+                h3 { "See the boost — search compared" }
+                p { class: "hint",
+                    "Run the same query against the same index twice — once with playbook boost OFF and once ON. \
+                     Hits with non-zero playbook_boost and citations are workers that past similar playbooks endorsed."
+                }
+                div { class: "form-row",
+                    label { "Index" }
+                    select {
+                        value: "{selected_index}",
+                        onchange: move |e| selected_index.set(e.value()),
+                        for ix in indexes.read().iter() {
+                            option { value: "{ix.index_name}", "{ix.index_name} ({ix.source}, {ix.chunk_count} chunks, {ix.vector_backend})" }
+                        }
+                    }
+                }
+                div { class: "form-row",
+                    label { "Question" }
+                    input {
+                        value: "{question}",
+                        oninput: move |e| question.set(e.value()),
+                        placeholder: "e.g. reliable assembler in Detroit"
+                    }
+                }
+                div { class: "form-row",
+                    label { "Top K" }
+                    input {
+                        r#type: "number",
+                        value: "{top_k}",
+                        oninput: move |e| {
+                            if let Ok(n) = e.value().parse::<usize>() { top_k.set(n.clamp(1, 50)); }
+                        }
+                    }
+                }
+                button {
+                    class: "btn btn-ask",
+                    disabled: *compare_loading.read(),
+                    onclick: do_compare,
+                    if *compare_loading.read() { "running both queries..." } else { "Run comparison" }
+                }
+
+                div { class: "explore-grid",
+                    div { class: "ds-detail",
+                        h3 { "Boost OFF (vanilla)" }
+                        match hits_off.read().as_ref() {
+                            None => rsx! { div { class: "empty", "—" } },
+                            Some(Err(e)) => rsx! { div { class: "error", "{e}" } },
+                            Some(Ok(r)) => rsx! { HybridHitTable { resp: r.clone() } },
+                        }
+                    }
+                    div { class: "ds-detail",
+                        h3 { "Boost ON (Phase 19)" }
+                        match hits_on.read().as_ref() {
+                            None => rsx! { div { class: "empty", "—" } },
+                            Some(Err(e)) => rsx! { div { class: "error", "{e}" } },
+                            Some(Ok(r)) => rsx! { HybridHitTable { resp: r.clone() } },
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+#[component]
+fn HybridHitTable(resp: HybridResp) -> Element {
+    rsx! {
+        div { class: "results-info",
+            "{resp.sources.len()} hits · {resp.duration_ms}ms · method={resp.method}"
+        }
+        if resp.sources.is_empty() {
+            div { class: "empty-sm", "no hits" }
+        } else {
+            div { class: "table-wrap",
+                table {
+                    thead { tr {
+                        th { "#" }
+                        th { "Doc" }
+                        th { "Score" }
+                        th { "Boost" }
+                        th { "Citations" }
+                        th { "Snippet" }
+                    } }
+                    tbody {
+                        for (i, h) in resp.sources.iter().enumerate() {
+                            {
+                                let snippet: String = h.chunk_text.chars().take(120).collect();
+                                let cites = if h.playbook_citations.is_empty() {
+                                    "—".to_string()
+                                } else {
+                                    h.playbook_citations.join(", ")
+                                };
+                                let row_class = if h.playbook_boost > 0.0 { "boosted-row" } else { "" };
+                                let rank = i + 1;
+                                let did = h.doc_id.clone();
+                                let score = format!("{:.3}", h.score);
+                                let boost = if h.playbook_boost > 0.0 { format!("+{:.3}", h.playbook_boost) } else { "—".into() };
+                                rsx! {
+                                    tr { class: "{row_class}",
+                                        td { "{rank}" }
+                                        td { class: "mono-cell", "{did}" }
+                                        td { "{score}" }
+                                        td { "{boost}" }
+                                        td { class: "mono-cell", title: "{cites}", "{cites}" }
+                                        td { "{snippet}" }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 // === INGEST — Data on-ramp ===
 
 #[component]
diff --git a/crates/vectord/src/lib.rs b/crates/vectord/src/lib.rs
index e72c41e..0f0be40 100644
--- a/crates/vectord/src/lib.rs
+++ b/crates/vectord/src/lib.rs
@@ -7,6 +7,7 @@ pub mod harness;
 pub mod hnsw;
 pub mod index_registry;
 pub mod jobs;
+pub mod playbook_memory;
 pub mod promotion;
 pub mod refresh;
 pub mod store;
diff --git a/crates/vectord/src/playbook_memory.rs b/crates/vectord/src/playbook_memory.rs
new file mode 100644
index 0000000..4a5a380
--- /dev/null
+++ b/crates/vectord/src/playbook_memory.rs
@@ -0,0 +1,825 @@
+//! Phase 19: Playbook memory — the feedback loop that makes the index
+//! learn from real outcomes instead of just logging them.
+//!
+//! When an agent (multi-agent orchestrator or human operator) seals a
+//! successful playbook, it lands in the `successful_playbooks` dataset.
+//! Historically that was a write-only log. This module turns it into a
+//! re-ranking signal:
+//!
+//!   1. `rebuild` reads every row of `successful_playbooks`, embeds the
+//!      operation+approach+context as one vector per playbook, parses
+//!      out the worker names from the `result` column, and stores both
+//!      the vectors and the (playbook → names) endorsement map in memory.
+//!
+//!   2. At query time, `compute_boost_for` takes a new operation text
+//!      (e.g. "fill: Welder x2 in Toledo, OH"), embeds it, brute-force
+//!      ranks past playbooks by cosine similarity, and returns a boost
+//!      map keyed by (city, state, worker_name) → `BoostEntry`. Each
+//!      entry carries its similarity score and the citing playbook_ids,
+//!      so explanations ("ranked higher because of 3 similar past fills
+//!      in Toledo") are free.
+//!
+//!   3. The `use_playbook_memory` flag on `/vectors/hybrid` adds those
+//!      boosts to matching search hits and re-sorts.
+//!
+//! Why brute force instead of another HNSW: `successful_playbooks` grows
+//! by operators, not automation. A few thousand rows is the realistic
+//! ceiling for years. Brute force at 10K × 768d is <10ms on this hardware
+//! — not worth the operational cost of another indexed surface.
+//!
+//! Persistence: the endorsements map round-trips through
+//! `_playbook_memory/state.json` in primary storage so the cache
+//! survives restarts without a full rebuild.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+use serde::{Deserialize, Serialize};
+use tokio::sync::RwLock;
+
+use aibridge::client::{AiClient, EmbedRequest};
+use object_store::ObjectStore;
+use storaged::ops;
+
+const STATE_KEY: &str = "_playbook_memory/state.json";
+
+/// Maximum boost a single worker can accumulate across all similar past
+/// playbooks. Prevents one very popular worker from always winning.
+pub const MAX_BOOST_PER_WORKER: f32 = 0.25;
+
+/// Default number of past playbooks to consider when ranking the current
+/// operation. Bumped 5 → 25 on 2026-04-20 because at >100 entries in
+/// memory the old default missed too many relevant playbooks — boost
+/// silently failed even when the seeded workers were ideal matches.
+/// 25 is brute-force-cheap (sub-ms) and covers most live operator memory.
+pub const DEFAULT_TOP_K_PLAYBOOKS: usize = 25;
+
+/// Half-life of a playbook's contribution to boost, in days. A playbook
+/// 30 days old contributes half what a fresh one would; 60 days old, a
+/// quarter; etc. Per Path 1 (deepen statistical) — stale endorsements
+/// shouldn't dominate fresh signal. Recruiter trust depends on this.
+pub const BOOST_HALF_LIFE_DAYS: f32 = 30.0;
+
+/// Shape of one playbook in memory. The embedding is optional so we can
+/// round-trip a cached state without re-embedding; the rebuild path
+/// populates it.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PlaybookEntry {
+    pub playbook_id: String,
+    pub operation: String,
+    pub approach: String,
+    pub context: String,
+    pub timestamp: String,
+    /// Parsed out of `result` (e.g. "2/2 filled → Matthew Roberts, Amy Davis").
+    /// Stored as raw names; matching against search results happens on
+    /// (city, state, name) tuples at boost time.
+    pub endorsed_names: Vec<String>,
+    /// City + state parsed out of the operation string. Kept separately
+    /// so boost matching doesn't re-parse on every query.
+    pub city: Option<String>,
+    pub state: Option<String>,
+    /// Embedding of `operation + approach + context`. Option so persisted
+    /// state can omit it on first load and have a later embed() fill in.
+    #[serde(default)]
+    pub embedding: Option<Vec<f32>>,
+}
+
+/// Persisted / in-memory state.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+struct PlaybookMemoryState {
+    entries: Vec<PlaybookEntry>,
+    /// Unix epoch millis when the last rebuild completed. Caller can
+    /// use this to gate "stale > N hours → trigger rebuild" behavior.
+    last_rebuilt_at: i64,
+}
+
+/// Per-worker boost payload. `citations` lets the response layer show
+/// "boosted because of these past fills" without a second lookup.
+#[derive(Debug, Clone, Serialize)]
+pub struct BoostEntry {
+    pub boost: f32,
+    pub citations: Vec<String>, // playbook_ids that endorsed this worker
+}
+
+/// Live handle passed around the service. Clone-cheap (all state is
+/// inside one Arc<RwLock>).
+#[derive(Clone)]
+pub struct PlaybookMemory {
+    state: Arc<RwLock<PlaybookMemoryState>>,
+    store: Arc<dyn ObjectStore>,
+}
+
+impl PlaybookMemory {
+    pub fn new(store: Arc<dyn ObjectStore>) -> Self {
+        Self {
+            state: Arc::new(RwLock::new(PlaybookMemoryState::default())),
+            store,
+        }
+    }
+
+    /// Best-effort load from primary storage. Any failure to read
+    /// `STATE_KEY` (not only a missing object) leaves memory untouched and
+    /// returns `Ok(0)`; unparseable JSON is an `Err`.
+    pub async fn load_from_storage(&self) -> Result<usize, String> {
+        let Ok(raw) = ops::get(&self.store, STATE_KEY).await else {
+            return Ok(0);
+        };
+        let loaded = serde_json::from_slice::<PlaybookMemoryState>(&raw)
+            .map_err(|e| format!("parse playbook_memory state: {e}"))?;
+        let n = loaded.entries.len();
+        *self.state.write().await = loaded;
+        tracing::info!("playbook_memory: loaded {n} entries from {STATE_KEY}");
+        Ok(n)
+    }
+
+    async fn persist(&self) -> Result<(), String> {
+        // Serialize from the read guard (no full-state clone), compact JSON; guard drops at end of statement.
+        let bytes = serde_json::to_vec(&*self.state.read().await).map_err(|e| e.to_string())?;
+        ops::put(&self.store, STATE_KEY, bytes.into()).await
+    }
+
+    /// Replace the full in-memory state atomically and persist.
+    pub async fn set_entries(&self, entries: Vec<PlaybookEntry>) -> Result<(), String> {
+        let mut s = self.state.write().await;
+        s.entries = entries;
+        s.last_rebuilt_at = chrono::Utc::now().timestamp_millis();
+        drop(s);
+        self.persist().await
+    }
+
+    pub async fn entry_count(&self) -> usize {
+        self.state.read().await.entries.len()
+    }
+
+    pub async fn snapshot(&self) -> Vec<PlaybookEntry> {
+        self.state.read().await.entries.clone()
+    }
+
+    /// Given an operation's embedding, find the top-K most similar past
+    /// playbooks (by cosine similarity) and return a per-worker boost map
+    /// keyed by (city, state, name). Worker is matched by the tuple so a
+    /// shared name across cities doesn't cross-pollinate.
+    ///
+    /// Boost formula: each qualifying playbook contributes
+    /// `similarity * base_weight * decay / n_workers` to each worker it
+    /// endorsed, with `decay = 0.5^(age_days / BOOST_HALF_LIFE_DAYS)`.
+    /// Total per worker is capped at `MAX_BOOST_PER_WORKER`; a
+    /// `top_k_playbooks` of 0 is treated as 1.
+    pub async fn compute_boost_for(
+        &self,
+        query_embedding: &[f32],
+        top_k_playbooks: usize,
+        base_weight: f32,
+    ) -> HashMap<(String, String, String), BoostEntry> {
+        // Borrow under the read guard rather than cloning every entry (and
+        // its embedding) per query; nothing below awaits while it is held.
+        let guard = self.state.read().await;
+
+        // Brute-force cosine. Missing embeddings are skipped, and so is any
+        // similarity that is not > 0.05 (unrelated, near-zero, or NaN) so we
+        // don't inject noise when the memory is sparse.
+        let mut scored: Vec<(f32, &PlaybookEntry)> = guard.entries
+            .iter()
+            .filter_map(|e| e.embedding.as_ref().map(|v| (cosine(query_embedding, v), e)))
+            .filter(|(sim, _)| *sim > 0.05)
+            .collect();
+        scored.sort_by(|a, b| b.0.total_cmp(&a.0));
+        scored.truncate(top_k_playbooks.max(1));
+
+        let now = chrono::Utc::now();
+        let mut boosts: HashMap<(String, String, String), BoostEntry> = HashMap::new();
+        for (similarity, pb) in &scored {
+            let Some(city) = &pb.city else { continue; };
+            let Some(state) = &pb.state else { continue; };
+            let n_workers = pb.endorsed_names.len().max(1);
+            // Path 1 — temporal decay. Older playbooks weight less. Failure
+            // to parse the timestamp degrades to "no decay" (treat as fresh)
+            // rather than dropping the entry entirely; keeps backward
+            // compatibility with seed payloads that omitted timestamp.
+            // A true half-life: the weight halves every BOOST_HALF_LIFE_DAYS.
+            let decay = chrono::DateTime::parse_from_rfc3339(&pb.timestamp)
+                .ok()
+                .map(|t| {
+                    let age_days = (now.signed_duration_since(t.with_timezone(&chrono::Utc))
+                        .num_seconds() as f32) / 86400.0;
+                    if age_days <= 0.0 { 1.0 }
+                    else { 0.5_f32.powf(age_days / BOOST_HALF_LIFE_DAYS) }
+                })
+                .unwrap_or(1.0);
+            let per_worker = similarity * base_weight * decay / (n_workers as f32);
+            for name in &pb.endorsed_names {
+                let key = (city.clone(), state.clone(), name.clone());
+                let entry = boosts.entry(key).or_insert(BoostEntry { boost: 0.0, citations: Vec::new() });
+                entry.boost = (entry.boost + per_worker).min(MAX_BOOST_PER_WORKER);
+                if !entry.citations.contains(&pb.playbook_id) {
+                    entry.citations.push(pb.playbook_id.clone());
+                }
+            }
+        }
+        boosts
+    }
+}
+
+/// Cosine similarity over the common prefix of `a` and `b`; 0.0 if either norm is zero.
+fn cosine(a: &[f32], b: &[f32]) -> f32 {
+    let mut dot_product = 0.0_f32;
+    let mut norm_a_sq = 0.0_f32;
+    let mut norm_b_sq = 0.0_f32;
+    for (&x, &y) in a.iter().zip(b.iter()) {
+        dot_product += x * y;
+        norm_a_sq += x * x;
+        norm_b_sq += y * y;
+    }
+    if norm_a_sq == 0.0 || norm_b_sq == 0.0 { 0.0 } else { dot_product / (norm_a_sq.sqrt() * norm_b_sq.sqrt()) }
+}
+
+// ---------------- Pattern discovery (Path 2 — meta-index) ----------------
+//
+// Phase 19's boost path answers "for THIS exact city + role, which workers
+// have we used before?" Pattern discovery answers a different question:
+// "for queries like this one, what TRAITS have past successful fills had
+// in common — even if no exact prior playbook covers this geo?"
+//
+// The discovered pattern surfaces signals the operator didn't query for:
+// e.g. "every successful Welder fill we've seen carried OSHA-10 + lockout
+// /tagout — you may want to filter on those." That's the meta-index
+// dimension of the original PRD: identify things we didn't know about.
+
+#[derive(Debug, Clone, Serialize)]
+pub struct PatternReport {
+    pub query: String,
+    pub matched_playbooks: usize,
+    pub total_workers_examined: usize,
+    pub common_certifications: Vec<TraitFreq>,
+    pub common_skills: Vec<TraitFreq>,
+    pub modal_archetype: Option<String>,
+    pub reliability_p50: f64,
+    pub reliability_min: f64,
+    pub reliability_max: f64,
+    pub matched_playbook_ids: Vec<String>,
+    pub discovered_pattern: String,
+    pub duration_secs: f32,
+}
+
+#[derive(Debug, Clone, Serialize)]
+pub struct TraitFreq {
+    pub name: String,
+    pub count: usize,
+    pub frequency: f32,
+}
+
+/// Path 2 pattern discovery: embed `query`, take the `top_k_playbooks`
+/// most similar playbooks in memory (cosine > 0.05), look up the workers
+/// they endorsed in `workers_500k` (at most 500 rows) and summarise the
+/// certifications, skills, archetype and reliability those workers share.
+///
+/// Traits whose share of examined workers is below `min_trait_frequency`
+/// are dropped; at most 8 per list are kept. Ties are broken by name so
+/// repeated calls over the same data return the same report. Embedding
+/// and SQL failures surface as `Err(String)`.
+pub async fn discover_patterns(
+    memory: &PlaybookMemory,
+    ai_client: &AiClient,
+    catalog: &catalogd::registry::Registry,
+    buckets: &Arc<storaged::registry::BucketRegistry>,
+    query: &str,
+    top_k_playbooks: usize,
+    min_trait_frequency: f32,
+) -> Result<PatternReport, String> {
+    let t0 = std::time::Instant::now();
+
+    // 1. Embed the query with the default model (`model: None`), the same
+    //    request shape `rebuild` uses for playbooks, so cosine is meaningful.
+    let resp = ai_client
+        .embed(EmbedRequest { texts: vec![query.into()], model: None })
+        .await
+        .map_err(|e| format!("embed query: {e}"))?;
+    if resp.embeddings.is_empty() { return Err("embed returned no vectors".into()); }
+    let qv: Vec<f32> = resp.embeddings[0].iter().map(|x| *x as f32).collect();
+
+    // 2. Find top-K most similar past playbooks (cosine over embeddings).
+    let entries = memory.snapshot().await;
+    let mut scored: Vec<(f32, &PlaybookEntry)> = entries
+        .iter()
+        .filter_map(|e| e.embedding.as_ref().map(|v| (cosine(&qv, v), e)))
+        .filter(|(s, _)| *s > 0.05)
+        .collect();
+    scored.sort_by(|a, b| b.0.total_cmp(&a.0));
+    scored.truncate(top_k_playbooks);
+    let matched: Vec<(f32, PlaybookEntry)> = scored
+        .into_iter()
+        .map(|(s, e)| (s, e.clone()))
+        .collect();
+
+    if matched.is_empty() {
+        return Ok(PatternReport {
+            query: query.into(),
+            matched_playbooks: 0, total_workers_examined: 0,
+            common_certifications: vec![], common_skills: vec![],
+            modal_archetype: None, matched_playbook_ids: vec![],
+            reliability_p50: 0.0, reliability_min: 0.0, reliability_max: 0.0,
+            discovered_pattern: "No similar past playbooks found.".into(),
+            duration_secs: t0.elapsed().as_secs_f32(),
+        });
+    }
+
+    // 3. Pull each endorsed worker's full profile from workers_500k.
+    //    Restrict by (name, city, state) tuple so cross-city homonyms don't
+    //    pollute the aggregate. Values are inlined with `'` doubled (esc).
+    let esc = |s: &str| s.replace('\'', "''");
+    let mut conditions: Vec<String> = Vec::new();
+    let mut matched_ids: Vec<String> = Vec::new();
+    for (_, pb) in &matched {
+        matched_ids.push(pb.playbook_id.clone());
+        let (Some(city), Some(state)) = (pb.city.as_ref(), pb.state.as_ref()) else { continue };
+        for name in &pb.endorsed_names {
+            conditions.push(format!(
+                "(name = '{}' AND city = '{}' AND state = '{}')",
+                esc(name), esc(city), esc(state)
+            ));
+        }
+    }
+    if conditions.is_empty() {
+        return Ok(PatternReport {
+            query: query.into(),
+            matched_playbooks: matched.len(),
+            total_workers_examined: 0,
+            common_certifications: vec![], common_skills: vec![],
+            modal_archetype: None, reliability_p50: 0.0,
+            reliability_min: 0.0, reliability_max: 0.0,
+            matched_playbook_ids: matched_ids,
+            discovered_pattern: "Matched playbooks but no endorsed names with city/state to lookup.".into(),
+            duration_secs: t0.elapsed().as_secs_f32(),
+        });
+    }
+
+    let sql = format!(
+        "SELECT name, role, city, state, certifications, skills, archetype, \
+         CAST(reliability AS DOUBLE) as reliability \
+         FROM workers_500k WHERE {} LIMIT 500",
+        conditions.join(" OR ")
+    );
+    let engine = queryd::context::QueryEngine::new(catalog.clone(), buckets.clone(), queryd::cache::MemCache::new(0));
+    let batches = engine.query(&sql).await.map_err(|e| format!("worker lookup: {e}"))?;
+
+    // 4. Aggregate. Pipe-separated cert/skill lists, single-string archetype,
+    //    numeric reliability. Frequencies are share-of-workers.
+    use arrow::array::{Array, AsArray};
+    let mut cert_counts: HashMap<String, usize> = HashMap::new();
+    let mut skill_counts: HashMap<String, usize> = HashMap::new();
+    let mut arch_counts: HashMap<String, usize> = HashMap::new();
+    let mut reliabilities: Vec<f64> = Vec::new();
+    let mut total = 0usize;
+
+    let get_string = |b: &arrow::record_batch::RecordBatch, col: &str, row: usize| -> String {
+        let Some(c) = b.column_by_name(col) else { return String::new(); };
+        if let Some(arr) = c.as_string_view_opt() {
+            return if arr.is_null(row) { String::new() } else { arr.value(row).to_string() };
+        }
+        if let Some(arr) = c.as_string_opt::<i32>() {
+            return if arr.is_null(row) { String::new() } else { arr.value(row).to_string() };
+        }
+        String::new()
+    };
+    let get_f64 = |b: &arrow::record_batch::RecordBatch, col: &str, row: usize| -> f64 {
+        let Some(c) = b.column_by_name(col) else { return 0.0; };
+        match c.as_primitive_opt::<arrow::datatypes::Float64Type>() {
+            Some(arr) if !arr.is_null(row) => arr.value(row),
+            _ => 0.0,
+        }
+    };
+
+    for b in &batches {
+        for row in 0..b.num_rows() {
+            total += 1;
+            let certs = get_string(b, "certifications", row);
+            for c in certs.split(['|', ',']).map(|s| s.trim()).filter(|s| !s.is_empty() && *s != "none") {
+                *cert_counts.entry(c.to_string()).or_insert(0) += 1;
+            }
+            let skills = get_string(b, "skills", row);
+            for s in skills.split(['|', ',']).map(|s| s.trim()).filter(|s| !s.is_empty()) {
+                *skill_counts.entry(s.to_string()).or_insert(0) += 1;
+            }
+            let arch = get_string(b, "archetype", row);
+            if !arch.is_empty() {
+                *arch_counts.entry(arch).or_insert(0) += 1;
+            }
+            let rel = get_f64(b, "reliability", row);
+            if rel > 0.0 { reliabilities.push(rel); }
+        }
+    }
+
+    let total_f = total.max(1) as f32;
+    let to_freq = |m: HashMap<String, usize>, min: f32| -> Vec<TraitFreq> {
+        let mut v: Vec<TraitFreq> = m.into_iter()
+            .map(|(name, count)| TraitFreq { name, count, frequency: count as f32 / total_f })
+            .filter(|t| t.frequency >= min)
+            .collect();
+        // HashMap order is arbitrary: break count ties by name so the top 8 are reproducible.
+        v.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.name.cmp(&b.name)));
+        v.truncate(8);
+        v
+    };
+    let common_certifications = to_freq(cert_counts, min_trait_frequency);
+    let common_skills = to_freq(skill_counts, min_trait_frequency);
+    let modal_archetype = arch_counts.into_iter()
+        .max_by(|a, b| a.1.cmp(&b.1).then_with(|| b.0.cmp(&a.0)))
+        .map(|(name, _)| name);
+
+    reliabilities.sort_by(|a, b| a.total_cmp(b));
+    let p50 = if reliabilities.is_empty() { 0.0 } else { reliabilities[reliabilities.len() / 2] };
+    let rmin = reliabilities.first().copied().unwrap_or(0.0);
+    let rmax = reliabilities.last().copied().unwrap_or(0.0);
+
+    // Build a human-readable discovered-pattern summary
+    let mut parts: Vec<String> = vec![
+        format!("Across {} similar past playbooks ({} workers examined)", matched.len(), total),
+    ];
+    if !common_certifications.is_empty() {
+        let head: Vec<String> = common_certifications.iter().take(3)
+            .map(|t| format!("{} ({:.0}%)", t.name, t.frequency * 100.0)).collect();
+        parts.push(format!("recurring certifications: {}", head.join(", ")));
+    }
+    if !common_skills.is_empty() {
+        let head: Vec<String> = common_skills.iter().take(3)
+            .map(|t| format!("{} ({:.0}%)", t.name, t.frequency * 100.0)).collect();
+        parts.push(format!("recurring skills: {}", head.join(", ")));
+    }
+    if let Some(a) = &modal_archetype { parts.push(format!("archetype mostly: {a}")); }
+    if !reliabilities.is_empty() {
+        parts.push(format!("reliability median {:.2} (range {:.2}–{:.2})", p50, rmin, rmax));
+    }
+    let discovered_pattern = parts.join(" · ");
+
+    Ok(PatternReport {
+        query: query.into(),
+        matched_playbooks: matched.len(),
+        total_workers_examined: total,
+        common_certifications, common_skills,
+        modal_archetype, reliability_p50: p50,
+        reliability_min: rmin, reliability_max: rmax,
+        matched_playbook_ids: matched_ids,
+        discovered_pattern,
+        duration_secs: t0.elapsed().as_secs_f32(),
+    })
+}
+
+// ---------------- Persist memory → SQL (Path 2 foundation) ----------------
+
+#[derive(Debug, Clone, Serialize)]
+pub struct PersistReport {
+    pub rows_persisted: usize,
+    pub dataset_name: String,
+    pub fingerprint: String,
+    pub duration_secs: f32,
+}
+
+/// Dump current in-memory state to a queryable Parquet under
+/// `successful_playbooks_live`. Registers fresh objects each call — safe
+/// because in-memory state is the source of truth here, so REPLACING the
+/// objects list reflects the real state, not destroying it.
+///
+/// Distinct from the existing `successful_playbooks` dataset (which is
+/// read by `rebuild()`), so this never collides with operator imports of
+/// historical playbook data. Recruiter-facing SQL surfaces should query
+/// `successful_playbooks_live` for current operator activity.
+pub async fn persist_to_sql(
+    memory: &PlaybookMemory,
+    catalog: &catalogd::registry::Registry,
+) -> Result<PersistReport, String> {
+    use arrow::array::StringArray;
+    use arrow::datatypes::{DataType, Field, Schema};
+    use arrow::record_batch::RecordBatch;
+
+    let t0 = std::time::Instant::now();
+    let entries = memory.snapshot().await;
+
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("timestamp", DataType::Utf8, true),
+        Field::new("operation", DataType::Utf8, true),
+        Field::new("approach", DataType::Utf8, true),
+        Field::new("result", DataType::Utf8, true),
+        Field::new("context", DataType::Utf8, true),
+    ]));
+
+    let timestamps: Vec<&str> = entries.iter().map(|e| e.timestamp.as_str()).collect();
+    let operations: Vec<&str> = entries.iter().map(|e| e.operation.as_str()).collect();
+    let approaches: Vec<&str> = entries.iter().map(|e| e.approach.as_str()).collect();
+    let contexts:   Vec<&str> = entries.iter().map(|e| e.context.as_str()).collect();
+    // Result column is reconstructed from endorsed_names so SQL queries
+    // against successful_playbooks_live see the same shape as the original
+    // CSV-fed successful_playbooks ("N/N filled → Name1, Name2").
+    let results: Vec<String> = entries.iter().map(|e| {
+        if e.endorsed_names.is_empty() {
+            String::new()
+        } else {
+            let n = e.endorsed_names.len();
+            format!("{}/{} filled → {}", n, n, e.endorsed_names.join(", "))
+        }
+    }).collect();
+    let result_refs: Vec<&str> = results.iter().map(|s| s.as_str()).collect();
+
+    let batch = RecordBatch::try_new(schema.clone(), vec![
+        Arc::new(StringArray::from(timestamps)),
+        Arc::new(StringArray::from(operations)),
+        Arc::new(StringArray::from(approaches)),
+        Arc::new(StringArray::from(result_refs)),
+        Arc::new(StringArray::from(contexts)),
+    ]).map_err(|e| format!("build record batch: {e}"))?;
+
+    let parquet_bytes = shared::arrow_helpers::record_batch_to_parquet(&batch)?;
+    let fp = shared::arrow_helpers::fingerprint_schema(&schema);
+
+    let key = "datasets/successful_playbooks_live.parquet";
+    ops::put(&memory.store, key, parquet_bytes.clone()).await?;
+
+    let obj = shared::types::ObjectRef {
+        bucket: "primary".into(),
+        key: key.into(),
+        size_bytes: parquet_bytes.len() as u64,
+        created_at: chrono::Utc::now(),
+    };
+
+    let manifest = catalog.register(
+        "successful_playbooks_live".into(),
+        fp.clone(),
+        vec![obj],
+    ).await?;
+
+    Ok(PersistReport {
+        rows_persisted: entries.len(),
+        dataset_name: manifest.name,
+        fingerprint: fp.0,
+        duration_secs: t0.elapsed().as_secs_f32(),
+    })
+}
+
+// ---------------- Rebuild (the core of Phase 19) ----------------
+
+#[derive(Debug, Clone, Serialize)]
+pub struct RebuildReport {
+    pub rows_scanned: usize,
+    pub entries_built: usize,
+    pub total_names_endorsed: usize,
+    pub duration_secs: f32,
+}
+
+/// Full rebuild: scan `successful_playbooks`, extract endorsements, embed
+/// each row (see `embed_text`), and replace the in-memory state.
+///
+/// Returns the report so callers can show operators what happened. Query or
+/// embed failures (incl. a vector-count mismatch) return `Err` before memory changes.
+pub async fn rebuild(
+    memory: &PlaybookMemory,
+    ai_client: &AiClient,
+    catalog: &catalogd::registry::Registry,
+    buckets: &Arc<storaged::registry::BucketRegistry>,
+) -> Result<RebuildReport, String> {
+    let t0 = std::time::Instant::now();
+
+    // 1. Pull every row of successful_playbooks through the query engine.
+    let sql = "SELECT timestamp, operation, approach, result, context \
+               FROM successful_playbooks";
+    let engine = queryd::context::QueryEngine::new(
+        catalog.clone(), buckets.clone(), queryd::cache::MemCache::new(0),
+    );
+    let batches = engine.query(sql).await.map_err(|e| format!("query successful_playbooks: {e}"))?;
+
+    let mut rows: Vec<(String, String, String, String, String)> = Vec::new();
+    for b in &batches {
+        // Read a Utf8View or Utf8 cell as an owned String; a missing column,
+        // another column type, or a null cell all read as "".
+        let get = |col: &str, row: usize| -> String {
+            use arrow::array::{Array, AsArray};
+            let Some(c) = b.column_by_name(col) else { return String::new(); };
+            if let Some(arr) = c.as_string_view_opt() {
+                return if arr.is_null(row) { String::new() } else { arr.value(row).to_string() };
+            }
+            if let Some(arr) = c.as_string_opt::<i32>() {
+                return if arr.is_null(row) { String::new() } else { arr.value(row).to_string() };
+            }
+            String::new()
+        };
+        for row in 0..b.num_rows() {
+            rows.push((
+                get("timestamp", row),
+                get("operation", row),
+                get("approach", row),
+                get("result", row),
+                get("context", row),
+            ));
+        }
+    }
+    let rows_scanned = rows.len();
+
+    // 2. For each row, build a PlaybookEntry (no embedding yet). Parse
+    //    the operation for (city, state) and the result for names.
+    let mut entries: Vec<PlaybookEntry> = rows
+        .into_iter()
+        .map(|(ts, op, approach, result, ctx)| {
+            let (city, state) = parse_city_state(&op);
+            let names = parse_names(&result);
+            PlaybookEntry {
+                playbook_id: stable_id(&ts, &op),
+                operation: op,
+                approach,
+                context: ctx,
+                timestamp: ts,
+                endorsed_names: names,
+                city,
+                state,
+                embedding: None,
+            }
+        })
+        .collect();
+
+    // 3. Embed in chunks of EMBED_BATCH texts per request to keep memory
+    //    flat, regardless of any batching the sidecar does internally.
+    const EMBED_BATCH: usize = 64;
+    for chunk_start in (0..entries.len()).step_by(EMBED_BATCH) {
+        let end = (chunk_start + EMBED_BATCH).min(entries.len());
+        let texts: Vec<String> = entries[chunk_start..end]
+            .iter()
+            .map(embed_text)
+            .collect();
+        let req = EmbedRequest { texts, model: None };
+        let resp = ai_client
+            .embed(req)
+            .await
+            .map_err(|e| format!("embed batch [{chunk_start}..{end}]: {e}"))?;
+        // A short response would silently leave entries unembedded (and so
+        // invisible to boost); a long one would index past this chunk.
+        let (want, got) = (end - chunk_start, resp.embeddings.len());
+        if got != want {
+            return Err(format!("embed batch [{chunk_start}..{end}]: expected {want} vectors, got {got}"));
+        }
+        for (entry, v) in entries[chunk_start..end].iter_mut().zip(resp.embeddings.iter()) {
+            entry.embedding = Some(v.iter().map(|&x| x as f32).collect());
+        }
+    }
+
+    let total_names_endorsed: usize = entries.iter().map(|e| e.endorsed_names.len()).sum();
+    let entries_built = entries.len();
+
+    memory.set_entries(entries).await?;
+
+    Ok(RebuildReport {
+        rows_scanned,
+        entries_built,
+        total_names_endorsed,
+        duration_secs: t0.elapsed().as_secs_f32(),
+    })
+}
+
+/// Text that gets embedded for a playbook, as a compact one-liner:
+/// "operation | approach | context | fills: name, name, …".
+fn embed_text(e: &PlaybookEntry) -> String {
+    // The timestamp is deliberately left out (no semantic signal). The
+    // endorsed names go in as plain words because they are occasionally
+    // meaningful — "Luis Harris" might semantically correlate with
+    // Spanish-speaker names in future queries.
+    let fills = e.endorsed_names.join(", ");
+    let operation = &e.operation;
+    let approach = &e.approach;
+    let context = &e.context;
+    format!("{operation} | {approach} | {context} | fills: {fills}")
+}
+
+/// Derive a stable id ("pb-" + first 12 bytes of SHA-256(ts|op), as hex). Identical
+/// timestamp+operation rows share one id; `rebuild` still keeps both entries.
+fn stable_id(ts: &str, op: &str) -> String {
+    use sha2::{Digest, Sha256};
+    let mut h = Sha256::new();
+    h.update(ts.as_bytes());
+    h.update(b"|");
+    h.update(op.as_bytes());
+    let bytes = h.finalize();
+    format!("pb-{}", hex_short(&bytes, 12))
+}
+
+/// Lowercase hex of the first `n` bytes of `b` (all of `b` if shorter).
+fn hex_short(b: &[u8], n: usize) -> String {
+    b.iter()
+        .take(n)
+        .map(|byte| format!("{byte:02x}"))
+        .collect()
+}
+
+/// Parse "fill: Welder x2 in Toledo, OH" → (Some("Toledo"), Some("OH")).
+/// Yields (None, None) when the " in City, ST" shape is not found.
+fn parse_city_state(op: &str) -> (Option<String>, Option<String>) {
+    // The location is the segment following the first " in ".
+    let Some(location) = op.split(" in ").nth(1) else {
+        return (None, None);
+    };
+    // "City, ST": everything before the first comma is the city.
+    let Some((raw_city, raw_state)) = location.split_once(',') else {
+        return (None, None);
+    };
+    let city = raw_city.trim();
+    // The state may carry trailing context; keep only its leading ASCII letters.
+    let state: String = raw_state
+        .trim()
+        .chars()
+        .take_while(char::is_ascii_alphabetic)
+        .collect();
+    if city.is_empty() || state.is_empty() {
+        return (None, None);
+    }
+    (Some(city.to_string()), Some(state))
+}
+
+/// Parse "2/2 filled → Matthew Roberts, Amy Davis" → ["Matthew Roberts", "Amy Davis"].
+fn parse_names(result: &str) -> Vec<String> {
+    // Names live in the segment right after the first arrow; no arrow, no names.
+    let Some(segment) = result.split('→').nth(1).map(str::trim) else {
+        return Vec::new();
+    };
+    // Cut at the first " (" to drop trailing noise such as "(and N more)".
+    let names_part = segment.split(" (").next().unwrap_or(segment);
+    names_part
+        .split(',')
+        .map(str::trim)
+        .filter(|name| !name.is_empty())
+        .map(String::from)
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_city_state_extracts_both() {
+        let (c, s) = parse_city_state("fill: Welder x2 in Toledo, OH");
+        assert_eq!(c.as_deref(), Some("Toledo"));
+        assert_eq!(s.as_deref(), Some("OH"));
+    }
+
+    #[test]
+    fn parse_city_state_handles_multiword_city() {
+        let (c, s) = parse_city_state("fill: Loader x1 in Grand Rapids, MI");
+        assert_eq!(c.as_deref(), Some("Grand Rapids"));
+        assert_eq!(s.as_deref(), Some("MI"));
+    }
+
+    #[test]
+    fn parse_city_state_malformed_returns_none() {
+        let (c, s) = parse_city_state("fill: something weird");
+        assert!(c.is_none());
+        assert!(s.is_none());
+    }
+
+    #[test]
+    fn parse_names_extracts_after_arrow() {
+        let ns = parse_names("2/2 filled → Matthew Roberts, Amy Davis");
+        assert_eq!(ns, vec!["Matthew Roberts".to_string(), "Amy Davis".to_string()]);
+    }
+
+    #[test]
+    fn parse_names_handles_single_fill() {
+        let ns = parse_names("1/1 filled → Jose Reed");
+        assert_eq!(ns, vec!["Jose Reed".to_string()]);
+    }
+
+    #[test]
+    fn parse_names_handles_no_arrow() {
+        let ns = parse_names("0/2 filled");
+        assert!(ns.is_empty());
+    }
+
+    #[test]
+    fn stable_id_is_deterministic() {
+        let a = stable_id("2026-04-20T00:00:00Z", "fill: Welder x2 in Toledo, OH");
+        let b = stable_id("2026-04-20T00:00:00Z", "fill: Welder x2 in Toledo, OH");
+        assert_eq!(a, b);
+        assert!(a.starts_with("pb-"));
+    }
+
+    #[test]
+    fn boost_caps_per_worker() {
+        // Even with 100 similar playbooks all endorsing the same name, the
+        // boost never exceeds MAX_BOOST_PER_WORKER.
+        let pm = PlaybookMemory::new(Arc::new(object_store::memory::InMemory::new()));
+        let entries: Vec<PlaybookEntry> = (0..100)
+            .map(|i| PlaybookEntry {
+                playbook_id: format!("pb-{i}"),
+                operation: "fill: Welder x1 in Toledo, OH".into(),
+                approach: "transfer".into(),
+                context: "".into(),
+                timestamp: "2026-04-20".into(),
+                endorsed_names: vec!["Deborah Powell".into()],
+                city: Some("Toledo".into()),
+                state: Some("OH".into()),
+                embedding: Some(vec![1.0, 0.0, 0.0]),
+            })
+            .collect();
+        tokio::runtime::Runtime::new().unwrap().block_on(async {
+            pm.set_entries(entries).await.unwrap();
+            let boosts = pm.compute_boost_for(&[1.0, 0.0, 0.0], 100, 0.5).await;
+            let key = ("Toledo".into(), "OH".into(), "Deborah Powell".into());
+            let entry = boosts.get(&key).expect("boost entry present");
+            assert!(entry.boost <= MAX_BOOST_PER_WORKER + 1e-6,
+                "boost {} exceeded cap {}", entry.boost, MAX_BOOST_PER_WORKER);
+        });
+    }
+}
diff --git a/crates/vectord/src/service.rs b/crates/vectord/src/service.rs
index 5bd6366..565c747 100644
--- a/crates/vectord/src/service.rs
+++ b/crates/vectord/src/service.rs
@@ -12,7 +12,7 @@ use std::sync::Arc;
 use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};
 use catalogd::registry::Registry as CatalogRegistry;
 use storaged::registry::BucketRegistry;
-use crate::{agent, autotune, chunker, embedding_cache, harness, hnsw, index_registry, jobs, lance_backend, promotion, rag, refresh, search, store, supervisor, trial};
+use crate::{agent, autotune, chunker, embedding_cache, harness, hnsw, index_registry, jobs, lance_backend, playbook_memory, promotion, rag, refresh, search, store, supervisor, trial};
 
 #[derive(Clone)]
 pub struct VectorState {
@@ -23,6 +23,9 @@ pub struct VectorState {
     pub hnsw_store: hnsw::HnswStore,
     pub embedding_cache: embedding_cache::EmbeddingCache,
     pub trial_journal: trial::TrialJournal,
+    /// Federation-aware harness store — resolves eval artifacts to each
+    /// index's recorded bucket, falling back to primary for legacy evals.
+    pub harness_store: harness::HarnessStore,
     /// Catalog registry — needed by the Phase C refresh path to mark/clear
     /// staleness and look up dataset manifests.
     pub catalog: CatalogRegistry,
@@ -46,6 +49,10 @@ pub struct VectorState {
     /// ADR-019 hybrid: handles to Lance datasets keyed by index name.
     /// Lazy-created on first /vectors/lance/* call.
     pub lance: lance_backend::LanceRegistry,
+    /// Phase 19 — meta-index feedback. Embeds past successful_playbooks
+    /// and, when `use_playbook_memory` is set on /vectors/hybrid, boosts
+    /// workers that were actually filled in semantically-similar past ops.
+    pub playbook_memory: playbook_memory::PlaybookMemory,
 }
 
 /// What the active-profile singleton records. Narrow — we don't need the
@@ -63,6 +70,7 @@ pub fn router(state: VectorState) -> Router {
         .route("/index", post(create_index))
         .route("/indexes", get(list_indexes))
         .route("/indexes/{name}", get(get_index_meta))
+        .route("/indexes/{name}/bucket", axum::routing::patch(migrate_index_bucket))
         .route("/jobs", get(list_jobs))
         .route("/jobs/{id}", get(get_job))
         .route("/search", post(search_index))
@@ -110,6 +118,12 @@ pub fn router(state: VectorState) -> Router {
         .route("/lance/stats/{index_name}", get(lance_stats))
         .route("/lance/scalar-index/{index_name}/{column}", post(lance_build_scalar_index))
         .route("/lance/recall/{index_name}", post(lance_recall_harness))
+        // Phase 19: playbook memory — the meta-index feedback loop
+        .route("/playbook_memory/rebuild", post(rebuild_playbook_memory))
+        .route("/playbook_memory/stats", get(playbook_memory_stats))
+        .route("/playbook_memory/seed", post(seed_playbook_memory))
+        .route("/playbook_memory/persist_sql", post(persist_playbook_memory_sql))
+        .route("/playbook_memory/patterns", post(discover_playbook_patterns))
         .with_state(state)
 }
 
@@ -259,6 +273,174 @@ async fn get_index_meta(
     }
 }
 
+#[derive(Deserialize)]
+struct MigrateBucketRequest {
+    dest_bucket: String,
+    /// If true, delete artifacts from the source bucket after the pointer
+    /// flip. Default false — keeping source copies means a failed migration
+    /// is recoverable by editing IndexMeta.bucket back, and a successful
+    /// migration leaves inspectable forensics until an operator sweeps.
+    #[serde(default)]
+    delete_source: bool,
+}
+
+#[derive(Serialize)]
+struct MigrateBucketReport {
+    index_name: String,
+    source_bucket: String,
+    dest_bucket: String,
+    /// Artifact keys copied successfully to the destination, in copy order.
+    copied: Vec<String>,
+    /// Keys/prefixes not copied: an empty trial-journal prefix, or an optional
+    /// file whose copy failed (normally because it does not exist at source).
+    skipped: Vec<String>,
+    /// Subset of `copied` that was subsequently deleted from the source.
+    deleted_source: Vec<String>,
+    duration_secs: f32,
+}
+
+/// Move an index's artifacts from its current bucket to `dest_bucket`.
+/// Parquet-backed indexes only — Lance migration needs URI rewriting that
+/// isn't in scope for this endpoint. Copies the vector data, trial journal,
+/// promotion file, and auto-generated harness; updates `IndexMeta.bucket`
+/// last so a mid-flight failure leaves the index still usable at its
+/// original location. Evicts the `EmbeddingCache` entry so the next load
+/// re-reads from the new bucket.
+async fn migrate_index_bucket(
+    State(state): State<VectorState>,
+    Path(name): Path<String>,
+    Json(req): Json<MigrateBucketRequest>,
+) -> Result<Json<MigrateBucketReport>, (StatusCode, String)> {
+    let t0 = std::time::Instant::now();
+
+    let mut meta = state
+        .index_registry
+        .get(&name)
+        .await
+        .ok_or_else(|| (StatusCode::NOT_FOUND, format!("index '{name}' not found")))?;
+
+    if meta.vector_backend == shared::types::VectorBackend::Lance {
+        return Err((
+            StatusCode::BAD_REQUEST,
+            "Lance-backed indexes cannot be migrated via this endpoint — \
+             Lance URIs are bucket-specific; a separate migrate_lance tool \
+             is needed".into(),
+        ));
+    }
+
+    if !state.bucket_registry.contains(&req.dest_bucket) {
+        return Err((
+            StatusCode::BAD_REQUEST,
+            format!("dest bucket '{}' not registered", req.dest_bucket),
+        ));
+    }
+
+    let source_bucket = meta.bucket.clone();
+    if source_bucket == req.dest_bucket {
+        return Err((
+            StatusCode::BAD_REQUEST,
+            format!("source and dest are both '{source_bucket}' — nothing to migrate"),
+        ));
+    }
+
+    let src = state
+        .bucket_registry
+        .get(&source_bucket)
+        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
+    let dst = state
+        .bucket_registry
+        .get(&req.dest_bucket)
+        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
+
+    let mut copied: Vec<String> = Vec::new();
+    let mut skipped: Vec<String> = Vec::new();
+
+    // 1. Vector data (single parquet file for this backend).
+    copy_key(&src, &dst, &meta.storage_key)
+        .await
+        .map_err(|e| {
+            (StatusCode::INTERNAL_SERVER_ERROR,
+             format!("copy {}: {e}", meta.storage_key))
+        })?;
+    copied.push(meta.storage_key.clone());
+
+    // 2. Trial journal batches — per-index directory of JSONL files.
+    let trial_prefix = format!("_hnsw_trials/{name}/");
+    // Listing errors abort — treating them as "empty" would strand the journal.
+    let trial_keys = storaged::ops::list(&src, Some(&trial_prefix)).await
+        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("list {trial_prefix}: {e}")))?;
+    if trial_keys.is_empty() {
+        skipped.push(trial_prefix);
+    }
+    for k in &trial_keys {
+        copy_key(&src, &dst, k)
+            .await
+            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("copy {k}: {e}")))?;
+        copied.push(k.clone());
+    }
+
+    // 3. Promotion file (optional — absent for never-promoted indexes).
+    let promo_key = format!("_hnsw_promotions/{name}.json");
+    match copy_key(&src, &dst, &promo_key).await {
+        Ok(()) => copied.push(promo_key),
+        Err(e) => { tracing::info!("migrate '{}': skipped {}: {}", name, promo_key, e); skipped.push(promo_key) }
+    }
+
+    // 4. Auto-generated harness (optional — absent if agent never ran).
+    let harness_key = format!("_hnsw_evals/{name}_auto.json");
+    match copy_key(&src, &dst, &harness_key).await {
+        Ok(()) => copied.push(harness_key),
+        Err(e) => { tracing::info!("migrate '{}': skipped {}: {}", name, harness_key, e); skipped.push(harness_key) }
+    }
+
+    // 5. Pointer flip — IndexMeta.bucket now points at destination. This
+    // is the commit point; earlier failures leave copies in dest but the
+    // index still usable at source.
+    meta.bucket = req.dest_bucket.clone();
+    state
+        .index_registry
+        .register(meta)
+        .await
+        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("update meta: {e}")))?;
+
+    // 6. Cache eviction — next load reads the new bucket's parquet.
+    state.embedding_cache.evict(&name).await;
+
+    // 7. Optional source cleanup (best-effort; failures show up by omission).
+    let mut deleted_source: Vec<String> = Vec::new();
+    if req.delete_source {
+        for k in &copied {
+            if storaged::ops::delete(&src, k).await.is_ok() {
+                deleted_source.push(k.clone());
+            }
+        }
+    }
+
+    Ok(Json(MigrateBucketReport {
+        index_name: name,
+        source_bucket,
+        dest_bucket: req.dest_bucket,
+        copied,
+        skipped,
+        deleted_source,
+        duration_secs: t0.elapsed().as_secs_f32(),
+    }))
+}
+
+/// Copy one object between buckets by reading it fully into memory and
+/// writing it back out via the existing `storaged::ops` get + put
+/// primitives. object_store has no native copy across heterogeneous
+/// backends (local ↔ S3), so the in-memory hop is unavoidable; it is
+/// bounded by object size (a few hundred MB for our parquet + jsonl).
+async fn copy_key(
+    src: &Arc<dyn ObjectStore>,
+    dst: &Arc<dyn ObjectStore>,
+    key: &str,
+) -> Result<(), String> {
+    let payload = storaged::ops::get(src, key).await?;
+    storaged::ops::put(dst, key, payload).await
+}
+
 // --- unused legacy function below, kept for reference ---
 
 #[allow(dead_code)]
@@ -420,6 +602,15 @@ struct HybridRequest {
     /// If false, just return the ranked matches (faster, no Ollama gen).
     #[serde(default = "default_true")]
     generate: bool,
+    /// Phase 19: consult `playbook_memory` and boost workers that past
+    /// similar playbooks successfully filled. Off by default so current
+    /// callers keep deterministic ranking; opt-in unlocks the feedback.
+    #[serde(default)]
+    use_playbook_memory: bool,
+    /// Past playbooks to consider when `use_playbook_memory` is on; ignored
+    /// otherwise. Defaults to `playbook_memory::DEFAULT_TOP_K_PLAYBOOKS`.
+    #[serde(default)]
+    playbook_memory_k: Option<usize>,
 }
 
 fn default_true() -> bool { true }
@@ -442,8 +633,18 @@ struct HybridSource {
     chunk_text: String,
     score: f32,
     sql_verified: bool,
+    /// Phase 19: how much the playbook_memory boost lifted this hit's
+    /// score. 0.0 when `use_playbook_memory=false` or no past playbook
+    /// endorsed this worker.
+    #[serde(default, skip_serializing_if = "is_zero")]
+    playbook_boost: f32,
+    /// playbook_ids whose endorsement contributed to `playbook_boost`.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    playbook_citations: Vec<String>,
 }
 
+fn is_zero(x: &f32) -> bool { x.abs() < 1e-6 }
+
 async fn hybrid_search(
     State(state): State<VectorState>,
     Json(req): Json<HybridRequest>,
@@ -556,6 +757,11 @@ async fn hybrid_search(
         .and_then(|m| m.id_prefix.clone());
 
     let sql_count = valid_ids.as_ref().map(|s| s.len()).unwrap_or(0);
+    // Phase 19: when playbook_memory is consulted, pull a wider candidate
+    // pool so endorsed workers outside the vanilla top-K can still be
+    // boosted into visibility. 5× is a conservative multiplier — plenty
+    // for a +0.25 boost to flip rankings without dragging the cost up.
+    let fetch_k = if req.use_playbook_memory { req.top_k * 5 } else { req.top_k };
     let filtered: Vec<search::SearchResult> = if let Some(ref ids) = valid_ids {
         all_results.into_iter()
             .filter(|r| {
@@ -572,20 +778,54 @@ async fn hybrid_search(
                 };
                 ids.contains(raw_id)
             })
-            .take(req.top_k)
+            .take(fetch_k)
             .collect()
     } else {
-        all_results.into_iter().take(req.top_k).collect()
+        all_results.into_iter().take(fetch_k).collect()
     };
 
     // Step 4: Build sources with SQL-verified flag.
-    let sources: Vec<HybridSource> = filtered.iter().map(|r| HybridSource {
+    let mut sources: Vec<HybridSource> = filtered.iter().map(|r| HybridSource {
         doc_id: r.doc_id.clone(),
         chunk_text: r.chunk_text.clone(),
         score: r.score,
         sql_verified: valid_ids.is_some(),
+        playbook_boost: 0.0,
+        playbook_citations: Vec::new(),
     }).collect();
 
+    // Step 4b (Phase 19): if use_playbook_memory, look up semantically
+    // similar past playbooks and boost workers they endorsed. Name-match
+    // is on the tuple (city, state, name) extracted from chunk_text —
+    // hybrid_search's SQL filter already narrowed to one city+state, so
+    // this just needs to check the name against each playbook's endorsed
+    // set. Additive boost on the existing vector score, then re-sort.
+    if req.use_playbook_memory {
+        let boost_k = req.playbook_memory_k.unwrap_or(playbook_memory::DEFAULT_TOP_K_PLAYBOOKS);
+        // We embedded the question as `qv` above — reuse it for the
+        // playbook similarity lookup so we don't double-pay Ollama.
+        let boosts = state.playbook_memory.compute_boost_for(&qv, boost_k, 0.5).await;
+        for src in sources.iter_mut() {
+            // Parse "{Name} — {Role} in {City}, {State}. …" chunk. Being
+            // defensive: chunks from other datasets may not follow this
+            // exact shape, so absent fields just skip the boost.
+            if let Some((name, city, state)) = parse_worker_chunk(&src.chunk_text) {
+                let key = (city, state, name);
+                if let Some(entry) = boosts.get(&key) {
+                    src.score += entry.boost;
+                    src.playbook_boost = entry.boost;
+                    src.playbook_citations = entry.citations.clone();
+                }
+            }
+        }
+        // Re-rank: boosted scores can flip ordering.
+        sources.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+        // Finally trim to the caller's requested top_k — we pulled fetch_k
+        // (5× wider) above specifically so the boost could reach workers
+        // that would otherwise have been trimmed pre-boost.
+        sources.truncate(req.top_k);
+    }
+
     // Step 5: Generate answer if requested.
     let answer = if req.generate && !sources.is_empty() {
         let context: String = sources.iter().enumerate().map(|(i, s)| {
@@ -734,7 +974,7 @@ async fn run_trial(
     State(state): State<VectorState>,
     Json(req): Json<TrialRequest>,
 ) -> Result<Json<trial::Trial>, (StatusCode, String)> {
-    let mut harness_set = harness::EvalSet::load(&state.store, &req.harness)
+    let mut harness_set = state.harness_store.load_for_index(&req.index_name, &req.harness)
         .await
         .map_err(|e| (StatusCode::NOT_FOUND, format!("harness not found: {e}")))?;
 
@@ -764,8 +1004,8 @@ async fn run_trial(
             .await
             .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
         tracing::info!("trial: ground truth built in {:.1}s", t0.elapsed().as_secs_f32());
-        harness_set
-            .save(&state.store)
+        state.harness_store
+            .save(&harness_set)
             .await
             .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save harness: {e}")))?;
     }
@@ -890,17 +1130,14 @@ async fn best_trial(
 // --- Harness management ---
 
 async fn list_evals(State(state): State<VectorState>) -> impl IntoResponse {
-    match harness::EvalSet::list(&state.store).await {
-        Ok(names) => Ok(Json(names)),
-        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
-    }
+    Json(state.harness_store.list_all().await)
 }
 
 async fn get_eval(
     State(state): State<VectorState>,
     Path(name): Path<String>,
 ) -> impl IntoResponse {
-    match harness::EvalSet::load(&state.store, &name).await {
+    match state.harness_store.get_any(&name).await {
         Ok(e) => Ok(Json(e)),
         Err(err) => Err((StatusCode::NOT_FOUND, err)),
     }
@@ -916,7 +1153,7 @@ async fn put_eval(
         .queries
         .iter()
         .all(|q| q.ground_truth.is_some());
-    match harness_set.save(&state.store).await {
+    match state.harness_store.save(&harness_set).await {
         Ok(()) => Ok(Json(harness_set)),
         Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
     }
@@ -957,8 +1194,8 @@ async fn autogen_eval(
         .await
         .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
 
-    harness_set
-        .save(&state.store)
+    state.harness_store
+        .save(&harness_set)
         .await
         .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save: {e}")))?;
 
@@ -1407,7 +1644,12 @@ async fn profile_scoped_search(
         let lance_store = state.lance.store_for(&req.index_name).await
             .map_err(|e| (StatusCode::BAD_REQUEST, e))?;
         let t0 = std::time::Instant::now();
-        match lance_store.search(&query_vec, top_k).await {
+        match lance_store.search(
+            &query_vec,
+            top_k,
+            Some(LANCE_DEFAULT_NPROBES),
+            Some(LANCE_DEFAULT_REFINE_FACTOR),
+        ).await {
             Ok(hits) => Ok(Json(serde_json::json!({
                 "profile": profile.id,
                 "source": index_meta.source,
@@ -1516,6 +1758,7 @@ async fn run_autotune_endpoint(
         &state.index_registry,
         &state.trial_journal,
         &state.promotion_registry,
+        &state.harness_store,
         &state.job_tracker,
     ).await {
         Ok(result) => Ok(Json(result)),
@@ -1636,8 +1879,25 @@ struct LanceSearchRequest {
     query: String,
     #[serde(default = "default_top_k")]
     top_k: usize,
+    /// IVF partitions to probe. Lance's built-in default is 1, which
+    /// caps recall well below the index's real capability. Recommended:
+    /// 5–10% of num_partitions (≈20 for a 316-partition index). Omitting
+    /// it here picks the server-side `LANCE_DEFAULT_NPROBES`.
+    #[serde(default)]
+    nprobes: Option<usize>,
+    /// Refine factor — re-rank `top_k * factor` PQ-approximate candidates
+    /// with exact distances before returning `top_k`. Recovers recall
+    /// lost to product quantization.
+    #[serde(default)]
+    refine_factor: Option<u32>,
 }
 
+/// Server-side defaults when the caller doesn't pin nprobes / refine
+/// themselves. Tuned for the ~100K × 768d reference workload; see
+/// docs/ADR-019-vector-storage.md for the recall / latency trade-off.
+const LANCE_DEFAULT_NPROBES: usize = 20;
+const LANCE_DEFAULT_REFINE_FACTOR: u32 = 5;
+
 fn default_top_k() -> usize { 5 }
 
 /// Vector search against a Lance dataset. Embeds the query text via the
@@ -1660,7 +1920,9 @@ async fn lance_search(
         .map_err(|e| (StatusCode::BAD_REQUEST, e))?;
 
     let t0 = std::time::Instant::now();
-    let hits = lance_store.search(&qv, req.top_k).await
+    let nprobes = req.nprobes.or(Some(LANCE_DEFAULT_NPROBES));
+    let refine = req.refine_factor.or(Some(LANCE_DEFAULT_REFINE_FACTOR));
+    let hits = lance_store.search(&qv, req.top_k, nprobes, refine).await
         .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
 
     Ok(Json(serde_json::json!({
@@ -1761,6 +2023,12 @@ struct LanceRecallRequest {
     harness: String,
     #[serde(default = "default_top_k")]
     top_k: usize,
+    /// Override server defaults so operators can sweep nprobes /
+    /// refine_factor to chart the recall-vs-latency curve.
+    #[serde(default)]
+    nprobes: Option<usize>,
+    #[serde(default)]
+    refine_factor: Option<u32>,
 }
 
 #[derive(serde::Serialize)]
@@ -1784,6 +2052,214 @@ struct LanceRecallQuery {
     hits_returned: usize,
 }
 
+// --- Phase 19: playbook memory endpoints ---
+
+/// Pull `(name, city, state)` out of a worker chunk shaped like
+/// "{Name} — {Role} in {City}, {State}. Skills: …".
+/// Yields `None` when the chunk deviates from that shape or any field
+/// comes out empty; callers simply skip the boost for that hit.
+fn parse_worker_chunk(chunk: &str) -> Option<(String, String, String)> {
+    // Peel pieces off left to right: em dash, then " in ", then ",".
+    let (raw_name, after_dash) = chunk.split_once('—')?;
+    let (_role, location) = after_dash.trim().split_once(" in ")?;
+    let (raw_city, after_comma) = location.trim().split_once(',')?;
+    let name = raw_name.trim();
+    let city = raw_city.trim();
+    // State = leading run of ASCII letters, so "OH. Skills…" gives "OH".
+    let state_src = after_comma.trim();
+    let state_len = state_src
+        .find(|c: char| !c.is_ascii_alphabetic())
+        .unwrap_or(state_src.len());
+    let state = &state_src[..state_len];
+    if name.is_empty() || city.is_empty() || state.is_empty() {
+        return None;
+    }
+    Some((name.to_string(), city.to_string(), state.to_string()))
+}
+
+#[derive(Deserialize)]
+struct SeedPlaybookRequest {
+    /// One playbook with {operation, approach, context, endorsed_names}.
+    /// City + state are parsed from the operation text.
+    operation: String,
+    #[serde(default)]
+    approach: String,
+    #[serde(default)]
+    context: String,
+    endorsed_names: Vec<String>,
+    /// Append to the existing memory rather than replacing. Default true —
+    /// seeding is a bootstrap/demo tool, not a rebuild substitute.
+    #[serde(default = "default_true")]
+    append: bool,
+}
+
+/// Bootstrap / test-only: inject a playbook entry directly into
+/// `playbook_memory` without going through `successful_playbooks`. Useful
+/// when the source dataset has stale or phantom entries (as the initial
+/// staffing seed did — names that don't correspond to real workers), and
+/// you want to demonstrate the feedback loop with a known-good fixture.
+///
+/// Production path is always `/rebuild` — this endpoint is for operators
+/// who need to prime the memory before real playbooks accumulate.
+async fn seed_playbook_memory(
+    State(state): State<VectorState>,
+    Json(req): Json<SeedPlaybookRequest>,
+) -> impl IntoResponse {
+    // Validate before doing any paid work: parse city/state from the
+    // operation ("fill: Role xN in City, ST") so a malformed request is
+    // rejected with 400 without spending an embedding call. The parser
+    // duplicates the one in playbook_memory::rebuild, kept inline since
+    // this seed path is stable but infrequently called.
+    let (city, state_) = {
+        let after_in = req.operation.split(" in ").nth(1).unwrap_or("");
+        let mut parts = after_in.splitn(2, ',');
+        let city = parts.next().map(|s| s.trim().to_string()).filter(|s| !s.is_empty());
+        let state = parts.next().map(|s| s.trim().chars().take_while(|c| c.is_ascii_alphabetic()).collect::<String>()).filter(|s| !s.is_empty());
+        (city, state)
+    };
+    if city.is_none() || state_.is_none() {
+        return Err((StatusCode::BAD_REQUEST,
+            "operation must match 'fill: Role xN in City, ST' shape".to_string()));
+    }
+
+    // Embed the entry through the same text shape `rebuild` uses so
+    // similarity math is comparable across seed + real entries.
+    let text = format!(
+        "{} | {} | {} | fills: {}",
+        req.operation, req.approach, req.context,
+        req.endorsed_names.join(", "),
+    );
+    let resp = match state.ai_client.embed(EmbedRequest { texts: vec![text], model: None }).await {
+        Ok(r) => r,
+        Err(e) => return Err((StatusCode::BAD_GATEWAY, format!("embed seed: {e}"))),
+    };
+    // Take the first (only) embedding; an empty response is a gateway fault.
+    let emb: Vec<f32> = match resp.embeddings.first() {
+        Some(v) => v.iter().map(|&x| x as f32).collect(),
+        None => return Err((StatusCode::BAD_GATEWAY, "embed returned nothing".into())),
+    };
+
+    // Id = "pb-seed-" + first 8 bytes (hex) of SHA-256(timestamp | operation).
+    // The timestamp is the wall clock at call time, so re-seeding the same
+    // operation yields a NEW id — unique per call, not content-stable.
+    // Callers get the id back so they can reference it in citations.
+    let ts = chrono::Utc::now().to_rfc3339();
+    use sha2::{Digest, Sha256};
+    let mut h = Sha256::new();
+    h.update(ts.as_bytes());
+    h.update(b"|");
+    h.update(req.operation.as_bytes());
+    let bytes = h.finalize();
+    let pid = format!("pb-seed-{}", bytes.iter().take(8).map(|b| format!("{b:02x}")).collect::<String>());
+
+    let new_entry = playbook_memory::PlaybookEntry {
+        playbook_id: pid.clone(),
+        operation: req.operation,
+        approach: req.approach,
+        context: req.context,
+        timestamp: ts,
+        endorsed_names: req.endorsed_names,
+        city, state: state_,
+        embedding: Some(emb),
+    };
+
+    // NOTE(review): snapshot → modify → set_entries is a read-modify-write
+    // with no lock visible in this handler; two concurrent seeds could both
+    // snapshot the same state and the later set_entries would drop the
+    // other's entry. Confirm whether PlaybookMemory serializes this.
+    let mut current = state.playbook_memory.snapshot().await;
+    // append=false replaces the whole memory with just this entry.
+    if req.append {
+        current.push(new_entry);
+    } else {
+        current = vec![new_entry];
+    }
+    if let Err(e) = state.playbook_memory.set_entries(current).await {
+        return Err((StatusCode::INTERNAL_SERVER_ERROR, format!("persist: {e}")));
+    }
+    Ok(Json(serde_json::json!({ "playbook_id": pid, "entries_after": state.playbook_memory.entry_count().await })))
+}
+
+/// POST /playbook_memory/rebuild — returns the rebuild report as JSON, or 500.
+async fn rebuild_playbook_memory(
+    State(state): State<VectorState>,
+) -> impl IntoResponse {
+    playbook_memory::rebuild(
+        &state.playbook_memory,
+        &state.ai_client,
+        &state.catalog,
+        &state.bucket_registry,
+    ).await
+        .map(Json)
+        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))
+}
+
+// Path 2 foundation — dump the in-memory playbook_memory state to a fresh
+// `successful_playbooks_live` dataset. Cheap to call (writes one parquet,
+// updates one manifest), so /log can call it after every seed to keep the
+// SQL-queryable surface honest without the destructive REPLACE bug that
+// /ingest/file has. Responds with the persist report as JSON, or a 500
+// carrying the error returned by `playbook_memory::persist_to_sql`.
+async fn persist_playbook_memory_sql(
+    State(state): State<VectorState>,
+) -> impl IntoResponse {
+    playbook_memory::persist_to_sql(&state.playbook_memory, &state.catalog).await
+        .map(Json)
+        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))
+}
+
+#[derive(Deserialize)]
+struct PatternsRequest {
+    query: String,
+    #[serde(default = "default_pattern_k")]
+    top_k_playbooks: usize,
+    /// Minimum frequency (0.0-1.0) for a trait to make the report.
+    /// Default 0.4 — at least 40% of examined workers must share it.
+    #[serde(default = "default_pattern_min_freq")]
+    min_trait_frequency: f32,
+}
+fn default_pattern_k() -> usize { 10 }
+fn default_pattern_min_freq() -> f32 { 0.4 }
+
+// Path 2 — meta-index discovery surface. "What did past similar fills
+// have in common that I didn't ask about?" — surfaces signals like
+// recurring certifications, skill clusters, archetype tendencies.
+// Forwards the request fields to `playbook_memory::discover_patterns`.
+async fn discover_playbook_patterns(
+    State(state): State<VectorState>,
+    Json(req): Json<PatternsRequest>,
+) -> impl IntoResponse {
+    playbook_memory::discover_patterns(
+        &state.playbook_memory,
+        &state.ai_client,
+        &state.catalog,
+        &state.bucket_registry,
+        &req.query,
+        req.top_k_playbooks,
+        req.min_trait_frequency,
+    ).await
+        .map(Json)
+        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))
+}
+
+// GET /playbook_memory/stats — entry counts plus a three-entry sample.
+async fn playbook_memory_stats(State(state): State<VectorState>) -> impl IntoResponse {
+    let entries = state.playbook_memory.snapshot().await;
+    let total_names: usize = entries.iter().map(|e| e.endorsed_names.len()).sum();
+    let with_embeddings = entries.iter().filter(|e| e.embedding.is_some()).count();
+    let sample: Vec<serde_json::Value> = entries.iter().take(3).map(|e| serde_json::json!({
+        "id": e.playbook_id, "operation": e.operation,
+        "city": e.city, "state": e.state,
+        "endorsed": e.endorsed_names,
+    })).collect();
+    Json(serde_json::json!({
+        "entries": entries.len(),
+        "total_names_endorsed": total_names,
+        "entries_with_embeddings": with_embeddings,
+        "sample": sample,
+    }))
+}
+
 async fn lance_recall_harness(
     State(state): State<VectorState>,
     Path(index_name): Path<String>,
@@ -1791,7 +2267,7 @@ async fn lance_recall_harness(
 ) -> impl IntoResponse {
     let t0 = std::time::Instant::now();
 
-    let harness_set = harness::EvalSet::load(&state.store, &req.harness).await
+    let harness_set = state.harness_store.load_for_index(&index_name, &req.harness).await
         .map_err(|e| (StatusCode::NOT_FOUND, format!("harness: {e}")))?;
     if !harness_set.ground_truth_built {
         return Err((StatusCode::BAD_REQUEST,
@@ -1817,7 +2293,12 @@ async fn lance_recall_harness(
         };
 
         let qt0 = std::time::Instant::now();
-        let hits = lance_store.search(qv, k).await
+        let hits = lance_store.search(
+            qv,
+            k,
+            Some(req.nprobes.unwrap_or(LANCE_DEFAULT_NPROBES)),
+            Some(req.refine_factor.unwrap_or(LANCE_DEFAULT_REFINE_FACTOR)),
+        ).await
             .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("search: {e}")))?;
         let lat_us = qt0.elapsed().as_micros() as f32;
 
diff --git a/mcp-server/index.ts b/mcp-server/index.ts
index 54fa444..4f3a839 100644
--- a/mcp-server/index.ts
+++ b/mcp-server/index.ts
@@ -74,7 +74,10 @@ server.tool(
     top_k: z.number().default(5),
   },
   async ({ question, sql_filter, dataset, id_column, top_k }) => {
-    const body: any = { question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true };
+    const body: any = {
+      question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true,
+      use_playbook_memory: true,
+    };
     if (sql_filter) body.sql_filter = sql_filter;
     const r = await api("POST", "/vectors/hybrid", body);
     return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] };
@@ -109,6 +112,7 @@ server.tool(
       index_name: "workers_500k_v1", sql_filter: filter,
       filter_dataset: "ethereal_workers", id_column: "worker_id",
       top_k: headcount * 2, generate: false,
+      use_playbook_memory: true,
     });
     let matches = r.sources || [];
     if (required_certs.length > 0) {
@@ -384,6 +388,11 @@ async function main() {
             question: b.question, index_name: b.index || "workers_500k_v1",
             sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers",
             id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
+            use_playbook_memory: b.use_playbook_memory !== false,
+            // Forward explicitly so Bun /search isn't capped by the
+            // server's default — boost silently misses good matches when
+            // memory has >25 entries and only top-5 playbooks are scanned.
+            playbook_memory_k: b.playbook_memory_k ?? 25,
           }));
         }
 
@@ -403,6 +412,8 @@ async function main() {
             index_name: b.index || "workers_500k_v1", sql_filter: filter,
             filter_dataset: b.dataset || "ethereal_workers",
             id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false,
+            use_playbook_memory: true,
+            playbook_memory_k: 25,
           }));
         }
 
@@ -418,14 +429,57 @@ async function main() {
           return ok(await api("POST", "/vectors/rag", { index_name: b.index || "workers_500k_v1", question: b.question, top_k: b.top_k || 5 }));
         }
 
-        // Tool: log success
+        // Tool: log success.
+        //
+        // BUG FIX 2026-04-20: previously this also POSTed a 1-row CSV to
+        // /ingest/file?name=successful_playbooks. That endpoint REPLACES
+        // the dataset's object list rather than appending — so every /log
+        // call destroyed all prior rows in the SQL-queryable
+        // successful_playbooks table. Chain-of-custody trace caught it:
+        // sp_rows went 33 → 1 in a single /log call.
+        //
+        // Until a proper append endpoint exists (Phase 8 delta write
+        // surface for the SQL table), /log seeds playbook_memory (in-memory
+        // append-only store, works correctly for boost) and then mirrors it
+        // to successful_playbooks_live via /persist_sql. The original
+        // successful_playbooks table is derived state, rebuilt explicitly via
+        // /vectors/playbook_memory/rebuild — never written by the recruiter path.
         if (url.pathname === "/log") {
           const b = await json();
-          const csv = `timestamp,operation,approach,result,context\n"${new Date().toISOString()}","${(b.operation||"").replace(/"/g,'""')}","${(b.approach||"").replace(/"/g,'""')}","${(b.result||"").replace(/"/g,'""')}","${(b.context||"").replace(/"/g,'""')}"`;
-          const form = new FormData();
-          form.append("file", new Blob([csv], { type: "text/csv" }), "playbook.csv");
-          const r = await fetch(`${BASE}/ingest/file?name=successful_playbooks`, { method: "POST", body: form });
-          return ok({ logged: true, response: await r.text() });
+          // Result format expected: "{filled}/{needed} filled → Name1, Name2, Name3"
+          const result = String(b.result || "");
+          const arrowIdx = result.indexOf("→");
+          const namesPart = arrowIdx >= 0 ? result.slice(arrowIdx + 1) : "";
+          const endorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean);
+          let seeded = 0;
+          let persisted_rows = 0;
+          if (endorsed.length && /fill:.+ in .+,.+/i.test(String(b.operation || ""))) {
+            // Coerce before .split(): a non-string approach/context in the JSON body would otherwise throw.
+            const canonicalApproach = `${String(b.approach || "manual log").split(/[\.\n]/)[0]}`.slice(0, 80);
+            const canonicalContext = `${String(b.context || "").split(/[\.\n]/)[0]}`.slice(0, 80);
+            const seedRes = await api("POST", "/vectors/playbook_memory/seed", {
+              operation: String(b.operation),
+              approach: canonicalApproach,
+              context: canonicalContext,
+              endorsed_names: endorsed,
+              append: true,
+            }).catch(() => null) as any;
+            if (seedRes && seedRes.playbook_id) {
+              seeded = endorsed.length;
+              // After every successful seed, persist memory → SQL so the successful_playbooks_live
+              // table reflects current operator activity. /persist_sql writes the FULL state,
+              // which is safe because in-memory playbook_memory IS the source of truth
+              // (no concurrent writer outside this process modifies it).
+              const pr = await api("POST", "/vectors/playbook_memory/persist_sql", {}).catch(() => null) as any;
+              if (pr && typeof pr.rows_persisted === "number") persisted_rows = pr.rows_persisted;
+            }
+          }
+          return ok({
+            logged: true,
+            seeded,
+            persisted_to_sql: persisted_rows,
+            note: "successful_playbooks_live (NOT successful_playbooks) is the SQL surface for live operator activity. /log is non-destructive.",
+          });
         }
 
         // Tool: get playbooks
@@ -480,6 +534,7 @@ async function main() {
             question: "reliable forklift operator", index_name: "workers_500k_v1",
             sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
             filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
+            use_playbook_memory: true,
           });
           tests.push({
             name: "Hybrid SQL+Vector Search", ms: Date.now() - ht0,
@@ -987,6 +1042,7 @@ tr:hover{background:#111827}
             question: "reliable forklift operator", index_name: "workers_500k_v1",
             sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
             filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
+            use_playbook_memory: true,
           });
           tests.push({
             name: "Hybrid SQL+Vector", ms: Date.now() - ht0,
@@ -1435,6 +1491,26 @@ const SCENARIOS = [
 
 function pick<T>(arr: T[]): T { return arr[Math.floor(Math.random() * arr.length)]; }
 
+// Seed playbook_memory from a filled contract so the next hybrid query
+// ranks against it. Called per-day by runWeekSimulation. Fail-soft —
+// seeding is best-effort. Doc-id fallbacks ("W500-…", "W500K-…") are skipped.
+async function seedPlaybookFromContract(c: any) {
+  const names = (c.matches || []).slice(0, 5)
+    .map((m: any) => m.name || m.doc_id)
+    .filter((n: unknown) => typeof n === "string" && n !== "" && !/^W500K?-/.test(n));
+  if (!names.length) return;
+  const op = `fill: ${c.role} x${c.headcount} in ${c.city}, ${c.state}`;
+  try {
+    await api("POST", "/vectors/playbook_memory/seed", {
+      operation: op,
+      approach: `${c.situation || c.priority || "fill"} → hybrid search`,
+      context: `client=${c.client || ""} start=${c.start || ""}`,
+      endorsed_names: names,
+      append: true,
+    });
+  } catch {}
+}
+
 async function runWeekSimulation() {
   const days = ["Monday","Tuesday","Wednesday","Thursday","Friday"];
   const staffers = ["Sarah (Lead)","Mike (Senior)","Kim (Junior)"];
@@ -1468,7 +1544,7 @@ async function runWeekSimulation() {
       if (priority === "urgent") emergencies++;
       totalNeeded += headcount;
 
-      // Run hybrid search
+      // Run hybrid search — Phase 19: boost on so past playbooks shape ranking
       let filled = 0;
       let matches: any[] = [];
       try {
@@ -1481,12 +1557,15 @@ async function runWeekSimulation() {
           id_column: "worker_id",
           top_k: headcount + 2,
           generate: false,
+          use_playbook_memory: true,
         });
         matches = (r.sources || []).slice(0, headcount).map((s: any) => ({
           doc_id: s.doc_id,
           name: s.chunk_text?.split("—")[0]?.trim() || s.doc_id,
           score: s.score,
           chunk_text: s.chunk_text || "",
+          playbook_boost: s.playbook_boost || 0,
+          playbook_citations: s.playbook_citations || [],
         }));
         filled = matches.length;
       } catch {}
@@ -1501,7 +1580,15 @@ async function runWeekSimulation() {
       });
     }
 
-    // End of day: log playbook + prepare handoff
+    // End of day: seed playbook_memory with TODAY's filled contracts so
+    // tomorrow's hybrid search ranks against them. This is the in-week
+    // feedback loop — without this, day 5 doesn't benefit from day 1.
+    for (const c of contracts) {
+      if (c.matches && c.matches.length) {
+        await seedPlaybookFromContract(c).catch(() => {});
+      }
+    }
+
     if (d < 4) {
       handoffs++;
       try {
@@ -1530,29 +1617,18 @@ async function runWeekSimulation() {
     playbook_entries: playbookEntries,
   };
 
-  // Log every filled contract as a playbook entry — this is the training data
-  try {
-    const ts = new Date().toISOString();
-    const rows: string[] = [];
-    for (const day of results) {
-      for (const c of day.contracts) {
-        if (c.matches && c.matches.length > 0) {
-          const workerNames = c.matches.slice(0, 3).map((m: any) => m.name || m.doc_id).join(", ");
-          const op = `fill: ${c.role} x${c.headcount} in ${c.city}, ${c.state}`;
-          const approach = `${c.situation} (${c.priority}) → hybrid search`;
-          const result = `${c.filled}/${c.headcount} filled → ${workerNames}`;
-          const context = `client=${c.client} start=${c.start} scenario=${c.situation}`;
-          rows.push(`"${ts}","${op.replace(/"/g,'""')}","${approach}","${result.replace(/"/g,'""')}","${context.replace(/"/g,'""')}"`);
-        }
-      }
-    }
-    if (rows.length) {
-      const csv = `timestamp,operation,approach,result,context\n${rows.join("\n")}`;
-      const form = new FormData();
-      form.append("file", new Blob([csv], { type: "text/csv" }), "playbook.csv");
-      await fetch(`${BASE}/ingest/file?name=successful_playbooks`, { method: "POST", body: form });
-    }
-  } catch {}
+  // BUG FIX 2026-04-20: previously this POSTed a multi-row CSV to
+  // /ingest/file?name=successful_playbooks at end of every simulation.
+  // That endpoint REPLACES the dataset's object list — so each
+  // /simulation/run wiped the prior simulation's rows. The SQL
+  // successful_playbooks table was never accumulating; it always reflected
+  // only the most-recent simulation batch.
+  //
+  // Per-day per-contract seeding via /vectors/playbook_memory/seed
+  // (added Pass 1, runs inside the day loop above) is the path that
+  // actually accumulates feedback. The SQL successful_playbooks table is
+  // intentionally not written by /simulation/run anymore until a proper
+  // append surface exists.
 
   return { days: results, summary };
 }
diff --git a/mcp-server/search.html b/mcp-server/search.html
index d36c8e7..c502423 100644
--- a/mcp-server/search.html
+++ b/mcp-server/search.html
@@ -384,11 +384,13 @@ function addContractInsight(parent,c,isUrgent){
       if(isUrgent&&i===0)label='FIRST CHOICE — highest match score, call first';
       else if(isUrgent&&i>0&&i<c.headcount)label='';
       else if(isUrgent&&i>=c.headcount)label='BACKUP — if someone above can\'t make it';
+      // Phase 19: per-match boost info threaded down so the green chip renders
+      var boostInfo=(m.playbook_boost>0)?{boost:m.playbook_boost,citations:m.playbook_citations||[]}:null;
       addWorkerInsight(cd,w.nm,
         [w.role,w.loc].filter(Boolean).join(' · '),
         label||buildWhyText(w,c),i,
         isUrgent&&i===0?'#f85149':isUrgent&&i>=c.headcount?'#484f58':null,
-        w);
+        w,boostInfo);
     });
     var remaining=c.matches.length-showCount;
     if(remaining>0){
@@ -570,12 +572,23 @@ function addWorkerInsight(parent,name,detail,why,idx,highlight){
   if(highlight)w.style.borderLeft='3px solid '+highlight;
   w.style.cursor='pointer';
   var workerDataRef=arguments[6]||null; // passed as 7th arg
+  var boostInfo=arguments[7]||null;     // {boost, citations} — Phase 19
   w.onclick=function(){if(workerDataRef)showProfile(workerDataRef)};
   var av=document.createElement('div');av.className='av';av.style.background=AC[(idx||0)%AC.length];
   av.textContent=(name||'?').split(' ').map(function(n){return(n[0]||'').toUpperCase()}).join('').substring(0,2);
   w.appendChild(av);
   var info=document.createElement('div');info.className='info';
   var nm=document.createElement('div');nm.className='nm';nm.textContent=name;
+  // Phase 19: when a past playbook endorsed this worker, show a green chip
+  // next to the name. Hover reveals the citation IDs.
+  if(boostInfo && boostInfo.boost > 0){
+    var chip=document.createElement('span');
+    chip.style.cssText='display:inline-block;margin-left:8px;padding:2px 7px;border-radius:9px;font-size:10px;font-weight:600;background:#0d2818;border:1px solid #2ea043;color:#3fb950;vertical-align:middle';
+    var n=(boostInfo.citations && boostInfo.citations.length) || 0;
+    chip.textContent='Endorsed · '+n+' playbook'+(n!==1?'s':'');
+    chip.title='Boosted by past playbooks: '+(boostInfo.citations||[]).join(', ');
+    nm.appendChild(chip);
+  }
   var dt=document.createElement('div');dt.className='detail';dt.textContent=detail;
   info.appendChild(nm);info.appendChild(dt);
   if(why){var wh=document.createElement('div');wh.className='why';wh.textContent=why;info.appendChild(wh)}
diff --git a/tests/multi-agent/agent.ts b/tests/multi-agent/agent.ts
new file mode 100644
index 0000000..167e9c2
--- /dev/null
+++ b/tests/multi-agent/agent.ts
@@ -0,0 +1,351 @@
+// Shared runtime for one agent. An agent is a role (executor or reviewer),
+// a model name, and a conversation the orchestrator hands it. The agent
+// produces ONE structured Action per turn; the orchestrator applies tool
+// calls and feeds results back.
+//
+// Fail-fast: every HTTP error, parse error, and Ollama error throws. The
+// orchestrator catches at the top and exits non-zero with the full log.
+
+export const GATEWAY = "http://localhost:3100";
+export const SIDECAR = "http://localhost:3200";
+
+// --- Shared types ---
+
+export type Role = "executor" | "reviewer";
+
+export interface TaskSpec {
+  id: string;
+  operation: string;           // "fill: Welder x2 in Columbus, OH"
+  target_role: string;         // "Welder"
+  target_count: number;        // 2
+  target_city: string;         // "Columbus"
+  target_state: string;        // "OH"
+  approach_hint?: string;      // e.g. "hybrid search"; agent is free to ignore
+}
+
+export interface LogEntry {
+  turn: number;
+  role: Role;
+  model: string;
+  at: string;
+  kind:
+    | "plan"
+    | "tool_call"
+    | "tool_result"
+    | "critique"
+    | "propose_done"
+    | "consensus_done"
+    | "error";
+  content: any;
+}
+
+// Action = what an agent returns on one turn. Strict shape so we can
+// enforce it at parse time rather than prompt-engineer around malformed
+// JSON.
+export type Action =
+  | { kind: "tool_call"; tool: string; args: Record<string, any>; rationale: string }
+  | { kind: "propose_done"; fills: Fill[]; rationale: string }
+  | { kind: "critique"; verdict: "continue" | "drift" | "approve_done"; notes: string }
+  | { kind: "plan"; steps: string[] };
+
+export interface Fill {
+  candidate_id: string;
+  name: string;
+  reason: string;
+}
+
+// --- HTTP helpers (fail-fast) ---
+
+async function http<T>(method: string, url: string, body?: any): Promise<T> {
+  const res = await fetch(url, {
+    method,
+    headers: { "Content-Type": "application/json" },
+    body: body ? JSON.stringify(body) : undefined,
+  });
+  if (!res.ok) {
+    const text = await res.text();
+    throw new Error(`${method} ${url} → ${res.status}: ${text}`);
+  }
+  return (await res.json()) as T;
+}
+
+// Tool calls land in the Phase 12 audit log keyed by this agent name.
+// Distinguishable from human-driven calls (agent=="operator" or similar)
+// so post-hoc queries can separate multi-agent runs.
+export const TOOL_AGENT_ID = "multi-agent-test";
+
+export async function callTool(tool: string, args: Record<string, any>): Promise<any> {
+  return http("POST", `${GATEWAY}/tools/${tool}/call`, {
+    params: args,
+    agent: TOOL_AGENT_ID,
+  });
+}
+
+export async function hybridSearch(sql_filter: string, question: string, k = 10, index_name = "workers_500k_v1"): Promise<any> {
+  return http("POST", `${GATEWAY}/vectors/hybrid`, { sql_filter, question, index_name, top_k: k }); // gateway callers send index_name + top_k, not k
+}
+
+export async function sqlQuery(sql: string): Promise<any> {
+  return http("POST", `${GATEWAY}/query/sql`, { sql, format: "json" });
+}
+
+// Sidecar generate. Ollama's default keep_alive (5 min) keeps the model
+// warm between turns on its own, so we don't need to pass it through.
+export async function generate(model: string, prompt: string, opts: {
+  max_tokens?: number;
+  temperature?: number;
+  system?: string;
+} = {}): Promise<string> {
+  const body: Record<string, any> = {
+    model,
+    prompt,
+    temperature: opts.temperature ?? 0.3,
+    max_tokens: opts.max_tokens ?? 800,
+  };
+  if (opts.system) body.system = opts.system;
+  const r = await http<any>("POST", `${SIDECAR}/generate`, body);
+  const text = r.text ?? "";
+  if (!text || typeof text !== "string") {
+    throw new Error(`generate returned empty text from ${model}: ${JSON.stringify(r).slice(0, 200)}`);
+  }
+  return text;
+}
+
+// --- Prompt construction ---
+
+const TOOL_CATALOG = `
+Available tools (each takes a JSON "args" object):
+
+- hybrid_search(sql_filter: string, question: string, index_name: string, k?: number)
+  → Narrow workers via SQL WHERE clause, then rank by semantic match.
+  → Canonical production tool for fill tasks. Always use this FIRST.
+  → Example args:
+    {"index_name":"workers_500k_v1",
+     "sql_filter":"LOWER(role) LIKE '%weld%' AND city = 'Toledo' AND state = 'OH' AND availability > 0.5",
+     "question":"reliable welder with OSHA certs",
+     "k":10}
+
+- sql(query: string)
+  → Raw read-only SELECT. Use for verification (confirm a worker exists,
+    check city/role/availability) after hybrid_search surfaces candidates.
+  → Schema of workers_500k: worker_id, name, role, email, phone, city,
+    state, zip, skills, certifications, archetype, reliability,
+    responsiveness, engagement, communications, compliance, availability,
+    resume_text.
+  → Example args:
+    {"query":"SELECT worker_id, name, role, city, state, availability FROM workers_500k WHERE worker_id = 'W123456'"}
+
+Rules:
+- hybrid_search returns sources[] each with {doc_id, chunk_text, score, sql_verified}.
+- **ID mapping:** vector doc_ids look like "W500K-7995" (prefix + number).
+  The SQL worker_id is an INTEGER. To go from doc_id to SQL, strip the
+  "W500K-" prefix and cast:
+    SELECT ... FROM workers_500k WHERE worker_id = CAST(SUBSTR('W500K-7995', 7) AS BIGINT)
+  or more simply: WHERE worker_id = 7995.
+- Names are NOT unique. Always identify by worker_id, never by name alone.
+- Return EXACTLY ONE JSON object per turn. No prose outside the JSON.
+`;
+
+// Smart per-kind summary so agents see the substance of each prior turn
+// without a raw-JSON wall of text. hybrid_search results especially need
+// this — raw JSON buries sources[] past any reasonable 400-char truncation.
+function summarizeEntry(e: LogEntry): string {
+  const c = e.content ?? {};
+  switch (e.kind) {
+    case "plan":
+      return `PLAN: ${(c.steps ?? []).map((s: string, i: number) => `${i + 1}.${s}`).join(" ")}`;
+    case "tool_call":
+      return `TOOL_CALL ${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 250)})${c.rationale ? ` — ${c.rationale}` : ""}`;
+    case "tool_result": {
+      if (c.error) return `TOOL_RESULT error: ${c.error}`;
+      // hybrid_search response
+      if (Array.isArray(c.sources)) {
+        const head = c.sources.slice(0, 5).map((s: any) =>
+          `${s.doc_id}${s.sql_verified ? "✓" : ""} score=${(s.score ?? 0).toFixed(2)}: ${String(s.chunk_text ?? "").slice(0, 80)}`
+        ).join(" | ");
+        return `TOOL_RESULT hybrid: sql_matches=${c.sql_matches} vector_reranked=${c.vector_reranked} sources=[${head}${c.sources.length > 5 ? ` +${c.sources.length - 5} more` : ""}]`;
+      }
+      // sql response
+      if (Array.isArray(c.rows)) {
+        const head = c.rows.slice(0, 5).map((r: any) => JSON.stringify(r)).join(" | ");
+        return `TOOL_RESULT sql: ${c.rows.length} rows${c.rows.length > 0 ? ` — ${head}${c.rows.length > 5 ? ` +${c.rows.length - 5} more` : ""}` : ""}`;
+      }
+      // fallback
+      return `TOOL_RESULT ${JSON.stringify(c).slice(0, 250)}`;
+    }
+    case "critique":
+      return `CRITIQUE verdict=${c.verdict} notes: ${String(c.notes ?? "").slice(0, 200)}`;
+    case "propose_done":
+      return `PROPOSE_DONE fills=[${(c.fills ?? []).map((f: Fill) => `${f.candidate_id}:${f.name}`).join(", ")}] rationale: ${String(c.rationale ?? "").slice(0, 120)}`;
+    case "consensus_done":
+      return `CONSENSUS ✓`;
+    case "error":
+      return `ERROR ${c.message ?? JSON.stringify(c)}`;
+  }
+  return JSON.stringify(c).slice(0, 200);
+}
+
+function renderLogForPrompt(log: LogEntry[]): string {
+  if (log.length === 0) return "(no turns yet)";
+  return log.slice(-12).map(e =>
+    `[t${e.turn} ${e.role}] ${summarizeEntry(e)}`
+  ).join("\n");
+}
+
+// Crawl the log for every hybrid_search tool_result and collect the
+// worker names + ids seen so far. LLMs routinely "forget" earlier turns
+// once the conversation grows, so we surface a running ledger in the
+// prompt as orchestrator-maintained state. The executor doesn't have to
+// track this itself — it just reads it.
+function candidatesSeen(log: LogEntry[]): Array<{ doc_id: string; name: string; city: string; state: string }> {
+  const seen = new Map<string, { doc_id: string; name: string; city: string; state: string }>();
+  for (const e of log) {
+    if (e.kind !== "tool_result") continue;
+    const sources = (e.content as any)?.sources;
+    if (!Array.isArray(sources)) continue;
+    for (const s of sources) {
+      // chunk_text shape "Name — Role in City, ST. …"
+      const t = String(s.chunk_text ?? "");
+      const [namePart, rest] = t.split("—", 2);
+      if (!namePart || !rest) continue;
+      const loc = rest.split(" in ")[1] ?? "";
+      const [city, stateRaw] = loc.split(",", 2);
+      const state = (stateRaw ?? "").trim().replace(/[^A-Za-z].*/, "");
+      if (!s.doc_id || !namePart.trim() || !city?.trim() || !state) continue;
+      if (!seen.has(s.doc_id)) {
+        seen.set(s.doc_id, {
+          doc_id: s.doc_id,
+          name: namePart.trim(),
+          city: city.trim(),
+          state,
+        });
+      }
+    }
+  }
+  return Array.from(seen.values());
+}
+
+export function executorPrompt(task: TaskSpec, log: LogEntry[]): string {
+  const logStr = renderLogForPrompt(log);
+  const seen = candidatesSeen(log);
+  const seenBlock = seen.length === 0
+    ? "(no candidates surfaced yet — start with hybrid_search)"
+    : seen.map(s => `  - ${s.doc_id} ${s.name} (${s.city}, ${s.state})`).join("\n");
+
+  return `You are the EXECUTOR agent. Your job is to complete this task:
+
+OPERATION: ${task.operation}
+TARGET: ${task.target_count} × ${task.target_role} in ${task.target_city}, ${task.target_state}
+${task.approach_hint ? `HINT: ${task.approach_hint}` : ""}
+
+The REVIEWER agent is watching every turn. They will flag drift. Stay on target.
+
+${TOOL_CATALOG}
+
+CANDIDATES SURFACED SO FAR (orchestrator-tracked, do not forget these):
+${seenBlock}
+
+SHARED LOG (recent turns):
+${logStr}
+
+Your next action MUST be a JSON object matching one of these shapes:
+{"kind":"plan","steps":["short step 1","short step 2",...]}
+  — use on turn 1 to outline your approach. Steps must be concrete.
+{"kind":"tool_call","tool":"...","args":{...},"rationale":"why"}
+  — call a tool and see its result next turn.
+{"kind":"propose_done","fills":[{"candidate_id":"...","name":"First Last","reason":"why them"}],"rationale":"..."}
+  — propose you've met the target. fills MUST have EXACTLY ${task.target_count} entries — count twice before emitting.
+
+Strategy tip: once "CANDIDATES SURFACED SO FAR" has ≥ ${task.target_count} entries in ${task.target_city}, ${task.target_state} matching ${task.target_role}, verify ONE via the sql tool (to satisfy the reviewer's SQL-verification criterion) and then propose_done with the top ${task.target_count}. Don't keep re-searching.
+
+Respond with ONLY the JSON object. No markdown fences, no prose.`;
+}
+
+export function reviewerPrompt(task: TaskSpec, log: LogEntry[]): string {
+  const logStr = renderLogForPrompt(log);
+
+  // If the most recent executor action was propose_done, the reviewer
+  // must commit to an up-or-down vote this turn — "continue" would stall
+  // the orchestrator forever. The wider prompt still describes all three
+  // verdicts, but we add a hard rule at the end that the model must obey.
+  const lastExec = [...log].reverse().find(e => e.role === "executor");
+  const awaitingApproval = lastExec?.kind === "propose_done";
+
+  return `You are the REVIEWER agent. The EXECUTOR is trying to complete this task:
+
+OPERATION: ${task.operation}
+TARGET: ${task.target_count} × ${task.target_role} in ${task.target_city}, ${task.target_state}
+
+Your job: catch drift. Agents often wander from the actual objective. Specifically watch for:
+- Proposing candidates who aren't in ${task.target_city}, ${task.target_state}.
+- Proposing candidates who don't have ${task.target_role} skill.
+- Proposing fewer or more than ${task.target_count} fills.
+- Irrelevant tool calls (e.g. revenue_by_client when the task is a fill).
+
+Available tools (for reference, but YOU don't call them):
+- hybrid_search(sql_filter, question, index_name, k) — production fill path
+- sql(query) — read-only SELECT for verification
+
+SHARED LOG (recent turns):
+${logStr}
+
+Your next action MUST be a JSON object:
+{"kind":"critique","verdict":"continue" | "drift" | "approve_done","notes":"..."}
+
+- "continue" → executor is on a reasonable path, let them keep going.
+- "drift" → executor is off-track; notes MUST tell them how to redirect.
+- "approve_done" → executor's propose_done meets the criteria. Seal it.
+
+APPROVAL CRITERIA (use these only for propose_done):
+1. Exactly ${task.target_count} fills.
+2. Each fill's name appears in a prior tool_result from ${task.target_city}, ${task.target_state} matching role "${task.target_role}".
+3. Executor has SQL-verified at least one of the fills (any prior sql tool_result with that worker).
+If 1–3 all hold, return approve_done. Do not demand further verification.
+${awaitingApproval ? `
+
+HARD RULE: The executor's most recent action was propose_done. On this turn you CANNOT return "continue" — it would stall the task. Choose approve_done (proposal is valid by the 3 criteria above) or drift (it fails one; state which in notes).` : ""}
+
+Respond with ONLY the JSON object.`;
+}
+
+// Parse an agent's raw reply into an Action; throws on missing JSON, invalid JSON, or a shape not valid for `role`.
+export function parseAction(raw: string, role: Role): Action {
+  // Models sometimes wrap JSON in ```json fences; strip them.
+  let s = raw.trim();
+  if (s.startsWith("```")) {
+    s = s.replace(/^```(?:json)?\n?/, "").replace(/```$/, "").trim();
+  }
+  // Take the span from the first "{" to the last "}" (tolerates stray prose around the object).
+  const start = s.indexOf("{");
+  const end = s.lastIndexOf("}");
+  if (start < 0 || end <= start) {
+    throw new Error(`no JSON object in ${role} response: ${raw.slice(0, 300)}`);
+  }
+  const json = s.slice(start, end + 1);
+  let obj: any;
+  try {
+    obj = JSON.parse(json);
+  } catch (e) {
+    throw new Error(`invalid JSON from ${role}: ${(e as Error).message} | raw: ${json.slice(0, 300)}`);
+  }
+  // typeof null and typeof [] are both "object", so tool_call args get an explicit plain-record check.
+  if (role === "executor") {
+    if (obj.kind === "plan" && Array.isArray(obj.steps)) return obj as Action;
+    if (obj.kind === "tool_call" && typeof obj.tool === "string" && obj.args !== null && typeof obj.args === "object" && !Array.isArray(obj.args)) return obj as Action;
+    if (obj.kind === "propose_done" && Array.isArray(obj.fills)) return obj as Action;
+    throw new Error(`executor returned unexpected shape: ${JSON.stringify(obj).slice(0, 200)}`);
+  } else {
+    // Normalize: some models (qwen2.5, mistral) emit the verdict AS the
+    // `kind` field directly instead of nesting it under a "critique"
+    // wrapper. Accept both shapes rather than hard-failing — the
+    // semantic content is identical, and rejecting would stall the
+    // orchestrator on a cosmetic schema miss.
+    if (obj.kind === "critique" && ["continue", "drift", "approve_done"].includes(obj.verdict)) {
+      return obj as Action;
+    }
+    if (["continue", "drift", "approve_done"].includes(obj.kind)) {
+      return { kind: "critique", verdict: obj.kind, notes: obj.notes ?? "" } as Action;
+    }
+    throw new Error(`reviewer returned unexpected shape: ${JSON.stringify(obj).slice(0, 200)}`);
+  }
+}
diff --git a/tests/multi-agent/chain_of_custody.ts b/tests/multi-agent/chain_of_custody.ts
new file mode 100644
index 0000000..ddd1685
--- /dev/null
+++ b/tests/multi-agent/chain_of_custody.ts
@@ -0,0 +1,335 @@
+// Chain-of-custody trace test.
+//
+// J's framing: "we have enough synthetic data, we've run enough AI responses
+// saved to the database. Test true quality. Don't ignore chain of custody.
+// Use real applications. Understand each aspect of the flow — not just
+// 'write a file or directory and open it'."
+//
+// One real recruiter operation, traced end-to-end through EVERY layer of the
+// live substrate. Every layer must record the operation correctly. Any layer
+// that drops it = chain-of-custody break = surfaced as a real bug.
+//
+// Layers verified:
+//   L0  Bun /search           — recruiter app surface (NOT bare /vectors/hybrid)
+//   L1  /vectors/hybrid       — direct gateway (parity check vs L0)
+//   L2  /vectors/playbook_memory/stats  — feedback loop count
+//   L3  Bun /log              — recruiter records the pick
+//   L4  successful_playbooks_live — SQL-queryable table of past fills (the table snapshot() counts)
+//   L5  /vectors/playbook_memory/stats  — count grew
+//   L6  tools/audit           — Phase 12 governance trail
+//   L7  /access/audit         — Phase 13 access trail
+//   L8  /journal/stats        — Phase 9 mutation events (event count)
+//   L9  /storage/errors       — Federation error journal (no new errors)
+//   L10 /vectors/profile/{id}/activate — Phase 17 hot-swap
+//   L11 Bun /search again     — boost lifts the just-logged worker
+//   L12 verifier qwen2.5      — reads cross-layer state, judges integrity
+//
+// Run:  bun run tests/multi-agent/chain_of_custody.ts
+//
+// Prints per-layer BEFORE/AFTER/DELTA. Exit non-zero on any chain break.
+
+import { generate, GATEWAY } from "./agent.ts";
+
+const BUN = "http://localhost:3700";  // base URL of the Bun recruiter app; /search and /log are called on it
+const PROFILE_ID = "staffing-recruiter";  // profile activated in the L10 step
+
+// The trace operation — small, deterministic, real city/role with supply.
+// Helen Sanchez (worker_id 4661) is a known Toledo Welder; we record her
+// as the manual pick the recruiter would make from the /search results.
+const OPERATION = "fill: Welder x1 in Toledo, OH";
+const OP_ROLE = "Welder";
+const OP_CITY = "Toledo";
+const OP_STATE = "OH";
+const PICKED_WORKER = "Helen Sanchez";  // verified earlier to be a Toledo OH Welder; matched by substring against chunk_text
+
+// ─────────────────────── helpers ───────────────────────
+
+async function getJSON<T = any>(url: string): Promise<T | null> {
+  // GET `url`; yields the parsed JSON body, or null on network error, non-2xx, or unparseable body.
+  try {
+    const r = await fetch(url);
+    return r.ok ? ((await r.json()) as T) : null; // await: keeps a bad-JSON rejection inside this try
+  } catch { return null; }
+}
+
+async function postJSON<T = any>(url: string, body: any): Promise<T | null> { // POST JSON; never throws
+  try {
+    const r = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body) });
+    if (!r.ok) return { _error: `${r.status}: ${await r.text()}` } as any;
+    // await here so an unparseable 2xx body is reported through `_error` instead of rejecting.
+    return (await r.json()) as T;
+  } catch (e) { return { _error: (e as Error).message } as any; }
+}
+
+async function sql(query: string): Promise<{ rows?: any[]; error?: string } | null> {
+  return postJSON(`${GATEWAY}/query/sql`, { sql: query }); // POSTs { sql } to the gateway; failures come back as postJSON's { _error }
+}
+
+interface Snapshot {
+  pm_entries: number;                  // playbook_memory/stats `entries`
+  pm_names: number;                    // playbook_memory/stats `total_names_endorsed`
+  sp_rows: number;                     // successful_playbooks_live SQL row count
+  audit_count: number;                 // tools/audit count
+  access_count: number;                // access/audit count
+  journal_count: number;               // journal/stats events
+  storage_errors: number;              // bucket error journal
+}
+
+async function snapshot(): Promise<Snapshot> {
+  // Reads one counter per layer, in a fixed order. -1 marks a counter that could not be read.
+  const pm = await getJSON<any>(`${GATEWAY}/vectors/playbook_memory/stats`);
+  // successful_playbooks_live is the live SQL surface populated by /log
+  // via /vectors/playbook_memory/persist_sql. The original
+  // successful_playbooks table is now legacy/historical (no writes).
+  const sp = await sql(`SELECT COUNT(*) AS c FROM successful_playbooks_live`);
+  const audit = await getJSON<any[]>(`${GATEWAY}/tools/audit`);
+  const access = await getJSON<any>(`${GATEWAY}/access/audit`);
+  const journalStats = await getJSON<any>(`${GATEWAY}/journal/stats`);
+  const storageErrors = await getJSON<any[]>(`${GATEWAY}/storage/errors`);
+  return {
+    pm_entries: pm?.entries ?? -1,
+    pm_names: pm?.total_names_endorsed ?? -1,
+    sp_rows: Number(sp?.rows?.[0]?.c ?? -1),
+    audit_count: Array.isArray(audit) ? audit.length : (audit as any)?.events?.length ?? -1,
+    access_count: Array.isArray(access) ? access.length : (access as any)?.events?.length ?? (access as any)?.audit?.length ?? -1,
+    journal_count: journalStats?.event_count ?? journalStats?.total_events ?? journalStats?.events ?? -1,
+    storage_errors: Array.isArray(storageErrors) ? storageErrors.length : (storageErrors as any)?.events?.length ?? -1, // -1 like the other counters, not a misleading 0
+  };
+}
+
+function delta(b: Snapshot, a: Snapshot): Record<string, number> {
+  // Per-counter change between two snapshots: after (a) minus before (b).
+  // Keys are emitted in the same order as the Snapshot fields.
+  const counters: (keyof Snapshot)[] = [
+    "pm_entries", "pm_names", "sp_rows", "audit_count",
+    "access_count", "journal_count", "storage_errors",
+  ];
+  const change: Record<string, number> = {};
+  for (const key of counters) change[key] = a[key] - b[key];
+  return change;
+}
+
+function fmtRow(label: string, b: number, a: number): string {
+  const change = a - b; // after minus before; a negative value already carries its own "-"
+  const mark = change !== 0 ? `  ${change > 0 ? "+" : ""}${change}` : "  ·  ";
+  return ["  " + label.padEnd(28), String(b).padStart(6) + "   →   " + String(a).padStart(6), mark].join("  ");
+}
+
+// ─────────────────────── trace ───────────────────────
+
+interface TraceResult {
+  layer: string;   // label passed to note(); main() hard-fails on labels starting with "CHAIN BREAK"
+  ok: boolean;     // whether the layer behaved as expected
+  detail: string;  // human-readable evidence printed next to the label
+}
+
+async function runTrace(): Promise<TraceResult[]> { // runs every layer in order; returns one TraceResult per note() call
+  const out: TraceResult[] = [];
+  const note = (layer: string, ok: boolean, detail: string) => {
+    out.push({ layer, ok, detail });
+    console.log(`  ${ok ? "✓" : "✗"} ${layer.padEnd(32)} ${detail}`);
+  };
+
+  console.log(`\n▶ Trace operation: ${OPERATION} → pick=${PICKED_WORKER}\n`);
+
+  // ── BEFORE snapshot ──
+  console.log(`▶ Before-snapshot:`);
+  const before = await snapshot();
+  console.log(`  pm_entries=${before.pm_entries} pm_names=${before.pm_names} sp_rows=${before.sp_rows} `
+    + `audit=${before.audit_count} access=${before.access_count} journal=${before.journal_count} `
+    + `storage_errors=${before.storage_errors}\n`);
+
+  // ── L0: Bun /search ──
+  console.log(`▶ L0 — Bun /search (recruiter app surface)`);
+  const sql_filter = `role = '${OP_ROLE}' AND state = '${OP_STATE}' AND city = '${OP_CITY}'`;
+  const bunSearch = await postJSON<any>(`${BUN}/search`, {
+    question: `Welder in ${OP_CITY}, ${OP_STATE}`,
+    sql_filter, top_k: 5, generate: false,
+    id_column: "worker_id", dataset: "workers_500k", use_playbook_memory: true,
+  });
+  if (bunSearch?._error) {
+    note("L0 Bun /search", false, `error: ${bunSearch._error}`);
+  } else {
+    const sources = bunSearch?.sources ?? [];
+    const boostedHits = sources.filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
+    note("L0 Bun /search", true, `sources=${sources.length} boosted=${boostedHits} sql_matches=${bunSearch?.sql_matches}`);
+  }
+
+  // ── L1: direct /vectors/hybrid (parity check) ──
+  console.log(`\n▶ L1 — Direct /vectors/hybrid (parity check vs Bun)`);
+  const directSearch = await postJSON<any>(`${GATEWAY}/vectors/hybrid`, {
+    index_name: "workers_500k_v1", filter_dataset: "workers_500k", id_column: "worker_id",
+    sql_filter, question: `Welder in ${OP_CITY}, ${OP_STATE}`,
+    top_k: 5, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
+  });
+  const directBoosted = (directSearch?.sources ?? []).filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
+  // postJSON reports failures as { _error }; surface that instead of logging a pass with boosted=0.
+  if (directSearch?._error) note("L1 Direct /vectors/hybrid", false, `error: ${directSearch._error}`);
+  else note("L1 Direct /vectors/hybrid", true, `boosted=${directBoosted} sql=${directSearch?.sql_matches}`);
+
+  const bunBoosted = (bunSearch?.sources ?? []).filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
+  if (bunBoosted < directBoosted) note("CHAIN BREAK: Bun↔Direct parity", false,
+    `Bun=${bunBoosted} boosted vs Direct=${directBoosted}. Bun /search likely missing playbook_memory_k forward.`);
+
+  // ── L3: Bun /log (recruiter records the pick) ──
+  console.log(`\n▶ L3 — Bun /log (recruiter records the pick)`);
+  const logged = await postJSON<any>(`${BUN}/log`, {
+    operation: OPERATION,
+    approach: "chain-of-custody trace",
+    result: `1/1 filled → ${PICKED_WORKER}`,
+    context: `client=COC-${Date.now()} start=08:00 scenario=trace`,
+  });
+  if (logged?._error) note("L3 Bun /log", false, `error: ${logged._error}`);
+  else note("L3 Bun /log", true, `logged=${logged?.logged} seeded=${logged?.seeded}`);
+
+  // If /log's `response` mentions "different schema" or "error", the SQL-queryable path is
+  // broken even though seed succeeded. That's a chain break. NOTE(review): the commit message
+  // says /log no longer posts to /ingest/file — confirm `response` is still populated.
+  const logResp = String((logged as any)?.response ?? "");
+  if (logResp.includes("error") || logResp.includes("different schema") || logResp.includes("Error")) {
+    note("CHAIN BREAK: Bun /log → SQL ingest", false,
+      `successful_playbooks ingest failed. Bun returned logged=true but /log's underlying ingest reported: ${logResp.slice(0, 150)}`);
+  } else if (!logged?._error) {
+    note("L3a /log → /ingest/file", true, "ingest accepted"); // only claimed when /log itself succeeded
+  }
+
+  // Give the system a beat for any async fan-out (audit/journal/etc).
+  await new Promise(r => setTimeout(r, 500));
+
+  // ── AFTER snapshot ──
+  console.log(`\n▶ After-snapshot:`);
+  const after = await snapshot();
+  const d = delta(before, after);
+  console.log(fmtRow("playbook_memory.entries",    before.pm_entries,    after.pm_entries));
+  console.log(fmtRow("playbook_memory.names",      before.pm_names,      after.pm_names));
+  console.log(fmtRow("successful_playbooks.rows",  before.sp_rows,       after.sp_rows));
+  console.log(fmtRow("tools/audit.count",          before.audit_count,   after.audit_count));
+  console.log(fmtRow("access/audit.count",         before.access_count,  after.access_count));
+  console.log(fmtRow("journal.events",             before.journal_count, after.journal_count));
+  console.log(fmtRow("storage/errors.count",       before.storage_errors,after.storage_errors));
+
+  // ── L5: playbook_memory grew? ──
+  if (d.pm_entries === 1) note("L5 playbook_memory growth", true, "+1 entry as expected");
+  else note("L5 playbook_memory growth", d.pm_entries > 0,
+    `delta=${d.pm_entries} (expected 1 — seed-after-log path)`);
+
+  // ── L4: successful_playbooks SQL row appeared? ──
+  if (d.sp_rows >= 1) note("L4 successful_playbooks SQL", true, `+${d.sp_rows} row(s)`);
+  else note("L4 successful_playbooks SQL", false,
+    `delta=${d.sp_rows} — Bun /log claims success but SQL table didn't grow. Recruiter querying via SQL would miss this fill.`);
+
+  // ── L9: storage errors stayed quiet ──
+  if (d.storage_errors === 0) note("L9 storage error journal", true, "no new bucket op errors");
+  else note("L9 storage error journal", false, `+${d.storage_errors} new errors`);
+
+  // ── L10: Phase 17 profile activation ──
+  console.log(`\n▶ L10 — Activate profile ${PROFILE_ID}`);
+  const act = await postJSON<any>(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, {});
+  if (act?._error) note("L10 profile activation", false, `error: ${act._error}`);
+  else note("L10 profile activation", true,
+    `warmed=${(act?.warmed_indexes ?? []).length} duration_ms=${act?.duration_ms ?? "?"}`);
+
+  // ── L11: Bun /search again — boost should now lift PICKED_WORKER ──
+  console.log(`\n▶ L11 — Bun /search second time (boost lift verification)`);
+  const search2 = await postJSON<any>(`${BUN}/search`, {
+    question: `Welder in ${OP_CITY}, ${OP_STATE}`,
+    sql_filter, top_k: 10, generate: false,
+    id_column: "worker_id", dataset: "workers_500k", use_playbook_memory: true,
+  });
+  const sources2 = search2?.sources ?? [];
+  const pickedHit = sources2.find((s: any) => String(s.chunk_text ?? "").includes(PICKED_WORKER));
+  if (!pickedHit) {
+    note("L11 boost lifts logged pick (Bun)", false,
+      `${PICKED_WORKER} not in top-10 via Bun /search. Could be Bun-not-forwarding-playbook_memory_k bug from L1.`);
+  } else if ((pickedHit.playbook_boost ?? 0) > 0) {
+    note("L11 boost lifts logged pick (Bun)", true,
+      `${PICKED_WORKER} boost=+${(pickedHit.playbook_boost as number).toFixed(3)} cites=${(pickedHit.playbook_citations ?? []).length}`);
+  } else {
+    note("L11 boost lifts logged pick (Bun)", false,
+      `${PICKED_WORKER} present but boost=0 — playbook_memory_k forward bug likely`);
+  }
+
+  // Same probe via direct gateway to isolate Bun vs gateway
+  const direct2 = await postJSON<any>(`${GATEWAY}/vectors/hybrid`, {
+    index_name: "workers_500k_v1", filter_dataset: "workers_500k", id_column: "worker_id",
+    sql_filter, question: `Welder in ${OP_CITY}, ${OP_STATE}`,
+    top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
+  });
+  const sources2d = direct2?.sources ?? [];
+  const pickedHitD = sources2d.find((s: any) => String(s.chunk_text ?? "").includes(PICKED_WORKER));
+  if (pickedHitD && (pickedHitD.playbook_boost ?? 0) > 0) {
+    note("L11b boost via direct gateway", true,
+      `${PICKED_WORKER} boost=+${(pickedHitD.playbook_boost as number).toFixed(3)} cites=${(pickedHitD.playbook_citations ?? []).length}`);
+  } else {
+    note("L11b boost via direct gateway", false, `direct call also did not boost ${PICKED_WORKER}`);
+  }
+
+  return out;
+}
+
+// ─────────────────────── verifier (fresh agent) ───────────────────────
+
+async function verifierJudgment(trace: TraceResult[]): Promise<{ verdict: string; confidence: number }> {
+  // Asks a fresh model to judge the trace; any failure degrades to confidence 0 instead of throwing.
+  const summary = trace.map(({ ok, layer, detail }) => `  ${ok ? "ok" : "FAIL"}  ${layer}: ${detail}`).join("\n");
+  const prompt = `You are the CHAIN-OF-CUSTODY VERIFIER agent. A real recruiter operation was just
+traced through every layer of the staffing substrate. Read the per-layer results and judge
+whether the system kept chain of custody intact (every layer recorded the operation as
+expected) or where it broke.
+
+Per-layer trace:
+${summary}
+
+Reply with ONE JSON object only:
+{"verdict": "<one tight sentence — what's the integrity status>", "confidence": 0-100}
+
+Be specific about which layer broke if any. confidence is how sure you are about the verdict.`;
+  try {
+    const reply = await generate("qwen2.5:latest", prompt, { temperature: 0.1, max_tokens: 200 });
+    const open = reply.indexOf("{"), close = reply.lastIndexOf("}"); // outermost {...} span only
+    if (!(open >= 0 && close > open)) return { verdict: "verifier could not produce JSON", confidence: 0 };
+    const parsed = JSON.parse(reply.slice(open, close + 1));
+    return { verdict: parsed.verdict ?? "no verdict", confidence: Number(parsed.confidence) || 0 };
+  } catch (err) {
+    return { verdict: `verifier error: ${(err as Error).message}`, confidence: 0 };
+  }
+}
+
+// ─────────────────────── main ───────────────────────
+
+async function main() {
+  // Entry point: run the trace, collect the verifier's opinion, print the summary, set the exit code.
+  console.log(`▶ Chain-of-custody trace — single real recruiter operation through every layer`);
+  const trace = await runTrace();
+
+  console.log(`\n▶ L12 — Verifier (fresh qwen2.5 agent reads the cross-layer trace)`);
+  const judgment = await verifierJudgment(trace);
+  console.log(`  verdict (${judgment.confidence}%): ${judgment.verdict}`);
+
+  // Only notes labelled "CHAIN BREAK" gate the exit code; other failed layers are reported but pass.
+  const failed = trace.filter(t => !t.ok);
+  const broken = failed.filter(t => t.layer.startsWith("CHAIN BREAK"));
+
+  console.log(`\n▶ Summary:`);
+  console.log(`  passing layers: ${trace.length - failed.length}/${trace.length}`);
+  console.log(`  chain breaks:   ${broken.length}`);
+  console.log(`  total failures: ${failed.length}`);
+  console.log(`  verifier confidence: ${judgment.confidence}%`);
+
+  if (broken.length > 0) {
+    console.log(`\n✗ Chain of custody BROKEN at ${broken.length} layer(s):`);
+    broken.forEach(b => console.log(`    - ${b.layer}: ${b.detail}`));
+    process.exit(1);
+  }
+
+  const closingLine = failed.length > 0
+    ? `\n◑ Trace completed with ${failed.length} non-blocking failures (no formal chain break)`
+    : `\n✓ Chain of custody intact across all layers`;
+  console.log(closingLine);
+  process.exit(0);
+}
+
+main().catch(e => { // last-resort handler for anything main() did not handle itself
+  console.error(`\n✗ ${(e as Error).message}`);
+  if ((e as any).stack) console.error((e as any).stack);
+  process.exit(1); // a crash of the harness is a failed run
+});
diff --git a/tests/multi-agent/network_proving.ts b/tests/multi-agent/network_proving.ts
new file mode 100644
index 0000000..c488488
--- /dev/null
+++ b/tests/multi-agent/network_proving.ts
@@ -0,0 +1,469 @@
+// Network proving: continuous build → verify → repeat with hot-swap profile.
+//
+// J's framing: "have them guide each other, when the test is complete we have
+// a successful playbook, then spin up another agent that tests the viability
+// of our network with the playbook and the hot-swap profile. Keep spinning up
+// agents and testing — pass theory, real-world execution, not isolated unit
+// tests."
+//
+// Each round = TWO phases:
+//
+//   1. BUILD phase. Two agents (mistral executor + qwen2.5 reviewer) work
+//      on a real staffing fill task. They guide each other via the critique
+//      loop. On consensus → seal a playbook with CANONICAL short seed text
+//      (the Pass 1 lesson — verbose seeds silently kill boost). Real Ollama,
+//      real workers_500k, real /vectors/hybrid path.
+//
+//   2. VERIFY phase. A FRESH qwen2.5 agent spins up, activates the
+//      staffing-recruiter profile (Phase 17 hot-swap), runs a probe query
+//      against the same network, and judges from the live response whether
+//      prior rounds' playbooks actually surface relevant workers higher.
+//      The verifier writes a verdict: did the network learn?
+//
+// Three rounds, progressively harder:
+//   R0: Welder x2 in Toledo, OH                — baseline
+//   R1: Welder x2 in Cleveland, OH             — same role, different city
+//                                                  → tests geo discrimination
+//                                                  (Toledo workers MUST NOT
+//                                                  bleed into Cleveland boost)
+//   R2: Welder x3 in Toledo, OH                — re-fill same city, bigger
+//                                                  count → tests compounding
+//                                                  (R0's endorsements should
+//                                                  still rank up here)
+//
+// Run: bun run tests/multi-agent/network_proving.ts
+//
+// Fail-fast: any HTTP error or model crash bubbles to top-level, exits 1.
+
+import {
+  type LogEntry,
+  type TaskSpec,
+  type Action,
+  type Fill,
+  GATEWAY,
+  generate,
+  parseAction,
+  executorPrompt,
+  reviewerPrompt,
+  sqlQuery,
+  callTool,
+} from "./agent.ts";
+
+const EXECUTOR_MODEL = "mistral:latest";  // generates the executor turns in buildPhase
+const REVIEWER_MODEL = "qwen2.5:latest";  // critiques each executor turn in buildPhase
+const VERIFIER_MODEL = "qwen2.5:latest";  // produces the per-round verdict in verifyPhase
+const PROFILE_ID = "staffing-recruiter";  // profile targeted by activateProfile()
+const INDEX_NAME = "workers_500k_v1";  // index queried by probeWithBoost()
+const MAX_TURNS = 12;  // executor+reviewer turn cap per build
+const MAX_TOOL_ERRORS = 3;  // consecutive tool failures that abort a build
+const MAX_DRIFTS = 3;  // consecutive "drift" verdicts that abort a build
+
+const TASK_DECK: TaskSpec[] = [
+  { // R0 — baseline
+    id: "R0", operation: "fill: Welder x2 in Toledo, OH",
+    target_role: "Welder", target_count: 2, target_city: "Toledo", target_state: "OH",
+    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
+  },
+  { // R1 — same role, different city: Toledo endorsements must not bleed into Cleveland
+    id: "R1", operation: "fill: Welder x2 in Cleveland, OH",
+    target_role: "Welder", target_count: 2, target_city: "Cleveland", target_state: "OH",
+    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
+  },
+  { // R2 — same city as R0 with a larger count: R0's endorsements should still rank up
+    id: "R2", operation: "fill: Welder x3 in Toledo, OH",
+    target_role: "Welder", target_count: 3, target_city: "Toledo", target_state: "OH",
+    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
+  },
+];
+
+interface BuildResult {
+  ok: boolean;  // true only when executor and reviewer reached consensus
+  task: TaskSpec;  // the task this build worked on
+  fills: Fill[];  // agreed fills; empty when ok is false
+  turns: number;  // turns consumed before sealing or failing
+  duration_secs: number;  // wall-clock time, rounded to whole seconds
+  playbook_id?: string;  // `playbook_id` from the seed response; unset when seeding failed
+  entries_after_seed?: number;  // `entries_after` from the seed response
+  error?: string;  // failure message when ok is false
+}
+
+interface VerifyResult {
+  profile_activated: boolean;         // the activate call returned a 2xx status
+  warmed_indexes: number;             // length of `warmed_indexes` in the activate response
+  probe_boost_total: number;          // sum of playbook_boost across top-K
+  probe_boosted_hits: number;         // how many hits had boost > 0
+  probe_top_citations: string[];      // playbook_ids cited
+  geo_discrimination_ok: boolean;     // when prior playbook is in different city, boost should NOT bleed
+  verdict: string;                    // qwen2.5's natural-language judgment
+  confidence: number;                 // 0-100 self-rated
+  duration_secs: number;              // wall-clock time of the verify phase, whole seconds
+}
+
+interface RoundLedger {
+  round: number;                      // round number — NOTE(review): confirm 0- vs 1-based at the call site
+  task: TaskSpec;                     // task played in this round
+  build: BuildResult;                 // BUILD phase outcome
+  verify: VerifyResult;               // VERIFY phase outcome
+  score: number;                      // /10 per round
+  notes: string[];                    // human-readable scoring notes — presumably filled from scoreRound()
+}
+
+// ─────────────────────── BUILD phase (two-agent loop) ───────────────────────
+
+async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
+  // Routes one executor tool call: "sql" and "hybrid_search" are served here, the rest by callTool.
+  if (name === "sql") {
+    if (!args.query) throw new Error("sql needs query");
+    if (!/^\s*SELECT/i.test(args.query)) throw new Error("sql allows SELECT only");
+    return sqlQuery(args.query);
+  }
+  if (name !== "hybrid_search") return callTool(name, args);
+
+  const { sql_filter, question, index_name, k } = args;
+  if (!(sql_filter && question && index_name)) {
+    throw new Error(`hybrid_search needs sql_filter+question+index_name, got keys=${Object.keys(args).join(",")}`);
+  }
+  const payload = {
+    sql_filter, question, index_name,
+    filter_dataset: "workers_500k", id_column: "worker_id",
+    top_k: k ?? 10, generate: false, use_playbook_memory: true,
+  };
+  const resp = await fetch(`${GATEWAY}/vectors/hybrid`, {
+    method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(payload),
+  });
+  if (!resp.ok) throw new Error(`hybrid → ${resp.status}: ${await resp.text()}`);
+  return resp.json();
+}
+
+function trim(r: any) { // caps list-shaped tool results: at most 20 rows, otherwise at most 12 sources
+  const capped = (key: "rows" | "sources", max: number) => ({ ...r, [key]: r[key].slice(0, max) });
+  if (Array.isArray(r?.rows)) return capped("rows", 20);
+  return Array.isArray(r?.sources) ? capped("sources", 12) : r;
+}
+
+function fmtTurn(prefix: string, e: Omit<LogEntry, "at">): string { // one console line per log entry, shaped by e.kind
+  const body: any = e.content ?? {};
+  const tag = "[" + prefix + " t" + e.turn.toString().padStart(2, "0") + " " + e.role.padEnd(8) + " " + e.kind.padEnd(14) + "]";
+  switch (e.kind) {
+    case "tool_call": return `${tag} ${body.tool}(${JSON.stringify(body.args ?? {}).slice(0, 70)})`;
+    case "tool_result":
+      if (body.error) return `${tag} error: ${body.error}`;
+      if (Array.isArray(body.sources)) return `${tag} hybrid sql=${body.sql_matches} reranked=${body.vector_reranked}`;
+      return Array.isArray(body.rows) ? `${tag} sql ${body.rows.length} rows` : `${tag} ${JSON.stringify(body).slice(0, 70)}`;
+    case "critique": return `${tag} verdict=${body.verdict} ${(body.notes ?? "").slice(0, 50)}`;
+    case "propose_done": return `${tag} ${(body.fills ?? []).length} fills: ${(body.fills ?? []).map((f: Fill) => f.name).join(", ")}`;
+    case "consensus_done": return `${tag} ✓`;
+    case "plan": return `${tag} ${(body.steps ?? []).length} steps`;
+    default: return `${tag} ${JSON.stringify(body).slice(0, 60)}`;
+  }
+}
+
+async function buildPhase(task: TaskSpec, prefix: string): Promise<BuildResult> { // one BUILD round: executor/reviewer loop, then seed on consensus
+  const t0 = Date.now();
+  const log: LogEntry[] = [];
+  let turn = 0, sealed: { fills: Fill[]; approach: string } | null = null; // sealed stays null until both agents agree
+  let toolErrors = 0, drifts = 0; // consecutive-failure streaks, compared against MAX_TOOL_ERRORS / MAX_DRIFTS
+
+  const append = (e: Omit<LogEntry, "at">): LogEntry => { // timestamps the entry, stores it in the shared log, echoes it
+    const full: LogEntry = { ...e, at: new Date().toISOString() };
+    log.push(full);
+    console.log(fmtTurn(prefix, e));
+    return full;
+  };
+
+  try {
+    while (turn < MAX_TURNS && !sealed) {
+      turn += 1;
+
+      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 600 });
+      const execAction = parseAction(execRaw, "executor");
+      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });
+
+      if (execAction.kind === "tool_call") {
+        try {
+          const r = await executeToolCall(execAction.tool, execAction.args);
+          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trim(r) });
+          toolErrors = 0; // a successful call ends the error streak
+        } catch (e) {
+          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
+            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } }); // the failure is logged as a tool_result too
+          toolErrors += 1;
+          if (toolErrors >= MAX_TOOL_ERRORS) throw new Error(`${MAX_TOOL_ERRORS} consecutive tool errors`);
+        }
+      }
+
+      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 400 });
+      const revAction = parseAction(revRaw, "reviewer");
+      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });
+
+      if (revAction.kind !== "critique") throw new Error(`reviewer non-critique`);
+      if (revAction.verdict === "drift") {
+        drifts += 1;
+        if (drifts >= MAX_DRIFTS) throw new Error(`${MAX_DRIFTS} consecutive drifts`);
+      } else drifts = 0; // any non-drift verdict ends the drift streak
+
+      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") { // consensus: proposal and approval in the same turn
+        if (execAction.fills.length !== task.target_count) {
+          throw new Error(`fills=${execAction.fills.length} target=${task.target_count}`);
+        }
+        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", content: { fills: execAction.fills } });
+        sealed = { fills: execAction.fills, approach: (execAction as any).rationale ?? "multi-agent" };
+      }
+    }
+
+    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);
+
+    // Phase 19 seed — CANONICAL short text (Pass 1 lesson). The verbose
+    // executor rationale stays out of the embedding; we keep a separate
+    // human-readable record in the playbook log.
+    const canonicalApproach = `${task.target_role.toLowerCase()} fill via hybrid search`;
+    const canonicalContext = `${task.target_role} fill in ${task.target_city}, ${task.target_state}`;
+    let playbook_id: string | undefined;
+    let entries_after_seed: number | undefined;
+    try { // seeding is best-effort: a failure is only warned about and the build still returns ok: true
+      const sr = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
+        method: "POST", headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          operation: task.operation,
+          approach: canonicalApproach,
+          context: canonicalContext,
+          endorsed_names: sealed.fills.map(f => f.name),
+          append: true,
+        }),
+      });
+      if (sr.ok) {
+        const j = await sr.json() as any;
+        playbook_id = j.playbook_id;
+        entries_after_seed = j.entries_after;
+        console.log(`[${prefix}] ↳ seeded id=${playbook_id} entries=${entries_after_seed}`);
+      } else {
+        console.warn(`[${prefix}] ↳ seed failed: ${sr.status} ${await sr.text()}`);
+      }
+    } catch (e) {
+      console.warn(`[${prefix}] ↳ seed errored: ${(e as Error).message}`);
+    }
+
+    return {
+      ok: true, task, fills: sealed.fills, turns: turn,
+      duration_secs: Math.round((Date.now() - t0) / 1000),
+      playbook_id, entries_after_seed,
+    };
+  } catch (e) { // every throw above (model/parse errors, streak limits, bad fill count, turn cap) becomes ok: false
+    return {
+      ok: false, task, fills: [], turns: turn,
+      duration_secs: Math.round((Date.now() - t0) / 1000),
+      error: (e as Error).message,
+    };
+  }
+}
+
+// ─────────────────────── VERIFY phase (fresh single agent) ───────────────────────
+
+async function activateProfile(): Promise<{ ok: boolean; warmed: number; ms: number }> { // POSTs the activate route for PROFILE_ID, timing the round trip
+  const startedAt = Date.now();
+  const resp = await fetch(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, { method: "POST" });
+  const ms = Date.now() - startedAt;
+  if (resp.ok) {
+    const body = await resp.json() as any;
+    return { ok: true, warmed: (body.warmed_indexes ?? []).length, ms };
+  }
+  console.warn(`profile activation failed: ${resp.status} ${await resp.text()}`); // a non-2xx reply is reported, not thrown
+  return { ok: false, warmed: 0, ms };
+}
+
+async function probeWithBoost(task: TaskSpec) {
+  // Runs the round's hybrid query with playbook memory on and summarises the boost seen in the hits.
+  // Throws on a non-2xx reply. All three filter values are quoted through `lit` (single quotes doubled).
+  const lit = (v: string) => `'${v.replace(/'/g, "''")}'`;
+  const sql_filter = `role = ${lit(task.target_role)} AND state = ${lit(task.target_state)} AND city = ${lit(task.target_city)}`;
+  const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
+    method: "POST", headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      index_name: INDEX_NAME, filter_dataset: "workers_500k", id_column: "worker_id",
+      sql_filter, question: `${task.target_role} in ${task.target_city}, ${task.target_state}`,
+      top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
+    }),
+  });
+  if (!r.ok) throw new Error(`probe → ${r.status}: ${await r.text()}`);
+  const j = (await r.json()) as any;
+  const sources: any[] = j.sources ?? [];
+  const boostedHits = sources.filter(s => (s.playbook_boost ?? 0) > 0).length;
+  const totalBoost = sources.reduce((s, x) => s + (x.playbook_boost ?? 0), 0);
+  const cites = Array.from(new Set(sources.flatMap(s => s.playbook_citations ?? []))).slice(0, 5);
+  // topNames takes the text before the first em dash. `||` rather than `??`: split()[0] is always
+  // a string, so only a falsiness test lets an empty or missing chunk_text fall back to doc_id.
+  const topNames = sources.slice(0, 5).map(s => (String(s.chunk_text ?? "").split("—")[0] ?? "").trim() || s.doc_id);
+  return { sources, boostedHits, totalBoost, cites, topNames };
+}
+
+// Verifier prompt — fresh agent, no shared log with the build pair. It
+// gets the round's task, the prior rounds' sealed playbooks, and the live
+// probe result, and renders a human-readable verdict with a confidence.
+function verifierPrompt(task: TaskSpec, priorPlaybooks: Array<{op: string; fills: string[]}>,
+  probe: { boostedHits: number; totalBoost: number; cites: string[]; topNames: string[] }
+): string {
+  // One numbered line per prior playbook, or a placeholder when there are none.
+  const numbered = priorPlaybooks.map(({ op, fills }, idx) => `  ${idx + 1}. ${op} → endorsed [${fills.join(", ")}]`);
+  const priorBlock = numbered.length > 0 ? numbered.join("\n") : "(no prior playbooks — this is the first round)";
+
+  return `You are the VERIFIER agent. A fresh round just sealed a playbook on a real staffing
+substrate. Your job: judge whether the system learned from prior rounds.
+
+CURRENT ROUND:
+  task:    ${task.operation}
+  in city: ${task.target_city}, ${task.target_state}
+
+PRIOR PLAYBOOKS (in playbook_memory):
+${priorBlock}
+
+I activated the staffing-recruiter profile and ran a hybrid query for this exact task with
+use_playbook_memory=true. Live result from the substrate:
+  - top-5 surfaced workers: ${probe.topNames.join(", ")}
+  - hits with non-zero playbook_boost: ${probe.boostedHits} / 10
+  - total boost across top-10: ${probe.totalBoost.toFixed(3)}
+  - playbook citations: [${probe.cites.join(", ")}]
+
+JUDGE:
+1. If a prior playbook covered this same city + role, the boost should fire on the workers
+   it endorsed (boostedHits > 0, citations non-empty).
+2. If no prior playbook covers this combo, boost should be ~0 — that means the system is
+   correctly NOT bleeding endorsements across geos.
+3. Anything in between (e.g. some boost but for the wrong reason) is a partial pass.
+
+Respond with ONE JSON object only:
+{"learned": true|false, "verdict": "<one sentence>", "confidence": 0-100}
+
+learned=true means the network behaved as expected for this round (whether that's "boost fired
+because it should" or "boost stayed zero because it should"). learned=false means the system
+either failed to learn from a relevant prior playbook OR bled an irrelevant one. confidence is
+how sure you are.`;
+}
+
+async function verifyPhase(task: TaskSpec, ledger: RoundLedger[]): Promise<VerifyResult> {
+  // VERIFY phase for one round: activate the profile, probe the live network, compute the
+  // deterministic geo check, then ask a fresh model for a verdict on the probe numbers.
+  const startedAt = Date.now();
+  const activation = await activateProfile();
+  const probe = await probeWithBoost(task);
+
+  // Only rounds whose build sealed count as prior playbooks, and only same-role rounds
+  // take part in the geo check.
+  const sealedRounds = ledger.filter(r => r.build.ok);
+  const sameRoleRounds = sealedRounds.filter(r => r.task.target_role === task.target_role);
+  const inThisGeo = (r: RoundLedger) =>
+    r.task.target_city === task.target_city && r.task.target_state === task.target_state;
+  const priorMatchesThisGeo = sameRoleRounds.some(inThisGeo);
+  const priorOtherGeo = sameRoleRounds.some(r => !inThisGeo(r));
+
+  // Same role + same geo seen before → a lift is expected. Same role seen only elsewhere →
+  // any boost is bleed. No relevant prior round → nothing to assert.
+  const geo_discrimination_ok = priorMatchesThisGeo
+    ? probe.boostedHits > 0
+    : priorOtherGeo
+      ? probe.boostedHits === 0
+      : true;
+
+  // Spin up the fresh verifier agent
+  const priorPlaybooks = sealedRounds.map(({ task: t, build }) => ({
+    op: t.operation, fills: build.fills.map(f => f.name),
+  }));
+
+  let verdict = "verifier failed to respond";
+  let confidence = 0;
+  try {
+    const reply = await generate(VERIFIER_MODEL, verifierPrompt(task, priorPlaybooks, probe), {
+      temperature: 0.1, max_tokens: 250,
+    });
+    // Parse the outermost {...} span; a reply without one keeps the defaults above.
+    const open = reply.indexOf("{");
+    const close = reply.lastIndexOf("}");
+    if (open >= 0 && close > open) {
+      const parsed = JSON.parse(reply.slice(open, close + 1));
+      verdict = parsed.verdict ?? verdict;
+      confidence = Number(parsed.confidence) || 0;
+    }
+  } catch (err) {
+    verdict = `verifier parse error: ${(err as Error).message}`;
+  }
+
+  return {
+    profile_activated: activation.ok,
+    warmed_indexes: activation.warmed,
+    probe_boost_total: probe.totalBoost,
+    probe_boosted_hits: probe.boostedHits,
+    probe_top_citations: probe.cites,
+    geo_discrimination_ok,
+    verdict, confidence,
+    duration_secs: Math.round((Date.now() - startedAt) / 1000),
+  };
+}
+
+// ─────────────────────── round scoring ───────────────────────
+
+function scoreRound(r: RoundLedger): { score: number; notes: string[] } {
+  // Rubric (max 10): build sealed 3, seeded 1, profile activated 1, geo discrimination 3, verifier confidence ≥ 60 → 2.
+  const notes: string[] = [];
+  let score = 0;
+  const credit = (points: number, note: string) => { score += points; notes.push(note); };
+  const { build, verify } = r;
+  if (build.ok) credit(3, `✓ build sealed (${build.fills.map(f => f.name).join(", ")})`); else credit(0, `✗ build failed: ${build.error}`);
+  if (build.playbook_id) credit(1, `✓ seeded id=${build.playbook_id}`);
+  if (verify.profile_activated) credit(1, `✓ profile activated (warmed=${verify.warmed_indexes})`);
+  if (verify.geo_discrimination_ok) credit(3, `✓ geo discrimination correct (boostedHits=${verify.probe_boosted_hits})`); else credit(0, `✗ geo discrimination failed (boostedHits=${verify.probe_boosted_hits})`);
+  if (verify.confidence >= 60) credit(2, `✓ verifier confident (${verify.confidence}%): ${verify.verdict}`); else credit(0, `◑ verifier confidence ${verify.confidence}%: ${verify.verdict}`);
+  return { score, notes };
+}
+
+// ─────────────────────── main loop ───────────────────────
+
+async function main() { // runs every TASK_DECK round (build → verify → score), prints a summary, then enforces the pass gate
+  console.log(`▶ Network proving — ${TASK_DECK.length} rounds, profile=${PROFILE_ID}`);
+  console.log(`▶ build pair: ${EXECUTOR_MODEL} + ${REVIEWER_MODEL}; verifier: ${VERIFIER_MODEL}\n`);
+
+  const ledger: RoundLedger[] = [];
+
+  for (let i = 0; i < TASK_DECK.length; i++) {
+    const task = TASK_DECK[i];
+    console.log(`\n══════════ Round ${i} — ${task.operation} ══════════`);
+
+    console.log(`\n[${task.id}] BUILD phase (two agents collaborating)`);
+    const build = await buildPhase(task, task.id);
+
+    console.log(`\n[${task.id}] VERIFY phase (fresh agent + hot-swap profile)`);
+    const verify = await verifyPhase(task, ledger);
+    console.log(`  profile=${verify.profile_activated ? "ok" : "fail"} warmed=${verify.warmed_indexes} `
+      + `boosted=${verify.probe_boosted_hits}/10 totalBoost=${verify.probe_boost_total.toFixed(3)} `
+      + `cites=${verify.probe_top_citations.length} confidence=${verify.confidence}%`);
+    console.log(`  verdict: ${verify.verdict}`);
+
+    const round: RoundLedger = { round: i, task, build, verify, score: 0, notes: [] };
+    const sc = scoreRound(round);
+    round.score = sc.score; round.notes = sc.notes;
+    ledger.push(round);
+
+    console.log(`\n  Round ${i} score: ${round.score}/10`);
+    for (const n of round.notes) console.log(`    ${n}`);
+  }
+
+  // An empty deck proves nothing: fail here rather than print "AVG: NaN" and pass a 0-of-0 gate.
+  if (ledger.length === 0) throw new Error(`network proving gate failed — TASK_DECK is empty, no rounds ran`);
+  console.log(`\n══════════ Network viability summary ══════════`);
+  const total = ledger.reduce((s, r) => s + r.score, 0);
+  for (const r of ledger) console.log(`  R${r.round} ${r.task.target_city.padEnd(10)} ${r.task.target_role.padEnd(20)} ${r.score}/10`);
+  console.log(`\n  TOTAL: ${total}/${ledger.length * 10}  AVG: ${(total / ledger.length).toFixed(1)}/10`);
+
+  // Hard gate: at least 2/3 of the rounds (rounded up) must have a successful build AND score ≥ 6/10.
+  const need = Math.ceil(ledger.length * 2 / 3);
+  const passed = ledger.filter(r => r.build.ok && r.score >= 6).length;
+  if (passed < need) {
+    throw new Error(`network proving gate failed — only ${passed}/${ledger.length} rounds passed (need ≥${need})`);
+  }
+  console.log(`\n✓ Network proven over ${passed}/${ledger.length} rounds`);
+  process.exit(0);
+}
+
+main().catch((failure: any) => { // last-resort handler: report the failure, then exit non-zero
+  console.error(`\n✗ ${failure.message}`);
+  if (failure.stack) console.error(failure.stack);
+  process.exit(1);
+});
diff --git a/tests/multi-agent/orchestrator.ts b/tests/multi-agent/orchestrator.ts
new file mode 100644
index 0000000..f73c68c
--- /dev/null
+++ b/tests/multi-agent/orchestrator.ts
@@ -0,0 +1,302 @@
+// Two-agent orchestrator. A single loop alternates one executor turn with
+// one reviewer turn, coordinated through a shared in-memory log, until
+// consensus_done, a blown drift/tool-error streak, or the hard turn cap.
+// On success it writes a playbook JSON; on failure it dumps the full log
+// for inspection and exits non-zero.
+//
+// Failure policy: parse errors, blown streaks and the turn cap abort the
+// run with exit code 1; tool-call errors are fed back to the executor as a
+// tool_result so it can self-correct. The harness reads the exit code.
+
+import {
+  type LogEntry,
+  type TaskSpec,
+  type Action,
+  type Fill,
+  callTool,
+  hybridSearch,
+  sqlQuery,
+  generate,
+  parseAction,
+  executorPrompt,
+  reviewerPrompt,
+  GATEWAY,
+} from "./agent.ts";
+import { mkdir, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+
+const EXECUTOR_MODEL = "mistral:latest"; // model passed to generate() for executor turns
+const REVIEWER_MODEL = "qwen2.5:latest"; // model passed to generate() for reviewer turns
+const MAX_TURNS = 12;                 // executor turns; the reviewer gets one turn per executor turn
+const MAX_CONSECUTIVE_DRIFTS = 3;     // drift-cycle blown → give up
+
+// Default task, used when no argv[2] override is given (see
+// `parseTaskFromArg`). It follows the real-world staffing fill pattern.
+// NOTE(review): earlier wording of this comment disagreed on whether this
+// exact fill already exists in successful_playbooks — confirm before relying on it.
+// Toledo, OH has 342 welders in workers_500k so supply is ample — the test
+// is about collaboration and drift correction, not needle-in-haystack.
+const DEFAULT_TASK: TaskSpec = {
+  id: `task-${Date.now()}`, // evaluated once, when this module is loaded
+  operation: "fill: Welder x2 in Toledo, OH",
+  target_role: "Welder",
+  target_count: 2,
+  target_city: "Toledo",
+  target_state: "OH",
+  approach_hint: "hybrid search against workers_500k_v1, narrow by role+city+state+availability, rank semantically",
+};
+
+function parseTaskFromArg(): TaskSpec { // TaskSpec from argv[2]; DEFAULT_TASK when no argument is given
+  const arg = process.argv[2];
+  if (!arg) return DEFAULT_TASK;
+  // A path ending in .json is read and parsed as a complete TaskSpec (its shape is not validated here).
+  if (arg.endsWith(".json")) return JSON.parse(require("node:fs").readFileSync(arg, "utf-8"));
+  // Otherwise accept "role:Welder count:2 city:Columbus state:OH" — whitespace-separated
+  // key:value tokens, so values cannot contain spaces.
+  const kv: Record<string, string> = {};
+  for (const token of arg.trim().split(/\s+/)) { const [k, ...v] = token.split(":"); kv[k] = v.join(":"); }
+  // Fail loudly on an incomplete spec instead of running "fill: undefined xundefined in undefined, undefined".
+  const missing = ["role", "count", "city", "state"].filter(key => !kv[key]);
+  if (missing.length > 0) throw new Error(`task arg is missing ${missing.join(", ")} — expected "role:… count:… city:… state:…", got "${arg}"`);
+  const count = Number(kv.count);
+  if (!Number.isInteger(count) || count < 1) throw new Error(`task arg count must be a positive integer, got "${kv.count}"`);
+  return {
+    id: `task-${Date.now()}`,
+    operation: `fill: ${kv.role} x${count} in ${kv.city}, ${kv.state}`,
+    target_role: kv.role,
+    target_count: count,
+    target_city: kv.city,
+    target_state: kv.state,
+    approach_hint: kv.hint ?? "hybrid search",
+  };
+}
+
+// One-line summary of a log entry, so a human watching stdout can follow the
+// run without opening the dumped log. Defensive: models sometimes omit
+// optional fields (rationale, notes), so every access is guarded.
+function fmt(e: LogEntry): string {
+  const tag = `[t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;
+  const c = e.content ?? {};
+  const trim = (s: any, n: number) => String(s ?? "").slice(0, n);
+  if (e.kind === "tool_call")
+    return `${tag} ${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 80)}) — ${trim(c.rationale, 60)}`;
+  if (e.kind === "tool_result") {
+    if (c?.error) return `${tag} error: ${trim(c.error, 120)}`; // failed calls are logged as tool_result + error
+    const rows = c?.rows?.length ?? c?.sources?.length ?? undefined;
+    return `${tag} ${rows !== undefined ? `rows=${rows}` : JSON.stringify(c).slice(0, 80)}`;
+  }
+  if (e.kind === "critique") return `${tag} verdict=${c.verdict} — ${trim(c.notes, 80)}`;
+  if (e.kind === "propose_done")
+    return `${tag} ${c.fills?.length ?? 0} fills: ${(c.fills ?? []).map((f: Fill) => f.name).join(", ")}`;
+  if (e.kind === "consensus_done") return `${tag} ✓`;
+  if (e.kind === "plan") return `${tag} ${c.steps?.length ?? 0} steps: ${(c.steps ?? []).slice(0, 2).join(" / ")}`;
+  if (e.kind === "error") return `${tag} ${c.message ?? (typeof c === "string" ? c : JSON.stringify(c))}`; // never "[object Object]"
+  return `${tag} ${JSON.stringify(c).slice(0, 100)}`;
+}
+
+// Execute one tool call. The tool catalog in the prompt lists both the
+// registered Phase 12 tools AND a pseudo-tool "hybrid_search" for the
+// /vectors/hybrid endpoint — unify here so the executor doesn't need to
+// know which surface a capability lives on. Throws on bad args or a non-2xx reply.
+async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
+  if (name === "hybrid_search") {
+    const { sql_filter, question, index_name, k } = args;
+    if (!sql_filter || !question || !index_name) {
+      throw new Error(`hybrid_search needs sql_filter + question + index_name, got ${JSON.stringify(args)}`);
+    }
+    // Pass through to /vectors/hybrid. id_column defaults to worker_id server-side, which is what workers_500k uses.
+    // NOTE(review): assumes GATEWAY is the service previously hard-coded here as http://localhost:3100 — confirm.
+    const body: any = { sql_filter, question, index_name, top_k: k ?? 10, generate: false };
+    const res = await fetch(`${GATEWAY}/vectors/hybrid`, {
+      method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body),
+    });
+    if (!res.ok) throw new Error(`hybrid_search → ${res.status}: ${await res.text()}`); // don't parse an error reply as a result
+    return res.json();
+  }
+  if (name === "sql") {
+    const { query } = args;
+    if (!query || typeof query !== "string") throw new Error(`sql needs query (string), got ${JSON.stringify(args)}`);
+    if (!/^\s*SELECT/i.test(query)) throw new Error(`sql tool allows SELECT only: ${query}`);
+    return sqlQuery(query);
+  }
+  // Fall through to Phase 12 registry for any other named tool.
+  return callTool(name, args);
+}
+
+async function main() { // one run: parse task → executor/reviewer loop → write playbook JSON → seed playbook_memory
+  const task = parseTaskFromArg();
+  const log: LogEntry[] = [];
+  let turn = 0;
+  let consecutiveDrifts = 0;
+  // Tool errors keep their own streak. Sharing consecutiveDrifts let any non-drift reviewer
+  // verdict reset the count, so an executor failing every tool call never tripped the abort.
+  let consecutiveToolErrors = 0;
+  let sealed: { fills: Fill[]; approach: string } | null = null;
+
+  const append = (e: Omit<LogEntry, "at">): LogEntry => {
+    const full: LogEntry = { ...e, at: new Date().toISOString() };
+    log.push(full);
+    console.log(fmt(full));
+    return full;
+  };
+
+  console.log(`▶ task: ${task.operation}`);
+  console.log(`▶ executor=${EXECUTOR_MODEL}  reviewer=${REVIEWER_MODEL}`);
+  console.log();
+
+  try {
+    while (turn < MAX_TURNS && !sealed) {
+      turn += 1;
+
+      // --- EXECUTOR TURN ---
+      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), {
+        temperature: 0.2, max_tokens: 600,
+      });
+      let execAction: Action;
+      try {
+        execAction = parseAction(execRaw, "executor");
+      } catch (e) {
+        append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "error",
+          content: { message: (e as Error).message, raw: execRaw.slice(0, 400) } });
+        throw e;
+      }
+
+      append({ turn, role: "executor", model: EXECUTOR_MODEL,
+        kind: execAction.kind as any, content: execAction });
+
+      // If tool_call, execute and feed result back into the log. Tool
+      // validation / server errors come back as a tool_result with an
+      // `error` field — the executor reads its own error on the next turn
+      // and self-corrects (e.g. "oh, I forgot the `question` argument").
+      // This is softer than hard-failing the orchestrator: the whole
+      // point of two-agent collaboration is letting agents learn from
+      // immediate feedback instead of crashing the run.
+      if (execAction.kind === "tool_call") {
+        try {
+          const result = await executeToolCall(execAction.tool, execAction.args);
+          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trimResult(result) });
+          consecutiveToolErrors = 0;
+        } catch (e) {
+          append({ turn, role: "executor", model: EXECUTOR_MODEL,
+            kind: "tool_result",
+            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
+          // Soft failure: only MAX_CONSECUTIVE_DRIFTS tool errors without a successful call abort the run.
+          consecutiveToolErrors += 1;
+          if (consecutiveToolErrors >= MAX_CONSECUTIVE_DRIFTS) {
+            throw new Error(`aborting — ${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors, executor can't self-correct`);
+          }
+        }
+      }
+
+      // --- REVIEWER TURN ---
+      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), {
+        temperature: 0.1, max_tokens: 400,
+      });
+      let revAction: Action;
+      try {
+        revAction = parseAction(revRaw, "reviewer");
+      } catch (e) {
+        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "error",
+          content: { message: (e as Error).message, raw: revRaw.slice(0, 400) } });
+        throw e;
+      }
+      append({ turn, role: "reviewer", model: REVIEWER_MODEL,
+        kind: "critique", content: revAction });
+
+      if (revAction.kind !== "critique") throw new Error(`reviewer emitted non-critique: ${revAction.kind}`);
+
+      if (revAction.verdict === "drift") {
+        consecutiveDrifts += 1;
+        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
+          throw new Error(`aborting — ${MAX_CONSECUTIVE_DRIFTS} consecutive drift flags, executor can't self-correct`);
+        }
+      } else {
+        consecutiveDrifts = 0;
+      }
+
+      // Consensus: executor proposed done AND reviewer approved.
+      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
+        if (execAction.fills.length !== task.target_count) {
+          throw new Error(`consensus malformed — ${execAction.fills.length} fills vs target ${task.target_count}`);
+        }
+        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done",
+          content: { fills: execAction.fills } });
+        // Models sometimes omit rationale; fall back so `approach` is always a string.
+        sealed = { fills: execAction.fills, approach: execAction.rationale ?? "multi-agent → hybrid search" };
+      }
+    }
+
+    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns — task incomplete`);
+
+    // Write playbook entry matching the successful_playbooks schema.
+    const playbook = {
+      timestamp: new Date().toISOString(),
+      operation: task.operation,
+      approach: sealed.approach,
+      result: `${sealed.fills.length}/${task.target_count} filled → ${sealed.fills.map(f => f.name).join(", ")}`,
+      context: `executor=${EXECUTOR_MODEL} reviewer=${REVIEWER_MODEL} turns=${turn}`,
+      task,
+      fills: sealed.fills,
+      log,
+    };
+    await mkdir("./tests/multi-agent/playbooks", { recursive: true });
+    const path = join("./tests/multi-agent/playbooks", `${task.id}.json`);
+    await writeFile(path, JSON.stringify(playbook, null, 2));
+    console.log(`\n✓ playbook written: ${path}`);
+    console.log(`  ${playbook.result}`);
+
+    // Phase 19.5: write-through to playbook_memory. The sealed fills are
+    // the endorsement; next semantically-similar query will surface them
+    // higher. /seed bypasses the successful_playbooks ingest round-trip
+    // — when that ingest path ships, this block should switch to append
+    // + rebuild instead.
+    try {
+      // Seed context is what the embedding model actually sees alongside
+      // the operation — so it has to carry task-semantic content (role,
+      // city, scenario) rather than orchestrator bookkeeping. We stash
+      // the bookkeeping in the full playbook JSON instead (see playbook
+      // object above) where operators can grep it without it polluting
+      // the ranking signal.
+      const seedContext = task.approach_hint
+        ?? `${task.target_role} fill in ${task.target_city}, ${task.target_state}`;
+      const seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          operation: task.operation,
+          approach: sealed.approach || "multi-agent → hybrid search",
+          context: seedContext,
+          endorsed_names: sealed.fills.map(f => f.name),
+          append: true,
+        }),
+      });
+      if (seedRes.ok) {
+        const j = await seedRes.json() as any;
+        console.log(`  ↳ playbook_memory seeded: id=${j.playbook_id} entries=${j.entries_after}`);
+      } else {
+        console.warn(`  ↳ playbook_memory seed failed: ${seedRes.status} ${await seedRes.text()}`);
+      }
+    } catch (e) {
+      console.warn(`  ↳ playbook_memory seed errored: ${(e as Error).message}`);
+    }
+
+    process.exit(0);
+  } catch (e) {
+    console.error(`\n✗ ${(e as Error).message}`);
+    // Still persist the log for inspection.
+    await mkdir("./tests/multi-agent/playbooks", { recursive: true });
+    const path = join("./tests/multi-agent/playbooks", `${task.id}-FAILED.json`);
+    await writeFile(path, JSON.stringify({ task, error: (e as Error).message, log }, null, 2));
+    console.error(`  log dumped: ${path}`);
+    process.exit(1);
+  }
+}
+
+// Cap large payloads before they enter the log (and therefore the next prompt): 20 SQL rows, 12 hybrid sources.
+function trimResult(r: any): any {
+  if (r && Array.isArray(r.rows)) return { ...r, rows: r.rows.slice(0, 20), _trimmed: r.rows.length > 20 ? `${r.rows.length - 20} more rows` : undefined };
+  if (r && Array.isArray(r.sources)) return { ...r, sources: r.sources.slice(0, 12) };
+  return r;
+}
+
+main().catch(e => { console.error(`\n✗ ${(e as Error).message}`); process.exit(1); }); // parseTaskFromArg runs outside main's try — don't leave that rejection unhandled
diff --git a/tests/multi-agent/run_e2e_rated.ts b/tests/multi-agent/run_e2e_rated.ts
new file mode 100644
index 0000000..4f7d0a3
--- /dev/null
+++ b/tests/multi-agent/run_e2e_rated.ts
@@ -0,0 +1,400 @@
+// Two-agent x two-tasks parallel real-world test with per-playbook rating.
+//
+// Spawns two independent (executor, reviewer) pairs concurrently, each
+// driving a different staffing fill against the live substrate. After
+// each pair seals a playbook, the test verifies the fills against
+// workers_500k, confirms the seed reached playbook_memory, and re-runs the
+// same query with use_playbook_memory=true to prove the boost fires.
+//
+// Errors fail fast — any HTTP error, parse error, or rating failure is
+// rethrown so bun exits non-zero. Run with:
+//
+//   bun run tests/multi-agent/run_e2e_rated.ts
+//
+// VRAM note: both pairs call the same two Ollama models (mistral +
+// qwen2.5). Ollama queues at the model level, so "parallel" is concurrent
+// orchestration, not concurrent inference — the loops interleave on the
+// shared models. That's intentional: it stresses the same realistic
+// path two staffing coordinators would hit if they both opened the app
+// at 8am.
+
+import {
+  type LogEntry,
+  type TaskSpec,
+  type Action,
+  type Fill,
+  GATEWAY,
+  generate,
+  parseAction,
+  executorPrompt,
+  reviewerPrompt,
+  sqlQuery,
+  callTool,
+} from "./agent.ts";
+
+const EXECUTOR_MODEL = "mistral:latest"; // model passed to generate() for executor turns
+const REVIEWER_MODEL = "qwen2.5:latest"; // model passed to generate() for reviewer turns
+const MAX_TURNS = 12; // cap on loop iterations (one executor turn + one reviewer turn each)
+const MAX_CONSECUTIVE_DRIFTS = 3; // streak limit, applied separately to drift verdicts and to tool errors
+const INDEX_NAME = "workers_500k_v1"; // index_name sent with the boost re-query
+
+interface RunResult { // outcome of one runOrchestrator() call
+  task: TaskSpec;
+  ok: boolean; // true only when consensus was reached
+  turns: number; // loop iterations started
+  duration_secs: number; // wall-clock time, rounded to whole seconds
+  fills: Fill[]; // consensus fills; empty on failure
+  log: LogEntry[]; // every entry appended during the run
+  approach: string; // sealed approach text; "" on failure
+  error?: string; // failure message; set only when ok is false
+}
+
+// ────────────────────────── orchestrator (function form) ──────────────────────────
+
+async function runOrchestrator(task: TaskSpec, prefix: string): Promise<RunResult> { // one executor/reviewer pair on one task; `prefix` tags its stdout lines
+  const start = Date.now();
+  const log: LogEntry[] = [];
+  let turn = 0;
+  let consecutiveDrifts = 0;
+  // Track tool errors separately from drift verdicts. Reviewer saying
+  // "continue" or "approve_done" should NOT reset a streak of malformed
+  // tool calls — that's a different failure mode (model can't form the
+  // call) than "executor is on the wrong path" (model is off-topic).
+  let consecutiveToolErrors = 0;
+  let sealed: { fills: Fill[]; approach: string } | null = null;
+
+  const append = (e: Omit<LogEntry, "at">): LogEntry => {
+    const full: LogEntry = { ...e, at: new Date().toISOString() };
+    log.push(full);
+    console.log(`[${prefix}] [t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}] ${shortContent(e)}`);
+    return full;
+  };
+
+  try {
+    while (turn < MAX_TURNS && !sealed) {
+      turn += 1;
+
+      // Executor
+      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 600 });
+      const execAction = parseAction(execRaw, "executor"); // a throw here is handled by the outer catch (ok:false)
+      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });
+
+      if (execAction.kind === "tool_call") {
+        try {
+          const result = await executeToolCall(execAction.tool, execAction.args);
+          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trimResult(result) });
+          consecutiveToolErrors = 0;
+        } catch (e) {
+          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
+            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
+          consecutiveToolErrors += 1;
+          if (consecutiveToolErrors >= MAX_CONSECUTIVE_DRIFTS) {
+            throw new Error(`${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors — executor can't form a valid call`);
+          }
+        }
+      }
+
+      // Reviewer
+      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 400 });
+      const revAction = parseAction(revRaw, "reviewer");
+      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });
+
+      if (revAction.kind !== "critique") throw new Error(`reviewer non-critique: ${revAction.kind}`);
+      if (revAction.verdict === "drift") {
+        consecutiveDrifts += 1;
+        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) throw new Error(`${MAX_CONSECUTIVE_DRIFTS} consecutive drifts`);
+      } else consecutiveDrifts = 0;
+
+      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") { // consensus reached
+        if (execAction.fills.length !== task.target_count) {
+          throw new Error(`fills=${execAction.fills.length} target=${task.target_count}`);
+        }
+        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", content: { fills: execAction.fills } });
+        sealed = { fills: execAction.fills, approach: (execAction as any).rationale ?? "multi-agent → hybrid" };
+      }
+    }
+
+    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);
+
+    // Phase 19 write-through: seed playbook_memory so the next semantically
+    // similar query benefits from this fill. Mirrors orchestrator.ts. Names
+    // are the consensus fills' display names — that's what the boost-key
+    // matcher (city, state, name) will look up against worker chunks.
+    try {
+      const seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
+        method: "POST", headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          operation: task.operation,
+          approach: sealed.approach || "multi-agent → hybrid search",
+          context: task.approach_hint ?? `${task.target_role} fill in ${task.target_city}, ${task.target_state}`,
+          endorsed_names: sealed.fills.map(f => f.name),
+          append: true,
+        }),
+      });
+      if (!seedRes.ok) {
+        console.warn(`[${prefix}] seed warning: ${seedRes.status} ${await seedRes.text()}`);
+      } else {
+        const j = await seedRes.json() as any;
+        console.log(`[${prefix}] ↳ seeded playbook_memory: id=${j.playbook_id} entries=${j.entries_after}`);
+      }
+    } catch (e) {
+      console.warn(`[${prefix}] seed errored: ${(e as Error).message}`); // seeding is best-effort: the run still returns ok:true
+    }
+
+    return {
+      task, ok: true, turns: turn, fills: sealed.fills, approach: sealed.approach,
+      duration_secs: Math.round((Date.now() - start) / 1000), log,
+    };
+  } catch (e) { // any throw inside the loop becomes an ok:false result instead of propagating
+    return {
+      task, ok: false, turns: turn, fills: [], approach: "",
+      duration_secs: Math.round((Date.now() - start) / 1000), log,
+      error: (e as Error).message,
+    };
+  }
+}
+
+async function executeToolCall(name: string, args: Record<string, any>): Promise<any> { // routes one executor tool call to its backend
+  switch (name) {
+    case "hybrid_search": {
+      const { sql_filter, question, index_name, k } = args;
+      if (!(sql_filter && question && index_name)) throw new Error(`hybrid_search needs sql_filter+question+index_name`);
+      const payload = { sql_filter, question, index_name, top_k: k ?? 10, generate: false, use_playbook_memory: true };
+      const res = await fetch(`${GATEWAY}/vectors/hybrid`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(payload) });
+      if (!res.ok) throw new Error(`hybrid_search → ${res.status}: ${await res.text()}`);
+      return res.json();
+    }
+    case "sql": {
+      if (!args.query || typeof args.query !== "string") throw new Error("sql needs query");
+      if (!/^\s*SELECT/i.test(args.query)) throw new Error("sql allows SELECT only");
+      return sqlQuery(args.query);
+    }
+    default: return callTool(name, args); // every other tool name is forwarded to callTool
+  }
+}
+
+function trimResult(r: any): any { // caps what gets logged: at most 20 SQL rows or 12 hybrid sources
+  if (!r) return r;
+  if (Array.isArray(r.rows)) return { ...r, rows: r.rows.slice(0, 20) };
+  return Array.isArray(r.sources) ? { ...r, sources: r.sources.slice(0, 12) } : r;
+}
+
+function shortContent(e: Omit<LogEntry, "at">): string { // compact one-line summary of an entry's content, chosen by kind
+  const c: any = e.content ?? {};
+  switch (e.kind) {
+    case "tool_call": return `${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 70)})`;
+    case "tool_result":
+      if (c.error) return `error: ${c.error}`;
+      if (Array.isArray(c.sources)) return `hybrid sql=${c.sql_matches} reranked=${c.vector_reranked}`;
+      return Array.isArray(c.rows) ? `sql ${c.rows.length} rows` : JSON.stringify(c).slice(0, 80);
+    case "critique": return `verdict=${c.verdict} ${(c.notes ?? "").slice(0, 60)}`;
+    case "propose_done": return `${(c.fills ?? []).length} fills: ${(c.fills ?? []).map((f: Fill) => f.name).join(", ")}`;
+    case "consensus_done": return "✓";
+    case "plan": return `${(c.steps ?? []).length} steps`;
+    default: return JSON.stringify(c).slice(0, 80);
+  }
+}
+
+// ────────────────────────── playbook rating ──────────────────────────
+
+interface Rating {
+  geo: number;            // 0-2: one point per fill whose worker row is in the target city/state (capped at 2)
+  authenticity: number;   // 0-2: one point per fill whose candidate_id resolves to a workers_500k row (capped at 2)
+  persistence: number;    // 0-2: 2 when playbook_memory grew by exactly 1 entry, 1 when it grew by more
+  boost_firing: number;   // 0-3: 3 when ≥2 follow-up hits carry a boost, 2 when exactly 1 does
+  speed: number;          // 0-1: 1 when the run took ≤ 240 s
+  total: number;          // sum of the five components, out of 10
+  notes: string[];        // human-readable explanations collected while rating
+}
+
+interface MemoryStats { entries: number; total_names_endorsed: number } // shape read from GET /vectors/playbook_memory/stats
+
+async function fetchMemoryStats(): Promise<MemoryStats> {
+  const response = await fetch(`${GATEWAY}/vectors/playbook_memory/stats`); // plain GET; the reply is returned as-is
+  if (response.ok) return (await response.json()) as MemoryStats;
+  throw new Error(`stats → ${response.status}`);
+}
+
+// Resolve a fill's candidate_id to a workers_500k row; null when it carries no usable id or no row matches.
+// Accepts "W500K-7995" (vector doc_id with prefix) and "7995" (raw worker_id), as a string or a number.
+async function lookupWorker(candidate_id: string): Promise<{ worker_id: number; name: string; city: string; state: string; role: string } | null> {
+  // First digit run only: concatenating every digit would read "7995 (age 41)" as 799541.
+  const digits = String(candidate_id ?? "").trim().replace(/^W500K-/i, "").match(/\d+/)?.[0];
+  const num = Number(digits ?? NaN);
+  if (!Number.isSafeInteger(num)) return null; // only a plain integer is ever interpolated into the SQL below
+  const r = await sqlQuery(`SELECT worker_id, name, city, state, role FROM workers_500k WHERE worker_id = ${num} LIMIT 1`);
+  return r?.rows?.[0] ?? null;
+}
+
+// Re-run a hybrid query that mirrors the contract — proves the freshly
+// seeded playbook actually lifts a future search. Reports boosted hits in the top 10, up to 5 citations, and the largest boost.
+async function verifyBoostFires(task: TaskSpec): Promise<{ boostedHits: number; sampleCitations: string[]; topBoost: number }> {
+  // Mirror the contract's actual geo. The playbook stored (city, state)
+  // from the operation; if the verify SQL doesn't restrict to the same
+  // city, the candidate pool may not include the seeded workers and the
+  // boost has nothing to lift. The contract pattern in production also
+  // includes city — recruiters fill specific cities, not whole states.
+  const lit = (v: string) => `'${String(v).replace(/'/g, "''")}'`; // SQL string literal; quotes doubled for every field, state included
+  const sql_filter = `role = ${lit(task.target_role)} AND state = ${lit(task.target_state)} AND city = ${lit(task.target_city)}`;
+  const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
+    method: "POST", headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      index_name: INDEX_NAME, filter_dataset: "workers_500k", id_column: "worker_id",
+      sql_filter, question: `${task.target_role} in ${task.target_city}, ${task.target_state}`,
+      top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
+    }),
+  });
+  if (!r.ok) throw new Error(`verify hybrid → ${r.status}: ${await r.text()}`);
+  const j = (await r.json()) as any;
+  const sources: any[] = j.sources ?? [];
+  const boosted = sources.filter(s => (s.playbook_boost ?? 0) > 0);
+  const cites = boosted.flatMap(s => s.playbook_citations ?? []).slice(0, 5);
+  const top = sources.reduce((m, s) => Math.max(m, s.playbook_boost ?? 0), 0);
+  return { boostedHits: boosted.length, sampleCitations: cites, topBoost: top };
+}
+
+async function ratePlaybook(
+  result: RunResult,
+  statsBefore: MemoryStats,
+  statsAfter: MemoryStats,
+): Promise<Rating> { // scores one run out of 10; lookup and re-query failures are recorded as notes
+  const notes: string[] = [];
+  let geo = 0, authenticity = 0, persistence = 0, boost_firing = 0, speed = 0;
+  const same = (a: unknown, b: unknown) => String(a ?? "").trim().toLowerCase() === String(b ?? "").trim().toLowerCase();
+
+  // 1. Geo + authenticity per fill (one point each per fill, capped at 2)
+  for (const f of result.fills) {
+    let w: Awaited<ReturnType<typeof lookupWorker>> = null;
+    // A failed lookup is reported as a failure, not as "worker does not exist".
+    try { w = await lookupWorker(f.candidate_id); } catch (e) { notes.push(`✗ lookup for candidate_id ${f.candidate_id} failed: ${(e as Error).message}`); continue; }
+    if (!w) { notes.push(`✗ candidate_id ${f.candidate_id} not in workers_500k`); continue; }
+    authenticity += 1;
+    if (same(w.city, result.task.target_city) && same(w.state, result.task.target_state)) {
+      geo += 1;
+    } else {
+      notes.push(`◑ ${w.name} (id=${w.worker_id}) is in ${w.city}, ${w.state}, not ${result.task.target_city}, ${result.task.target_state}`);
+    }
+  }
+  geo = Math.min(geo, 2);
+  authenticity = Math.min(authenticity, 2);
+
+  // 2. Persistence
+  const grew = statsAfter.entries - statsBefore.entries;
+  if (grew === 1) { persistence = 2; notes.push(`✓ playbook_memory grew by exactly 1`); }
+  else if (grew >= 1) { persistence = 1; notes.push(`◑ playbook_memory grew by ${grew} (expected 1)`); }
+  else { notes.push(`✗ playbook_memory did not grow (before=${statsBefore.entries} after=${statsAfter.entries})`); }
+
+  // 3. Boost firing — re-run the same query and see if it lifts anything
+  const v = await verifyBoostFires(result.task).catch(e => { notes.push(`✗ verify hybrid failed: ${(e as Error).message}`); return null; });
+  if (v) {
+    // boostedHits === 0 implies topBoost === 0, so there is no reachable 1-point tier.
+    boost_firing = v.boostedHits >= 2 ? 3 : v.boostedHits === 1 ? 2 : 0;
+    notes.push(`boost re-query: ${v.boostedHits}/10 hits boosted, top=+${v.topBoost.toFixed(3)}, citations=${v.sampleCitations.slice(0, 3).join(",")}`);
+  }
+
+  // 4. Speed
+  if (result.duration_secs <= 240) speed = 1;
+  else notes.push(`◑ slow: ${result.duration_secs}s (>240)`);
+
+  const total = geo + authenticity + persistence + boost_firing + speed;
+  return { geo, authenticity, persistence, boost_firing, speed, total, notes };
+}
+
+/** Renders a rating as a single line: each dimension as `score/max`, then the total out of 10. */
+function fmtRating(r: Rating): string {
+  const dimensions = [
+    `geo=${r.geo}/2`,
+    `auth=${r.authenticity}/2`,
+    `persist=${r.persistence}/2`,
+    `boost=${r.boost_firing}/3`,
+    `speed=${r.speed}/1`,
+  ];
+  return `${dimensions.join(" ")} → ${r.total}/10`;
+}
+
+// ────────────────────────── main ──────────────────────────
+
+/**
+ * Entry point of the parallel end-to-end test.
+ *
+ * Runs two independent fill tasks concurrently, measures playbook_memory
+ * before and after, rates each successful run with `ratePlaybook`, prints a
+ * summary, and enforces a hard gate: any rating below 5/10 fails the test.
+ *
+ * @throws Error when both orchestrators fail, or when the rating gate fails.
+ */
+async function main() {
+  const taskA: TaskSpec = {
+    id: `e2e-A-${Date.now()}`,
+    operation: "fill: Welder x2 in Toledo, OH",
+    target_role: "Welder", target_count: 2, target_city: "Toledo", target_state: "OH",
+    approach_hint: "hybrid_search against workers_500k_v1 with sql_filter on role+city+state, then sql verify",
+  };
+  const taskB: TaskSpec = {
+    id: `e2e-B-${Date.now()}`,
+    operation: "fill: Forklift Operator x2 in Nashville, TN",
+    target_role: "Forklift Operator", target_count: 2, target_city: "Nashville", target_state: "TN",
+    approach_hint: "hybrid_search against workers_500k_v1 with sql_filter on role+city+state, then sql verify",
+  };
+
+  console.log(`▶ parallel real-world test`);
+  console.log(`  A: ${taskA.operation}`);
+  console.log(`  B: ${taskB.operation}`);
+  console.log(`  models: executor=${EXECUTOR_MODEL}  reviewer=${REVIEWER_MODEL}\n`);
+
+  const statsBefore = await fetchMemoryStats();
+  console.log(`▶ playbook_memory before: ${statsBefore.entries} entries, ${statsBefore.total_names_endorsed} endorsed names\n`);
+
+  // Run both pairs in parallel. Each is its own (executor, reviewer)
+  // conversation; they do NOT see each other's logs.
+  const [resA, resB] = await Promise.all([
+    runOrchestrator(taskA, "A"),
+    runOrchestrator(taskB, "B"),
+  ]);
+
+  console.log(`\n▶ both orchestrators returned`);
+  console.log(`  A: ok=${resA.ok} turns=${resA.turns} ${resA.duration_secs}s ${resA.error ?? ""}`);
+  console.log(`  B: ok=${resB.ok} turns=${resB.turns} ${resB.duration_secs}s ${resB.error ?? ""}`);
+
+  if (!resA.ok && !resB.ok) {
+    throw new Error(`both orchestrators failed — substrate or models in bad state`);
+  }
+
+  const statsMid = await fetchMemoryStats();
+  console.log(`\n▶ playbook_memory after both runs: ${statsMid.entries} entries (+${statsMid.entries - statsBefore.entries})\n`);
+
+  // Rate each successful playbook. Only one combined before/after measurement
+  // exists, so when both runs succeeded the *measured* growth is split: A is
+  // credited with at most one entry and B with whatever remains. Nothing is
+  // assumed — if memory did not grow, neither run scores persistence.
+  const ratings: Array<{ id: string; ok: boolean; rating?: Rating; error?: string }> = [];
+  const totalGrowth = statsMid.entries - statsBefore.entries;
+  const growthForA = resA.ok && resA.fills.length > 0
+    ? Math.min(1, Math.max(0, totalGrowth))
+    : 0;
+
+  if (resA.ok) {
+    // Use the real measured numbers directly when they're unambiguous
+    // (only one task succeeded).
+    const afterForA: MemoryStats = resB.ok
+      ? { entries: statsBefore.entries + growthForA, total_names_endorsed: statsBefore.total_names_endorsed }
+      : statsMid;
+    const ra = await ratePlaybook(resA, statsBefore, afterForA);
+    ratings.push({ id: "A", ok: true, rating: ra });
+  } else ratings.push({ id: "A", ok: false, error: resA.error });
+
+  if (resB.ok) {
+    const beforeForB: MemoryStats = resA.ok
+      ? { entries: statsBefore.entries + growthForA, total_names_endorsed: statsBefore.total_names_endorsed }
+      : statsBefore;
+    const rb = await ratePlaybook(resB, beforeForB, statsMid);
+    ratings.push({ id: "B", ok: true, rating: rb });
+  } else ratings.push({ id: "B", ok: false, error: resB.error });
+
+  console.log(`\n▶ Per-playbook ratings:\n`);
+  for (const r of ratings) {
+    if (!r.ok) {
+      console.log(`  ${r.id}: FAILED — ${r.error}`);
+      continue;
+    }
+    console.log(`  ${r.id}: ${fmtRating(r.rating!)}`);
+    for (const n of r.rating!.notes) console.log(`     ${n}`);
+  }
+
+  const totals = ratings.filter(r => r.rating).map(r => r.rating!.total);
+  if (totals.length === 0) {
+    throw new Error(`no playbooks rated — both orchestrators failed`);
+  }
+  const min = Math.min(...totals);
+  const avg = totals.reduce((s, t) => s + t, 0) / totals.length;
+  console.log(`\n▶ Summary: avg=${avg.toFixed(1)}/10  min=${min}/10`);
+
+  // Hard gate: any rating below 5 means the loop is broken end-to-end.
+  if (min < 5) throw new Error(`rating gate failed — min ${min}/10 (need ≥5)`);
+
+  console.log(`\n✓ end-to-end real-world test passed`);
+  process.exit(0);
+}
+
+// Any failure (including the rating gate) prints the message and stack, then
+// exits non-zero so a CI runner sees the test as failed.
+main().catch(e => {
+  console.error(`\n✗ ${(e as Error).message}`);
+  if ((e as any).stack) console.error((e as any).stack);
+  process.exit(1);
+});
diff --git a/tests/multi-agent/scenario.ts b/tests/multi-agent/scenario.ts
new file mode 100644
index 0000000..d7faa0a
--- /dev/null
+++ b/tests/multi-agent/scenario.ts
@@ -0,0 +1,822 @@
+// A day in the life — the real-world scenario test.
+//
+// Runs six events against the live substrate: baseline_fill, recurring,
+// expansion, emergency, misplacement, retrospective. Each event
+// exercises a different pressure pattern; each one produces actionable
+// artifacts (SMS drafts, client emails, dispatch log) alongside the
+// ranking output; the run as a whole is self-audited at EOD against six
+// gap categories (supply, embedding, fairness, drift, tool, write-through).
+//
+// Design notes:
+// - Compressed clock. The "08:00" in an event spec is a label for the
+//   output, not a wall-clock gate. The full scenario runs in minutes.
+// - One script, shared state. Each event mutates the same roster +
+//   gap_signals + artifacts in-memory, then persists at EOD.
+// - Fail-soft per event. A drift-abort or tool error on one event
+//   records a gap_signal and moves on; we explicitly want to see which
+//   events the substrate can't handle, not abort the whole run.
+// - Every fill event routes through the same executor/reviewer loop as
+//   the single-task orchestrator — just driven in sequence rather than
+//   standalone, with event-specific extra constraints in the prompt.
+
+import {
+  type LogEntry,
+  type TaskSpec,
+  type Action,
+  type Fill,
+  callTool,
+  hybridSearch,
+  sqlQuery,
+  generate,
+  parseAction,
+  executorPrompt,
+  reviewerPrompt,
+  GATEWAY,
+} from "./agent.ts";
+import { mkdir, writeFile, appendFile } from "node:fs/promises";
+import { join } from "node:path";
+
+// Model names passed to generate() for the executor and reviewer turns.
+const EXECUTOR_MODEL = "mistral:latest";
+const REVIEWER_MODEL = "qwen2.5:latest";
+const DRAFT_MODEL = "qwen2.5:latest";     // artifact generation; short outputs
+// Upper bound on executor/reviewer turns per event before giving up.
+const MAX_TURNS = 14;
+// Abort threshold for the drift / tool-error counter in the fill loop.
+const MAX_CONSECUTIVE_DRIFTS = 3;
+// Index name quoted in each task's approach_hint.
+const WORKERS_INDEX = "workers_500k_v1";
+// Table queried when resolving a worker by (name, city, state).
+const WORKERS_DATASET = "workers_500k";
+
+// =================== Event + scenario types ===================
+
+// The kinds of fill event a scenario can contain; per-event guidance text
+// is selected by this tag.
+type EventKind = "baseline_fill" | "recurring" | "expansion" | "emergency" | "misplacement";
+
+// One staffing request inside a scenario: what to fill, where, and any
+// event-specific context that is passed along to the agents.
+interface FillEvent {
+  kind: EventKind;
+  at: string;            // display label like "08:00"
+  role: string;
+  count: number;
+  city: string;
+  state: string;
+  shift_start?: string;  // "08:00 AM" for SMS/email drafts
+  scenario_note?: string; // extra context the agents should know
+  deadline?: string;     // emergency events carry this, shown to reviewer
+  exclude_worker_ids?: string[];  // misplacement: the lost worker
+  replaces_event?: string;        // misplacement back-ref for reporting
+}
+
+// A whole day's script: the client, the date label, and the ordered events.
+interface ScenarioSpec {
+  client: string;
+  date: string;
+  events: FillEvent[];
+}
+
+// Outcome of running a single event, successful or not.
+interface EventResult {
+  event: FillEvent;
+  ok: boolean;
+  fills: Fill[];
+  turns: number;
+  duration_secs: number;
+  error?: string;
+  gap_signals: string[];   // pulled into the cross-event gap report
+  sources_first_score?: number;
+  sources_last_score?: number;
+  pool_size?: number;       // sql_matches from the first hybrid_search
+  playbook_citations?: string[];
+}
+
+// One booking on the day's roster.
+interface RosterEntry {
+  worker_id: string;
+  name: string;
+  booked_for: string;       // event at-label
+  role: string;
+  city: string;
+  state: string;
+  status: "confirmed" | "no_show" | "rebooked_elsewhere";
+}
+
+// Mutable state shared by every event in a run: the spec, the output
+// directory for artifacts, the accumulating roster, and collected results.
+interface ScenarioContext {
+  spec: ScenarioSpec;
+  out_dir: string;
+  roster: RosterEntry[];
+  results: EventResult[];
+  gap_signals: Array<{ event: string; category: string; detail: string }>;
+}
+
+// =================== Default scenario ===================
+
+// Built-in scenario: five fill events for a single client, all in Toledo, OH,
+// ordered by their "at" label. The final misplacement event refers back to
+// the 08:00 baseline fill via `replaces_event`.
+// NOTE(review): the patch header dates this commit "Mon, 20 Apr 2026", which
+// makes 2026-04-21 a Tuesday, while the baseline note below says "Monday" —
+// confirm which one is intended.
+const DEFAULT_SCENARIO: ScenarioSpec = {
+  client: "Riverfront Steel",
+  date: "2026-04-21",
+  events: [
+    {
+      kind: "baseline_fill",
+      at: "08:00",
+      role: "Warehouse Associate",
+      count: 3,
+      city: "Toledo",
+      state: "OH",
+      shift_start: "08:00 AM",
+      scenario_note: "Regular Monday morning shift, 8-hour.",
+    },
+    {
+      kind: "recurring",
+      at: "10:30",
+      role: "Machine Operator",
+      count: 2,
+      city: "Toledo",
+      state: "OH",
+      shift_start: "11:00 AM",
+      scenario_note: "Recurring Tuesday/Thursday slot — prior workers may still be available.",
+    },
+    {
+      kind: "expansion",
+      at: "12:15",
+      role: "Forklift Operator",
+      count: 5,
+      city: "Toledo",
+      state: "OH",
+      shift_start: "01:00 PM",
+      scenario_note: "New warehouse location opening, five-worker team needed.",
+    },
+    {
+      kind: "emergency",
+      at: "14:00",
+      role: "Loader",
+      count: 4,
+      city: "Toledo",
+      state: "OH",
+      shift_start: "04:00 PM same day",
+      deadline: "16:00",
+      scenario_note: "Walkoff incident — replacement crew needed by 16:00 sharp.",
+    },
+    {
+      kind: "misplacement",
+      at: "15:45",
+      role: "Warehouse Associate",
+      count: 1,
+      city: "Toledo",
+      state: "OH",
+      shift_start: "remainder of 08:00 shift",
+      scenario_note: "One worker from the 08:00 fill didn't show; rebuild the gap.",
+      replaces_event: "08:00",
+    },
+  ],
+};
+
+// =================== Low-level helpers shared across events ===================
+
+/**
+ * Minimal JSON-over-HTTP helper.
+ *
+ * Issues a GET when no body is supplied and a POST with a JSON-encoded body
+ * otherwise, then parses the response as JSON.
+ *
+ * @param url  Absolute URL to call.
+ * @param body Optional payload. `undefined`/`null` means "no body" (GET);
+ *             every other value — including falsy ones such as `0`, `""`
+ *             or `false` — is serialized and sent as a POST.
+ * @returns The parsed JSON response, typed as `T` (not validated).
+ * @throws Error carrying `<status> <response text>` on a non-2xx response.
+ */
+async function httpJson<T>(url: string, body?: any): Promise<T> {
+  // Decide on presence, not truthiness: a truthiness test silently turned
+  // legitimate falsy payloads into body-less GET requests.
+  const hasBody = body !== undefined && body !== null;
+  const res = await fetch(url, {
+    method: hasBody ? "POST" : "GET",
+    headers: { "Content-Type": "application/json" },
+    body: hasBody ? JSON.stringify(body) : undefined,
+  });
+  if (!res.ok) throw new Error(`${res.status} ${await res.text()}`);
+  return (await res.json()) as T;
+}
+
+/**
+ * Formats one log entry as a single console line: an indented
+ * `[tNN role kind]` tag followed by a short, kind-specific summary.
+ * Long payloads are clipped so each entry stays on one line.
+ */
+function fmt(e: LogEntry): string {
+  const turnLabel = e.turn.toString().padStart(2, "0");
+  const prefix = `    [t${turnLabel} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;
+  const body = e.content ?? {};
+  const clip = (value: any, max: number) => String(value ?? "").slice(0, max);
+
+  switch (e.kind) {
+    case "tool_call": {
+      const argsJson = JSON.stringify(body.args ?? {}).slice(0, 60);
+      return `${prefix} ${body.tool}(${argsJson}) — ${clip(body.rationale, 40)}`;
+    }
+    case "tool_result": {
+      if (body.error) return `${prefix} ERROR ${body.error}`;
+      // Prefer a row/source count; fall back to a clipped JSON dump.
+      const count = body?.rows?.length ?? body?.sources?.length ?? undefined;
+      const shown = count !== undefined ? `rows=${count}` : JSON.stringify(body).slice(0, 60);
+      return `${prefix} ${shown}`;
+    }
+    case "critique":
+      return `${prefix} verdict=${body.verdict} — ${clip(body.notes, 50)}`;
+    case "propose_done": {
+      const names = (body.fills ?? []).map((f: Fill) => f.name).join(", ");
+      return `${prefix} ${body.fills?.length ?? 0} fills: ${names}`;
+    }
+    case "consensus_done":
+      return `${prefix} ✓`;
+    case "plan":
+      return `${prefix} ${body.steps?.length ?? 0} steps`;
+    case "error":
+      return `${prefix} ${body.message ?? body}`;
+    default:
+      return `${prefix} ${JSON.stringify(body).slice(0, 70)}`;
+  }
+}
+
+/**
+ * Dispatches a tool call requested by the executor.
+ *
+ * - `hybrid_search` goes to the gateway's hybrid endpoint with the
+ *   playbook_memory boost always enabled.
+ * - `sql` accepts only statements that start with SELECT.
+ * - anything else is forwarded to `callTool`.
+ *
+ * @throws Error when required arguments are missing or the SQL is not a SELECT.
+ */
+async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
+  switch (name) {
+    case "hybrid_search": {
+      const { sql_filter, question, index_name, k } = args;
+      const incomplete = !sql_filter || !question || !index_name;
+      if (incomplete) {
+        throw new Error(`hybrid_search needs sql_filter + question + index_name, got ${JSON.stringify(args)}`);
+      }
+      // Every fill event uses the playbook_memory boost — earlier events in
+      // the run are meant to seed later ones.
+      const payload = {
+        sql_filter, question, index_name,
+        top_k: k ?? 10, generate: false,
+        use_playbook_memory: true,
+        playbook_memory_k: 10,
+      };
+      return httpJson(`${GATEWAY}/vectors/hybrid`, payload);
+    }
+    case "sql": {
+      const query = args.query;
+      if (!query || typeof query !== "string") throw new Error(`sql needs query string`);
+      if (!/^\s*SELECT/i.test(query)) throw new Error(`sql allows SELECT only`);
+      return sqlQuery(query);
+    }
+    default:
+      return callTool(name, args);
+  }
+}
+
+// =================== Core fill loop — one event, one consensus ===================
+
+// What runAgentFill returns once executor and reviewer have sealed a fill.
+interface AgentFillOutcome {
+  fills: Fill[];                    // the fills both agents agreed on
+  approach: string;                 // executor's rationale, or a default label
+  turns: number;                    // turns consumed before consensus
+  duration_secs: number;            // wall-clock time of the loop, in seconds
+  log: LogEntry[];                  // every entry appended during the loop
+  first_sql_matches?: number;       // sql_matches of the first hybrid_search result
+  first_pool_first_score?: number;  // score of the first source in that result
+  first_pool_last_score?: number;   // score of the last source in that result
+  playbook_citations: string[];     // de-duplicated citations seen on any result
+}
+
+/**
+ * Drives one executor/reviewer conversation until both agree on a set of
+ * fills for `task`, or the loop gives up.
+ *
+ * Each turn: the executor emits an action (tool calls are executed and their
+ * results logged), then the reviewer critiques the log so far. The fill is
+ * sealed when the executor proposes done and the reviewer approves in the
+ * same turn.
+ *
+ * @param task               The fill to perform.
+ * @param extra_guidance     Event-specific text appended to both prompts.
+ * @param exclude_worker_ids Workers that must not be proposed; enforced in
+ *                           the prompt, on tool results, and again at seal time.
+ * @returns The sealed fills plus loop statistics and the full log.
+ * @throws Error when an action cannot be parsed, the reviewer emits a
+ *         non-critique, the drift/tool-error counter reaches
+ *         MAX_CONSECUTIVE_DRIFTS, the sealed fill count differs from the
+ *         target, an excluded worker is proposed, or MAX_TURNS elapse
+ *         without consensus.
+ */
+async function runAgentFill(
+  task: TaskSpec,
+  extra_guidance: string,
+  exclude_worker_ids: string[],
+): Promise<AgentFillOutcome> {
+  const t0 = Date.now();
+  const log: LogEntry[] = [];
+  let turn = 0;
+  let consecutiveDrifts = 0;
+  let sealed: { fills: Fill[]; approach: string } | null = null;
+  let first_sql_matches: number | undefined;
+  let first_pool_first: number | undefined;
+  let first_pool_last: number | undefined;
+  const playbook_citations = new Set<string>();
+
+  // Timestamps the entry, stores it, and echoes it to the console.
+  const append = (e: Omit<LogEntry, "at">): LogEntry => {
+    const full: LogEntry = { ...e, at: new Date().toISOString() };
+    log.push(full);
+    console.log(fmt(full));
+    return full;
+  };
+
+  // Build executor prompt with the scenario-specific guidance + exclusions
+  // injected as an extra block. Reuses the base prompt so drift detection
+  // and output-shape rules are unchanged.
+  const withExtras = (base: string): string => {
+    let addon = "";
+    if (extra_guidance) addon += `\n\nEVENT-SPECIFIC GUIDANCE:\n${extra_guidance}`;
+    if (exclude_worker_ids.length > 0) {
+      addon += `\n\nEXCLUDE these workers (already booked / unavailable today): ${exclude_worker_ids.join(", ")}\nIf your tool results include them, skip them — never propose them.`;
+    }
+    return base + addon;
+  };
+
+  while (turn < MAX_TURNS && !sealed) {
+    turn += 1;
+
+    // --- Executor half of the turn ---
+    const execRaw = await generate(
+      EXECUTOR_MODEL,
+      withExtras(executorPrompt(task, log)),
+      { temperature: 0.2, max_tokens: 600 },
+    );
+    let execAction: Action;
+    try {
+      execAction = parseAction(execRaw, "executor");
+    } catch (e) {
+      // Log the unparseable output (clipped) before aborting the event.
+      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "error",
+        content: { message: (e as Error).message, raw: execRaw.slice(0, 300) } });
+      throw e;
+    }
+    append({ turn, role: "executor", model: EXECUTOR_MODEL,
+      kind: execAction.kind as any, content: execAction });
+
+    if (execAction.kind === "tool_call") {
+      try {
+        const result = await executeToolCall(execAction.tool, execAction.args);
+        // Filter tool results to enforce the exclusion list — defense in
+        // depth since the prompt alone isn't enough for weak models.
+        const filtered = maskExclusions(result, exclude_worker_ids);
+        // Capture the first hybrid_search pool stats for gap detection.
+        if (execAction.tool === "hybrid_search" && first_sql_matches === undefined) {
+          first_sql_matches = (filtered as any).sql_matches;
+          const sources = (filtered as any).sources ?? [];
+          if (sources.length > 0) {
+            first_pool_first = sources[0].score;
+            first_pool_last = sources[sources.length - 1].score;
+          }
+        }
+        const trimmed = trimResult(filtered);
+        append({ turn, role: "executor", model: EXECUTOR_MODEL,
+          kind: "tool_result", content: trimmed });
+
+        // Accumulate playbook citations from any hybrid result that
+        // carried them — the scenario-level report needs them.
+        if (Array.isArray((filtered as any).sources)) {
+          for (const s of (filtered as any).sources) {
+            for (const c of s.playbook_citations ?? []) {
+              playbook_citations.add(c);
+            }
+          }
+        }
+      } catch (e) {
+        // A failed tool call is logged as a tool_result so the agents can see it.
+        append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
+          content: { error: (e as Error).message, tool: execAction.tool } });
+        // NOTE(review): tool errors share the counter used for reviewer drift
+        // flags, and any non-drift verdict below resets it to 0 — so this
+        // "consecutive tool errors" abort appears reachable only when drift
+        // verdicts are interleaved. Confirm whether that is intended.
+        consecutiveDrifts += 1;
+        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
+          throw new Error(`aborted — ${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors`);
+        }
+      }
+    }
+
+    // --- Reviewer half of the turn ---
+    const revRaw = await generate(
+      REVIEWER_MODEL,
+      withExtras(reviewerPrompt(task, log)),
+      { temperature: 0.1, max_tokens: 400 },
+    );
+    let revAction: Action;
+    try {
+      revAction = parseAction(revRaw, "reviewer");
+    } catch (e) {
+      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "error",
+        content: { message: (e as Error).message, raw: revRaw.slice(0, 300) } });
+      throw e;
+    }
+    // The entry is logged as a critique before its kind is validated below.
+    append({ turn, role: "reviewer", model: REVIEWER_MODEL,
+      kind: "critique", content: revAction });
+
+    if (revAction.kind !== "critique") throw new Error(`reviewer emitted non-critique: ${revAction.kind}`);
+
+    if (revAction.verdict === "drift") {
+      consecutiveDrifts += 1;
+      if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
+        throw new Error(`aborted — ${MAX_CONSECUTIVE_DRIFTS} consecutive drift flags`);
+      }
+    } else {
+      consecutiveDrifts = 0;
+    }
+
+    // Consensus requires a proposal and an approval within the same turn.
+    if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
+      if (execAction.fills.length !== task.target_count) {
+        throw new Error(`consensus malformed — ${execAction.fills.length} fills vs target ${task.target_count}`);
+      }
+      // Enforce exclusion at seal time too, in case the models ignored
+      // both prompt + tool-result filtering.
+      for (const f of execAction.fills) {
+        if (exclude_worker_ids.includes(f.candidate_id)) {
+          throw new Error(`consensus proposed excluded worker ${f.candidate_id}`);
+        }
+      }
+      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done",
+        content: { fills: execAction.fills } });
+      sealed = { fills: execAction.fills, approach: execAction.rationale ?? "multi-agent hybrid" };
+    }
+  }
+
+  if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);
+
+  return {
+    fills: sealed.fills,
+    approach: sealed.approach,
+    turns: turn,
+    duration_secs: (Date.now() - t0) / 1000,
+    log,
+    first_sql_matches,
+    first_pool_first_score: first_pool_first,
+    first_pool_last_score: first_pool_last,
+    playbook_citations: Array.from(playbook_citations),
+  };
+}
+
+/**
+ * Removes excluded workers from a tool result before the agents see it.
+ *
+ * Hybrid results are filtered on `sources[].doc_id`; SQL results on
+ * `rows[].worker_id` (falling back to `doc_id`). Rows that carry no id at
+ * all are kept. Any other result shape is returned untouched.
+ *
+ * Ids are matched in both their canonical `W500K-<n>` form and their bare
+ * numeric form, because SQL rows carry the bare `worker_id` while the
+ * exclusion list holds canonical ids — a plain string comparison would let
+ * every excluded worker through on SQL results.
+ *
+ * @param result  Raw tool result (may be null or a non-object).
+ * @param exclude Worker ids to drop.
+ * @returns The same result when nothing applies, otherwise a shallow copy
+ *          with the filtered `sources` / `rows`.
+ */
+function maskExclusions(result: any, exclude: string[]): any {
+  if (exclude.length === 0) return result;
+  // Tools may return null or a primitive; there is nothing to filter then.
+  if (result === null || typeof result !== "object") return result;
+
+  const blocked = new Set<string>();
+  for (const raw of exclude) {
+    const id = String(raw);
+    blocked.add(id);
+    const canonical = /^W500K-(\d+)$/.exec(id);
+    if (canonical) blocked.add(canonical[1]);
+    else if (/^\d+$/.test(id)) blocked.add(`W500K-${id}`);
+  }
+  const isBlocked = (id: unknown): boolean =>
+    id !== undefined && id !== null && blocked.has(String(id));
+
+  if (Array.isArray(result.sources)) {
+    return { ...result, sources: result.sources.filter((s: any) => !isBlocked(s?.doc_id)) };
+  }
+  if (Array.isArray(result.rows)) {
+    return { ...result, rows: result.rows.filter((r: any) => !isBlocked(r?.worker_id ?? r?.doc_id)) };
+  }
+  return result;
+}
+
+/**
+ * Caps a tool result at 20 entries before it is written to the log.
+ * Looks at `sources` first, then `rows`; the returned copy carries a
+ * `_trimmed` marker ("N more") when entries were dropped and `undefined`
+ * otherwise. Results with neither list are returned as-is.
+ */
+function trimResult(r: any): any {
+  const LIMIT = 20;
+  if (!r) return r;
+  for (const key of ["sources", "rows"]) {
+    const list = r[key];
+    if (!Array.isArray(list)) continue;
+    const overflow = list.length - LIMIT;
+    return {
+      ...r,
+      [key]: list.slice(0, LIMIT),
+      _trimmed: overflow > 0 ? `${overflow} more` : undefined,
+    };
+  }
+  return r;
+}
+
+// =================== Per-event guidance strings ===================
+
+/**
+ * Builds the event-specific guidance paragraph injected into the agent
+ * prompts. Each event kind gets its own wording; the client name, head
+ * count, role, deadline and shift start are filled in from the event and
+ * scenario where that kind's text uses them.
+ */
+function guidanceFor(event: FillEvent, ctx: ScenarioContext): string {
+  // Most kinds end with the same "Shift starts …" sentence.
+  const shiftStarts = `Shift starts ${event.shift_start ?? "at start time"}.`;
+
+  switch (event.kind) {
+    case "baseline_fill":
+      return [
+        `Standard Monday fill.`,
+        `Client ${ctx.spec.client}.`,
+        shiftStarts,
+        `Take the top candidates by semantic match and availability.`,
+      ].join(" ");
+    case "recurring":
+      return [
+        `RECURRING slot — ${ctx.spec.client} runs this shift every Tues/Thurs.`,
+        `If playbook_memory surfaces candidates endorsed by past similar fills (you'll see 'cites' on hybrid sources), those are the preferred workers.`,
+        shiftStarts,
+      ].join(" ");
+    case "expansion":
+      return [
+        `EXPANSION at ${ctx.spec.client}.`,
+        `New location, ${event.count}-worker team needed at once — search broadly and prefer workers with team/collaboration signals (engagement, communications scores).`,
+        shiftStarts,
+      ].join(" ");
+    case "emergency":
+      return [
+        `EMERGENCY walkoff — ${ctx.spec.client} needs ${event.count} ${event.role}s BY ${event.deadline ?? "end of day"}.`,
+        `Prioritize availability over perfect skill match.`,
+        `A good-enough worker who can report today beats a perfect worker who can't.`,
+      ].join(" ");
+    case "misplacement":
+      return [
+        `MISPLACEMENT refill.`,
+        `A worker from the 08:00 shift no-showed.`,
+        `You must replace them WITHOUT proposing the same worker or anyone already booked today (see EXCLUDE list).`,
+        `Shift is ${event.shift_start ?? "in progress"} so speed matters.`,
+      ].join(" ");
+  }
+}
+
+// =================== Artifact generation ===================
+
+// The two text drafts produced for every sealed fill.
+interface ArtifactBundle {
+  sms: string;    // per-worker confirmation messages, as returned by the draft model
+  email: string;  // confirmation email to the client, as returned by the draft model
+}
+
+// One Ollama call per event for SMS (to the filled workers) + one for
+// the client email. Short outputs, low temperature — these are drafts,
+// not creative writing.
+//
+// Builds both prompts from the event, the scenario's client name and the
+// fills in `outcome`, sends them to DRAFT_MODEL concurrently, and returns the
+// two responses with surrounding whitespace trimmed. A failure of either
+// generate call rejects the returned promise.
+async function generateArtifacts(event: FillEvent, outcome: AgentFillOutcome, ctx: ScenarioContext): Promise<ArtifactBundle> {
+  const smsPrompt = `Generate short, friendly, professional SMS messages to confirm a shift for each worker. ONE message per worker. Format as:
+
+TO: {Name}
+{message body under 180 chars}
+
+---
+
+Details:
+- Client: ${ctx.spec.client}
+- Role: ${event.role}
+- Location: ${event.city}, ${event.state}
+- Shift starts: ${event.shift_start ?? "TBD"}
+- Scenario: ${event.scenario_note ?? ""}
+
+Workers to message:
+${outcome.fills.map(f => `- ${f.name} (id ${f.candidate_id})`).join("\n")}
+
+Respond with only the message blocks, separated by "---". No commentary.`;
+
+  // The recipient address is derived from the client name: lower-cased with
+  // spaces removed, under the reserved ".example" domain.
+  const emailPrompt = `Generate a short professional email confirmation to the staffing client.
+
+TO: staffing@${ctx.spec.client.toLowerCase().replace(/ /g, "")}.example
+FROM: dispatch@lakehouse.example
+SUBJECT: (3-word subject)
+
+Body (4-6 lines max). Be specific about:
+- Number of workers filled (${outcome.fills.length} of ${event.count})
+- Roles: ${event.role}
+- Names filled
+- Shift start: ${event.shift_start ?? "TBD"}
+- Any scenario flag: ${event.scenario_note ?? "(none)"}
+
+Workers:
+${outcome.fills.map(f => `- ${f.name} (${f.reason.slice(0, 60)})`).join("\n")}
+
+Respond with only the email. No commentary.`;
+
+  // The two drafts are independent, so they are requested in parallel.
+  const [sms, email] = await Promise.all([
+    generate(DRAFT_MODEL, smsPrompt, { temperature: 0.3, max_tokens: 500 }),
+    generate(DRAFT_MODEL, emailPrompt, { temperature: 0.3, max_tokens: 400 }),
+  ]);
+
+  return { sms: sms.trim(), email: email.trim() };
+}
+
+// =================== Per-event runner ===================
+
+/**
+ * Runs one fill event end to end and records everything it produces.
+ *
+ * Steps: build the task, run the executor/reviewer loop with today's
+ * confirmed roster excluded, resolve the fills to canonical worker ids,
+ * update the roster (flagging double bookings), derive supply and embedding
+ * gap signals, write the SMS/email drafts and dispatch log, then seed
+ * playbook_memory so later events can benefit.
+ *
+ * A failure of the agent loop is fail-soft: it yields an `ok: false` result
+ * carrying a `drift_or_tool` gap signal instead of throwing. Artifact and
+ * seeding failures are likewise downgraded to gap signals.
+ *
+ * @param event The event to run.
+ * @param ctx   Shared scenario state; `ctx.roster` is mutated.
+ * @returns The event's result, including any gap signals raised.
+ */
+async function runEvent(event: FillEvent, ctx: ScenarioContext): Promise<EventResult> {
+  console.log(`\n════════ ${event.at} — ${event.kind.toUpperCase()}: fill ${event.count}× ${event.role} in ${event.city}, ${event.state} ════════`);
+
+  const t0 = Date.now();
+
+  // Build the task spec the agent loop expects.
+  const task: TaskSpec = {
+    id: `${ctx.spec.date}-${event.at.replace(":", "")}-${event.kind}`,
+    operation: `fill: ${event.role} x${event.count} in ${event.city}, ${event.state}`,
+    target_role: event.role,
+    target_count: event.count,
+    target_city: event.city,
+    target_state: event.state,
+    approach_hint: `hybrid search against ${WORKERS_INDEX} for ${event.kind}`,
+  };
+
+  // Exclusion set: everyone already in today's roster + any explicit
+  // exclusions from the event spec.
+  const excludeIds = [
+    ...ctx.roster
+      .filter(r => r.status === "confirmed")
+      .map(r => r.worker_id),
+    ...(event.exclude_worker_ids ?? []),
+  ];
+
+  const gap_signals: string[] = [];
+  let outcome: AgentFillOutcome;
+  try {
+    outcome = await runAgentFill(task, guidanceFor(event, ctx), excludeIds);
+  } catch (e) {
+    return {
+      event,
+      ok: false,
+      fills: [],
+      turns: 0,
+      duration_secs: (Date.now() - t0) / 1000,
+      error: (e as Error).message,
+      gap_signals: [`drift_or_tool: ${(e as Error).message}`],
+    };
+  }
+
+  // Resolve worker_ids via SQL so the roster has stable IDs (models
+  // sometimes return names-only). Best-effort — if name lookup finds
+  // zero or many matches, we flag a gap.
+  const resolved = await resolveWorkerIds(outcome.fills, event);
+
+  // Roster double-book check. resolveWorkerIds stores the canonical id in
+  // `candidate_id`, so that is the field the roster must be keyed on;
+  // otherwise every entry gets an undefined id, the exclusion list built
+  // from the roster is useless, and later fills all "conflict" with the first.
+  for (const r of resolved) {
+    const workerId = r.candidate_id;
+    const conflict = ctx.roster.find(e => e.worker_id === workerId && e.status === "confirmed");
+    if (conflict) {
+      gap_signals.push(`double_book: ${workerId} ${r.name} already booked for ${conflict.booked_for}`);
+    }
+    ctx.roster.push({
+      worker_id: workerId,
+      name: r.name,
+      booked_for: event.at,
+      role: event.role,
+      city: event.city,
+      state: event.state,
+      status: "confirmed",
+    });
+  }
+
+  // Pool-size signal (Gap 1 — supply).
+  const supply_threshold = event.count * 3;
+  const pool = outcome.first_sql_matches;
+  if (pool === undefined || pool === null) {
+    // No hybrid_search result reported a pool size; say so explicitly
+    // instead of printing "only undefined candidates".
+    gap_signals.push(
+      `supply: pool size unknown for ${event.count}× ${event.role} in ${event.city} (no hybrid_search result reported sql_matches)`
+    );
+  } else if (pool < supply_threshold) {
+    gap_signals.push(
+      `supply: only ${pool} candidates for ${event.count}× ${event.role} in ${event.city} (< ${supply_threshold}, our 3× comfort margin)`
+    );
+  }
+
+  // Score-spread signal (Gap 2 — embedding).
+  const spread = (outcome.first_pool_first_score ?? 0) - (outcome.first_pool_last_score ?? 0);
+  if (spread > 0 && spread < 0.02) {
+    gap_signals.push(
+      `embedding: top-K score spread ${spread.toFixed(3)} < 0.02 — model struggles to differentiate`
+    );
+  }
+
+  // Generate artifacts (SMS + email) — fail-soft; artifacts are cosmetic
+  // relative to the consensus itself.
+  let bundle: ArtifactBundle | null = null;
+  try {
+    bundle = await generateArtifacts(event, { ...outcome, fills: resolved }, ctx);
+    await appendFile(join(ctx.out_dir, "sms.md"),
+      `\n## ${event.at} ${event.kind} — ${event.role} x${event.count} in ${event.city}, ${event.state}\n\n${bundle.sms}\n`);
+    await appendFile(join(ctx.out_dir, "emails.md"),
+      `\n## ${event.at} ${event.kind} — ${event.role} x${event.count}\n\n${bundle.email}\n`);
+  } catch (e) {
+    gap_signals.push(`artifact: ${(e as Error).message}`);
+  }
+
+  // Dispatch log (structured).
+  await appendFile(join(ctx.out_dir, "dispatch.jsonl"),
+    JSON.stringify({
+      at: event.at,
+      kind: event.kind,
+      operation: task.operation,
+      fills: resolved,
+      turns: outcome.turns,
+      duration_secs: outcome.duration_secs,
+      pool_size: outcome.first_sql_matches,
+      playbook_citations: outcome.playbook_citations,
+    }) + "\n");
+
+  // Always seed playbook_memory after a sealed fill — keep the learning
+  // loop tight across the whole day so recurring/misplacement events
+  // later in the run benefit from earlier ones.
+  try {
+    await httpJson(`${GATEWAY}/vectors/playbook_memory/seed`, {
+      operation: task.operation,
+      approach: outcome.approach || `${event.kind} → hybrid search`,
+      context: `client=${ctx.spec.client} scenario=${event.kind} shift=${event.shift_start ?? "tbd"}`,
+      endorsed_names: resolved.map(r => r.name),
+      append: true,
+    });
+  } catch (e) {
+    gap_signals.push(`write_through: ${(e as Error).message}`);
+  }
+
+  return {
+    event,
+    ok: true,
+    fills: outcome.fills,
+    turns: outcome.turns,
+    duration_secs: outcome.duration_secs,
+    gap_signals,
+    sources_first_score: outcome.first_pool_first_score,
+    sources_last_score: outcome.first_pool_last_score,
+    pool_size: outcome.first_sql_matches,
+    playbook_citations: outcome.playbook_citations,
+  };
+}
+
+// =================== Worker ID resolution ===================
+
+// Models emit candidate_ids or names in propose_done. Some return the
+// W500K-XXX doc_id, others just the name, others a random tag. Resolve
+// to canonical (worker_id, name) via SQL so the roster is reliable.
+/**
+ * Normalizes each fill's `candidate_id` to the canonical `W500K-<n>` form.
+ *
+ * Resolution order per fill:
+ *   1. already `W500K-<n>` — kept as-is;
+ *   2. a bare integer — prefixed with `W500K-`;
+ *   3. anything else — looked up by (name, city, state), first match wins.
+ *
+ * The lookup is strictly best-effort: a fill with no usable name, no match,
+ * or a failing query is returned unchanged rather than aborting the event.
+ *
+ * @param fills Fills as proposed by the agents.
+ * @param event The event being filled; supplies city and state for lookup.
+ * @returns One fill per input, in the same order.
+ */
+async function resolveWorkerIds(fills: Fill[], event: FillEvent): Promise<Fill[]> {
+  // Quote a value as a SQL string literal, doubling embedded single quotes.
+  const sqlLiteral = (value: string): string => `'${value.replace(/'/g, "''")}'`;
+
+  const resolved: Fill[] = [];
+  for (const f of fills) {
+    // Case 1: candidate_id looks like W500K-NNN — accept as-is.
+    if (/^W500K-\d+$/.test(f.candidate_id)) {
+      resolved.push(f);
+      continue;
+    }
+    // Case 2: candidate_id is a bare integer — promote to W500K-N.
+    if (/^\d+$/.test(f.candidate_id)) {
+      resolved.push({ ...f, candidate_id: `W500K-${f.candidate_id}` });
+      continue;
+    }
+    // Case 3: look up by (name, city, state). Take the first match.
+    // Model output may omit the name entirely; without one there is nothing
+    // to look up, and building the query would throw outside the try.
+    if (typeof f.name !== "string" || f.name.trim() === "") {
+      resolved.push(f);
+      continue;
+    }
+    try {
+      const q = `SELECT worker_id FROM ${WORKERS_DATASET} WHERE name = ${sqlLiteral(f.name)} AND city = ${sqlLiteral(event.city)} AND state = ${sqlLiteral(event.state)} LIMIT 1`;
+      const r = await sqlQuery(q);
+      if (r.rows && r.rows.length > 0) {
+        resolved.push({ ...f, candidate_id: `W500K-${r.rows[0].worker_id}` });
+      } else {
+        // No match — keep the fill but leave candidate_id as-is; the
+        // gap report will flag it.
+        resolved.push(f);
+      }
+    } catch {
+      // Lookup failed; keep the fill unresolved rather than lose it.
+      resolved.push(f);
+    }
+  }
+  return resolved;
+}
+
+// =================== EOD gap report ===================
+
+/**
+ * Renders the end-of-day retrospective (events table, final roster, gap
+ * signals grouped by category, narrative) and writes `<out_dir>/report.md`.
+ */
+async function writeRetrospective(ctx: ScenarioContext): Promise<void> {
+  const lines: string[] = [];
+  lines.push(`# Scenario retrospective — ${ctx.spec.client}, ${ctx.spec.date}`);
+  lines.push("");
+  lines.push(`Executor: \`${EXECUTOR_MODEL}\`   Reviewer: \`${REVIEWER_MODEL}\`   Draft: \`${DRAFT_MODEL}\``);
+  lines.push("");
+
+  // --- Per-event summary ---
+  lines.push("## Events");
+  lines.push("");
+  lines.push("| At | Kind | Role / Count | Pool | Fills | Turns | Dur(s) | Cites | Gaps |");
+  lines.push("|---|---|---|---|---|---|---|---|---|");
+  for (const r of ctx.results) {
+    const status = r.ok ? "✓" : "✗";
+    lines.push(`| ${r.event.at} | ${r.event.kind} | ${r.event.role} × ${r.event.count} | ${r.pool_size ?? "-"} | ${status} ${r.fills.length} | ${r.turns} | ${r.duration_secs.toFixed(1)} | ${r.playbook_citations?.length ?? 0} | ${r.gap_signals.length} |`);
+  }
+  lines.push("");
+
+  // --- Roster ---
+  lines.push("## Final roster");
+  lines.push("");
+  lines.push("| Worker | Booked | Role | City, ST | Status |");
+  lines.push("|---|---|---|---|---|");
+  for (const e of ctx.roster) lines.push(`| ${e.worker_id} ${e.name} | ${e.booked_for} | ${e.role} | ${e.city}, ${e.state} | ${e.status} |`);
+  lines.push("");
+
+  // --- Gap analysis by category ---
+  // Null-prototype map: category names are free-form text, so a key such as
+  // "constructor" must not resolve to an inherited Object.prototype member.
+  const bycat: Record<string, string[]> = Object.create(null);
+  for (const g of ctx.gap_signals) {
+    if (!bycat[g.category]) bycat[g.category] = [];
+    bycat[g.category].push(`**${g.event}** — ${g.detail}`);
+  }
+
+  // Add cross-event categories computed here:
+  // Gap 3 — fairness (Gini-lite on roster)
+  const bookedIds = ctx.roster.filter(r => r.status === "confirmed").map(r => r.worker_id);
+  const counts = new Map<string, number>();
+  for (const id of bookedIds) counts.set(id, (counts.get(id) ?? 0) + 1);
+  const multis = [...counts.entries()].filter(([_, n]) => n > 1);
+  if (multis.length > 0) {
+    bycat["fairness"] = bycat["fairness"] ?? [];
+    for (const [id, n] of multis) {
+      const name = ctx.roster.find(r => r.worker_id === id)?.name ?? id;
+      bycat["fairness"].push(`_cross-event_ — ${name} (${id}) booked ${n} times today`);
+    }
+  }
+
+  // Gap 5 — tool errors already captured per-event via gap_signals.
+
+  // Gap 6 — write-through coverage: compare # events vs # new playbook_memory entries.
+  try {
+    const stats = await httpJson<any>(`${GATEWAY}/vectors/playbook_memory/stats`);
+    bycat["write_through_audit"] = bycat["write_through_audit"] ?? [];
+    bycat["write_through_audit"].push(`_post-run_ — playbook_memory has ${stats.entries} entries (ran ${ctx.results.length} events, expected ≥ ${ctx.results.filter(r => r.ok).length} new entries from this run)`);
+  } catch { /* non-fatal */ }
+
+  lines.push("## Gap signals");
+  lines.push("");
+  if (Object.keys(bycat).length === 0) {
+    lines.push("_None surfaced — either everything worked or detection is under-tuned._");
+  } else {
+    for (const [cat, items] of Object.entries(bycat)) {
+      lines.push(`### ${cat}`);
+      for (const item of items) lines.push(`- ${item}`);
+      lines.push("");
+    }
+  }
+
+  // --- Narrative summary ---
+  lines.push("## Narrative");
+  lines.push("");
+  lines.push(`- ${ctx.results.filter(r => r.ok).length}/${ctx.results.length} events reached consensus.`);
+  lines.push(`- Final roster: ${ctx.roster.length} bookings across ${new Set(ctx.roster.map(r => r.worker_id)).size} distinct workers.`);
+  const totalCites = ctx.results.reduce((a, r) => a + (r.playbook_citations?.length ?? 0), 0);
+  lines.push(`- Playbook citations across the day: ${totalCites} (proof the feedback loop fired across events).`);
+  const droppedEvents = ctx.results.filter(r => !r.ok);
+  if (droppedEvents.length > 0) lines.push(`- Dropped events: ${droppedEvents.map(r => r.event.at + " " + r.event.kind).join(", ")}.`);
+
+  await writeFile(join(ctx.out_dir, "report.md"), lines.join("\n"));
+  console.log(`\n✓ report → ${join(ctx.out_dir, "report.md")}`);
+}
+
+// =================== Main driver ===================
+
+/**
+ * Scenario driver entry point. Loads the spec from the JSON file named in
+ * argv[2] (else DEFAULT_SCENARIO), runs each event through runEvent, then
+ * writes roster.json, results.json and the retrospective into a fresh
+ * timestamped directory. Exits 0 if every event is ok, otherwise 2.
+ */
+async function main() {
+  const specPath = process.argv[2];
+  const spec: ScenarioSpec = specPath
+    ? JSON.parse(await Bun.file(specPath).text())
+    : DEFAULT_SCENARIO;
+
+  const stamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
+  const out_dir = join("tests/multi-agent/playbooks", `scenario-${stamp}`);
+  await mkdir(out_dir, { recursive: true });
+
+  const ctx: ScenarioContext = { spec, out_dir, roster: [], results: [], gap_signals: [] };
+
+  // Initialize output files
+  await writeFile(join(out_dir, "sms.md"), `# SMS drafts — ${spec.client}, ${spec.date}\n`);
+  await writeFile(join(out_dir, "emails.md"), `# Client emails — ${spec.client}, ${spec.date}\n`);
+  await writeFile(join(out_dir, "dispatch.jsonl"), "");
+
+  console.log(`▶ scenario: ${spec.client}, ${spec.date}, ${spec.events.length} events`);
+  console.log(`▶ out: ${out_dir}\n`);
+
+  for (const event of spec.events) {
+    // Expand misplacement-style exclusions from the current roster: it
+    // wants to replace a worker from a prior event, so grab everyone
+    // booked at that at-label and add as exclusions.
+    if (event.kind === "misplacement" && event.replaces_event) {
+      const priorEntries = ctx.roster.filter(
+        r => r.booked_for === event.replaces_event && r.status === "confirmed"
+      );
+      const priorBooked = priorEntries.map(r => r.worker_id);
+      const lostEntry = priorEntries[0];
+      if (lostEntry) {
+        // Pick one arbitrarily to mark as no_show — in a real system the
+        // external signal would pick. For the test, first one works.
+        // Use the booking for the replaced event itself: a lookup by
+        // worker_id alone can land on that worker's booking elsewhere.
+        lostEntry.status = "no_show";
+        console.log(`   (misplacement: marking ${lostEntry.worker_id} ${lostEntry.name} as no-show)`);
+        // Exclude everyone booked for the replaced event, on top of any
+        // exclusions the spec already set for this event.
+        event.exclude_worker_ids = [...new Set([...(event.exclude_worker_ids ?? []), ...priorBooked])];
+      }
+    }
+
+    const result = await runEvent(event, ctx);
+    ctx.results.push(result);
+    for (const s of result.gap_signals) {
+      const [category, ...rest] = s.split(":");
+      ctx.gap_signals.push({ event: event.at, category: category.trim(), detail: rest.join(":").trim() });
+    }
+
+    // Small breather to not hammer Ollama on back-to-back runs.
+    await new Promise(r => setTimeout(r, 500));
+  }
+
+  // Persist structured state for forensics.
+  await writeFile(join(out_dir, "roster.json"), JSON.stringify(ctx.roster, null, 2));
+  await writeFile(join(out_dir, "results.json"), JSON.stringify(ctx.results, null, 2));
+
+  await writeRetrospective(ctx);
+
+  const okCount = ctx.results.filter(r => r.ok).length;
+  if (okCount < ctx.results.length) {
+    console.log(`\n⚠ ${okCount}/${ctx.results.length} events succeeded. See ${out_dir}/report.md for gaps.`);
+    process.exit(2);
+  }
+  console.log(`\n✓ ${okCount}/${ctx.results.length} events succeeded. See ${out_dir}/report.md.`);
+  process.exit(0);
+}
+
+main().catch((e: unknown) => { // a rejection is not guaranteed to be an Error instance
+  console.error(`\n✗ scenario driver crashed: ${e instanceof Error ? e.message : String(e)}`);
+  if (e instanceof Error) console.error(e.stack);
+  process.exit(1);
+});