Phase 19 wiring + Path 1/2 work + chain integrity fixes

Backend:
- crates/vectord/src/playbook_memory.rs (new): Phase 19 in-memory boost store with seed/rebuild/snapshot, plus temporal decay (e^(-age/30) per playbook), persist_to_sql endpoint backing successful_playbooks_live, and discover_patterns endpoint for meta-index pattern aggregation (recurring certs/skills/archetype/reliability across similar past fills).
- DEFAULT_TOP_K_PLAYBOOKS bumped 5 → 25; old default silently missed most boosts when memory had > 25 entries.
- service.rs: new routes /vectors/playbook_memory/{seed,rebuild,stats,persist_sql,patterns}.

Bun staffing co-pilot (mcp-server/):
- /search, /match, /verify, /proof, /simulation/run, MCP tools all forward use_playbook_memory:true and playbook_memory_k:25 to the hybrid endpoint. Boost was previously dark across the entire app.
- /log no longer POSTs to /ingest/file — that endpoint REPLACES the dataset's object list, so single-row CSV writes were wiping all prior rows in successful_playbooks (sp_rows went 33→1 in one /log call). /log now seeds playbook_memory with canonical short text and calls /persist_sql to keep successful_playbooks_live in sync.
- /simulation/run cumulative end-of-week CSV write removed for the same reason. Per-day per-contract /seed (added in this session) is the accumulating feedback path now.
- search.html addWorkerInsight renders a green "Endorsed · N playbooks" chip with playbook citations when boost > 0.

Internal Dioxus UI (crates/ui/):
- Dashboard phase list rewritten through Phase 19 (was stuck at "Phase 16: File Watcher" / "Phase 17: DB Connector" — both wrong).
- Removed fabricated "27ms" stat label.
- Ask tab examples + SQL default replaced with real staffing prompts against candidates/clients/job_orders (was referencing nonexistent employees/products/events).
- New Playbook tab exposes /vectors/playbook_memory/{stats,rebuild} and side-by-side hybrid search (boost OFF vs ON) with citations.
Tests (tests/multi-agent/):
- run_e2e_rated.ts: parallel two-agent (mistral + qwen2.5) build phase + verifier rating (geo, auth, persist, boost, speed → /10).
- network_proving.ts: continuous build → verify → repeat with staffing-recruiter profile hot-swap; geo-discrimination check.
- chain_of_custody.ts: single recruiter operation traced through every layer (Bun /search, direct /vectors/hybrid parity, /log, SQL, playbook_memory growth, profile activation, post-op boost lift).
2026-04-20 06:21:13 -05:00 · 2026-04-20 06:21:13 -05:00 · 25b7e6c3a7
commit 25b7e6c3a7
parent 8e3cac5812
13 changed files with 4566 additions and 84 deletions
--- a/crates/ui/assets/style.css
+++ b/crates/ui/assets/style.css
@ -205,3 +205,13 @@ tr:hover td { background: var(--accent-glow); }
    padding: 8px 12px; border-bottom: 1px solid var(--border); font-size: 13px;
 }
 .table-item:hover { background: var(--accent-glow); }
 /* Phase 19 — Playbook panel */
 /* Faint green tint for table rows that carry the "boosted-row" class. */
 .boosted-row { background: rgba(120, 200, 120, 0.10); }
 /* Slightly stronger green top border on every cell of a boosted row. */
 .boosted-row td { border-top: 1px solid rgba(120, 200, 120, 0.30); }
 /* Compact monospace cell for identifiers: content is kept on one line and
    clipped with an ellipsis once it exceeds 220px. */
 .mono-cell {
    font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
    font-size: 11px; color: var(--text-dim);
    max-width: 220px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
 }
 /* Vertical gap between consecutive panel sections (not before the first one). */
 .panel-section + .panel-section { margin-top: 18px; }
--- a/crates/ui/src/main.rs
+++ b/crates/ui/src/main.rs
@ -178,9 +178,116 @@ enum Tab {
    Explore,
    Sql,
    Ingest,
    Playbook,
    Status,
 }
 // --- Playbook memory types (Phase 19) ---
 /// Statistics payload decoded from the `/vectors/playbook_memory/stats`
 /// response and rendered as the stat cards of the Playbook tab.
 #[derive(Debug, Clone, Deserialize, PartialEq)]
 struct PlaybookStats {
    /// Shown in the UI as "Playbooks in Memory". Required in the payload.
    entries: usize,
    /// Shown in the UI as "Embedded". Required in the payload.
    entries_with_embeddings: usize,
    /// Shown in the UI as "Endorsed Worker-Tags"; falls back to 0 when the
    /// field is absent from the payload.
    #[serde(default)]
    total_names_endorsed: usize,
    /// Rows for the "Sample playbooks" table; empty when the field is absent.
    #[serde(default)]
    sample: Vec<PlaybookSample>,
 }
 /// One entry of [`PlaybookStats::sample`], rendered as a row of the
 /// "Sample playbooks" table (ID, Operation, Location, Endorsed).
 #[derive(Debug, Clone, Deserialize, PartialEq)]
 struct PlaybookSample {
    /// Playbook identifier; displayed in the "ID" column.
    id: String,
    /// Operation text; displayed in the "Operation" column.
    operation: String,
    /// City part of the "Location" column; `None` when missing from the payload.
    #[serde(default)]
    city: Option<String>,
    /// State part of the "Location" column; `None` when missing from the payload.
    #[serde(default)]
    state: Option<String>,
    /// Values joined with ", " for the "Endorsed" column; empty when missing.
    // NOTE(review): presumably the endorsed worker names — confirm against the backend.
    #[serde(default)]
    endorsed: Vec<String>,
 }
 /// Response body decoded from `POST /vectors/hybrid`.
 ///
 /// Every field is `#[serde(default)]`, so a payload that omits any of them
 /// still deserializes (numbers become 0, strings/vectors empty, `answer` `None`).
 #[derive(Debug, Clone, Deserialize, PartialEq)]
 struct HybridResp {
    // NOTE(review): presumably the number of rows matched by the SQL stage — confirm.
    #[serde(default)]
    sql_matches: usize,
    // NOTE(review): presumably the number of hits re-ranked by the vector stage — confirm.
    #[serde(default)]
    vector_reranked: usize,
    /// Displayed as `method=...` in the hit-table header line.
    #[serde(default)]
    method: String,
    /// Displayed as `...ms` in the hit-table header line.
    #[serde(default)]
    duration_ms: u64,
    /// Optional answer text; this UI always requests `"generate": false`.
    #[serde(default)]
    answer: Option<String>,
    /// The ranked hits rendered by `HybridHitTable`.
    #[serde(default)]
    sources: Vec<HybridSource>,
 }
 /// A single hit inside [`HybridResp::sources`]; one table row in `HybridHitTable`.
 #[derive(Debug, Clone, Deserialize, PartialEq)]
 struct HybridSource {
    /// Identifier shown in the "Doc" column. Required in the payload.
    doc_id: String,
    /// Hit text; the UI shows its first 120 characters as the "Snippet".
    chunk_text: String,
    /// Ranking score, displayed with three decimals in the "Score" column.
    score: f32,
    // NOTE(review): presumably set when the hit was confirmed by the SQL stage — confirm.
    #[serde(default)]
    sql_verified: bool,
    /// Boost contributed by playbook memory; rows with a value > 0 are
    /// highlighted and show `+<boost>` in the "Boost" column. Defaults to 0.
    #[serde(default)]
    playbook_boost: f32,
    /// Values joined with ", " for the "Citations" column; empty when missing.
    // NOTE(review): presumably ids of the playbooks that endorsed this hit — confirm.
    #[serde(default)]
    playbook_citations: Vec<String>,
 }
 /// One element of the JSON array decoded from `GET /vectors/indexes`; used to
 /// populate the index selector of the Playbook tab.
 #[derive(Debug, Clone, Deserialize, PartialEq)]
 struct IndexInfo {
    /// Name sent back to the server as `index_name` when searching.
    index_name: String,
    /// Source label; the panel prefers the index whose source is
    /// `"workers_500k"` as its default selection.
    source: String,
    /// Shown as "<n> chunks" in the selector; 0 when absent from the payload.
    #[serde(default)]
    chunk_count: usize,
    /// Backend label shown in the selector; empty when absent from the payload.
    #[serde(default)]
    vector_backend: String,
 }
 /// Fetches the playbook-memory statistics from the vector service.
 ///
 /// Issues `GET {api_base}/vectors/playbook_memory/stats` and decodes the JSON
 /// body into [`PlaybookStats`].
 ///
 /// # Errors
 /// Returns the stringified transport or decode error, or
 /// `"HTTP <status>: <body>"` when the server answers with a non-success status.
 async fn fetch_playbook_stats() -> Result<PlaybookStats, String> {
    let url = format!("{}/vectors/playbook_memory/stats", api_base());
    let response = match reqwest::get(&url).await {
        Ok(r) => r,
        Err(err) => return Err(err.to_string()),
    };
    // Capture the status first: reading the body below consumes the response.
    let status = response.status();
    if status.is_success() {
        return response.json::<PlaybookStats>().await.map_err(|err| err.to_string());
    }
    let body = response.text().await.unwrap_or_default();
    Err(format!("HTTP {}: {}", status, body))
 }
 /// Asks the vector service to rebuild its playbook memory.
 ///
 /// Issues `POST {api_base}/vectors/playbook_memory/rebuild` with an empty JSON
 /// object as the body and returns the server's JSON reply verbatim.
 ///
 /// # Errors
 /// Returns the stringified transport or decode error, or
 /// `"HTTP <status>: <body>"` when the server answers with a non-success status.
 async fn rebuild_playbook_memory() -> Result<serde_json::Value, String> {
    let http = reqwest::Client::new();
    let endpoint = format!("{}/vectors/playbook_memory/rebuild", api_base());
    let empty_body = serde_json::json!({});
    let response = http
        .post(&endpoint)
        .json(&empty_body)
        .send()
        .await
        .map_err(|err| err.to_string())?;
    // Capture the status first: reading the body below consumes the response.
    let status = response.status();
    if !status.is_success() {
        let body = response.text().await.unwrap_or_default();
        return Err(format!("HTTP {}: {}", status, body));
    }
    response.json::<serde_json::Value>().await.map_err(|err| err.to_string())
 }
 /// Fetches the list of vector indexes known to the vector service.
 ///
 /// Issues `GET {api_base}/vectors/indexes` and decodes the JSON array into
 /// a `Vec<IndexInfo>`.
 ///
 /// # Errors
 /// Returns the stringified transport or decode error, or
 /// `"HTTP <status>: <body>"` when the server answers with a non-success status.
 async fn fetch_indexes() -> Result<Vec<IndexInfo>, String> {
    let resp = reqwest::get(&format!("{}/vectors/indexes", api_base()))
        .await.map_err(|e| e.to_string())?;
    // Same status handling as the other fetchers in this file: without it a
    // 4xx/5xx reply would be reported as a misleading JSON decode error.
    if !resp.status().is_success() {
        return Err(format!("HTTP {}: {}", resp.status(), resp.text().await.unwrap_or_default()));
    }
    resp.json().await.map_err(|e| e.to_string())
 }
 /// Runs one hybrid search against the vector service.
 ///
 /// Issues `POST {api_base}/vectors/hybrid` with answer generation disabled
 /// (`"generate": false`).
 ///
 /// * `index_name` — index to search, sent as `index_name`.
 /// * `question` — query text, sent as `question`.
 /// * `use_playbook` — sent as `use_playbook_memory`; toggles the playbook boost.
 /// * `top_k` — number of hits requested, sent as `top_k`.
 ///
 /// # Errors
 /// Returns the stringified transport or decode error, or
 /// `"HTTP <status>: <body>"` when the server answers with a non-success status.
 async fn hybrid_search(index_name: &str, question: &str, use_playbook: bool, top_k: usize) -> Result<HybridResp, String> {
    let http = reqwest::Client::new();
    let endpoint = format!("{}/vectors/hybrid", api_base());
    let payload = serde_json::json!({
        "index_name": index_name,
        "question": question,
        "top_k": top_k,
        "generate": false,
        "use_playbook_memory": use_playbook,
    });
    let response = match http.post(&endpoint).json(&payload).send().await {
        Ok(r) => r,
        Err(err) => return Err(err.to_string()),
    };
    // Capture the status first: reading the body below consumes the response.
    let status = response.status();
    if status.is_success() {
        response.json::<HybridResp>().await.map_err(|err| err.to_string())
    } else {
        let body = response.text().await.unwrap_or_default();
        Err(format!("HTTP {}: {}", status, body))
    }
 }
 // --- App ---
 #[component]
@ -239,6 +346,11 @@ fn App() -> Element {
                        onclick: move |_| active_tab.set(Tab::Ingest),
                        "Ingest"
                    }
                    button {
                        class: if *active_tab.read() == Tab::Playbook { "tab active" } else { "tab" },
                        onclick: move |_| active_tab.set(Tab::Playbook),
                        "Playbook"
                    }
                    button {
                        class: if *active_tab.read() == Tab::Status { "tab active" } else { "tab" },
                        onclick: move |_| active_tab.set(Tab::Status),
@ -260,6 +372,7 @@ fn App() -> Element {
                    Tab::Explore => rsx! { ExplorePanel { datasets: datasets.read().clone() } },
                    Tab::Sql => rsx! { SqlPanel {} },
                    Tab::Ingest => rsx! { IngestPanel {} },
                    Tab::Playbook => rsx! { PlaybookPanel {} },
                    Tab::Status => rsx! { StatusPanel {} },
                }
            }
@ -354,14 +467,14 @@ fn AskPanel(datasets: Vec<Dataset>) -> Element {
        div { class: "panel ask-panel",
            div { class: "ask-hero",
                h2 { "Ask your data anything" }
-                p { class: "subtitle", "Natural language → SQL → Results. Powered by local AI." }
+                p { class: "subtitle", "Natural language → SQL → Results. Powered by local AI over the staffing dataset." }
            }
            div { class: "ask-input-row",
                input {
                    class: "ask-input",
                    value: "{question}",
-                    placeholder: "e.g. Which department has the highest average salary?",
+                    placeholder: "e.g. Which clients placed the most candidates last quarter?",
                    oninput: move |e| question.set(e.value()),
                    onkeydown: move |e| {
                        if e.key() == Key::Enter {
@ -432,10 +545,12 @@ fn AskPanel(datasets: Vec<Dataset>) -> Element {
            div { class: "ask-examples",
                "Try: "
-                button { class: "example-btn", onclick: move |_| question.set("Which department has the highest average salary?".into()), "highest avg salary by dept" }
+                button { class: "example-btn", onclick: move |_| question.set("How many candidates do we have by city?".into()), "candidates by city" }
-                button { class: "example-btn", onclick: move |_| question.set("Show me the top 3 most expensive products".into()), "top 3 expensive products" }
+                button { class: "example-btn", onclick: move |_| question.set("Top 10 clients by total placements".into()), "top clients by placements" }
-                button { class: "example-btn", onclick: move |_| question.set("How many events per action type?".into()), "events by action" }
+                button { class: "example-btn", onclick: move |_| question.set("Open job orders ordered by bill rate descending".into()), "open jobs by rate" }
-                button { class: "example-btn", onclick: move |_| question.set("List all employees who earn more than 90000".into()), "employees > 90k" }
+                button { class: "example-btn", onclick: move |_| question.set("Recruiters with the highest placement count".into()), "top recruiters" }
                button { class: "example-btn", onclick: move |_| question.set("Total billed hours per client last month".into()), "hours per client" }
                button { class: "example-btn", onclick: move |_| question.set("Cold leads: candidates we called more than 5 times but never placed".into()), "cold leads" }
            }
            if let Some(sql) = generated_sql.read().as_ref() {
@ -578,7 +693,7 @@ fn ExplorePanel(datasets: Vec<Dataset>) -> Element {
 #[component]
 fn SqlPanel() -> Element {
-    let mut query_text = use_signal(|| String::from("SELECT * FROM employees LIMIT 10"));
+    let mut query_text = use_signal(|| String::from("SELECT candidate_id, first_name, last_name, city, status FROM candidates LIMIT 10"));
    let mut result = use_signal(|| None::<Result<QueryResponse, String>>);
    let mut loading = use_signal(|| false);
@ -727,7 +842,7 @@ fn DashboardPanel() -> Element {
                    }
                    div { class: "stat-card accent",
                        div { class: "stat-value", "{s[\"hnsw_loaded\"]}" }
-                        div { class: "stat-label", "HNSW Indexes (27ms)" }
+                        div { class: "stat-label", "HNSW Indexes Loaded" }
                    }
                    div { class: "stat-card",
                        div { class: "stat-value", "{s[\"tools\"]}" }
@ -750,27 +865,27 @@ fn DashboardPanel() -> Element {
                    div { class: "arch-grid",
                        div { class: "arch-card",
                            div { class: "arch-title", "Ingest" }
-                            div { class: "arch-items", "CSV, JSON, PDF, Text, PostgreSQL, File Watcher" }
+                            div { class: "arch-items", "CSV · JSON · PDF (+OCR) · Text · Postgres · MySQL · Inbox watcher · Cron schedules" }
                        }
                        div { class: "arch-card",
                            div { class: "arch-title", "Storage" }
-                            div { class: "arch-items", "Parquet on Object Storage, Delta Writes, Compaction" }
+                            div { class: "arch-items", "Parquet on Object Storage · Delta writes · Compaction · Tombstones · Multi-bucket federation + rescue" }
                        }
                        div { class: "arch-card",
                            div { class: "arch-title", "Query" }
-                            div { class: "arch-items", "DataFusion SQL, MemCache (9.8x), Hot/Cold" }
+                            div { class: "arch-items", "DataFusion SQL · MemCache (9.8× hot) · Merge-on-read · AI-safe views" }
                        }
                        div { class: "arch-card",
-                            div { class: "arch-title", "AI" }
+                            div { class: "arch-title", "AI / Vector" }
-                            div { class: "arch-items", "Ollama (local), Embed, Generate, RAG, HNSW" }
+                            div { class: "arch-items", "Ollama (local) · Embed/Generate/RAG · HNSW (Parquet) · Lance IVF_PQ · Hybrid SQL+vector · Profile-scoped" }
                        }
                        div { class: "arch-card",
                            div { class: "arch-title", "Learning loop" }
                            div { class: "arch-items", "Playbook memory · Endorsement boost · Multi-agent orchestrator · Autotune agent (Pareto-promote)" }
                        }
                        div { class: "arch-card",
                            div { class: "arch-title", "Governance" }
-                            div { class: "arch-items", "Event Journal, PII Detection, Tool Registry, Access Control" }
+                            div { class: "arch-items", "Event journal · PII detection · Tool registry · Access control · Audit log · Catalog v2 metadata" }
                        }
                        div { class: "arch-card",
                            div { class: "arch-title", "Agents" }
                            div { class: "arch-items", "Workspaces, Handoff, Shortlists, Activity Logs" }
                        }
                    }
                }
@ -779,20 +894,23 @@ fn DashboardPanel() -> Element {
                    h3 { "Build Progression" }
                    div { class: "phase-list",
                        {rsx! {
-                            PhaseItem { num: "0-5", name: "Foundation", detail: "Storage, Catalog, DataFusion, AI, UI, gRPC" }
+                            PhaseItem { num: "0-5", name: "Foundation", detail: "Storage · Catalog · DataFusion · Ollama · UI · gRPC" }
-                            PhaseItem { num: "6", name: "Ingest Pipeline", detail: "CSV/JSON/PDF/Text auto-schema" }
+                            PhaseItem { num: "6", name: "Ingest Pipeline", detail: "CSV · JSON · PDF · Text · auto-schema · dedupe" }
-                            PhaseItem { num: "7", name: "Vector + RAG", detail: "Embed, Search, LLM Answers" }
+                            PhaseItem { num: "7", name: "Vector + RAG", detail: "Embed · brute-force cosine · LLM grounded answers" }
-                            PhaseItem { num: "8", name: "Hot Cache", detail: "9.8x speedup, Delta Writes" }
+                            PhaseItem { num: "8", name: "Hot Cache + Deltas", detail: "MemTable LRU · 9.8× speedup · merge-on-read · compaction" }
-                            PhaseItem { num: "8.5", name: "Agent Workspaces", detail: "Per-contract, Instant Handoff" }
+                            PhaseItem { num: "8.5", name: "Agent Workspaces", detail: "Per-contract · daily/weekly/monthly tiers · zero-copy handoff" }
-                            PhaseItem { num: "9", name: "Event Journal", detail: "Append-only Mutation History" }
+                            PhaseItem { num: "9", name: "Event Journal", detail: "Append-only mutation log · time-travel · audit" }
-                            PhaseItem { num: "10", name: "Rich Catalog", detail: "PII Detection, Lineage" }
+                            PhaseItem { num: "10", name: "Rich Catalog v2", detail: "PII auto-detection · lineage · freshness SLA · sensitivity" }
-                            PhaseItem { num: "11", name: "Embedding Versioning", detail: "Model-proof Vectors" }
+                            PhaseItem { num: "11", name: "Embedding Versioning", detail: "Per-index model+version · A/B · incremental re-embed" }
-                            PhaseItem { num: "12", name: "Tool Registry", detail: "6 Governed Actions + Audit" }
+                            PhaseItem { num: "12", name: "Tool Registry", detail: "Governed actions · param validation · audit · MCP-ready" }
-                            PhaseItem { num: "13", name: "Access Control", detail: "Role-based, Field-level" }
+                            PhaseItem { num: "13", name: "Access Control", detail: "Roles · field-level sensitivity · column masking · query audit" }
-                            PhaseItem { num: "14", name: "Schema Evolution", detail: "Diff Detection, AI Migration" }
+                            PhaseItem { num: "14", name: "Schema Evolution", detail: "Diff detection · AI migration prompts · versioned schemas" }
-                            PhaseItem { num: "15", name: "HNSW Index", detail: "100K Search in 27ms" }
+                            PhaseItem { num: "15", name: "HNSW + Trials", detail: "100K vectors · p50 873µs · trial journal · eval harness" }
-                            PhaseItem { num: "16", name: "File Watcher", detail: "Auto-ingest from Inbox" }
+                            PhaseItem { num: "16", name: "Hot-swap + Autotune", detail: "Promotion registry · rollback · ε-greedy agent · Pareto winner" }
-                            PhaseItem { num: "17", name: "DB Connector", detail: "PostgreSQL Import" }
+                            PhaseItem { num: "17", name: "Model Profiles + VRAM", detail: "ModelProfile manifests · scoped search · sequential model swap" }
                            PhaseItem { num: "18", name: "Lance hybrid backend", detail: "IVF_PQ build 14× faster · random fetch 112× · S3-native · per-profile routing" }
                            PhaseItem { num: "19", name: "Playbook memory", detail: "Feedback loop · endorsement boost (cap 0.25) · orchestrator write-through · citations" }
                            PhaseItem { num: "+", name: "Federation + Schedules", detail: "Multi-bucket · rescue fallback · error journal · MySQL · PDF OCR · cron ingest · catalog dedupe" }
                        }}
                    }
                }
@ -816,6 +934,285 @@ fn PhaseItem(num: String, name: String, detail: String) -> Element {
    }
 }
 // === PLAYBOOK — Phase 19 meta-index feedback loop ===
 /// Playbook tab: shows playbook-memory statistics, lets the operator trigger a
 /// rebuild, and runs the same hybrid search twice (boost OFF vs ON) so the
 /// effect of the playbook boost can be compared side by side.
 ///
 /// Data is loaded once on first render (stats + index list). A failure to
 /// load the index list is shown next to the index selector instead of being
 /// dropped, because without an index the comparison cannot run.
 #[component]
 fn PlaybookPanel() -> Element {
    let mut stats = use_signal(|| None::<Result<PlaybookStats, String>>);
    let mut indexes = use_signal(Vec::<IndexInfo>::new);
    // Last error from loading the index list, if any (None = loaded or not yet tried).
    let mut indexes_error = use_signal(|| None::<String>);
    let mut rebuild_status = use_signal(|| None::<Result<String, String>>);
    let mut rebuilding = use_signal(|| false);
    let mut loaded = use_signal(|| false);
    // Comparison state
    let mut selected_index = use_signal(String::new);
    let mut question = use_signal(|| String::from("reliable assembler in Detroit"));
    let mut top_k = use_signal(|| 10usize);
    let mut compare_loading = use_signal(|| false);
    let mut hits_off = use_signal(|| None::<Result<HybridResp, String>>);
    let mut hits_on = use_signal(|| None::<Result<HybridResp, String>>);
    let load_all = move || {
        spawn(async move {
            stats.set(Some(fetch_playbook_stats().await));
            match fetch_indexes().await {
                Ok(ix) => {
                    // Pick a default only if the user has not chosen one yet:
                    // prefer the "workers_500k" source, else the first index.
                    if selected_index.read().is_empty() {
                        if let Some(default) = ix.iter().find(|i| i.source == "workers_500k").or_else(|| ix.first()) {
                            selected_index.set(default.index_name.clone());
                        }
                    }
                    indexes.set(ix);
                    indexes_error.set(None);
                }
                // Surface the failure; otherwise the selector is silently empty
                // and "Run comparison" does nothing.
                Err(e) => indexes_error.set(Some(e)),
            }
        });
    };
    // One-shot initial load, guarded by `loaded` so re-runs of the effect are no-ops.
    use_effect(move || {
        if !*loaded.read() {
            loaded.set(true);
            load_all();
        }
    });
    let do_rebuild = move |_| {
        spawn(async move {
            rebuilding.set(true);
            rebuild_status.set(None);
            match rebuild_playbook_memory().await {
                Ok(v) => rebuild_status.set(Some(Ok(format!("rebuild ok — {}", v)))),
                Err(e) => rebuild_status.set(Some(Err(e))),
            }
            // Refresh stats afterward
            stats.set(Some(fetch_playbook_stats().await));
            rebuilding.set(false);
        });
    };
    let do_compare = move |_| {
        let idx = selected_index.read().clone();
        let q = question.read().clone();
        let k = *top_k.read();
        // Nothing to run without an index and a non-blank question.
        if idx.is_empty() || q.trim().is_empty() { return; }
        spawn(async move {
            compare_loading.set(true);
            hits_off.set(None);
            hits_on.set(None);
            // Run both sequentially so the embedding cache is shared
            hits_off.set(Some(hybrid_search(&idx, &q, false, k).await));
            hits_on.set(Some(hybrid_search(&idx, &q, true, k).await));
            compare_loading.set(false);
        });
    };
    rsx! {
        div { class: "panel",
            div { class: "ask-hero",
                h2 { "Playbook Memory" }
                p { class: "subtitle",
                    "Phase 19 feedback loop: past successful playbooks boost future search rankings. \
                     Endorsed workers from semantically similar past operations re-rank toward the top, \
                     with citations back to the playbook that endorsed them."
                }
            }
            // Stats card
            div { class: "panel-section",
                match stats.read().as_ref() {
                    None => rsx! { div { class: "loading", "loading playbook stats..." } },
                    Some(Err(e)) => rsx! { div { class: "error", "stats: {e}" } },
                    Some(Ok(s)) => rsx! {
                        div { class: "stat-grid",
                            div { class: "stat-card",
                                div { class: "stat-value", "{s.entries}" }
                                div { class: "stat-label", "Playbooks in Memory" }
                            }
                            div { class: "stat-card",
                                div { class: "stat-value", "{s.entries_with_embeddings}" }
                                div { class: "stat-label", "Embedded" }
                            }
                            div { class: "stat-card accent",
                                div { class: "stat-value", "{s.total_names_endorsed}" }
                                div { class: "stat-label", "Endorsed Worker-Tags" }
                            }
                        }
                    }
                }
                div { class: "sql-actions",
                    button {
                        class: "btn",
                        disabled: *rebuilding.read(),
                        onclick: do_rebuild,
                        if *rebuilding.read() { "rebuilding from successful_playbooks..." } else { "Rebuild from successful_playbooks" }
                    }
                }
                if let Some(s) = rebuild_status.read().as_ref() {
                    match s {
                        Ok(msg) => rsx! { div { class: "result-box", "{msg}" } },
                        Err(e) => rsx! { div { class: "error", "{e}" } },
                    }
                }
            }
            // Sample playbooks
            if let Some(Ok(s)) = stats.read().as_ref() {
                if !s.sample.is_empty() {
                    div { class: "panel-section",
                        h3 { "Sample playbooks" }
                        div { class: "table-wrap",
                            table {
                                thead { tr {
                                    th { "ID" }
                                    th { "Operation" }
                                    th { "Location" }
                                    th { "Endorsed" }
                                } }
                                tbody {
                                    for pb in s.sample.iter() {
                                        {
                                            // Show whatever part of the location is known;
                                            // only fall back to a dash when both are missing.
                                            let loc = match (pb.city.as_deref(), pb.state.as_deref()) {
                                                (Some(c), Some(st)) => format!("{c}, {st}"),
                                                (Some(only), None) | (None, Some(only)) => only.to_string(),
                                                (None, None) => "—".into(),
                                            };
                                            let endorsed = if pb.endorsed.is_empty() {
                                                "—".to_string()
                                            } else {
                                                pb.endorsed.join(", ")
                                            };
                                            let pid = pb.id.clone();
                                            let op = pb.operation.clone();
                                            rsx! {
                                                tr {
                                                    td { class: "mono-cell", title: "{pid}", "{pid}" }
                                                    td { "{op}" }
                                                    td { "{loc}" }
                                                    td { "{endorsed}" }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // Side-by-side comparison: boost OFF vs ON
            div { class: "panel-section",
                h3 { "See the boost — search compared" }
                p { class: "hint",
                    "Run the same query against the same index twice — once with playbook boost OFF and once ON. \
                     Hits with non-zero playbook_boost and citations are workers that past similar playbooks endorsed."
                }
                if let Some(e) = indexes_error.read().as_ref() {
                    div { class: "error", "indexes: {e}" }
                }
                div { class: "form-row",
                    label { "Index" }
                    select {
                        value: "{selected_index}",
                        onchange: move |e| selected_index.set(e.value()),
                        for ix in indexes.read().iter() {
                            option { value: "{ix.index_name}", "{ix.index_name} ({ix.source}, {ix.chunk_count} chunks, {ix.vector_backend})" }
                        }
                    }
                }
                div { class: "form-row",
                    label { "Question" }
                    input {
                        value: "{question}",
                        oninput: move |e| question.set(e.value()),
                        placeholder: "e.g. reliable assembler in Detroit"
                    }
                }
                div { class: "form-row",
                    label { "Top K" }
                    input {
                        r#type: "number",
                        value: "{top_k}",
                        oninput: move |e| {
                            // Ignore unparsable input; keep the value within 1..=50.
                            if let Ok(n) = e.value().parse::<usize>() { top_k.set(n.clamp(1, 50)); }
                        }
                    }
                }
                button {
                    class: "btn btn-ask",
                    disabled: *compare_loading.read(),
                    onclick: do_compare,
                    if *compare_loading.read() { "running both queries..." } else { "Run comparison" }
                }
                div { class: "explore-grid",
                    div { class: "ds-detail",
                        h3 { "Boost OFF (vanilla)" }
                        match hits_off.read().as_ref() {
                            None => rsx! { div { class: "empty", "—" } },
                            Some(Err(e)) => rsx! { div { class: "error", "{e}" } },
                            Some(Ok(r)) => rsx! { HybridHitTable { resp: r.clone() } },
                        }
                    }
                    div { class: "ds-detail",
                        h3 { "Boost ON (Phase 19)" }
                        match hits_on.read().as_ref() {
                            None => rsx! { div { class: "empty", "—" } },
                            Some(Err(e)) => rsx! { div { class: "error", "{e}" } },
                            Some(Ok(r)) => rsx! { HybridHitTable { resp: r.clone() } },
                        }
                    }
                }
            }
        }
    }
 }
 /// Renders the hits of one hybrid-search response as a table.
 ///
 /// Shows a header line (hit count, duration, method) followed by either a
 /// "no hits" placeholder or one row per source with rank, doc id, score,
 /// playbook boost, citations and a text snippet. Rows whose
 /// `playbook_boost` is greater than zero get the `boosted-row` class.
 #[component]
 fn HybridHitTable(resp: HybridResp) -> Element {
    rsx! {
        div { class: "results-info",
            "{resp.sources.len()} hits · {resp.duration_ms}ms · method={resp.method}"
        }
        if resp.sources.is_empty() {
            div { class: "empty-sm", "no hits" }
        } else {
            div { class: "table-wrap",
                table {
                    thead { tr {
                        th { "#" }
                        th { "Doc" }
                        th { "Score" }
                        th { "Boost" }
                        th { "Citations" }
                        th { "Snippet" }
                    } }
                    tbody {
                        for (i, h) in resp.sources.iter().enumerate() {
                            {
                                // Cut by characters (not bytes) so multi-byte text is never split,
                                // and mark the cut so a shortened snippet is recognisable.
                                let mut snippet: String = h.chunk_text.chars().take(120).collect();
                                if h.chunk_text.chars().nth(120).is_some() {
                                    snippet.push('…');
                                }
                                let cites = if h.playbook_citations.is_empty() {
                                    "—".to_string()
                                } else {
                                    h.playbook_citations.join(", ")
                                };
                                let row_class = if h.playbook_boost > 0.0 { "boosted-row" } else { "" };
                                let rank = i + 1;
                                let did = h.doc_id.clone();
                                let score = format!("{:.3}", h.score);
                                let boost = if h.playbook_boost > 0.0 { format!("+{:.3}", h.playbook_boost) } else { "—".into() };
                                rsx! {
                                    tr { class: "{row_class}",
                                        td { "{rank}" }
                                        // `.mono-cell` clips long values, so expose the full id on hover
                                        // like the other mono cells do.
                                        td { class: "mono-cell", title: "{did}", "{did}" }
                                        td { "{score}" }
                                        td { "{boost}" }
                                        td { class: "mono-cell", title: "{cites}", "{cites}" }
                                        td { "{snippet}" }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
 }
 // === INGEST — Data on-ramp ===
 #[component]
--- a/crates/vectord/src/lib.rs
+++ b/crates/vectord/src/lib.rs
@ -7,6 +7,7 @@ pub mod harness;
 pub mod hnsw;
 pub mod index_registry;
 pub mod jobs;
 pub mod playbook_memory;
 pub mod promotion;
 pub mod refresh;
 pub mod store;
--- a/crates/vectord/src/playbook_memory.rs
+++ b/crates/vectord/src/playbook_memory.rs
@ -0,0 +1,825 @@
 //! Phase 19: Playbook memory — the feedback loop that makes the index
 //! learn from real outcomes instead of just logging them.
 //!
 //! When an agent (multi-agent orchestrator or human operator) seals a
 //! successful playbook, it lands in the `successful_playbooks` dataset.
 //! Historically that was a write-only log. This module turns it into a
 //! re-ranking signal:
 //!
 //!   1. `rebuild` reads every row of `successful_playbooks`, embeds the
 //!      operation+approach+context as one vector per playbook, parses
 //!      out the worker names from the `result` column, and stores both
 //!      the vectors and the (playbook → names) endorsement map in memory.
 //!
 //!   2. At query time, the caller embeds the new operation text (e.g.
 //!      "fill: Welder x2 in Toledo, OH") and passes that vector to
 //!      `compute_boost_for`, which brute-force ranks past playbooks by
 //!      cosine similarity and returns a boost map keyed by
 //!      (city, state, worker_name) → `BoostEntry`. Each entry carries its
 //!      accumulated boost and the citing playbook_ids, so explanations
 //!      ("ranked higher because of 3 similar past fills in Toledo") are
 //!      free.
 //!
 //!   3. The `use_playbook_memory` flag on `/vectors/hybrid` adds those
 //!      boosts to matching search hits and re-sorts.
 //!
 //! Why brute force instead of another HNSW: `successful_playbooks` grows
 //! by operators, not automation. A few thousand rows is the realistic
 //! ceiling for years. Brute force at 10K × 768d is <10ms on this hardware
 //! — not worth the operational cost of another indexed surface.
 //!
 //! Persistence: the full in-memory state (every entry, including its
 //! embedding when present, plus the last-rebuild time) round-trips through
 //! `_playbook_memory/state.json` in primary storage so the cache
 //! survives restarts without a full rebuild.
 use std::collections::HashMap;
 use std::sync::Arc;
 use serde::{Deserialize, Serialize};
 use tokio::sync::RwLock;
 use aibridge::client::{AiClient, EmbedRequest};
 use object_store::ObjectStore;
 use storaged::ops;
 /// Object-store key under which the serialized `PlaybookMemoryState` is
 /// written by `persist` and read back by `load_from_storage`.
 const STATE_KEY: &str = "_playbook_memory/state.json";
 /// Maximum boost a single worker can accumulate across all similar past
 /// playbooks. Prevents one very popular worker from always winning.
 pub const MAX_BOOST_PER_WORKER: f32 = 0.25;
 /// Default number of past playbooks to consider when ranking the current
 /// operation. Bumped 5 → 25 on 2026-04-20 because at >100 entries in
 /// memory the old default missed too many relevant playbooks — boost
 /// silently failed even when the seeded workers were ideal matches.
 /// 25 is brute-force-cheap (sub-ms) and covers most live operator memory.
 pub const DEFAULT_TOP_K_PLAYBOOKS: usize = 25;
 /// Half-life of a playbook's contribution to boost, in days. A playbook
 /// 30 days old contributes half what a fresh one would; 60 days old, a
 /// quarter; etc. Per Path 1 (deepen statistical) — stale endorsements
 /// shouldn't dominate fresh signal. Recruiter trust depends on this.
 pub const BOOST_HALF_LIFE_DAYS: f32 = 30.0;
 /// Shape of one playbook in memory. The embedding is optional so we can
 /// round-trip a cached state without re-embedding; the rebuild path
 /// populates it.
 ///
 /// Serialized as-is into the persisted state, so every field (including
 /// `embedding` when present) is written to storage.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct PlaybookEntry {
    /// Identifier reported back in `BoostEntry::citations`. The rebuild
    /// path derives it from the timestamp and operation via `stable_id`.
    pub playbook_id: String,
    /// Operation text, e.g. "fill: Welder x2 in Toledo, OH".
    pub operation: String,
    /// `approach` column of the source row; part of the embedded text.
    pub approach: String,
    /// `context` column of the source row; part of the embedded text.
    pub context: String,
    /// Timestamp string as stored in the source row. The boost path tries
    /// to parse it as RFC 3339 for temporal decay and applies no decay
    /// when parsing fails.
    pub timestamp: String,
    /// Parsed out of `result` (e.g. "2/2 filled → Matthew Roberts, Amy Davis").
    /// Stored as raw names; matching against search results happens on
    /// (city, state, name) tuples at boost time.
    pub endorsed_names: Vec<String>,
    /// City + state parsed out of the operation string. Kept separately
    /// so boost matching doesn't re-parse on every query.
    pub city: Option<String>,
    pub state: Option<String>,
    /// Embedding of `operation + approach + context`. Option so persisted
    /// state can omit it on first load and have a later embed() fill in.
    /// Entries without an embedding are skipped when ranking.
    #[serde(default)]
    pub embedding: Option<Vec<f32>>,
 }
 /// Persisted / in-memory state.
 ///
 /// This is the exact shape serialized to `STATE_KEY`; `Default` yields an
 /// empty entry list with `last_rebuilt_at` of 0.
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 struct PlaybookMemoryState {
    /// Every playbook currently held in memory.
    entries: Vec<PlaybookEntry>,
    /// Unix epoch millis when the last rebuild completed. Caller can
    /// use this to gate "stale > N hours → trigger rebuild" behavior.
    /// Stamped by `set_entries` each time the entry list is replaced.
    last_rebuilt_at: i64,
 }
 /// Per-worker boost payload. `citations` lets the response layer show
 /// "boosted because of these past fills" without a second lookup.
 #[derive(Debug, Clone, Serialize)]
 pub struct BoostEntry {
    /// Accumulated boost for this worker; `compute_boost_for` caps it at
    /// `MAX_BOOST_PER_WORKER`.
    pub boost: f32,
    pub citations: Vec<String>, // playbook_ids that endorsed this worker
 }
 /// Live handle passed around the service. Clone-cheap (all state is
 /// inside one Arc<RwLock>).
 #[derive(Clone)]
 pub struct PlaybookMemory {
    /// Shared in-memory state; clones of this handle see the same data.
    state: Arc<RwLock<PlaybookMemoryState>>,
    /// Object store the state is persisted to and loaded from.
    store: Arc<dyn ObjectStore>,
 }
 impl PlaybookMemory {
    pub fn new(store: Arc<dyn ObjectStore>) -> Self {
        Self {
            state: Arc::new(RwLock::new(PlaybookMemoryState::default())),
            store,
        }
    }
    /// Best-effort load from primary storage. Missing = empty memory; the
    /// first `/rebuild` call will hydrate it.
    pub async fn load_from_storage(&self) -> Result<usize, String> {
        let data = match ops::get(&self.store, STATE_KEY).await {
            Ok(d) => d,
            Err(_) => return Ok(0),
        };
        let persisted: PlaybookMemoryState = serde_json::from_slice(&data)
            .map_err(|e| format!("parse playbook_memory state: {e}"))?;
        let n = persisted.entries.len();
        *self.state.write().await = persisted;
        tracing::info!("playbook_memory: loaded {n} entries from {STATE_KEY}");
        Ok(n)
    }
    async fn persist(&self) -> Result<(), String> {
        let snapshot = self.state.read().await.clone();
        let bytes = serde_json::to_vec_pretty(&snapshot).map_err(|e| e.to_string())?;
        ops::put(&self.store, STATE_KEY, bytes.into()).await
    }
    /// Replace the full in-memory state atomically and persist.
    pub async fn set_entries(&self, entries: Vec<PlaybookEntry>) -> Result<(), String> {
        let mut s = self.state.write().await;
        s.entries = entries;
        s.last_rebuilt_at = chrono::Utc::now().timestamp_millis();
        drop(s);
        self.persist().await
    }
    pub async fn entry_count(&self) -> usize {
        self.state.read().await.entries.len()
    }
    pub async fn snapshot(&self) -> Vec<PlaybookEntry> {
        self.state.read().await.entries.clone()
    }
    /// Given an operation's embedding, find the top-K most similar past
    /// playbooks (by cosine similarity) and return a per-worker boost map
    /// keyed by (city, state, name). Worker is matched by the tuple so a
    /// shared name across cities doesn't cross-pollinate.
    ///
    /// Boost formula: each qualifying playbook contributes
    /// `similarity * base_weight / n_workers` to each worker it endorsed,
    /// where `base_weight` is tuned to keep the cap realistic without
    /// forcing every result to saturate. Total per worker is capped at
    /// `MAX_BOOST_PER_WORKER`.
    pub async fn compute_boost_for(
        &self,
        query_embedding: &[f32],
        top_k_playbooks: usize,
        base_weight: f32,
    ) -> HashMap<(String, String, String), BoostEntry> {
        let entries = self.state.read().await.entries.clone();
        // Brute-force cosine. Empty / missing embeddings just skip.
        let mut scored: Vec<(f32, &PlaybookEntry)> = entries
            .iter()
            .filter_map(|e| e.embedding.as_ref().map(|v| (cosine(query_embedding, v), e)))
            .collect();
        scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
        scored.truncate(top_k_playbooks.max(1));
        let now = chrono::Utc::now();
        let mut boosts: HashMap<(String, String, String), BoostEntry> = HashMap::new();
        for (similarity, pb) in &scored {
            // Negative or near-zero similarity = not actually related;
            // skip so we don't inject noise when the memory is sparse.
            if *similarity <= 0.05 { continue; }
            let Some(city) = &pb.city else { continue; };
            let Some(state) = &pb.state else { continue; };
            let n_workers = pb.endorsed_names.len().max(1);
            // Path 1 — temporal decay. Older playbooks weight less. Failure
            // to parse the timestamp degrades to "no decay" (treat as fresh)
            // rather than dropping the entry entirely; keeps backward
            // compatibility with seed payloads that omitted timestamp.
            let decay = chrono::DateTime::parse_from_rfc3339(&pb.timestamp)
                .ok()
                .map(|t| {
                    let age_days = (now.signed_duration_since(t.with_timezone(&chrono::Utc))
                        .num_seconds() as f32) / 86400.0;
                    if age_days <= 0.0 { 1.0 }
                    else { (-age_days / BOOST_HALF_LIFE_DAYS).exp() }
                })
                .unwrap_or(1.0);
            let per_worker = similarity * base_weight * decay / (n_workers as f32);
            for name in &pb.endorsed_names {
                let key = (city.clone(), state.clone(), name.clone());
                let entry = boosts.entry(key).or_insert(BoostEntry {
                    boost: 0.0,
                    citations: Vec::new(),
                });
                entry.boost = (entry.boost + per_worker).min(MAX_BOOST_PER_WORKER);
                if !entry.citations.contains(&pb.playbook_id) {
                    entry.citations.push(pb.playbook_id.clone());
                }
            }
        }
        boosts
    }
 }
 /// Cosine similarity of two vectors, shared by the boost and pattern paths.
 ///
 /// When the slices differ in length only the common prefix is compared.
 /// Returns 0.0 if either compared prefix has zero magnitude (which
 /// includes empty input).
 fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let mut dot = 0.0_f32;
    let mut a_sq = 0.0_f32;
    let mut b_sq = 0.0_f32;
    // `zip` stops at the shorter slice, i.e. the shared prefix.
    for (x, y) in a.iter().zip(b.iter()) {
        dot += x * y;
        a_sq += x * x;
        b_sq += y * y;
    }
    if a_sq == 0.0 || b_sq == 0.0 {
        0.0
    } else {
        dot / (a_sq.sqrt() * b_sq.sqrt())
    }
 }
 // ---------------- Pattern discovery (Path 2 — meta-index) ----------------
 //
 // Phase 19's boost path answers "for THIS exact city + role, which workers
 // have we used before?" Pattern discovery answers a different question:
 // "for queries like this one, what TRAITS have past successful fills had
 // in common — even if no exact prior playbook covers this geo?"
 //
 // The discovered pattern surfaces signals the operator didn't query for:
 // e.g. "every successful Welder fill we've seen carried OSHA-10 + lockout
 // /tagout — you may want to filter on those." That's the meta-index
 // dimension of the original PRD: identify things we didn't know about.
 /// Result of `discover_patterns`: traits shared by the workers endorsed
 /// in past playbooks similar to a query.
 #[derive(Debug, Clone, Serialize)]
 pub struct PatternReport {
    /// The query text, echoed back.
    pub query: String,
    /// Number of past playbooks that were similar enough to be used.
    pub matched_playbooks: usize,
    /// Number of worker rows returned by the profile lookup.
    pub total_workers_examined: usize,
    /// Most frequent certifications among the examined workers (at most
    /// eight, each at or above the requested minimum frequency).
    pub common_certifications: Vec<TraitFreq>,
    /// Most frequent skills, selected the same way as certifications.
    pub common_skills: Vec<TraitFreq>,
    /// Archetype seen on the most examined workers; `None` if none had one.
    pub modal_archetype: Option<String>,
    /// Middle element of the sorted positive reliability values; 0.0 when
    /// there are none.
    pub reliability_p50: f64,
    /// Smallest positive reliability seen; 0.0 when there are none.
    pub reliability_min: f64,
    /// Largest positive reliability seen; 0.0 when there are none.
    pub reliability_max: f64,
    /// Ids of the matched playbooks.
    pub matched_playbook_ids: Vec<String>,
    /// Human-readable one-line summary of the findings.
    pub discovered_pattern: String,
    /// Wall-clock time spent producing the report, in seconds.
    pub duration_secs: f32,
 }
 /// One certification or skill and how often it occurred among the
 /// workers examined by `discover_patterns`.
 #[derive(Debug, Clone, Serialize)]
 pub struct TraitFreq {
    /// Trait text as it appeared in the worker row (trimmed).
    pub name: String,
    /// Number of occurrences counted across the examined worker rows.
    pub count: usize,
    /// `count` divided by the number of worker rows examined.
    pub frequency: f32,
 }
 /// Discover traits shared by workers endorsed in past playbooks that are
 /// similar to `query`.
 ///
 /// Steps: embed `query`, rank stored playbooks by cosine similarity (only
 /// similarities above 0.05 count, at most `top_k_playbooks` are kept), look
 /// up every endorsed worker in `workers_500k` by (name, city, state), then
 /// aggregate certifications, skills, archetype and reliability.
 ///
 /// `min_trait_frequency` is the minimum share of examined workers a
 /// certification or skill must reach to be reported. Ties in trait counts
 /// and archetype counts are broken alphabetically, so the same data always
 /// yields the same report.
 ///
 /// # Errors
 /// Returns a message when the embedding call fails or returns no vectors,
 /// or when the worker lookup query fails. Finding no similar playbooks, or
 /// no endorsed workers with a city and state, is reported as an `Ok`
 /// report that says so rather than as an error.
 pub async fn discover_patterns(
    memory: &PlaybookMemory,
    ai_client: &AiClient,
    catalog: &catalogd::registry::Registry,
    buckets: &Arc<storaged::registry::BucketRegistry>,
    query: &str,
    top_k_playbooks: usize,
    min_trait_frequency: f32,
 ) -> Result<PatternReport, String> {
    let t0 = std::time::Instant::now();
    // 1. Embed the query through the same nomic-embed-text model used
    //    for playbook embeddings, so cosine is meaningful.
    let resp = ai_client
        .embed(EmbedRequest { texts: vec![query.into()], model: None })
        .await
        .map_err(|e| format!("embed query: {e}"))?;
    let qv: Vec<f32> = resp
        .embeddings
        .first()
        .ok_or_else(|| "embed returned no vectors".to_string())?
        .iter()
        .map(|x| *x as f32)
        .collect();
    // 2. Find top-K most similar past playbooks (cosine over embeddings).
    //    Unrelated (<= 0.05) and NaN similarities are dropped before the
    //    sort: that selects the same playbooks as sort-truncate-filter and
    //    keeps the comparator a total order.
    let entries = memory.snapshot().await;
    let mut scored: Vec<(f32, &PlaybookEntry)> = entries
        .iter()
        .filter_map(|e| {
            let similarity = cosine(&qv, e.embedding.as_ref()?);
            (similarity > 0.05).then_some((similarity, e))
        })
        .collect();
    scored.sort_by(|a, b| b.0.total_cmp(&a.0));
    scored.truncate(top_k_playbooks);
    let matched: Vec<&PlaybookEntry> = scored.into_iter().map(|(_, e)| e).collect();
    if matched.is_empty() {
        return Ok(PatternReport {
            query: query.into(),
            matched_playbooks: 0,
            total_workers_examined: 0,
            common_certifications: vec![],
            common_skills: vec![],
            modal_archetype: None,
            reliability_p50: 0.0, reliability_min: 0.0, reliability_max: 0.0,
            matched_playbook_ids: vec![],
            discovered_pattern: "No similar past playbooks found.".into(),
            duration_secs: t0.elapsed().as_secs_f32(),
        });
    }
    // 3. Pull each endorsed worker's full profile from workers_500k.
    //    Restrict by (name, city, state) tuple so cross-city homonyms
    //    don't pollute the aggregate. A worker endorsed by several matched
    //    playbooks gets a single condition.
    // Values are embedded as SQL string literals; doubling single quotes
    // is the standard literal escape.
    let esc = |s: &str| s.replace('\'', "''");
    let mut seen: std::collections::HashSet<(&str, &str, &str)> =
        std::collections::HashSet::new();
    let mut conditions: Vec<String> = Vec::new();
    let mut matched_ids: Vec<String> = Vec::with_capacity(matched.len());
    for pb in matched.iter().copied() {
        matched_ids.push(pb.playbook_id.clone());
        let (Some(city), Some(state)) = (pb.city.as_deref(), pb.state.as_deref()) else { continue };
        for name in &pb.endorsed_names {
            if !seen.insert((name.as_str(), city, state)) {
                continue;
            }
            conditions.push(format!(
                "(name = '{}' AND city = '{}' AND state = '{}')",
                esc(name), esc(city), esc(state)
            ));
        }
    }
    if conditions.is_empty() {
        return Ok(PatternReport {
            query: query.into(),
            matched_playbooks: matched.len(),
            total_workers_examined: 0,
            common_certifications: vec![], common_skills: vec![],
            modal_archetype: None, reliability_p50: 0.0,
            reliability_min: 0.0, reliability_max: 0.0,
            matched_playbook_ids: matched_ids,
            discovered_pattern: "Matched playbooks but no endorsed names with city/state to lookup.".into(),
            duration_secs: t0.elapsed().as_secs_f32(),
        });
    }
    let sql = format!(
        "SELECT name, role, city, state, certifications, skills, archetype, \
         CAST(reliability AS DOUBLE) as reliability \
         FROM workers_500k WHERE {} LIMIT 500",
        conditions.join(" OR ")
    );
    let engine = queryd::context::QueryEngine::new(
        catalog.clone(), buckets.clone(), queryd::cache::MemCache::new(0),
    );
    let batches = engine.query(&sql).await.map_err(|e| format!("worker lookup: {e}"))?;
    // 4. Aggregate. Pipe-separated cert/skill lists, single-string archetype,
    //    numeric reliability. Frequencies are share-of-workers.
    use arrow::array::{Array, AsArray};
    let mut cert_counts: HashMap<String, usize> = HashMap::new();
    let mut skill_counts: HashMap<String, usize> = HashMap::new();
    let mut arch_counts: HashMap<String, usize> = HashMap::new();
    let mut reliabilities: Vec<f64> = Vec::new();
    let mut total = 0usize;
    // Read a string cell, accepting either string-view or i32-offset string
    // columns; missing column, other type, or null all read as "".
    let get_string = |b: &arrow::record_batch::RecordBatch, col: &str, row: usize| -> String {
        let Some(c) = b.column_by_name(col) else { return String::new(); };
        if let Some(arr) = c.as_string_view_opt() {
            if arr.is_null(row) { return String::new(); }
            return arr.value(row).to_string();
        }
        if let Some(arr) = c.as_string_opt::<i32>() {
            if arr.is_null(row) { return String::new(); }
            return arr.value(row).to_string();
        }
        String::new()
    };
    // Read a Float64 cell; missing column, other type, or null read as 0.0.
    let get_f64 = |b: &arrow::record_batch::RecordBatch, col: &str, row: usize| -> f64 {
        let Some(c) = b.column_by_name(col) else { return 0.0; };
        if let Some(arr) = c.as_primitive_opt::<arrow::datatypes::Float64Type>() {
            if arr.is_null(row) { return 0.0; }
            return arr.value(row);
        }
        0.0
    };
    for b in &batches {
        for row in 0..b.num_rows() {
            total += 1;
            let certs = get_string(b, "certifications", row);
            for c in certs.split(['|', ',']).map(|s| s.trim()).filter(|s| !s.is_empty() && *s != "none") {
                *cert_counts.entry(c.to_string()).or_insert(0) += 1;
            }
            let skills = get_string(b, "skills", row);
            for s in skills.split(['|', ',']).map(|s| s.trim()).filter(|s| !s.is_empty()) {
                *skill_counts.entry(s.to_string()).or_insert(0) += 1;
            }
            let arch = get_string(b, "archetype", row);
            if !arch.is_empty() {
                *arch_counts.entry(arch).or_insert(0) += 1;
            }
            // Non-positive values (including the 0.0 used for "missing")
            // are left out of the reliability statistics.
            let rel = get_f64(b, "reliability", row);
            if rel > 0.0 { reliabilities.push(rel); }
        }
    }
    let total_f = total.max(1) as f32;
    let to_freq = |m: HashMap<String, usize>, min: f32| -> Vec<TraitFreq> {
        let mut v: Vec<TraitFreq> = m.into_iter()
            .map(|(name, count)| TraitFreq { name, count, frequency: count as f32 / total_f })
            .filter(|t| t.frequency >= min)
            .collect();
        // Count descending, then name: HashMap iteration order is random,
        // so without the tie-break both the order and the top-8 cut vary
        // between identical calls.
        v.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.name.cmp(&b.name)));
        v.truncate(8);
        v
    };
    let common_certifications = to_freq(cert_counts, min_trait_frequency);
    let common_skills = to_freq(skill_counts, min_trait_frequency);
    // Highest count wins; equal counts resolve to the alphabetically first
    // archetype for the same determinism reason.
    let modal_archetype = arch_counts.into_iter()
        .max_by(|(a_name, a_count), (b_name, b_count)| {
            a_count.cmp(b_count).then_with(|| b_name.cmp(a_name))
        })
        .map(|(name, _)| name);
    // Only finite-or-infinite positive values were pushed, never NaN.
    reliabilities.sort_by(f64::total_cmp);
    let p50 = if reliabilities.is_empty() { 0.0 } else { reliabilities[reliabilities.len() / 2] };
    let rmin = reliabilities.first().copied().unwrap_or(0.0);
    let rmax = reliabilities.last().copied().unwrap_or(0.0);
    // Build a human-readable discovered-pattern summary
    let mut parts: Vec<String> = vec![
        format!("Across {} similar past playbooks ({} workers examined)", matched.len(), total),
    ];
    if !common_certifications.is_empty() {
        let head: Vec<String> = common_certifications.iter().take(3)
            .map(|t| format!("{} ({:.0}%)", t.name, t.frequency * 100.0)).collect();
        parts.push(format!("recurring certifications: {}", head.join(", ")));
    }
    if !common_skills.is_empty() {
        let head: Vec<String> = common_skills.iter().take(3)
            .map(|t| format!("{} ({:.0}%)", t.name, t.frequency * 100.0)).collect();
        parts.push(format!("recurring skills: {}", head.join(", ")));
    }
    if let Some(a) = &modal_archetype { parts.push(format!("archetype mostly: {a}")); }
    if !reliabilities.is_empty() {
        parts.push(format!("reliability median {:.2} (range {:.2}–{:.2})", p50, rmin, rmax));
    }
    let discovered_pattern = parts.join(" · ");
    Ok(PatternReport {
        query: query.into(),
        matched_playbooks: matched.len(),
        total_workers_examined: total,
        common_certifications, common_skills,
        modal_archetype, reliability_p50: p50,
        reliability_min: rmin, reliability_max: rmax,
        matched_playbook_ids: matched_ids,
        discovered_pattern,
        duration_secs: t0.elapsed().as_secs_f32(),
    })
 }
 // ---------------- Persist memory → SQL (Path 2 foundation) ----------------
 /// Outcome of `persist_to_sql`.
 #[derive(Debug, Clone, Serialize)]
 pub struct PersistReport {
    /// Number of in-memory entries written as rows.
    pub rows_persisted: usize,
    /// Dataset name as reported by the catalog manifest after registration.
    pub dataset_name: String,
    /// Fingerprint of the written schema.
    pub fingerprint: String,
    /// Wall-clock time spent, in seconds.
    pub duration_secs: f32,
 }
 /// Write the current in-memory playbooks to a Parquet object and register
 /// it in the catalog as `successful_playbooks_live`, making operator
 /// activity queryable through SQL.
 ///
 /// Each call registers a fresh object list. Replacing the list is safe
 /// here because the in-memory state is the source of truth: the new
 /// object reflects everything currently known rather than discarding it.
 ///
 /// This is a separate dataset from `successful_playbooks` (the input of
 /// `rebuild()`), so operator imports of historical playbooks are never
 /// overwritten. Recruiter-facing SQL surfaces should read
 /// `successful_playbooks_live` for current activity.
 ///
 /// # Errors
 /// Returns a message if building the record batch, encoding Parquet,
 /// writing the object, or registering the dataset fails.
 pub async fn persist_to_sql(
    memory: &PlaybookMemory,
    catalog: &catalogd::registry::Registry,
 ) -> Result<PersistReport, String> {
    use arrow::array::StringArray;
    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::record_batch::RecordBatch;
    let started = std::time::Instant::now();
    let entries = memory.snapshot().await;
    let row_count = entries.len();
    // All five columns are nullable UTF-8, in this exact order.
    let nullable_utf8 = |name: &str| Field::new(name, DataType::Utf8, true);
    let schema = Arc::new(Schema::new(vec![
        nullable_utf8("timestamp"),
        nullable_utf8("operation"),
        nullable_utf8("approach"),
        nullable_utf8("result"),
        nullable_utf8("context"),
    ]));
    // Gather every column in a single pass over the entries.
    let mut timestamps: Vec<&str> = Vec::with_capacity(row_count);
    let mut operations: Vec<&str> = Vec::with_capacity(row_count);
    let mut approaches: Vec<&str> = Vec::with_capacity(row_count);
    let mut contexts: Vec<&str> = Vec::with_capacity(row_count);
    let mut results: Vec<String> = Vec::with_capacity(row_count);
    for entry in &entries {
        timestamps.push(entry.timestamp.as_str());
        operations.push(entry.operation.as_str());
        approaches.push(entry.approach.as_str());
        contexts.push(entry.context.as_str());
        // The result text is rebuilt from the endorsed names so this
        // dataset has the same shape as the CSV-fed successful_playbooks
        // ("N/N filled → Name1, Name2"); no names means an empty result.
        results.push(match entry.endorsed_names.len() {
            0 => String::new(),
            n => format!("{}/{} filled → {}", n, n, entry.endorsed_names.join(", ")),
        });
    }
    let result_refs: Vec<&str> = results.iter().map(String::as_str).collect();
    let batch = RecordBatch::try_new(schema.clone(), vec![
        Arc::new(StringArray::from(timestamps)),
        Arc::new(StringArray::from(operations)),
        Arc::new(StringArray::from(approaches)),
        Arc::new(StringArray::from(result_refs)),
        Arc::new(StringArray::from(contexts)),
    ]).map_err(|e| format!("build record batch: {e}"))?;
    let parquet_bytes = shared::arrow_helpers::record_batch_to_parquet(&batch)?;
    let fp = shared::arrow_helpers::fingerprint_schema(&schema);
    let key = "datasets/successful_playbooks_live.parquet";
    // Note the size first so the buffer itself can be handed to the store.
    let size_bytes = parquet_bytes.len() as u64;
    ops::put(&memory.store, key, parquet_bytes).await?;
    let object = shared::types::ObjectRef {
        bucket: "primary".into(),
        key: key.into(),
        size_bytes,
        created_at: chrono::Utc::now(),
    };
    let manifest = catalog
        .register("successful_playbooks_live".into(), fp.clone(), vec![object])
        .await?;
    Ok(PersistReport {
        rows_persisted: row_count,
        dataset_name: manifest.name,
        fingerprint: fp.0,
        duration_secs: started.elapsed().as_secs_f32(),
    })
 }
 // ---------------- Rebuild (the core of Phase 19) ----------------
 /// Outcome of `rebuild`.
 #[derive(Debug, Clone, Serialize)]
 pub struct RebuildReport {
    /// Rows read from `successful_playbooks`.
    pub rows_scanned: usize,
    /// Entries placed into memory.
    pub entries_built: usize,
    /// Sum of endorsed-name counts over all built entries.
    pub total_names_endorsed: usize,
    /// Wall-clock time spent, in seconds.
    pub duration_secs: f32,
 }
 /// Full rebuild: scan `successful_playbooks`, extract endorsements, embed
 /// each row's operation+approach+context, replace the in-memory state.
 ///
 /// Returns the report so callers can show operators what happened.
 ///
 /// # Errors
 /// Returns a message when the dataset query fails, when an embedding
 /// request fails or returns a different number of vectors than texts
 /// sent, or when persisting the new state fails. On query and embedding
 /// errors the previous in-memory state is left untouched.
 pub async fn rebuild(
    memory: &PlaybookMemory,
    ai_client: &AiClient,
    catalog: &catalogd::registry::Registry,
    buckets: &Arc<storaged::registry::BucketRegistry>,
 ) -> Result<RebuildReport, String> {
    let t0 = std::time::Instant::now();
    // 1. Pull every row of successful_playbooks through the query engine.
    let sql = "SELECT timestamp, operation, approach, result, context \
               FROM successful_playbooks";
    let engine = queryd::context::QueryEngine::new(
        catalog.clone(),
        buckets.clone(),
        queryd::cache::MemCache::new(0),
    );
    let batches = engine
        .query(sql)
        .await
        .map_err(|e| format!("query successful_playbooks: {e}"))?;
    let mut rows: Vec<(String, String, String, String, String)> = Vec::new();
    for b in &batches {
        let n = b.num_rows();
        // Read a string cell, accepting either string-view or i32-offset
        // string columns; missing column, other type, or null read as "".
        let get = |col: &str, row: usize| -> String {
            use arrow::array::{Array, AsArray};
            let Some(c) = b.column_by_name(col) else { return String::new(); };
            if let Some(arr) = c.as_string_view_opt() {
                if arr.is_null(row) { return String::new(); }
                return arr.value(row).to_string();
            }
            if let Some(arr) = c.as_string_opt::<i32>() {
                if arr.is_null(row) { return String::new(); }
                return arr.value(row).to_string();
            }
            String::new()
        };
        for row in 0..n {
            rows.push((
                get("timestamp", row),
                get("operation", row),
                get("approach", row),
                get("result", row),
                get("context", row),
            ));
        }
    }
    let rows_scanned = rows.len();
    // 2. For each row, build a PlaybookEntry (no embedding yet). Parse
    //    the operation for (city, state) and the result for names.
    let mut entries: Vec<PlaybookEntry> = rows
        .into_iter()
        .map(|(ts, op, approach, result, ctx)| {
            let (city, state) = parse_city_state(&op);
            let names = parse_names(&result);
            PlaybookEntry {
                playbook_id: stable_id(&ts, &op),
                operation: op,
                approach,
                context: ctx,
                timestamp: ts,
                endorsed_names: names,
                city,
                state,
                embedding: None,
            }
        })
        .collect();
    // 3. Embed in batches of EMBED_BATCH texts per request to keep memory
    //    flat (the sidecar's embed may batch further internally).
    const EMBED_BATCH: usize = 64;
    for (chunk_idx, chunk) in entries.chunks_mut(EMBED_BATCH).enumerate() {
        let chunk_start = chunk_idx * EMBED_BATCH;
        let end = chunk_start + chunk.len();
        let texts: Vec<String> = chunk.iter().map(embed_text).collect();
        let req = EmbedRequest { texts, model: None };
        let resp = ai_client
            .embed(req)
            .await
            .map_err(|e| format!("embed batch [{chunk_start}..{end}]: {e}"))?;
        // Vectors are paired with entries by position. A count mismatch
        // means the pairing cannot be trusted: surplus vectors would spill
        // into the next batch's entries (or past the end), and a shortfall
        // would silently leave entries that can never be ranked.
        if resp.embeddings.len() != chunk.len() {
            return Err(format!(
                "embed batch [{chunk_start}..{end}]: expected {} vectors, got {}",
                chunk.len(),
                resp.embeddings.len()
            ));
        }
        for (entry, v) in chunk.iter_mut().zip(&resp.embeddings) {
            entry.embedding = Some(v.iter().map(|&x| x as f32).collect());
        }
    }
    let total_names_endorsed: usize = entries.iter().map(|e| e.endorsed_names.len()).sum();
    let entries_built = entries.len();
    memory.set_entries(entries).await?;
    Ok(RebuildReport {
        rows_scanned,
        entries_built,
        total_names_endorsed,
        duration_secs: t0.elapsed().as_secs_f32(),
    })
 }
 /// Build the single line of text that is embedded for a playbook:
 /// `operation | approach | context | fills: name1, name2`.
 ///
 /// The timestamp is left out because it carries no semantic signal. The
 /// filled names are kept as words since they occasionally do — e.g.
 /// "Luis Harris" might correlate with Spanish-speaker names in future
 /// queries.
 fn embed_text(e: &PlaybookEntry) -> String {
    let fills = e.endorsed_names.join(", ");
    let mut text = String::new();
    text.push_str(&e.operation);
    text.push_str(" | ");
    text.push_str(&e.approach);
    text.push_str(" | ");
    text.push_str(&e.context);
    text.push_str(" | fills: ");
    text.push_str(&fills);
    text
 }
 /// Deterministic playbook id for a (timestamp, operation) pair: the
 /// prefix `pb-` followed by the first 12 bytes, hex-encoded, of the
 /// SHA-256 of `ts`, a `|` separator, and `op`.
 ///
 /// Rows with identical timestamp and operation therefore receive the
 /// same id.
 fn stable_id(ts: &str, op: &str) -> String {
    use sha2::{Digest, Sha256};
    let mut hasher = Sha256::new();
    // Feed the three pieces in order; hashing is incremental, so this is
    // the same digest as hashing their concatenation.
    for piece in [ts.as_bytes(), &b"|"[..], op.as_bytes()] {
        hasher.update(piece);
    }
    let digest = hasher.finalize();
    format!("pb-{}", hex_short(&digest, 12))
 }
 /// Lowercase hex encoding of at most the first `n` bytes of `b`, two
 /// characters per byte.
 fn hex_short(b: &[u8], n: usize) -> String {
    b.iter()
        .take(n)
        .map(|byte| format!("{byte:02x}"))
        .collect()
 }
 /// Parse "fill: Welder x2 in Toledo, OH" → ("Toledo", "OH").
 /// Returns None for malformed operations.
 ///
 /// The operation is split on " in " and the first segment after the
 /// leading one that reads as "City, ST" is used. Trying each segment in
 /// turn (rather than only the one after the first " in ") keeps roles
 /// that themselves contain " in " — "Clerk in Training x1 in Toledo, OH"
 /// — from hiding the location. The state is the run of ASCII letters
 /// right after the comma, so trailing context such as " (night shift)"
 /// is ignored. City and state are either both `Some` or both `None`.
 fn parse_city_state(op: &str) -> (Option<String>, Option<String>) {
    op.split(" in ")
        // The piece before the first " in " is the role, never a location.
        .skip(1)
        .find_map(|segment| {
            let (city, rest) = segment.split_once(',')?;
            let city = city.trim();
            // state might be followed by more context; take leading alpha chars
            let state: String = rest
                .trim()
                .chars()
                .take_while(char::is_ascii_alphabetic)
                .collect();
            (!city.is_empty() && !state.is_empty()).then(|| (city.to_string(), state))
        })
        .map_or((None, None), |(city, state)| (Some(city), Some(state)))
 }
 /// Parse "2/2 filled → Matthew Roberts, Amy Davis" → ["Matthew Roberts", "Amy Davis"].
 ///
 /// Only the text between the first arrow and the next one (or the end) is
 /// read. Anything from the first " (" onward is dropped, names are split
 /// on commas and trimmed, and empty pieces are discarded. No arrow means
 /// no names.
 fn parse_names(result: &str) -> Vec<String> {
    // Second arrow-delimited piece = the text right after the first arrow.
    let Some(after_arrow) = result.split('→').skip(1).next() else {
        return Vec::new();
    };
    let after_arrow = after_arrow.trim();
    // Strip trailing noise like "(and N more)" that some emitters add.
    let names_part = match after_arrow.find(" (") {
        Some(cut) => &after_arrow[..cut],
        None => after_arrow,
    };
    let mut names = Vec::new();
    for raw in names_part.split(',') {
        let name = raw.trim();
        if !name.is_empty() {
            names.push(name.to_string());
        }
    }
    names
 }
 /// Unit tests for the parsing helpers, id derivation, similarity, and the
 /// per-worker boost cap.
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Builds one Toledo/OH playbook endorsing `name`, with a unit-vector
    /// embedding and a timestamp that is not RFC 3339 (so no decay applies).
    fn toledo_entry(id: &str, name: &str) -> PlaybookEntry {
        PlaybookEntry {
            playbook_id: id.into(),
            operation: "fill: Welder x1 in Toledo, OH".into(),
            approach: "transfer".into(),
            context: "".into(),
            timestamp: "2026-04-20".into(),
            endorsed_names: vec![name.into()],
            city: Some("Toledo".into()),
            state: Some("OH".into()),
            embedding: Some(vec![1.0, 0.0, 0.0]),
        }
    }
    #[test]
    fn parse_city_state_extracts_both() {
        let (c, s) = parse_city_state("fill: Welder x2 in Toledo, OH");
        assert_eq!(c.as_deref(), Some("Toledo"));
        assert_eq!(s.as_deref(), Some("OH"));
    }
    #[test]
    fn parse_city_state_handles_multiword_city() {
        let (c, s) = parse_city_state("fill: Loader x1 in Grand Rapids, MI");
        assert_eq!(c.as_deref(), Some("Grand Rapids"));
        assert_eq!(s.as_deref(), Some("MI"));
    }
    #[test]
    fn parse_city_state_malformed_returns_none() {
        let (c, s) = parse_city_state("fill: something weird");
        assert!(c.is_none());
        assert!(s.is_none());
    }
    #[test]
    fn parse_city_state_ignores_context_after_state() {
        let (c, s) = parse_city_state("fill: Welder x2 in Toledo, OH (night shift)");
        assert_eq!(c.as_deref(), Some("Toledo"));
        assert_eq!(s.as_deref(), Some("OH"));
    }
    #[test]
    fn parse_names_extracts_after_arrow() {
        let ns = parse_names("2/2 filled → Matthew Roberts, Amy Davis");
        assert_eq!(ns, vec!["Matthew Roberts".to_string(), "Amy Davis".to_string()]);
    }
    #[test]
    fn parse_names_handles_single_fill() {
        let ns = parse_names("1/1 filled → Jose Reed");
        assert_eq!(ns, vec!["Jose Reed".to_string()]);
    }
    #[test]
    fn parse_names_handles_no_arrow() {
        let ns = parse_names("0/2 filled");
        assert!(ns.is_empty());
    }
    #[test]
    fn parse_names_strips_trailing_parenthetical() {
        let ns = parse_names("3/3 filled → Matthew Roberts, Amy Davis (and 1 more)");
        assert_eq!(ns, vec!["Matthew Roberts".to_string(), "Amy Davis".to_string()]);
    }
    #[test]
    fn stable_id_is_deterministic() {
        let a = stable_id("2026-04-20T00:00:00Z", "fill: Welder x2 in Toledo, OH");
        let b = stable_id("2026-04-20T00:00:00Z", "fill: Welder x2 in Toledo, OH");
        assert_eq!(a, b);
        assert!(a.starts_with("pb-"));
    }
    #[test]
    fn stable_id_differs_per_operation() {
        let a = stable_id("2026-04-20T00:00:00Z", "fill: Welder x2 in Toledo, OH");
        let b = stable_id("2026-04-20T00:00:00Z", "fill: Welder x2 in Dayton, OH");
        assert_ne!(a, b);
        // "pb-" plus 12 bytes rendered as two hex characters each.
        assert_eq!(a.len(), 3 + 24);
    }
    #[test]
    fn hex_short_truncates_to_n_bytes() {
        assert_eq!(hex_short(&[0xde, 0xad, 0xbe, 0xef], 2), "dead");
        assert_eq!(hex_short(&[0x0a], 4), "0a");
        assert_eq!(hex_short(&[], 4), "");
    }
    #[test]
    fn cosine_basic_cases() {
        assert_eq!(cosine(&[1.0, 0.0, 0.0], &[1.0, 0.0, 0.0]), 1.0);
        assert_eq!(cosine(&[1.0, 0.0], &[0.0, 1.0]), 0.0);
        assert_eq!(cosine(&[1.0, 0.0], &[-2.0, 0.0]), -1.0);
        // Zero-magnitude and empty inputs are defined as "no similarity".
        assert_eq!(cosine(&[0.0, 0.0], &[1.0, 2.0]), 0.0);
        assert_eq!(cosine(&[], &[]), 0.0);
    }
    #[test]
    fn boost_caps_per_worker() {
        // Even with 100 similar playbooks all endorsing the same name, the
        // boost never exceeds MAX_BOOST_PER_WORKER.
        let pm = PlaybookMemory::new(Arc::new(object_store::memory::InMemory::new()));
        let entries: Vec<PlaybookEntry> = (0..100)
            .map(|i| toledo_entry(&format!("pb-{i}"), "Deborah Powell"))
            .collect();
        tokio::runtime::Runtime::new().unwrap().block_on(async {
            pm.set_entries(entries).await.unwrap();
            let boosts = pm.compute_boost_for(&[1.0, 0.0, 0.0], 100, 0.5).await;
            let key = ("Toledo".into(), "OH".into(), "Deborah Powell".into());
            let entry = boosts.get(&key).expect("boost entry present");
            assert!(entry.boost <= MAX_BOOST_PER_WORKER + 1e-6,
                "boost {} exceeded cap {}", entry.boost, MAX_BOOST_PER_WORKER);
        });
    }
    #[test]
    fn boost_is_keyed_by_city() {
        // A same-named worker in another city must not inherit the boost,
        // and an orthogonal query must produce no boost at all.
        let pm = PlaybookMemory::new(Arc::new(object_store::memory::InMemory::new()));
        tokio::runtime::Runtime::new().unwrap().block_on(async {
            pm.set_entries(vec![toledo_entry("pb-0", "Deborah Powell")]).await.unwrap();
            let boosts = pm.compute_boost_for(&[1.0, 0.0, 0.0], 10, 0.5).await;
            let toledo = ("Toledo".to_string(), "OH".to_string(), "Deborah Powell".to_string());
            let dayton = ("Dayton".to_string(), "OH".to_string(), "Deborah Powell".to_string());
            let entry = boosts.get(&toledo).expect("boost entry present");
            assert_eq!(entry.citations, vec!["pb-0".to_string()]);
            assert!(!boosts.contains_key(&dayton));
            let unrelated = pm.compute_boost_for(&[0.0, 1.0, 0.0], 10, 0.5).await;
            assert!(unrelated.is_empty());
        });
    }
 }
--- a/crates/vectord/src/service.rs
+++ b/crates/vectord/src/service.rs
@ -12,7 +12,7 @@ use std::sync::Arc;
 use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};
 use catalogd::registry::Registry as CatalogRegistry;
 use storaged::registry::BucketRegistry;
-use crate::{agent, autotune, chunker, embedding_cache, harness, hnsw, index_registry, jobs, lance_backend, promotion, rag, refresh, search, store, supervisor, trial};
+use crate::{agent, autotune, chunker, embedding_cache, harness, hnsw, index_registry, jobs, lance_backend, playbook_memory, promotion, rag, refresh, search, store, supervisor, trial};
 #[derive(Clone)]
 pub struct VectorState {
@ -23,6 +23,9 @@ pub struct VectorState {
    pub hnsw_store: hnsw::HnswStore,
    pub embedding_cache: embedding_cache::EmbeddingCache,
    pub trial_journal: trial::TrialJournal,
    /// Federation-aware harness store — resolves eval artifacts to each
    /// index's recorded bucket, falling back to primary for legacy evals.
    pub harness_store: harness::HarnessStore,
    /// Catalog registry — needed by the Phase C refresh path to mark/clear
    /// staleness and look up dataset manifests.
    pub catalog: CatalogRegistry,
@ -46,6 +49,10 @@ pub struct VectorState {
    /// ADR-019 hybrid: handles to Lance datasets keyed by index name.
    /// Lazy-created on first /vectors/lance/* call.
    pub lance: lance_backend::LanceRegistry,
    /// Phase 19 — meta-index feedback. Embeds past successful_playbooks
    /// and, when `use_playbook_memory` is set on /vectors/hybrid, boosts
    /// workers that were actually filled in semantically-similar past ops.
    pub playbook_memory: playbook_memory::PlaybookMemory,
 }
 /// What the active-profile singleton records. Narrow — we don't need the
@ -63,6 +70,7 @@ pub fn router(state: VectorState) -> Router {
        .route("/index", post(create_index))
        .route("/indexes", get(list_indexes))
        .route("/indexes/{name}", get(get_index_meta))
        .route("/indexes/{name}/bucket", axum::routing::patch(migrate_index_bucket))
        .route("/jobs", get(list_jobs))
        .route("/jobs/{id}", get(get_job))
        .route("/search", post(search_index))
@ -110,6 +118,12 @@ pub fn router(state: VectorState) -> Router {
        .route("/lance/stats/{index_name}", get(lance_stats))
        .route("/lance/scalar-index/{index_name}/{column}", post(lance_build_scalar_index))
        .route("/lance/recall/{index_name}", post(lance_recall_harness))
        // Phase 19: playbook memory — the meta-index feedback loop
        .route("/playbook_memory/rebuild", post(rebuild_playbook_memory))
        .route("/playbook_memory/stats", get(playbook_memory_stats))
        .route("/playbook_memory/seed", post(seed_playbook_memory))
        .route("/playbook_memory/persist_sql", post(persist_playbook_memory_sql))
        .route("/playbook_memory/patterns", post(discover_playbook_patterns))
        .with_state(state)
 }
@ -259,6 +273,174 @@ async fn get_index_meta(
    }
 }
 /// JSON body for `PATCH /indexes/{name}/bucket` (handled by
 /// `migrate_index_bucket`).
 #[derive(Deserialize)]
 struct MigrateBucketRequest {
    /// Bucket to move the index's artifacts into. The handler rejects the
    /// request if this bucket is not registered or if it equals the
    /// index's current bucket.
    dest_bucket: String,
    /// If true, delete artifacts from the source bucket after the pointer
    /// flip. Default false — keeping source copies means a failed migration
    /// is recoverable by editing IndexMeta.bucket back, and a successful
    /// migration leaves inspectable forensics until an operator sweeps.
    #[serde(default)]
    delete_source: bool,
 }
 /// Response body of `migrate_index_bucket`: what was moved, what was
 /// skipped, and how long the migration took.
 #[derive(Serialize)]
 struct MigrateBucketReport {
    /// Name of the index that was migrated (the `{name}` path segment).
    index_name: String,
    /// Bucket the index pointed at before the migration.
    source_bucket: String,
    /// Bucket the index points at after the migration.
    dest_bucket: String,
    /// Artifact keys that were successfully copied to the destination.
    /// Order follows copy order.
    copied: Vec<String>,
    /// Artifact keys/prefixes that were not copied: the trial-journal
    /// prefix when it listed no keys, and the optional promotion / harness
    /// files when their copy returned an error (typically because the
    /// file is absent).
    skipped: Vec<String>,
    /// Subset of `copied` that was subsequently deleted from the source.
    deleted_source: Vec<String>,
    /// Wall-clock time spent inside the handler, in seconds.
    duration_secs: f32,
 }
 /// Move an index's artifacts from its current bucket to `dest_bucket`.
 /// Parquet-backed indexes only — Lance migration needs URI rewriting that
 /// isn't in scope for this endpoint. Copies the vector data, trial journal,
 /// promotion file, and auto-generated harness; updates `IndexMeta.bucket`
 /// last so a mid-flight failure leaves the index still usable at its
 /// original location. Evicts the `EmbeddingCache` entry so the next load
 /// re-reads from the new bucket.
 ///
 /// # Errors
 /// - `404` when the index is not registered.
 /// - `400` for Lance-backed indexes, an unregistered destination bucket,
 ///   or a destination equal to the current bucket.
 /// - `500` when a bucket handle cannot be resolved, the trial journal
 ///   cannot be listed, a mandatory copy fails, or the metadata update
 ///   fails. All of these happen before or at the pointer flip, so the
 ///   index keeps pointing at the source bucket.
 async fn migrate_index_bucket(
    State(state): State<VectorState>,
    Path(name): Path<String>,
    Json(req): Json<MigrateBucketRequest>,
 ) -> Result<Json<MigrateBucketReport>, (StatusCode, String)> {
    let t0 = std::time::Instant::now();
    let mut meta = state
        .index_registry
        .get(&name)
        .await
        .ok_or_else(|| (StatusCode::NOT_FOUND, format!("index '{name}' not found")))?;
    if meta.vector_backend == shared::types::VectorBackend::Lance {
        return Err((
            StatusCode::BAD_REQUEST,
            "Lance-backed indexes cannot be migrated via this endpoint — \
             Lance URIs are bucket-specific; a separate migrate_lance tool \
             is needed".into(),
        ));
    }
    if !state.bucket_registry.contains(&req.dest_bucket) {
        return Err((
            StatusCode::BAD_REQUEST,
            format!("dest bucket '{}' not registered", req.dest_bucket),
        ));
    }
    let source_bucket = meta.bucket.clone();
    if source_bucket == req.dest_bucket {
        return Err((
            StatusCode::BAD_REQUEST,
            format!("source and dest are both '{source_bucket}' — nothing to migrate"),
        ));
    }
    let src = state
        .bucket_registry
        .get(&source_bucket)
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
    let dst = state
        .bucket_registry
        .get(&req.dest_bucket)
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
    let mut copied: Vec<String> = Vec::new();
    let mut skipped: Vec<String> = Vec::new();
    // 1. Vector data (single parquet file for this backend).
    copy_key(&src, &dst, &meta.storage_key)
        .await
        .map_err(|e| {
            (StatusCode::INTERNAL_SERVER_ERROR,
             format!("copy {}: {e}", meta.storage_key))
        })?;
    copied.push(meta.storage_key.clone());
    // 2. Trial journal batches — per-index directory of JSONL files.
    // A listing failure aborts the migration instead of being treated as
    // "no trials": otherwise the pointer flip below would commit while the
    // journal silently stays behind in the source bucket. This mirrors how
    // a failed copy of an individual trial key is handled.
    let trial_prefix = format!("_hnsw_trials/{name}/");
    let trial_keys = storaged::ops::list(&src, Some(&trial_prefix))
        .await
        .map_err(|e| {
            (StatusCode::INTERNAL_SERVER_ERROR,
             format!("list {trial_prefix}: {e}"))
        })?;
    if trial_keys.is_empty() {
        skipped.push(trial_prefix);
    }
    for k in &trial_keys {
        copy_key(&src, &dst, k)
            .await
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("copy {k}: {e}")))?;
        copied.push(k.clone());
    }
    // 3. Promotion file (optional — absent for never-promoted indexes).
    // Any copy error is recorded as "skipped"; the error type here does not
    // let us tell "absent" apart from other failures.
    let promo_key = format!("_hnsw_promotions/{name}.json");
    match copy_key(&src, &dst, &promo_key).await {
        Ok(()) => copied.push(promo_key),
        Err(_) => skipped.push(promo_key),
    }
    // 4. Auto-generated harness (optional — absent if agent never ran).
    let harness_key = format!("_hnsw_evals/{name}_auto.json");
    match copy_key(&src, &dst, &harness_key).await {
        Ok(()) => copied.push(harness_key),
        Err(_) => skipped.push(harness_key),
    }
    // 5. Pointer flip — IndexMeta.bucket now points at destination. This
    // is the commit point; earlier failures leave copies in dest but the
    // index still usable at source.
    meta.bucket = req.dest_bucket.clone();
    state
        .index_registry
        .register(meta)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("update meta: {e}")))?;
    // 6. Cache eviction — next load reads the new bucket's parquet.
    state.embedding_cache.evict(&name).await;
    // 7. Optional source cleanup. Best-effort: a key whose delete fails is
    // simply left out of `deleted_source`.
    let mut deleted_source: Vec<String> = Vec::new();
    if req.delete_source {
        for k in &copied {
            if storaged::ops::delete(&src, k).await.is_ok() {
                deleted_source.push(k.clone());
            }
        }
    }
    Ok(Json(MigrateBucketReport {
        index_name: name,
        source_bucket,
        dest_bucket: req.dest_bucket,
        copied,
        skipped,
        deleted_source,
        duration_secs: t0.elapsed().as_secs_f32(),
    }))
 }
 /// Copy one object from `src` to `dst` under the same `key`.
 ///
 /// Built on the `storaged::ops` get + put primitives: the object is read
 /// fully from the source, then written to the destination. There is no
 /// native copy in object_store across heterogeneous backends
 /// (local ↔ S3), so this in-memory hop is unavoidable. Memory use is
 /// bounded by the size of the single object being copied, which for our
 /// parquet + jsonl artifacts tops out around a few hundred MB.
 async fn copy_key(
    src: &Arc<dyn ObjectStore>,
    dst: &Arc<dyn ObjectStore>,
    key: &str,
 ) -> Result<(), String> {
    // A read failure short-circuits via `?`; the write result is returned as-is.
    storaged::ops::put(dst, key, storaged::ops::get(src, key).await?).await
 }
 // --- unused legacy function below, kept for reference ---
 #[allow(dead_code)]
@ -420,6 +602,15 @@ struct HybridRequest {
    /// If false, just return the ranked matches (faster, no Ollama gen).
    #[serde(default = "default_true")]
    generate: bool,
    /// Phase 19: consult `playbook_memory` and boost workers that past
    /// similar playbooks successfully filled. Off by default so current
    /// callers keep deterministic ranking; opt-in unlocks the feedback.
    #[serde(default)]
    use_playbook_memory: bool,
    /// Number of past playbooks to consider when `use_playbook_memory`
    /// is on. Ignored otherwise. Defaults to
    /// `playbook_memory::DEFAULT_TOP_K_PLAYBOOKS` when omitted.
    #[serde(default)]
    playbook_memory_k: Option<usize>,
 }
 /// Serde default helper (`#[serde(default = "default_true")]`) for boolean
 /// request fields that should be `true` when the caller omits them.
 fn default_true() -> bool { true }
@ -442,8 +633,18 @@ struct HybridSource {
    chunk_text: String,
    score: f32,
    sql_verified: bool,
    /// Phase 19: how much the playbook_memory boost lifted this hit's
    /// score. 0.0 when `use_playbook_memory=false` or no past playbook
    /// endorsed this worker.
    #[serde(default, skip_serializing_if = "is_zero")]
    playbook_boost: f32,
    /// playbook_ids whose endorsement contributed to `playbook_boost`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    playbook_citations: Vec<String>,
 }
 /// Serde `skip_serializing_if` predicate: true when `x` lies strictly
 /// within 1e-6 of zero, so a negligible `playbook_boost` is left out of
 /// the serialized output.
 fn is_zero(x: &f32) -> bool {
    let magnitude = x.abs();
    magnitude < 1e-6
 }
 async fn hybrid_search(
    State(state): State<VectorState>,
    Json(req): Json<HybridRequest>,
@ -556,6 +757,11 @@ async fn hybrid_search(
        .and_then(|m| m.id_prefix.clone());
    let sql_count = valid_ids.as_ref().map(|s| s.len()).unwrap_or(0);
    // Phase 19: when playbook_memory is consulted, pull a wider candidate
    // pool so endorsed workers outside the vanilla top-K can still be
    // boosted into visibility. 5× is a conservative multiplier — plenty
    // for a +0.25 boost to flip rankings without dragging the cost up.
    let fetch_k = if req.use_playbook_memory { req.top_k * 5 } else { req.top_k };
    let filtered: Vec<search::SearchResult> = if let Some(ref ids) = valid_ids {
        all_results.into_iter()
            .filter(|r| {
@ -572,20 +778,54 @@ async fn hybrid_search(
                };
                ids.contains(raw_id)
            })
-            .take(req.top_k)
+            .take(fetch_k)
            .collect()
    } else {
-        all_results.into_iter().take(req.top_k).collect()
+        all_results.into_iter().take(fetch_k).collect()
    };
    // Step 4: Build sources with SQL-verified flag.
-    let sources: Vec<HybridSource> = filtered.iter().map(|r| HybridSource {
+    let mut sources: Vec<HybridSource> = filtered.iter().map(|r| HybridSource {
        doc_id: r.doc_id.clone(),
        chunk_text: r.chunk_text.clone(),
        score: r.score,
        sql_verified: valid_ids.is_some(),
        playbook_boost: 0.0,
        playbook_citations: Vec::new(),
    }).collect();
    // Step 4b (Phase 19): if use_playbook_memory, look up semantically
    // similar past playbooks and boost workers they endorsed. Name-match
    // is on the tuple (city, state, name) extracted from chunk_text —
    // hybrid_search's SQL filter already narrowed to one city+state, so
    // this just needs to check the name against each playbook's endorsed
    // set. Additive boost on the existing vector score, then re-sort.
    if req.use_playbook_memory {
        let boost_k = req.playbook_memory_k.unwrap_or(playbook_memory::DEFAULT_TOP_K_PLAYBOOKS);
        // We embedded the question as `qv` above — reuse it for the
        // playbook similarity lookup so we don't double-pay Ollama.
        let boosts = state.playbook_memory.compute_boost_for(&qv, boost_k, 0.5).await;
        for src in sources.iter_mut() {
            // Parse "{Name} — {Role} in {City}, {State}. …" chunk. Being
            // defensive: chunks from other datasets may not follow this
            // exact shape, so absent fields just skip the boost.
            if let Some((name, city, state)) = parse_worker_chunk(&src.chunk_text) {
                let key = (city, state, name);
                if let Some(entry) = boosts.get(&key) {
                    src.score += entry.boost;
                    src.playbook_boost = entry.boost;
                    src.playbook_citations = entry.citations.clone();
                }
            }
        }
        // Re-rank: boosted scores can flip ordering.
        sources.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
        // Finally trim to the caller's requested top_k — we pulled fetch_k
        // (5× wider) above specifically so the boost could reach workers
        // that would otherwise have been trimmed pre-boost.
        sources.truncate(req.top_k);
    }
    // Step 5: Generate answer if requested.
    let answer = if req.generate && !sources.is_empty() {
        let context: String = sources.iter().enumerate().map(|(i, s)| {
@ -734,7 +974,7 @@ async fn run_trial(
    State(state): State<VectorState>,
    Json(req): Json<TrialRequest>,
 ) -> Result<Json<trial::Trial>, (StatusCode, String)> {
-    let mut harness_set = harness::EvalSet::load(&state.store, &req.harness)
+    let mut harness_set = state.harness_store.load_for_index(&req.index_name, &req.harness)
        .await
        .map_err(|e| (StatusCode::NOT_FOUND, format!("harness not found: {e}")))?;
@ -764,8 +1004,8 @@ async fn run_trial(
            .await
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
        tracing::info!("trial: ground truth built in {:.1}s", t0.elapsed().as_secs_f32());
-        harness_set
+        state.harness_store
-            .save(&state.store)
+            .save(&harness_set)
            .await
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save harness: {e}")))?;
    }
@ -890,17 +1130,14 @@ async fn best_trial(
 // --- Harness management ---
 async fn list_evals(State(state): State<VectorState>) -> impl IntoResponse {
-    match harness::EvalSet::list(&state.store).await {
+    Json(state.harness_store.list_all().await)
        Ok(names) => Ok(Json(names)),
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
 }
 async fn get_eval(
    State(state): State<VectorState>,
    Path(name): Path<String>,
 ) -> impl IntoResponse {
-    match harness::EvalSet::load(&state.store, &name).await {
+    match state.harness_store.get_any(&name).await {
        Ok(e) => Ok(Json(e)),
        Err(err) => Err((StatusCode::NOT_FOUND, err)),
    }
@ -916,7 +1153,7 @@ async fn put_eval(
        .queries
        .iter()
        .all(|q| q.ground_truth.is_some());
-    match harness_set.save(&state.store).await {
+    match state.harness_store.save(&harness_set).await {
        Ok(()) => Ok(Json(harness_set)),
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
@ -957,8 +1194,8 @@ async fn autogen_eval(
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
-    harness_set
+    state.harness_store
-        .save(&state.store)
+        .save(&harness_set)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save: {e}")))?;
@ -1407,7 +1644,12 @@ async fn profile_scoped_search(
        let lance_store = state.lance.store_for(&req.index_name).await
            .map_err(|e| (StatusCode::BAD_REQUEST, e))?;
        let t0 = std::time::Instant::now();
-        match lance_store.search(&query_vec, top_k).await {
+        match lance_store.search(
            &query_vec,
            top_k,
            Some(LANCE_DEFAULT_NPROBES),
            Some(LANCE_DEFAULT_REFINE_FACTOR),
        ).await {
            Ok(hits) => Ok(Json(serde_json::json!({
                "profile": profile.id,
                "source": index_meta.source,
@ -1516,6 +1758,7 @@ async fn run_autotune_endpoint(
        &state.index_registry,
        &state.trial_journal,
        &state.promotion_registry,
        &state.harness_store,
        &state.job_tracker,
    ).await {
        Ok(result) => Ok(Json(result)),
@ -1636,8 +1879,25 @@ struct LanceSearchRequest {
    query: String,
    #[serde(default = "default_top_k")]
    top_k: usize,
    /// IVF partitions to probe. `None` uses Lance's built-in default of
    /// 1, which caps recall well below the index's real capability.
    /// Recommended: 5–10% of num_partitions (≈20 for a 316-partition
    /// index). Omitting it here picks the server-side default.
    #[serde(default)]
    nprobes: Option<usize>,
    /// Refine factor — re-rank `top_k * factor` PQ-approximate candidates
    /// with exact distances before returning `top_k`. Recovers recall
    /// lost to product quantization.
    #[serde(default)]
    refine_factor: Option<u32>,
 }
 /// Server-side defaults when the caller doesn't pin nprobes / refine
 /// themselves. Tuned for the ~100K × 768d reference workload; see
 /// docs/ADR-019-vector-storage.md for the recall / latency trade-off.
 const LANCE_DEFAULT_NPROBES: usize = 20;
 const LANCE_DEFAULT_REFINE_FACTOR: u32 = 5;
 /// Serde default for the `top_k` field of the Lance request structs:
 /// 5 when the caller omits it.
 fn default_top_k() -> usize { 5 }
 /// Vector search against a Lance dataset. Embeds the query text via the
@ -1660,7 +1920,9 @@ async fn lance_search(
        .map_err(|e| (StatusCode::BAD_REQUEST, e))?;
    let t0 = std::time::Instant::now();
-    let hits = lance_store.search(&qv, req.top_k).await
+    let nprobes = req.nprobes.or(Some(LANCE_DEFAULT_NPROBES));
    let refine = req.refine_factor.or(Some(LANCE_DEFAULT_REFINE_FACTOR));
    let hits = lance_store.search(&qv, req.top_k, nprobes, refine).await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
    Ok(Json(serde_json::json!({
@ -1761,6 +2023,12 @@ struct LanceRecallRequest {
    harness: String,
    #[serde(default = "default_top_k")]
    top_k: usize,
    /// Override server defaults so operators can sweep nprobes /
    /// refine_factor to chart the recall-vs-latency curve.
    #[serde(default)]
    nprobes: Option<usize>,
    #[serde(default)]
    refine_factor: Option<u32>,
 }
 #[derive(serde::Serialize)]
@ -1784,6 +2052,214 @@ struct LanceRecallQuery {
    hits_returned: usize,
 }
 // --- Phase 19: playbook memory endpoints ---
 /// Extract (name, city, state) from a chunk formatted like
 /// "{Name} — {Role} in {City}, {State}. Skills: …".
 ///
 /// Parsing steps:
 /// 1. Everything before the first em dash ("—") is the name.
 /// 2. The location starts after the first " in " that follows the dash.
 /// 3. The text up to the next comma is the city. If that text still
 ///    contains " in " (a role such as "Specialist in Logistics"), only
 ///    the part after the last " in " is kept as the city.
 /// 4. The state is the leading run of ASCII letters after the comma, so
 ///    "OH. Skills: …" yields "OH".
 ///
 /// Returns None if the chunk doesn't match the shape or any of the three
 /// fields ends up empty; callers simply skip the boost for that hit.
 fn parse_worker_chunk(chunk: &str) -> Option<(String, String, String)> {
    // "Name — Role in City, ST. …" → split on "—" then " in " then ","
    let (name_part, rest) = chunk.split_once('—')?;
    let (_role, loc_part) = rest.trim().split_once(" in ")?;
    let (city_part, state_plus) = loc_part.trim().split_once(',')?;
    // A role containing " in " leaves its tail in front of the city;
    // the real city is whatever follows the last " in " before the comma.
    let city = city_part
        .rsplit_once(" in ")
        .map_or(city_part, |(_, tail)| tail)
        .trim();
    let state: String = state_plus
        .trim()
        .chars()
        .take_while(|c| c.is_ascii_alphabetic())
        .collect();
    let name = name_part.trim();
    if name.is_empty() || city.is_empty() || state.is_empty() {
        return None;
    }
    Some((name.to_string(), city.to_string(), state))
 }
 /// JSON body for `POST /playbook_memory/seed`: a single playbook made of
 /// {operation, approach, context, endorsed_names}.
 #[derive(Deserialize)]
 struct SeedPlaybookRequest {
    /// Operation text, expected in the shape "fill: Role xN in City, ST".
    /// City + state are parsed from the text following " in "; the
    /// request is rejected when either cannot be extracted.
    operation: String,
    /// Approach text; empty string when omitted.
    #[serde(default)]
    approach: String,
    /// Context text; empty string when omitted.
    #[serde(default)]
    context: String,
    /// Worker names this playbook endorses. Required (no serde default).
    endorsed_names: Vec<String>,
    /// Append to the existing memory rather than replacing. Default true —
    /// seeding is a bootstrap/demo tool, not a rebuild substitute. When
    /// false, the new entry becomes the only entry in the memory.
    #[serde(default = "default_true")]
    append: bool,
 }
 /// Bootstrap / test-only: inject a playbook entry directly into
 /// `playbook_memory` without going through `successful_playbooks`. Useful
 /// when the source dataset has stale or phantom entries (as the initial
 /// staffing seed did — names that don't correspond to real workers), and
 /// you want to demonstrate the feedback loop with a known-good fixture.
 ///
 /// Production path is always `/rebuild` — this endpoint is for operators
 /// who need to prime the memory before real playbooks accumulate.
 ///
 /// Responds with `{ "playbook_id", "entries_after" }` on success.
 ///
 /// # Errors
 /// - `400` when city/state cannot be parsed from `operation`. This is
 ///   checked first, so malformed input never triggers an embed call.
 /// - `502` when the embed call fails or returns no embedding.
 /// - `500` when storing the updated entry list fails.
 async fn seed_playbook_memory(
    State(state): State<VectorState>,
    Json(req): Json<SeedPlaybookRequest>,
 ) -> impl IntoResponse {
    // Parse city/state from the operation ("fill: Role xN in City, ST").
    // The same parsing exists in playbook_memory::rebuild; it is duplicated
    // here because this seed path is stable but infrequently called.
    let (city, state_) = {
        let after_in = req.operation.split(" in ").nth(1).unwrap_or("");
        let mut parts = after_in.splitn(2, ',');
        let city = parts
            .next()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty());
        let st = parts
            .next()
            .map(|s| s.trim().chars().take_while(|c| c.is_ascii_alphabetic()).collect::<String>())
            .filter(|s| !s.is_empty());
        (city, st)
    };
    if city.is_none() || state_.is_none() {
        return Err((StatusCode::BAD_REQUEST,
            "operation must match 'fill: Role xN in City, ST' shape".to_string()));
    }
    // Embed the entry through the same text shape `rebuild` uses so
    // similarity math is comparable across seed + real entries.
    let text = format!(
        "{} | {} | {} | fills: {}",
        req.operation, req.approach, req.context,
        req.endorsed_names.join(", "),
    );
    let resp = match state.ai_client.embed(EmbedRequest { texts: vec![text], model: None }).await {
        Ok(r) => r,
        Err(e) => return Err((StatusCode::BAD_GATEWAY, format!("embed seed: {e}"))),
    };
    // Exactly one text was submitted, so only the first embedding matters.
    let emb: Vec<f32> = match resp.embeddings.first() {
        Some(v) => v.iter().map(|&x| x as f32).collect(),
        None => return Err((StatusCode::BAD_GATEWAY, "embed returned nothing".into())),
    };
    // Stable id: hash of timestamp + operation. Callers get the id back
    // so they can reference it in citations. The same timestamp is stored
    // on the entry.
    let ts = chrono::Utc::now().to_rfc3339();
    use sha2::{Digest, Sha256};
    let mut h = Sha256::new();
    h.update(ts.as_bytes());
    h.update(b"|");
    h.update(req.operation.as_bytes());
    let bytes = h.finalize();
    let pid = format!("pb-seed-{}", bytes.iter().take(8).map(|b| format!("{b:02x}")).collect::<String>());
    let new_entry = playbook_memory::PlaybookEntry {
        playbook_id: pid.clone(),
        operation: req.operation,
        approach: req.approach,
        context: req.context,
        timestamp: ts,
        endorsed_names: req.endorsed_names,
        city, state: state_,
        embedding: Some(emb),
    };
    // NOTE(review): append is a read-modify-write (snapshot → push →
    // set_entries). Unless PlaybookMemory serializes callers, two
    // concurrent seeds could overwrite each other's entry — confirm.
    let entries = if req.append {
        let mut current = state.playbook_memory.snapshot().await;
        current.push(new_entry);
        current
    } else {
        // Replacing: the previous contents are not needed at all.
        vec![new_entry]
    };
    if let Err(e) = state.playbook_memory.set_entries(entries).await {
        return Err((StatusCode::INTERNAL_SERVER_ERROR, format!("persist: {e}")));
    }
    Ok(Json(serde_json::json!({ "playbook_id": pid, "entries_after": state.playbook_memory.entry_count().await })))
 }
 async fn rebuild_playbook_memory(
    State(state): State<VectorState>,
 ) -> impl IntoResponse {
    match playbook_memory::rebuild(
        &state.playbook_memory,
        &state.ai_client,
        &state.catalog,
        &state.bucket_registry,
    ).await {
        Ok(report) => Ok(Json(report)),
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
 }
 // Path 2 foundation — dump in-memory playbook_memory state to a fresh
 // `successful_playbooks_live` dataset. Cheap to call (writes one parquet,
 // updates one manifest), so /log can call it after every seed to keep the
 // SQL-queryable surface honest without the destructive REPLACE bug that
 // /ingest/file has.
 async fn persist_playbook_memory_sql(
    State(state): State<VectorState>,
 ) -> impl IntoResponse {
    match playbook_memory::persist_to_sql(&state.playbook_memory, &state.catalog).await {
        Ok(report) => Ok(Json(report)),
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
 }
 /// JSON body for `POST /playbook_memory/patterns`; every field is
 /// forwarded to `playbook_memory::discover_patterns`.
 #[derive(Deserialize)]
 struct PatternsRequest {
    /// Query text describing the fill to find patterns for. Required.
    query: String,
    /// How many playbooks to examine — presumably the most similar ones to
    /// `query`; confirm against `discover_patterns`. Default 10.
    #[serde(default = "default_pattern_k")]
    top_k_playbooks: usize,
    /// Minimum frequency (0.0-1.0) for a trait to make the report.
    /// Default 0.4 — at least 40% of examined workers must share it.
    #[serde(default = "default_pattern_min_freq")]
    min_trait_frequency: f32,
 }
 /// Serde default for `PatternsRequest::top_k_playbooks`.
 fn default_pattern_k() -> usize { 10 }
 /// Serde default for `PatternsRequest::min_trait_frequency`.
 fn default_pattern_min_freq() -> f32 { 0.4 }
 // Path 2 — meta-index discovery surface. "What did past similar fills
 // have in common that I didn't ask about?" — surfaces signals like
 // recurring certifications, skill clusters, archetype tendencies.
 async fn discover_playbook_patterns(
    State(state): State<VectorState>,
    Json(req): Json<PatternsRequest>,
 ) -> impl IntoResponse {
    match playbook_memory::discover_patterns(
        &state.playbook_memory,
        &state.ai_client,
        &state.catalog,
        &state.bucket_registry,
        &req.query,
        req.top_k_playbooks,
        req.min_trait_frequency,
    ).await {
        Ok(report) => Ok(Json(report)),
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
 }
 /// `GET /playbook_memory/stats` — summarize a snapshot of the playbook
 /// memory: entry count, total endorsed names across all entries, how many
 /// entries carry an embedding, and the first three entries as a sample.
 async fn playbook_memory_stats(
    State(state): State<VectorState>,
 ) -> impl IntoResponse {
    let entries = state.playbook_memory.snapshot().await;
    let names_endorsed = entries.iter().map(|e| e.endorsed_names.len()).sum::<usize>();
    let with_embeddings = entries.iter().filter(|e| e.embedding.is_some()).count();
    // Only the first three entries are echoed back, to keep the payload small.
    let sample = entries
        .iter()
        .take(3)
        .map(|e| serde_json::json!({
            "id": e.playbook_id,
            "operation": e.operation,
            "city": e.city,
            "state": e.state,
            "endorsed": e.endorsed_names,
        }))
        .collect::<Vec<_>>();
    Json(serde_json::json!({
        "entries": entries.len(),
        "total_names_endorsed": names_endorsed,
        "entries_with_embeddings": with_embeddings,
        "sample": sample,
    }))
 }
 async fn lance_recall_harness(
    State(state): State<VectorState>,
    Path(index_name): Path<String>,
@ -1791,7 +2267,7 @@ async fn lance_recall_harness(
 ) -> impl IntoResponse {
    let t0 = std::time::Instant::now();
-    let harness_set = harness::EvalSet::load(&state.store, &req.harness).await
+    let harness_set = state.harness_store.load_for_index(&index_name, &req.harness).await
        .map_err(|e| (StatusCode::NOT_FOUND, format!("harness: {e}")))?;
    if !harness_set.ground_truth_built {
        return Err((StatusCode::BAD_REQUEST,
@ -1817,7 +2293,12 @@ async fn lance_recall_harness(
        };
        let qt0 = std::time::Instant::now();
-        let hits = lance_store.search(qv, k).await
+        let hits = lance_store.search(
            qv,
            k,
            Some(req.nprobes.unwrap_or(LANCE_DEFAULT_NPROBES)),
            Some(req.refine_factor.unwrap_or(LANCE_DEFAULT_REFINE_FACTOR)),
        ).await
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("search: {e}")))?;
        let lat_us = qt0.elapsed().as_micros() as f32;
--- a/mcp-server/index.ts
+++ b/mcp-server/index.ts
@ -74,7 +74,10 @@ server.tool(
    top_k: z.number().default(5),
  },
  async ({ question, sql_filter, dataset, id_column, top_k }) => {
-    const body: any = { question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true };
+    const body: any = {
      question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true,
      use_playbook_memory: true,
    };
    if (sql_filter) body.sql_filter = sql_filter;
    const r = await api("POST", "/vectors/hybrid", body);
    return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] };
@ -109,6 +112,7 @@ server.tool(
      index_name: "workers_500k_v1", sql_filter: filter,
      filter_dataset: "ethereal_workers", id_column: "worker_id",
      top_k: headcount * 2, generate: false,
      use_playbook_memory: true,
    });
    let matches = r.sources || [];
    if (required_certs.length > 0) {
@ -384,6 +388,11 @@ async function main() {
            question: b.question, index_name: b.index || "workers_500k_v1",
            sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers",
            id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
            use_playbook_memory: b.use_playbook_memory !== false,
            // Forward explicitly so Bun /search isn't capped by the
            // server's default — boost silently misses good matches when
            // memory has >25 entries and only top-5 playbooks are scanned.
            playbook_memory_k: b.playbook_memory_k ?? 25,
          }));
        }
@ -403,6 +412,8 @@ async function main() {
            index_name: b.index || "workers_500k_v1", sql_filter: filter,
            filter_dataset: b.dataset || "ethereal_workers",
            id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false,
            use_playbook_memory: true,
            playbook_memory_k: 25,
          }));
        }
@ -418,14 +429,57 @@ async function main() {
          return ok(await api("POST", "/vectors/rag", { index_name: b.index || "workers_500k_v1", question: b.question, top_k: b.top_k || 5 }));
        }
-        // Tool: log success
+        // Tool: log success.
        //
        // BUG FIX 2026-04-20: previously this also POSTed a 1-row CSV to
        // /ingest/file?name=successful_playbooks. That endpoint REPLACES
        // the dataset's object list rather than appending — so every /log
        // call destroyed all prior rows in the SQL-queryable
        // successful_playbooks table. Chain-of-custody trace caught it:
        // sp_rows went 33 → 1 in a single /log call.
        //
        // Until a proper append endpoint exists (Phase 8 delta write
        // surface for the SQL table), /log seeds playbook_memory (the
        // in-memory append-only store that drives boost) and, after a
        // successful seed, mirrors it to successful_playbooks_live via
        // /vectors/playbook_memory/persist_sql. The original SQL
        // successful_playbooks table is now treated as derived state
        // that gets rebuilt explicitly via /vectors/playbook_memory/rebuild
        // — never written to by the recruiter path.
        if (url.pathname === "/log") {
          const b = await json();
-          const csv = `timestamp,operation,approach,result,context\n"${new Date().toISOString()}","${(b.operation||"").replace(/"/g,'""')}","${(b.approach||"").replace(/"/g,'""')}","${(b.result||"").replace(/"/g,'""')}","${(b.context||"").replace(/"/g,'""')}"`;
+          // Result format expected: "{filled}/{needed} filled → Name1, Name2, Name3"
-          const form = new FormData();
+          const result = String(b.result || "");
-          form.append("file", new Blob([csv], { type: "text/csv" }), "playbook.csv");
+          const arrowIdx = result.indexOf("→");
-          const r = await fetch(`${BASE}/ingest/file?name=successful_playbooks`, { method: "POST", body: form });
+          const namesPart = arrowIdx >= 0 ? result.slice(arrowIdx + 1) : "";
-          return ok({ logged: true, response: await r.text() });
+          const endorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean);
          let seeded = 0;
          let persisted_rows = 0;
          if (endorsed.length && /fill:.+ in .+,.+/i.test(String(b.operation || ""))) {
            const canonicalApproach = `${(b.approach || "manual log").split(/[\.\n]/)[0]}`.slice(0, 80);
            const canonicalContext = `${(b.context || "").split(/[\.\n]/)[0]}`.slice(0, 80);
            const seedRes = await api("POST", "/vectors/playbook_memory/seed", {
              operation: b.operation,
              approach: canonicalApproach,
              context: canonicalContext,
              endorsed_names: endorsed,
              append: true,
            }).catch(() => null) as any;
            if (seedRes && seedRes.playbook_id) {
              seeded = endorsed.length;
              // After every successful seed, persist memory → SQL so the
              // successful_playbooks_live table reflects current operator
              // activity. /persist_sql writes the FULL state, which is safe
              // because in-memory playbook_memory IS the source of truth
              // (no concurrent writer outside this process modifies it).
              const pr = await api("POST", "/vectors/playbook_memory/persist_sql", {}).catch(() => null) as any;
              if (pr && typeof pr.rows_persisted === "number") persisted_rows = pr.rows_persisted;
            }
          }
          return ok({
            logged: true,
            seeded,
            persisted_to_sql: persisted_rows,
            note: "successful_playbooks_live (NOT successful_playbooks) is the SQL surface for live operator activity. /log is non-destructive.",
          });
        }
        // Tool: get playbooks
@ -480,6 +534,7 @@ async function main() {
            question: "reliable forklift operator", index_name: "workers_500k_v1",
            sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
            filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
            use_playbook_memory: true,
          });
          tests.push({
            name: "Hybrid SQL+Vector Search", ms: Date.now() - ht0,
@ -987,6 +1042,7 @@ tr:hover{background:#111827}
            question: "reliable forklift operator", index_name: "workers_500k_v1",
            sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
            filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
            use_playbook_memory: true,
          });
          tests.push({
            name: "Hybrid SQL+Vector", ms: Date.now() - ht0,
@ -1435,6 +1491,26 @@ const SCENARIOS = [
 // Return one element of `arr`, chosen with Math.random(). An empty array
 // yields undefined at runtime even though the declared return type is T.
 function pick<T>(arr: T[]): T {
  const slot = Math.floor(Math.random() * arr.length);
  return arr[slot];
 }
 // Seed playbook_memory from a filled contract so the next hybrid query
 // ranks against it. runWeekSimulation calls this once per filled contract
 // at the end of each simulated day. (The /log endpoint posts to the same
 // /vectors/playbook_memory/seed route directly rather than through this
 // helper.) Fail-soft — seeding is best-effort; an API failure is ignored.
 //
 // `c` is a contract record. Fields read: matches[] (each with name and/or
 // doc_id), role, headcount, city, state, situation, priority, client,
 // start. At most the first five matches are endorsed.
 async function seedPlaybookFromContract(c: any) {
  // A match whose "name" fell back to its raw vector doc_id is not a real
  // worker name and must not be endorsed. Doc ids for workers_500k_v1 are
  // documented as "W500K-<n>", which the previous "W500-" prefix test never
  // matched, so both spellings are rejected here. Non-string values are
  // dropped too rather than throwing on a string method.
  const DOC_ID_LIKE = /^W500K?-/;
  const names = (c.matches || []).slice(0, 5)
    .map((m: any) => m.name || m.doc_id)
    .filter((n: any) => typeof n === "string" && n !== "" && !DOC_ID_LIKE.test(n));
  if (!names.length) return;
  // Same "fill: <role> x<count> in <city>, <state>" shape the /log handler
  // checks for before it seeds.
  const op = `fill: ${c.role} x${c.headcount} in ${c.city}, ${c.state}`;
  try {
    await api("POST", "/vectors/playbook_memory/seed", {
      operation: op,
      approach: `${c.situation || c.priority || "fill"} → hybrid search`,
      context: `client=${c.client || ""} start=${c.start || ""}`,
      endorsed_names: names,
      append: true,
    });
  } catch {
    // Deliberately ignored: a failed seed must not break the caller.
  }
 }
 async function runWeekSimulation() {
  const days = ["Monday","Tuesday","Wednesday","Thursday","Friday"];
  const staffers = ["Sarah (Lead)","Mike (Senior)","Kim (Junior)"];
@ -1468,7 +1544,7 @@ async function runWeekSimulation() {
      if (priority === "urgent") emergencies++;
      totalNeeded += headcount;
-      // Run hybrid search
+      // Run hybrid search — Phase 19: boost on so past playbooks shape ranking
      let filled = 0;
      let matches: any[] = [];
      try {
@ -1481,12 +1557,15 @@ async function runWeekSimulation() {
          id_column: "worker_id",
          top_k: headcount + 2,
          generate: false,
          use_playbook_memory: true,
        });
        matches = (r.sources || []).slice(0, headcount).map((s: any) => ({
          doc_id: s.doc_id,
          name: s.chunk_text?.split("—")[0]?.trim() || s.doc_id,
          score: s.score,
          chunk_text: s.chunk_text || "",
          playbook_boost: s.playbook_boost || 0,
          playbook_citations: s.playbook_citations || [],
        }));
        filled = matches.length;
      } catch {}
@ -1501,7 +1580,15 @@ async function runWeekSimulation() {
      });
    }
-    // End of day: log playbook + prepare handoff
+    // End of day: seed playbook_memory with TODAY's filled contracts so
    // tomorrow's hybrid search ranks against them. This is the in-week
    // feedback loop — without this, day 5 doesn't benefit from day 1.
    for (const c of contracts) {
      if (c.matches && c.matches.length) {
        await seedPlaybookFromContract(c).catch(() => {});
      }
    }
    if (d < 4) {
      handoffs++;
      try {
@ -1530,29 +1617,18 @@ async function runWeekSimulation() {
    playbook_entries: playbookEntries,
  };
-  // Log every filled contract as a playbook entry — this is the training data
+  // BUG FIX 2026-04-20: previously this POSTed a multi-row CSV to
-  try {
+  // /ingest/file?name=successful_playbooks at end of every simulation.
-    const ts = new Date().toISOString();
+  // That endpoint REPLACES the dataset's object list — so each
-    const rows: string[] = [];
+  // /simulation/run wiped the prior simulation's rows. The SQL
-    for (const day of results) {
+  // successful_playbooks table was never accumulating; it always reflected
-      for (const c of day.contracts) {
+  // only the most-recent simulation batch.
-        if (c.matches && c.matches.length > 0) {
+  //
-          const workerNames = c.matches.slice(0, 3).map((m: any) => m.name || m.doc_id).join(", ");
+  // Per-day per-contract seeding via /vectors/playbook_memory/seed
-          const op = `fill: ${c.role} x${c.headcount} in ${c.city}, ${c.state}`;
+  // (added Pass 1, runs inside the day loop above) is the path that
-          const approach = `${c.situation} (${c.priority}) → hybrid search`;
+  // actually accumulates feedback. The SQL successful_playbooks table is
-          const result = `${c.filled}/${c.headcount} filled → ${workerNames}`;
+  // intentionally not written by /simulation/run anymore until a proper
-          const context = `client=${c.client} start=${c.start} scenario=${c.situation}`;
+  // append surface exists.
          rows.push(`"${ts}","${op.replace(/"/g,'""')}","${approach}","${result.replace(/"/g,'""')}","${context.replace(/"/g,'""')}"`);
        }
      }
    }
    if (rows.length) {
      const csv = `timestamp,operation,approach,result,context\n${rows.join("\n")}`;
      const form = new FormData();
      form.append("file", new Blob([csv], { type: "text/csv" }), "playbook.csv");
      await fetch(`${BASE}/ingest/file?name=successful_playbooks`, { method: "POST", body: form });
    }
  } catch {}
  return { days: results, summary };
 }
--- a/mcp-server/search.html
+++ b/mcp-server/search.html
@ -384,11 +384,13 @@ function addContractInsight(parent,c,isUrgent){
      if(isUrgent&&i===0)label='FIRST CHOICE — highest match score, call first';
      else if(isUrgent&&i>0&&i<c.headcount)label='';
      else if(isUrgent&&i>=c.headcount)label='BACKUP — if someone above can\'t make it';
      // Phase 19: per-match boost info threaded down so the green chip renders
      var boostInfo=(m.playbook_boost>0)?{boost:m.playbook_boost,citations:m.playbook_citations||[]}:null;
      addWorkerInsight(cd,w.nm,
        [w.role,w.loc].filter(Boolean).join(' · '),
        label||buildWhyText(w,c),i,
        isUrgent&&i===0?'#f85149':isUrgent&&i>=c.headcount?'#484f58':null,
-        w);
+        w,boostInfo);
    });
    var remaining=c.matches.length-showCount;
    if(remaining>0){
@ -570,12 +572,23 @@ function addWorkerInsight(parent,name,detail,why,idx,highlight){
  if(highlight)w.style.borderLeft='3px solid '+highlight;
  w.style.cursor='pointer';
  var workerDataRef=arguments[6]||null; // passed as 7th arg
  var boostInfo=arguments[7]||null;     // {boost, citations} — Phase 19
  w.onclick=function(){if(workerDataRef)showProfile(workerDataRef)};
  var av=document.createElement('div');av.className='av';av.style.background=AC[(idx||0)%AC.length];
  av.textContent=(name||'?').split(' ').map(function(n){return(n[0]||'').toUpperCase()}).join('').substring(0,2);
  w.appendChild(av);
  var info=document.createElement('div');info.className='info';
  var nm=document.createElement('div');nm.className='nm';nm.textContent=name;
  // Phase 19: when a past playbook endorsed this worker, show a green chip
  // next to the name. Hover reveals the citation IDs.
  if(boostInfo && boostInfo.boost > 0){
    var chip=document.createElement('span');
    chip.style.cssText='display:inline-block;margin-left:8px;padding:2px 7px;border-radius:9px;font-size:10px;font-weight:600;background:#0d2818;border:1px solid #2ea043;color:#3fb950;vertical-align:middle';
    var n=(boostInfo.citations && boostInfo.citations.length) || 0;
    chip.textContent='Endorsed · '+n+' playbook'+(n!==1?'s':'');
    chip.title='Boosted by past playbooks: '+(boostInfo.citations||[]).join(', ');
    nm.appendChild(chip);
  }
  var dt=document.createElement('div');dt.className='detail';dt.textContent=detail;
  info.appendChild(nm);info.appendChild(dt);
  if(why){var wh=document.createElement('div');wh.className='why';wh.textContent=why;info.appendChild(wh)}
--- a/tests/multi-agent/agent.ts
+++ b/tests/multi-agent/agent.ts
@ -0,0 +1,351 @@
 // Shared runtime for one agent. An agent is a role (executor or reviewer),
 // a model name, and a conversation the orchestrator hands it. The agent
 // produces ONE structured Action per turn; the orchestrator applies tool
 // calls and feeds results back.
 //
 // Fail-fast: every HTTP error, parse error, and Ollama error throws. The
 // orchestrator catches at the top and exits non-zero with the full log.
 export const GATEWAY = "http://localhost:3100";
 export const SIDECAR = "http://localhost:3200";
 // --- Shared types ---
 export type Role = "executor" | "reviewer";
 // Description of one fill task. The executor and reviewer prompt builders
 // interpolate operation, target_count, target_role, target_city and
 // target_state; approach_hint is only shown to the executor, and only
 // when it is set.
 export interface TaskSpec {
  id: string;                  // task identifier; not interpolated into either prompt in this file
  operation: string;           // "fill: Welder x2 in Columbus, OH"
  target_role: string;         // "Welder"
  target_count: number;        // 2
  target_city: string;         // "Columbus"
  target_state: string;        // "OH"
  approach_hint?: string;      // e.g. "hybrid search"; agent is free to ignore
 }
 // One entry in the shared turn log. When the log is rendered into a
 // prompt, `turn` and `role` form the line prefix and `content` is
 // summarised according to `kind`.
 export interface LogEntry {
  turn: number;   // turn counter, printed as "t<turn>" in the rendered log
  role: Role;     // which agent the entry is attributed to
  model: string;  // model name for the agent that produced the entry
  at: string;     // presumably a timestamp string set by the orchestrator — confirm format there
  // Discriminator for `content`. Besides the four Action kinds this also
  // covers tool_result, consensus_done and error entries.
  kind:
    | "plan"
    | "tool_call"
    | "tool_result"
    | "critique"
    | "propose_done"
    | "consensus_done"
    | "error";
  // Payload whose shape depends on `kind` (e.g. tool_result entries may
  // carry sources[], rows[] or error). Untyped, so readers must guard.
  content: any;
 }
 // Action = what an agent returns on one turn. Strict shape so we can
 // enforce it at parse time rather than prompt-engineer around malformed
 // JSON. parseAction accepts plan / tool_call / propose_done from the
 // executor and only critique from the reviewer.
 export type Action =
  // Executor asks for a tool to be run with the given args.
  | { kind: "tool_call"; tool: string; args: Record<string, any>; rationale: string }
  // Executor claims the target is met and lists its picks.
  | { kind: "propose_done"; fills: Fill[]; rationale: string }
  // Reviewer verdict on the executor's latest move.
  | { kind: "critique"; verdict: "continue" | "drift" | "approve_done"; notes: string }
  // Executor's outline of its approach.
  | { kind: "plan"; steps: string[] };
 // One proposed placement inside a propose_done action.
 export interface Fill {
  candidate_id: string;  // identifier of the chosen worker; rendered as "<candidate_id>:<name>" in log summaries
  name: string;          // worker's display name ("First Last" in the executor prompt's example)
  reason: string;        // executor's justification for this pick
 }
 // --- HTTP helpers (fail-fast) ---
 // JSON-over-HTTP helper. Sends `body` (when truthy) as a JSON payload,
 // throws an Error carrying method, URL, status and response text on any
 // non-2xx reply, and otherwise returns the parsed JSON response.
 async function http<T>(method: string, url: string, body?: any): Promise<T> {
  const init = {
    method,
    headers: { "Content-Type": "application/json" },
    body: body ? JSON.stringify(body) : undefined,
  };
  const response = await fetch(url, init);
  if (response.ok) {
    return (await response.json()) as T;
  }
  // Read the error body before throwing so the failure is self-describing.
  const detail = await response.text();
  throw new Error(`${method} ${url} → ${response.status}: ${detail}`);
 }
 // Tool calls land in the Phase 12 audit log keyed by this agent name.
 // Distinguishable from human-driven calls (agent=="operator" or similar)
 // so post-hoc queries can separate multi-agent runs.
 export const TOOL_AGENT_ID = "multi-agent-test";
 // Invoke the gateway tool named `tool` with `args`, tagging the request
 // with TOOL_AGENT_ID. Resolves to the gateway's parsed JSON reply; HTTP
 // failures propagate as thrown errors.
 export async function callTool(tool: string, args: Record<string, any>): Promise<any> {
  const endpoint = `${GATEWAY}/tools/${tool}/call`;
  const payload = { params: args, agent: TOOL_AGENT_ID };
  return http("POST", endpoint, payload);
 }
 // Direct call to the gateway's /vectors/hybrid endpoint (SQL filter, then
 // semantic ranking).
 //
 //   sql_filter  SQL WHERE clause used to narrow candidates
 //   question    natural-language query to rank against
 //   k           maximum number of results requested (default 10)
 //   index_name  vector index to search (default "workers_500k_v1")
 //
 // The result count is sent as `top_k` and the index is always named,
 // matching the request shape the other /vectors/hybrid callers use.
 // Previously it went out as `k` with no index_name. NOTE(review): the
 // server's request schema is not visible from here — confirm it reads
 // top_k / index_name as assumed.
 export async function hybridSearch(
  sql_filter: string,
  question: string,
  k = 10,
  index_name = "workers_500k_v1",
 ): Promise<any> {
  return http("POST", `${GATEWAY}/vectors/hybrid`, { index_name, sql_filter, question, top_k: k });
 }
 // Run `sql` through the gateway's /query/sql endpoint, asking for JSON
 // output. Resolves to the parsed reply; HTTP failures throw.
 export async function sqlQuery(sql: string): Promise<any> {
  const payload = { sql, format: "json" };
  return http("POST", `${GATEWAY}/query/sql`, payload);
 }
 // Ask the sidecar to generate text with `model` for `prompt`.
 // keep_alive is intentionally not forwarded: Ollama's default (5 min)
 // already keeps the model warm between turns.
 //
 // opts.temperature defaults to 0.3 and opts.max_tokens to 800;
 // opts.system is only sent when it is truthy. Throws when the reply has
 // no non-empty string `text` field.
 export async function generate(model: string, prompt: string, opts: {
  max_tokens?: number;
  temperature?: number;
  system?: string;
 } = {}): Promise<string> {
  const payload: Record<string, any> = { model, prompt };
  payload.temperature = opts.temperature ?? 0.3;
  payload.max_tokens = opts.max_tokens ?? 800;
  if (opts.system) {
    payload.system = opts.system;
  }
  const reply = await http<any>("POST", `${SIDECAR}/generate`, payload);
  const text = reply.text ?? "";
  if (text && typeof text === "string") {
    return text;
  }
  throw new Error(`generate returned empty text from ${model}: ${JSON.stringify(reply).slice(0, 200)}`);
 }
 // --- Prompt construction ---
 // Tool reference pasted verbatim into the executor prompt. The sql example
 // filters on a numeric worker_id so that it agrees with the "ID mapping"
 // rule further down (worker_id is an INTEGER); it previously compared
 // against the string 'W123456', contradicting that rule.
 const TOOL_CATALOG = `
 Available tools (each takes a JSON "args" object):
 - hybrid_search(sql_filter: string, question: string, index_name: string, k?: number)
  → Narrow workers via SQL WHERE clause, then rank by semantic match.
  → Canonical production tool for fill tasks. Always use this FIRST.
  → Example args:
    {"index_name":"workers_500k_v1",
     "sql_filter":"LOWER(role) LIKE '%weld%' AND city = 'Toledo' AND state = 'OH' AND availability > 0.5",
     "question":"reliable welder with OSHA certs",
     "k":10}
 - sql(query: string)
  → Raw read-only SELECT. Use for verification (confirm a worker exists,
    check city/role/availability) after hybrid_search surfaces candidates.
  → Schema of workers_500k: worker_id, name, role, email, phone, city,
    state, zip, skills, certifications, archetype, reliability,
    responsiveness, engagement, communications, compliance, availability,
    resume_text.
  → Example args:
    {"query":"SELECT worker_id, name, role, city, state, availability FROM workers_500k WHERE worker_id = 7995"}
 Rules:
 - hybrid_search returns sources[] each with {doc_id, chunk_text, score, sql_verified}.
 - **ID mapping:** vector doc_ids look like "W500K-7995" (prefix + number).
  The SQL worker_id is an INTEGER. To go from doc_id to SQL, strip the
  "W500K-" prefix and cast:
    SELECT ... FROM workers_500k WHERE worker_id = CAST(SUBSTR('W500K-7995', 7) AS BIGINT)
  or more simply: WHERE worker_id = 7995.
 - Names are NOT unique. Always identify by worker_id, never by name alone.
 - Return EXACTLY ONE JSON object per turn. No prose outside the JSON.
 `;
 // Condense one log entry into a single line for the prompt. Each kind gets
 // its own compact rendering so the substance survives truncation — a raw
 // JSON dump of a hybrid_search result would push sources[] past any
 // sensible character cap.
 function summarizeEntry(e: LogEntry): string {
  const body = e.content ?? {};
  const kind = e.kind;

  if (kind === "plan") {
    const numbered = (body.steps ?? []).map((step: string, idx: number) => `${idx + 1}.${step}`);
    return `PLAN: ${numbered.join(" ")}`;
  }

  if (kind === "tool_call") {
    const argText = JSON.stringify(body.args ?? {}).slice(0, 250);
    const why = body.rationale ? ` — ${body.rationale}` : "";
    return `TOOL_CALL ${body.tool}(${argText})${why}`;
  }

  if (kind === "tool_result") {
    if (body.error) return `TOOL_RESULT error: ${body.error}`;

    // hybrid_search reply: show the first five sources, count the rest.
    if (Array.isArray(body.sources)) {
      const shown = body.sources.slice(0, 5).map((src: any) => {
        const mark = src.sql_verified ? "✓" : "";
        const score = (src.score ?? 0).toFixed(2);
        const snippet = String(src.chunk_text ?? "").slice(0, 80);
        return `${src.doc_id}${mark} score=${score}: ${snippet}`;
      }).join(" | ");
      const extra = body.sources.length > 5 ? ` +${body.sources.length - 5} more` : "";
      return `TOOL_RESULT hybrid: sql_matches=${body.sql_matches} vector_reranked=${body.vector_reranked} sources=[${shown}${extra}]`;
    }

    // sql reply: row count plus the first five rows as JSON.
    if (Array.isArray(body.rows)) {
      const total = body.rows.length;
      let tail = "";
      if (total > 0) {
        const shownRows = body.rows.slice(0, 5).map((row: any) => JSON.stringify(row)).join(" | ");
        const extraRows = total > 5 ? ` +${total - 5} more` : "";
        tail = ` — ${shownRows}${extraRows}`;
      }
      return `TOOL_RESULT sql: ${total} rows${tail}`;
    }

    // Anything else: truncated raw JSON.
    return `TOOL_RESULT ${JSON.stringify(body).slice(0, 250)}`;
  }

  if (kind === "critique") {
    const notes = String(body.notes ?? "").slice(0, 200);
    return `CRITIQUE verdict=${body.verdict} notes: ${notes}`;
  }

  if (kind === "propose_done") {
    const picks = (body.fills ?? []).map((f: Fill) => `${f.candidate_id}:${f.name}`).join(", ");
    const why = String(body.rationale ?? "").slice(0, 120);
    return `PROPOSE_DONE fills=[${picks}] rationale: ${why}`;
  }

  if (kind === "consensus_done") {
    return `CONSENSUS ✓`;
  }

  if (kind === "error") {
    return `ERROR ${body.message ?? JSON.stringify(body)}`;
  }

  // Unknown kind at runtime: fall back to truncated JSON.
  return JSON.stringify(body).slice(0, 200);
 }
 // Render the last twelve log entries, one per line, each prefixed with
 // "[t<turn> <role>]". An empty log renders as a fixed placeholder.
 function renderLogForPrompt(log: LogEntry[]): string {
  if (!log.length) {
    return "(no turns yet)";
  }
  const lines: string[] = [];
  for (const entry of log.slice(-12)) {
    lines.push(`[t${entry.turn} ${entry.role}] ${summarizeEntry(entry)}`);
  }
  return lines.join("\n");
 }
 // Build a running ledger of every worker surfaced by a tool_result that
 // carries sources[]. Models tend to lose track of earlier turns as the
 // conversation grows, so the orchestrator keeps this state and the
 // executor simply reads it from the prompt. Entries are keyed by doc_id;
 // the first sighting of a doc_id wins. Sources whose chunk_text does not
 // parse into name, city and state are left out.
 function candidatesSeen(log: LogEntry[]): Array<{ doc_id: string; name: string; city: string; state: string }> {
  const ledger = new Map<string, { doc_id: string; name: string; city: string; state: string }>();
  const toolResults = log.filter(entry => entry.kind === "tool_result");
  for (const entry of toolResults) {
    const hits = (entry.content as any)?.sources;
    if (!Array.isArray(hits)) continue;
    hits.forEach((hit: any) => {
      // Expected chunk_text layout: "Name — Role in City, ST. …"
      const pieces = String(hit.chunk_text ?? "").split("—", 2);
      const rawName = pieces[0];
      const tail = pieces[1];
      if (!rawName || !tail) return;
      const afterIn = tail.split(" in ")[1] ?? "";
      const locParts = afterIn.split(",", 2);
      // Keep only the leading letters of the state segment.
      const state = (locParts[1] ?? "").trim().replace(/[^A-Za-z].*/, "");
      const name = rawName.trim();
      const city = locParts[0]?.trim();
      if (!hit.doc_id || !name || !city || !state) return;
      if (ledger.has(hit.doc_id)) return;
      ledger.set(hit.doc_id, { doc_id: hit.doc_id, name, city, state });
    });
  }
  return Array.from(ledger.values());
 }
 // Build the executor's prompt for one turn. The prompt contains the task
 // target, the tool catalog, the ledger of candidates already surfaced by
 // earlier tool results, the rendered recent log, and the JSON shapes the
 // executor may answer with (plan, tool_call, propose_done).
 export function executorPrompt(task: TaskSpec, log: LogEntry[]): string {
  const logStr = renderLogForPrompt(log);
  const seen = candidatesSeen(log);
  // One bullet per known candidate, or a nudge to start searching when
  // nothing has been surfaced yet.
  const seenBlock = seen.length === 0
    ? "(no candidates surfaced yet — start with hybrid_search)"
    : seen.map(s => `  - ${s.doc_id} ${s.name} (${s.city}, ${s.state})`).join("\n");
  return `You are the EXECUTOR agent. Your job is to complete this task:
 OPERATION: ${task.operation}
 TARGET: ${task.target_count} × ${task.target_role} in ${task.target_city}, ${task.target_state}
 ${task.approach_hint ? `HINT: ${task.approach_hint}` : ""}
 The REVIEWER agent is watching every turn. They will flag drift. Stay on target.
 ${TOOL_CATALOG}
 CANDIDATES SURFACED SO FAR (orchestrator-tracked, do not forget these):
 ${seenBlock}
 SHARED LOG (recent turns):
 ${logStr}
 Your next action MUST be a JSON object matching one of these shapes:
 {"kind":"plan","steps":["short step 1","short step 2",...]}
  — use on turn 1 to outline your approach. Steps must be concrete.
 {"kind":"tool_call","tool":"...","args":{...},"rationale":"why"}
  — call a tool and see its result next turn.
 {"kind":"propose_done","fills":[{"candidate_id":"...","name":"First Last","reason":"why them"}],"rationale":"..."}
  — propose you've met the target. fills MUST have EXACTLY ${task.target_count} entries — count twice before emitting.
 Strategy tip: once "CANDIDATES SURFACED SO FAR" has ≥ ${task.target_count} entries in ${task.target_city}, ${task.target_state} matching ${task.target_role}, verify ONE via the sql tool (to satisfy the reviewer's SQL-verification criterion) and then propose_done with the top ${task.target_count}. Don't keep re-searching.
 Respond with ONLY the JSON object. No markdown fences, no prose.`;
 }
 // Build the reviewer's prompt for one turn. The prompt states the task
 // target, the drift patterns to watch for, the rendered recent log, the
 // single critique JSON shape with its three verdicts, and the approval
 // criteria. When the executor's latest entry is a propose_done, an extra
 // HARD RULE paragraph forbids answering "continue".
 export function reviewerPrompt(task: TaskSpec, log: LogEntry[]): string {
  const logStr = renderLogForPrompt(log);
  // If the most recent executor action was propose_done, the reviewer
  // must commit to an up-or-down vote this turn — "continue" would stall
  // the orchestrator forever. The wider prompt still describes all three
  // verdicts, but we add a hard rule at the end that the model must obey.
  // lastExec is the newest entry whose role is "executor", whatever its
  // kind; the copy via spread keeps reverse() from mutating the caller's log.
  const lastExec = [...log].reverse().find(e => e.role === "executor");
  const awaitingApproval = lastExec?.kind === "propose_done";
  return `You are the REVIEWER agent. The EXECUTOR is trying to complete this task:
 OPERATION: ${task.operation}
 TARGET: ${task.target_count} × ${task.target_role} in ${task.target_city}, ${task.target_state}
 Your job: catch drift. Agents often wander from the actual objective. Specifically watch for:
 - Proposing candidates who aren't in ${task.target_city}, ${task.target_state}.
 - Proposing candidates who don't have ${task.target_role} skill.
 - Proposing fewer or more than ${task.target_count} fills.
 - Irrelevant tool calls (e.g. revenue_by_client when the task is a fill).
 Available tools (for reference, but YOU don't call them):
 - hybrid_search(sql_filter, question, index_name, k) — production fill path
 - sql(query) — read-only SELECT for verification
 SHARED LOG (recent turns):
 ${logStr}
 Your next action MUST be a JSON object:
 {"kind":"critique","verdict":"continue" | "drift" | "approve_done","notes":"..."}
 - "continue" → executor is on a reasonable path, let them keep going.
 - "drift" → executor is off-track; notes MUST tell them how to redirect.
 - "approve_done" → executor's propose_done meets the criteria. Seal it.
 APPROVAL CRITERIA (use these only for propose_done):
 1. Exactly ${task.target_count} fills.
 2. Each fill's name appears in a prior tool_result from ${task.target_city}, ${task.target_state} matching role "${task.target_role}".
 3. Executor has SQL-verified at least one of the fills (any prior sql tool_result with that worker).
 If 1–3 all hold, return approve_done. Do not demand further verification.
 ${awaitingApproval ? `
 HARD RULE: The executor's most recent action was propose_done. On this turn you CANNOT return "continue" — it would stall the task. Choose approve_done (proposal is valid by the 3 criteria above) or drift (it fails one; state which in notes).` : ""}
 Respond with ONLY the JSON object.`;
 }
 // Parse an agent's raw model output into an Action, or throw.
 //
 //   raw   text returned by the model; may be wrapped in ``` / ```json fences
 //         or surrounded by stray prose
 //   role  which agent produced it; decides which Action kinds are accepted
 //
 // Executor output must be a plan (steps array), a tool_call (string tool
 // plus a plain-object args) or a propose_done (fills array). Reviewer
 // output must be a critique with a known verdict; the shorthand form where
 // the verdict is given directly as `kind` is normalised to a critique.
 //
 // Throws Error when no JSON object is found, when the JSON does not parse,
 // or when the object does not match a shape allowed for `role`.
 export function parseAction(raw: string, role: Role): Action {
  // Models sometimes wrap JSON in ```json fences; strip them.
  let s = raw.trim();
  if (s.startsWith("```")) {
    s = s.replace(/^```(?:json)?\n?/, "").replace(/```$/, "").trim();
  }
  // Take everything from the first "{" to the last "}" so that prose
  // before or after the object is ignored.
  const start = s.indexOf("{");
  const end = s.lastIndexOf("}");
  if (start < 0 || end <= start) {
    throw new Error(`no JSON object in ${role} response: ${raw.slice(0, 300)}`);
  }
  const json = s.slice(start, end + 1);
  let obj: any;
  try {
    obj = JSON.parse(json);
  } catch (e) {
    throw new Error(`invalid JSON from ${role}: ${(e as Error).message} | raw: ${json.slice(0, 300)}`);
  }
  if (role === "executor") {
    if (obj.kind === "plan" && Array.isArray(obj.steps)) return obj as Action;
    // typeof null and typeof [] are both "object"; neither is a usable
    // args record, so reject them here instead of letting them reach a
    // tool call.
    const argsIsRecord = obj.args !== null && typeof obj.args === "object" && !Array.isArray(obj.args);
    if (obj.kind === "tool_call" && typeof obj.tool === "string" && argsIsRecord) return obj as Action;
    if (obj.kind === "propose_done" && Array.isArray(obj.fills)) return obj as Action;
    throw new Error(`executor returned unexpected shape: ${JSON.stringify(obj).slice(0, 200)}`);
  } else {
    // Normalize: some models (qwen2.5, mistral) emit the verdict AS the
    // `kind` field directly instead of nesting it under a "critique"
    // wrapper. Accept both shapes rather than hard-failing — the
    // semantic content is identical, and rejecting would stall the
    // orchestrator on a cosmetic schema miss.
    const verdicts = ["continue", "drift", "approve_done"];
    if (obj.kind === "critique" && verdicts.includes(obj.verdict)) {
      return obj as Action;
    }
    if (verdicts.includes(obj.kind)) {
      return { kind: "critique", verdict: obj.kind, notes: obj.notes ?? "" } as Action;
    }
    throw new Error(`reviewer returned unexpected shape: ${JSON.stringify(obj).slice(0, 200)}`);
  }
 }
--- a/tests/multi-agent/chain_of_custody.ts
+++ b/tests/multi-agent/chain_of_custody.ts
@ -0,0 +1,335 @@
 // Chain-of-custody trace test.
 //
 // J's framing: "we have enough synthetic data, we've run enough AI responses
 // saved to the database. Test true quality. Don't ignore chain of custody.
 // Use real applications. Understand each aspect of the flow — not just
 // 'write a file or directory and open it'."
 //
 // One real recruiter operation, traced end-to-end through EVERY layer of the
 // live substrate. Every layer must record the operation correctly. Any layer
 // that drops it = chain-of-custody break = surfaced as a real bug.
 //
 // Layers verified:
 //   L0  Bun /search           — recruiter app surface (NOT bare /vectors/hybrid)
 //   L1  /vectors/hybrid       — direct gateway (parity check vs L0)
 //   L2  /vectors/playbook_memory/stats  — feedback loop count
 //   L3  Bun /log              — recruiter records the pick
 //   L4  successful_playbooks_live — SQL-queryable table of past fills (the table snapshot() counts)
 //   L5  /vectors/playbook_memory/stats  — count grew
 //   L6  tools/audit           — Phase 12 governance trail
 //   L7  /access/audit         — Phase 13 access trail
 //   L8  /journal/recent       — Phase 9 mutation events
 //   L9  /storage/errors       — Federation error journal (no new errors)
 //   L10 /vectors/profile/{id}/activate — Phase 17 hot-swap
 //   L11 Bun /search again     — boost lifts the just-logged worker
 //   L12 verifier qwen2.5      — reads cross-layer state, judges integrity
 //
 // Run:  bun run tests/multi-agent/chain_of_custody.ts
 //
 // Prints per-layer BEFORE/AFTER/DELTA. Exit non-zero on any chain break.
 import { generate, GATEWAY } from "./agent.ts";
 // Base URL of the Bun recruiter app; /search and /log are called against it.
 const BUN = "http://localhost:3700";
 // Profile id used in the /vectors/profile/{id}/activate call (layer L10).
 const PROFILE_ID = "staffing-recruiter";
 // The trace operation — small, deterministic, real city/role with supply.
 // Helen Sanchez (worker_id 4661) is a known Toledo Welder; we record her
 // as the manual pick the recruiter would make from the /search results.
 const OPERATION = "fill: Welder x1 in Toledo, OH";
 // Role / city / state are interpolated into the sql_filter and the search
 // question, so they must stay in agreement with OPERATION above.
 const OP_ROLE = "Welder";
 const OP_CITY = "Toledo";
 const OP_STATE = "OH";
 // Matched by substring against each hit's chunk_text in the L11 checks.
 const PICKED_WORKER = "Helen Sanchez";  // verified earlier to be a Toledo OH Welder
 // ─────────────────────── helpers ───────────────────────
 // GET `url` and decode the JSON response body.
 //
 // Returns null on any failure — network error, non-2xx status, or a body
 // that is not valid JSON — so callers can treat "unavailable" uniformly.
 async function getJSON<T = any>(url: string): Promise<T | null> {
  try {
    const r = await fetch(url);
    if (!r.ok) return null;
    // Await inside the try: a bare `return r.json()` would hand the pending
    // promise to the caller, and a parse rejection would bypass this catch.
    return (await r.json()) as T;
  } catch { return null; }
 }
 // POST `body` as JSON to `url` and decode the JSON response body.
 //
 // Never throws: a non-2xx status, a network error, or an undecodable
 // response body is reported as an object of the form `{ _error: string }`,
 // which callers detect via `result?._error`.
 async function postJSON<T = any>(url: string, body: any): Promise<T | null> {
  try {
    const r = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body) });
    if (!r.ok) return { _error: `${r.status}: ${await r.text()}` } as any;
    // Await inside the try: a bare `return r.json()` would let a parse
    // rejection escape this catch instead of becoming an `_error` result.
    return (await r.json()) as T;
  } catch (e) { return { _error: (e as Error).message } as any; }
 }
 // Send one SQL statement to the gateway's /query/sql endpoint through
 // postJSON and hand back whatever postJSON resolves to.
 async function sql(query: string): Promise<{ rows?: any[]; error?: string } | null> {
  const endpoint = `${GATEWAY}/query/sql`;
  const payload = { sql: query };
  return postJSON(endpoint, payload);
 }
 // Point-in-time counters captured by snapshot() before and after the traced
 // operation. snapshot() stores -1 for a counter it could not read, except
 // storage_errors, which falls back to 0.
 interface Snapshot {
  // `entries` field of /vectors/playbook_memory/stats.
  pm_entries: number;
  // `total_names_endorsed` field of /vectors/playbook_memory/stats.
  pm_names: number;
  sp_rows: number;                     // COUNT(*) of successful_playbooks_live
  audit_count: number;                 // tools/audit count
  access_count: number;                // access/audit count
  journal_count: number;               // journal/stats events
  storage_errors: number;              // bucket error journal
 }
 // Read every counter the trace compares, one request after another, and
 // fold the responses into a Snapshot. A counter that cannot be read becomes
 // -1; the storage error count becomes 0 instead.
 async function snapshot(): Promise<Snapshot> {
  const playbookStats = await getJSON<any>(`${GATEWAY}/vectors/playbook_memory/stats`);
  // successful_playbooks_live is the live SQL surface populated by /log
  // via /vectors/playbook_memory/persist_sql; the original
  // successful_playbooks table is legacy/historical (no writes).
  const liveRows = await sql(`SELECT COUNT(*) AS c FROM successful_playbooks_live`);
  const toolAudit = await getJSON<any[]>(`${GATEWAY}/tools/audit`);
  const accessAudit = await getJSON<any>(`${GATEWAY}/access/audit`);
  const journal = await getJSON<any>(`${GATEWAY}/journal/stats`);
  const bucketErrors = await getJSON<any[]>(`${GATEWAY}/storage/errors`);

  // Each audit-style endpoint may answer with a bare array or with an
  // object wrapping the list, so probe both shapes.
  let audit_count: number;
  if (Array.isArray(toolAudit)) {
    audit_count = toolAudit.length;
  } else {
    audit_count = (toolAudit as any)?.events?.length ?? -1;
  }

  let access_count: number;
  if (Array.isArray(accessAudit)) {
    access_count = accessAudit.length;
  } else {
    access_count = (accessAudit as any)?.events?.length ?? (accessAudit as any)?.audit?.length ?? -1;
  }

  let storage_errors: number;
  if (Array.isArray(bucketErrors)) {
    storage_errors = bucketErrors.length;
  } else {
    storage_errors = (bucketErrors as any)?.events?.length ?? 0;
  }

  const pm_entries = playbookStats?.entries ?? -1;
  const pm_names = playbookStats?.total_names_endorsed ?? -1;
  const sp_rows = Number(liveRows?.rows?.[0]?.c ?? -1);
  const journal_count = journal?.event_count ?? journal?.total_events ?? journal?.events ?? -1;

  return { pm_entries, pm_names, sp_rows, audit_count, access_count, journal_count, storage_errors };
 }
 // Per-counter difference between two snapshots, computed as after − before.
 // b is the earlier snapshot, a the later one.
 function delta(b: Snapshot, a: Snapshot): Record<string, number> {
  const counters = [
    "pm_entries",
    "pm_names",
    "sp_rows",
    "audit_count",
    "access_count",
    "journal_count",
    "storage_errors",
  ] as const;
  const change: Record<string, number> = {};
  for (const key of counters) {
    change[key] = a[key] - b[key];
  }
  return change;
 }
 // Render one BEFORE → AFTER table row: the label padded to 28 columns, both
 // values right-aligned to 6, then the signed change ("·" when unchanged).
 function fmtRow(label: string, b: number, a: number): string {
  const change = a - b;
  let marker: string;
  if (change === 0) {
    marker = "  ·  ";
  } else if (change > 0) {
    marker = `  +${change}`;
  } else {
    marker = `  ${change}`;
  }
  const name = label.padEnd(28);
  const beforeCell = String(b).padStart(6);
  const afterCell = String(a).padStart(6);
  return "  " + name + "  " + beforeCell + "   →   " + afterCell + "  " + marker;
 }
 // ─────────────────────── trace ───────────────────────
 // One recorded check from runTrace().
 interface TraceResult {
  // Layer label; labels beginning with "CHAIN BREAK" are what main() treats
  // as a hard failure.
  layer: string;
  // Whether the check passed.
  ok: boolean;
  // Human-readable evidence printed beside the layer label.
  detail: string;
 }
 // Drive the single recruiter operation through the live layers and record
 // one TraceResult per check.
 //
 // Sequence: before-snapshot → Bun /search (L0) → direct /vectors/hybrid
 // (L1, plus a Bun↔direct parity check) → Bun /log (L3) → short pause →
 // after-snapshot with growth checks (L5, L4, L9) → profile activation (L10)
 // → second Bun /search and direct probe looking for the logged pick (L11).
 //
 // Returns every note in the order it was recorded. HTTP failures are not
 // thrown; postJSON reports them as `_error`, which is recorded as a failed
 // layer here.
 async function runTrace(): Promise<TraceResult[]> {
  const out: TraceResult[] = [];
  // Record a check and echo it to the console in one step.
  const note = (layer: string, ok: boolean, detail: string) => {
    out.push({ layer, ok, detail });
    console.log(`  ${ok ? "✓" : "✗"} ${layer.padEnd(32)} ${detail}`);
  };
  console.log(`\n▶ Trace operation: ${OPERATION} → pick=${PICKED_WORKER}\n`);
  // ── BEFORE snapshot ──
  console.log(`▶ Before-snapshot:`);
  const before = await snapshot();
  console.log(`  pm_entries=${before.pm_entries} pm_names=${before.pm_names} sp_rows=${before.sp_rows} `
    + `audit=${before.audit_count} access=${before.access_count} journal=${before.journal_count} `
    + `storage_errors=${before.storage_errors}\n`);
  // ── L0: Bun /search ──
  console.log(`▶ L0 — Bun /search (recruiter app surface)`);
  const sql_filter = `role = '${OP_ROLE}' AND state = '${OP_STATE}' AND city = '${OP_CITY}'`;
  // playbook_memory_k is deliberately NOT sent to Bun: the parity check
  // below relies on Bun forwarding it on its own.
  const bunSearch = await postJSON<any>(`${BUN}/search`, {
    question: `Welder in ${OP_CITY}, ${OP_STATE}`,
    sql_filter, top_k: 5, generate: false,
    id_column: "worker_id", dataset: "workers_500k", use_playbook_memory: true,
  });
  if (bunSearch?._error) {
    note("L0 Bun /search", false, `error: ${bunSearch._error}`);
  } else {
    const sources = bunSearch?.sources ?? [];
    const boostedHits = sources.filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
    note("L0 Bun /search", true, `sources=${sources.length} boosted=${boostedHits} sql_matches=${bunSearch?.sql_matches}`);
  }
  // ── L1: direct /vectors/hybrid (parity check) ──
  console.log(`\n▶ L1 — Direct /vectors/hybrid (parity check vs Bun)`);
  const directSearch = await postJSON<any>(`${GATEWAY}/vectors/hybrid`, {
    index_name: "workers_500k_v1", filter_dataset: "workers_500k", id_column: "worker_id",
    sql_filter, question: `Welder in ${OP_CITY}, ${OP_STATE}`,
    top_k: 5, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
  });
  const directBoosted = (directSearch?.sources ?? []).filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
  // A failed gateway call must be recorded as a failed layer, the same way
  // L0 treats a failed Bun call, rather than as a pass with boosted=0.
  if (directSearch?._error) {
    note("L1 Direct /vectors/hybrid", false, `error: ${directSearch._error}`);
  } else {
    note("L1 Direct /vectors/hybrid", true, `boosted=${directBoosted} sql=${directSearch?.sql_matches}`);
  }
  const bunBoosted = (bunSearch?.sources ?? []).filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
  if (bunBoosted < directBoosted) {
    note("CHAIN BREAK: Bun↔Direct parity", false,
      `Bun=${bunBoosted} boosted vs Direct=${directBoosted}. Bun /search likely missing playbook_memory_k forward.`);
  }
  // ── L3: Bun /log (recruiter records the pick) ──
  console.log(`\n▶ L3 — Bun /log (recruiter records the pick)`);
  const logged = await postJSON<any>(`${BUN}/log`, {
    operation: OPERATION,
    approach: "chain-of-custody trace",
    result: `1/1 filled → ${PICKED_WORKER}`,
    context: `client=COC-${Date.now()} start=08:00 scenario=trace`,
  });
  if (logged?._error) {
    // /log itself failed; there is no downstream response to inspect, so do
    // not report the persistence step as accepted.
    note("L3 Bun /log", false, `error: ${logged._error}`);
  } else {
    note("L3 Bun /log", true, `logged=${logged?.logged} seeded=${logged?.seeded}`);
    // The /log response may carry a `response` string from its underlying
    // persistence call. If it mentions "different schema" or "error", the
    // SQL-queryable path is broken even though seed succeeded — a chain break.
    // NOTE(review): the layer labels still say /ingest/file although the
    // change description says /log no longer posts there — confirm and rename.
    const logResp = String((logged as any)?.response ?? "");
    if (logResp.includes("error") || logResp.includes("different schema") || logResp.includes("Error")) {
      note("CHAIN BREAK: Bun /log → SQL ingest", false,
        `successful_playbooks ingest failed. Bun returned logged=true but /log's underlying ingest reported: ${logResp.slice(0, 150)}`);
    } else {
      note("L3a /log → /ingest/file", true, "ingest accepted");
    }
  }
  // Give the system a beat for any async fan-out (audit/journal/etc).
  await new Promise(r => setTimeout(r, 500));
  // ── AFTER snapshot ──
  console.log(`\n▶ After-snapshot:`);
  const after = await snapshot();
  const d = delta(before, after);
  console.log(fmtRow("playbook_memory.entries",    before.pm_entries,    after.pm_entries));
  console.log(fmtRow("playbook_memory.names",      before.pm_names,      after.pm_names));
  console.log(fmtRow("successful_playbooks.rows",  before.sp_rows,       after.sp_rows));
  console.log(fmtRow("tools/audit.count",          before.audit_count,   after.audit_count));
  console.log(fmtRow("access/audit.count",         before.access_count,  after.access_count));
  console.log(fmtRow("journal.events",             before.journal_count, after.journal_count));
  console.log(fmtRow("storage/errors.count",       before.storage_errors,after.storage_errors));
  // ── L5: playbook_memory grew? ──
  // Exactly +1 is the expected outcome; any other positive growth still
  // passes but is reported with the observed delta.
  if (d.pm_entries === 1) note("L5 playbook_memory growth", true, "+1 entry as expected");
  else note("L5 playbook_memory growth", d.pm_entries > 0,
    `delta=${d.pm_entries} (expected 1 — seed-after-log path)`);
  // ── L4: successful_playbooks SQL row appeared? ──
  if (d.sp_rows >= 1) note("L4 successful_playbooks SQL", true, `+${d.sp_rows} row(s)`);
  else note("L4 successful_playbooks SQL", false,
    `delta=${d.sp_rows} — Bun /log claims success but SQL table didn't grow. Recruiter querying via SQL would miss this fill.`);
  // ── L9: storage errors stayed quiet ──
  if (d.storage_errors === 0) note("L9 storage error journal", true, "no new bucket op errors");
  else note("L9 storage error journal", false, `+${d.storage_errors} new errors`);
  // ── L10: Phase 17 profile activation ──
  console.log(`\n▶ L10 — Activate profile ${PROFILE_ID}`);
  const act = await postJSON<any>(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, {});
  if (act?._error) note("L10 profile activation", false, `error: ${act._error}`);
  else note("L10 profile activation", true,
    `warmed=${(act?.warmed_indexes ?? []).length} duration_ms=${act?.duration_ms ?? "?"}`);
  // ── L11: Bun /search again — boost should now lift PICKED_WORKER ──
  console.log(`\n▶ L11 — Bun /search second time (boost lift verification)`);
  const search2 = await postJSON<any>(`${BUN}/search`, {
    question: `Welder in ${OP_CITY}, ${OP_STATE}`,
    sql_filter, top_k: 10, generate: false,
    id_column: "worker_id", dataset: "workers_500k", use_playbook_memory: true,
  });
  const sources2 = search2?.sources ?? [];
  // The pick is located by substring match of the worker name in chunk_text.
  const pickedHit = sources2.find((s: any) => String(s.chunk_text ?? "").includes(PICKED_WORKER));
  if (!pickedHit) {
    note("L11 boost lifts logged pick (Bun)", false,
      `${PICKED_WORKER} not in top-10 via Bun /search. Could be Bun-not-forwarding-playbook_memory_k bug from L1.`);
  } else if ((pickedHit.playbook_boost ?? 0) > 0) {
    note("L11 boost lifts logged pick (Bun)", true,
      `${PICKED_WORKER} boost=+${(pickedHit.playbook_boost as number).toFixed(3)} cites=${(pickedHit.playbook_citations ?? []).length}`);
  } else {
    note("L11 boost lifts logged pick (Bun)", false,
      `${PICKED_WORKER} present but boost=0 — playbook_memory_k forward bug likely`);
  }
  // Same probe via direct gateway to isolate Bun vs gateway
  const direct2 = await postJSON<any>(`${GATEWAY}/vectors/hybrid`, {
    index_name: "workers_500k_v1", filter_dataset: "workers_500k", id_column: "worker_id",
    sql_filter, question: `Welder in ${OP_CITY}, ${OP_STATE}`,
    top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
  });
  const sources2d = direct2?.sources ?? [];
  const pickedHitD = sources2d.find((s: any) => String(s.chunk_text ?? "").includes(PICKED_WORKER));
  if (pickedHitD && (pickedHitD.playbook_boost ?? 0) > 0) {
    note("L11b boost via direct gateway", true,
      `${PICKED_WORKER} boost=+${(pickedHitD.playbook_boost as number).toFixed(3)} cites=${(pickedHitD.playbook_citations ?? []).length}`);
  } else {
    note("L11b boost via direct gateway", false, `direct call also did not boost ${PICKED_WORKER}`);
  }
  return out;
 }
 // ─────────────────────── verifier (fresh agent) ───────────────────────
 // Hand the per-layer trace to a fresh qwen2.5 agent and return its one-line
 // verdict plus a 0-100 confidence. Never throws: a reply without a JSON
 // object, or any error from the model call or JSON parsing, comes back as
 // a verdict string with confidence 0.
 async function verifierJudgment(trace: TraceResult[]): Promise<{ verdict: string; confidence: number }> {
  const lines: string[] = [];
  for (const entry of trace) {
    const status = entry.ok ? "ok" : "FAIL";
    lines.push(`  ${status}  ${entry.layer}: ${entry.detail}`);
  }
  const summary = lines.join("\n");
  const prompt = `You are the CHAIN-OF-CUSTODY VERIFIER agent. A real recruiter operation was just
 traced through every layer of the staffing substrate. Read the per-layer results and judge
 whether the system kept chain of custody intact (every layer recorded the operation as
 expected) or where it broke.
 Per-layer trace:
 ${summary}
 Reply with ONE JSON object only:
 {"verdict": "<one tight sentence — what's the integrity status>", "confidence": 0-100}
 Be specific about which layer broke if any. confidence is how sure you are about the verdict.`;
  try {
    const reply = await generate("qwen2.5:latest", prompt, { temperature: 0.1, max_tokens: 200 });
    // Use the outermost {...} span; the model may surround it with prose.
    const open = reply.indexOf("{");
    const close = reply.lastIndexOf("}");
    const hasObject = open >= 0 && close > open;
    if (!hasObject) {
      return { verdict: "verifier could not produce JSON", confidence: 0 };
    }
    const parsed = JSON.parse(reply.slice(open, close + 1));
    const verdict = parsed.verdict ?? "no verdict";
    const confidence = Number(parsed.confidence) || 0;
    return { verdict, confidence };
  } catch (e) {
    return { verdict: `verifier error: ${(e as Error).message}`, confidence: 0 };
  }
 }
 // ─────────────────────── main ───────────────────────
 // Script entry point: run the trace, ask the verifier for a judgment, print
 // a summary, and exit. Exit code 1 only when a note labelled "CHAIN BREAK"
 // failed; other failed layers are reported but still exit 0.
 async function main() {
  console.log(`▶ Chain-of-custody trace — single real recruiter operation through every layer`);
  const trace = await runTrace();
  console.log(`\n▶ L12 — Verifier (fresh qwen2.5 agent reads the cross-layer trace)`);
  const v = await verifierJudgment(trace);
  console.log(`  verdict (${v.confidence}%): ${v.verdict}`);

  // Single pass: collect every failed note, and separately the ones that
  // count as a hard gate (explicit CHAIN BREAK label).
  const fails: TraceResult[] = [];
  const breaks: TraceResult[] = [];
  for (const entry of trace) {
    if (entry.ok) continue;
    fails.push(entry);
    if (entry.layer.startsWith("CHAIN BREAK")) breaks.push(entry);
  }
  const passing = trace.length - fails.length;

  console.log(`\n▶ Summary:`);
  console.log(`  passing layers: ${passing}/${trace.length}`);
  console.log(`  chain breaks:   ${breaks.length}`);
  console.log(`  total failures: ${fails.length}`);
  console.log(`  verifier confidence: ${v.confidence}%`);

  if (breaks.length > 0) {
    console.log(`\n✗ Chain of custody BROKEN at ${breaks.length} layer(s):`);
    breaks.forEach((b) => console.log(`    - ${b.layer}: ${b.detail}`));
    process.exit(1);
  }
  const closing = fails.length > 0
    ? `\n◑ Trace completed with ${fails.length} non-blocking failures (no formal chain break)`
    : `\n✓ Chain of custody intact across all layers`;
  console.log(closing);
  process.exit(0);
 }
 // Kick off the trace; anything that escapes main() is printed (message,
 // then stack when present) and the process exits with status 1.
 main().catch((failure) => {
  const err = failure as any;
  console.error(`\n✗ ${(err as Error).message}`);
  if (err.stack) {
    console.error(err.stack);
  }
  process.exit(1);
 });
--- a/tests/multi-agent/network_proving.ts
+++ b/tests/multi-agent/network_proving.ts
@ -0,0 +1,469 @@
 // Network proving: continuous build → verify → repeat with hot-swap profile.
 //
 // J's framing: "have them guide each other, when the test is complete we have
 // a successful playbook, then spin up another agent that tests the viability
 // of our network with the playbook and the hot-swap profile. Keep spinning up
 // agents and testing — pass theory, real-world execution, not isolated unit
 // tests."
 //
 // Each round = TWO phases:
 //
 //   1. BUILD phase. Two agents (mistral executor + qwen2.5 reviewer) work
 //      on a real staffing fill task. They guide each other via the critique
 //      loop. On consensus → seal a playbook with CANONICAL short seed text
 //      (the Pass 1 lesson — verbose seeds silently kill boost). Real Ollama,
 //      real workers_500k, real /vectors/hybrid path.
 //
 //   2. VERIFY phase. A FRESH qwen2.5 agent spins up, activates the
 //      staffing-recruiter profile (Phase 17 hot-swap), runs a probe query
 //      against the same network, and judges from the live response whether
 //      prior rounds' playbooks actually surface relevant workers higher.
 //      The verifier writes a verdict: did the network learn?
 //
 // Three rounds, progressively harder:
 //   R0: Welder x2 in Toledo, OH                — baseline
 //   R1: Welder x2 in Cleveland, OH             — same role, different city
 //                                                  → tests geo discrimination
 //                                                  (Toledo workers MUST NOT
 //                                                  bleed into Cleveland boost)
 //   R2: Welder x3 in Toledo, OH                — re-fill same city, bigger
 //                                                  count → tests compounding
 //                                                  (R0's endorsements should
 //                                                  still rank up here)
 //
 // Run: bun run tests/multi-agent/network_proving.ts
 //
 // Fail-fast: any HTTP error or model crash bubbles to top-level, exits 1.
 import {
  type LogEntry,
  type TaskSpec,
  type Action,
  type Fill,
  GATEWAY,
  generate,
  parseAction,
  executorPrompt,
  reviewerPrompt,
  sqlQuery,
  callTool,
 } from "./agent.ts";
 // Model names passed to generate() for each agent role. The reviewer and
 // the verifier use the same model but are prompted separately.
 const EXECUTOR_MODEL = "mistral:latest";
 const REVIEWER_MODEL = "qwen2.5:latest";
 const VERIFIER_MODEL = "qwen2.5:latest";
 // Profile activated at the start of every verify phase.
 const PROFILE_ID = "staffing-recruiter";
 // Vector index queried by the verify-phase probe.
 const INDEX_NAME = "workers_500k_v1";
 // Build-loop limits: total executor/reviewer turns, consecutive failed tool
 // calls, and consecutive "drift" verdicts before the build is abandoned.
 const MAX_TURNS = 12;
 const MAX_TOOL_ERRORS = 3;
 const MAX_DRIFTS = 3;
 // The rounds, in order. R1 changes only the city relative to R0; R2 returns
 // to R0's city with a larger target_count.
 const TASK_DECK: TaskSpec[] = [
  {
    id: "R0", operation: "fill: Welder x2 in Toledo, OH",
    target_role: "Welder", target_count: 2, target_city: "Toledo", target_state: "OH",
    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
  },
  {
    id: "R1", operation: "fill: Welder x2 in Cleveland, OH",
    target_role: "Welder", target_count: 2, target_city: "Cleveland", target_state: "OH",
    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
  },
  {
    id: "R2", operation: "fill: Welder x3 in Toledo, OH",
    target_role: "Welder", target_count: 3, target_city: "Toledo", target_state: "OH",
    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
  },
 ];
 // Outcome of one buildPhase() run.
 interface BuildResult {
  // true when executor and reviewer reached consensus; false when the build
  // loop threw, in which case `error` holds the message and `fills` is empty.
  ok: boolean;
  task: TaskSpec;
  // The fills the reviewer approved.
  fills: Fill[];
  // Number of turns started before the loop ended.
  turns: number;
  // Wall-clock duration, rounded to whole seconds.
  duration_secs: number;
  // Set only when the playbook_memory seed call succeeded.
  playbook_id?: string;
  entries_after_seed?: number;
  error?: string;
 }
 // Outcome of one verifyPhase() run: profile activation, the live probe
 // metrics, the rule-based geo check, and the verifier model's judgment.
 interface VerifyResult {
  profile_activated: boolean;
  // Count of entries in the activation response's warmed_indexes list.
  warmed_indexes: number;
  probe_boost_total: number;          // sum of playbook_boost across top-K
  probe_boosted_hits: number;         // how many hits had boost > 0
  probe_top_citations: string[];      // playbook_ids cited
  geo_discrimination_ok: boolean;     // when prior playbook is in different city, boost should NOT bleed
  verdict: string;                    // qwen2.5's natural-language judgment
  confidence: number;                 // 0-100 self-rated
  // Wall-clock duration, rounded to whole seconds.
  duration_secs: number;
 }
 // Everything recorded for one round: the task, both phase results, and the
 // score with its explanatory notes.
 interface RoundLedger {
  round: number;
  task: TaskSpec;
  build: BuildResult;
  verify: VerifyResult;
  score: number;                      // /10 per round
  // Human-readable lines explaining how the score was reached.
  notes: string[];
 }
 // ─────────────────────── BUILD phase (two-agent loop) ───────────────────────
 // Dispatch one executor tool call and return the tool's raw result.
 //
 //   hybrid_search — POSTs to /vectors/hybrid against workers_500k with
 //                   playbook memory enabled; requires sql_filter, question
 //                   and index_name (k is optional, default 10).
 //   sql           — runs args.query through sqlQuery after checking it is a
 //                   single SELECT statement.
 //   anything else — forwarded unchanged to callTool.
 //
 // Throws on missing arguments, on a query that is not a single SELECT, and
 // on a non-2xx hybrid response.
 async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  if (name === "hybrid_search") {
    const { sql_filter, question, index_name, k } = args;
    if (!sql_filter || !question || !index_name) {
      throw new Error(`hybrid_search needs sql_filter+question+index_name, got keys=${Object.keys(args).join(",")}`);
    }
    const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
      method: "POST", headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        sql_filter, question, index_name,
        filter_dataset: "workers_500k", id_column: "worker_id",
        top_k: k ?? 10, generate: false, use_playbook_memory: true,
      }),
    });
    if (!r.ok) throw new Error(`hybrid → ${r.status}: ${await r.text()}`);
    return r.json();
  }
  if (name === "sql") {
    if (!args.query) throw new Error("sql needs query");
    // The query text is model-generated, so treat it as untrusted input.
    if (!isSingleSelect(args.query)) throw new Error("sql allows SELECT only");
    return sqlQuery(args.query);
  }
  return callTool(name, args);
 }
 // True when `query` is a string holding exactly one SELECT statement.
 // A prefix test alone would also admit stacked statements such as
 // "SELECT 1; DROP TABLE t", so any ";" other than one trailing terminator
 // is rejected. Quoted literals are blanked first so a ";" inside a string
 // value does not count.
 function isSingleSelect(query: unknown): boolean {
  if (typeof query !== "string") return false;
  if (!/^\s*SELECT\b/i.test(query)) return false;
  const withoutLiterals = query.replace(/'(?:[^']|'')*'/g, "''");
  const withoutTerminator = withoutLiterals.replace(/;\s*$/, "");
  return !withoutTerminator.includes(";");
 }
 // Cap a tool result before it is logged: at most 20 `rows`, or — when there
 // is no `rows` array — at most 12 `sources`. Anything else is returned
 // untouched. The input object is never mutated; a shallow copy is returned.
 function trim(r: any) {
  const MAX_ROWS = 20;
  const MAX_SOURCES = 12;
  if (!r) return r;
  if (Array.isArray(r.rows)) {
    const rows = r.rows.slice(0, MAX_ROWS);
    return { ...r, rows };
  }
  if (Array.isArray(r.sources)) {
    const sources = r.sources.slice(0, MAX_SOURCES);
    return { ...r, sources };
  }
  return r;
 }
 // One-line console rendering of a log entry. Every line starts with a
 // bracketed header (prefix, zero-padded turn, role, kind); the tail depends
 // on the entry kind and is clipped so long payloads stay on one line.
 function fmtTurn(prefix: string, e: Omit<LogEntry, "at">): string {
  const body: any = e.content ?? {};
  const turnTag = e.turn.toString().padStart(2, "0");
  const head = `[${prefix} t${turnTag} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;
  // JSON-encode a value and keep only its first `max` characters.
  const clip = (value: any, max: number): string => JSON.stringify(value).slice(0, max);

  switch (e.kind as string) {
    case "tool_call":
      return `${head} ${body.tool}(${clip(body.args ?? {}, 70)})`;
    case "tool_result": {
      if (body.error) return `${head} error: ${body.error}`;
      if (Array.isArray(body.sources)) {
        return `${head} hybrid sql=${body.sql_matches} reranked=${body.vector_reranked}`;
      }
      if (Array.isArray(body.rows)) return `${head} sql ${body.rows.length} rows`;
      return `${head} ${clip(body, 70)}`;
    }
    case "critique": {
      const notes = (body.notes ?? "").slice(0, 50);
      return `${head} verdict=${body.verdict} ${notes}`;
    }
    case "propose_done": {
      const fills: Fill[] = body.fills ?? [];
      const names = fills.map((f: Fill) => f.name).join(", ");
      return `${head} ${fills.length} fills: ${names}`;
    }
    case "consensus_done":
      return `${head} ✓`;
    case "plan": {
      const steps = body.steps ?? [];
      return `${head} ${steps.length} steps`;
    }
    default:
      return `${head} ${clip(body, 60)}`;
  }
 }
 // BUILD phase for one task: alternate executor and reviewer turns until the
 // reviewer approves the executor's proposed fills, then seed playbook_memory
 // with the result.
 //
 // task   — the fill to work on.
 // prefix — tag placed at the start of every console line for this run.
 //
 // Never throws. Any error raised inside the turn loop (parse failure, too
 // many tool errors or drifts, wrong fill count, no consensus) is caught and
 // returned as `{ ok: false, error }`. A failed seed call is only warned
 // about; the result is still `ok: true`, just without a playbook_id.
 async function buildPhase(task: TaskSpec, prefix: string): Promise<BuildResult> {
  const t0 = Date.now();
  // Shared transcript; both prompts are rebuilt from it on every turn.
  const log: LogEntry[] = [];
  let turn = 0, sealed: { fills: Fill[]; approach: string } | null = null;
  let toolErrors = 0, drifts = 0;
  // Timestamp an entry, add it to the transcript, and print it.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(fmtTurn(prefix, e));
    return full;
  };
  try {
    while (turn < MAX_TURNS && !sealed) {
      turn += 1;
      // Executor half of the turn.
      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 600 });
      const execAction = parseAction(execRaw, "executor");
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });
      if (execAction.kind === "tool_call") {
        try {
          const r = await executeToolCall(execAction.tool, execAction.args);
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trim(r) });
          // Only a successful tool call clears the error streak.
          toolErrors = 0;
        } catch (e) {
          // Log the failure as a tool_result so it appears in the transcript
          // the next prompts are built from.
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
          toolErrors += 1;
          if (toolErrors >= MAX_TOOL_ERRORS) throw new Error(`${MAX_TOOL_ERRORS} consecutive tool errors`);
        }
      }
      // Reviewer half of the turn.
      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 400 });
      const revAction = parseAction(revRaw, "reviewer");
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });
      if (revAction.kind !== "critique") throw new Error(`reviewer non-critique`);
      if (revAction.verdict === "drift") {
        drifts += 1;
        if (drifts >= MAX_DRIFTS) throw new Error(`${MAX_DRIFTS} consecutive drifts`);
      } else drifts = 0;
      // Consensus needs both sides in the same turn: the executor proposes
      // done and the reviewer approves it.
      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
        // An approved proposal with the wrong number of fills aborts the build.
        if (execAction.fills.length !== task.target_count) {
          throw new Error(`fills=${execAction.fills.length} target=${task.target_count}`);
        }
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", content: { fills: execAction.fills } });
        sealed = { fills: execAction.fills, approach: (execAction as any).rationale ?? "multi-agent" };
      }
    }
    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);
    // Phase 19 seed — CANONICAL short text (Pass 1 lesson). The verbose
    // executor rationale stays out of the embedding; we keep a separate
    // human-readable record in the playbook log.
    const canonicalApproach = `${task.target_role.toLowerCase()} fill via hybrid search`;
    const canonicalContext = `${task.target_role} fill in ${task.target_city}, ${task.target_state}`;
    let playbook_id: string | undefined;
    let entries_after_seed: number | undefined;
    // Best-effort seed: both a non-2xx response and a thrown error are
    // warned about and leave playbook_id / entries_after_seed undefined.
    try {
      const sr = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
        method: "POST", headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          operation: task.operation,
          approach: canonicalApproach,
          context: canonicalContext,
          endorsed_names: sealed.fills.map(f => f.name),
          append: true,
        }),
      });
      if (sr.ok) {
        const j = await sr.json() as any;
        playbook_id = j.playbook_id;
        entries_after_seed = j.entries_after;
        console.log(`[${prefix}] ↳ seeded id=${playbook_id} entries=${entries_after_seed}`);
      } else {
        console.warn(`[${prefix}] ↳ seed failed: ${sr.status} ${await sr.text()}`);
      }
    } catch (e) {
      console.warn(`[${prefix}] ↳ seed errored: ${(e as Error).message}`);
    }
    return {
      ok: true, task, fills: sealed.fills, turns: turn,
      duration_secs: Math.round((Date.now() - t0) / 1000),
      playbook_id, entries_after_seed,
    };
  } catch (e) {
    // Any failure inside the turn loop becomes a failed result.
    return {
      ok: false, task, fills: [], turns: turn,
      duration_secs: Math.round((Date.now() - t0) / 1000),
      error: (e as Error).message,
    };
  }
 }
 // ─────────────────────── VERIFY phase (fresh single agent) ───────────────────────
 // POST the profile activation request and report whether it succeeded, how
 // many indexes the response lists as warmed, and how long the request took
 // in milliseconds (measured up to the response headers, before the body is
 // read). A non-2xx status is warned about and reported as ok=false; a
 // network error propagates to the caller.
 async function activateProfile(): Promise<{ ok: boolean; warmed: number; ms: number }> {
  const startedAt = Date.now();
  const resp = await fetch(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, { method: "POST" });
  const ms = Date.now() - startedAt;
  if (resp.ok) {
    const payload = await resp.json() as any;
    const warmedList = payload.warmed_indexes ?? [];
    return { ok: true, warmed: warmedList.length, ms };
  }
  console.warn(`profile activation failed: ${resp.status} ${await resp.text()}`);
  return { ok: false, warmed: 0, ms };
 }
 // Run the verify-phase probe: a hybrid query for the task's role/city/state
 // with playbook memory enabled, summarised for the verifier.
 //
 // Returns the raw sources plus:
 //   boostedHits — number of sources with playbook_boost > 0
 //   totalBoost  — sum of playbook_boost over all sources
 //   cites       — up to 5 distinct playbook citations
 //   topNames    — display name for each of the first 5 sources
 //
 // Throws when the gateway answers with a non-2xx status.
 async function probeWithBoost(task: TaskSpec) {
  // Quote a value as a SQL string literal, doubling embedded single quotes.
  // Applied to all three values so no filter field is left unescaped.
  const quote = (value: string) => `'${value.replace(/'/g, "''")}'`;
  const sql_filter = `role = ${quote(task.target_role)} `
    + `AND state = ${quote(task.target_state)} `
    + `AND city = ${quote(task.target_city)}`;
  const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
    method: "POST", headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      index_name: INDEX_NAME, filter_dataset: "workers_500k", id_column: "worker_id",
      sql_filter, question: `${task.target_role} in ${task.target_city}, ${task.target_state}`,
      top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
    }),
  });
  if (!r.ok) throw new Error(`probe → ${r.status}: ${await r.text()}`);
  const j = (await r.json()) as any;
  const sources: any[] = j.sources ?? [];
  const boostedHits = sources.filter(s => (s.playbook_boost ?? 0) > 0).length;
  const totalBoost = sources.reduce((s, x) => s + (x.playbook_boost ?? 0), 0);
  const cites = Array.from(new Set(sources.flatMap(s => s.playbook_citations ?? []))).slice(0, 5);
  const topNames = sources.slice(0, 5).map(s => {
    // The name is the text before the first em dash of chunk_text.
    // split() always yields at least one element, so a `??` fallback would
    // never fire; use `||` so an empty name falls back to the document id.
    const name = String(s.chunk_text ?? "").split("—")[0]?.trim();
    return name || s.doc_id;
  });
  return { sources, boostedHits, totalBoost, cites, topNames };
 }
 // Build the prompt for the fresh verifier agent, which shares no log with
 // the build pair. The prompt carries the current task, a numbered list of
 // the playbooks sealed in earlier rounds (or a placeholder when there are
 // none), and the live probe metrics, and asks for a single JSON verdict.
 function verifierPrompt(task: TaskSpec, priorPlaybooks: Array<{op: string; fills: string[]}>,
  probe: { boostedHits: number; totalBoost: number; cites: string[]; topNames: string[] }
 ): string {
  let priorBlock: string;
  if (priorPlaybooks.length === 0) {
    priorBlock = "(no prior playbooks — this is the first round)";
  } else {
    const numbered: string[] = [];
    priorPlaybooks.forEach((entry, idx) => {
      numbered.push(`  ${idx + 1}. ${entry.op} → endorsed [${entry.fills.join(", ")}]`);
    });
    priorBlock = numbered.join("\n");
  }
  return `You are the VERIFIER agent. A fresh round just sealed a playbook on a real staffing
 substrate. Your job: judge whether the system learned from prior rounds.
 CURRENT ROUND:
  task:    ${task.operation}
  in city: ${task.target_city}, ${task.target_state}
 PRIOR PLAYBOOKS (in playbook_memory):
 ${priorBlock}
 I activated the staffing-recruiter profile and ran a hybrid query for this exact task with
 use_playbook_memory=true. Live result from the substrate:
  - top-5 surfaced workers: ${probe.topNames.join(", ")}
  - hits with non-zero playbook_boost: ${probe.boostedHits} / 10
  - total boost across top-10: ${probe.totalBoost.toFixed(3)}
  - playbook citations: [${probe.cites.join(", ")}]
 JUDGE:
 1. If a prior playbook covered this same city + role, the boost should fire on the workers
   it endorsed (boostedHits > 0, citations non-empty).
 2. If no prior playbook covers this combo, boost should be ~0 — that means the system is
   correctly NOT bleeding endorsements across geos.
 3. Anything in between (e.g. some boost but for the wrong reason) is a partial pass.
 Respond with ONE JSON object only:
 {"learned": true|false, "verdict": "<one sentence>", "confidence": 0-100}
 learned=true means the network behaved as expected for this round (whether that's "boost fired
 because it should" or "boost stayed zero because it should"). learned=false means the system
 either failed to learn from a relevant prior playbook OR bled an irrelevant one. confidence is
 how sure you are.`;
 }
 // VERIFY phase for one task: activate the profile, probe the live network,
 // apply the rule-based geo check against the rounds already in `ledger`,
 // then ask a fresh verifier model for a verdict.
 //
 // Geo rule, using only rounds whose build succeeded:
 //   - a prior round with the same role+city+state → boost is expected;
 //   - otherwise, a prior round with the same role elsewhere → boost must be 0;
 //   - otherwise → nothing is expected, the check passes.
 //
 // Errors from activation or the probe propagate; a failing verifier call or
 // unparsable reply only affects `verdict` and leaves `confidence` at 0.
 async function verifyPhase(task: TaskSpec, ledger: RoundLedger[]): Promise<VerifyResult> {
  const startedAt = Date.now();
  const activation = await activateProfile();
  const probe = await probeWithBoost(task);

  const sealedRounds = ledger.filter(r => r.build.ok);
  const sameRole = (r: RoundLedger) => r.task.target_role === task.target_role;
  const sameGeo = (r: RoundLedger) =>
    r.task.target_city === task.target_city && r.task.target_state === task.target_state;
  const priorMatchesThisGeo = sealedRounds.some(r => sameGeo(r) && sameRole(r));
  const priorOtherGeo = sealedRounds.some(r => sameRole(r) && !sameGeo(r));

  const geo_discrimination_ok = priorMatchesThisGeo
    ? probe.boostedHits > 0        // expected lift
    : priorOtherGeo
      ? probe.boostedHits === 0    // must NOT bleed
      : true;                      // no signal expected either way

  // What the verifier is told about earlier rounds.
  const priorPlaybooks = sealedRounds.map(r => {
    const fills = r.build.fills.map(f => f.name);
    return { op: r.task.operation, fills };
  });

  let verdict = "verifier failed to respond";
  let confidence = 0;
  try {
    const prompt = verifierPrompt(task, priorPlaybooks, probe);
    const reply = await generate(VERIFIER_MODEL, prompt, { temperature: 0.1, max_tokens: 250 });
    // Use the outermost {...} span; a reply without one keeps the defaults.
    const open = reply.indexOf("{");
    const close = reply.lastIndexOf("}");
    if (open >= 0 && close > open) {
      const parsed = JSON.parse(reply.slice(open, close + 1));
      verdict = parsed.verdict ?? verdict;
      confidence = Number(parsed.confidence) || 0;
    }
  } catch (e) {
    verdict = `verifier parse error: ${(e as Error).message}`;
  }

  const duration_secs = Math.round((Date.now() - startedAt) / 1000);
  return {
    profile_activated: activation.ok,
    warmed_indexes: activation.warmed,
    probe_boost_total: probe.totalBoost,
    probe_boosted_hits: probe.boostedHits,
    probe_top_citations: probe.cites,
    geo_discrimination_ok,
    verdict, confidence,
    duration_secs,
  };
 }
 // ─────────────────────── round scoring ───────────────────────
 /**
  * Score one round out of 10 and collect human-readable notes.
  *
  * Points: build sealed 3, playbook seeded 1, profile activated 1,
  * geo discrimination correct 3, verifier confidence >= 60 gives 2.
  */
 function scoreRound(r: RoundLedger): { score: number; notes: string[] } {
  const { build, verify } = r;
  const notes: string[] = [];
  let score = 0;
  // Add points and record the matching note in one step.
  const award = (points: number, note: string): void => {
    score += points;
    notes.push(note);
  };

  if (build.ok) {
    award(3, `✓ build sealed (${build.fills.map(f => f.name).join(", ")})`);
  } else {
    notes.push(`✗ build failed: ${build.error}`);
  }

  if (build.playbook_id) award(1, `✓ seeded id=${build.playbook_id}`);
  if (verify.profile_activated) award(1, `✓ profile activated (warmed=${verify.warmed_indexes})`);

  if (verify.geo_discrimination_ok) {
    award(3, `✓ geo discrimination correct (boostedHits=${verify.probe_boosted_hits})`);
  } else {
    notes.push(`✗ geo discrimination failed (boostedHits=${verify.probe_boosted_hits})`);
  }

  if (verify.confidence >= 60) {
    award(2, `✓ verifier confident (${verify.confidence}%): ${verify.verdict}`);
  } else {
    notes.push(`◑ verifier confidence ${verify.confidence}%: ${verify.verdict}`);
  }

  return { score, notes };
 }
 // ─────────────────────── main loop ───────────────────────
 // Runs every task in TASK_DECK through a BUILD phase then a VERIFY phase,
 // scores each round out of 10, prints a summary, and exits 0 when the gate
 // at the bottom passes. A failed gate throws; the caller's .catch turns that
 // into exit code 1.
 async function main() {
  console.log(`▶ Network proving — ${TASK_DECK.length} rounds, profile=${PROFILE_ID}`);
  console.log(`▶ build pair: ${EXECUTOR_MODEL} + ${REVIEWER_MODEL}; verifier: ${VERIFIER_MODEL}\n`);
  const ledger: RoundLedger[] = [];
  for (let i = 0; i < TASK_DECK.length; i++) {
    const task = TASK_DECK[i];
    console.log(`\n══════════ Round ${i} — ${task.operation} ══════════`);
    console.log(`\n[${task.id}] BUILD phase (two agents collaborating)`);
    const build = await buildPhase(task, task.id);
    console.log(`\n[${task.id}] VERIFY phase (fresh agent + hot-swap profile)`);
    // The ledger holds only EARLIER rounds here; the current round is pushed
    // after scoring, so verifyPhase judges geo discrimination against history.
    const verify = await verifyPhase(task, ledger);
    console.log(`  profile=${verify.profile_activated ? "ok" : "fail"} warmed=${verify.warmed_indexes} `
      + `boosted=${verify.probe_boosted_hits}/10 totalBoost=${verify.probe_boost_total.toFixed(3)} `
      + `cites=${verify.probe_top_citations.length} confidence=${verify.confidence}%`);
    console.log(`  verdict: ${verify.verdict}`);
    // Score and notes are placeholders until scoreRound fills them in.
    const round: RoundLedger = { round: i, task, build, verify, score: 0, notes: [] };
    const sc = scoreRound(round);
    round.score = sc.score; round.notes = sc.notes;
    ledger.push(round);
    console.log(`\n  Round ${i} score: ${round.score}/10`);
    for (const n of round.notes) console.log(`    ${n}`);
  }
  console.log(`\n══════════ Network viability summary ══════════`);
  const total = ledger.reduce((s, r) => s + r.score, 0);
  const max = ledger.length * 10;
  // NOTE(review): NaN when TASK_DECK is empty (0 / 0); the gate below then
  // passes vacuously with 0/0 rounds.
  const avg = total / ledger.length;
  for (const r of ledger) console.log(`  R${r.round} ${r.task.target_city.padEnd(10)} ${r.task.target_role.padEnd(20)} ${r.score}/10`);
  console.log(`\n  TOTAL: ${total}/${max}  AVG: ${avg.toFixed(1)}/10`);
  // Hard gate: at least 2/3 of the rounds (rounded up) must have a successful
  // build AND a round score of at least 6/10. Verifier confidence only feeds
  // into this through the score; it is not checked on its own.
  const passed = ledger.filter(r => r.build.ok && r.score >= 6).length;
  if (passed < Math.ceil(ledger.length * 2 / 3)) {
    throw new Error(`network proving gate failed — only ${passed}/${ledger.length} rounds passed (need ≥${Math.ceil(ledger.length * 2 / 3)})`);
  }
  console.log(`\n✓ Network proven over ${passed}/${ledger.length} rounds`);
  process.exit(0);
 }
 // Entry point: run main() and turn any rejection (including a failed gate)
 // into a printed message, the stack when one is present, and exit code 1.
 main().catch(e => {
  console.error(`\n✗ ${(e as Error).message}`);
  if ((e as any).stack) console.error((e as any).stack);
  process.exit(1);
 });
--- a/tests/multi-agent/orchestrator.ts
+++ b/tests/multi-agent/orchestrator.ts
@ -0,0 +1,302 @@
 // Two-agent orchestrator. The executor and reviewer take strictly
 // alternating turns inside a single async loop, coordinated through a
 // shared in-memory log: one turn of executor then one turn of reviewer,
 // repeated until consensus_done, drift-cycle blown, or hard turn cap. On
 // success writes a playbook JSON; on failure exits non-zero with the full
 // log dumped for inspection.
 //
 // Fail-fast: parse errors are appended to the log AND rethrown, so the
 // orchestrator top-level catches, dumps, and exits with code 1. Tool-call
 // errors are the exception: they are fed back to the executor as a
 // tool_result so it can self-correct on its next turn. The test harness
 // reads the exit code to decide if the substrate is healthy.
 import {
  type LogEntry,
  type TaskSpec,
  type Action,
  type Fill,
  callTool,
  hybridSearch,
  sqlQuery,
  generate,
  parseAction,
  executorPrompt,
  reviewerPrompt,
  GATEWAY,
 } from "./agent.ts";
 import { mkdir, writeFile } from "node:fs/promises";
 import { join } from "node:path";
 // Models for the two roles, plus the loop limits.
 const EXECUTOR_MODEL = "mistral:latest";
 const REVIEWER_MODEL = "qwen2.5:latest";
 const MAX_TURNS = 12;                 // executor turns; reviewer gets one per executor turn
 const MAX_CONSECUTIVE_DRIFTS = 3;     // drift-cycle blown → give up
 // Default task. Override via argv[2] if you want something else; see
 // `parseTaskFromArg`.
 // NOTE(review): two earlier notes disagree about provenance. One says this
 // fill was picked from the real-world staffing pattern but is NOT in the
 // existing successful_playbooks list (a fresh fill); the other says it was
 // lifted from the production pattern in successful_playbooks. Confirm which.
 // Toledo, OH has 342 welders in workers_500k so supply is ample — the test
 // is about collaboration and drift correction, not needle-in-haystack.
 const DEFAULT_TASK: TaskSpec = {
  // Evaluated once at module load, so the id is fixed for the whole run.
  id: `task-${Date.now()}`,
  operation: "fill: Welder x2 in Toledo, OH",
  target_role: "Welder",
  target_count: 2,
  target_city: "Toledo",
  target_state: "OH",
  approach_hint: "hybrid search against workers_500k_v1, narrow by role+city+state+availability, rank semantically",
 };
 /**
  * Build the task spec from `process.argv[2]`.
  *
  * - no argument → `DEFAULT_TASK`
  * - argument ending in `.json` → the parsed contents of that file
  * - otherwise a single whitespace-separated `key:value` string such as
  *   `"role:Welder count:2 city:Columbus state:OH"` (optional `hint:`).
  *   Values cannot contain whitespace; use a JSON file for those.
  *
  * @throws Error when a required key (role, count, city, state) is missing
  *         or `count` is not a positive integer. Without this check the
  *         task would carry `undefined`/`NaN` fields and only fail at the
  *         fills-vs-target_count comparison after a full multi-turn run.
  */
 function parseTaskFromArg(): TaskSpec {
  const arg = process.argv[2];
  if (!arg) return DEFAULT_TASK;
  // Anything more complex than key:value pairs comes from a JSON file.
  if (arg.endsWith(".json")) {
    return JSON.parse(require("node:fs").readFileSync(arg, "utf-8"));
  }
  const kv: Record<string, string> = {};
  for (const token of arg.trim().split(/\s+/)) {
    // Split on the first ":" only, so values may themselves contain colons.
    const [k, ...v] = token.split(":");
    kv[k] = v.join(":");
  }
  const missing = ["role", "count", "city", "state"].filter(k => !kv[k]);
  if (missing.length > 0) {
    throw new Error(
      `task arg missing ${missing.join(", ")} — expected "role:Welder count:2 city:Columbus state:OH", got "${arg}"`,
    );
  }
  const count = Number(kv.count);
  if (!Number.isInteger(count) || count <= 0) {
    throw new Error(`task arg count must be a positive integer, got "${kv.count}"`);
  }
  return {
    id: `task-${Date.now()}`,
    operation: `fill: ${kv.role} x${kv.count} in ${kv.city}, ${kv.state}`,
    target_role: kv.role,
    target_count: count,
    target_city: kv.city,
    target_state: kv.state,
    approach_hint: kv.hint ?? "hybrid search",
  };
 }
 // Pretty one-line print for each log entry, so the human watching stdout
 // can follow without pulling the JSONL file. Defensive: models sometimes
 // omit optional fields (rationale, notes), so every access is guarded.
 // Returns "[tNN role kind] <summary>", where the summary depends on e.kind.
 function fmt(e: LogEntry): string {
  const tag = `[t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;
  const c = e.content ?? {};
  // Stringify-then-truncate, so missing or non-string fields never throw.
  const trim = (s: any, n: number) => String(s ?? "").slice(0, n);
  if (e.kind === "tool_call")
    return `${tag} ${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 80)}) — ${trim(c.rationale, 60)}`;
  if (e.kind === "tool_result") {
    // SQL results carry `rows`, hybrid results carry `sources`.
    const rows = c?.rows?.length ?? c?.sources?.length ?? undefined;
    return `${tag} ${rows !== undefined ? `rows=${rows}` : JSON.stringify(c).slice(0, 80)}`;
  }
  if (e.kind === "critique") return `${tag} verdict=${c.verdict} — ${trim(c.notes, 80)}`;
  if (e.kind === "propose_done")
    return `${tag} ${c.fills?.length ?? 0} fills: ${(c.fills ?? []).map((f: Fill) => f.name).join(", ")}`;
  if (e.kind === "consensus_done") return `${tag} ✓`;
  if (e.kind === "plan") return `${tag} ${c.steps?.length ?? 0} steps: ${(c.steps ?? []).slice(0, 2).join(" / ")}`;
  if (e.kind === "error") {
    // An object without `message` used to interpolate as "[object Object]";
    // serialise it instead so the error stays readable.
    const detail = c.message ?? (typeof c === "string" ? c : JSON.stringify(c));
    return `${tag} ${detail}`;
  }
  return `${tag} ${JSON.stringify(c).slice(0, 100)}`;
 }
 // Execute one tool call. The tool catalog in the prompt lists both the
 // registered Phase 12 tools AND a pseudo-tool "hybrid_search" for the
 // /vectors/hybrid endpoint — unify here so the executor doesn't need to
 // know which surface a capability lives on.
 //
 // Dispatch:
 //   "hybrid_search" → POST ${GATEWAY}/vectors/hybrid
 //   "sql"           → sqlQuery(), SELECT statements only
 //   anything else   → callTool() (Phase 12 registry)
 //
 // Throws on missing/invalid arguments and on a non-2xx hybrid response.
 // The caller records the message as a tool_result so the executor can
 // self-correct on its next turn.
 async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  if (name === "hybrid_search") {
    const { sql_filter, question, index_name, k } = args;
    if (!sql_filter || !question || !index_name) {
      throw new Error(`hybrid_search needs sql_filter + question + index_name, got ${JSON.stringify(args)}`);
    }
    // Pass through to /vectors/hybrid. id_column defaults to worker_id
    // server-side, which is what workers_500k uses.
    const body: any = { sql_filter, question, index_name, top_k: k ?? 10, generate: false };
    // Use the shared GATEWAY base (same one the playbook_memory seed call
    // uses) rather than a hard-coded host, so one setting moves every call.
    const res = await fetch(`${GATEWAY}/vectors/hybrid`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    });
    // Surface server errors explicitly, rather than handing an error body
    // (or a JSON parse failure) to the executor as if it were a result.
    if (!res.ok) throw new Error(`hybrid_search → ${res.status}: ${await res.text()}`);
    return res.json();
  }
  if (name === "sql") {
    const { query } = args;
    if (!query || typeof query !== "string") throw new Error(`sql needs query (string), got ${JSON.stringify(args)}`);
    // Read-only guard: only statements that start with SELECT are allowed.
    if (!/^\s*SELECT/i.test(query)) throw new Error(`sql tool allows SELECT only: ${query}`);
    return sqlQuery(query);
  }
  // Fall through to Phase 12 registry for any other named tool.
  return callTool(name, args);
 }
 // Drive one task through the executor/reviewer loop.
 //
 // Each turn: the executor emits an action (a tool_call is executed and its
 // result appended to the log), then the reviewer critiques. The loop ends
 // on consensus (executor propose_done + reviewer approve_done), on
 // MAX_CONSECUTIVE_DRIFTS consecutive drift verdicts, on
 // MAX_CONSECUTIVE_DRIFTS consecutive tool errors, or after MAX_TURNS.
 //
 // Success: writes <task.id>.json under tests/multi-agent/playbooks, seeds
 // playbook_memory (best effort) and exits 0.
 // Failure: dumps <task.id>-FAILED.json with the log and exits 1.
 async function main() {
  const task = parseTaskFromArg();
  const log: LogEntry[] = [];
  let turn = 0;
  let consecutiveDrifts = 0;
  // Tool errors are tracked separately from drift verdicts. They used to
  // share consecutiveDrifts, which any non-drift verdict resets to 0, so a
  // streak of malformed tool calls could never reach the abort threshold
  // while the reviewer kept answering "continue".
  let consecutiveToolErrors = 0;
  let sealed: { fills: Fill[]; approach: string } | null = null;
  // Timestamp the entry, store it, and echo a one-line summary to stdout.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(fmt(full));
    return full;
  };
  console.log(`▶ task: ${task.operation}`);
  console.log(`▶ executor=${EXECUTOR_MODEL}  reviewer=${REVIEWER_MODEL}`);
  console.log();
  try {
    while (turn < MAX_TURNS && !sealed) {
      turn += 1;
      // --- EXECUTOR TURN ---
      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), {
        temperature: 0.2,
        max_tokens: 600,
      });
      let execAction: Action;
      try {
        execAction = parseAction(execRaw, "executor");
      } catch (e) {
        // Unparseable output is fatal: log it with a raw excerpt, then rethrow.
        append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "error",
          content: { message: (e as Error).message, raw: execRaw.slice(0, 400) } });
        throw e;
      }
      append({ turn, role: "executor", model: EXECUTOR_MODEL,
        kind: execAction.kind as any, content: execAction });
      // If tool_call, execute and feed result back into the log. Tool
      // validation / server errors come back as a tool_result with an
      // `error` field — the executor reads its own error on the next turn
      // and self-corrects (e.g. "oh, I forgot the `question` argument").
      // This is softer than hard-failing the orchestrator: the whole
      // point of two-agent collaboration is letting agents learn from
      // immediate feedback instead of crashing the run.
      if (execAction.kind === "tool_call") {
        try {
          const result = await executeToolCall(execAction.tool, execAction.args);
          const trimmed = trimResult(result);
          append({ turn, role: "executor", model: EXECUTOR_MODEL,
            kind: "tool_result", content: trimmed });
          // A successful call ends the error streak.
          consecutiveToolErrors = 0;
        } catch (e) {
          append({ turn, role: "executor", model: EXECUTOR_MODEL,
            kind: "tool_result",
            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
          // If the executor keeps throwing tool errors, abort. The reviewer's
          // verdict deliberately does not reset this counter.
          consecutiveToolErrors += 1;
          if (consecutiveToolErrors >= MAX_CONSECUTIVE_DRIFTS) {
            throw new Error(`aborting — ${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors, executor can't self-correct`);
          }
        }
      }
      // --- REVIEWER TURN ---
      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), {
        temperature: 0.1,
        max_tokens: 400,
      });
      let revAction: Action;
      try {
        revAction = parseAction(revRaw, "reviewer");
      } catch (e) {
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "error",
          content: { message: (e as Error).message, raw: revRaw.slice(0, 400) } });
        throw e;
      }
      append({ turn, role: "reviewer", model: REVIEWER_MODEL,
        kind: "critique", content: revAction });
      if (revAction.kind !== "critique") throw new Error(`reviewer emitted non-critique: ${revAction.kind}`);
      if (revAction.verdict === "drift") {
        consecutiveDrifts += 1;
        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
          throw new Error(`aborting — ${MAX_CONSECUTIVE_DRIFTS} consecutive drift flags, executor can't self-correct`);
        }
      } else {
        consecutiveDrifts = 0;
      }
      // Consensus: executor proposed done AND reviewer approved.
      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
        if (execAction.fills.length !== task.target_count) {
          throw new Error(`consensus malformed — ${execAction.fills.length} fills vs target ${task.target_count}`);
        }
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done",
          content: { fills: execAction.fills } });
        sealed = { fills: execAction.fills, approach: execAction.rationale };
      }
    }
    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns — task incomplete`);
    // Write playbook entry matching the successful_playbooks schema.
    const playbook = {
      timestamp: new Date().toISOString(),
      operation: task.operation,
      approach: sealed.approach,
      result: `${sealed.fills.length}/${task.target_count} filled → ${sealed.fills.map(f => f.name).join(", ")}`,
      context: `executor=${EXECUTOR_MODEL} reviewer=${REVIEWER_MODEL} turns=${turn}`,
      task,
      fills: sealed.fills,
      log,
    };
    await mkdir("./tests/multi-agent/playbooks", { recursive: true });
    const path = join("./tests/multi-agent/playbooks", `${task.id}.json`);
    await writeFile(path, JSON.stringify(playbook, null, 2));
    console.log(`\n✓ playbook written: ${path}`);
    console.log(`  ${playbook.result}`);
    // Phase 19.5: write-through to playbook_memory. The sealed fills are
    // the endorsement; next semantically-similar query will surface them
    // higher. /seed bypasses the successful_playbooks ingest round-trip
    // — when that ingest path ships, this block should switch to append
    // + rebuild instead.
    // Best effort: a failed seed is only warned about and the run still
    // exits 0.
    try {
      // Seed context is what the embedding model actually sees alongside
      // the operation — so it has to carry task-semantic content (role,
      // city, scenario) rather than orchestrator bookkeeping. We stash
      // the bookkeeping in the full playbook JSON instead (see playbook
      // object above) where operators can grep it without it polluting
      // the ranking signal.
      const seedContext = task.approach_hint
        ?? `${task.target_role} fill in ${task.target_city}, ${task.target_state}`;
      const seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          operation: task.operation,
          approach: sealed.approach || "multi-agent → hybrid search",
          context: seedContext,
          endorsed_names: sealed.fills.map(f => f.name),
          append: true,
        }),
      });
      if (seedRes.ok) {
        const j = await seedRes.json() as any;
        console.log(`  ↳ playbook_memory seeded: id=${j.playbook_id} entries=${j.entries_after}`);
      } else {
        console.warn(`  ↳ playbook_memory seed failed: ${seedRes.status} ${await seedRes.text()}`);
      }
    } catch (e) {
      console.warn(`  ↳ playbook_memory seed errored: ${(e as Error).message}`);
    }
    process.exit(0);
  } catch (e) {
    console.error(`\n✗ ${(e as Error).message}`);
    // Still persist the log for inspection.
    await mkdir("./tests/multi-agent/playbooks", { recursive: true });
    const path = join("./tests/multi-agent/playbooks", `${task.id}-FAILED.json`);
    await writeFile(path, JSON.stringify({ task, error: (e as Error).message, log }, null, 2));
    console.error(`  log dumped: ${path}`);
    process.exit(1);
  }
 }
 // Cap a tool result's `rows` at 20 before it goes into the log. When rows
 // were dropped, `_trimmed` says how many; anything without a `rows` array
 // is returned untouched.
 function trimResult(r: any): any {
  const ROW_CAP = 20;
  if (!r || !Array.isArray(r.rows)) return r;
  const overflow = r.rows.length - ROW_CAP;
  return {
    ...r,
    rows: r.rows.slice(0, ROW_CAP),
    _trimmed: overflow > 0 ? `${overflow} more rows` : undefined,
  };
 }
 // Kick off the run. main() terminates the process itself: exit 0 after a
 // sealed playbook, exit 1 from its own catch block.
 main();
--- a/tests/multi-agent/run_e2e_rated.ts
+++ b/tests/multi-agent/run_e2e_rated.ts
@ -0,0 +1,400 @@
 // Two-agent x two-tasks parallel real-world test with per-playbook rating.
 //
 // Spawns two independent (executor, reviewer) pairs concurrently, each
 // driving a different staffing fill against the live substrate. After
 // each pair seals a playbook, verifies the fill against workers_500k,
 // confirms the seed reached playbook_memory, and re-runs the same query
 // with use_playbook_memory=true to prove the boost fires.
 //
 // Errors fail fast — any HTTP error, parse error, or rating failure is
 // rethrown so bun exits non-zero. Run with:
 //
 //   bun run tests/multi-agent/run_e2e_rated.ts
 //
 // VRAM note: both pairs call the same two Ollama models (mistral +
 // qwen2.5). Ollama queues at the model level, so "parallel" is concurrent
 // orchestration, not concurrent inference — the loops interleave on the
 // shared models. That's intentional: it stresses the same realistic
 // path two staffing coordinators would hit if they both opened the app
 // at 8am.
 import {
  type LogEntry,
  type TaskSpec,
  type Action,
  type Fill,
  GATEWAY,
  generate,
  parseAction,
  executorPrompt,
  reviewerPrompt,
  sqlQuery,
  callTool,
 } from "./agent.ts";
 // Models for the two roles of each pair.
 const EXECUTOR_MODEL = "mistral:latest";
 const REVIEWER_MODEL = "qwen2.5:latest";
 // Upper bound on executor/reviewer turn pairs per orchestrator run.
 const MAX_TURNS = 12;
 // Abort threshold, applied separately to consecutive reviewer "drift"
 // verdicts and to consecutive tool-call errors.
 const MAX_CONSECUTIVE_DRIFTS = 3;
 // Vector index used by the boost verification query.
 const INDEX_NAME = "workers_500k_v1";
 // Outcome of one runOrchestrator() call. The function never throws; a
 // failure is reported through `ok: false` plus `error`.
 interface RunResult {
  task: TaskSpec;          // the task that was driven
  ok: boolean;             // true when executor and reviewer reached consensus
  turns: number;           // turn counter value when the run ended
  duration_secs: number;   // elapsed time, rounded to whole seconds
  fills: Fill[];           // consensus fills; [] on failure
  log: LogEntry[];         // full conversation log, kept on failure too
  approach: string;        // sealed approach text; "" on failure
  error?: string;          // failure message, set only when ok is false
 }
 // ────────────────────────── orchestrator (function form) ──────────────────────────
 // Function-form orchestrator: drives one task through the executor/reviewer
 // loop and returns a RunResult instead of exiting the process.
 //
 //   task   — the fill to perform
 //   prefix — label prepended to every log line, so interleaved output from
 //            concurrent runs stays attributable
 //
 // Never rejects: any error thrown inside (parse failure, drift/tool-error
 // abort, malformed consensus, no consensus) is caught and returned as
 // { ok: false, error }.
 async function runOrchestrator(task: TaskSpec, prefix: string): Promise<RunResult> {
  const start = Date.now();
  const log: LogEntry[] = [];
  let turn = 0;
  let consecutiveDrifts = 0;
  // Track tool errors separately from drift verdicts. Reviewer saying
  // "continue" or "approve_done" should NOT reset a streak of malformed
  // tool calls — that's a different failure mode (model can't form the
  // call) than "executor is on the wrong path" (model is off-topic).
  let consecutiveToolErrors = 0;
  let sealed: { fills: Fill[]; approach: string } | null = null;
  // Timestamp the entry, store it, and echo a prefixed one-line summary.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(`[${prefix}] [t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}] ${shortContent(e)}`);
    return full;
  };
  try {
    while (turn < MAX_TURNS && !sealed) {
      turn += 1;
      // Executor
      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 600 });
      // A parse failure throws to the outer catch and ends the run as ok=false.
      const execAction = parseAction(execRaw, "executor");
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });
      if (execAction.kind === "tool_call") {
        try {
          const result = await executeToolCall(execAction.tool, execAction.args);
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trimResult(result) });
          consecutiveToolErrors = 0;
        } catch (e) {
          // Feed the error back as a tool_result so the executor can read it
          // on its next turn; only a streak of errors aborts the run.
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
          consecutiveToolErrors += 1;
          if (consecutiveToolErrors >= MAX_CONSECUTIVE_DRIFTS) {
            throw new Error(`${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors — executor can't form a valid call`);
          }
        }
      }
      // Reviewer
      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 400 });
      const revAction = parseAction(revRaw, "reviewer");
      // Logged as "critique" before the kind check below, so a malformed
      // reviewer action is still visible in the log.
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });
      if (revAction.kind !== "critique") throw new Error(`reviewer non-critique: ${revAction.kind}`);
      if (revAction.verdict === "drift") {
        consecutiveDrifts += 1;
        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) throw new Error(`${MAX_CONSECUTIVE_DRIFTS} consecutive drifts`);
      } else consecutiveDrifts = 0;
      // Consensus: executor proposed done AND reviewer approved, with
      // exactly target_count fills.
      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
        if (execAction.fills.length !== task.target_count) {
          throw new Error(`fills=${execAction.fills.length} target=${task.target_count}`);
        }
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", content: { fills: execAction.fills } });
        sealed = { fills: execAction.fills, approach: (execAction as any).rationale ?? "multi-agent → hybrid" };
      }
    }
    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);
    // Phase 19 write-through: seed playbook_memory so the next semantically
    // similar query benefits from this fill. Mirrors orchestrator.ts. Names
    // are the consensus fills' display names — that's what the boost-key
    // matcher (city, state, name) will look up against worker chunks.
    // Best effort: a failed or errored seed only warns; the run still
    // returns ok=true.
    try {
      const seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
        method: "POST", headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          operation: task.operation,
          approach: sealed.approach || "multi-agent → hybrid search",
          context: task.approach_hint ?? `${task.target_role} fill in ${task.target_city}, ${task.target_state}`,
          endorsed_names: sealed.fills.map(f => f.name),
          append: true,
        }),
      });
      if (!seedRes.ok) {
        console.warn(`[${prefix}] seed warning: ${seedRes.status} ${await seedRes.text()}`);
      } else {
        const j = await seedRes.json() as any;
        console.log(`[${prefix}] ↳ seeded playbook_memory: id=${j.playbook_id} entries=${j.entries_after}`);
      }
    } catch (e) {
      console.warn(`[${prefix}] seed errored: ${(e as Error).message}`);
    }
    return {
      task, ok: true, turns: turn, fills: sealed.fills, approach: sealed.approach,
      duration_secs: Math.round((Date.now() - start) / 1000), log,
    };
  } catch (e) {
    // Fail-soft: report the failure to the caller with the log collected so far.
    return {
      task, ok: false, turns: turn, fills: [], approach: "",
      duration_secs: Math.round((Date.now() - start) / 1000), log,
      error: (e as Error).message,
    };
  }
 }
 // Dispatch one executor tool call.
 //   "hybrid_search" → POST ${GATEWAY}/vectors/hybrid with playbook memory on
 //   "sql"           → sqlQuery(), SELECT statements only
 //   anything else   → callTool()
 // Throws on missing arguments, a non-SELECT query, or a non-2xx hybrid
 // response.
 async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  switch (name) {
    case "hybrid_search": {
      const { sql_filter, question, index_name, k } = args;
      const complete = sql_filter && question && index_name;
      if (!complete) throw new Error(`hybrid_search needs sql_filter+question+index_name`);
      const payload = {
        sql_filter,
        question,
        index_name,
        top_k: k ?? 10,
        generate: false,
        use_playbook_memory: true,
      };
      const res = await fetch(`${GATEWAY}/vectors/hybrid`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(payload),
      });
      if (res.ok) return res.json();
      throw new Error(`hybrid_search → ${res.status}: ${await res.text()}`);
    }
    case "sql": {
      const query = args.query;
      if (!query || typeof query !== "string") throw new Error("sql needs query");
      const isSelect = /^\s*SELECT/i.test(query);
      if (!isSelect) throw new Error("sql allows SELECT only");
      return sqlQuery(query);
    }
    default:
      return callTool(name, args);
  }
 }
 // Cap the bulky part of a tool result before it is logged: `rows` at 20,
 // otherwise `sources` at 12. Anything else is returned as-is.
 function trimResult(r: any): any {
  const caps: Array<[string, number]> = [["rows", 20], ["sources", 12]];
  if (r) {
    for (const [field, cap] of caps) {
      // The first matching field wins; `rows` takes precedence over `sources`.
      if (Array.isArray(r[field])) return { ...r, [field]: r[field].slice(0, cap) };
    }
  }
  return r;
 }
 // One-line summary of a log entry's content for the console echo. The
 // format depends on e.kind; unknown kinds fall back to truncated JSON.
 // Model output is not trusted to have the declared shape, so optional
 // fields are defaulted and `notes` is stringified before slicing.
 function shortContent(e: Omit<LogEntry, "at">): string {
  const c: any = e.content ?? {};
  if (e.kind === "tool_call") return `${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 70)})`;
  if (e.kind === "tool_result") {
    if (c.error) return `error: ${c.error}`;
    if (Array.isArray(c.sources)) return `hybrid sql=${c.sql_matches} reranked=${c.vector_reranked}`;
    if (Array.isArray(c.rows)) return `sql ${c.rows.length} rows`;
    return JSON.stringify(c).slice(0, 80);
  }
  // `notes` comes straight from the reviewer model. A non-string value (for
  // example a number) used to throw on `.slice` here and abort the whole
  // run from inside the logger.
  if (e.kind === "critique") return `verdict=${c.verdict} ${String(c.notes ?? "").slice(0, 60)}`;
  if (e.kind === "propose_done") return `${(c.fills ?? []).length} fills: ${(c.fills ?? []).map((f: Fill) => f.name).join(", ")}`;
  if (e.kind === "consensus_done") return "✓";
  if (e.kind === "plan") return `${(c.steps ?? []).length} steps`;
  return JSON.stringify(c).slice(0, 80);
 }
 // ────────────────────────── playbook rating ──────────────────────────
 // Per-playbook score card produced by ratePlaybook(). The component maxima
 // (2 + 2 + 2 + 3 + 1) add up to the /10 total.
 interface Rating {
  geo: number;            // 0-2: fills actually in target city/state
  authenticity: number;   // 0-2: fills' worker_ids exist in workers_500k
  persistence: number;    // 0-2: playbook_memory entry count grew correctly
  boost_firing: number;   // 0-3: follow-up query shows non-zero boost
  speed: number;          // 0-1: completed within 240 s (4 min)
  total: number;          // /10
  notes: string[];        // human-readable explanations for the points above
 }
 // The two fields this test reads from GET /vectors/playbook_memory/stats.
 // NOTE(review): presumably the endpoint returns more; confirm against service.rs.
 interface MemoryStats { entries: number; total_names_endorsed: number }
 // Read the current playbook_memory counters from the gateway.
 // Throws `stats → <status>` on a non-2xx response.
 async function fetchMemoryStats(): Promise<MemoryStats> {
  const res = await fetch(`${GATEWAY}/vectors/playbook_memory/stats`);
  if (res.ok) return res.json() as Promise<MemoryStats>;
  throw new Error(`stats → ${res.status}`);
 }
 // Try to resolve a fill's candidate_id to a workers_500k row. Accepts
 // "W500K-7995" (vector doc_id with prefix) and "7995" (raw worker_id).
 //
 // The id is model output, so it is handled defensively:
 //  - it is coerced to a string first, because a JSON number id would
 //    otherwise throw on `.replace`;
 //  - the "W500K-" prefix is removed wherever it occurs, not only at the
 //    very start. With leading whitespace or a quote in front, the digits
 //    of the prefix itself used to leak into the id (" W500K-7995" became
 //    5007995);
 //  - digit strings beyond the safe-integer range are rejected, not
 //    silently rounded.
 //
 // Returns the first matching row, or null when no numeric id can be
 // extracted or no row matches.
 async function lookupWorker(candidate_id: string): Promise<{ worker_id: number; name: string; city: string; state: string; role: string } | null> {
  const numStr = String(candidate_id ?? "").replace(/W500K-/gi, "").replace(/[^\d]/g, "");
  if (!numStr) return null;
  const num = parseInt(numStr, 10);
  if (!Number.isSafeInteger(num)) return null;
  // `num` is a validated integer, so interpolating it cannot inject SQL.
  const r = await sqlQuery(`SELECT worker_id, name, city, state, role FROM workers_500k WHERE worker_id = ${num} LIMIT 1`);
  return (r.rows && r.rows[0]) ?? null;
 }
 // Re-run a hybrid query that mirrors the contract — proves the freshly
 // seeded playbook actually lifts a future search.
 //
 // Returns how many of the top-10 sources carry a positive playbook_boost,
 // up to five of their citations, and the largest boost seen.
 // Throws `verify hybrid → <status>: <body>` on a non-2xx response.
 async function verifyBoostFires(task: TaskSpec): Promise<{ boostedHits: number; sampleCitations: string[]; topBoost: number }> {
  // Escape single quotes for a SQL string literal. This is applied to all
  // three fields: state used to be interpolated raw while role and city were
  // escaped, so a quote in target_state would have broken the filter.
  const sqlQuote = (s: string) => s.replace(/'/g, "''");
  // Mirror the contract's actual geo. The playbook stored (city, state)
  // from the operation; if the verify SQL doesn't restrict to the same
  // city, the candidate pool may not include the seeded workers and the
  // boost has nothing to lift. The contract pattern in production also
  // includes city — recruiters fill specific cities, not whole states.
  const sql_filter = `role = '${sqlQuote(task.target_role)}' `
    + `AND state = '${sqlQuote(task.target_state)}' `
    + `AND city = '${sqlQuote(task.target_city)}'`;
  const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
    method: "POST", headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      index_name: INDEX_NAME, filter_dataset: "workers_500k", id_column: "worker_id",
      sql_filter, question: `${task.target_role} in ${task.target_city}, ${task.target_state}`,
      top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
    }),
  });
  if (!r.ok) throw new Error(`verify hybrid → ${r.status}: ${await r.text()}`);
  const j = (await r.json()) as any;
  const sources: any[] = j.sources ?? [];
  // A source counts as boosted only when its playbook_boost is strictly positive.
  const boosted = sources.filter(s => (s.playbook_boost ?? 0) > 0);
  const cites = boosted.flatMap(s => s.playbook_citations ?? []).slice(0, 5);
  const top = sources.reduce((m, s) => Math.max(m, s.playbook_boost ?? 0), 0);
  return { boostedHits: boosted.length, sampleCitations: cites, topBoost: top };
 }
 // Rate one sealed playbook out of 10.
 //
 //   result      — the orchestrator run being rated
 //   statsBefore — playbook_memory counters attributed to "before this seed"
 //   statsAfter  — playbook_memory counters attributed to "after this seed"
 //
 // Components: geo (0-2), authenticity (0-2), persistence (0-2),
 // boost_firing (0-3), speed (0-1). Lookup and verification failures never
 // throw out of here; they cost points and are recorded in `notes`.
 async function ratePlaybook(
  result: RunResult,
  statsBefore: MemoryStats,
  statsAfter: MemoryStats,
 ): Promise<Rating> {
  const notes: string[] = [];
  let geo = 0, authenticity = 0, persistence = 0, boost_firing = 0, speed = 0;
  // 1. Geo + authenticity per fill
  for (const f of result.fills) {
    let w: Awaited<ReturnType<typeof lookupWorker>>;
    try {
      w = await lookupWorker(f.candidate_id);
    } catch (e) {
      // A failed lookup (SQL/network) is not the same as a missing worker;
      // say so, so a substrate error is not reported as a fake id.
      notes.push(`✗ lookup failed for candidate_id ${f.candidate_id}: ${(e as Error).message}`);
      continue;
    }
    if (!w) { notes.push(`✗ candidate_id ${f.candidate_id} not in workers_500k`); continue; }
    authenticity += 1;
    // City match is case-insensitive. A NULL city in the row counts as a
    // mismatch; it used to throw on `.toLowerCase()` and abort the rating.
    const cityMatches = (w.city ?? "").toLowerCase() === result.task.target_city.toLowerCase();
    if (cityMatches && w.state === result.task.target_state) {
      geo += 1;
    } else {
      notes.push(`◑ ${w.name} (id=${w.worker_id}) is in ${w.city}, ${w.state}, not ${result.task.target_city}, ${result.task.target_state}`);
    }
  }
  geo = Math.min(geo, 2);
  authenticity = Math.min(authenticity, 2);
  // 2. Persistence
  const grew = statsAfter.entries - statsBefore.entries;
  if (grew === 1) { persistence = 2; notes.push(`✓ playbook_memory grew by exactly 1`); }
  else if (grew >= 1) { persistence = 1; notes.push(`◑ playbook_memory grew by ${grew} (expected 1)`); }
  else { notes.push(`✗ playbook_memory did not grow (before=${statsBefore.entries} after=${statsAfter.entries})`); }
  // 3. Boost firing — re-run the same query and see if it lifts anything
  const v = await verifyBoostFires(result.task).catch(e => { notes.push(`✗ verify hybrid failed: ${(e as Error).message}`); return null; });
  if (v) {
    // Two or more boosted hits → 3, exactly one → 2, none → 0. The former
    // "topBoost > 0 → 1" tier was unreachable: with zero boosted hits no
    // source has a positive boost, so topBoost is 0 as well.
    if (v.boostedHits >= 2) boost_firing = 3;
    else if (v.boostedHits === 1) boost_firing = 2;
    else boost_firing = 0;
    notes.push(`boost re-query: ${v.boostedHits}/10 hits boosted, top=+${v.topBoost.toFixed(3)}, citations=${v.sampleCitations.slice(0, 3).join(",")}`);
  }
  // 4. Speed
  if (result.duration_secs <= 240) speed = 1;
  else notes.push(`◑ slow: ${result.duration_secs}s (>240)`);
  const total = geo + authenticity + persistence + boost_firing + speed;
  return { geo, authenticity, persistence, boost_firing, speed, total, notes };
 }
 // Render a rating as one line: each component over its maximum, then the
 // total out of 10.
 function fmtRating(r: Rating): string {
  const components = [
    `geo=${r.geo}/2`,
    `auth=${r.authenticity}/2`,
    `persist=${r.persistence}/2`,
    `boost=${r.boost_firing}/3`,
    `speed=${r.speed}/1`,
  ];
  return `${components.join(" ")} → ${r.total}/10`;
 }
 // ────────────────────────── main ──────────────────────────
 // Runs two orchestrator pairs concurrently (Welder/Toledo and Forklift
 // Operator/Nashville), rates every playbook that sealed, prints the
 // ratings, and exits 0 when the gate passes. Throws (the caller's .catch
 // exits 1) when both orchestrators fail or any rating is below 5/10.
 async function main() {
  const taskA: TaskSpec = {
    id: `e2e-A-${Date.now()}`,
    operation: "fill: Welder x2 in Toledo, OH",
    target_role: "Welder", target_count: 2, target_city: "Toledo", target_state: "OH",
    approach_hint: "hybrid_search against workers_500k_v1 with sql_filter on role+city+state, then sql verify",
  };
  const taskB: TaskSpec = {
    id: `e2e-B-${Date.now()}`,
    operation: "fill: Forklift Operator x2 in Nashville, TN",
    target_role: "Forklift Operator", target_count: 2, target_city: "Nashville", target_state: "TN",
    approach_hint: "hybrid_search against workers_500k_v1 with sql_filter on role+city+state, then sql verify",
  };
  console.log(`▶ parallel real-world test`);
  console.log(`  A: ${taskA.operation}`);
  console.log(`  B: ${taskB.operation}`);
  console.log(`  models: executor=${EXECUTOR_MODEL}  reviewer=${REVIEWER_MODEL}\n`);
  const statsBefore = await fetchMemoryStats();
  console.log(`▶ playbook_memory before: ${statsBefore.entries} entries, ${statsBefore.total_names_endorsed} endorsed names\n`);
  // Run both pairs in parallel. Each is its own (executor, reviewer)
  // conversation; they do NOT see each other's logs.
  const [resA, resB] = await Promise.all([
    runOrchestrator(taskA, "A"),
    runOrchestrator(taskB, "B"),
  ]);
  console.log(`\n▶ both orchestrators returned`);
  console.log(`  A: ok=${resA.ok} turns=${resA.turns} ${resA.duration_secs}s ${resA.error ?? ""}`);
  console.log(`  B: ok=${resB.ok} turns=${resB.turns} ${resB.duration_secs}s ${resB.error ?? ""}`);
  if (!resA.ok && !resB.ok) {
    throw new Error(`both orchestrators failed — substrate or models in bad state`);
  }
  const statsMid = await fetchMemoryStats();
  console.log(`\n▶ playbook_memory after both runs: ${statsMid.entries} entries (+${statsMid.entries - statsBefore.entries})\n`);
  // Rate each successful playbook. Only the combined growth is measured, so
  // when both runs succeeded it is split: the first new entry is credited
  // to A and the remainder to B. A's share comes from MEASURED growth: it
  // used to be a hard-coded +1, which scored A's persistence 2/2 even when
  // neither seed reached playbook_memory.
  const growth = statsMid.entries - statsBefore.entries;
  const afterA: MemoryStats = {
    entries: statsBefore.entries + (growth >= 1 ? 1 : 0),
    total_names_endorsed: statsBefore.total_names_endorsed,
  };
  const ratings: Array<{ id: string; ok: boolean; rating?: Rating; error?: string }> = [];
  if (resA.ok) {
    // Use real measured numbers when they're unambiguous (only one task succeeded)
    const ra = await ratePlaybook(resA, statsBefore, resB.ok ? afterA : statsMid);
    ratings.push({ id: "A", ok: true, rating: ra });
  } else ratings.push({ id: "A", ok: false, error: resA.error });
  if (resB.ok) {
    // B starts where A's credited share ends; alone, it owns the whole delta.
    const rb = await ratePlaybook(resB, resA.ok ? afterA : statsBefore, statsMid);
    ratings.push({ id: "B", ok: true, rating: rb });
  } else ratings.push({ id: "B", ok: false, error: resB.error });
  console.log(`\n▶ Per-playbook ratings:\n`);
  for (const r of ratings) {
    if (!r.ok) {
      console.log(`  ${r.id}: FAILED — ${r.error}`);
      continue;
    }
    console.log(`  ${r.id}: ${fmtRating(r.rating!)}`);
    for (const n of r.rating!.notes) console.log(`     ${n}`);
  }
  const totals = ratings.filter(r => r.rating).map(r => r.rating!.total);
  // Defensive only: the both-failed case already threw above.
  if (totals.length === 0) {
    throw new Error(`no playbooks rated — both orchestrators failed`);
  }
  const min = Math.min(...totals);
  const avg = totals.reduce((s, t) => s + t, 0) / totals.length;
  console.log(`\n▶ Summary: avg=${avg.toFixed(1)}/10  min=${min}/10`);
  // Hard gate: any rating below 5 means the loop is broken end-to-end.
  // A single failed orchestrator is reported above but does not fail the gate.
  if (min < 5) throw new Error(`rating gate failed — min ${min}/10 (need ≥5)`);
  console.log(`\n✓ end-to-end real-world test passed`);
  process.exit(0);
 }
 // Entry point: run main() and turn any rejection (both orchestrators
 // failed, rating gate failed, HTTP error) into a printed message, the stack
 // when one is present, and exit code 1.
 main().catch(e => {
  console.error(`\n✗ ${(e as Error).message}`);
  if ((e as any).stack) console.error((e as any).stack);
  process.exit(1);
 });
--- a/tests/multi-agent/scenario.ts
+++ b/tests/multi-agent/scenario.ts
@ -0,0 +1,822 @@
 // A day in the life — the real-world scenario test.
 //
 // Runs five fill events against the live substrate (baseline_fill,
 // recurring, expansion, emergency, misplacement), followed by an
 // end-of-day retrospective. Each fill event exercises a different pressure
 // pattern and produces actionable artifacts (SMS drafts, client emails,
 // dispatch log) alongside the ranking output; the run as a whole is
 // self-audited at EOD against six gap categories (supply, embedding,
 // fairness, drift, tool, write-through).
 //
 // Design notes:
 // - Compressed clock. The "08:00" in an event spec is a label for the
 //   output, not a wall-clock gate. The full scenario runs in minutes.
 // - One script, shared state. Each event mutates the same roster +
 //   gap_signals + artifacts in-memory, then persists at EOD.
 // - Fail-soft per event. A drift-abort or tool error on one event
 //   records a gap_signal and moves on; we explicitly want to see which
 //   events the substrate can't handle, not abort the whole run.
 // - Every fill event routes through the same executor/reviewer loop as
 //   the single-task orchestrator — just driven in sequence rather than
 //   standalone, with event-specific extra constraints in the prompt.
 import {
  type LogEntry,
  type TaskSpec,
  type Action,
  type Fill,
  callTool,
  hybridSearch,
  sqlQuery,
  generate,
  parseAction,
  executorPrompt,
  reviewerPrompt,
  GATEWAY,
 } from "./agent.ts";
 import { mkdir, writeFile, appendFile } from "node:fs/promises";
 import { join } from "node:path";
 // Model that plays the executor role in the fill loop.
 const EXECUTOR_MODEL = "mistral:latest";
 // Model that plays the reviewer role (critiques every executor turn).
 const REVIEWER_MODEL = "qwen2.5:latest";
 const DRAFT_MODEL = "qwen2.5:latest";     // artifact generation; short outputs
 // Upper bound on executor/reviewer turns per event before giving up.
 const MAX_TURNS = 14;
 // Abort threshold for the shared drift / tool-error counter in runAgentFill.
 const MAX_CONSECUTIVE_DRIFTS = 3;
 // Vector index named in each task's approach hint.
 const WORKERS_INDEX = "workers_500k_v1";
 // SQL dataset queried when resolving worker names to ids.
 const WORKERS_DATASET = "workers_500k";
 // =================== Event + scenario types ===================
 // The kinds of fill event a scenario can contain; guidanceFor() switches on
 // this to pick the event-specific prompt text.
 type EventKind = "baseline_fill" | "recurring" | "expansion" | "emergency" | "misplacement";
 // One staffing request within the scenario day.
 interface FillEvent {
  kind: EventKind;
  at: string;            // display label like "08:00"
  role: string;          // role to fill; copied into the task's target_role
  count: number;         // workers requested; copied into the task's target_count
  city: string;
  state: string;
  shift_start?: string;  // "08:00 AM" for SMS/email drafts (also quoted in guidance text)
  scenario_note?: string; // extra context; in this file it is only interpolated into the SMS/email draft prompts
  deadline?: string;     // emergency events carry this; interpolated into the emergency guidance text
  exclude_worker_ids?: string[];  // extra ids to exclude; main() fills this for misplacement events
  replaces_event?: string;        // misplacement: at-label of the event whose bookings are being refilled
 }
 // A full scenario: one client, one date label, and the ordered events to run.
 // main() either parses this from a JSON file or falls back to DEFAULT_SCENARIO.
 interface ScenarioSpec {
  client: string;        // client name; used in prompts, seed context and report headers
  date: string;          // date label; used in task ids and output headers
  events: FillEvent[];   // processed sequentially, in array order
 }
 // Outcome of one event as recorded in ctx.results and results.json.
 interface EventResult {
  event: FillEvent;
  ok: boolean;             // false when the agent loop threw (no consensus, drift abort, parse error)
  fills: Fill[];           // empty on failure
  turns: number;           // 0 on failure
  duration_secs: number;
  error?: string;          // set only on failure
  gap_signals: string[];   // pulled into the cross-event gap report
  sources_first_score?: number;  // score of the first source in the first hybrid_search result
  sources_last_score?: number;   // score of the last source in that same result
  pool_size?: number;       // sql_matches from the first hybrid_search
  playbook_citations?: string[];  // distinct citations seen on hybrid sources during the event
 }
 // One booking on the shared in-memory roster. Entries are appended by
 // runEvent and only "confirmed" ones feed later exclusion lists.
 interface RosterEntry {
  worker_id: string;
  name: string;
  booked_for: string;       // event at-label
  role: string;
  city: string;
  state: string;
  // "no_show" is set by main() for misplacement events; nothing in this file
  // assigns "rebooked_elsewhere".
  status: "confirmed" | "no_show" | "rebooked_elsewhere";
 }
 // Mutable state shared by every event in a run; persisted at end of day.
 interface ScenarioContext {
  spec: ScenarioSpec;
  out_dir: string;          // directory receiving sms.md, emails.md, dispatch.jsonl, report.md, ...
  roster: RosterEntry[];    // grows as events seal fills
  results: EventResult[];   // one entry per event, in run order
  // Per-event gap strings split into category/detail by main().
  gap_signals: Array<{ event: string; category: string; detail: string }>;
 }
 // =================== Default scenario ===================
 // Built-in scenario used when no spec path is given on the command line:
 // five Toledo, OH fill events for a single client, one per EventKind.
 // The final misplacement event points back at the 08:00 fill via
 // replaces_event so main() can mark one of those workers as a no-show.
 // NOTE(review): 2026-04-21 appears to fall on a Tuesday, while the baseline
 // note (and the baseline guidance text) say "Monday" — confirm which is intended.
 const DEFAULT_SCENARIO: ScenarioSpec = {
  client: "Riverfront Steel",
  date: "2026-04-21",
  events: [
    {
      kind: "baseline_fill",
      at: "08:00",
      role: "Warehouse Associate",
      count: 3,
      city: "Toledo",
      state: "OH",
      shift_start: "08:00 AM",
      scenario_note: "Regular Monday morning shift, 8-hour.",
    },
    {
      kind: "recurring",
      at: "10:30",
      role: "Machine Operator",
      count: 2,
      city: "Toledo",
      state: "OH",
      shift_start: "11:00 AM",
      scenario_note: "Recurring Tuesday/Thursday slot — prior workers may still be available.",
    },
    {
      kind: "expansion",
      at: "12:15",
      role: "Forklift Operator",
      count: 5,
      city: "Toledo",
      state: "OH",
      shift_start: "01:00 PM",
      scenario_note: "New warehouse location opening, five-worker team needed.",
    },
    {
      kind: "emergency",
      at: "14:00",
      role: "Loader",
      count: 4,
      city: "Toledo",
      state: "OH",
      shift_start: "04:00 PM same day",
      deadline: "16:00",
      scenario_note: "Walkoff incident — replacement crew needed by 16:00 sharp.",
    },
    {
      kind: "misplacement",
      at: "15:45",
      role: "Warehouse Associate",
      count: 1,
      city: "Toledo",
      state: "OH",
      shift_start: "remainder of 08:00 shift",
      scenario_note: "One worker from the 08:00 fill didn't show; rebuild the gap.",
      replaces_event: "08:00",
    },
  ],
 };
 // =================== Low-level helpers shared across events ===================
 // Minimal JSON-over-HTTP helper.
 //
 // Sends a POST with a JSON-encoded body when `body` is provided, otherwise a
 // GET. "Provided" means anything other than undefined/null, so falsy payloads
 // such as 0, "" or false are still POSTed instead of silently becoming a
 // body-less GET.
 //
 // Returns the parsed JSON response cast to T (no runtime validation).
 // Throws an Error of the form "<status> <response text>" on a non-2xx reply.
 async function httpJson<T>(url: string, body?: any): Promise<T> {
  const hasBody = body !== undefined && body !== null;
  const res = await fetch(url, {
    method: hasBody ? "POST" : "GET",
    headers: { "Content-Type": "application/json" },
    body: hasBody ? JSON.stringify(body) : undefined,
  });
  if (!res.ok) throw new Error(`${res.status} ${await res.text()}`);
  return (await res.json()) as T;
 }
 // Render one log entry as a single console line.
 //
 // The line starts with a fixed-width tag "[tNN role kind]" followed by a
 // kind-specific summary; free-text fields are truncated so a line stays short.
 // Unknown kinds fall through to a truncated JSON dump of the content.
 function fmt(e: LogEntry): string {
  const tag = `    [t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;
  const c = e.content ?? {};
  // Stringify-and-truncate; null/undefined become "".
  const trim = (s: any, n: number) => String(s ?? "").slice(0, n);
  if (e.kind === "tool_call") return `${tag} ${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 60)}) — ${trim(c.rationale, 40)}`;
  if (e.kind === "tool_result") {
    if (c.error) return `${tag} ERROR ${c.error}`;
    // Prefer a row/source count; otherwise show a truncated JSON preview.
    const rows = c?.rows?.length ?? c?.sources?.length ?? undefined;
    return `${tag} ${rows !== undefined ? `rows=${rows}` : JSON.stringify(c).slice(0, 60)}`;
  }
  if (e.kind === "critique") return `${tag} verdict=${c.verdict} — ${trim(c.notes, 50)}`;
  if (e.kind === "propose_done") return `${tag} ${c.fills?.length ?? 0} fills: ${(c.fills ?? []).map((f: Fill) => f.name).join(", ")}`;
  if (e.kind === "consensus_done") return `${tag} ✓`;
  if (e.kind === "plan") return `${tag} ${c.steps?.length ?? 0} steps`;
  if (e.kind === "error") {
    // Without a message, fall back to the content itself. Objects are
    // JSON-encoded so the line never degrades to "[object Object]".
    const detail = c.message ?? (typeof c === "string" ? c : JSON.stringify(c));
    return `${tag} ${detail}`;
  }
  return `${tag} ${JSON.stringify(c).slice(0, 70)}`;
 }
 // Dispatch a tool call emitted by the executor.
 //
 // - "hybrid_search": validates its three required args and posts to the
 //   gateway's /vectors/hybrid endpoint with the playbook_memory boost on.
 // - "sql": accepts only a non-empty string that starts with SELECT.
 // - anything else is forwarded to callTool unchanged.
 async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  switch (name) {
    case "hybrid_search": {
      const incomplete = !args.sql_filter || !args.question || !args.index_name;
      if (incomplete) {
        throw new Error(`hybrid_search needs sql_filter + question + index_name, got ${JSON.stringify(args)}`);
      }
      // Every fill event searches with the playbook_memory boost enabled so
      // that fills sealed earlier in the run can influence later events.
      const payload = {
        sql_filter: args.sql_filter,
        question: args.question,
        index_name: args.index_name,
        top_k: args.k ?? 10,
        generate: false,
        use_playbook_memory: true,
        playbook_memory_k: 10,
      };
      return httpJson(`${GATEWAY}/vectors/hybrid`, payload);
    }
    case "sql": {
      const statement = args.query;
      if (typeof statement !== "string" || !statement) {
        throw new Error(`sql needs query string`);
      }
      const readOnly = /^\s*SELECT/i.test(statement);
      if (!readOnly) {
        throw new Error(`sql allows SELECT only`);
      }
      return sqlQuery(statement);
    }
    default:
      return callTool(name, args);
  }
 }
 // =================== Core fill loop — one event, one consensus ===================
 // What runAgentFill returns once executor and reviewer reach consensus.
 interface AgentFillOutcome {
  fills: Fill[];            // the sealed proposal, exactly target_count entries
  approach: string;         // executor rationale, or "multi-agent hybrid" when none was given
  turns: number;            // number of executor/reviewer turns used
  duration_secs: number;
  log: LogEntry[];          // every entry appended during the loop
  first_sql_matches?: number;      // sql_matches of the first hybrid_search result, if any
  first_pool_first_score?: number; // score of that result's first source
  first_pool_last_score?: number;  // score of that result's last source
  playbook_citations: string[];    // distinct citations seen across all tool results with sources
 }
 // Drive the executor/reviewer loop for one task until they agree on a fill.
 //
 // Each turn: the executor emits an action (tool calls are executed here and
 // their results logged), then the reviewer critiques the log so far. The loop
 // seals when the executor proposes done AND the reviewer approves in the same
 // turn.
 //
 // Parameters:
 //   task               — the fill request handed to both prompt builders.
 //   extra_guidance     — event-specific text appended to both prompts.
 //   exclude_worker_ids — ids that must not be proposed; enforced in the
 //                        prompt, on tool results, and again at seal time.
 //
 // Throws when: an action cannot be parsed, the reviewer emits a non-critique,
 // the drift/tool-error counter reaches MAX_CONSECUTIVE_DRIFTS, the sealed
 // proposal has the wrong count or contains an excluded id, or MAX_TURNS pass
 // without consensus.
 async function runAgentFill(
  task: TaskSpec,
  extra_guidance: string,
  exclude_worker_ids: string[],
 ): Promise<AgentFillOutcome> {
  const t0 = Date.now();
  const log: LogEntry[] = [];
  let turn = 0;
  let consecutiveDrifts = 0;
  let sealed: { fills: Fill[]; approach: string } | null = null;
  let first_sql_matches: number | undefined;
  let first_pool_first: number | undefined;
  let first_pool_last: number | undefined;
  const playbook_citations = new Set<string>();
  // Timestamp, store and echo a log entry; returns the stored entry.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(fmt(full));
    return full;
  };
  // Build executor prompt with the scenario-specific guidance + exclusions
  // injected as an extra block. Reuses the base prompt so drift detection
  // and output-shape rules are unchanged.
  const withExtras = (base: string): string => {
    let addon = "";
    if (extra_guidance) addon += `\n\nEVENT-SPECIFIC GUIDANCE:\n${extra_guidance}`;
    if (exclude_worker_ids.length > 0) {
      addon += `\n\nEXCLUDE these workers (already booked / unavailable today): ${exclude_worker_ids.join(", ")}\nIf your tool results include them, skip them — never propose them.`;
    }
    return base + addon;
  };
  while (turn < MAX_TURNS && !sealed) {
    turn += 1;
    // --- Executor half of the turn ---
    const execRaw = await generate(
      EXECUTOR_MODEL,
      withExtras(executorPrompt(task, log)),
      { temperature: 0.2, max_tokens: 600 },
    );
    let execAction: Action;
    try {
      execAction = parseAction(execRaw, "executor");
    } catch (e) {
      // Unparseable output is logged, then aborts the whole event.
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "error",
        content: { message: (e as Error).message, raw: execRaw.slice(0, 300) } });
      throw e;
    }
    append({ turn, role: "executor", model: EXECUTOR_MODEL,
      kind: execAction.kind as any, content: execAction });
    if (execAction.kind === "tool_call") {
      try {
        const result = await executeToolCall(execAction.tool, execAction.args);
        // Filter tool results to enforce the exclusion list — defense in
        // depth since the prompt alone isn't enough for weak models.
        const filtered = maskExclusions(result, exclude_worker_ids);
        // Capture the first hybrid_search pool stats for gap detection.
        if (execAction.tool === "hybrid_search" && first_sql_matches === undefined) {
          first_sql_matches = (filtered as any).sql_matches;
          const sources = (filtered as any).sources ?? [];
          if (sources.length > 0) {
            first_pool_first = sources[0].score;
            first_pool_last = sources[sources.length - 1].score;
          }
        }
        // Only the trimmed result goes into the log (and thus into prompts).
        const trimmed = trimResult(filtered);
        append({ turn, role: "executor", model: EXECUTOR_MODEL,
          kind: "tool_result", content: trimmed });
        // Accumulate playbook citations from any hybrid result that
        // carried them — the scenario-level report needs them.
        if (Array.isArray((filtered as any).sources)) {
          for (const s of (filtered as any).sources) {
            for (const c of s.playbook_citations ?? []) {
              playbook_citations.add(c);
            }
          }
        }
      } catch (e) {
        // A failed tool call is logged as a tool_result carrying the error.
        append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
          content: { error: (e as Error).message, tool: execAction.tool } });
        // NOTE(review): tool errors share the counter with reviewer drift
        // verdicts. A non-drift verdict later in this same turn resets it to
        // 0, and a drift verdict bumps it a second time — so tool errors
        // alone do not accumulate across turns. Confirm this is intended.
        consecutiveDrifts += 1;
        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
          throw new Error(`aborted — ${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors`);
        }
      }
    }
    // --- Reviewer half of the turn ---
    const revRaw = await generate(
      REVIEWER_MODEL,
      withExtras(reviewerPrompt(task, log)),
      { temperature: 0.1, max_tokens: 400 },
    );
    let revAction: Action;
    try {
      revAction = parseAction(revRaw, "reviewer");
    } catch (e) {
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "error",
        content: { message: (e as Error).message, raw: revRaw.slice(0, 300) } });
      throw e;
    }
    // The entry is logged as a critique before its kind is verified below.
    append({ turn, role: "reviewer", model: REVIEWER_MODEL,
      kind: "critique", content: revAction });
    if (revAction.kind !== "critique") throw new Error(`reviewer emitted non-critique: ${revAction.kind}`);
    if (revAction.verdict === "drift") {
      consecutiveDrifts += 1;
      if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
        throw new Error(`aborted — ${MAX_CONSECUTIVE_DRIFTS} consecutive drift flags`);
      }
    } else {
      consecutiveDrifts = 0;
    }
    // Consensus requires a proposal and an approval within the same turn.
    if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
      if (execAction.fills.length !== task.target_count) {
        throw new Error(`consensus malformed — ${execAction.fills.length} fills vs target ${task.target_count}`);
      }
      // Enforce exclusion at seal time too, in case the models ignored
      // both prompt + tool-result filtering.
      // NOTE(review): this is an exact string match on candidate_id; ids the
      // model emits in another form (e.g. a bare number) would slip through.
      for (const f of execAction.fills) {
        if (exclude_worker_ids.includes(f.candidate_id)) {
          throw new Error(`consensus proposed excluded worker ${f.candidate_id}`);
        }
      }
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done",
        content: { fills: execAction.fills } });
      sealed = { fills: execAction.fills, approach: execAction.rationale ?? "multi-agent hybrid" };
    }
  }
  if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);
  return {
    fills: sealed.fills,
    approach: sealed.approach,
    turns: turn,
    duration_secs: (Date.now() - t0) / 1000,
    log,
    first_sql_matches,
    first_pool_first_score: first_pool_first,
    first_pool_last_score: first_pool_last,
    playbook_citations: Array.from(playbook_citations),
  };
 }
 // Remove excluded workers from a tool result before the models see it.
 //
 // Handles the two result shapes used in this file: hybrid results with a
 // `sources` array (matched on doc_id) and SQL results with a `rows` array
 // (matched on worker_id, falling back to doc_id). Any other shape, and any
 // null/undefined result, is returned untouched.
 //
 // Ids are compared with the "W500K-" prefix stripped from both sides. The
 // exclusion list holds canonical "W500K-N" ids while SQL rows carry the bare
 // worker_id (resolveWorkerIds adds the prefix itself), so a literal
 // comparison would never match a SQL row.
 //
 // Entries with no id at all are kept — there is nothing to match against.
 function maskExclusions(result: any, exclude: string[]): any {
  if (exclude.length === 0 || !result) return result;
  const canonical = (id: unknown): string => String(id).replace(/^W500K-/, "");
  const blocked = new Set(exclude.map(canonical));
  const allowed = (id: unknown): boolean =>
    id === undefined || id === null || !blocked.has(canonical(id));
  if (Array.isArray(result.sources)) {
    return { ...result, sources: result.sources.filter((s: any) => allowed(s?.doc_id)) };
  }
  if (Array.isArray(result.rows)) {
    return { ...result, rows: result.rows.filter((r: any) => allowed(r?.worker_id ?? r?.doc_id)) };
  }
  return result;
 }
 // Cap a tool result at 20 entries before it is logged.
 //
 // Looks for a `sources` array first, then a `rows` array; the first one found
 // is sliced and a `_trimmed` marker ("N more", or undefined when nothing was
 // cut) is attached. Anything else is returned unchanged.
 function trimResult(r: any): any {
  const LIMIT = 20;
  for (const field of ["sources", "rows"]) {
    const list = r ? r[field] : undefined;
    if (!Array.isArray(list)) continue;
    const overflow = list.length - LIMIT;
    return {
      ...r,
      [field]: list.slice(0, LIMIT),
      _trimmed: overflow > 0 ? `${overflow} more` : undefined,
    };
  }
  return r;
 }
 // =================== Per-event guidance strings ===================
 // Produce the event-specific guidance paragraph appended to both agent
 // prompts. Each event kind has its own wording; the shift start falls back
 // to a generic phrase when the event does not carry one.
 function guidanceFor(event: FillEvent, ctx: ScenarioContext): string {
  const startsAt = event.shift_start ?? "at start time";
  switch (event.kind) {
    case "baseline_fill": {
      return [
        `Standard Monday fill.`,
        `Client ${ctx.spec.client}.`,
        `Shift starts ${startsAt}.`,
        `Take the top candidates by semantic match and availability.`,
      ].join(" ");
    }
    case "recurring": {
      return [
        `RECURRING slot — ${ctx.spec.client} runs this shift every Tues/Thurs.`,
        `If playbook_memory surfaces candidates endorsed by past similar fills (you'll see 'cites' on hybrid sources), those are the preferred workers.`,
        `Shift starts ${startsAt}.`,
      ].join(" ");
    }
    case "expansion": {
      return [
        `EXPANSION at ${ctx.spec.client}.`,
        `New location, ${event.count}-worker team needed at once — search broadly and prefer workers with team/collaboration signals (engagement, communications scores).`,
        `Shift starts ${startsAt}.`,
      ].join(" ");
    }
    case "emergency": {
      const due = event.deadline ?? "end of day";
      return [
        `EMERGENCY walkoff — ${ctx.spec.client} needs ${event.count} ${event.role}s BY ${due}.`,
        `Prioritize availability over perfect skill match.`,
        `A good-enough worker who can report today beats a perfect worker who can't.`,
      ].join(" ");
    }
    case "misplacement": {
      const shift = event.shift_start ?? "in progress";
      return [
        `MISPLACEMENT refill.`,
        `A worker from the 08:00 shift no-showed.`,
        `You must replace them WITHOUT proposing the same worker or anyone already booked today (see EXCLUDE list).`,
        `Shift is ${shift} so speed matters.`,
      ].join(" ");
    }
  }
 }
 // =================== Artifact generation ===================
 // The two text drafts produced for a sealed event.
 interface ArtifactBundle {
  sms: string;    // per-worker SMS blocks, as returned by the draft model (trimmed)
  email: string;  // client confirmation email, as returned by the draft model (trimmed)
 }
 // One Ollama call per event for SMS (to the filled workers) + one for
 // the client email. Short outputs, low temperature — these are drafts,
 // not creative writing.
 //
 // Both prompts are built from the event, the client name in ctx.spec and
 // outcome.fills, then sent to DRAFT_MODEL concurrently. The returned strings
 // are trimmed but otherwise unvalidated.
 //
 // NOTE(review): the email prompt calls f.reason.slice(), so it assumes every
 // fill carries a string `reason`; a fill without one would throw here. The
 // caller wraps this function in a try/catch and records an artifact gap.
 async function generateArtifacts(event: FillEvent, outcome: AgentFillOutcome, ctx: ScenarioContext): Promise<ArtifactBundle> {
  // Prompt text below is a multi-line template literal: its line breaks and
  // leading whitespace are part of what the model receives.
  const smsPrompt = `Generate short, friendly, professional SMS messages to confirm a shift for each worker. ONE message per worker. Format as:
 TO: {Name}
 {message body under 180 chars}
 ---
 Details:
 - Client: ${ctx.spec.client}
 - Role: ${event.role}
 - Location: ${event.city}, ${event.state}
 - Shift starts: ${event.shift_start ?? "TBD"}
 - Scenario: ${event.scenario_note ?? ""}
 Workers to message:
 ${outcome.fills.map(f => `- ${f.name} (id ${f.candidate_id})`).join("\n")}
 Respond with only the message blocks, separated by "---". No commentary.`;
  // The recipient address is derived from the client name: lower-cased with
  // spaces removed, under the reserved ".example" TLD.
  const emailPrompt = `Generate a short professional email confirmation to the staffing client.
 TO: staffing@${ctx.spec.client.toLowerCase().replace(/ /g, "")}.example
 FROM: dispatch@lakehouse.example
 SUBJECT: (3-word subject)
 Body (4-6 lines max). Be specific about:
 - Number of workers filled (${outcome.fills.length} of ${event.count})
 - Roles: ${event.role}
 - Names filled
 - Shift start: ${event.shift_start ?? "TBD"}
 - Any scenario flag: ${event.scenario_note ?? "(none)"}
 Workers:
 ${outcome.fills.map(f => `- ${f.name} (${f.reason.slice(0, 60)})`).join("\n")}
 Respond with only the email. No commentary.`;
  // Run both generations in parallel; if either rejects, the whole call rejects.
  const [sms, email] = await Promise.all([
    generate(DRAFT_MODEL, smsPrompt, { temperature: 0.3, max_tokens: 500 }),
    generate(DRAFT_MODEL, emailPrompt, { temperature: 0.3, max_tokens: 400 }),
  ]);
  return { sms: sms.trim(), email: email.trim() };
 }
 // =================== Per-event runner ===================
 // Run one fill event end to end and report what happened.
 //
 // Steps: build the task, run the agent loop with today's confirmed roster
 // (plus any event-level exclusions) as the exclusion list, resolve the
 // proposed workers to canonical ids, book them on the shared roster, derive
 // gap signals, draft SMS/email artifacts, append the dispatch log line, and
 // seed playbook_memory with the sealed fill.
 //
 // Fail-soft: an agent-loop failure returns ok=false with a drift_or_tool gap
 // instead of throwing; artifact and seed failures are recorded as gaps.
 // Writing dispatch.jsonl is not guarded and will reject on I/O failure.
 //
 // Mutates ctx.roster. Returns the EventResult for ctx.results.
 async function runEvent(event: FillEvent, ctx: ScenarioContext): Promise<EventResult> {
  console.log(`\n════════ ${event.at} — ${event.kind.toUpperCase()}: fill ${event.count}× ${event.role} in ${event.city}, ${event.state} ════════`);
  const t0 = Date.now();
  // Build the task spec the agent loop expects.
  const task: TaskSpec = {
    id: `${ctx.spec.date}-${event.at.replace(":", "")}-${event.kind}`,
    operation: `fill: ${event.role} x${event.count} in ${event.city}, ${event.state}`,
    target_role: event.role,
    target_count: event.count,
    target_city: event.city,
    target_state: event.state,
    approach_hint: `hybrid search against ${WORKERS_INDEX} for ${event.kind}`,
  };
  // Exclusion set: everyone already in today's roster + any explicit
  // exclusions from the event spec. Empty or non-string ids are dropped so an
  // unresolved booking cannot put a blank entry in the prompt or make the
  // seal-time check reject fills that merely lack an id.
  const excludeIds = [
    ...ctx.roster
      .filter(r => r.status === "confirmed")
      .map(r => r.worker_id),
    ...(event.exclude_worker_ids ?? []),
  ].filter(id => typeof id === "string" && id.length > 0);
  const gap_signals: string[] = [];
  let outcome: AgentFillOutcome;
  try {
    outcome = await runAgentFill(task, guidanceFor(event, ctx), excludeIds);
  } catch (e) {
    return {
      event,
      ok: false,
      fills: [],
      turns: 0,
      duration_secs: (Date.now() - t0) / 1000,
      error: (e as Error).message,
      gap_signals: [`drift_or_tool: ${(e as Error).message}`],
    };
  }
  // Resolve worker_ids via SQL so the roster has stable IDs (models
  // sometimes return names-only). Best-effort — fills that still lack a
  // canonical id afterwards are flagged below.
  const resolved = await resolveWorkerIds(outcome.fills, event);
  // Roster bookkeeping + double-book check. The canonical id lives in
  // candidate_id (that is the field resolveWorkerIds rewrites), so it is what
  // the roster stores as worker_id.
  for (const r of resolved) {
    const workerId = r.candidate_id;
    if (!/^W500K-\d+$/.test(String(workerId ?? ""))) {
      gap_signals.push(`unresolved_id: ${r.name} has non-canonical id ${JSON.stringify(workerId ?? null)}`);
    }
    const conflict = ctx.roster.find(e => e.worker_id === workerId && e.status === "confirmed");
    if (conflict) {
      gap_signals.push(`double_book: ${workerId} ${r.name} already booked for ${conflict.booked_for}`);
    }
    ctx.roster.push({
      worker_id: workerId,
      name: r.name,
      booked_for: event.at,
      role: event.role,
      city: event.city,
      state: event.state,
      status: "confirmed",
    });
  }
  // Pool-size signal (Gap 1 — supply).
  const supply_threshold = event.count * 3;
  if ((outcome.first_sql_matches ?? 0) < supply_threshold) {
    gap_signals.push(
      `supply: only ${outcome.first_sql_matches} candidates for ${event.count}× ${event.role} in ${event.city} (< ${supply_threshold}, our 3× comfort margin)`
    );
  }
  // Score-spread signal (Gap 2 — embedding).
  const spread = (outcome.first_pool_first_score ?? 0) - (outcome.first_pool_last_score ?? 0);
  if (spread > 0 && spread < 0.02) {
    gap_signals.push(
      `embedding: top-K score spread ${spread.toFixed(3)} < 0.02 — model struggles to differentiate`
    );
  }
  // Generate artifacts (SMS + email) — fail-soft; artifacts are cosmetic
  // relative to the consensus itself.
  let bundle: ArtifactBundle | null = null;
  try {
    bundle = await generateArtifacts(event, { ...outcome, fills: resolved }, ctx);
    await appendFile(join(ctx.out_dir, "sms.md"),
      `\n## ${event.at} ${event.kind} — ${event.role} x${event.count} in ${event.city}, ${event.state}\n\n${bundle.sms}\n`);
    await appendFile(join(ctx.out_dir, "emails.md"),
      `\n## ${event.at} ${event.kind} — ${event.role} x${event.count}\n\n${bundle.email}\n`);
  } catch (e) {
    gap_signals.push(`artifact: ${(e as Error).message}`);
  }
  // Dispatch log (structured).
  await appendFile(join(ctx.out_dir, "dispatch.jsonl"),
    JSON.stringify({
      at: event.at,
      kind: event.kind,
      operation: task.operation,
      fills: resolved,
      turns: outcome.turns,
      duration_secs: outcome.duration_secs,
      pool_size: outcome.first_sql_matches,
      playbook_citations: outcome.playbook_citations,
    }) + "\n");
  // Always seed playbook_memory after a sealed fill — keep the learning
  // loop tight across the whole day so recurring/misplacement events
  // later in the run benefit from earlier ones.
  try {
    await httpJson(`${GATEWAY}/vectors/playbook_memory/seed`, {
      operation: task.operation,
      approach: outcome.approach || `${event.kind} → hybrid search`,
      context: `client=${ctx.spec.client} scenario=${event.kind} shift=${event.shift_start ?? "tbd"}`,
      endorsed_names: resolved.map(r => r.name),
      append: true,
    });
  } catch (e) {
    gap_signals.push(`write_through: ${(e as Error).message}`);
  }
  return {
    event,
    ok: true,
    fills: outcome.fills,
    turns: outcome.turns,
    duration_secs: outcome.duration_secs,
    gap_signals,
    sources_first_score: outcome.first_pool_first_score,
    sources_last_score: outcome.first_pool_last_score,
    pool_size: outcome.first_sql_matches,
    playbook_citations: outcome.playbook_citations,
  };
 }
 // =================== Worker ID resolution ===================
 // Models emit candidate_ids or names in propose_done. Some return the
 // W500K-XXX doc_id, others just the name, others a random tag. Resolve
 // to a canonical candidate_id via SQL so the roster is reliable.
 //
 // Resolution order per fill:
 //   1. candidate_id already looks like W500K-N  → kept as-is.
 //   2. candidate_id is a bare integer           → promoted to W500K-N.
 //   3. otherwise look the worker up by (name, city, state) and take the
 //      first match.
 // A fill that cannot be resolved (no usable name, no match, or a failed
 // lookup) is returned unchanged. This function never throws for a bad fill,
 // so one malformed proposal cannot crash the whole scenario run.
 //
 // Returns a new array in the same order and of the same length as `fills`.
 async function resolveWorkerIds(fills: Fill[], event: FillEvent): Promise<Fill[]> {
  // Quote a value as a SQL string literal (single quotes doubled).
  const sqlQuote = (v: string): string => `'${v.replace(/'/g, "''")}'`;
  const resolved: Fill[] = [];
  for (const f of fills) {
    // Models occasionally emit numeric or missing ids; compare as a string.
    const rawId = String(f.candidate_id ?? "");
    // Case 1: candidate_id looks like W500K-NNN — accept as-is.
    if (/^W500K-\d+$/.test(rawId)) {
      resolved.push(f);
      continue;
    }
    // Case 2: candidate_id is a bare integer — promote to W500K-N.
    if (/^\d+$/.test(rawId)) {
      resolved.push({ ...f, candidate_id: `W500K-${rawId}` });
      continue;
    }
    // Case 3: look up by (name, city, state). Take the first match.
    // Without a usable name there is nothing to look up.
    if (typeof f.name !== "string" || f.name.length === 0) {
      resolved.push(f);
      continue;
    }
    try {
      const q = `SELECT worker_id FROM ${WORKERS_DATASET} WHERE name = ${sqlQuote(f.name)} AND city = ${sqlQuote(event.city)} AND state = ${sqlQuote(event.state)} LIMIT 1`;
      const r = await sqlQuery(q);
      if (r.rows && r.rows.length > 0) {
        resolved.push({ ...f, candidate_id: `W500K-${r.rows[0].worker_id}` });
      } else {
        // No match — keep the fill with its candidate_id untouched.
        resolved.push(f);
      }
    } catch (e) {
      // Best-effort: keep the fill, but leave a trace of why the lookup failed.
      console.warn(`    (resolveWorkerIds: lookup failed for ${f.name}: ${(e as Error).message})`);
      resolved.push(f);
    }
  }
  return resolved;
 }
 // =================== EOD gap report ===================
 // Write the end-of-day markdown report (report.md) into ctx.out_dir.
 //
 // Sections, in order: per-event table, final roster, gap signals grouped by
 // category (per-event signals plus a cross-event fairness check and a
 // post-run playbook_memory stats line), and a short narrative summary.
 // A failed stats fetch is ignored.
 async function writeRetrospective(ctx: ScenarioContext): Promise<void> {
  const { spec, results, roster } = ctx;
  const report: string[] = [
    `# Scenario retrospective — ${spec.client}, ${spec.date}`,
    "",
    `Executor: \`${EXECUTOR_MODEL}\`   Reviewer: \`${REVIEWER_MODEL}\`   Draft: \`${DRAFT_MODEL}\``,
    "",
    // --- Per-event summary ---
    "## Events",
    "",
    "| At | Kind | Role / Count | Pool | Fills | Turns | Dur(s) | Cites | Gaps |",
    "|---|---|---|---|---|---|---|---|---|",
    ...results.map(res => {
      const mark = res.ok ? "✓" : "✗";
      const ev = res.event;
      return `| ${ev.at} | ${ev.kind} | ${ev.role} × ${ev.count} | ${res.pool_size ?? "-"} | ${mark} ${res.fills.length} | ${res.turns} | ${res.duration_secs.toFixed(1)} | ${res.playbook_citations?.length ?? 0} | ${res.gap_signals.length} |`;
    }),
    "",
    // --- Roster ---
    "## Final roster",
    "",
    "| Worker | Booked | Role | City, ST | Status |",
    "|---|---|---|---|---|",
    ...roster.map(b => `| ${b.worker_id} ${b.name} | ${b.booked_for} | ${b.role} | ${b.city}, ${b.state} | ${b.status} |`),
    "",
  ];

  // --- Gap analysis by category ---
  const grouped: Record<string, string[]> = {};
  const bucket = (category: string): string[] => (grouped[category] ??= []);
  for (const signal of ctx.gap_signals) {
    bucket(signal.category).push(`**${signal.event}** — ${signal.detail}`);
  }

  // Fairness: any worker holding more than one confirmed booking today.
  const perWorker = new Map<string, number>();
  for (const booking of roster) {
    if (booking.status !== "confirmed") continue;
    perWorker.set(booking.worker_id, (perWorker.get(booking.worker_id) ?? 0) + 1);
  }
  const repeats = [...perWorker.entries()].filter(([, times]) => times > 1);
  if (repeats.length > 0) {
    const fairness = bucket("fairness");
    for (const [id, times] of repeats) {
      const who = roster.find(b => b.worker_id === id)?.name ?? id;
      fairness.push(`_cross-event_ — ${who} (${id}) booked ${times} times today`);
    }
  }

  // Tool errors are already present as per-event gap signals.
  // Write-through audit: report the playbook_memory entry count next to the
  // number of events that should have seeded it.
  try {
    const stats = await httpJson<any>(`${GATEWAY}/vectors/playbook_memory/stats`);
    const audit = bucket("write_through_audit");
    audit.push(`_post-run_ — playbook_memory has ${stats.entries} entries (ran ${results.length} events, expected ≥ ${results.filter(r => r.ok).length} new entries from this run)`);
  } catch { /* non-fatal */ }

  report.push("## Gap signals", "");
  const categories = Object.entries(grouped);
  if (categories.length === 0) {
    report.push("_None surfaced — either everything worked or detection is under-tuned._");
  } else {
    for (const [category, items] of categories) {
      report.push(`### ${category}`, ...items.map(item => `- ${item}`), "");
    }
  }

  // --- Narrative summary ---
  const succeeded = results.filter(r => r.ok).length;
  const distinctWorkers = new Set(roster.map(b => b.worker_id)).size;
  const totalCites = results.reduce((sum, r) => sum + (r.playbook_citations?.length ?? 0), 0);
  report.push(
    "## Narrative",
    "",
    `- ${succeeded}/${results.length} events reached consensus.`,
    `- Final roster: ${roster.length} bookings across ${distinctWorkers} distinct workers.`,
    `- Playbook citations across the day: ${totalCites} (proof the feedback loop fired across events).`,
  );
  const dropped = results.filter(r => !r.ok);
  if (dropped.length > 0) {
    report.push(`- Dropped events: ${dropped.map(r => r.event.at + " " + r.event.kind).join(", ")}.`);
  }

  const reportPath = join(ctx.out_dir, "report.md");
  await writeFile(reportPath, report.join("\n"));
  console.log(`\n✓ report → ${join(ctx.out_dir, "report.md")}`);
 }
 // =================== Main driver ===================
 // Scenario driver.
 //
 // Loads the scenario (JSON file given as the first CLI argument, otherwise
 // DEFAULT_SCENARIO), creates a timestamped output directory, runs every
 // event in order against shared state, then persists roster/results and
 // writes the retrospective report.
 //
 // Exit codes: 0 when every event succeeded, 2 when at least one event
 // failed. Rejections are handled by the top-level catch.
 async function main() {
  const specPath = process.argv[2];
  const spec: ScenarioSpec = specPath
    ? JSON.parse(await Bun.file(specPath).text())
    : DEFAULT_SCENARIO;
  // Filesystem-safe timestamp, e.g. 2026-04-21T08-00-00.
  const stamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const out_dir = join("tests/multi-agent/playbooks", `scenario-${stamp}`);
  await mkdir(out_dir, { recursive: true });
  const ctx: ScenarioContext = {
    spec,
    out_dir,
    roster: [],
    results: [],
    gap_signals: [],
  };
  // Initialize output files
  await writeFile(join(out_dir, "sms.md"), `# SMS drafts — ${spec.client}, ${spec.date}\n`);
  await writeFile(join(out_dir, "emails.md"), `# Client emails — ${spec.client}, ${spec.date}\n`);
  await writeFile(join(out_dir, "dispatch.jsonl"), "");
  console.log(`▶ scenario: ${spec.client}, ${spec.date}, ${spec.events.length} events`);
  console.log(`▶ out: ${out_dir}\n`);
  for (const event of spec.events) {
    // Expand misplacement-style exclusions from the current roster: it
    // wants to replace a worker from a prior event, so grab everyone
    // booked at that at-label and add as exclusions.
    if (event.kind === "misplacement" && event.replaces_event) {
      // Keep the roster entries themselves (not just ids) so the no-show mark
      // lands on the booking for the replaced event, even when the same
      // worker also appears under another event.
      const priorEntries = ctx.roster.filter(
        r => r.booked_for === event.replaces_event && r.status === "confirmed",
      );
      if (priorEntries.length > 0) {
        const priorBooked = priorEntries.map(r => r.worker_id);
        // Pick one arbitrarily to mark as no_show — in a real system the
        // external signal would pick. For the test, first one works.
        const lostEntry = priorEntries[0];
        lostEntry.status = "no_show";
        console.log(`   (misplacement: marking ${lostEntry.worker_id} ${lostEntry.name} as no-show)`);
        // Exclude all prior bookings (including the no-show) so the refill
        // doesn't pick anyone already scheduled for today. Merge with any
        // exclusions the spec already carried instead of overwriting them.
        event.exclude_worker_ids = Array.from(
          new Set([...(event.exclude_worker_ids ?? []), ...priorBooked]),
        );
      }
    }
    const result = await runEvent(event, ctx);
    ctx.results.push(result);
    // Split "category: detail" on the first colon; later colons stay in detail.
    for (const s of result.gap_signals) {
      const [category, ...rest] = s.split(":");
      ctx.gap_signals.push({ event: event.at, category: category.trim(), detail: rest.join(":").trim() });
    }
    // Small breather to not hammer Ollama on back-to-back runs.
    await new Promise(r => setTimeout(r, 500));
  }
  // Persist structured state for forensics.
  await writeFile(join(out_dir, "roster.json"), JSON.stringify(ctx.roster, null, 2));
  await writeFile(join(out_dir, "results.json"), JSON.stringify(ctx.results, null, 2));
  await writeRetrospective(ctx);
  const okCount = ctx.results.filter(r => r.ok).length;
  if (okCount < ctx.results.length) {
    console.log(`\n⚠ ${okCount}/${ctx.results.length} events succeeded. See ${out_dir}/report.md for gaps.`);
    process.exit(2);
  }
  console.log(`\n✓ ${okCount}/${ctx.results.length} events succeeded. See ${out_dir}/report.md.`);
  process.exit(0);
 }
 // Entry point: a rejection from the driver itself (as opposed to a failed
 // event, which exits 2) is reported with its stack and exits 1.
 main().catch((err: unknown) => {
  const failure = err as Error;
  console.error(`\n✗ scenario driver crashed: ${failure.message}`);
  console.error(failure.stack);
  process.exit(1);
 });