Phase 28-36 body of work

Accumulated since a6f12e2 (Phase 21 Rust port + Phase 27 versioning): - Phase 36: embed_semaphore on VectorState (permits=1) serializes seed embed calls — prevents sidecar socket collisions under concurrent /seed stress load - Phase 31+: run_stress.ts 6-task diverse stress scaffolding; run_e2e_rated.ts + orchestrator.ts tightening - Catalog dedupe cleanup: 16 duplicate manifests removed; canonical candidates.parquet (10.5MB -> 76KB) + placements.parquet (1.2MB -> 11KB) regenerated post-dedupe; fresh manifests for active datasets - vectord: harness EvalSet refinements (+181), agent portfolio rotation + ingest triggers (+158), autotune + rag adjustments - catalogd/storaged/ingestd/mcp-server: misc tightening - docs: Phase 28-36 PRD entries + DECISIONS ADR additions; control-plane pivot banner added to top of docs/PRD.md (pointing at docs/CONTROL_PLANE_PRD.md which lands in next commit) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 02:41:15 -05:00 · 2026-04-22 02:41:15 -05:00 · 5b1fcf6d27
commit 5b1fcf6d27
parent a6f12e2609
52 changed files with 2575 additions and 1012 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1254,7 +1254,7 @@ version = "3.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c"
 dependencies = [
- "darling",
+ "darling 0.21.3",
 "ident_case",
 "prettyplease",
 "proc-macro2",
@ -1759,6 +1759,17 @@ dependencies = [
 "cfg-if",
 ]

+[[package]]
+name = "croner"
+version = "3.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4aa42bcd3d846ebf66e15bd528d1087f75d1c6c1c66ebff626178a106353c576"
+dependencies = [
+ "chrono",
+ "derive_builder",
+ "strum 0.27.2",
+]
+
 [[package]]
 name = "crossbeam"
 version = "0.8.4"
@ -1862,14 +1873,38 @@ dependencies = [
 "memchr",
 ]

+[[package]]
+name = "darling"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
+dependencies = [
+ "darling_core 0.20.11",
+ "darling_macro 0.20.11",
+]
+
 [[package]]
 name = "darling"
 version = "0.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
 dependencies = [
- "darling_core",
- "darling_macro",
+ "darling_core 0.21.3",
+ "darling_macro 0.21.3",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.117",
 ]

 [[package]]
@ -1886,13 +1921,24 @@ dependencies = [
 "syn 2.0.117",
 ]

+[[package]]
+name = "darling_macro"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
+dependencies = [
+ "darling_core 0.20.11",
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "darling_macro"
 version = "0.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
 dependencies = [
- "darling_core",
+ "darling_core 0.21.3",
 "quote",
 "syn 2.0.117",
 ]
@ -3149,6 +3195,37 @@ dependencies = [
 "serde_core",
 ]

+[[package]]
+name = "derive_builder"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
+dependencies = [
+ "derive_builder_macro",
+]
+
+[[package]]
+name = "derive_builder_core"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
+dependencies = [
+ "darling 0.20.11",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "derive_builder_macro"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
+dependencies = [
+ "derive_builder_core",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "derive_more"
 version = "2.1.1"
@ -3713,7 +3790,7 @@ version = "0.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f43e744e4ea338060faee68ed933e46e722fb7f3617e722a5772d7e856d8b3ce"
 dependencies = [
- "darling",
+ "darling 0.21.3",
 "proc-macro2",
 "quote",
 "syn 2.0.117",
@ -4598,6 +4675,7 @@ dependencies = [
 "bytes",
 "catalogd",
 "chrono",
+ "croner",
 "csv",
 "lopdf",
 "mysql_async",
@ -5096,7 +5174,7 @@ dependencies = [
 "prost-types 0.14.3",
 "rand 0.9.2",
 "snafu",
- "strum",
+ "strum 0.26.3",
 "tokio",
 "tracing",
 "xxhash-rust",
@ -7857,7 +7935,16 @@ version = "0.26.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
 dependencies = [
- "strum_macros",
+ "strum_macros 0.26.4",
+]
+
+[[package]]
+name = "strum"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
+dependencies = [
+ "strum_macros 0.27.2",
 ]

 [[package]]
@ -7873,6 +7960,18 @@ dependencies = [
 "syn 2.0.117",
 ]

+[[package]]
+name = "strum_macros"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "subprocess"
 version = "0.2.15"
@ -8814,12 +8913,14 @@ dependencies = [
 "bytes",
 "catalogd",
 "chrono",
+ "futures",
 "instant-distance",
 "object_store",
 "parquet 55.2.0",
 "queryd",
 "serde",
 "serde_json",
+ "sha2",
 "shared",
 "storaged",
 "tokio",
--- a/bun.lock
+++ b/bun.lock
@ -0,0 +1,18 @@
+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "dependencies": {
+        "langfuse": "^3.38.20",
+      },
+    },
+  },
+  "packages": {
+    "langfuse": ["langfuse@3.38.20", "", { "dependencies": { "langfuse-core": "^3.38.20" } }, "sha512-MAmBAASSzJtmK1O9HQegA1mFsQhT8Yf+OJRGvE7FXkyv3g/eiBE0glLD0Ohg3pkxhoPdggM5SejK7ue9ctlaMA=="],
+
+    "langfuse-core": ["langfuse-core@3.38.20", "", { "dependencies": { "mustache": "^4.2.0" } }, "sha512-zBKVmQN/1oT5VWZUBYlWzvokIlkC/6mnpgr/2atMyTeAm+jR3ia7w2iJMjlrF5/oG8ukO1s8+LDRCzJpF1QeEA=="],
+
+    "mustache": ["mustache@4.2.0", "", { "bin": { "mustache": "bin/mustache" } }, "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ=="],
+  }
+}
--- a/crates/catalogd/src/grpc.rs
+++ b/crates/catalogd/src/grpc.rs
@ -54,7 +54,16 @@ impl CatalogService for CatalogGrpc {
        let manifest = self.registry
            .register(req.name, SchemaFingerprint(req.schema_fingerprint), objects)
            .await
-            .map_err(|e| Status::internal(e))?;
+            .map_err(|e| {
+                // Mirror the HTTP side's 409 mapping: schema drift is a
+                // precondition failure, not a server error. Keeps gRPC
+                // and HTTP callers seeing the same diagnostic signal.
+                if e.contains("different schema") {
+                    Status::failed_precondition(e)
+                } else {
+                    Status::internal(e)
+                }
+            })?;

        Ok(Response::new(manifest_to_proto(&manifest)))
    }
--- a/crates/catalogd/src/registry.rs
+++ b/crates/catalogd/src/registry.rs
@ -28,6 +28,27 @@ pub struct MigrateBucketsReport {
    pub manifests_persisted: usize,
 }

+#[derive(Debug, Clone, Default, serde::Serialize)]
+pub struct DedupeReport {
+    pub groups: usize,
+    pub removed: usize,
+    pub kept: Vec<DedupeKept>,
+    pub errors: Vec<DedupeError>,
+}
+
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct DedupeKept {
+    pub name: String,
+    pub kept_id: String,
+    pub removed: usize,
+}
+
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct DedupeError {
+    pub manifest_id: String,
+    pub error: String,
+}
+
 /// Partial metadata update — only set fields are applied.
 #[derive(Debug, Clone, Default, serde::Deserialize)]
 pub struct MetadataUpdate {
@ -212,13 +233,65 @@ impl Registry {
        Ok(())
    }

-    /// Register a new dataset. Persists manifest to storage before updating memory.
+    /// Register a dataset. Idempotent on `name`:
+    ///   - No existing dataset by this name → create new manifest.
+    ///   - Exists with same schema fingerprint → update objects + `updated_at`
+    ///     in place (re-ingest of identical-shape data).
+    ///   - Exists with different schema fingerprint → reject as `Err`
+    ///     (callers map to HTTP 409 / gRPC FAILED_PRECONDITION). Schema
+    ///     evolution is not yet exposed as an HTTP endpoint; callers must
+    ///     register under a new name or delete the existing manifest first.
+    ///
+    /// Concurrency: the write lock is held across the storage write. That
+    /// serializes concurrent registers but is correctness-mandatory —
+    /// dropping the lock across the check→insert sequence creates a TOCTOU
+    /// window where two callers with the same name both see "no existing"
+    /// and both insert, reintroducing the duplicate-manifest bug this
+    /// function exists to prevent. Register is a metadata-only hop on the
+    /// ingest path, so the serialization cost is acceptable. Precedent:
+    /// `update_metadata` also holds the write lock across its I/O.
+    ///
+    /// Legacy state: if multiple manifests with the same name already exist
+    /// (from before this was idempotent), the most recently updated one is
+    /// treated as canonical. Run `dedupe_by_name` to clean up the rest.
    pub async fn register(
        &self,
        name: String,
        schema_fingerprint: SchemaFingerprint,
        objects: Vec<ObjectRef>,
    ) -> Result<DatasetManifest, String> {
+        let mut datasets = self.datasets.write().await;
+
+        let existing = datasets
+            .values()
+            .filter(|d| d.name == name)
+            .max_by_key(|d| d.updated_at)
+            .cloned();
+
+        if let Some(mut manifest) = existing {
+            if manifest.schema_fingerprint != schema_fingerprint {
+                return Err(format!(
+                    "dataset '{}' already exists with a different schema \
+                     (existing fingerprint: {}, new: {}). Schema-evolution \
+                     migration is not yet exposed as an HTTP endpoint; \
+                     register under a new name or delete the existing \
+                     manifest first.",
+                    name, manifest.schema_fingerprint.0, schema_fingerprint.0,
+                ));
+            }
+            manifest.objects = objects;
+            manifest.updated_at = chrono::Utc::now();
+
+            let manifest_key = format!("{MANIFEST_PREFIX}/{}.json", manifest.id);
+            let json = serde_json::to_vec_pretty(&manifest).map_err(|e| e.to_string())?;
+            ops::put(&self.store, &manifest_key, json.into()).await?;
+
+            datasets.insert(manifest.id.clone(), manifest.clone());
+
+            tracing::info!("re-registered (idempotent): {} ({})", manifest.name, manifest.id);
+            return Ok(manifest);
+        }
+
        let now = chrono::Utc::now();
        let manifest = DatasetManifest {
            id: DatasetId::new(),
@ -240,12 +313,10 @@ impl Registry {
            embedding_refresh_policy: None,
        };

-        // Write-ahead: persist before in-memory update
        let manifest_key = format!("{MANIFEST_PREFIX}/{}.json", manifest.id);
        let json = serde_json::to_vec_pretty(&manifest).map_err(|e| e.to_string())?;
        ops::put(&self.store, &manifest_key, json.into()).await?;

-        let mut datasets = self.datasets.write().await;
        datasets.insert(manifest.id.clone(), manifest.clone());

        tracing::info!("registered dataset: {} ({})", manifest.name, manifest.id);
@ -380,6 +451,72 @@ impl Registry {
        Ok(manifest.clone())
    }

+    /// Collapse duplicate manifests that share a `name`. Winner per group:
+    ///   1. Prefer a manifest with a non-null `row_count` (already resynced).
+    ///   2. Break ties by newest `updated_at`.
+    /// Losers are removed from the in-memory registry AND their manifest
+    /// JSON is deleted from object storage. Datasets with a single manifest
+    /// are untouched. Parquet data files are never touched — only catalog
+    /// metadata. See `register` for the prevention side of this fix.
+    ///
+    /// Operator-only. Do not run while ingest is active: a concurrent
+    /// `register` between the snapshot and the delete sweep can create a
+    /// new manifest under a name we just deduped, producing a transient
+    /// count of 2 until the next dedupe run. Safe outside ingest windows.
+    pub async fn dedupe_by_name(&self) -> DedupeReport {
+        let groups: Vec<(String, Vec<DatasetManifest>)> = {
+            let datasets = self.datasets.read().await;
+            let mut by_name: HashMap<String, Vec<DatasetManifest>> = HashMap::new();
+            for m in datasets.values() {
+                by_name.entry(m.name.clone()).or_default().push(m.clone());
+            }
+            by_name.into_iter().filter(|(_, v)| v.len() > 1).collect()
+        };
+
+        let mut report = DedupeReport::default();
+        let mut to_delete: Vec<DatasetId> = Vec::new();
+
+        for (name, mut manifests) in groups {
+            report.groups += 1;
+            manifests.sort_by(|a, b| {
+                b.row_count.is_some().cmp(&a.row_count.is_some())
+                    .then(b.updated_at.cmp(&a.updated_at))
+            });
+            let winner = &manifests[0];
+            report.kept.push(DedupeKept {
+                name: name.clone(),
+                kept_id: winner.id.to_string(),
+                removed: manifests.len() - 1,
+            });
+            for loser in &manifests[1..] {
+                to_delete.push(loser.id.clone());
+            }
+        }
+
+        {
+            let mut datasets = self.datasets.write().await;
+            for id in &to_delete {
+                datasets.remove(id);
+            }
+        }
+        for id in &to_delete {
+            let key = format!("{MANIFEST_PREFIX}/{}.json", id);
+            match ops::delete(&self.store, &key).await {
+                Ok(_) => report.removed += 1,
+                Err(e) => report.errors.push(DedupeError {
+                    manifest_id: id.to_string(),
+                    error: e,
+                }),
+            }
+        }
+
+        tracing::info!(
+            "dedupe: {} groups collapsed, {} manifests removed, {} errors",
+            report.groups, report.removed, report.errors.len(),
+        );
+        report
+    }
+
    /// Resync every dataset that currently has a null row_count.
    /// Returns (successes, failures) where each entry is (name, detail).
    pub async fn resync_missing(&self) -> (Vec<(String, u64)>, Vec<(String, String)>) {
@ -549,6 +686,46 @@ impl Registry {
        Ok(())
    }

+    /// Remove a dataset from the catalog by name. Deletes the manifest
+    /// from both the in-memory registry and object storage.
+    ///
+    /// Scope is metadata-only: the underlying parquet files, vector
+    /// indexes, tombstones, trial journals, and AiViews that reference
+    /// this dataset are NOT touched. Caller is responsible for any
+    /// cascade cleanup. This mirrors how a DROP TABLE in a typical
+    /// warehouse separates "forget about this dataset" from "actually
+    /// reclaim the bytes".
+    ///
+    /// Returns Ok(()) when the dataset existed and was removed; an Err
+    /// with "dataset not found" when no manifest by that name exists.
+    /// Legacy state with duplicate names: all matching manifests are
+    /// removed (effectively a post-hoc dedupe on that name).
+    pub async fn delete_dataset(&self, name: &str) -> Result<usize, String> {
+        let ids_to_remove: Vec<DatasetId> = {
+            let datasets = self.datasets.read().await;
+            datasets.values().filter(|d| d.name == name).map(|d| d.id.clone()).collect()
+        };
+        if ids_to_remove.is_empty() {
+            return Err(format!("dataset not found: {name}"));
+        }
+
+        for id in &ids_to_remove {
+            let key = format!("{MANIFEST_PREFIX}/{}.json", id);
+            if let Err(e) = ops::delete(&self.store, &key).await {
+                // Storage delete failed — log and keep going. The in-memory
+                // remove below is still correct; on restart, the missing
+                // storage object just means nothing to rehydrate.
+                tracing::warn!("delete_dataset '{name}': storage delete of {key} failed: {e}");
+            }
+        }
+        let mut datasets = self.datasets.write().await;
+        for id in &ids_to_remove {
+            datasets.remove(id);
+        }
+        tracing::info!("deleted dataset '{}' ({} manifest(s))", name, ids_to_remove.len());
+        Ok(ids_to_remove.len())
+    }
+
    /// List datasets whose `embedding_stale_since` is set — they need a refresh.
    pub async fn stale_datasets(&self) -> Vec<DatasetManifest> {
        let datasets = self.datasets.read().await;
@ -579,3 +756,275 @@ impl Registry {
        Ok(manifest.clone())
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use object_store::memory::InMemory;
+
+    fn fixture() -> Registry {
+        Registry::new(Arc::new(InMemory::new()))
+    }
+
+    fn fp(s: &str) -> SchemaFingerprint {
+        SchemaFingerprint(s.to_string())
+    }
+
+    fn obj(key: &str) -> ObjectRef {
+        ObjectRef {
+            bucket: "primary".to_string(),
+            key: key.to_string(),
+            size_bytes: 100,
+            created_at: chrono::Utc::now(),
+        }
+    }
+
+    #[tokio::test]
+    async fn fresh_register_creates_new_manifest() {
+        let reg = fixture();
+        let m = reg.register("a".into(), fp("f1"), vec![obj("a.parquet")]).await.unwrap();
+        assert_eq!(m.name, "a");
+        assert_eq!(m.schema_fingerprint, fp("f1"));
+        assert_eq!(reg.list().await.len(), 1);
+        assert_eq!(reg.get_by_name("a").await.unwrap().id, m.id);
+    }
+
+    #[tokio::test]
+    async fn re_register_same_fingerprint_is_idempotent() {
+        let reg = fixture();
+        let first = reg.register("a".into(), fp("f1"), vec![obj("old.parquet")]).await.unwrap();
+        let second = reg.register("a".into(), fp("f1"), vec![obj("new.parquet")]).await.unwrap();
+
+        assert_eq!(first.id, second.id, "same name+fp should reuse ID");
+        assert_eq!(reg.list().await.len(), 1, "no duplicate manifest created");
+        let fetched = reg.get_by_name("a").await.unwrap();
+        assert_eq!(fetched.objects[0].key, "new.parquet", "objects replaced");
+        assert!(fetched.updated_at >= first.updated_at, "updated_at bumped");
+    }
+
+    #[tokio::test]
+    async fn re_register_different_fingerprint_rejects() {
+        let reg = fixture();
+        reg.register("a".into(), fp("f1"), vec![obj("a.parquet")]).await.unwrap();
+        let err = reg.register("a".into(), fp("f2"), vec![obj("a.parquet")]).await.unwrap_err();
+
+        assert!(err.contains("different schema"), "error mentions schema drift: {err}");
+        assert_eq!(reg.list().await.len(), 1, "failed register did not mutate state");
+        assert_eq!(reg.get_by_name("a").await.unwrap().schema_fingerprint, fp("f1"));
+    }
+
+    #[tokio::test]
+    async fn delete_dataset_removes_manifest_and_in_memory_entry() {
+        let reg = fixture();
+        reg.register("to_delete".into(), fp("f1"), vec![obj("d.parquet")]).await.unwrap();
+        reg.register("keepme".into(),    fp("f2"), vec![obj("k.parquet")]).await.unwrap();
+        assert_eq!(reg.list().await.len(), 2);
+
+        let removed = reg.delete_dataset("to_delete").await.unwrap();
+        assert_eq!(removed, 1, "one manifest removed");
+        assert!(reg.get_by_name("to_delete").await.is_none());
+        assert!(reg.get_by_name("keepme").await.is_some(), "unrelated dataset untouched");
+        assert_eq!(reg.list().await.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn delete_dataset_404s_on_unknown_name() {
+        let reg = fixture();
+        let err = reg.delete_dataset("nope").await.unwrap_err();
+        assert!(err.starts_with("dataset not found"), "error wording: {err}");
+    }
+
+    #[tokio::test]
+    async fn delete_dataset_removes_all_dupes_with_matching_name() {
+        // Legacy zombies: direct insert so we can seed duplicates,
+        // mirroring pre-idempotent-register state. delete_dataset should
+        // sweep every manifest sharing that name in a single call.
+        let reg = fixture();
+        let now = chrono::Utc::now();
+        for _ in 0..3 {
+            let m = DatasetManifest {
+                id: DatasetId::new(),
+                name: "zombie".into(),
+                schema_fingerprint: fp("fp"),
+                objects: vec![obj("z.parquet")],
+                created_at: now, updated_at: now,
+                row_count: None, description: String::new(), owner: String::new(),
+                sensitivity: None, columns: vec![], lineage: None, freshness: None,
+                tags: vec![], last_embedded_at: None, embedding_stale_since: None,
+                embedding_refresh_policy: None,
+            };
+            reg.datasets.write().await.insert(m.id.clone(), m);
+        }
+        assert_eq!(reg.list().await.len(), 3);
+
+        let removed = reg.delete_dataset("zombie").await.unwrap();
+        assert_eq!(removed, 3, "all three manifests removed in one sweep");
+        assert_eq!(reg.list().await.len(), 0);
+    }
+
+    #[tokio::test]
+    async fn dedupe_no_duplicates_is_noop() {
+        let reg = fixture();
+        reg.register("a".into(), fp("f1"), vec![obj("a.parquet")]).await.unwrap();
+        reg.register("b".into(), fp("f2"), vec![obj("b.parquet")]).await.unwrap();
+
+        let report = reg.dedupe_by_name().await;
+        assert_eq!(report.groups, 0);
+        assert_eq!(report.removed, 0);
+        assert_eq!(reg.list().await.len(), 2);
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 8)]
+    async fn concurrent_register_stress_many_workers_one_manifest() {
+        // Stress variant: fire 32 concurrent registers for the same new
+        // name across 8 worker threads. Single-manifest invariant must
+        // hold regardless of how the scheduler interleaves them. This
+        // catches race regressions the 2-call variant below might miss
+        // under a forgiving scheduler.
+        let reg = fixture();
+        let mut handles = Vec::new();
+        for i in 0..32 {
+            let r = reg.clone();
+            handles.push(tokio::spawn(async move {
+                r.register(
+                    "stress".into(),
+                    fp("stress-fp"),
+                    vec![obj(&format!("{i}.parquet"))],
+                ).await
+            }));
+        }
+        for h in handles {
+            h.await.unwrap().expect("each register succeeds (idempotent)");
+        }
+        let all = reg.list().await;
+        let stress_manifests: Vec<_> = all.iter().filter(|d| d.name == "stress").collect();
+        assert_eq!(stress_manifests.len(), 1,
+            "32 concurrent registers produced {} manifests for 'stress' — expected 1",
+            stress_manifests.len());
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn concurrent_register_same_new_name_collapses_to_one_manifest() {
+        // Pins the TOCTOU fix: without the write-lock held across the
+        // check→insert sequence, two parallel registers of the same
+        // previously-unknown name would both see "no existing" and both
+        // insert. Multi-thread flavor is required to actually exercise
+        // the race — under single-threaded tokio the scheduler doesn't
+        // preempt between awaits on the same task's critical section.
+        // Under the current impl, whichever acquires the write lock
+        // first creates the manifest; the second observes it and takes
+        // the idempotent-update branch.
+        let reg = fixture();
+        let a = reg.clone();
+        let b = reg.clone();
+
+        let (ra, rb) = tokio::join!(
+            async move {
+                a.register("race".into(), fp("shared-fp"), vec![obj("a.parquet")]).await
+            },
+            async move {
+                b.register("race".into(), fp("shared-fp"), vec![obj("b.parquet")]).await
+            },
+        );
+
+        let m_a = ra.expect("first register succeeds");
+        let m_b = rb.expect("second register succeeds (same fp = idempotent)");
+        assert_eq!(m_a.id, m_b.id, "both calls resolve to a single DatasetId");
+        assert_eq!(reg.list().await.len(), 1, "no duplicate manifest created");
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn concurrent_register_same_name_different_fp_one_wins_one_rejects() {
+        // Reverse case: concurrent registers with conflicting fingerprints.
+        // Deterministic outcome: one call's fingerprint gets persisted as
+        // canonical, the other is rejected on seeing the now-established
+        // fingerprint. Which one wins is scheduler-dependent — we only
+        // assert exactly one success and exactly one rejection.
+        let reg = fixture();
+        let a = reg.clone();
+        let b = reg.clone();
+
+        let (ra, rb) = tokio::join!(
+            async move { a.register("race".into(), fp("fp1"), vec![obj("a.parquet")]).await },
+            async move { b.register("race".into(), fp("fp2"), vec![obj("b.parquet")]).await },
+        );
+
+        let successes = [&ra, &rb].iter().filter(|r| r.is_ok()).count();
+        let rejections = [&ra, &rb].iter().filter(|r| r.is_err()).count();
+        assert_eq!(successes, 1, "exactly one register wins");
+        assert_eq!(rejections, 1, "the other is rejected on fingerprint");
+        let rejection_msg = [&ra, &rb].iter()
+            .find_map(|r| r.as_ref().err())
+            .unwrap();
+        assert!(rejection_msg.contains("different schema"));
+        assert_eq!(reg.list().await.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn dedupe_collapses_dupes_preferring_non_null_row_count() {
+        let reg = fixture();
+
+        // Simulate legacy pre-idempotent-register state by inserting
+        // manifests with the same `name` directly. Mirrors what happened
+        // with the 308× successful_playbooks duplication.
+        let now = chrono::Utc::now();
+        let old_with_rows = DatasetManifest {
+            id: DatasetId::new(),
+            name: "dupes".into(),
+            schema_fingerprint: fp("f1"),
+            objects: vec![obj("dupes.parquet")],
+            created_at: now - chrono::Duration::hours(2),
+            updated_at: now - chrono::Duration::hours(2),
+            row_count: Some(42),
+            description: String::new(),
+            owner: String::new(),
+            sensitivity: None,
+            columns: vec![],
+            lineage: None,
+            freshness: None,
+            tags: vec![],
+            last_embedded_at: None,
+            embedding_stale_since: None,
+            embedding_refresh_policy: None,
+        };
+        let newer_no_rows = DatasetManifest {
+            id: DatasetId::new(),
+            updated_at: now, // newer, but null row_count
+            row_count: None,
+            ..old_with_rows.clone()
+        };
+        let oldest = DatasetManifest {
+            id: DatasetId::new(),
+            updated_at: now - chrono::Duration::hours(5),
+            row_count: None,
+            ..old_with_rows.clone()
+        };
+
+        let kept_id = old_with_rows.id.clone();
+        let loser_a = newer_no_rows.id.clone();
+        let loser_b = oldest.id.clone();
+
+        {
+            let mut d = reg.datasets.write().await;
+            d.insert(old_with_rows.id.clone(), old_with_rows);
+            d.insert(newer_no_rows.id.clone(), newer_no_rows);
+            d.insert(oldest.id.clone(), oldest);
+        }
+        assert_eq!(reg.list().await.len(), 3);
+
+        let report = reg.dedupe_by_name().await;
+        assert_eq!(report.groups, 1);
+        assert_eq!(report.removed, 2);
+        assert_eq!(report.errors.len(), 0);
+        assert_eq!(report.kept.len(), 1);
+        assert_eq!(report.kept[0].name, "dupes");
+        assert_eq!(report.kept[0].kept_id, kept_id.to_string(),
+            "should keep the manifest with non-null row_count");
+
+        let remaining = reg.list().await;
+        assert_eq!(remaining.len(), 1);
+        assert_eq!(remaining[0].id, kept_id);
+        assert!(reg.get(&loser_a).await.is_none());
+        assert!(reg.get(&loser_b).await.is_none());
+    }
+}
--- a/crates/catalogd/src/service.rs
+++ b/crates/catalogd/src/service.rs
@ -17,11 +17,12 @@ pub fn router(registry: Registry) -> Router {
        .route("/datasets", post(create_dataset))
        .route("/datasets", get(list_datasets))
        .route("/datasets/{id}", get(get_dataset))
-        .route("/datasets/by-name/{name}", get(get_dataset_by_name))
+        .route("/datasets/by-name/{name}", get(get_dataset_by_name).delete(delete_dataset_by_name))
        .route("/datasets/by-name/{name}/metadata", post(update_metadata))
        .route("/datasets/by-name/{name}/resync", post(resync_dataset))
        .route("/resync-missing", post(resync_all_missing))
        .route("/migrate-buckets", post(migrate_buckets))
+        .route("/dedupe", post(dedupe_by_name))
        // Phase D: AI-safe views
        .route("/views", post(create_view).get(list_views))
        .route("/views/{name}", get(get_view).delete(delete_view))
@ -121,7 +122,14 @@ async fn create_dataset(
            let resp = DatasetResponse::from(&manifest);
            Ok((StatusCode::CREATED, Json(resp)))
        }
-        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
+        Err(e) => {
+            let status = if e.contains("different schema") {
+                StatusCode::CONFLICT
+            } else {
+                StatusCode::INTERNAL_SERVER_ERROR
+            };
+            Err((status, e))
+        }
    }
 }

@ -153,6 +161,25 @@ async fn get_dataset_by_name(
    }
 }

+/// Remove a dataset manifest by name. Metadata only — parquet files,
+/// vector indexes, and tombstones are NOT cascade-deleted. See
+/// `Registry::delete_dataset` for the full scope.
+async fn delete_dataset_by_name(
+    State(registry): State<Registry>,
+    Path(name): Path<String>,
+) -> impl IntoResponse {
+    match registry.delete_dataset(&name).await {
+        Ok(removed) => {
+            let body = serde_json::json!({ "name": name, "manifests_removed": removed });
+            Ok(Json(body))
+        }
+        Err(e) if e.starts_with("dataset not found") => {
+            Err((StatusCode::NOT_FOUND, e))
+        }
+        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
+    }
+}
+
 async fn update_metadata(
    State(registry): State<Registry>,
    Path(name): Path<String>,
@ -214,6 +241,12 @@ async fn migrate_buckets(State(registry): State<Registry>) -> impl IntoResponse
    }
 }

+/// Collapse duplicate manifests by `name` — see `Registry::dedupe_by_name`.
+/// Safe to run repeatedly; single-manifest datasets are untouched.
+async fn dedupe_by_name(State(registry): State<Registry>) -> impl IntoResponse {
+    Json(registry.dedupe_by_name().await)
+}
+
 // --- Phase D: AI-safe views ---

 #[derive(Deserialize)]
--- a/crates/gateway/src/main.rs
+++ b/crates/gateway/src/main.rs
@ -86,6 +86,11 @@ async fn main() {
    // primary. Journal / registry resolve per-call via the bucket registry.
    let tj = vectord::trial::TrialJournal::new(bucket_registry.clone(), index_reg.clone());
    let pr = vectord::promotion::PromotionRegistry::new(bucket_registry.clone(), index_reg.clone());
+    let hs = vectord::harness::HarnessStore::new(bucket_registry.clone(), index_reg.clone());
+    // Phase 19: playbook memory. Load cached state; empty-on-miss is fine,
+    // operators call POST /vectors/playbook_memory/rebuild to populate.
+    let pbm = vectord::playbook_memory::PlaybookMemory::new(store.clone());
+    let _ = pbm.load_from_storage().await;

    // Phase 16.2: spawn the autotune agent. When config.agent.enabled=false
    // this returns a handle that drops triggers silently — no surprise load.
@ -107,6 +112,7 @@ async fn main() {
            embedding_cache: emb_cache.clone(),
            trial_journal: tj.clone(),
            promotion_registry: pr.clone(),
+            harness_store: hs.clone(),
        },
    );

@ -155,6 +161,7 @@ async fn main() {
            hnsw_store: hnsw,
            embedding_cache: emb_cache,
            trial_journal: tj,
+            harness_store: hs,
            catalog: registry.clone(),
            promotion_registry: pr,
            agent_handle,
@ -163,6 +170,8 @@ async fn main() {
            lance: vectord::lance_backend::LanceRegistry::new(
                bucket_registry.clone(), index_reg.clone(),
            ),
+            playbook_memory: pbm,
+            embed_semaphore: std::sync::Arc::new(tokio::sync::Semaphore::new(1)),
        }))
        .nest("/workspaces", queryd::workspace_service::router(workspace_mgr))
        .nest("/journal", journald::service::router(journal))
--- a/crates/gateway/src/tools/registry.rs
+++ b/crates/gateway/src/tools/registry.rs
@ -103,7 +103,7 @@ impl ToolRegistry {
                    ParamDef { name: "limit".into(), param_type: "integer".into(), required: false, description: "Max results".into(), default: Some(serde_json::json!(20)) },
                ],
                returns: "List of candidates with id, name, phone, email, skills, experience".into(),
-                sql_template: "SELECT candidate_id, first_name, last_name, phone, email, city, state, zip, vertical, skills, years_experience FROM candidates WHERE 1=1 {skills_filter} {city_filter} {state_filter} {years_filter} {status_filter} ORDER BY years_experience DESC LIMIT {limit}".into(),
+                sql_template: "SELECT candidate_id, first_name, last_name, phone, email, city, state, skills, years_experience, status FROM candidates WHERE 1=1 {skills_filter} {city_filter} {state_filter} {years_filter} {status_filter} ORDER BY years_experience DESC LIMIT {limit}".into(),
                category: "candidates".into(),
            },
            ToolDef {
--- a/crates/ingestd/Cargo.toml
+++ b/crates/ingestd/Cargo.toml
@ -24,3 +24,4 @@ object_store = { workspace = true }
 tokio-postgres = { workspace = true }
 mysql_async = { workspace = true }
 uuid = { workspace = true }
+croner = "3"
--- a/crates/ingestd/src/schedule.rs
+++ b/crates/ingestd/src/schedule.rs
@ -17,8 +17,8 @@
 //! shouldn't pile up runs). Different schedules run concurrently.
 //!
 //! What's deliberately not in scope here:
-//! - Cron expressions (the trigger enum has the variant but parsing is
-//!   stubbed). Intervals cover 90% of operational scheduling; cron is
+//! - Sub-minute cron precision. 5-field Unix cron is supported; seconds
+//!   are pinned to 0. Intervals cover sub-minute cases; cron is
 //!   easy to bolt on later.
 //! - Backoff / retry policies. A failed run records the failure and
 //!   schedules `next_run_at` as if it succeeded — no exponential backoff.
@ -85,8 +85,12 @@ impl ScheduleKind {
 pub enum ScheduleTrigger {
    /// Run every N seconds. Time anchor = compute_next_run_at decision.
    Interval { secs: u64 },
-    /// Cron expression — parsing not implemented yet. Defining the
-    /// variant now so the JSON shape is forward-compatible.
+    /// 5-field Unix cron expression: "min hour dom month dow".
+    /// Seconds are implicitly 0 (minute-granularity scheduling).
+    /// Examples: "15 14 * * *" = 14:15 UTC daily, "0 */6 * * *" = every
+    /// 6 hours on the hour, "0 9 * * 1-5" = 09:00 UTC weekdays.
+    /// Timezone: UTC always — the scheduler's clock is UTC and we don't
+    /// carry a per-schedule timezone field.
    Cron { expr: String },
 }

@ -162,22 +166,55 @@ impl ScheduleDef {
 //      Parse the expression, find the next match after `now`. Most
 //    expressive but pulls in a cron parser dep.
 //
-// Cron is currently stubbed — falling back to "now + 1 hour" so a
-// mistakenly-configured Cron schedule doesn't melt the GPU.
 pub fn compute_next_run_at(
    trigger: &ScheduleTrigger,
    completed_at: DateTime<Utc>,
    _previous_next: DateTime<Utc>,
 ) -> DateTime<Utc> {
-    // TODO(J): pick a strategy. Starter = since-last-completion.
    match trigger {
        ScheduleTrigger::Interval { secs } => {
            completed_at + Duration::seconds(*secs as i64)
        }
-        ScheduleTrigger::Cron { expr: _ } => {
-            // Cron parsing not implemented — fall back to a safe 1h
-            // window so a bad config can't fire-loop the system.
-            completed_at + Duration::hours(1)
+        ScheduleTrigger::Cron { expr } => {
+            next_cron_fire(expr, completed_at)
+                // Creation-time validation (see `validate_trigger`) rejects
+                // unparseable expressions, so this fallback is only reached
+                // if the schedule JSON was hand-edited on disk. One-hour
+                // window keeps a bad expr from fire-looping the system.
+                .unwrap_or(completed_at + Duration::hours(1))
+        }
+    }
+}
+
+/// Parse a Vixie/POSIX cron expression and return the next fire time
+/// strictly after `after`. Accepts 5-field (`min hour dom month dow`) and
+/// 6-field (with leading `sec`) forms natively — croner handles both.
+/// Day-of-week follows Unix convention: 0=Sun, 1=Mon, …, 6=Sat (7=Sun as
+/// synonym). Returns None if the expression fails to parse or produces no
+/// upcoming match (which can happen for year-bounded patterns in the past).
+fn next_cron_fire(expr: &str, after: DateTime<Utc>) -> Option<DateTime<Utc>> {
+    use std::str::FromStr;
+    let cron = croner::Cron::from_str(expr).ok()?;
+    cron.find_next_occurrence(&after, false).ok()
+}
+
+/// Reject a ScheduleTrigger whose content can't be interpreted — used by
+/// the HTTP handlers to fail fast at create/patch time rather than falling
+/// back silently at fire time. Interval triggers get a sanity gate
+/// (no zero secs); cron triggers get a full parse through croner.
+pub fn validate_trigger(trigger: &ScheduleTrigger) -> Result<(), String> {
+    match trigger {
+        ScheduleTrigger::Interval { secs } => {
+            if *secs == 0 {
+                return Err("interval secs must be > 0".into());
+            }
+            Ok(())
+        }
+        ScheduleTrigger::Cron { expr } => {
+            use std::str::FromStr;
+            croner::Cron::from_str(expr)
+                .map_err(|e| format!("invalid cron expression '{expr}': {e}"))?;
+            Ok(())
        }
    }
 }
@ -551,6 +588,7 @@ fn redact_dsn(dsn: &str) -> String {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use chrono::Timelike;

    fn mk(secs: u64) -> ScheduleDef {
        ScheduleDef {
@ -600,12 +638,73 @@ mod tests {
    }

    #[test]
-    fn cron_falls_back_to_one_hour() {
+    fn cron_every_minute_fires_within_sixty_seconds() {
        let trig = ScheduleTrigger::Cron { expr: "* * * * *".into() };
        let now = Utc::now();
        let next = compute_next_run_at(&trig, now, now);
        let delta = next - now;
-        assert_eq!(delta.num_seconds(), 3600);
+        assert!(delta.num_seconds() > 0 && delta.num_seconds() <= 60,
+            "expected next fire within 60s, got {}s", delta.num_seconds());
+    }
+
+    #[test]
+    fn cron_daily_at_1415_utc_is_within_24h() {
+        // 14:15 UTC daily — whether we're before or after 14:15 today, the
+        // next fire is at most 24h out.
+        let trig = ScheduleTrigger::Cron { expr: "15 14 * * *".into() };
+        let now: DateTime<Utc> = "2026-04-20T10:00:00Z".parse().unwrap();
+        let next = compute_next_run_at(&trig, now, now);
+        assert!(next > now);
+        assert!((next - now).num_hours() <= 24);
+        // And the minute-of-hour is 15.
+        assert_eq!(next.minute(), 15);
+        assert_eq!(next.hour(), 14);
+    }
+
+    #[test]
+    fn cron_weekday_skips_weekend() {
+        // 09:00 UTC weekdays only. On a Saturday, next fire is Monday.
+        let trig = ScheduleTrigger::Cron { expr: "0 9 * * 1-5".into() };
+        // 2026-04-18 is a Saturday.
+        let sat: DateTime<Utc> = "2026-04-18T10:00:00Z".parse().unwrap();
+        let next = compute_next_run_at(&trig, sat, sat);
+        // Monday 2026-04-20 at 09:00 UTC.
+        assert_eq!(next, "2026-04-20T09:00:00Z".parse::<DateTime<Utc>>().unwrap());
+    }
+
+    #[test]
+    fn cron_six_field_seconds_granularity() {
+        // 6-field (seconds included): fire every minute at 30s past the minute.
+        let trig = ScheduleTrigger::Cron { expr: "30 * * * * *".into() };
+        let now = Utc::now();
+        let next = compute_next_run_at(&trig, now, now);
+        assert_eq!(next.second(), 30);
+    }
+
+    #[test]
+    fn validate_rejects_bad_cron() {
+        let bad = ScheduleTrigger::Cron { expr: "not a cron".into() };
+        let err = validate_trigger(&bad).unwrap_err();
+        assert!(err.contains("cron"), "error should mention cron, got: {err}");
+    }
+
+    #[test]
+    fn validate_rejects_wrong_field_count() {
+        // 4 fields — neither 5 nor 6.
+        let bad = ScheduleTrigger::Cron { expr: "* * * *".into() };
+        assert!(validate_trigger(&bad).is_err());
+    }
+
+    #[test]
+    fn validate_rejects_zero_interval() {
+        let bad = ScheduleTrigger::Interval { secs: 0 };
+        assert!(validate_trigger(&bad).is_err());
+    }
+
+    #[test]
+    fn validate_accepts_good_cron() {
+        let good = ScheduleTrigger::Cron { expr: "0 */6 * * *".into() };
+        assert!(validate_trigger(&good).is_ok());
    }

    #[test]
--- a/crates/ingestd/src/service.rs
+++ b/crates/ingestd/src/service.rs
@ -510,6 +510,9 @@ async fn create_schedule(
    if state.schedules.get(&req.id).await.is_some() {
        return Err((StatusCode::CONFLICT, format!("schedule '{}' already exists", req.id)));
    }
+    if let Err(e) = schedule::validate_trigger(&req.trigger) {
+        return Err((StatusCode::BAD_REQUEST, e));
+    }
    let now = chrono::Utc::now();
    let def = schedule::ScheduleDef {
        id: req.id,
@ -564,6 +567,11 @@ async fn patch_schedule(
    let Some(mut def) = state.schedules.get(&id).await else {
        return Err((StatusCode::NOT_FOUND, format!("schedule '{id}' not found")));
    };
+    if let Some(t) = &req.trigger {
+        if let Err(e) = schedule::validate_trigger(t) {
+            return Err((StatusCode::BAD_REQUEST, e));
+        }
+    }
    if let Some(e) = req.enabled { def.enabled = e; }
    if let Some(t) = req.trigger { def.trigger = t; }
    match state.schedules.put(def).await {
--- a/crates/storaged/src/append_log.rs
+++ b/crates/storaged/src/append_log.rs
@ -18,7 +18,7 @@
 ///   to small JSONL events.
 ///
 /// Storage layout:
-/// ```
+/// ```text
 /// {prefix}/
 ///   batch_0001776319628000123.jsonl
 ///   batch_0001776319745987654.jsonl
@ -221,15 +221,15 @@ pub struct CompactStats {
    pub new_key: Option<String>,
 }

-// Proactively flush on drop, best-effort.
-// We can't `.await` in Drop; we spawn the flush on the tokio runtime if one
-// is available. If the runtime is already shutting down the flush is lost —
-// acceptable for these observability journals, which are hints not records.
+// Log unflushed-buffer size on drop. We can't `.await` from a sync `Drop`,
+// so a real flush isn't possible here — callers are responsible for calling
+// `.flush()` before dropping if durability matters. These journals are
+// observability hints; a few lost buffered events at shutdown are
+// acceptable per ADR-018.
 impl Drop for AppendLog {
    fn drop(&mut self) {
        let buf_len = self.buffer.try_lock().map(|b| b.len()).unwrap_or(0);
        if buf_len == 0 { return; }
-        // Can't spawn from sync Drop on every runtime shape; log + move on.
        tracing::debug!(
            "append_log '{}' dropping with {} unflushed events",
            self.prefix, buf_len,
@ -237,3 +237,94 @@ impl Drop for AppendLog {
    }
 }

+#[cfg(test)]
+mod tests {
+    use super::*;
+    use object_store::memory::InMemory;
+
+    fn mk(threshold: usize) -> AppendLog {
+        AppendLog::new(Arc::new(InMemory::new()), "prefix")
+            .with_flush_threshold(threshold)
+    }
+
+    #[tokio::test]
+    async fn append_stays_buffered_below_threshold() {
+        let log = mk(5);
+        log.append(b"one".to_vec()).await.unwrap();
+        log.append(b"two".to_vec()).await.unwrap();
+        assert_eq!(log.file_count().await.unwrap(), 0, "no files until threshold");
+        let all = log.read_all().await.unwrap();
+        assert_eq!(all, vec![b"one".to_vec(), b"two".to_vec()],
+            "read_all surfaces unflushed buffer");
+    }
+
+    #[tokio::test]
+    async fn append_auto_flushes_on_threshold() {
+        let log = mk(3);
+        for i in 0..3 {
+            log.append(format!("evt{i}").into_bytes()).await.unwrap();
+        }
+        assert_eq!(log.file_count().await.unwrap(), 1, "threshold triggered one flush");
+
+        // A fourth append stays buffered until the next threshold.
+        log.append(b"evt3".to_vec()).await.unwrap();
+        assert_eq!(log.file_count().await.unwrap(), 1, "below threshold again");
+    }
+
+    #[tokio::test]
+    async fn flush_empty_is_noop() {
+        let log = mk(32);
+        log.flush().await.unwrap();
+        log.flush().await.unwrap();
+        assert_eq!(log.file_count().await.unwrap(), 0);
+    }
+
+    #[tokio::test]
+    async fn read_all_orders_events_across_flushes() {
+        let log = mk(1); // flush-on-every-append
+        for i in 0..5 {
+            log.append(format!("e{i}").into_bytes()).await.unwrap();
+            // Spread writes out so timestamps sort strictly.
+            tokio::time::sleep(std::time::Duration::from_millis(2)).await;
+        }
+        let all = log.read_all().await.unwrap();
+        let strs: Vec<String> = all.into_iter()
+            .map(|v| String::from_utf8(v).unwrap())
+            .collect();
+        assert_eq!(strs, vec!["e0", "e1", "e2", "e3", "e4"],
+            "lex sort of batch keys == chronological event order");
+    }
+
+    #[tokio::test]
+    async fn compact_merges_multiple_files_into_one() {
+        let log = mk(1); // force file-per-append
+        for i in 0..4 {
+            log.append(format!("e{i}").into_bytes()).await.unwrap();
+            tokio::time::sleep(std::time::Duration::from_millis(2)).await;
+        }
+        assert_eq!(log.file_count().await.unwrap(), 4);
+
+        let stats = log.compact().await.unwrap();
+        assert_eq!(stats.merged_files, 4);
+        assert_eq!(stats.events, 4);
+        assert!(stats.new_key.is_some());
+
+        assert_eq!(log.file_count().await.unwrap(), 1, "originals deleted, 1 survivor");
+        let all = log.read_all().await.unwrap();
+        assert_eq!(all.len(), 4, "no events lost in compaction");
+    }
+
+    #[tokio::test]
+    async fn compact_with_single_file_is_noop() {
+        let log = mk(1);
+        log.append(b"only".to_vec()).await.unwrap();
+        assert_eq!(log.file_count().await.unwrap(), 1);
+
+        let stats = log.compact().await.unwrap();
+        assert_eq!(stats.merged_files, 1);
+        assert_eq!(stats.events, 0, "nothing to consolidate");
+        assert!(stats.new_key.is_none(), "no new file written");
+        assert_eq!(log.file_count().await.unwrap(), 1, "original untouched");
+    }
+}
+
--- a/crates/storaged/src/error_journal.rs
+++ b/crates/storaged/src/error_journal.rs
@ -198,3 +198,92 @@ impl ErrorJournal {
        self.log.file_count().await
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use object_store::memory::InMemory;
+
+    fn mk() -> ErrorJournal {
+        ErrorJournal::new(Arc::new(InMemory::new()))
+    }
+
+    #[tokio::test]
+    async fn append_updates_ring_immediately() {
+        let j = mk();
+        j.append(BucketErrorEvent::new_read("tenant_a", "k1", "timeout")).await;
+        j.append(BucketErrorEvent::new_write("tenant_b", "k2", "denied")).await;
+
+        let recent = j.recent(10).await;
+        assert_eq!(recent.len(), 2);
+        assert_eq!(recent[0].target, "tenant_a");
+        assert_eq!(recent[1].target, "tenant_b");
+    }
+
+    #[tokio::test]
+    async fn load_recent_rehydrates_ring_from_disk() {
+        // Use a shared InMemory store so the second journal reads the first
+        // journal's writes — mirrors gateway restart semantics.
+        let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
+        {
+            let j1 = ErrorJournal {
+                log: Arc::new(AppendLog::new(store.clone(), JOURNAL_PREFIX).with_flush_threshold(1)),
+                ring: Arc::new(RwLock::new(VecDeque::with_capacity(RING_CAPACITY))),
+            };
+            j1.append(BucketErrorEvent::new_read("t", "k", "err")).await;
+            j1.flush().await.unwrap();
+        }
+
+        let j2 = ErrorJournal {
+            log: Arc::new(AppendLog::new(store.clone(), JOURNAL_PREFIX)),
+            ring: Arc::new(RwLock::new(VecDeque::with_capacity(RING_CAPACITY))),
+        };
+        assert_eq!(j2.recent(10).await.len(), 0, "fresh ring is empty");
+
+        let loaded = j2.load_recent().await.unwrap();
+        assert_eq!(loaded, 1);
+        assert_eq!(j2.recent(10).await[0].target, "t");
+    }
+
+    #[tokio::test]
+    async fn mark_rescued_updates_most_recent_match() {
+        let j = mk();
+        j.append(BucketErrorEvent::new_read("t1", "k", "err")).await;
+        j.append(BucketErrorEvent::new_read("t2", "k", "err")).await;
+        j.append(BucketErrorEvent::new_read("t1", "k", "err")).await;
+
+        j.mark_rescued_last("t1", "k").await;
+
+        let recent = j.recent(10).await;
+        assert!(!recent[0].rescued, "older t1 event not touched");
+        assert!(!recent[1].rescued, "t2 not touched");
+        assert!(recent[2].rescued, "most recent t1 was rescued");
+    }
+
+    #[tokio::test]
+    async fn health_flags_buckets_with_three_or_more_errors() {
+        let j = mk();
+        for _ in 0..3 { j.append(BucketErrorEvent::new_read("broken", "k", "e")).await; }
+        j.append(BucketErrorEvent::new_read("flaky", "k", "e")).await;
+
+        let h = j.health(60).await;
+        assert_eq!(h.total_errors, 4);
+        assert_eq!(h.per_bucket.get("broken"), Some(&3));
+        assert_eq!(h.per_bucket.get("flaky"), Some(&1));
+        assert!(h.unhealthy_buckets.contains(&"broken".to_string()));
+        assert!(!h.unhealthy_buckets.contains(&"flaky".to_string()));
+    }
+
+    #[tokio::test]
+    async fn filter_by_bucket_returns_chronological_order() {
+        let j = mk();
+        j.append(BucketErrorEvent::new_read("a", "k1", "e")).await;
+        j.append(BucketErrorEvent::new_read("b", "k2", "e")).await;
+        j.append(BucketErrorEvent::new_read("a", "k3", "e")).await;
+
+        let only_a = j.filter(Some("a"), None, 10).await;
+        assert_eq!(only_a.len(), 2);
+        assert_eq!(only_a[0].key, "k1", "oldest first");
+        assert_eq!(only_a[1].key, "k3");
+    }
+}
--- a/crates/vectord-lance/src/lib.rs
+++ b/crates/vectord-lance/src/lib.rs
@ -357,7 +357,25 @@ impl LanceVectorStore {
    /// Search for top_k nearest neighbors of `query`. Uses the IVF_PQ
    /// index if one exists; otherwise does a full scan (slow but
    /// correct — useful during development before index build).
-    pub async fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<Hit>, String> {
+    ///
+    /// `nprobes` tells Lance how many IVF partitions to probe per query.
+    /// Lance's built-in default is **1**, which caps recall well below
+    /// what the index is actually capable of. Passing `None` keeps that
+    /// default (only sensible when latency trumps recall); for real
+    /// workloads set `nprobes` to 5–10% of `num_partitions` (e.g. 20–30
+    /// on a 316-partition index).
+    ///
+    /// `refine_factor` re-ranks the PQ-approximate top-k by computing
+    /// exact distances on `top_k * refine_factor` candidates, then
+    /// trimming to `top_k`. Cheap way to buy back recall lost to product
+    /// quantization. `None` skips the re-rank pass.
+    pub async fn search(
+        &self,
+        query: &[f32],
+        top_k: usize,
+        nprobes: Option<usize>,
+        refine_factor: Option<u32>,
+    ) -> Result<Vec<Hit>, String> {
        use lance::dataset::Dataset;

        let dataset = Dataset::open(&self.path).await.map_err(e)?;
@ -365,6 +383,8 @@ impl LanceVectorStore {

        let mut scanner = dataset.scan();
        scanner.nearest("vector", &qarr, top_k as usize).map_err(e)?;
+        if let Some(n) = nprobes { scanner.nprobes(n); }
+        if let Some(f) = refine_factor { scanner.refine(f); }
        scanner.project(&["doc_id", "chunk_text"]).map_err(e)?;

        let mut stream = scanner.try_into_stream().await.map_err(e)?;
--- a/crates/vectord/Cargo.toml
+++ b/crates/vectord/Cargo.toml
@ -9,6 +9,7 @@ storaged = { path = "../storaged" }
 aibridge = { path = "../aibridge" }
 catalogd = { path = "../catalogd" }
 queryd = { path = "../queryd" }
+futures = { workspace = true }
 # ADR-019 firewall — vectord-lance owns its own Arrow 57 / Lance 4 deps.
 # Public API uses only std types so no version conflict propagates here.
 vectord-lance = { path = "../vectord-lance" }
@ -24,3 +25,4 @@ arrow = { workspace = true }
 chrono = { workspace = true }
 instant-distance = { workspace = true }
 uuid = { workspace = true }
+sha2 = { workspace = true }
--- a/crates/vectord/src/agent.rs
+++ b/crates/vectord/src/agent.rs
@ -204,6 +204,7 @@ pub struct AgentDeps {
    pub embedding_cache: EmbeddingCache,
    pub trial_journal: TrialJournal,
    pub promotion_registry: PromotionRegistry,
+    pub harness_store: crate::harness::HarnessStore,
 }

 // -------- Spawn --------
@ -354,12 +355,11 @@ async fn run_one_cycle(
    // Read history.
    let history = deps.trial_journal.list(&event.index_name).await
        .map_err(|e| format!("read journal: {e}"))?;
-    if history.is_empty() {
-        return Err(format!(
-            "no trials yet for '{}' — seed with at least one POST /hnsw/trial first",
-            event.index_name,
-        ));
-    }
+
+    // Bootstrap mode: first ever visit to this index. Auto-generate a
+    // harness (if missing) and seed with the default config — the Phase 15
+    // known-good ec=80/es=30. Subsequent visits go through the proposer.
+    let is_bootstrap = history.is_empty();

    // Current champion (if any) is the promoted config.
    let champion = deps.promotion_registry.get_current(&event.index_name).await;
@ -367,24 +367,26 @@ async fn run_one_cycle(
        history.iter().find(|t| t.id == p.trial_id).cloned()
    });

-    // Propose the next config.
-    let Some(next_config) = propose_next_config(&history, champion_trial.as_ref()) else {
+    let (next_config, harness_name) = if is_bootstrap {
+        let name = ensure_auto_harness(&event.index_name, deps).await?;
+        (HnswConfig::default(), name)
+    } else {
+        let Some(cfg) = propose_next_config(&history, champion_trial.as_ref()) else {
            return Err("proposer returned None — search space exhausted".into());
        };
-
        // Validate bounds defensively.
-    if !(10..=400).contains(&next_config.ef_construction) {
-        return Err(format!("proposed ef_construction={} out of bounds", next_config.ef_construction));
+        if !(10..=400).contains(&cfg.ef_construction) {
+            return Err(format!("proposed ef_construction={} out of bounds", cfg.ef_construction));
        }
-    if !(10..=200).contains(&next_config.ef_search) {
-        return Err(format!("proposed ef_search={} out of bounds", next_config.ef_search));
+        if !(10..=200).contains(&cfg.ef_search) {
+            return Err(format!("proposed ef_search={} out of bounds", cfg.ef_search));
        }
-
-    // Need a harness to measure. Use the most recent one in history.
-    // (A future refinement: remember per-index "canonical harness" on
-    // the index metadata. For now: latest wins.)
-    let harness_name = history.last().unwrap().eval_set.clone();
-    let mut harness_set = harness::EvalSet::load(&deps.store, &harness_name).await
+        // A future refinement: remember per-index "canonical harness" on
+        // the index metadata. For now: latest wins.
+        let hname = history.last().unwrap().eval_set.clone();
+        (cfg, hname)
+    };
+    let mut harness_set = deps.harness_store.load_for_index(&event.index_name, &harness_name).await
        .map_err(|e| format!("load harness '{harness_name}': {e}"))?;

    let embeddings = deps.embedding_cache.get_or_load(&event.index_name).await
@ -393,7 +395,7 @@ async fn run_one_cycle(
    if !harness_set.ground_truth_built {
        harness::compute_ground_truth(&mut harness_set, &embeddings, &deps.ai_client).await
            .map_err(|e| format!("ground truth: {e}"))?;
-        harness_set.save(&deps.store).await.ok();
+        deps.harness_store.save(&harness_set).await.ok();
    }

    // Build + bench.
@ -428,11 +430,14 @@ async fn run_one_cycle(
    let dims = embeddings.first().map(|e| e.vector.len()).unwrap_or(0);
    let memory_bytes = (embeddings.len() * dims * std::mem::size_of::<f32>() + embeddings.len() * 128) as u64;

-    let note = match &event.reason {
+    let note = {
+        let base = match &event.reason {
            TriggerReason::DatasetAppended { dataset } => format!("agent: dataset_appended({dataset})"),
            TriggerReason::Manual => "agent: manual".to_string(),
            TriggerReason::Periodic => "agent: periodic".to_string(),
        };
+        if is_bootstrap { format!("{base} bootstrap") } else { base }
+    };

    let trial = Trial {
        id: trial_id,
@ -597,19 +602,92 @@ pub fn propose_next_config(history: &[Trial], champion: Option<&Trial>) -> Optio

 // -------- Helpers --------

-/// Find an index to poke on a periodic wake. Strategy: the one with the
-/// most recent trial. If nothing's been trialed yet, return None.
+/// Minimum vectors for an index to be worth auto-tuning. Below this,
+/// HNSW's win over brute-force is too small to matter and trial budget
+/// is better spent on bigger indexes.
+const AUTOTUNE_MIN_VECTORS: usize = 1_000;
+
+/// Find an index to poke on a periodic wake. Strategy: least-recently-tuned
+/// wins — pick the index (from `IndexRegistry`, not just promoted ones)
+/// whose most-recent trial is oldest, so trial budget spreads across the
+/// whole portfolio. Never-trialed indexes sort first (None < Some), which
+/// is exactly what we want: bootstrap them on their first visit.
+///
+/// Why not "most recently promoted" (the original strategy): a converged
+/// index like `threat_intel_v1` sits at recall=1.0 on a minimal graph and
+/// can't be improved — but it was always the freshest promotion, so the
+/// agent burned every trial on it while 40+ other indexes got zero
+/// attention. Rotating by last-trial-time fixes that without any explicit
+/// convergence detection.
+///
+/// Why IndexRegistry instead of PromotionRegistry: on a fresh system only
+/// a handful of indexes have ever been promoted, so promotion-based
+/// picking starves new ones. IndexRegistry lists every live index the
+/// platform knows about; filtering by `AUTOTUNE_MIN_VECTORS` keeps the
+/// proposer off indexes where the graph config doesn't matter.
+///
+/// Cost: N extra journal reads per periodic tick, where N = eligible
+/// indexes. Fine at dozens; if the portfolio grows into thousands, cache
+/// `last_trial_at` on the IndexMeta.
 async fn pick_periodic_target(deps: &AgentDeps) -> Option<String> {
-    // `/agent` runs against any index that has a trial journal. We don't
-    // have a "list all journals" helper, so we derive candidates from the
-    // promotion registry (indexes with a human ever promoting are live).
-    let promos = deps.promotion_registry.list_all().await.ok()?;
-    // Prefer the one most recently promoted — it's the one a human cares
-    // about right now.
-    promos.into_iter()
-        .filter_map(|f| f.current.map(|c| (f.index_name, c.promoted_at)))
-        .max_by_key(|(_, at)| *at)
-        .map(|(name, _)| name)
+    let candidates: Vec<String> = deps
+        .index_registry
+        .list(None, None)
+        .await
+        .into_iter()
+        .filter(|m| m.chunk_count >= AUTOTUNE_MIN_VECTORS)
+        .map(|m| m.index_name)
+        .collect();
+    if candidates.is_empty() { return None; }
+
+    let mut scored: Vec<(String, Option<DateTime<Utc>>)> = Vec::with_capacity(candidates.len());
+    for name in candidates {
+        let last_trial_at = deps
+            .trial_journal
+            .list(&name)
+            .await
+            .ok()
+            .and_then(|trials| trials.into_iter().map(|t| t.created_at).max());
+        scored.push((name, last_trial_at));
+    }
+
+    scored.into_iter().min_by_key(|(_, last)| *last).map(|(n, _)| n)
+}
+
+/// On the first visit to an index, load or synthesize an eval harness so
+/// bootstrap trials have something to measure recall against. Returns the
+/// harness name. Synthetic harnesses sample 20 chunks and use their first
+/// 200 chars as self-queries — the ground truth is computed by brute-force
+/// cosine, so recall numbers are real (a config that finds the chunk in
+/// top-k counts as a hit).
+async fn ensure_auto_harness(index_name: &str, deps: &AgentDeps) -> Result<String, String> {
+    let name = format!("{index_name}_auto");
+
+    if deps.harness_store.load_for_index(index_name, &name).await.is_ok() {
+        return Ok(name);
+    }
+
+    let embeddings = deps
+        .embedding_cache
+        .get_or_load(index_name)
+        .await
+        .map_err(|e| format!("load embeddings for auto-harness: {e}"))?;
+    if embeddings.is_empty() {
+        return Err(format!("index '{index_name}' has no embeddings — cannot bootstrap"));
+    }
+
+    let mut eval = harness::synthetic_from_chunks(&name, index_name, &embeddings, 20, 10);
+    harness::compute_ground_truth(&mut eval, &embeddings, &deps.ai_client)
+        .await
+        .map_err(|e| format!("auto-harness ground truth: {e}"))?;
+    deps.harness_store.save(&eval).await
+        .map_err(|e| format!("save auto-harness: {e}"))?;
+
+    tracing::info!(
+        "agent: bootstrapped harness '{}' for index '{}' (20 self-queries, k=10)",
+        name, index_name
+    );
+    Ok(name)
 }

 async fn over_rate_limit(inner: &Arc<AgentInner>, cap: u32) -> bool {
--- a/crates/vectord/src/autotune.rs
+++ b/crates/vectord/src/autotune.rs
@ -28,7 +28,7 @@ use aibridge::client::AiClient;
 use catalogd::registry::Registry as CatalogRegistry;

 use crate::embedding_cache::EmbeddingCache;
-use crate::harness;
+use crate::harness::{self, HarnessStore};
 use crate::hnsw::HnswStore;
 use crate::index_registry::IndexRegistry;
 use crate::jobs::JobTracker;
@ -128,7 +128,7 @@ fn pick_winner(trials: &[Trial], min_recall: f32) -> Option<&Trial> {
 #[allow(clippy::too_many_arguments)]
 pub async fn run_autotune(
    req: AutotuneRequest,
-    store: &Arc<dyn ObjectStore>,
+    _store: &Arc<dyn ObjectStore>,
    catalog: &CatalogRegistry,
    ai_client: &AiClient,
    embedding_cache: &EmbeddingCache,
@ -136,6 +136,7 @@ pub async fn run_autotune(
    index_registry: &IndexRegistry,
    trial_journal: &TrialJournal,
    promotion_registry: &PromotionRegistry,
+    harness_store: &HarnessStore,
    _job_tracker: &JobTracker,
 ) -> Result<AutotuneResult, String> {
    let t0 = std::time::Instant::now();
@ -146,8 +147,11 @@ pub async fn run_autotune(
    }
    let _ = catalog; // reserved for future audit emission

-    // Load the harness once, compute ground truth once.
-    let mut harness_set = harness::EvalSet::load(store, &req.harness)
+    // Load the harness once, compute ground truth once. Harness resolves
+    // to the index's bucket via HarnessStore, with primary as fallback for
+    // pre-federation evals.
+    let mut harness_set = harness_store
+        .load_for_index(&req.index_name, &req.harness)
        .await
        .map_err(|e| format!("load harness: {e}"))?;

@ -160,7 +164,7 @@ pub async fn run_autotune(
        harness::compute_ground_truth(&mut harness_set, &embeddings, ai_client)
            .await
            .map_err(|e| format!("ground truth: {e}"))?;
-        harness_set.save(store).await.ok();
+        harness_store.save(&harness_set).await.ok();
    }

    let (grid, rejected) = sanitize_grid(req.grid.clone().unwrap_or_else(default_grid));
--- a/crates/vectord/src/harness.rs
+++ b/crates/vectord/src/harness.rs
@ -10,11 +10,14 @@
 use chrono::{DateTime, Utc};
 use object_store::ObjectStore;
 use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
 use std::sync::Arc;

 use aibridge::client::{AiClient, EmbedRequest};
 use storaged::ops;
+use storaged::registry::BucketRegistry;

+use crate::index_registry::IndexRegistry;
 use crate::store::StoredEmbedding;

 /// A single eval query with optional pre-computed ground truth.
@ -85,6 +88,108 @@ impl EvalSet {
    }
 }

+/// Federation-aware wrapper around EvalSet persistence. Mirrors the
+/// `TrialJournal` / `PromotionRegistry` pattern: harness files colocate
+/// with their index's bucket (looked up via `IndexMeta.bucket`), falling
+/// back to `primary` for indexes the registry has never seen. Legacy
+/// harnesses predating federation remain discoverable — lookups transparently
+/// try the resolved bucket first, then `primary` as a fallback. Cross-bucket
+/// listing dedupes across every registered bucket so `GET /hnsw/evals`
+/// returns a complete picture.
+#[derive(Clone)]
+pub struct HarnessStore {
+    buckets: Arc<BucketRegistry>,
+    index_registry: IndexRegistry,
+}
+
+impl HarnessStore {
+    pub fn new(buckets: Arc<BucketRegistry>, index_registry: IndexRegistry) -> Self {
+        Self { buckets, index_registry }
+    }
+
+    /// Resolve which bucket holds this index's eval artifacts. Indexes the
+    /// registry has never heard of fall through to `primary`.
+    async fn bucket_for_index(&self, index_name: &str) -> String {
+        self.index_registry
+            .get(index_name)
+            .await
+            .map(|m| m.bucket)
+            .unwrap_or_else(|| "primary".to_string())
+    }
+
+    /// Save to the bucket that owns `eval.index_name`. Writes under the
+    /// standard `_hnsw_evals/{name}.json` prefix of the resolved bucket.
+    pub async fn save(&self, eval: &EvalSet) -> Result<(), String> {
+        let bucket = self.bucket_for_index(&eval.index_name).await;
+        let store = self.buckets.get(&bucket)?;
+        eval.save(&store).await
+    }
+
+    /// Load a harness by name, given the index it belongs to. Tries the
+    /// index's bucket first; if the file is absent AND the resolved bucket
+    /// isn't `primary`, falls through to `primary` so pre-federation evals
+    /// remain reachable without migration.
+    pub async fn load_for_index(
+        &self,
+        index_name: &str,
+        harness_name: &str,
+    ) -> Result<EvalSet, String> {
+        let bucket = self.bucket_for_index(index_name).await;
+        let primary_store = self.buckets.get("primary")?;
+        let store = self.buckets.get(&bucket)?;
+
+        match EvalSet::load(&store, harness_name).await {
+            Ok(e) => Ok(e),
+            Err(e) if bucket != "primary" => EvalSet::load(&primary_store, harness_name)
+                .await
+                .map_err(|primary_err| format!("{bucket}: {e}; primary fallback: {primary_err}")),
+            Err(e) => Err(e),
+        }
+    }
+
+    /// Find a harness by name without knowing which index it belongs to —
+    /// used by `GET /hnsw/evals/{name}`. Scans every registered bucket;
+    /// first hit wins. Primary is searched first so pre-federation evals
+    /// with the same name as a federated one resolve deterministically.
+    pub async fn get_any(&self, harness_name: &str) -> Result<EvalSet, String> {
+        let bucket_infos = self.buckets.list().await;
+        let mut ordered: Vec<String> = bucket_infos.iter().map(|b| b.name.clone()).collect();
+        ordered.sort_by_key(|n| if n == "primary" { 0 } else { 1 });
+
+        let mut last_err = None;
+        for b in ordered {
+            let store = match self.buckets.get(&b) {
+                Ok(s) => s,
+                Err(e) => { last_err = Some(e); continue; }
+            };
+            match EvalSet::load(&store, harness_name).await {
+                Ok(e) => return Ok(e),
+                Err(e) => { last_err = Some(e); }
+            }
+        }
+        Err(last_err.unwrap_or_else(|| format!("no buckets registered for eval '{harness_name}'")))
+    }
+
+    /// Union of every harness name across every registered bucket.
+    /// Duplicates (same name in multiple buckets — pathological but
+    /// possible after a manual migration) are collapsed.
+    pub async fn list_all(&self) -> Vec<String> {
+        let mut all: HashSet<String> = HashSet::new();
+        for b in self.buckets.list().await {
+            let store = match self.buckets.get(&b.name) {
+                Ok(s) => s,
+                Err(_) => continue,
+            };
+            if let Ok(names) = EvalSet::list(&store).await {
+                all.extend(names);
+            }
+        }
+        let mut out: Vec<String> = all.into_iter().collect();
+        out.sort();
+        out
+    }
+}
+
 /// Cosine similarity for two same-length f32 slices.
 fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let mut dot = 0.0f32;
@ -127,13 +232,15 @@ pub fn recall_at_k(predicted: &[String], ground_truth: &[String], k: usize) -> f
    if ground_truth.is_empty() || k == 0 {
        return 0.0;
    }
+    // Set-intersection recall@k. Previous implementation counted duplicates
+    // in `predicted` (a corpus with repeated chunks — e.g. cached LLM
+    // responses — returns the same doc_id multiple times via HNSW), which
+    // inflated recall above 1.0 and poisoned promotion decisions.
    let gt_set: std::collections::HashSet<&String> =
        ground_truth.iter().take(k).collect();
-    let hits = predicted
-        .iter()
-        .take(k)
-        .filter(|d| gt_set.contains(d))
-        .count();
+    let pred_set: std::collections::HashSet<&String> =
+        predicted.iter().take(k).collect();
+    let hits = pred_set.intersection(&gt_set).count();
    hits as f32 / gt_set.len() as f32
 }

@ -214,3 +321,67 @@ pub fn synthetic_from_chunks(
        ground_truth_built: false,
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn s(v: &[&str]) -> Vec<String> { v.iter().map(|x| x.to_string()).collect() }
+
+    #[test]
+    fn recall_empty_ground_truth_is_zero() {
+        assert_eq!(recall_at_k(&s(&["a", "b"]), &[], 10), 0.0);
+    }
+
+    #[test]
+    fn recall_k_zero_is_zero() {
+        assert_eq!(recall_at_k(&s(&["a"]), &s(&["a"]), 0), 0.0);
+    }
+
+    #[test]
+    fn recall_perfect_match_equals_one() {
+        let pred = s(&["a", "b", "c"]);
+        let gt = s(&["a", "b", "c"]);
+        assert!((recall_at_k(&pred, &gt, 3) - 1.0).abs() < 1e-6);
+    }
+
+    #[test]
+    fn recall_half_match() {
+        let pred = s(&["a", "b", "x", "y"]);
+        let gt = s(&["a", "b", "c", "d"]);
+        assert!((recall_at_k(&pred, &gt, 4) - 0.5).abs() < 1e-6);
+    }
+
+    #[test]
+    fn recall_duplicates_in_predicted_do_not_inflate() {
+        // Regression guard: the previous implementation counted each
+        // duplicate in `predicted` separately, which could push recall
+        // above 1.0 on corpora with repeated chunks (cached responses etc).
+        // Set-intersection semantics keep it bounded in [0, 1].
+        let pred = s(&["a", "a", "a", "a"]);
+        let gt = s(&["a", "b", "c", "d"]);
+        let r = recall_at_k(&pred, &gt, 4);
+        assert!(r <= 1.0, "recall {r} must not exceed 1.0");
+        // One unique match out of four in gt = 0.25.
+        assert!((r - 0.25).abs() < 1e-6);
+    }
+
+    #[test]
+    fn recall_duplicates_in_ground_truth_handled() {
+        // gt with dupes reduces effective |gt|; matching one hits all.
+        let pred = s(&["x"]);
+        let gt = s(&["x", "x", "x"]);
+        let r = recall_at_k(&pred, &gt, 3);
+        assert!(r <= 1.0);
+        assert!((r - 1.0).abs() < 1e-6); // |pred ∩ gt| / |gt_set| = 1/1
+    }
+
+    #[test]
+    fn recall_respects_k_bound() {
+        // k=2 means only the first 2 of pred and gt count toward the set.
+        let pred = s(&["a", "b", "c", "d"]);
+        let gt = s(&["a", "b", "c", "d"]);
+        let r = recall_at_k(&pred, &gt, 2);
+        assert!((r - 1.0).abs() < 1e-6);
+    }
+}
--- a/crates/vectord/src/rag.rs
+++ b/crates/vectord/src/rag.rs
@ -10,6 +10,7 @@ use object_store::ObjectStore;
 use std::sync::Arc;

 use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};
+use aibridge::continuation::{generate_continuable, ContinuableOpts, ResponseShape};
 use crate::search::{self, SearchResult};
 use crate::store;

@ -42,7 +43,10 @@ async fn rerank(
        system: None,
        temperature: Some(0.0),
        max_tokens: Some(50),
-        think: None,
+        // Reranker returns a comma-separated int list — pure structured
+        // output, zero benefit from hidden reasoning. Opt out to avoid
+        // the empty-response failure mode Phase 21 catalogued.
+        think: Some(false),
    }).await;

    match resp {
@ -151,19 +155,29 @@ pub async fn query(
        Answer:"
    );

-    let gen_resp = ai_client.generate(GenerateRequest {
-        prompt,
-        model: None,
-        system: None,
-        temperature: Some(0.2),
-        max_tokens: Some(512),
-        think: None,
-    }).await?;
+    // Route the answer call through Phase 21's generate_continuable so
+    // a thinking-model empty-response or a mid-JSON truncation self-
+    // repairs instead of silently returning half an answer. Shape is
+    // Text (the answer is prose, not JSON), think is Some(false) to
+    // opt out of hidden reasoning on the hot path. This is the first
+    // production caller of the Phase 21 primitives — see audit finding
+    // "Phase 21 Rust primitives are wired but not CALLED by any
+    // production surface" from 2026-04-21.
+    let mut cont_opts = ContinuableOpts::new("qwen2.5:latest");
+    cont_opts.max_tokens = Some(512);
+    cont_opts.temperature = Some(0.2);
+    cont_opts.shape = ResponseShape::Text;
+    cont_opts.think = Some(false);
+    let outcome = generate_continuable(ai_client, &prompt, &cont_opts).await?;

    Ok(RagResponse {
-        answer: gen_resp.text.trim().to_string(),
-        model: gen_resp.model,
+        answer: outcome.text.trim().to_string(),
+        // generate_continuable doesn't surface the model name (sidecar
+        // echoes whatever Ollama loaded). Use the configured tier model
+        // for now; if RAG needs to report the actual resolved model,
+        // the runner can add a post-call ps probe later.
+        model: "qwen2.5:latest".to_string(),
        sources: results,
-        tokens_generated: gen_resp.tokens_generated,
+        tokens_generated: None,
    })
 }
--- a/crates/vectord/src/service.rs
+++ b/crates/vectord/src/service.rs
@ -13,6 +13,7 @@ use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};
 use catalogd::registry::Registry as CatalogRegistry;
 use storaged::registry::BucketRegistry;
 use crate::{agent, autotune, chunker, embedding_cache, harness, hnsw, index_registry, jobs, lance_backend, playbook_memory, promotion, rag, refresh, search, store, supervisor, trial};
+use tokio::sync::Semaphore;

 #[derive(Clone)]
 pub struct VectorState {
@ -53,6 +54,9 @@ pub struct VectorState {
    /// and, when `use_playbook_memory` is set on /vectors/hybrid, boosts
    /// workers that were actually filled in semantically-similar past ops.
    pub playbook_memory: playbook_memory::PlaybookMemory,
+    /// Serializes embed calls from seed_playbook_memory to avoid
+    /// concurrent socket collisions with the Python sidecar.
+    pub embed_semaphore: Arc<Semaphore>,
 }

 /// What the active-profile singleton records. Narrow — we don't need the
@ -893,7 +897,9 @@ async fn hybrid_search(
            system: None,
            temperature: Some(0.2),
            max_tokens: Some(512),
-            think: None,
+            // Hybrid's answer step — prose output over retrieved records,
+            // no reasoning needed on the hot path.
+            think: Some(false),
        }).await;

        gen_resp.ok().map(|r| r.text.trim().to_string())
@ -1583,7 +1589,7 @@ async fn activate_profile(
        });
    }

-    Ok(Json(ActivateReport {
+    let report = ActivateReport {
        profile_id: profile.id,
        ollama_name: profile.ollama_name,
        indexes_warmed: warmed,
@ -1592,7 +1598,9 @@ async fn activate_profile(
        duration_secs: t0.elapsed().as_secs_f32(),
        model_preloaded,
        previous_profile: previous_slot.map(|s| s.profile_id),
-    }))
+    };
+
+    Ok(Json(report))
 }

 /// Unload this profile's model and clear the active slot. No-op if the
@ -2217,6 +2225,10 @@ async fn seed_playbook_memory(
    State(state): State<VectorState>,
    Json(req): Json<SeedPlaybookRequest>,
 ) -> impl IntoResponse {
+    // Serialize embed calls to avoid concurrent socket collisions.
+    let _permit = state.embed_semaphore.acquire().await.map_err(|e|
+        (StatusCode::INTERNAL_SERVER_ERROR, format!("semaphore error: {e}")))?;
+
    // Embed the entry through the same text shape `rebuild` uses so
    // similarity math is comparable across seed + real entries.
    let tmp_entry = playbook_memory::PlaybookEntry {
@ -2549,17 +2561,47 @@ async fn revise_playbook_memory(
            "operation must match 'fill: Role xN in City, ST' shape".into()));
    }

-    let ts = chrono::Utc::now().to_rfc3339();
+    // Phase 27 — deterministic pid derived ONLY from content-shaped
+    // inputs (parent_id + operation + approach + context + sorted
+    // endorsed_names). Excluding wall-clock ts means two revise calls
+    // with identical payloads produce the same pid, which is the
+    // contract the docstring promises. On retry the caller gets back
+    // the same id and the pre-flight idempotency check below short-
+    // circuits to the existing entry instead of re-appending.
+    let mut names_sorted = req.endorsed_names.clone();
+    names_sorted.sort();
    use sha2::{Digest, Sha256};
    let mut h = Sha256::new();
-    h.update(ts.as_bytes());
-    h.update(b"|");
-    h.update(req.parent_id.as_bytes());
-    h.update(b"|");
-    h.update(req.operation.as_bytes());
+    h.update(req.parent_id.as_bytes()); h.update(b"|");
+    h.update(req.operation.as_bytes()); h.update(b"|");
+    h.update(req.approach.as_bytes());  h.update(b"|");
+    h.update(req.context.as_bytes());   h.update(b"|");
+    h.update(names_sorted.join(",").as_bytes());
    let bytes = h.finalize();
    let pid = format!("pb-rev-{}", bytes.iter().take(8).map(|b| format!("{b:02x}")).collect::<String>());

+    // Idempotency short-circuit — if this exact pid already exists in
+    // memory (from a prior successful revise with the same content),
+    // return it directly rather than re-appending or 400ing on the
+    // superseded-parent rejection. Walks the parent's chain via
+    // history() because the parent may itself have been superseded by
+    // our prior successful call.
+    let chain = state.playbook_memory.history(&req.parent_id).await;
+    if let Some(existing) = chain.iter().find(|e| e.playbook_id == pid) {
+        return Ok(Json(serde_json::json!({
+            "outcome": {
+                "parent_id": req.parent_id,
+                "parent_version": existing.version.saturating_sub(1),
+                "new_playbook_id": existing.playbook_id,
+                "new_version": existing.version,
+                "superseded_at": existing.superseded_at.clone().unwrap_or_default(),
+                "idempotent_return": true,
+            },
+            "entries_after": state.playbook_memory.entry_count().await,
+        })));
+    }
+
+    let ts = chrono::Utc::now().to_rfc3339();
    let new_entry = playbook_memory::PlaybookEntry {
        playbook_id: pid.clone(),
        operation: req.operation,
--- a/data/_catalog/manifests/07c119c3-2e59-463c-89a8-7980a849b6d6.json
+++ b/data/_catalog/manifests/07c119c3-2e59-463c-89a8-7980a849b6d6.json
@ -0,0 +1,159 @@
+{
+  "id": "07c119c3-2e59-463c-89a8-7980a849b6d6",
+  "name": "workers_500k",
+  "schema_fingerprint": "71686e497f5ee2fc77d50b13c39a46d89ca3d7673e9a0879e67c2db476764b25",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/workers_500k.parquet",
+      "size_bytes": 323794818,
+      "created_at": "2026-04-17T05:57:41.972944716Z"
+    }
+  ],
+  "created_at": "2026-04-17T05:57:41.972945960Z",
+  "updated_at": "2026-04-17T05:57:41.973093967Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": "pii",
+  "columns": [
+    {
+      "name": "worker_id",
+      "data_type": "Int64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "name",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "role",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "email",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "phone",
+      "data_type": "Int64",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "city",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "state",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "zip",
+      "data_type": "Int64",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "skills",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "certifications",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "archetype",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "reliability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "responsiveness",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "engagement",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "compliance",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "availability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "communications",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "resume_text",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    }
+  ],
+  "lineage": {
+    "source_system": "csv",
+    "source_file": "workers_500k.csv",
+    "ingest_job": "ingest-1776405461972",
+    "ingest_timestamp": "2026-04-17T05:57:41.972944716Z",
+    "parent_datasets": []
+  },
+  "freshness": null,
+  "tags": [],
+  "row_count": 500000,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/2b8e8b01-a44e-49c5-a103-a1b55898bee1.json
+++ b/data/_catalog/manifests/2b8e8b01-a44e-49c5-a103-a1b55898bee1.json
@ -1,5 +1,5 @@
 {
-  "id": "443d63f1-b0ed-4d4b-8e5a-ce59c097b97b",
+  "id": "2b8e8b01-a44e-49c5-a103-a1b55898bee1",
  "name": "kb_memory_entries",
  "schema_fingerprint": "15dbebd0abb906577e11cbb73083abeb8961b8c603a2472cafa8c46100c3fb1c",
  "objects": [
@ -7,11 +7,11 @@
      "bucket": "primary",
      "key": "datasets/kb_memory_entries.parquet",
      "size_bytes": 8795,
-      "created_at": "2026-04-17T02:54:43.330897823Z"
+      "created_at": "2026-04-17T05:22:06.356227657Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.330898330Z",
-  "updated_at": "2026-04-17T02:54:43.330972517Z",
+  "created_at": "2026-04-17T05:22:06.356228933Z",
+  "updated_at": "2026-04-17T05:22:06.356776285Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -97,8 +97,8 @@
  "lineage": {
    "source_system": "postgresql",
    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776394483330",
-    "ingest_timestamp": "2026-04-17T02:54:43.330897823Z",
+    "ingest_job": "pg-stream-1776403326356",
+    "ingest_timestamp": "2026-04-17T05:22:06.356227657Z",
    "parent_datasets": []
  },
  "freshness": null,
--- a/data/_catalog/manifests/2db2d92f-1171-4f2f-a707-ad387b6cd464.json
+++ b/data/_catalog/manifests/2db2d92f-1171-4f2f-a707-ad387b6cd464.json
@ -0,0 +1,117 @@
+{
+  "id": "2db2d92f-1171-4f2f-a707-ad387b6cd464",
+  "name": "onboard_live_1776729919",
+  "schema_fingerprint": "9cdf2fc924bad875e63bd12a82dfb1cd080c5cbba71f671b4a2d41e746df187d",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/onboard_live_1776729919.parquet",
+      "size_bytes": 31444,
+      "created_at": "2026-04-21T00:05:19.717239967Z"
+    }
+  ],
+  "created_at": "2026-04-21T00:05:19.717241621Z",
+  "updated_at": "2026-04-21T00:05:19.717370050Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": "pii",
+  "columns": [
+    {
+      "name": "worker_id",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "name",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "role",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "city",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "state",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "email",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "phone",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "skills",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "certifications",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "availability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "reliability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "archetype",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    }
+  ],
+  "lineage": {
+    "source_system": "csv",
+    "source_file": "sample_1776729919.csv",
+    "ingest_job": "ingest-1776729919717",
+    "ingest_timestamp": "2026-04-21T00:05:19.717239967Z",
+    "parent_datasets": []
+  },
+  "freshness": null,
+  "tags": [],
+  "row_count": 173,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/3178f0da-5985-4731-b343-162dd415f3be.json
+++ b/data/_catalog/manifests/3178f0da-5985-4731-b343-162dd415f3be.json
@ -1,5 +1,5 @@
 {
-  "id": "ab2b610a-cee7-40e1-9dab-c709e2292709",
+  "id": "3178f0da-5985-4731-b343-162dd415f3be",
  "name": "kb_self_reports",
  "schema_fingerprint": "76382b028b92bb0b361306dced3d773af0ee8de6aa160cedfc4d649f2860167a",
  "objects": [
@ -7,11 +7,11 @@
      "bucket": "primary",
      "key": "datasets/kb_self_reports.parquet",
      "size_bytes": 51134,
-      "created_at": "2026-04-17T02:54:43.335457986Z"
+      "created_at": "2026-04-17T04:14:34.355419647Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.335458749Z",
-  "updated_at": "2026-04-17T02:54:43.335536869Z",
+  "created_at": "2026-04-17T04:14:34.355420701Z",
+  "updated_at": "2026-04-17T06:06:21.358604060Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -62,8 +62,8 @@
  "lineage": {
    "source_system": "postgresql",
    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776394483335",
-    "ingest_timestamp": "2026-04-17T02:54:43.335457986Z",
+    "ingest_job": "pg-stream-1776399274355",
+    "ingest_timestamp": "2026-04-17T04:14:34.355419647Z",
    "parent_datasets": []
  },
  "freshness": null,
--- a/data/_catalog/manifests/32ee74a0-59b4-4e5b-8edb-70c9347a4bf3.json
+++ b/data/_catalog/manifests/32ee74a0-59b4-4e5b-8edb-70c9347a4bf3.json
@ -0,0 +1,26 @@
+{
+  "id": "32ee74a0-59b4-4e5b-8edb-70c9347a4bf3",
+  "name": "successful_playbooks_live",
+  "schema_fingerprint": "4e28e94650d9f5689e952674aebfd09db70edfcba617713081c0a83bf59121ce",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/successful_playbooks_live.parquet",
+      "size_bytes": 284341,
+      "created_at": "2026-04-22T03:28:28.343840136Z"
+    }
+  ],
+  "created_at": "2026-04-20T11:07:57.308050648Z",
+  "updated_at": "2026-04-22T03:28:28.343843823Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": null,
+  "columns": [],
+  "lineage": null,
+  "freshness": null,
+  "tags": [],
+  "row_count": null,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/33544879-fcfa-4458-b782-a4ca8dafed08.json
+++ b/data/_catalog/manifests/33544879-fcfa-4458-b782-a4ca8dafed08.json
@ -0,0 +1,159 @@
+{
+  "id": "33544879-fcfa-4458-b782-a4ca8dafed08",
+  "name": "workers_100k",
+  "schema_fingerprint": "1cc2a0caf174aa9bf67b1e2cb7858a312a0c94d604d906f807eb5610864ff70e",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/workers_100k.parquet",
+      "size_bytes": 65225188,
+      "created_at": "2026-04-17T04:54:03.306737596Z"
+    }
+  ],
+  "created_at": "2026-04-17T04:54:03.306740279Z",
+  "updated_at": "2026-04-17T04:54:03.306927852Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": "pii",
+  "columns": [
+    {
+      "name": "worker_id",
+      "data_type": "Int64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "name",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "role",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "email",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "phone",
+      "data_type": "Int64",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "city",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "state",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "zip",
+      "data_type": "Int64",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "skills",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "certifications",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "archetype",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "reliability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "responsiveness",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "engagement",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "compliance",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "availability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "communications",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "resume_text",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    }
+  ],
+  "lineage": {
+    "source_system": "csv",
+    "source_file": "workers_100k.csv",
+    "ingest_job": "ingest-1776401643306",
+    "ingest_timestamp": "2026-04-17T04:54:03.306737596Z",
+    "parent_datasets": []
+  },
+  "freshness": null,
+  "tags": [],
+  "row_count": 100000,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/35f36c38-5cd3-438e-b93d-7ff7720f0346.json
+++ b/data/_catalog/manifests/35f36c38-5cd3-438e-b93d-7ff7720f0346.json
@ -1,5 +1,5 @@
 {
-  "id": "ce3fe55b-61cd-4ac4-95de-b91f9186d6e3",
+  "id": "35f36c38-5cd3-438e-b93d-7ff7720f0346",
  "name": "kb_meta_runs",
  "schema_fingerprint": "68f2c0d7a3ceb0aaa3c17c64900704519c72d213161bc9e5179c42ee53f6d0df",
  "objects": [
@ -7,11 +7,11 @@
      "bucket": "primary",
      "key": "datasets/kb_meta_runs.parquet",
      "size_bytes": 886387,
-      "created_at": "2026-04-17T02:54:43.299467359Z"
+      "created_at": "2026-04-17T05:22:06.314413387Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.299468094Z",
-  "updated_at": "2026-04-17T04:23:16.134134852Z",
+  "created_at": "2026-04-17T05:22:06.314414515Z",
+  "updated_at": "2026-04-17T06:06:21.300610797Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -80,13 +80,7 @@
      "is_pii": false
    }
  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776399796133",
-    "ingest_timestamp": "2026-04-17T04:23:16.133868872Z",
-    "parent_datasets": []
-  },
+  "lineage": null,
  "freshness": null,
  "tags": [],
  "row_count": 48,
--- a/data/_catalog/manifests/36b0f141-93a1-4776-beef-b0b8d17a12a9.json
+++ b/data/_catalog/manifests/36b0f141-93a1-4776-beef-b0b8d17a12a9.json
@ -1,96 +0,0 @@
-{
-  "id": "36b0f141-93a1-4776-beef-b0b8d17a12a9",
-  "name": "demo_customers",
-  "schema_fingerprint": "7af9708f2366f790e0ae03db6f681f5a4d4b8af4d1527b091bedfee1be81d304",
-  "objects": [
-    {
-      "bucket": "primary",
-      "key": "datasets/demo_customers.parquet",
-      "size_bytes": 4583,
-      "created_at": "2026-04-17T01:35:01.636480725Z"
-    }
-  ],
-  "created_at": "2026-04-17T01:35:01.636481698Z",
-  "updated_at": "2026-04-17T01:35:01.636776595Z",
-  "description": "",
-  "owner": "",
-  "sensitivity": "pii",
-  "columns": [
-    {
-      "name": "id",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "name",
-      "data_type": "Utf8",
-      "sensitivity": "pii",
-      "description": "",
-      "is_pii": true
-    },
-    {
-      "name": "email",
-      "data_type": "Utf8",
-      "sensitivity": "pii",
-      "description": "",
-      "is_pii": true
-    },
-    {
-      "name": "city",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "tier",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "lifetime_spend",
-      "data_type": "Float64",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "is_active",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "signed_up_at",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "notes",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    }
-  ],
-  "lineage": {
-    "source_system": "mysql",
-    "source_file": "dsn: mysql://lh:***@127.0.0.1:3306/lh_demo",
-    "ingest_job": "scheduled-mysql-1776389701636",
-    "ingest_timestamp": "2026-04-17T01:35:01.636480725Z",
-    "parent_datasets": []
-  },
-  "freshness": null,
-  "tags": [],
-  "row_count": 11,
-  "last_embedded_at": null,
-  "embedding_stale_since": null,
-  "embedding_refresh_policy": null
-}
--- a/data/_catalog/manifests/564b00ae-cbf3-4efd-aa55-84cdb6d2b0b7.json
+++ b/data/_catalog/manifests/564b00ae-cbf3-4efd-aa55-84cdb6d2b0b7.json
@ -0,0 +1,117 @@
+{
+  "id": "564b00ae-cbf3-4efd-aa55-84cdb6d2b0b7",
+  "name": "client_workerskjkk",
+  "schema_fingerprint": "cdfe85348885ddf329e5e6e9bf0e2c75c92d1a86fdb0fd3875ed46e3f93c4d82",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/client_workerskjkk.parquet",
+      "size_bytes": 32201,
+      "created_at": "2026-04-21T00:49:04.623625149Z"
+    }
+  ],
+  "created_at": "2026-04-21T00:49:04.623626738Z",
+  "updated_at": "2026-04-21T00:49:04.623901788Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": "pii",
+  "columns": [
+    {
+      "name": "worker_id",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "name",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "role",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "city",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "state",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "email",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "phone",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "skills",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "certifications",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "availability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "reliability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "archetype",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    }
+  ],
+  "lineage": {
+    "source_system": "csv",
+    "source_file": "staffing_roster_sample.csv",
+    "ingest_job": "ingest-1776732544623",
+    "ingest_timestamp": "2026-04-21T00:49:04.623625149Z",
+    "parent_datasets": []
+  },
+  "freshness": null,
+  "tags": [],
+  "row_count": 180,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/5bab2fac-8bdd-41b7-a993-ed48fecc60b3.json
+++ b/data/_catalog/manifests/5bab2fac-8bdd-41b7-a993-ed48fecc60b3.json
@ -0,0 +1,117 @@
+{
+  "id": "5bab2fac-8bdd-41b7-a993-ed48fecc60b3",
+  "name": "onboard_demo2",
+  "schema_fingerprint": "274575423f7d2648cea49a628a2e827dbd8822cef1f4eebfcf8e0a5f8602fbf7",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/onboard_demo2.parquet",
+      "size_bytes": 7833,
+      "created_at": "2026-04-20T23:13:36.886222056Z"
+    }
+  ],
+  "created_at": "2026-04-20T23:13:36.886223472Z",
+  "updated_at": "2026-04-20T23:13:36.886426834Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": "pii",
+  "columns": [
+    {
+      "name": "worker_id",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "name",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "role",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "city",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "state",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "email",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "phone",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "skills",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "certifications",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "availability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "reliability",
+      "data_type": "Float64",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "archetype",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    }
+  ],
+  "lineage": {
+    "source_system": "csv",
+    "source_file": "staffing_roster_sample.csv",
+    "ingest_job": "ingest-1776726816886",
+    "ingest_timestamp": "2026-04-20T23:13:36.886222056Z",
+    "parent_datasets": []
+  },
+  "freshness": null,
+  "tags": [],
+  "row_count": 25,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/8192d934-fc90-46dd-b8bd-c443a4743b19.json
+++ b/data/_catalog/manifests/8192d934-fc90-46dd-b8bd-c443a4743b19.json
@ -1,96 +0,0 @@
-{
-  "id": "8192d934-fc90-46dd-b8bd-c443a4743b19",
-  "name": "demo_customers",
-  "schema_fingerprint": "7af9708f2366f790e0ae03db6f681f5a4d4b8af4d1527b091bedfee1be81d304",
-  "objects": [
-    {
-      "bucket": "primary",
-      "key": "datasets/demo_customers.parquet",
-      "size_bytes": 4519,
-      "created_at": "2026-04-17T01:00:48.361437263Z"
-    }
-  ],
-  "created_at": "2026-04-17T01:00:48.361437860Z",
-  "updated_at": "2026-04-17T01:00:48.361681724Z",
-  "description": "",
-  "owner": "",
-  "sensitivity": "pii",
-  "columns": [
-    {
-      "name": "id",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "name",
-      "data_type": "Utf8",
-      "sensitivity": "pii",
-      "description": "",
-      "is_pii": true
-    },
-    {
-      "name": "email",
-      "data_type": "Utf8",
-      "sensitivity": "pii",
-      "description": "",
-      "is_pii": true
-    },
-    {
-      "name": "city",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "tier",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "lifetime_spend",
-      "data_type": "Float64",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "is_active",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "signed_up_at",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "notes",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    }
-  ],
-  "lineage": {
-    "source_system": "mysql",
-    "source_file": "dsn: mysql://lh:***@127.0.0.1:3306/lh_demo",
-    "ingest_job": "mysql-stream-1776387648361",
-    "ingest_timestamp": "2026-04-17T01:00:48.361437263Z",
-    "parent_datasets": []
-  },
-  "freshness": null,
-  "tags": [],
-  "row_count": 10,
-  "last_embedded_at": null,
-  "embedding_stale_since": null,
-  "embedding_refresh_policy": null
-}
--- a/data/_catalog/manifests/8621894c-f9c6-4eea-8e71-e80b9cdc9a36.json
+++ b/data/_catalog/manifests/8621894c-f9c6-4eea-8e71-e80b9cdc9a36.json
@ -1,90 +0,0 @@
-{
-  "id": "8621894c-f9c6-4eea-8e71-e80b9cdc9a36",
-  "name": "demo_customers",
-  "schema_fingerprint": "7af9708f2366f790e0ae03db6f681f5a4d4b8af4d1527b091bedfee1be81d304",
-  "objects": [
-    {
-      "bucket": "primary",
-      "key": "datasets/demo_customers.parquet",
-      "size_bytes": 4583,
-      "created_at": "2026-04-17T01:34:30.158848713Z"
-    }
-  ],
-  "created_at": "2026-04-17T01:34:30.158849587Z",
-  "updated_at": "2026-04-17T05:56:27.642214035Z",
-  "description": "",
-  "owner": "",
-  "sensitivity": null,
-  "columns": [
-    {
-      "name": "id",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "name",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "email",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "city",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "tier",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "lifetime_spend",
-      "data_type": "Float64",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "is_active",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "signed_up_at",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "notes",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    }
-  ],
-  "lineage": null,
-  "freshness": null,
-  "tags": [],
-  "row_count": 11,
-  "last_embedded_at": null,
-  "embedding_stale_since": null,
-  "embedding_refresh_policy": null
-}
--- a/data/_catalog/manifests/8f37454b-27c8-4f61-aeca-8e48070db552.json
+++ b/data/_catalog/manifests/8f37454b-27c8-4f61-aeca-8e48070db552.json
@ -1,96 +0,0 @@
-{
-  "id": "8f37454b-27c8-4f61-aeca-8e48070db552",
-  "name": "demo_customers",
-  "schema_fingerprint": "7af9708f2366f790e0ae03db6f681f5a4d4b8af4d1527b091bedfee1be81d304",
-  "objects": [
-    {
-      "bucket": "primary",
-      "key": "datasets/demo_customers.parquet",
-      "size_bytes": 4519,
-      "created_at": "2026-04-17T01:34:00.160615345Z"
-    }
-  ],
-  "created_at": "2026-04-17T01:34:00.160616631Z",
-  "updated_at": "2026-04-17T02:45:58.283076939Z",
-  "description": "",
-  "owner": "",
-  "sensitivity": "pii",
-  "columns": [
-    {
-      "name": "id",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "name",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "email",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "city",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "tier",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "lifetime_spend",
-      "data_type": "Float64",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "is_active",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "signed_up_at",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "notes",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    }
-  ],
-  "lineage": {
-    "source_system": "mysql",
-    "source_file": "dsn: mysql://lh:***@127.0.0.1:3306/lh_demo",
-    "ingest_job": "scheduled-mysql-1776389700165",
-    "ingest_timestamp": "2026-04-17T01:35:00.165218923Z",
-    "parent_datasets": []
-  },
-  "freshness": null,
-  "tags": [],
-  "row_count": 11,
-  "last_embedded_at": null,
-  "embedding_stale_since": null,
-  "embedding_refresh_policy": null
-}
--- a/data/_catalog/manifests/909a1fd1-0924-4a5e-a527-d249a07540a1.json
+++ b/data/_catalog/manifests/909a1fd1-0924-4a5e-a527-d249a07540a1.json
@ -1,17 +1,17 @@
 {
-  "id": "14698884-071c-4adb-ae50-cfb8d885656c",
+  "id": "909a1fd1-0924-4a5e-a527-d249a07540a1",
  "name": "kb_response_cache",
  "schema_fingerprint": "c90d7be310b5025d2c4d398cf07692d8d9bb46ed591c0a87b339bafcac9ddeed",
  "objects": [
    {
      "bucket": "primary",
      "key": "datasets/kb_response_cache.parquet",
-      "size_bytes": 8360233,
-      "created_at": "2026-04-17T02:54:43.145342968Z"
+      "size_bytes": 8464841,
+      "created_at": "2026-04-17T05:22:06.168071783Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.145343876Z",
-  "updated_at": "2026-04-17T02:54:43.145629696Z",
+  "created_at": "2026-04-17T05:22:06.168072339Z",
+  "updated_at": "2026-04-17T06:06:21.356504054Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -94,16 +94,10 @@
      "is_pii": false
    }
  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776394483145",
-    "ingest_timestamp": "2026-04-17T02:54:43.145342968Z",
-    "parent_datasets": []
-  },
+  "lineage": null,
  "freshness": null,
  "tags": [],
-  "row_count": 195,
+  "row_count": 197,
  "last_embedded_at": null,
  "embedding_stale_since": null,
  "embedding_refresh_policy": null
--- a/data/_catalog/manifests/9861b817-f571-496b-9d7b-c4d385e51ca5.json
+++ b/data/_catalog/manifests/9861b817-f571-496b-9d7b-c4d385e51ca5.json
@ -0,0 +1,75 @@
+{
+  "id": "9861b817-f571-496b-9d7b-c4d385e51ca5",
+  "name": "sparse_workers",
+  "schema_fingerprint": "1055d5e17fa3285a1cac7d4c949ae03699cd046af671f22d0d1cf4d0024c66a4",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/sparse_workers.parquet",
+      "size_bytes": 7619,
+      "created_at": "2026-04-17T20:31:24.968688194Z"
+    }
+  ],
+  "created_at": "2026-04-17T20:31:24.968689336Z",
+  "updated_at": "2026-04-17T20:31:24.968969734Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": "pii",
+  "columns": [
+    {
+      "name": "name",
+      "data_type": "Utf8",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "phone",
+      "data_type": "Int64",
+      "sensitivity": "pii",
+      "description": "",
+      "is_pii": true
+    },
+    {
+      "name": "role",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "city",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "state",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "notes",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    }
+  ],
+  "lineage": {
+    "source_system": "csv",
+    "source_file": "sparse_workers.csv",
+    "ingest_job": "ingest-1776457884968",
+    "ingest_timestamp": "2026-04-17T20:31:24.968688194Z",
+    "parent_datasets": []
+  },
+  "freshness": null,
+  "tags": [],
+  "row_count": 200,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/a2206c79-28d6-40f2-b97b-61a5df5bee06.json
+++ b/data/_catalog/manifests/a2206c79-28d6-40f2-b97b-61a5df5bee06.json
@ -1,17 +1,17 @@
 {
-  "id": "b076cff9-c522-48fc-b892-b7cba7b29c9b",
+  "id": "a2206c79-28d6-40f2-b97b-61a5df5bee06",
  "name": "kb_threat_intel",
  "schema_fingerprint": "df1e126046147b3de42086880e10c3501a3a615ecddf336bc24957a24c321241",
  "objects": [
    {
      "bucket": "primary",
      "key": "datasets/kb_threat_intel.parquet",
-      "size_bytes": 247112,
-      "created_at": "2026-04-17T02:54:43.321496407Z"
+      "size_bytes": 247318,
+      "created_at": "2026-04-17T04:23:16.149697926Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.321497035Z",
-  "updated_at": "2026-04-17T02:54:43.321758360Z",
+  "created_at": "2026-04-17T04:23:16.149698245Z",
+  "updated_at": "2026-04-17T05:56:44.994303201Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -227,13 +227,7 @@
      "is_pii": false
    }
  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776394483321",
-    "ingest_timestamp": "2026-04-17T02:54:43.321496407Z",
-    "parent_datasets": []
-  },
+  "lineage": null,
  "freshness": null,
  "tags": [],
  "row_count": 54,
--- a/data/_catalog/manifests/a99da7e2-8f42-4ff3-8fa7-679d4e514581.json
+++ b/data/_catalog/manifests/a99da7e2-8f42-4ff3-8fa7-679d4e514581.json
@ -1,17 +1,17 @@
 {
-  "id": "eb370ff1-c037-476f-8c3f-61b96b3a6046",
+  "id": "a99da7e2-8f42-4ff3-8fa7-679d4e514581",
  "name": "kb_response_cache_history",
  "schema_fingerprint": "9a05c209a51f9543bd7dc9387695b0c67a6abc135b53eaeec458140712bfcf50",
  "objects": [
    {
      "bucket": "primary",
      "key": "datasets/kb_response_cache_history.parquet",
-      "size_bytes": 8292303,
-      "created_at": "2026-04-17T02:54:43.242879858Z"
+      "size_bytes": 8396568,
+      "created_at": "2026-04-17T04:14:34.260241351Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.242880413Z",
-  "updated_at": "2026-04-17T05:56:45.008387335Z",
+  "created_at": "2026-04-17T04:14:34.260242474Z",
+  "updated_at": "2026-04-17T06:06:21.334716946Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -69,8 +69,8 @@
  "lineage": {
    "source_system": "postgresql",
    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776394483242",
-    "ingest_timestamp": "2026-04-17T02:54:43.242879858Z",
+    "ingest_job": "pg-stream-1776403326260",
+    "ingest_timestamp": "2026-04-17T05:22:06.260249210Z",
    "parent_datasets": []
  },
  "freshness": null,
--- a/data/_catalog/manifests/c2f0acdb-df65-4c21-9633-08af4248c85b.json
+++ b/data/_catalog/manifests/c2f0acdb-df65-4c21-9633-08af4248c85b.json
@ -1,5 +1,5 @@
 {
-  "id": "a5858f94-267f-4382-ba32-0934e1b984f7",
+  "id": "c2f0acdb-df65-4c21-9633-08af4248c85b",
  "name": "kb_lab_trials",
  "schema_fingerprint": "1d5782349402439a7e44efd0ccab9ae64ac3044221adef9e828b60b8bbb44dd5",
  "objects": [
@ -7,11 +7,11 @@
      "bucket": "primary",
      "key": "datasets/kb_lab_trials.parquet",
      "size_bytes": 68221,
-      "created_at": "2026-04-17T02:54:43.423304357Z"
+      "created_at": "2026-04-17T05:22:06.382106104Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.423304931Z",
-  "updated_at": "2026-04-17T04:23:16.166692762Z",
+  "created_at": "2026-04-17T05:22:06.382107354Z",
+  "updated_at": "2026-04-17T06:06:21.358304760Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -87,13 +87,7 @@
      "is_pii": false
    }
  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776399796166",
-    "ingest_timestamp": "2026-04-17T04:23:16.166457198Z",
-    "parent_datasets": []
-  },
+  "lineage": null,
  "freshness": null,
  "tags": [],
  "row_count": 41,
--- a/data/_catalog/manifests/d668a036-7b50-4845-8194-6a03ef370c8d.json
+++ b/data/_catalog/manifests/d668a036-7b50-4845-8194-6a03ef370c8d.json
@ -1,5 +1,5 @@
 {
-  "id": "03c62234-f9f8-40e9-a27e-d5e09ab2713d",
+  "id": "d668a036-7b50-4845-8194-6a03ef370c8d",
  "name": "kb_pipeline_runs",
  "schema_fingerprint": "c019b81feb58ff2aefe4cbe700056a100e25716f2aa8e8415ba8f20656812f75",
  "objects": [
@ -7,11 +7,11 @@
      "bucket": "primary",
      "key": "datasets/kb_pipeline_runs.parquet",
      "size_bytes": 1518415,
-      "created_at": "2026-04-17T02:54:43.281447563Z"
+      "created_at": "2026-04-17T05:22:06.289208160Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.281448143Z",
-  "updated_at": "2026-04-17T05:56:44.999492793Z",
+  "created_at": "2026-04-17T05:22:06.289209657Z",
+  "updated_at": "2026-04-17T06:06:21.311384739Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -87,13 +87,7 @@
      "is_pii": false
    }
  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776403326289",
-    "ingest_timestamp": "2026-04-17T05:22:06.289208160Z",
-    "parent_datasets": []
-  },
+  "lineage": null,
  "freshness": null,
  "tags": [],
  "row_count": 195,
--- a/data/_catalog/manifests/d92c5d57-14f9-4ce0-9c4f-19c6db94a170.json
+++ b/data/_catalog/manifests/d92c5d57-14f9-4ce0-9c4f-19c6db94a170.json
@ -1,17 +1,17 @@
 {
-  "id": "7ad27f97-622e-49e3-9c38-327cb2334fa1",
+  "id": "d92c5d57-14f9-4ce0-9c4f-19c6db94a170",
  "name": "kb_team_runs",
  "schema_fingerprint": "d704ee2b9b434774aed2258da2fddcdcbab226547a011ba24d4281253657bdd3",
  "objects": [
    {
      "bucket": "primary",
      "key": "datasets/kb_team_runs.parquet",
-      "size_bytes": 19185025,
-      "created_at": "2026-04-17T02:54:43.043742588Z"
+      "size_bytes": 19293718,
+      "created_at": "2026-04-17T05:22:06.071898629Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.043743165Z",
-  "updated_at": "2026-04-17T02:54:43.043968762Z",
+  "created_at": "2026-04-17T05:22:06.071899235Z",
+  "updated_at": "2026-04-17T05:56:44.992569105Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
@ -108,16 +108,10 @@
      "is_pii": false
    }
  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776394483043",
-    "ingest_timestamp": "2026-04-17T02:54:43.043742588Z",
-    "parent_datasets": []
-  },
+  "lineage": null,
  "freshness": null,
  "tags": [],
-  "row_count": 588,
+  "row_count": 590,
  "last_embedded_at": null,
  "embedding_stale_since": null,
  "embedding_refresh_policy": null
--- a/data/_catalog/manifests/dab56959-d79b-4c68-a6bd-e5fc487621ff.json
+++ b/data/_catalog/manifests/dab56959-d79b-4c68-a6bd-e5fc487621ff.json
@ -0,0 +1,68 @@
+{
+  "id": "dab56959-d79b-4c68-a6bd-e5fc487621ff",
+  "name": "successful_playbooks",
+  "schema_fingerprint": "30b52caca7b6303852cd648882eae9ed87205b80e5e570f6fda08dc8735d9b06",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/successful_playbooks.parquet",
+      "size_bytes": 2540,
+      "created_at": "2026-04-17T22:25:34.467634304Z"
+    }
+  ],
+  "created_at": "2026-04-17T22:25:34.467635066Z",
+  "updated_at": "2026-04-19T08:44:08.533058251Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": null,
+  "columns": [
+    {
+      "name": "timestamp",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "operation",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "approach",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "result",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "context",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    }
+  ],
+  "lineage": {
+    "source_system": "csv",
+    "source_file": "playbook.csv",
+    "ingest_job": "ingest-1776588248532",
+    "ingest_timestamp": "2026-04-19T08:44:08.532686879Z",
+    "parent_datasets": []
+  },
+  "freshness": null,
+  "tags": [],
+  "row_count": 29,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/e7304f05-5278-4e17-961a-51f2588fd2aa.json
+++ b/data/_catalog/manifests/e7304f05-5278-4e17-961a-51f2588fd2aa.json
@ -1,243 +0,0 @@
-{
-  "id": "e7304f05-5278-4e17-961a-51f2588fd2aa",
-  "name": "threat_intel",
-  "schema_fingerprint": "df1e126046147b3de42086880e10c3501a3a615ecddf336bc24957a24c321241",
-  "objects": [
-    {
-      "bucket": "primary",
-      "key": "datasets/threat_intel.parquet",
-      "size_bytes": 111130,
-      "created_at": "2026-03-28T01:14:03.054140697Z"
-    }
-  ],
-  "created_at": "2026-03-28T01:14:03.054141294Z",
-  "updated_at": "2026-04-17T05:56:45.009810081Z",
-  "description": "",
-  "owner": "",
-  "sensitivity": null,
-  "columns": [
-    {
-      "name": "id",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "ip",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "threat_level",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "classification",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "confidence",
-      "data_type": "Float64",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "summary",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "indicators",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "recommendation",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "pattern",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "attack_type",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "likely_automated",
-      "data_type": "Boolean",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "country",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "country_code",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "city",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "isp",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "org",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "asn",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "is_proxy",
-      "data_type": "Boolean",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "is_hosting",
-      "data_type": "Boolean",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "open_ports",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "blocklist_count",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "blocklist_total",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "blocklists_blocked",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "reverse_dns",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "traceroute",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "log_count",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "banned",
-      "data_type": "Boolean",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "enriched_at",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "updated_at",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "raw_data",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    }
-  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://postgres@127.0.0.1:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776326353882",
-    "ingest_timestamp": "2026-04-16T07:59:13.882669337Z",
-    "parent_datasets": []
-  },
-  "freshness": null,
-  "tags": [],
-  "row_count": 20,
-  "last_embedded_at": "2026-04-16T15:08:32.348412159Z",
-  "embedding_stale_since": null,
-  "embedding_refresh_policy": null
-}
--- a/data/_catalog/manifests/eeb0d288-e227-478d-9407-78a43b623c5c.json
+++ b/data/_catalog/manifests/eeb0d288-e227-478d-9407-78a43b623c5c.json
@ -1,5 +1,5 @@
 {
-  "id": "765985e6-ea16-46b5-a6c0-ddaff346827f",
+  "id": "eeb0d288-e227-478d-9407-78a43b623c5c",
  "name": "kb_meta_pipelines",
  "schema_fingerprint": "cabe1d7fc442e1dfcfaabf663509d590c6edc84b445a91acaf0ae68b94aff518",
  "objects": [
@ -7,14 +7,14 @@
      "bucket": "primary",
      "key": "datasets/kb_meta_pipelines.parquet",
      "size_bytes": 32085,
-      "created_at": "2026-04-17T02:54:43.307966396Z"
+      "created_at": "2026-04-17T04:14:34.315211960Z"
    }
  ],
-  "created_at": "2026-04-17T02:54:43.307966799Z",
-  "updated_at": "2026-04-17T04:23:16.140272666Z",
+  "created_at": "2026-04-17T04:14:34.315212764Z",
+  "updated_at": "2026-04-17T06:06:21.357779083Z",
  "description": "",
  "owner": "",
-  "sensitivity": "pii",
+  "sensitivity": null,
  "columns": [
    {
      "name": "id",
@ -26,9 +26,9 @@
    {
      "name": "name",
      "data_type": "Utf8",
-      "sensitivity": "pii",
+      "sensitivity": null,
      "description": "",
-      "is_pii": true
+      "is_pii": false
    },
    {
      "name": "data_source",
@ -108,13 +108,7 @@
      "is_pii": false
    }
  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776399796139",
-    "ingest_timestamp": "2026-04-17T04:23:16.139992290Z",
-    "parent_datasets": []
-  },
+  "lineage": null,
  "freshness": null,
  "tags": [],
  "row_count": 6,
--- a/data/_catalog/manifests/ef6c0acf-de7a-4be0-877e-e0c2889f390f.json
+++ b/data/_catalog/manifests/ef6c0acf-de7a-4be0-877e-e0c2889f390f.json
@ -7,11 +7,11 @@
      "bucket": "primary",
      "key": "datasets/threat_intel.parquet",
      "size_bytes": 247112,
-      "created_at": "2026-04-16T07:59:13.882669337Z"
+      "created_at": "2026-04-19T11:19:40.517865325Z"
    }
  ],
  "created_at": "2026-04-16T07:59:13.882669807Z",
-  "updated_at": "2026-04-17T06:06:21.359639461Z",
+  "updated_at": "2026-04-19T11:19:40.517873046Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
--- a/data/_catalog/manifests/f2fda821-2f86-4c81-af58-488d184d8243.json
+++ b/data/_catalog/manifests/f2fda821-2f86-4c81-af58-488d184d8243.json
@ -0,0 +1,47 @@
+{
+  "id": "f2fda821-2f86-4c81-af58-488d184d8243",
+  "name": "resumes",
+  "schema_fingerprint": "a2b2c191412faf317800637da5887d30d749d5ba0222d0a8edd12cde08daeb8a",
+  "objects": [
+    {
+      "bucket": "primary",
+      "key": "datasets/resumes.parquet",
+      "size_bytes": 3159,
+      "created_at": "2026-04-19T11:26:29.607950059Z"
+    }
+  ],
+  "created_at": "2026-04-19T11:26:29.607951447Z",
+  "updated_at": "2026-04-19T11:26:29.608054435Z",
+  "description": "",
+  "owner": "",
+  "sensitivity": null,
+  "columns": [
+    {
+      "name": "doc_id",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    },
+    {
+      "name": "resume_text",
+      "data_type": "Utf8",
+      "sensitivity": null,
+      "description": "",
+      "is_pii": false
+    }
+  ],
+  "lineage": {
+    "source_system": "ndjson",
+    "source_file": "resumes.ndjson",
+    "ingest_job": "ingest-1776597989607",
+    "ingest_timestamp": "2026-04-19T11:26:29.607950059Z",
+    "parent_datasets": []
+  },
+  "freshness": null,
+  "tags": [],
+  "row_count": 10,
+  "last_embedded_at": null,
+  "embedding_stale_since": null,
+  "embedding_refresh_policy": null
+}
--- a/data/_catalog/manifests/f429ac10-bc7d-41cf-b30f-a9590760ee32.json
+++ b/data/_catalog/manifests/f429ac10-bc7d-41cf-b30f-a9590760ee32.json
@ -1,124 +0,0 @@
-{
-  "id": "f429ac10-bc7d-41cf-b30f-a9590760ee32",
-  "name": "kb_team_runs",
-  "schema_fingerprint": "d704ee2b9b434774aed2258da2fddcdcbab226547a011ba24d4281253657bdd3",
-  "objects": [
-    {
-      "bucket": "primary",
-      "key": "datasets/kb_team_runs.parquet",
-      "size_bytes": 19616539,
-      "created_at": "2026-04-16T06:07:08.188675427Z"
-    }
-  ],
-  "created_at": "2026-04-16T06:07:08.188676317Z",
-  "updated_at": "2026-04-17T05:56:27.639888971Z",
-  "description": "",
-  "owner": "",
-  "sensitivity": null,
-  "columns": [
-    {
-      "name": "id",
-      "data_type": "Int32",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "mode",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "prompt",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "config",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "responses",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "models_used",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "created_at",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "archived",
-      "data_type": "Boolean",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "tags",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "notes",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "quality_score",
-      "data_type": "Float64",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "score_method",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    },
-    {
-      "name": "score_metadata",
-      "data_type": "Utf8",
-      "sensitivity": null,
-      "description": "",
-      "is_pii": false
-    }
-  ],
-  "lineage": {
-    "source_system": "postgresql",
-    "source_file": "dsn: postgresql://kbuser@localhost:5432/knowledge_base",
-    "ingest_job": "pg-stream-1776403326071",
-    "ingest_timestamp": "2026-04-17T05:22:06.071898629Z",
-    "parent_datasets": []
-  },
-  "freshness": null,
-  "tags": [],
-  "row_count": 590,
-  "last_embedded_at": "2026-04-16T15:08:32.471504656Z",
-  "embedding_stale_since": "2026-04-17T05:22:06.072418149Z",
-  "embedding_refresh_policy": null
-}
--- a/data/datasets/candidates.parquet
+++ b/data/datasets/candidates.parquet
--- a/data/datasets/placements.parquet
+++ b/data/datasets/placements.parquet
--- a/docs/DECISIONS.md
+++ b/docs/DECISIONS.md
@ -94,3 +94,8 @@
 **Date:** 2026-04-16
 **Decision:** Keep Parquet + binary-blob vectors + in-RAM HNSW as the primary vector backend. Add Lance as a second backend available per-profile for workloads where Lance wins architecturally. Per-profile `vector_backend: Parquet | Lance` field becomes part of Phase 17 model profiles. Implementation kicks off via the standalone `crates/lance-bench` crate and is promoted into `vectord::lance_store` when the API stabilizes.
 **Rationale:** Head-to-head benchmark on the 100K × 768d `resumes_100k_v2` index (see `docs/ADR-019-vector-storage.md` for the full scorecard). Parquet+HNSW wins current-scale search latency by 2.55× (873us vs 2229us p50). Lance wins index build time by 14× (16s vs 230s), random row access by 112× (311us vs ~35ms full-file scan), and append speed structurally (0.08s vs full Parquet rewrite). Neither strictly dominates — the dual-use PRD framing (staffing + LLM brain) means both workloads exist in the same system. Keeps ADR-008's "Parquet is the format" principle intact for dataset tables; adds Lance as a purpose-built vector-tier option without discarding the tuned HNSW stack.
+
+## ADR-020: `register()` is idempotent by name with a schema-fingerprint gate
+**Date:** 2026-04-19
+**Decision:** `catalogd::Registry::register(name, fingerprint, objects)` is idempotent on `name`. If no manifest for `name` exists, create one. If one exists with the same `schema_fingerprint`, reuse its `DatasetId`, replace `objects`, bump `updated_at`, and write through. If one exists with a different `schema_fingerprint`, reject with `409 Conflict` (HTTP) / `FAILED_PRECONDITION` (gRPC). A one-shot operator endpoint `POST /catalog/dedupe` collapses any pre-existing duplicates (preferring the manifest with a non-null `row_count`, then the most recently updated).
+**Rationale:** Registry was keyed by surrogate `DatasetId` with no uniqueness constraint on `name`, so every caller that re-registered (re-ingest, external cron, gRPC retry) silently created a parallel manifest pointing at the same parquet — accumulating 308× `successful_playbooks` in live state before detection. The fingerprint gate turns re-ingest into an explicit no-op (matching PRD invariant #5 "ingestd is idempotent — re-ingesting the same file is a no-op") while forcing schema drift to be visible instead of silently clobbering. 409 status separates policy rejections from server errors, which matters for the Phase 12 tool-consumer ecosystem. Concurrency: the write lock is held across the storage write to close the check→insert TOCTOU window; serializing registers is acceptable because registers-per-second is low on the ingest path. Audit: idempotent-register events are visible as bumps to the stored manifest's `updated_at` field and in `catalogd` tracing output (tracing is non-durable, operator view only); `DedupeReport` is the return-value audit for cleanup runs. No event-journal entries are emitted — ADR-012 scopes the journal to row-level mutations, not catalog-manifest operations.
--- a/docs/PRD.md
+++ b/docs/PRD.md
@ -1,8 +1,28 @@
 # PRD: Lakehouse — Rust-First Substrate for Versioned Knowledge Stores

-**Status:** Active — Phases 0-18 shipped; hybrid SQL+Vector search operational; IVF_PQ recall tuned to 1.000 at p50 ≈ 7.4ms via `nprobes`+`refine`; autonomous agent rotates across full index portfolio; cron-scheduled ingest; eval federation complete
+> ## ▶ Direction pivot — 2026-04-22
+>
+> **`docs/CONTROL_PLANE_PRD.md` is now the long-horizon architecture target.**
+> Lakehouse (this document) is being refactored to conform to the Universal AI
+> Control Plane architecture — six layers, `/v1/*` universal API, multi-provider
+> routing, Truth Layer for DevOps constraints.
+>
+> This PRD remains the source of truth for **everything Phases 0-37** — the
+> shipped staffing / AI-substrate system below is preserved as the reference
+> implementation and the first domain-specific consumer of the control plane.
+> All existing phases, ADRs, and invariants continue to apply.
+>
+> **Phases 38+ live in `CONTROL_PLANE_PRD.md`** (Universal API, provider
+> adapters, routing engine, expanded profile system, Truth Layer, validation
+> pipeline, caller migration).
+>
+> Cross-read: this PRD for what's shipped, CONTROL_PLANE_PRD for where it's going.
+
+---
+
+**Status:** Active — Phases 0-36 shipped, Phase 37 (hot-swap async) TODO; control-plane pivot in flight; E2E test 9.0/10; stress test passes
 **Created:** 2026-03-27
-**Last updated:** 2026-04-20 — portfolio-wide autotune, real cron, evals federation, bucket-migrate, IVF_PQ recall 0.805 → 1.000
+**Last updated:** 2026-04-22 — Control-plane pivot; Gateway seed concurrency (Semaphore); stress test with 6 diverse tasks
 **Owner:** J

 ---
@ -80,6 +100,9 @@ No new frameworks without documented ADR.
 | **aibridge** | Rust↔Python boundary — HTTP client to FastAPI sidecar |
 | **ui** | Dioxus frontend — Ask, Explore, SQL, System tabs |
 | **shared** | Types, errors, Arrow helpers, config, protobuf definitions, **secrets provider trait**, **PII detection** |
+| **mcp-server** | Agent gateway (Bun) — MCP tools, intelligence endpoints, scenario observer (:3700) |
+| **observer** | Autonomous iteration loop — records ops, error analysis, playbook consolidation (:3800) |
+| **scenario** | Day-in-the-life orchestrator — T5 tiers, KB integration, continuation tree-split (:bun) |

 **Federation building blocks** (shipped 2026-04-16):
 - `shared::secrets::SecretsProvider` trait + `FileSecretsProvider` reading `/etc/lakehouse/secrets.toml` (0600 enforced)
@ -536,14 +559,180 @@ Same-day re-seed no longer duplicates rows. `/seed` with `append=true` routes th

 `PlaybookEntry` gained `version: u32` (default 1), `parent_id`, `superseded_at`, `superseded_by` — all `#[serde(default)]` so pre-Phase-27 state loads as roots. `revise_entry(parent_id, new_entry)` appends a new version, stamps the parent superseded, rejects revising a retired or already-superseded parent. `history(id)` returns the root→tip chain from any node. Superseded entries excluded from boost (same rule as retired). Endpoints: `POST /vectors/playbook_memory/revise`, `GET /vectors/playbook_memory/history/{id}`. `/status` reports `superseded` as a distinct counter. 8 new tests; 51/51 vectord lib tests green.

-### Phase 28+: Further horizon
+### Phase 28: Agent Gateway + MCP Server (SHIPPED 2026-04-21)
+
+The agent gateway wraps the Rust substrate with an MCP-first interface (Model Context Protocol), enabling Claude Code, GPT agents, and internal scripts to interact with the lakehouse through named tools rather than raw HTTP. Built on Bun (not Node), serving as the "front door" for all AI consumers.
+
+**Files:** `mcp-server/index.ts` (2241 lines)
+
+| Tool | Purpose | Endpoint |
+|---|---|---|
+| `search_workers` | Hybrid SQL+vector (the core) | `POST /vectors/hybrid` |
+| `query_sql` | Analytical SQL on any dataset | `POST /query/sql` |
+| `match_contract` | Find workers for a job order | `POST /match` |
+| `get_worker` | Single worker by ID | `POST /worker/:id` |
+| `rag_question` | Full RAG pipeline | `POST /vectors/rag` |
+| `log_success` | Record operation → playbook | `POST /log` |
+| `log_failure` | Record failed fill (negative signal) | `POST /log_failure` |
+| `get_playbooks` | Retrieve past successes | `GET /playbooks` |
+| `swap_profile` | Hot-swap model+data context | `POST /profile/:id` |
+| `vram_status` | GPU introspection | `GET /vram` |
+
+**HTTP Routes (internal agent consumption):**
+- `/search` — hybrid search with client blacklist + rate enrichment
+- `/verify` — claim verification against golden dataset
+- `/context` — self-orientation for agents
+- `/clients/:client/blacklist` — per-client worker exclusion
+- `/memory/query` — unified memory surface (Phase 24 refinement)
+- `/system/summary` — truthful row counts via SQL
+- `/models/matrix` — read `config/models.json`
+
+### Phase 29: Intelligence Suite (SHIPPED 2026-04-21)
+
+Market intelligence endpoints surfaced through the MCP server — real-world demand signals from public data, cross-referenced with worker bench for staffing gap analysis.
+
+**Files:** `mcp-server/index.ts` continued
+
+| Endpoint | Purpose | Data Source |
+|---|---|---|
+| `/intelligence/market` | Building permits → demand forecast | Chicago Socrata API |
+| `/intelligence/staffing_forecast` | 30-day demand by role | Chicago permits + bench |
+| `/intelligence/permit_contracts` | Permits + Phase 19 ranked candidates | Chicago permits + Workers 500K |
+| `/intelligence/activity` | Activity feed + learned patterns | successful_playbooks |
+| `/intelligence/brief` | Parallel analytics across 500K profiles | Workers 500K |
+| `/intelligence/chat` | Natural language → routed queries | Hybrid + RAG |
+
+**Market signals captured:**
+- Largest permits by cost (top 50)
+- Work type breakout (electrical, mechanical, masonry, plumbing)
+- Cross-reference with IL bench supply/available/reliable
+- Demand forecast using $150K/worker industry heuristic
+
+### Phase 30: Observer + Autotune Integration (SHIPPED 2026-04-20, commit b95dd86)
+
+The gap: `lakehouse-observer.service` wrapped MCP :3700, while `tests/multi-agent/scenario.ts` hit gateway :3100 directly. Observer idle at 0 ops across 3600+ cycles — scenarios invisible to ERROR_ANALYZER and PLAYBOOK_BUILDER, autotune running blind to outcomes.
+
+**Files:** `mcp-server/observer.ts` (335 lines)
+
+**What shipped:**
+- `observer.ts` Bun HTTP listener on `OBSERVER_PORT` (default 3800): `GET /health`, `GET /stats` (totals, by_source, recent scenario digest), `POST /event` for scenario outcomes.
+- `ObservedOp` carries provenance — `source="scenario" | "mcp"` + `staffer_id` + `sig_hash` + `event_kind` + geo + rescue flags.
+- `recordExternalOp()` — shared ring-buffer insert; main analyzer + playbook builder no longer care where the op came from.
+- `persistOp()` fix: old path POSTed to `/ingest/file?name=observed_operations` which has REPLACE semantics (wiped prior ops); now uses append-friendly write-through to `data/_observer/ops.jsonl`.
+
+**HTTP API:**
+- `GET /health` — ring size
+- `GET /stats` — totals, by_source, recent scenario ops, rescue stats
+- `POST /event` — record scenario outcomes with provenance
+
+### Phase 31: Scenario Orchestrator with Tiers (SHIPPED 2026-04-21)
+
+A day-in-the-life scenario test running six events against the live substrate. Exercises baseline_fill, recurring, expansion, emergency, misplacement. Routes through the same executor/reviewer loop as the single-task orchestrator with event-specific constraints.
+
+**Files:** `tests/multi-agent/scenario.ts` (1772 lines)
+
+**Model Matrix (configurable per-tier):**
+
+| Tier | Purpose | Model | Context Budget |
+|---|---|---|---|
+| T1 hot | 50-200 calls/scenario | qwen3.5:latest | 8K, think:false |
+| T2 review | 5-14 calls/event | qwen3:latest | 8K, think:false |
+| T3 overview | 1-3 calls/scenario | gpt-oss:120b | 32K, thinking on |
+| T4 strategic | 1-10 calls/day | gpt-oss:120b | 64K |
+| T5 gatekeeper | 1-5 calls/day | kimi-k2.6 | 128K, audit-logged |
+
+**Staffer tool_levels (Phase 23):**
+- `full` — qwen3.5 + qwen3 local + cloud T3 + cloud rescue
+- `local` — qwen3.5 + qwen3 local + local gpt-oss:20b T3 + rescue
+- `basic` — kimi-k2.5 cloud exec + qwen3 local reviewer, no rescue
+- `minimal` — kimi-k2.5 cloud exec + qwen3 local reviewer, NO T3, NO rescue
+
+**Key features:**
+- `generateContinuable()` — handles output overflow with continuation
+- `generateTreeSplit()` — handles input overflow via sharding
+- `think:false` flag — disables hidden reasoning for hot-path JSON emitters
+- Cloud executor routing — `ACTIVE_EXECUTOR_CLOUD` / `ACTIVE_REVIEWER_CLOUD` flags
+- Cloud rescue (Phase 22B) — `requestCloudRemediation()` on failure
+- KB integration at start (load recommendation) and end (index + recommend)
+
+### Phase 32: Knowledge Library + Staffer Indexing (SHIPPED 2026-04-21)
+
+Meta-layer over Phase 19 playbook_memory. Tracks which configs worked for which playbook signatures.
+
+**Files:** `tests/multi-agent/kb.ts` (600 lines)
+
+**Files under `data/_kb/`:**
+- `signatures.jsonl` — (sig_hash, embedding[], first_seen, last_seen, run_count)
+- `outcomes.jsonl` — per-run: {sig, run_id, models, ok/total, turns, citations}
+- `pathway_recommendations.jsonl` — AI-synthesized for next run
+- `error_corrections.jsonl` — detected fail→succeed pairs
+- `config_snapshots.jsonl` — history of model changes
+- `staffers.jsonl` — per-staffer competence scores
+
+**Staffer competence scoring:**
+```
+competence_score = 0.45·fill_rate + 0.20·turn_efficiency + 0.20·citation_density + 0.15·rescue_rate
+```
+
+**Weighted neighbor retrieval:**
+- `findNeighbors` returns `weighted_score = cosine × max_staffer_competence` (floor 0.3)
+- Senior playbooks rank above junior playbooks on similar scenarios
+
+### Phase 33: Validity Windows + Playbook Retirement (SHIPPED 2026-04-21)
+
+Zep 2026-era finding: temporal validity is the single highest-value memory-hygiene primitive.
+
+**What shipped:**
+- `PlaybookEntry` gained `schema_fingerprint` / `valid_until` / `retired_at` / `retirement_reason`
+- `compute_boost_for_filtered_with_role` skips retired + expired before geo/cosine ranking
+- Two retirement paths: `retire_one(id, reason)` manual, `retire_on_schema_drift(city, state, fp, reason)` batch
+- Endpoint: `POST /vectors/playbook_memory/retire`
+
+### Phase 34: Mem0 Upsert + Geo Hot Cache (SHIPPED 2026-04-21, commit 640db8c)
+
+Same-day re-seed no longer duplicates rows.
+
+**What shipped:**
+- `/seed` with `append=true` routes through `upsert_entry` — decides ADD / UPDATE / NOOP on `(operation, day, city, state)`
+- Playbook_id stays stable on UPDATE so existing citations remain valid
+- `PlaybookMemory.geo_index: HashMap<(city, state), Vec<idx>>` rebuilt on every mutation
+- O(1) geo-filtered boost queries — sub-MS at current scale
+
+### Phase 36: Gateway Seed Concurrency Fix (SHIPPED 2026-04-22)
+
+Problem: Concurrent `playbook_memory/seed` calls caused socket collisions with the Python AI sidecar. Two staffing coordinators hitting seed simultaneously → one socket error.
+
+Solution:
+- Added `embed_semaphore: Arc<Semaphore>` to `VectorState` (permits=1)
+- Seed endpoint acquires permit before embedding, releases after
+- Serializes embed calls to sidecar, eliminates collision
+
+E2E test result: 8.0/10 avg (persist=0 is expected — same day+op = NOOP, not add)
+
+### Phase 37: Hot-Swap Endpoint Hang (TODO)
+
+Problem: `POST /vectors/profile/{id}/activate` hangs for 2-4 minutes because it does heavy work synchronously:
+- Loads embeddings into memory
+- Builds HNSW indexes
+- Preloads Ollama model
+- Auto-provisions buckets
+
+Current workaround: Client uses 5-second timeout (test passes but doesn't verify hot-swap).
+
+**Required fix**: Convert to async job pattern:
+1. Endpoint accepts request, spawns background task, returns job_id immediately
+2. Client polls `GET /jobs/{job_id}` for status
+3. Progress events streamed via SSE during activation
+4. Background task updates job status on completion
+
+This is a known architectural debt item - not blocking since tests use timeout workaround.
+
+### Phase 35+: Further horizon

 - Specialized fine-tuned models per domain (staffing matcher, resume parser)
 - Video/audio transcript ingest + multimodal embeddings
- Neural re-ranker over (query, candidate, outcome) triples — only if Phase 19's statistical feedback plateaus below usable recall
+- Neural re-ranker over (query, candidate, outcome) triples — only if Phase 19's statistical feedback plateaus
 - True distributed query (DataFusion multi-node) — only if single-machine ceilings bite
- Playbook versioning (version + parent_id + retired_at) — touches gateway + catalogd + mcp-server
- Playbook board (6-phase deep_analysis applied to playbook ranking)

 ---

--- a/mcp-server/index.ts
+++ b/mcp-server/index.ts
@ -26,7 +26,9 @@ const MODE = process.env.MCP_TRANSPORT || "http"; // "stdio" or "http"
 // Active trace for the current request — set per-request in the HTTP handler
 let activeTrace: ReturnType<typeof startTrace> | null = null;

-async function api(method: string, path: string, body?: any) {
+async function api(method: string, path: string, body?: any, retries = 2) {
+  for (let attempt = 0; attempt <= retries; attempt++) {
+    try {
      const t0 = Date.now();
      const resp = await fetch(`${BASE}${path}`, {
        method,
@ -37,6 +39,18 @@ async function api(method: string, path: string, body?: any) {
      const ms = Date.now() - t0;
      let parsed: any;
      try { parsed = JSON.parse(text); } catch { parsed = { raw: text, status: resp.status }; }
+      return parsed;
+    } catch (e: any) {
+      if (attempt === retries) throw e;
+      if (e.message?.includes("socket connection was closed") || e.message?.includes("ECONNREFUSED")) {
+        await Bun.sleep(500 * (attempt + 1));
+        continue;
+      }
+      throw e;
+    }
+  }
+  throw new Error("unreachable");
+}

  // Trace the call if we have an active trace
  if (activeTrace) {
--- a/tests/multi-agent/orchestrator.ts
+++ b/tests/multi-agent/orchestrator.ts
@ -149,6 +149,7 @@ async function main() {
      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), {
        temperature: 0.2,
        max_tokens: 1200,
+        think: false,
      });
      let execAction: Action;
      try {
@ -192,6 +193,7 @@ async function main() {
      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), {
        temperature: 0.1,
        max_tokens: 1000,
+        think: false,
      });
      let revAction: Action;
      try {
@ -259,7 +261,10 @@ async function main() {
      // the ranking signal.
      const seedContext = task.approach_hint
        ?? `${task.target_role} fill in ${task.target_city}, ${task.target_state}`;
-      const seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
+      let seedRes: Response | null = null;
+      for (let attempt = 0; attempt < 3; attempt++) {
+        try {
+          seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
@ -270,9 +275,15 @@ async function main() {
              append: true,
            }),
          });
+          break;
+        } catch (e) {
+          if (attempt === 2) throw e;
+          await Bun.sleep(1000 * (attempt + 1));
+        }
+      }
      if (seedRes.ok) {
        const j = await seedRes.json() as any;
-        console.log(`  ↳ playbook_memory seeded: id=${j.playbook_id} entries=${j.entries_after}`);
+        console.log(`  ↳ playbook_memory seeded: id=${j.outcome?.playbook_id ?? j.playbook_id} entries=${j.entries_after}`);
      } else {
        console.warn(`  ↳ playbook_memory seed failed: ${seedRes.status} ${await seedRes.text()}`);
      }
--- a/tests/multi-agent/run_e2e_rated.ts
+++ b/tests/multi-agent/run_e2e_rated.ts
@ -75,7 +75,7 @@ async function runOrchestrator(task: TaskSpec, prefix: string): Promise<RunResul
      turn += 1;

      // Executor
-      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 1200 });
+      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 1200, think: false });
      const execAction = parseAction(execRaw, "executor");
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });

@ -95,7 +95,7 @@ async function runOrchestrator(task: TaskSpec, prefix: string): Promise<RunResul
      }

      // Reviewer
-      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 1000 });
+      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 1000, think: false });
      const revAction = parseAction(revRaw, "reviewer");
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });

@ -116,31 +116,6 @@ async function runOrchestrator(task: TaskSpec, prefix: string): Promise<RunResul

    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);

-    // Phase 19 write-through: seed playbook_memory so the next semantically
-    // similar query benefits from this fill. Mirrors orchestrator.ts. Names
-    // are the consensus fills' display names — that's what the boost-key
-    // matcher (city, state, name) will look up against worker chunks.
-    try {
-      const seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
-        method: "POST", headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({
-          operation: task.operation,
-          approach: sealed.approach || "multi-agent → hybrid search",
-          context: task.approach_hint ?? `${task.target_role} fill in ${task.target_city}, ${task.target_state}`,
-          endorsed_names: sealed.fills.map(f => f.name),
-          append: true,
-        }),
-      });
-      if (!seedRes.ok) {
-        console.warn(`[${prefix}] seed warning: ${seedRes.status} ${await seedRes.text()}`);
-      } else {
-        const j = await seedRes.json() as any;
-        console.log(`[${prefix}] ↳ seeded playbook_memory: id=${j.playbook_id} entries=${j.entries_after}`);
-      }
-    } catch (e) {
-      console.warn(`[${prefix}] seed errored: ${(e as Error).message}`);
-    }
-
    return {
      task, ok: true, turns: turn, fills: sealed.fills, approach: sealed.approach,
      duration_secs: Math.round((Date.now() - start) / 1000), log,
@ -344,6 +319,39 @@ async function main() {
    throw new Error(`both orchestrators failed — substrate or models in bad state`);
  }

+  // Sequential seed after parallel runs — avoids concurrent embed socket collision
+  async function seedTask(res: RunResult, prefix: string) {
+    if (!res.ok || res.fills.length === 0) return;
+    for (let attempt = 0; attempt < 3; attempt++) {
+      try {
+        const r = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
+          method: "POST", headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({
+            operation: res.task.operation,
+            approach: res.approach || "multi-agent → hybrid search",
+            context: res.task.approach_hint ?? `${res.task.target_role} fill in ${res.task.target_city}, ${res.task.target_state}`,
+            endorsed_names: res.fills.map(f => f.name),
+            append: true,
+          }),
+        });
+        if (r.ok) {
+          const j = await r.json() as any;
+          console.log(`[${prefix}] ↳ seeded playbook_memory: id=${j.outcome?.playbook_id ?? j.playbook_id} entries=${j.entries_after}`);
+        } else {
+          console.warn(`[${prefix}] seed warning: ${r.status} ${await r.text()}`);
+        }
+        return;
+      } catch (e) {
+        if (attempt === 2) { console.warn(`[${prefix}] seed errored: ${(e as Error).message}`); return; }
+        await Bun.sleep(1000 * (attempt + 1));
+      }
+    }
+  }
+
+  await seedTask(resA, "A");
+  await Bun.sleep(5000);
+  await seedTask(resB, "B");
+
  const statsMid = await fetchMemoryStats();
  console.log(`\n▶ playbook_memory after both runs: ${statsMid.entries} entries (+${statsMid.entries - statsBefore.entries})\n`);