Re-scrum of yesterday's sanitizer fix surfaced 2 more real bugs in the fix itself (opus, both WARN, neither caught by kimi/qwen): W1 (service.rs:1949) — `mentions_path_missing` standalone branch was too aggressive. A registry-internal error like "/root/.cargo/.../x.rs: no such file or directory" would 404 because it triggers without dataset context. That's a real 500. Dropped the standalone branch; require dataset context AND missing-shape phrase. Lance's actual "Dataset at path X was not found" still satisfies it. W2 (service.rs:2018) — `out.push(bytes[i] as char)` corrupted multi-byte UTF-8 by casting raw bytes to char (only sound for ASCII < 128). A path containing user-supplied non-ASCII names produced Latin-1 mojibake. Rewrote redact_paths to track byte indices and emit unmatched runs as &str slices via push_str(&s[range]) — preserves multi-byte sequences verbatim. Step advance is now per-char, not per-byte, via small utf8_char_len helper. Two new regression tests: - is_not_found_does_not_match_unrelated_path_missing - redact_preserves_multibyte_utf8 (uses 工作 + café in input) 12/12 sanitize tests PASS. Smoke 10/10 PASS. Loop closure for opus re-scrum on the 2026-05-02 fix bundle. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
3447 lines
134 KiB
Rust
3447 lines
134 KiB
Rust
use axum::{
|
||
Json, Router,
|
||
extract::{Path, Query, State},
|
||
http::StatusCode,
|
||
response::IntoResponse,
|
||
routing::{get, post},
|
||
};
|
||
use object_store::ObjectStore;
|
||
use serde::{Deserialize, Serialize};
|
||
use serde_json::json;
|
||
use std::sync::Arc;
|
||
|
||
use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};
|
||
use catalogd::registry::Registry as CatalogRegistry;
|
||
use storaged::registry::BucketRegistry;
|
||
use crate::{agent, autotune, chunker, embedding_cache, harness, hnsw, index_registry, jobs, lance_backend, pathway_memory, playbook_memory, promotion, rag, refresh, search, store, supervisor, trial};
|
||
use tokio::sync::Semaphore;
|
||
|
||
#[derive(Clone)]
pub struct VectorState {
    /// Primary object store — vector parquet files and HNSW artifacts live here.
    pub store: Arc<dyn ObjectStore>,
    /// Client for the embedding/generation sidecar (embed + generate calls).
    pub ai_client: AiClient,
    /// Tracks background jobs (index builds, profile activations) for polling.
    pub job_tracker: jobs::JobTracker,
    /// Per-index metadata: source, model, chunking params, storage key, bucket.
    pub index_registry: index_registry::IndexRegistry,
    /// In-memory HNSW graphs keyed by index name.
    pub hnsw_store: hnsw::HnswStore,
    /// Caches loaded embedding matrices so repeat searches skip parquet reads.
    pub embedding_cache: embedding_cache::EmbeddingCache,
    /// Append-only journal of HNSW tuning trials.
    pub trial_journal: trial::TrialJournal,
    /// Federation-aware harness store — resolves eval artifacts to each
    /// index's recorded bucket, falling back to primary for legacy evals.
    pub harness_store: harness::HarnessStore,
    /// Catalog registry — needed by the Phase C refresh path to mark/clear
    /// staleness and look up dataset manifests.
    pub catalog: CatalogRegistry,
    /// Phase 16: promoted HNSW configs. Activation + autotune read/write here.
    pub promotion_registry: promotion::PromotionRegistry,
    /// Phase 16.2: handle to the background autotune agent. Always
    /// present — if the agent is disabled in config, the handle drops
    /// incoming triggers silently.
    pub agent_handle: agent::AgentHandle,
    /// Phase B (federation layer 2): bucket registry for per-profile
    /// bucket auto-provisioning on activation.
    pub bucket_registry: Arc<BucketRegistry>,
    /// Phase C (two-profile VRAM gate): tracks which profile is currently
    /// "active" on the GPU. Singleton — one profile at a time holds its
    /// model in VRAM. Swapping profiles with different ollama_name unloads
    /// the previous one (keep_alive=0) before preloading the new one.
    ///
    /// `None` = no profile has been activated this session; any first
    /// activation just preloads and takes the slot.
    pub active_profile: Arc<tokio::sync::RwLock<Option<ActiveProfileSlot>>>,
    /// ADR-019 hybrid: handles to Lance datasets keyed by index name.
    /// Lazy-created on first /vectors/lance/* call.
    pub lance: lance_backend::LanceRegistry,
    /// Phase 19 — meta-index feedback. Embeds past successful_playbooks
    /// and, when `use_playbook_memory` is set on /vectors/hybrid, boosts
    /// workers that were actually filled in semantically-similar past ops.
    pub playbook_memory: playbook_memory::PlaybookMemory,
    /// Pathway memory — consensus-designed sidecar for full-context
    /// backtracking + hot-swap of successful review pathways. See
    /// crates/vectord/src/pathway_memory.rs for the design rationale
    /// (10-probe N=3 ensemble, locked 2026-04-24).
    pub pathway_memory: pathway_memory::PathwayMemory,
    /// Serializes embed calls from seed_playbook_memory to avoid
    /// concurrent socket collisions with the Python sidecar.
    pub embed_semaphore: Arc<Semaphore>,
}
|
||
|
||
/// What the active-profile singleton records. Narrow — we don't need the
|
||
/// full ModelProfile here, just enough to know what to unload on swap.
|
||
#[derive(Debug, Clone, Serialize)]
pub struct ActiveProfileSlot {
    /// Id of the profile occupying the VRAM slot.
    pub profile_id: String,
    /// Ollama model name to unload (keep_alive=0) when another profile swaps in.
    pub ollama_name: String,
    /// When this profile took the slot — serialized in the /profile/active response.
    pub activated_at: chrono::DateTime<chrono::Utc>,
}
|
||
|
||
/// Build the vectord HTTP router. Every handler shares one `VectorState`
/// via axum's `with_state`. Routes are grouped by subsystem: index CRUD,
/// jobs, search/RAG/hybrid, HNSW tuning, refresh, profile activation,
/// promotion/autotune, the Lance backend, and playbook/pathway memory.
pub fn router(state: VectorState) -> Router {
    Router::new()
        .route("/health", get(health))
        .route("/index", post(create_index))
        .route("/indexes", get(list_indexes))
        .route("/indexes/{name}", get(get_index_meta))
        .route("/indexes/{name}/bucket", axum::routing::patch(migrate_index_bucket))
        .route("/jobs", get(list_jobs))
        .route("/jobs/{id}", get(get_job))
        // PRD Phase 41 alias — docs/CONTROL_PLANE_PRD.md specifies
        // GET /vectors/profile/jobs/{id} for polling profile activations.
        // Same handler as /jobs/{id}; the alias just matches the PRD URL.
        .route("/profile/jobs/{id}", get(get_job))
        .route("/search", post(search_index))
        .route("/rag", post(rag_query))
        .route("/hybrid", post(hybrid_search))
        .route("/hnsw/build", post(build_hnsw))
        .route("/hnsw/search", post(search_hnsw))
        .route("/hnsw/list", get(list_hnsw))
        // Trial system — parameterized tuning loop
        .route("/hnsw/trial", post(run_trial))
        .route("/hnsw/trials/{index_name}", get(list_trials))
        .route("/hnsw/trials/{index_name}/best", get(best_trial))
        // Eval sets
        .route("/hnsw/evals", get(list_evals))
        .route("/hnsw/evals/{name}", get(get_eval).put(put_eval))
        .route("/hnsw/evals/{name}/autogen", post(autogen_eval))
        // Cache management
        .route("/hnsw/cache/stats", get(cache_stats))
        .route("/hnsw/cache/{index_name}", axum::routing::delete(cache_evict))
        // Phase C: embedding refresh
        .route("/refresh/{dataset_name}", post(refresh_dataset))
        .route("/stale", get(list_stale))
        // Phase 17: profile activation — pre-load caches + HNSW for this
        // model's bound data. First search after activate is warm.
        .route("/profile/{id}/activate", post(activate_profile))
        .route("/profile/{id}/deactivate", post(deactivate_profile))
        .route("/profile/{id}/search", post(profile_scoped_search))
        // Phase 17 VRAM gate: which profile currently owns the GPU?
        .route("/profile/active", get(get_active_profile))
        // Phase 16: promotion + autotune
        .route("/hnsw/promote/{index}/{trial_id}", post(promote_trial))
        .route("/hnsw/rollback/{index}", post(rollback_promotion))
        .route("/hnsw/promoted/{index}", get(get_promoted))
        .route("/hnsw/autotune", post(run_autotune_endpoint))
        // Phase 16.2: background autotune agent
        .route("/agent/status", get(agent_status))
        .route("/agent/stop", post(agent_stop))
        .route("/agent/enqueue/{index_name}", post(agent_enqueue))
        // ADR-019: Lance hybrid backend
        .route("/lance/migrate/{index_name}", post(lance_migrate))
        .route("/lance/index/{index_name}", post(lance_build_index))
        .route("/lance/search/{index_name}", post(lance_search))
        .route("/lance/doc/{index_name}/{doc_id}", get(lance_get_doc))
        .route("/lance/append/{index_name}", post(lance_append))
        .route("/lance/stats/{index_name}", get(lance_stats))
        .route("/lance/scalar-index/{index_name}/{column}", post(lance_build_scalar_index))
        .route("/lance/recall/{index_name}", post(lance_recall_harness))
        // Phase 19: playbook memory — the meta-index feedback loop
        .route("/playbook_memory/rebuild", post(rebuild_playbook_memory))
        .route("/playbook_memory/stats", get(playbook_memory_stats))
        .route("/playbook_memory/seed", post(seed_playbook_memory))
        .route("/playbook_memory/persist_sql", post(persist_playbook_memory_sql))
        .route("/playbook_memory/patterns", post(discover_playbook_patterns))
        .route("/playbook_memory/mark_failed", post(mark_playbook_failed))
        .route("/playbook_memory/retire", post(retire_playbook_memory))
        .route("/playbook_memory/revise", post(revise_playbook_memory))
        .route("/playbook_memory/history/{id}", get(playbook_memory_history))
        .route("/playbook_memory/status", get(playbook_memory_status))
        // Phase 45 slice 3 — doc drift detection + human re-admission.
        .route("/playbook_memory/doc_drift/check/{id}", post(check_doc_drift))
        .route("/playbook_memory/doc_drift/resolve/{id}", post(resolve_doc_drift))
        // Phase 45 closure (2026-04-27) — batch scan across all active
        // playbooks. Operator runs this on a schedule (cron or manual);
        // each newly-detected drift writes a row to
        // data/_kb/doc_drift_corrections.jsonl for downstream review.
        .route("/playbook_memory/doc_drift/scan", post(scan_doc_drift))
        // Pathway memory — consensus-designed sidecar (2026-04-24).
        // scrum_master_pipeline POSTs /pathway/insert at the end of each
        // review, calls /pathway/query before running the ladder for a
        // potential hot-swap, and posts /pathway/record_replay after a
        // hot-swap succeeds or fails.
        .route("/pathway/insert", post(pathway_insert))
        .route("/pathway/query", post(pathway_query))
        .route("/pathway/record_replay", post(pathway_record_replay))
        .route("/pathway/stats", get(pathway_stats))
        // ADR-021 Phase C: pre-review bug-fingerprint retrieval.
        .route("/pathway/bug_fingerprints", post(pathway_bug_fingerprints))
        // Mem0 ops (J 2026-04-25): upsert/retire/revise/history.
        .route("/pathway/upsert", post(pathway_upsert))
        .route("/pathway/retire", post(pathway_retire))
        .route("/pathway/revise", post(pathway_revise))
        .route("/pathway/history/{trace_uid}", get(pathway_history))
        .with_state(state)
}
|
||
|
||
/// Liveness probe — a fixed token so operators can distinguish vectord
/// from the other services on a shared port map.
async fn health() -> &'static str {
    "vectord ok"
}
|
||
|
||
// --- Background Index Creation ---
|
||
|
||
/// Body of POST /vectors/index — documents to chunk, embed, and index.
#[derive(Deserialize)]
struct CreateIndexRequest {
    /// Name the new index will be registered under.
    index_name: String,
    /// Logical source dataset the documents came from (recorded in IndexMeta).
    source: String,
    /// Raw documents; chunked server-side before embedding.
    documents: Vec<DocInput>,
    /// Chunk size in the chunker's units — defaults to 500 in the handler.
    chunk_size: Option<usize>,
    /// Overlap between adjacent chunks — defaults to 50 in the handler.
    overlap: Option<usize>,
    /// Federation layer 2: optional bucket to hold this index's trial
    /// journal + promotion file. Defaults to "primary" — pre-existing
    /// clients that don't know about federation keep working unchanged.
    #[serde(default)]
    bucket: Option<String>,
}
|
||
|
||
/// One document in a CreateIndexRequest: a caller-chosen id plus its full text.
#[derive(Deserialize)]
struct DocInput {
    // Caller-assigned document id; chunk doc_ids derive from it.
    id: String,
    // Full document text to be chunked and embedded.
    text: String,
}
|
||
|
||
/// 202 Accepted payload for POST /vectors/index — the build continues in
/// the background; poll /vectors/jobs/{id} with `job_id` for progress.
#[derive(Serialize)]
struct CreateIndexResponse {
    // Id of the spawned embedding job.
    job_id: String,
    // Echo of the requested index name.
    index_name: String,
    // Number of input documents.
    documents: usize,
    // Number of chunks produced (work units for the embed job).
    chunks: usize,
    // Human-readable hint on how to poll for completion.
    message: String,
}
|
||
|
||
async fn create_index(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<CreateIndexRequest>,
|
||
) -> impl IntoResponse {
|
||
let chunk_size = req.chunk_size.unwrap_or(500);
|
||
let overlap = req.overlap.unwrap_or(50);
|
||
|
||
// Chunk synchronously (fast)
|
||
let doc_ids: Vec<String> = req.documents.iter().map(|d| d.id.clone()).collect();
|
||
let texts: Vec<String> = req.documents.iter().map(|d| d.text.clone()).collect();
|
||
let chunks = chunker::chunk_column(&req.source, &doc_ids, &texts, chunk_size, overlap);
|
||
|
||
if chunks.is_empty() {
|
||
return Err((StatusCode::BAD_REQUEST, "no text to index".to_string()));
|
||
}
|
||
|
||
let n_docs = req.documents.len();
|
||
let n_chunks = chunks.len();
|
||
let index_name = req.index_name.clone();
|
||
let bucket = req.bucket.clone().unwrap_or_else(|| "primary".to_string());
|
||
|
||
// Create job and return immediately
|
||
let job_id = state.job_tracker.create_embed(&index_name, n_chunks).await;
|
||
tracing::info!("job {job_id}: indexing '{}' — {} docs → {} chunks (background)", index_name, n_docs, n_chunks);
|
||
|
||
// Spawn supervised dual-pipeline embedding
|
||
let tracker = state.job_tracker.clone();
|
||
let ai_client = state.ai_client.clone();
|
||
let obj_store = state.store.clone();
|
||
let registry = state.index_registry.clone();
|
||
let jid = job_id.clone();
|
||
let source_name = req.source.clone();
|
||
let idx_name = req.index_name.clone();
|
||
|
||
tokio::spawn(async move {
|
||
let start_time = std::time::Instant::now();
|
||
let config = supervisor::SupervisorConfig::default();
|
||
let result = supervisor::run_supervised(
|
||
&jid, &idx_name, chunks, &ai_client, &obj_store, &tracker, config,
|
||
).await;
|
||
match result {
|
||
Ok(key) => {
|
||
let elapsed = start_time.elapsed().as_secs_f32();
|
||
let rate = if elapsed > 0.0 { n_chunks as f32 / elapsed } else { 0.0 };
|
||
|
||
// Register index metadata with model version info
|
||
let meta = index_registry::IndexMeta {
|
||
index_name: idx_name.clone(),
|
||
source: source_name,
|
||
model_name: "nomic-embed-text".to_string(), // from sidecar config
|
||
model_version: "latest".to_string(),
|
||
dimensions: 768,
|
||
chunk_count: n_chunks,
|
||
doc_count: n_docs,
|
||
chunk_size: chunk_size,
|
||
overlap: overlap,
|
||
storage_key: key.clone(),
|
||
created_at: chrono::Utc::now(),
|
||
build_time_secs: elapsed,
|
||
chunks_per_sec: rate,
|
||
bucket: bucket.clone(),
|
||
vector_backend: shared::types::VectorBackend::Parquet,
|
||
id_prefix: None,
|
||
last_used: None,
|
||
build_signature: None,
|
||
};
|
||
let _ = registry.register(meta).await;
|
||
|
||
tracker.complete(&jid, Some(json!({ "storage_key": key }))).await;
|
||
tracing::info!("job {jid}: completed — {n_chunks} chunks in {elapsed:.0}s ({rate:.0}/sec)");
|
||
}
|
||
Err(e) => {
|
||
tracker.fail(&jid, e.clone()).await;
|
||
tracing::error!("job {jid}: failed — {e}");
|
||
}
|
||
}
|
||
});
|
||
|
||
Ok((StatusCode::ACCEPTED, Json(CreateIndexResponse {
|
||
job_id,
|
||
index_name: req.index_name,
|
||
documents: n_docs,
|
||
chunks: n_chunks,
|
||
message: format!("embedding {} chunks in background — poll /vectors/jobs/{{id}} for progress", n_chunks),
|
||
})))
|
||
}
|
||
|
||
// --- Index Registry ---
|
||
|
||
/// Query params for GET /vectors/indexes — both filters optional.
#[derive(Deserialize)]
struct IndexListQuery {
    // Restrict results to indexes built from this source dataset.
    source: Option<String>,
    // Restrict results to indexes built with this embedding model.
    model: Option<String>,
}
|
||
|
||
async fn list_indexes(
|
||
State(state): State<VectorState>,
|
||
Query(q): Query<IndexListQuery>,
|
||
) -> impl IntoResponse {
|
||
let indexes = state.index_registry.list(q.source.as_deref(), q.model.as_deref()).await;
|
||
Json(indexes)
|
||
}
|
||
|
||
async fn get_index_meta(
|
||
State(state): State<VectorState>,
|
||
Path(name): Path<String>,
|
||
) -> impl IntoResponse {
|
||
match state.index_registry.get(&name).await {
|
||
Some(meta) => Ok(Json(meta)),
|
||
None => Err((StatusCode::NOT_FOUND, format!("index not found: {name}"))),
|
||
}
|
||
}
|
||
|
||
/// Body of PATCH /vectors/indexes/{name}/bucket.
#[derive(Deserialize)]
struct MigrateBucketRequest {
    // Target bucket — must already be registered in the BucketRegistry.
    dest_bucket: String,
    /// If true, delete artifacts from the source bucket after the pointer
    /// flip. Default false — keeping source copies means a failed migration
    /// is recoverable by editing IndexMeta.bucket back, and a successful
    /// migration leaves inspectable forensics until an operator sweeps.
    #[serde(default)]
    delete_source: bool,
}
|
||
|
||
/// Result of a bucket migration — a per-artifact audit of what moved.
#[derive(Serialize)]
struct MigrateBucketReport {
    // Index that was migrated.
    index_name: String,
    // Bucket the artifacts came from.
    source_bucket: String,
    // Bucket the artifacts now live in (IndexMeta.bucket points here).
    dest_bucket: String,
    /// Artifact keys that were copied (or attempted). Order follows copy order.
    copied: Vec<String>,
    /// Artifact prefixes that had nothing to copy (optional files missing,
    /// trial journal empty, etc).
    skipped: Vec<String>,
    /// Subset of `copied` that was subsequently deleted from the source.
    deleted_source: Vec<String>,
    // Wall-clock time for the whole migration.
    duration_secs: f32,
}
|
||
|
||
/// Move an index's artifacts from its current bucket to `dest_bucket`.
|
||
/// Parquet-backed indexes only — Lance migration needs URI rewriting that
|
||
/// isn't in scope for this endpoint. Copies the vector data, trial journal,
|
||
/// promotion file, and auto-generated harness; updates `IndexMeta.bucket`
|
||
/// last so a mid-flight failure leaves the index still usable at its
|
||
/// original location. Evicts the `EmbeddingCache` entry so the next load
|
||
/// re-reads from the new bucket.
|
||
async fn migrate_index_bucket(
    State(state): State<VectorState>,
    Path(name): Path<String>,
    Json(req): Json<MigrateBucketRequest>,
) -> Result<Json<MigrateBucketReport>, (StatusCode, String)> {
    let t0 = std::time::Instant::now();

    // Resolve the index first — 404 before any validation of the request body.
    let mut meta = state
        .index_registry
        .get(&name)
        .await
        .ok_or_else(|| (StatusCode::NOT_FOUND, format!("index '{name}' not found")))?;

    if meta.vector_backend == shared::types::VectorBackend::Lance {
        return Err((
            StatusCode::BAD_REQUEST,
            "Lance-backed indexes cannot be migrated via this endpoint — \
             Lance URIs are bucket-specific; a separate migrate_lance tool \
             is needed".into(),
        ));
    }

    if !state.bucket_registry.contains(&req.dest_bucket) {
        return Err((
            StatusCode::BAD_REQUEST,
            format!("dest bucket '{}' not registered", req.dest_bucket),
        ));
    }

    let source_bucket = meta.bucket.clone();
    if source_bucket == req.dest_bucket {
        return Err((
            StatusCode::BAD_REQUEST,
            format!("source and dest are both '{source_bucket}' — nothing to migrate"),
        ));
    }

    // Resolve both buckets to concrete object stores.
    let src = state
        .bucket_registry
        .get(&source_bucket)
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
    let dst = state
        .bucket_registry
        .get(&req.dest_bucket)
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;

    let mut copied: Vec<String> = Vec::new();
    let mut skipped: Vec<String> = Vec::new();

    // 1. Vector data (single parquet file for this backend). Mandatory —
    //    a failure here aborts the migration with nothing changed.
    copy_key(&src, &dst, &meta.storage_key)
        .await
        .map_err(|e| {
            (StatusCode::INTERNAL_SERVER_ERROR,
             format!("copy {}: {e}", meta.storage_key))
        })?;
    copied.push(meta.storage_key.clone());

    // 2. Trial journal batches — per-index directory of JSONL files.
    //    A list failure is treated as "nothing to copy" (unwrap_or_default).
    let trial_prefix = format!("_hnsw_trials/{name}/");
    let trial_keys = storaged::ops::list(&src, Some(&trial_prefix))
        .await
        .unwrap_or_default();
    if trial_keys.is_empty() {
        skipped.push(trial_prefix);
    }
    for k in &trial_keys {
        copy_key(&src, &dst, k)
            .await
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("copy {k}: {e}")))?;
        copied.push(k.clone());
    }

    // 3. Promotion file (optional — absent for never-promoted indexes).
    //    Copy failure here just records a skip rather than aborting.
    let promo_key = format!("_hnsw_promotions/{name}.json");
    match copy_key(&src, &dst, &promo_key).await {
        Ok(()) => copied.push(promo_key),
        Err(_) => skipped.push(promo_key),
    }

    // 4. Auto-generated harness (optional — absent if agent never ran).
    let harness_key = format!("_hnsw_evals/{name}_auto.json");
    match copy_key(&src, &dst, &harness_key).await {
        Ok(()) => copied.push(harness_key),
        Err(_) => skipped.push(harness_key),
    }

    // 5. Pointer flip — IndexMeta.bucket now points at destination. This
    //    is the commit point; earlier failures leave copies in dest but the
    //    index still usable at source.
    meta.bucket = req.dest_bucket.clone();
    state
        .index_registry
        .register(meta)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("update meta: {e}")))?;

    // 6. Cache eviction — next load reads the new bucket's parquet.
    state.embedding_cache.evict(&name).await;

    // 7. Optional source cleanup. Best-effort: delete failures are simply
    //    omitted from `deleted_source` rather than failing the migration,
    //    since the pointer flip has already committed.
    let mut deleted_source: Vec<String> = Vec::new();
    if req.delete_source {
        for k in &copied {
            if storaged::ops::delete(&src, k).await.is_ok() {
                deleted_source.push(k.clone());
            }
        }
    }

    Ok(Json(MigrateBucketReport {
        index_name: name,
        source_bucket,
        dest_bucket: req.dest_bucket,
        copied,
        skipped,
        deleted_source,
        duration_secs: t0.elapsed().as_secs_f32(),
    }))
}
|
||
|
||
/// Stream a single object from one bucket to another. Uses the existing
|
||
/// `storaged::ops` get + put primitives — no native copy in object_store
|
||
/// across heterogeneous backends (local ↔ S3), so an in-memory hop is
|
||
/// unavoidable. Bounded by individual object size, which for our parquet
|
||
/// + jsonl artifacts tops out around a few hundred MB.
|
||
async fn copy_key(
|
||
src: &Arc<dyn ObjectStore>,
|
||
dst: &Arc<dyn ObjectStore>,
|
||
key: &str,
|
||
) -> Result<(), String> {
|
||
let data = storaged::ops::get(src, key).await?;
|
||
storaged::ops::put(dst, key, data).await
|
||
}
|
||
|
||
// --- Job Status ---
|
||
|
||
async fn list_jobs(State(state): State<VectorState>) -> impl IntoResponse {
|
||
let jobs = state.job_tracker.list().await;
|
||
Json(jobs)
|
||
}
|
||
|
||
async fn get_job(
|
||
State(state): State<VectorState>,
|
||
Path(id): Path<String>,
|
||
) -> impl IntoResponse {
|
||
match state.job_tracker.get(&id).await {
|
||
Some(job) => Ok(Json(job)),
|
||
None => Err((StatusCode::NOT_FOUND, format!("job not found: {id}"))),
|
||
}
|
||
}
|
||
|
||
// --- Search ---
|
||
|
||
/// Body of POST /vectors/search — brute-force cosine search on one index.
#[derive(Deserialize)]
struct SearchRequest {
    // Index to search.
    index_name: String,
    // Natural-language query; embedded server-side.
    query: String,
    // Number of hits to return — defaults to 5 in the handler.
    top_k: Option<usize>,
}
|
||
|
||
/// Response of POST /vectors/search — ranked hits plus the echoed query.
#[derive(Serialize)]
struct SearchResponse {
    // Hits ordered by descending score.
    results: Vec<search::SearchResult>,
    // Echo of the query text for client-side correlation.
    query: String,
}
|
||
|
||
async fn search_index(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<SearchRequest>,
|
||
) -> impl IntoResponse {
|
||
let top_k = req.top_k.unwrap_or(5);
|
||
|
||
let embed_resp = state.ai_client.embed(EmbedRequest {
|
||
texts: vec![req.query.clone()],
|
||
model: None,
|
||
}).await.map_err(|e| (StatusCode::BAD_GATEWAY, format!("embed error: {e}")))?;
|
||
|
||
if embed_resp.embeddings.is_empty() {
|
||
return Err((StatusCode::BAD_GATEWAY, "no embedding returned".to_string()));
|
||
}
|
||
|
||
let query_vec: Vec<f32> = embed_resp.embeddings[0].iter().map(|&x| x as f32).collect();
|
||
|
||
let embeddings = store::load_embeddings(&state.store, &req.index_name)
|
||
.await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("index not found: {e}")))?;
|
||
|
||
let results = search::search(&query_vec, &embeddings, top_k);
|
||
|
||
Ok(Json(SearchResponse {
|
||
results,
|
||
query: req.query,
|
||
}))
|
||
}
|
||
|
||
// --- RAG ---
|
||
|
||
/// Body of POST /vectors/rag — retrieval-augmented generation over one index.
#[derive(Deserialize)]
struct RagRequest {
    // Index to retrieve context from.
    index_name: String,
    // Question to answer from the retrieved context.
    question: String,
    // Number of context chunks to retrieve — defaults to 5 in the handler.
    top_k: Option<usize>,
}
|
||
|
||
async fn rag_query(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<RagRequest>,
|
||
) -> impl IntoResponse {
|
||
let top_k = req.top_k.unwrap_or(5);
|
||
|
||
match rag::query(&req.question, &req.index_name, top_k, &state.store, &state.ai_client).await {
|
||
Ok(resp) => Ok(Json(resp)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
// --- Hybrid SQL+Vector Search ---
|
||
//
|
||
// The fix for the core RAG gap: vector search alone can't do structured
|
||
// filtering (state, role, reliability threshold). SQL alone can't do
|
||
// semantic similarity ("who could handle this kind of work"). Hybrid
|
||
// does both: SQL narrows to structurally-valid candidates, vector
|
||
// ranks them by semantic relevance, LLM generates from verified context.
|
||
|
||
/// Body of POST /vectors/hybrid — SQL pre-filter + vector rerank + optional
/// LLM generation.
#[derive(Deserialize)]
struct HybridRequest {
    /// Natural language question — used for embedding + LLM generation.
    question: String,
    /// Vector index to search against.
    index_name: String,
    /// SQL WHERE clause to pre-filter. Applied against the index's source
    /// dataset. Example: "state = 'IL' AND reliability > 0.8"
    /// Safety: runs through DataFusion's parser so injection is bounded
    /// by what DataFusion accepts (no DDL, no writes).
    #[serde(default)]
    sql_filter: Option<String>,
    /// Dataset to run the SQL filter against. Defaults to the index's
    /// source if omitted.
    #[serde(default)]
    filter_dataset: Option<String>,
    /// Column in the SQL result that maps to the vector index's doc_id.
    /// Default: "worker_id" (for the Ethereal dataset) or "candidate_id".
    #[serde(default)]
    id_column: Option<String>,
    // Serde-level default via default_top_k (defined elsewhere in this file).
    #[serde(default = "default_top_k")]
    top_k: usize,
    /// If true, generate an LLM answer from the matched context.
    /// If false, just return the ranked matches (faster, no Ollama gen).
    #[serde(default = "default_true")]
    generate: bool,
    /// Phase 19: consult `playbook_memory` and boost workers that past
    /// similar playbooks successfully filled. Off by default so current
    /// callers keep deterministic ranking; opt-in unlocks the feedback.
    #[serde(default)]
    use_playbook_memory: bool,
    /// Number of past playbooks to consider when `use_playbook_memory`
    /// is on. Ignored otherwise. Defaults to 5.
    #[serde(default)]
    playbook_memory_k: Option<usize>,
}
|
||
|
||
fn default_true() -> bool { true }
|
||
|
||
/// Response of POST /vectors/hybrid.
#[derive(serde::Serialize)]
struct HybridResponse {
    // Echo of the question.
    question: String,
    // Echo of the SQL pre-filter, if one was supplied.
    sql_filter: Option<String>,
    // How many IDs the SQL filter matched (0 when no filter ran).
    sql_matches: usize,
    // How many hits survived the SQL-intersection + top-k cut.
    vector_reranked: usize,
    // Describes which retrieval path ran (for client-side diagnostics).
    method: String,
    // LLM answer — None when `generate` was false.
    answer: Option<String>,
    // Ranked context hits backing the answer.
    sources: Vec<HybridSource>,
    // End-to-end handler latency.
    duration_ms: u64,
}
|
||
|
||
/// One ranked hit in a HybridResponse.
#[derive(serde::Serialize)]
struct HybridSource {
    // Document id of the matched chunk.
    doc_id: String,
    // Text of the matched chunk.
    chunk_text: String,
    // Final score (vector score, plus any playbook boost).
    score: f32,
    // True when this hit also passed the SQL pre-filter.
    sql_verified: bool,
    /// Phase 19: how much the playbook_memory boost lifted this hit's
    /// score. 0.0 when `use_playbook_memory=false` or no past playbook
    /// endorsed this worker.
    #[serde(default, skip_serializing_if = "is_zero")]
    playbook_boost: f32,
    /// playbook_ids whose endorsement contributed to `playbook_boost`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    playbook_citations: Vec<String>,
}
|
||
|
||
fn is_zero(x: &f32) -> bool { x.abs() < 1e-6 }
|
||
|
||
async fn hybrid_search(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<HybridRequest>,
|
||
) -> impl IntoResponse {
|
||
let t0 = std::time::Instant::now();
|
||
|
||
// Step 1: If SQL filter provided, run it to get the set of valid IDs.
|
||
let valid_ids: Option<std::collections::HashSet<String>> = if let Some(ref filter) = req.sql_filter {
|
||
let index_meta = state.index_registry.get(&req.index_name).await;
|
||
let dataset = req.filter_dataset.clone()
|
||
.or_else(|| index_meta.map(|m| m.source.clone()))
|
||
.unwrap_or_else(|| req.index_name.clone());
|
||
let id_col = req.id_column.clone().unwrap_or_else(|| "worker_id".into());
|
||
|
||
let sql = format!("SELECT CAST({id_col} AS VARCHAR) AS id FROM {dataset} WHERE {filter}");
|
||
tracing::info!("hybrid: SQL filter → {sql}");
|
||
|
||
// Use queryd through the catalog — same engine as /query/sql
|
||
// Use the query engine to get JSON rows — avoids Arrow type
|
||
// wrangling across DataFusion's Utf8View/StringViewArray variants.
|
||
let engine = queryd::context::QueryEngine::new(
|
||
state.catalog.clone(),
|
||
state.bucket_registry.clone(),
|
||
queryd::cache::MemCache::new(0),
|
||
);
|
||
match engine.query(&sql).await {
|
||
Ok(batches) => {
|
||
use arrow::array::{Array, AsArray};
|
||
let mut ids = std::collections::HashSet::new();
|
||
for batch in &batches {
|
||
if let Some(col) = batch.column_by_name("id") {
|
||
// DataFusion CAST(x AS VARCHAR) → StringViewArray.
|
||
// Try StringView first, then String, then Int.
|
||
if let Some(arr) = col.as_string_view_opt() {
|
||
for i in 0..arr.len() {
|
||
if !arr.is_null(i) { ids.insert(arr.value(i).to_string()); }
|
||
}
|
||
} else if let Some(arr) = col.as_string_opt::<i32>() {
|
||
for i in 0..arr.len() {
|
||
if !arr.is_null(i) { ids.insert(arr.value(i).to_string()); }
|
||
}
|
||
} else {
|
||
// Fallback: try as Int32/Int64 (if CAST didn't happen)
|
||
if let Some(arr) = col.as_any().downcast_ref::<arrow::array::Int32Array>() {
|
||
for i in 0..arr.len() {
|
||
if !arr.is_null(i) { ids.insert(arr.value(i).to_string()); }
|
||
}
|
||
} else if let Some(arr) = col.as_any().downcast_ref::<arrow::array::Int64Array>() {
|
||
for i in 0..arr.len() {
|
||
if !arr.is_null(i) { ids.insert(arr.value(i).to_string()); }
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
tracing::info!("hybrid: SQL filter returned {} IDs", ids.len());
|
||
if ids.is_empty() { None } else { Some(ids) }
|
||
}
|
||
Err(e) => {
|
||
return Err((StatusCode::BAD_REQUEST, format!("SQL filter error: {e}")));
|
||
}
|
||
}
|
||
} else {
|
||
None
|
||
};
|
||
|
||
// Step 2: Vector search — embed question, search index.
|
||
let embed_resp = state.ai_client
|
||
.embed(EmbedRequest { texts: vec![req.question.clone()], model: None })
|
||
.await
|
||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("embed: {e}")))?;
|
||
if embed_resp.embeddings.is_empty() {
|
||
return Err((StatusCode::BAD_GATEWAY, "no embedding".into()));
|
||
}
|
||
let qv: Vec<f32> = embed_resp.embeddings[0].iter().map(|&x| x as f32).collect();
|
||
|
||
// When SQL-filtered: use brute-force cosine over all embeddings,
|
||
// then filter by SQL IDs, then take top_k. HNSW's ef_search caps
|
||
// results at ~30, which is too few to reliably intersect with
|
||
// narrow SQL filters. Brute-force on 10K vectors is ~50ms — fast
|
||
// enough for the hybrid path. Without SQL filter, use HNSW normally.
|
||
let all_results = if valid_ids.is_some() {
|
||
// Brute-force path: score ALL vectors, filter by SQL IDs later.
|
||
let embeddings = store::load_embeddings(&state.store, &req.index_name).await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("load embeddings: {e}")))?;
|
||
search::search(&qv, &embeddings, embeddings.len()) // score everything
|
||
} else if state.hnsw_store.has_index(&req.index_name).await {
|
||
state.hnsw_store.search(&req.index_name, &qv, req.top_k).await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?
|
||
.into_iter()
|
||
.map(|h| search::SearchResult {
|
||
doc_id: h.doc_id,
|
||
chunk_text: h.chunk_text,
|
||
score: h.score,
|
||
source: h.source,
|
||
chunk_idx: h.chunk_idx as u32,
|
||
})
|
||
.collect::<Vec<_>>()
|
||
} else {
|
||
let embeddings = store::load_embeddings(&state.store, &req.index_name).await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("load embeddings: {e}")))?;
|
||
search::search(&qv, &embeddings, req.top_k)
|
||
};
|
||
|
||
// Step 3: Filter vector results to only SQL-verified IDs.
|
||
// ADR-020: read the index's id_prefix from the catalog instead of
|
||
// hardcoding prefix stripping. Falls back to heuristic for legacy indexes.
|
||
let id_prefix: Option<String> = state.index_registry
|
||
.get(&req.index_name).await
|
||
.and_then(|m| m.id_prefix.clone());
|
||
|
||
let sql_count = valid_ids.as_ref().map(|s| s.len()).unwrap_or(0);
|
||
// Phase 19: when playbook_memory is consulted, pull a wider candidate
|
||
// pool so endorsed workers outside the vanilla top-K can still be
|
||
// boosted into visibility. 5× is a conservative multiplier — plenty
|
||
// for a +0.25 boost to flip rankings without dragging the cost up.
|
||
let fetch_k = if req.use_playbook_memory { req.top_k * 5 } else { req.top_k };
|
||
let filtered: Vec<search::SearchResult> = if let Some(ref ids) = valid_ids {
|
||
all_results.into_iter()
|
||
.filter(|r| {
|
||
let raw_id = if let Some(ref prefix) = id_prefix {
|
||
r.doc_id.strip_prefix(prefix.as_str()).unwrap_or(&r.doc_id)
|
||
} else {
|
||
// Legacy: heuristic strip for pre-ADR-020 indexes
|
||
r.doc_id.strip_prefix("W500K-")
|
||
.or_else(|| r.doc_id.strip_prefix("W500-"))
|
||
.or_else(|| r.doc_id.strip_prefix("W5K-"))
|
||
.or_else(|| r.doc_id.strip_prefix("W-"))
|
||
.or_else(|| r.doc_id.strip_prefix("CAND-"))
|
||
.unwrap_or(&r.doc_id)
|
||
};
|
||
ids.contains(raw_id)
|
||
})
|
||
.take(fetch_k)
|
||
.collect()
|
||
} else {
|
||
all_results.into_iter().take(fetch_k).collect()
|
||
};
|
||
|
||
// Step 4: Build sources with SQL-verified flag.
|
||
let mut sources: Vec<HybridSource> = filtered.iter().map(|r| HybridSource {
|
||
doc_id: r.doc_id.clone(),
|
||
chunk_text: r.chunk_text.clone(),
|
||
score: r.score,
|
||
sql_verified: valid_ids.is_some(),
|
||
playbook_boost: 0.0,
|
||
playbook_citations: Vec::new(),
|
||
}).collect();
|
||
|
||
// Step 4b (Phase 19): if use_playbook_memory, look up semantically
|
||
// similar past playbooks and boost workers they endorsed. Name-match
|
||
// is on the tuple (city, state, name) extracted from chunk_text —
|
||
// hybrid_search's SQL filter already narrowed to one city+state, so
|
||
// this just needs to check the name against each playbook's endorsed
|
||
// set. Additive boost on the existing vector score, then re-sort.
|
||
if req.use_playbook_memory {
|
||
let boost_k = req.playbook_memory_k.unwrap_or(playbook_memory::DEFAULT_TOP_K_PLAYBOOKS);
|
||
// Extract target (city, state, role) from the SQL filter so
|
||
// compute_boost_for can skip playbooks from other cities AND
|
||
// prioritize exact role matches via the multi-strategy path.
|
||
// The executor's filter shape is stable:
|
||
// `... role = 'Welder' AND city = 'Toledo' AND state = 'OH' ...`.
|
||
// Case-insensitive match, tolerant of single quotes.
|
||
let target_geo = req.sql_filter.as_deref().and_then(extract_target_geo);
|
||
let target_role = req.sql_filter.as_deref().and_then(extract_target_role);
|
||
// We embedded the question as `qv` above — reuse it for the
|
||
// playbook similarity lookup so we don't double-pay Ollama.
|
||
let boosts = state.playbook_memory
|
||
.compute_boost_for_filtered_with_role(
|
||
&qv,
|
||
boost_k,
|
||
0.5,
|
||
target_geo.as_ref().map(|(c, s)| (c.as_str(), s.as_str())),
|
||
target_role.as_deref(),
|
||
)
|
||
.await;
|
||
|
||
// Diagnostics for Phase 19 boost pipeline. Logged so item 3
|
||
// investigation has ground truth:
|
||
// - boosts.len(): how many (city,state,name) keys surfaced for
|
||
// this query (0 = playbook_memory found nothing semantically
|
||
// similar to the question).
|
||
// - parsed: how many candidate chunks parsed cleanly into
|
||
// (name,city,state) via parse_worker_chunk.
|
||
// - matched: how many parsed keys matched an entry in boosts.
|
||
// 2026-04-21 — 20-scenario batch showed 34/40 ok combos never
|
||
// got a citation. These counters pin whether the gap is on the
|
||
// SIMILARITY side (boosts empty) or the MATCH side (parsed vs
|
||
// boosted keys mismatch — e.g. name format drift).
|
||
let mut parsed_count = 0usize;
|
||
let mut matched_count = 0usize;
|
||
for src in sources.iter_mut() {
|
||
// Parse "{Name} — {Role} in {City}, {State}. …" chunk. Being
|
||
// defensive: chunks from other datasets may not follow this
|
||
// exact shape, so absent fields just skip the boost.
|
||
if let Some((name, city, state)) = parse_worker_chunk(&src.chunk_text) {
|
||
parsed_count += 1;
|
||
let key = (city, state, name);
|
||
if let Some(entry) = boosts.get(&key) {
|
||
src.score += entry.boost;
|
||
src.playbook_boost = entry.boost;
|
||
src.playbook_citations = entry.citations.clone();
|
||
matched_count += 1;
|
||
}
|
||
}
|
||
}
|
||
tracing::info!(
|
||
"playbook_boost: boosts={} sources={} parsed={} matched={} target_geo={:?} target_role={:?} (query='{}')",
|
||
boosts.len(),
|
||
sources.len(),
|
||
parsed_count,
|
||
matched_count,
|
||
target_geo,
|
||
target_role,
|
||
req.question.chars().take(60).collect::<String>(),
|
||
);
|
||
// Re-rank: boosted scores can flip ordering.
|
||
sources.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
|
||
// Finally trim to the caller's requested top_k — we pulled fetch_k
|
||
// (5× wider) above specifically so the boost could reach workers
|
||
// that would otherwise have been trimmed pre-boost.
|
||
sources.truncate(req.top_k);
|
||
}
|
||
|
||
// Step 5: Generate answer if requested.
|
||
let answer = if req.generate && !sources.is_empty() {
|
||
let context: String = sources.iter().enumerate().map(|(i, s)| {
|
||
format!("[{}] (id: {}, verified: {}) {}", i + 1, s.doc_id, s.sql_verified, s.chunk_text)
|
||
}).collect::<Vec<_>>().join("\n\n");
|
||
|
||
let gen_resp = state.ai_client.generate(GenerateRequest {
|
||
prompt: format!(
|
||
"You are a staffing intelligence assistant. Answer based ONLY on these \
|
||
verified worker records. Every record has been SQL-verified against the \
|
||
database — you can trust the facts in them. Be specific: cite names, \
|
||
skills, certifications, scores, and locations.\n\n\
|
||
Records:\n{context}\n\n\
|
||
Question: {}\n\nAnswer:", req.question,
|
||
),
|
||
model: None,
|
||
system: None,
|
||
temperature: Some(0.2),
|
||
max_tokens: Some(512),
|
||
// Hybrid's answer step — prose output over retrieved records,
|
||
// no reasoning needed on the hot path.
|
||
think: Some(false),
|
||
}).await;
|
||
|
||
gen_resp.ok().map(|r| r.text.trim().to_string())
|
||
} else {
|
||
None
|
||
};
|
||
|
||
let method = if valid_ids.is_some() { "hybrid_sql_vector" } else { "vector_only" };
|
||
|
||
Ok(Json(HybridResponse {
|
||
question: req.question,
|
||
sql_filter: req.sql_filter,
|
||
sql_matches: sql_count,
|
||
vector_reranked: sources.len(),
|
||
method: method.into(),
|
||
answer,
|
||
sources,
|
||
duration_ms: t0.elapsed().as_millis() as u64,
|
||
}))
|
||
}
|
||
|
||
// --- HNSW Fast Search ---
|
||
|
||
/// Body for the HNSW build endpoint (see `build_hnsw`).
#[derive(Deserialize)]
struct BuildHnswRequest {
    /// Name of the stored vector index to build HNSW from
    index_name: String,
    /// Optional config override. Omit to use the production default
    /// (ec=80 es=30 — see HnswConfig::default docs for rationale).
    #[serde(default)]
    config: Option<trial::HnswConfig>,
}
|
||
|
||
/// Build an HNSW index from an existing stored vector index.
|
||
/// Uses the embedding cache so repeated builds don't reload from Parquet.
|
||
async fn build_hnsw(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<BuildHnswRequest>,
|
||
) -> impl IntoResponse {
|
||
let config = req.config.unwrap_or_default();
|
||
tracing::info!(
|
||
"building HNSW for '{}' ef_construction={} ef_search={}",
|
||
req.index_name, config.ef_construction, config.ef_search,
|
||
);
|
||
|
||
let embeddings = state
|
||
.embedding_cache
|
||
.get_or_load(&req.index_name)
|
||
.await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("index not found: {e}")))?;
|
||
|
||
match state
|
||
.hnsw_store
|
||
.build_index_with_config(&req.index_name, (*embeddings).clone(), &config)
|
||
.await
|
||
{
|
||
Ok(stats) => Ok(Json(stats)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// Body for the HNSW search endpoint (see `search_hnsw`).
#[derive(Deserialize)]
struct HnswSearchRequest {
    /// Name of the HNSW index to query.
    index_name: String,
    /// Natural-language query text; embedded before the ANN lookup.
    query: String,
    /// Result count; `search_hnsw` defaults this to 5 when omitted.
    top_k: Option<usize>,
}
|
||
|
||
/// Search using HNSW — approximate nearest neighbors, much faster than brute-force.
|
||
async fn search_hnsw(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<HnswSearchRequest>,
|
||
) -> impl IntoResponse {
|
||
let top_k = req.top_k.unwrap_or(5);
|
||
|
||
// Embed query
|
||
let embed_resp = state.ai_client.embed(EmbedRequest {
|
||
texts: vec![req.query.clone()],
|
||
model: None,
|
||
}).await.map_err(|e| (StatusCode::BAD_GATEWAY, format!("embed error: {e}")))?;
|
||
|
||
if embed_resp.embeddings.is_empty() {
|
||
return Err((StatusCode::BAD_GATEWAY, "no embedding returned".to_string()));
|
||
}
|
||
|
||
let query_vec: Vec<f32> = embed_resp.embeddings[0].iter().map(|&x| x as f32).collect();
|
||
|
||
// Search HNSW
|
||
match state.hnsw_store.search(&req.index_name, &query_vec, top_k).await {
|
||
Ok(results) => Ok(Json(serde_json::json!({
|
||
"results": results,
|
||
"query": req.query,
|
||
"method": "hnsw",
|
||
}))),
|
||
Err(e) => Err((StatusCode::NOT_FOUND, e)),
|
||
}
|
||
}
|
||
|
||
async fn list_hnsw(State(state): State<VectorState>) -> impl IntoResponse {
|
||
Json(state.hnsw_store.list().await)
|
||
}
|
||
|
||
// --- Trial System: parameterized HNSW tuning loop ---
|
||
//
|
||
// Flow:
|
||
// 1. Agent picks an HnswConfig
|
||
// 2. POST /hnsw/trial builds HNSW with that config against cached embeddings,
|
||
// runs every query in the harness, measures latency + recall vs the
|
||
// harness's ground truth, appends a Trial record to _hnsw_trials/{idx}.jsonl
|
||
// 3. Agent reads GET /hnsw/trials/{index}, sees history, decides next config
|
||
// 4. Repeat until converged.
|
||
//
|
||
// The first trial triggers embedding load (slow). Every subsequent trial reuses
|
||
// the cache — so the agent iterates in seconds, not minutes.
|
||
|
||
/// Body for POST /hnsw/trial — one parameterized HNSW tuning run
/// (see the trial-system flow comment above).
#[derive(Deserialize)]
struct TrialRequest {
    /// Stored vector index to trial against.
    index_name: String,
    /// Name of the eval harness (query set + ground truth) to score with;
    /// must belong to `index_name`.
    harness: String,
    /// HNSW parameters under test; missing fields use `HnswConfig::default`.
    #[serde(default)]
    config: trial::HnswConfig,
    /// Free-form note recorded verbatim on the trial record.
    #[serde(default)]
    note: Option<String>,
}
|
||
|
||
async fn run_trial(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<TrialRequest>,
|
||
) -> Result<Json<trial::Trial>, (StatusCode, String)> {
|
||
let mut harness_set = state.harness_store.load_for_index(&req.index_name, &req.harness)
|
||
.await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("harness not found: {e}")))?;
|
||
|
||
if harness_set.index_name != req.index_name {
|
||
return Err((
|
||
StatusCode::BAD_REQUEST,
|
||
format!(
|
||
"harness '{}' is for index '{}', not '{}'",
|
||
req.harness, harness_set.index_name, req.index_name
|
||
),
|
||
));
|
||
}
|
||
if harness_set.queries.is_empty() {
|
||
return Err((StatusCode::BAD_REQUEST, "harness has no queries".into()));
|
||
}
|
||
|
||
let embeddings = state
|
||
.embedding_cache
|
||
.get_or_load(&req.index_name)
|
||
.await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("load embeddings: {e}")))?;
|
||
|
||
if !harness_set.ground_truth_built {
|
||
tracing::info!("trial: computing ground truth for harness '{}'", harness_set.name);
|
||
let t0 = std::time::Instant::now();
|
||
harness::compute_ground_truth(&mut harness_set, &embeddings, &state.ai_client)
|
||
.await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
|
||
tracing::info!("trial: ground truth built in {:.1}s", t0.elapsed().as_secs_f32());
|
||
state.harness_store
|
||
.save(&harness_set)
|
||
.await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save harness: {e}")))?;
|
||
}
|
||
|
||
let trial_id = trial::Trial::new_id();
|
||
let hnsw_slot = format!("{}__{}", req.index_name, trial_id);
|
||
|
||
let build_stats = state
|
||
.hnsw_store
|
||
.build_index_with_config(&hnsw_slot, (*embeddings).clone(), &req.config)
|
||
.await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("build: {e}")))?;
|
||
|
||
let query_vectors: Vec<Vec<f32>> = harness_set
|
||
.queries
|
||
.iter()
|
||
.filter_map(|q| q.query_embedding.clone())
|
||
.collect();
|
||
let bench = state
|
||
.hnsw_store
|
||
.bench_search(&hnsw_slot, &query_vectors, harness_set.k)
|
||
.await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("search: {e}")))?;
|
||
|
||
let mut recalls = Vec::with_capacity(harness_set.queries.len());
|
||
for (q, hits) in harness_set.queries.iter().zip(bench.retrieved.iter()) {
|
||
if let Some(gt) = &q.ground_truth {
|
||
recalls.push(harness::recall_at_k(hits, gt, harness_set.k));
|
||
}
|
||
}
|
||
let mean_recall = if recalls.is_empty() {
|
||
0.0
|
||
} else {
|
||
recalls.iter().sum::<f32>() / recalls.len() as f32
|
||
};
|
||
|
||
let mut lats = bench.latencies_us.clone();
|
||
lats.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
|
||
let p = |pct: f32| -> f32 {
|
||
if lats.is_empty() { return 0.0; }
|
||
let idx = ((lats.len() as f32 - 1.0) * pct).round() as usize;
|
||
lats[idx.min(lats.len() - 1)]
|
||
};
|
||
|
||
// One brute-force reference latency — keeps the cost proportional to
|
||
// whatever the agent is willing to pay per trial.
|
||
let brute_latency_us = if let Some(qv) = query_vectors.first() {
|
||
let t0 = std::time::Instant::now();
|
||
let _ = harness::brute_force_top_k(qv, &embeddings, harness_set.k);
|
||
t0.elapsed().as_micros() as f32
|
||
} else {
|
||
0.0
|
||
};
|
||
|
||
let dims = embeddings.first().map(|e| e.vector.len()).unwrap_or(0);
|
||
let memory_bytes =
|
||
(embeddings.len() * dims * std::mem::size_of::<f32>() + embeddings.len() * 128) as u64;
|
||
|
||
let trial_record = trial::Trial {
|
||
id: trial_id.clone(),
|
||
index_name: req.index_name.clone(),
|
||
eval_set: req.harness.clone(),
|
||
config: req.config.clone(),
|
||
metrics: trial::TrialMetrics {
|
||
build_time_secs: build_stats.build_time_secs,
|
||
search_latency_p50_us: p(0.50),
|
||
search_latency_p95_us: p(0.95),
|
||
search_latency_p99_us: p(0.99),
|
||
recall_at_k: mean_recall,
|
||
memory_bytes,
|
||
vectors: build_stats.vectors,
|
||
eval_queries: harness_set.queries.len(),
|
||
brute_force_latency_us: brute_latency_us,
|
||
},
|
||
created_at: chrono::Utc::now(),
|
||
note: req.note,
|
||
};
|
||
|
||
state
|
||
.trial_journal
|
||
.append(&trial_record)
|
||
.await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("journal: {e}")))?;
|
||
|
||
state.hnsw_store.drop(&hnsw_slot).await;
|
||
|
||
Ok(Json(trial_record))
|
||
}
|
||
|
||
async fn list_trials(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
) -> impl IntoResponse {
|
||
match state.trial_journal.list(&index_name).await {
|
||
Ok(trials) => Ok(Json(trials)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// Query string for the best-trial lookup (see `best_trial`).
#[derive(Deserialize)]
struct BestTrialQuery {
    /// Selection metric; defaults to "pareto" via `default_metric`.
    #[serde(default = "default_metric")]
    metric: String,
}
|
||
|
||
/// Serde default for `BestTrialQuery::metric` — Pareto-front selection.
fn default_metric() -> String {
    String::from("pareto")
}
|
||
|
||
async fn best_trial(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
Query(q): Query<BestTrialQuery>,
|
||
) -> impl IntoResponse {
|
||
match state.trial_journal.best(&index_name, &q.metric).await {
|
||
Ok(Some(t)) => Ok(Json(t)),
|
||
Ok(None) => Err((StatusCode::NOT_FOUND, "no trials yet".to_string())),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
// --- Harness management ---
|
||
|
||
async fn list_evals(State(state): State<VectorState>) -> impl IntoResponse {
|
||
Json(state.harness_store.list_all().await)
|
||
}
|
||
|
||
async fn get_eval(
|
||
State(state): State<VectorState>,
|
||
Path(name): Path<String>,
|
||
) -> impl IntoResponse {
|
||
match state.harness_store.get_any(&name).await {
|
||
Ok(e) => Ok(Json(e)),
|
||
Err(err) => Err((StatusCode::NOT_FOUND, err)),
|
||
}
|
||
}
|
||
|
||
async fn put_eval(
|
||
State(state): State<VectorState>,
|
||
Path(name): Path<String>,
|
||
Json(mut harness_set): Json<harness::EvalSet>,
|
||
) -> impl IntoResponse {
|
||
harness_set.name = name;
|
||
harness_set.ground_truth_built = harness_set
|
||
.queries
|
||
.iter()
|
||
.all(|q| q.ground_truth.is_some());
|
||
match state.harness_store.save(&harness_set).await {
|
||
Ok(()) => Ok(Json(harness_set)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// Body for autogenerating a synthetic eval set from an index's chunks
/// (see `autogen_eval`).
#[derive(Deserialize)]
struct AutogenRequest {
    /// Index whose chunks are sampled into synthetic queries.
    index_name: String,
    /// How many chunks to sample (default 100 via `default_sample_count`).
    #[serde(default = "default_sample_count")]
    sample_count: usize,
    /// k for recall@k ground truth (default 10 via `default_k`).
    #[serde(default = "default_k")]
    k: usize,
}
|
||
|
||
/// Serde default: number of chunks sampled into a synthetic eval set.
fn default_sample_count() -> usize {
    100
}

/// Serde default: k used for recall@k ground truth.
fn default_k() -> usize {
    10
}
|
||
|
||
async fn autogen_eval(
|
||
State(state): State<VectorState>,
|
||
Path(name): Path<String>,
|
||
Json(req): Json<AutogenRequest>,
|
||
) -> Result<Json<harness::EvalSet>, (StatusCode, String)> {
|
||
let embeddings = state
|
||
.embedding_cache
|
||
.get_or_load(&req.index_name)
|
||
.await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("load embeddings: {e}")))?;
|
||
|
||
let mut harness_set = harness::synthetic_from_chunks(
|
||
&name,
|
||
&req.index_name,
|
||
&embeddings,
|
||
req.sample_count,
|
||
req.k,
|
||
);
|
||
|
||
harness::compute_ground_truth(&mut harness_set, &embeddings, &state.ai_client)
|
||
.await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
|
||
|
||
state.harness_store
|
||
.save(&harness_set)
|
||
.await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save: {e}")))?;
|
||
|
||
Ok(Json(harness_set))
|
||
}
|
||
|
||
// --- Embedding cache management ---
|
||
|
||
async fn cache_stats(State(state): State<VectorState>) -> impl IntoResponse {
|
||
Json(state.embedding_cache.stats().await)
|
||
}
|
||
|
||
async fn cache_evict(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
) -> impl IntoResponse {
|
||
let ok = state.embedding_cache.evict(&index_name).await;
|
||
Json(serde_json::json!({ "evicted": ok, "index_name": index_name }))
|
||
}
|
||
|
||
// --- Phase C: embedding refresh ---
|
||
//
|
||
// Decouples "new row data arrived" from "re-embed everything." Ingest marks
|
||
// a dataset's embeddings stale (see catalogd::registry::mark_embeddings_stale);
|
||
// `/vectors/refresh/{dataset}` diffs existing embeddings against current
|
||
// rows, embeds only the new ones, appends to the index, and clears the
|
||
// stale flag.
|
||
|
||
async fn refresh_dataset(
|
||
State(state): State<VectorState>,
|
||
Path(dataset_name): Path<String>,
|
||
Json(req): Json<refresh::RefreshRequest>,
|
||
) -> Result<Json<refresh::RefreshResult>, (StatusCode, String)> {
|
||
tracing::info!(
|
||
"refresh requested for dataset '{}' -> index '{}'",
|
||
dataset_name, req.index_name,
|
||
);
|
||
match refresh::refresh_index(
|
||
&dataset_name,
|
||
&req,
|
||
&state.store,
|
||
&state.catalog,
|
||
&state.ai_client,
|
||
&state.embedding_cache,
|
||
&state.index_registry,
|
||
)
|
||
.await
|
||
{
|
||
Ok(result) => Ok(Json(result)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// One dataset whose embeddings are flagged stale (see `list_stale`).
#[derive(Serialize)]
struct StaleEntry {
    dataset_name: String,
    /// RFC 3339 timestamp of the last successful embed, if any.
    last_embedded_at: Option<String>,
    /// RFC 3339 timestamp when the stale flag was set; empty string when
    /// the underlying timestamp is missing.
    stale_since: String,
    /// The dataset's configured refresh policy, if one is set.
    refresh_policy: Option<shared::types::RefreshPolicy>,
}
|
||
|
||
async fn list_stale(State(state): State<VectorState>) -> impl IntoResponse {
|
||
let datasets = state.catalog.stale_datasets().await;
|
||
let entries: Vec<StaleEntry> = datasets
|
||
.into_iter()
|
||
.map(|d| StaleEntry {
|
||
dataset_name: d.name,
|
||
last_embedded_at: d.last_embedded_at.map(|t| t.to_rfc3339()),
|
||
stale_since: d
|
||
.embedding_stale_since
|
||
.map(|t| t.to_rfc3339())
|
||
.unwrap_or_default(),
|
||
refresh_policy: d.embedding_refresh_policy,
|
||
})
|
||
.collect();
|
||
Json(entries)
|
||
}
|
||
|
||
// --- Phase 17: Model profile activation + scoped search ---
|
||
|
||
/// Summary of one completed profile activation; serialized into the
/// background job's result by `activate_profile`.
#[derive(Serialize)]
struct ActivateReport {
    profile_id: String,
    /// Ollama model name bound to the profile.
    ollama_name: String,
    /// Indexes successfully warmed (embedding cache + HNSW, or Lance).
    indexes_warmed: Vec<WarmedIndex>,
    /// Per-index / per-step failure messages; activation keeps going past
    /// individual failures (ADR-017).
    failures: Vec<String>,
    /// Sum of vector counts across warmed indexes.
    total_vectors: usize,
    /// Wall-clock seconds for the whole activation job.
    duration_secs: f32,
    /// Phase C: did we successfully preload the Ollama model?
    model_preloaded: bool,
    /// Phase C: which profile previously held the GPU slot, if any.
    /// Useful for observability of the swap.
    previous_profile: Option<String>,
}
|
||
|
||
/// One successfully warmed index inside an `ActivateReport`.
#[derive(Serialize)]
struct WarmedIndex {
    index_name: String,
    /// Dataset or view the index was built from.
    source: String,
    /// Number of vectors loaded/migrated for this index.
    vectors: usize,
    /// Seconds spent building (HNSW build, or Lance parquet migrate);
    /// 0.0 when an already-populated Lance table needed no work.
    hnsw_build_secs: f32,
}
|
||
|
||
/// Warm this profile's indexes. For every bound dataset, find the
|
||
/// matching vector index (any index whose `source` equals the dataset
|
||
/// or view name), load its embeddings into EmbeddingCache, build HNSW
|
||
/// with the profile's config. Next `/profile/{id}/search` call is then
|
||
/// <1ms cold.
|
||
///
|
||
/// Failures on individual indexes don't stop the activation — they get
|
||
/// reported in the response. This matches the "substrate keeps working"
|
||
/// philosophy from ADR-017: one bad binding shouldn't take down the
|
||
/// whole profile.
|
||
async fn activate_profile(
|
||
State(state): State<VectorState>,
|
||
Path(profile_id): Path<String>,
|
||
) -> impl IntoResponse {
|
||
tracing::info!("[activate_profile] START profile_id={}", profile_id);
|
||
let profile = match state.catalog.get_profile(&profile_id).await {
|
||
Some(p) => p,
|
||
None => return Err((StatusCode::NOT_FOUND, format!("profile not found: {profile_id}"))),
|
||
};
|
||
|
||
let job_id = state.job_tracker.create_profile_activation(&profile_id).await;
|
||
let job_id_for_response = job_id.clone();
|
||
let tracker = state.job_tracker.clone();
|
||
let _catalog = state.catalog.clone();
|
||
let index_registry = state.index_registry.clone();
|
||
let bucket_registry = state.bucket_registry.clone();
|
||
let lance = state.lance.clone();
|
||
let embedding_cache = state.embedding_cache.clone();
|
||
let hnsw_store = state.hnsw_store.clone();
|
||
let promotion_registry = state.promotion_registry.clone();
|
||
let ai_client = state.ai_client.clone();
|
||
let active_profile = state.active_profile.clone();
|
||
let profile_name = profile.ollama_name.clone();
|
||
let profile_id_clone = profile.id.clone();
|
||
let profile_bucket = profile.bucket.clone();
|
||
let profile_bound = profile.bound_datasets.clone();
|
||
let profile_hnsw = profile.hnsw_config.clone();
|
||
let profile_backend = profile.vector_backend.clone();
|
||
let _profile_full = profile.clone();
|
||
|
||
tokio::spawn(async move {
|
||
let t0 = std::time::Instant::now();
|
||
let mut warmed = Vec::new();
|
||
let mut failures = Vec::new();
|
||
let mut total_vectors = 0usize;
|
||
let job_id = job_id;
|
||
|
||
let previous_slot = {
|
||
let guard = active_profile.read().await;
|
||
guard.clone()
|
||
};
|
||
if let Some(prev) = &previous_slot {
|
||
if prev.ollama_name != profile_name {
|
||
match ai_client.unload_model(&prev.ollama_name).await {
|
||
Ok(_) => tracing::info!(
|
||
"profile swap: unloaded '{}' ({} -> {})",
|
||
prev.ollama_name, prev.profile_id, profile_id_clone,
|
||
),
|
||
Err(e) => failures.push(format!("unload previous model '{}': {e}", prev.ollama_name)),
|
||
}
|
||
}
|
||
}
|
||
|
||
if let Some(bucket_name) = profile_bucket.clone() {
|
||
if !bucket_registry.contains(&bucket_name) {
|
||
let root = format!(
|
||
"{}/{}",
|
||
bucket_registry.profile_root().trim_end_matches('/'),
|
||
bucket_name.replace(':', "_"),
|
||
);
|
||
let bc = shared::config::BucketConfig {
|
||
name: bucket_name.clone(),
|
||
backend: "local".to_string(),
|
||
root: Some(root.clone()),
|
||
bucket: None,
|
||
region: None,
|
||
endpoint: None,
|
||
secret_ref: None,
|
||
};
|
||
match bucket_registry.add_bucket(bc).await {
|
||
Ok(info) => {
|
||
tracing::info!(
|
||
"profile '{}' activated bucket '{}' (root={}, reachable={})",
|
||
profile_id_clone, bucket_name, root, info.reachable,
|
||
);
|
||
}
|
||
Err(e) => failures.push(format!("auto-provision bucket '{}': {}", bucket_name, e)),
|
||
}
|
||
}
|
||
}
|
||
|
||
let all_indexes = index_registry.list(None, None).await;
|
||
let use_lance = profile_backend == shared::types::VectorBackend::Lance;
|
||
|
||
for binding in &profile_bound {
|
||
let matched: Vec<_> = all_indexes
|
||
.iter()
|
||
.filter(|m| &m.source == binding)
|
||
.collect();
|
||
if matched.is_empty() {
|
||
failures.push(format!("no vector index found for binding '{}'", binding));
|
||
continue;
|
||
}
|
||
for meta in matched {
|
||
if use_lance {
|
||
let bucket = meta.bucket.clone();
|
||
let lance_store = match lance.store_for_new(&meta.index_name, &bucket).await {
|
||
Ok(s) => s,
|
||
Err(e) => { failures.push(format!("{}: lance store init: {e}", meta.index_name)); continue; }
|
||
};
|
||
let count = lance_store.count().await.unwrap_or(0);
|
||
if count == 0 {
|
||
let pq_store = match bucket_registry.get(&bucket) {
|
||
Ok(s) => s,
|
||
Err(e) => { failures.push(format!("{}: bucket: {e}", meta.index_name)); continue; }
|
||
};
|
||
match storaged::ops::get(&pq_store, &meta.storage_key).await {
|
||
Ok(bytes) => {
|
||
let build_t = std::time::Instant::now();
|
||
match lance_store.migrate_from_parquet_bytes(&bytes).await {
|
||
Ok(ms) => {
|
||
total_vectors += ms.rows_written;
|
||
tracing::info!("lance auto-migrate '{}': {} rows in {:.2}s", meta.index_name, ms.rows_written, ms.duration_secs);
|
||
warmed.push(WarmedIndex {
|
||
index_name: meta.index_name.clone(),
|
||
source: meta.source.clone(),
|
||
vectors: ms.rows_written,
|
||
hnsw_build_secs: build_t.elapsed().as_secs_f32(),
|
||
});
|
||
}
|
||
Err(e) => failures.push(format!("{}: lance migrate: {e}", meta.index_name)),
|
||
}
|
||
}
|
||
Err(e) => failures.push(format!("{}: read parquet: {e}", meta.index_name)),
|
||
}
|
||
} else {
|
||
total_vectors += count;
|
||
warmed.push(WarmedIndex {
|
||
index_name: meta.index_name.clone(),
|
||
source: meta.source.clone(),
|
||
vectors: count,
|
||
hnsw_build_secs: 0.0,
|
||
});
|
||
}
|
||
if !lance_store.has_vector_index().await.unwrap_or(false) {
|
||
match lance_store.build_index(316, 8, 48).await {
|
||
Ok(ix) => tracing::info!("lance auto-index '{}': IVF_PQ built in {:.1}s", meta.index_name, ix.build_time_secs),
|
||
Err(e) => failures.push(format!("{}: lance IVF_PQ build: {e}", meta.index_name)),
|
||
}
|
||
}
|
||
if !lance_store.has_scalar_index("doc_id").await.unwrap_or(false) {
|
||
match lance_store.build_scalar_index("doc_id").await {
|
||
Ok(ix) => tracing::info!("lance auto-index '{}': doc_id btree built in {:.2}s", meta.index_name, ix.build_time_secs),
|
||
Err(e) => failures.push(format!("{}: lance doc_id btree: {e}", meta.index_name)),
|
||
}
|
||
}
|
||
} else {
|
||
let embeddings = match embedding_cache.get_or_load(&meta.index_name).await {
|
||
Ok(arc) => arc,
|
||
Err(e) => { failures.push(format!("{}: load failed: {}", meta.index_name, e)); continue; }
|
||
};
|
||
total_vectors += embeddings.len();
|
||
|
||
let profile_default = trial::HnswConfig {
|
||
ef_construction: profile_hnsw.ef_construction,
|
||
ef_search: profile_hnsw.ef_search,
|
||
seed: profile_hnsw.seed,
|
||
};
|
||
let cfg = promotion_registry
|
||
.config_or(&meta.index_name, profile_default)
|
||
.await;
|
||
let build_t = std::time::Instant::now();
|
||
match hnsw_store
|
||
.build_index_with_config(&meta.index_name, (*embeddings).clone(), &cfg)
|
||
.await
|
||
{
|
||
Ok(_) => {
|
||
warmed.push(WarmedIndex {
|
||
index_name: meta.index_name.clone(),
|
||
source: meta.source.clone(),
|
||
vectors: embeddings.len(),
|
||
hnsw_build_secs: build_t.elapsed().as_secs_f32(),
|
||
});
|
||
}
|
||
Err(e) => failures.push(format!("{}: HNSW build failed: {}", meta.index_name, e)),
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
let mut model_preloaded = false;
|
||
match ai_client.preload_model(&profile_name).await {
|
||
Ok(_) => {
|
||
model_preloaded = true;
|
||
tracing::info!("profile '{}' preloaded ollama model '{}'", profile_id_clone, profile_name);
|
||
}
|
||
Err(e) => failures.push(format!("preload ollama model '{}': {e}", profile_name)),
|
||
}
|
||
|
||
{
|
||
let mut guard = active_profile.write().await;
|
||
*guard = Some(ActiveProfileSlot {
|
||
profile_id: profile_id_clone.clone(),
|
||
ollama_name: profile_name.clone(),
|
||
activated_at: chrono::Utc::now(),
|
||
});
|
||
}
|
||
|
||
let result = serde_json::to_value(ActivateReport {
|
||
profile_id: profile_id_clone,
|
||
ollama_name: profile_name,
|
||
indexes_warmed: warmed,
|
||
failures,
|
||
total_vectors,
|
||
duration_secs: t0.elapsed().as_secs_f32(),
|
||
model_preloaded,
|
||
previous_profile: previous_slot.map(|s| s.profile_id),
|
||
}).ok();
|
||
|
||
tracker.complete(&job_id, result).await;
|
||
});
|
||
|
||
// PRD Phase 41 gate: "Activate a profile → returns 202 in <100ms
|
||
// → job completes in background". 202 ACCEPTED signals async-work
|
||
// started; clients poll /vectors/jobs/{job_id} for progress.
|
||
Ok((StatusCode::ACCEPTED, Json(json!({
|
||
"job_id": job_id_for_response,
|
||
"message": format!("profile activation started — poll /vectors/jobs/{} for progress", job_id_for_response),
|
||
}))))
|
||
}
|
||
|
||
/// Unload this profile's model and clear the active slot. No-op if the
|
||
/// caller isn't the currently-active profile.
|
||
async fn deactivate_profile(
|
||
State(state): State<VectorState>,
|
||
Path(profile_id): Path<String>,
|
||
) -> impl IntoResponse {
|
||
let profile = match state.catalog.get_profile(&profile_id).await {
|
||
Some(p) => p,
|
||
None => return Err((StatusCode::NOT_FOUND, format!("profile not found: {profile_id}"))),
|
||
};
|
||
|
||
let was_active = {
|
||
let mut guard = state.active_profile.write().await;
|
||
match guard.as_ref() {
|
||
Some(s) if s.profile_id == profile_id => {
|
||
let prev = s.clone();
|
||
*guard = None;
|
||
Some(prev)
|
||
}
|
||
_ => None,
|
||
}
|
||
};
|
||
|
||
// Regardless of whether it held the slot, we can still try to unload —
|
||
// the operator's intent is "get this model out of VRAM."
|
||
let unload_result = state.ai_client.unload_model(&profile.ollama_name).await;
|
||
|
||
Ok(Json(serde_json::json!({
|
||
"profile_id": profile.id,
|
||
"ollama_name": profile.ollama_name,
|
||
"was_active": was_active.is_some(),
|
||
"unloaded": unload_result.is_ok(),
|
||
"unload_error": unload_result.err(),
|
||
})))
|
||
}
|
||
|
||
async fn get_active_profile(State(state): State<VectorState>) -> impl IntoResponse {
|
||
let slot = state.active_profile.read().await.clone();
|
||
Json(slot)
|
||
}
|
||
|
||
/// Body for profile-scoped search (see `profile_scoped_search`).
#[derive(Deserialize)]
struct ProfileSearchRequest {
    /// Vector index to query; its `source` must be among the profile's
    /// bound datasets or the request is rejected with 403.
    index_name: String,
    /// Natural-language query text; embedded before searching.
    query: String,
    /// Result count; defaults to 5 when omitted.
    top_k: Option<usize>,
}
|
||
|
||
/// Search scoped to a profile — refuses if the requested index's source
|
||
/// isn't in the profile's bound_datasets. Reuses the existing HNSW
|
||
/// search path when the index is warm; falls back to brute-force cosine
|
||
/// if it's not (handled by the existing search code path).
|
||
async fn profile_scoped_search(
|
||
State(state): State<VectorState>,
|
||
Path(profile_id): Path<String>,
|
||
Json(req): Json<ProfileSearchRequest>,
|
||
) -> impl IntoResponse {
|
||
let profile = match state.catalog.get_profile(&profile_id).await {
|
||
Some(p) => p,
|
||
None => return Err((StatusCode::NOT_FOUND, format!("profile not found: {profile_id}"))),
|
||
};
|
||
|
||
// Verify the index is in scope for this profile.
|
||
let index_meta = match state.index_registry.get(&req.index_name).await {
|
||
Some(m) => m,
|
||
None => return Err((StatusCode::NOT_FOUND, format!("index not found: {}", req.index_name))),
|
||
};
|
||
if !profile.bound_datasets.contains(&index_meta.source) {
|
||
return Err((
|
||
StatusCode::FORBIDDEN,
|
||
format!(
|
||
"profile '{}' is not bound to '{}' — allowed bindings: {:?}",
|
||
profile.id, index_meta.source, profile.bound_datasets,
|
||
),
|
||
));
|
||
}
|
||
|
||
let top_k = req.top_k.unwrap_or(5);
|
||
let use_lance = profile.vector_backend == shared::types::VectorBackend::Lance;
|
||
|
||
// Embed the query.
|
||
let embed_resp = state
|
||
.ai_client
|
||
.embed(EmbedRequest { texts: vec![req.query.clone()], model: None })
|
||
.await
|
||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("embed: {e}")))?;
|
||
if embed_resp.embeddings.is_empty() {
|
||
return Err((StatusCode::BAD_GATEWAY, "no embedding returned".into()));
|
||
}
|
||
let query_vec: Vec<f32> = embed_resp.embeddings[0].iter().map(|&x| x as f32).collect();
|
||
|
||
// ADR-019 hybrid: route to Lance or Parquet+HNSW based on the
|
||
// profile's declared backend. Callers don't need to know which
|
||
// storage tier they're hitting — the profile abstracts it.
|
||
if use_lance {
|
||
let lance_store = state.lance.store_for(&req.index_name).await
|
||
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
||
let t0 = std::time::Instant::now();
|
||
match lance_store.search(
|
||
&query_vec,
|
||
top_k,
|
||
Some(LANCE_DEFAULT_NPROBES),
|
||
Some(LANCE_DEFAULT_REFINE_FACTOR),
|
||
).await {
|
||
Ok(hits) => Ok(Json(serde_json::json!({
|
||
"profile": profile.id,
|
||
"source": index_meta.source,
|
||
"method": "lance_ivf_pq",
|
||
"latency_us": t0.elapsed().as_micros() as u64,
|
||
"results": hits,
|
||
}))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
} else if state.hnsw_store.has_index(&req.index_name).await {
|
||
match state.hnsw_store.search(&req.index_name, &query_vec, top_k).await {
|
||
Ok(hits) => Ok(Json(serde_json::json!({
|
||
"profile": profile.id,
|
||
"source": index_meta.source,
|
||
"method": "hnsw",
|
||
"results": hits,
|
||
}))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
} else {
|
||
let embeddings = state
|
||
.embedding_cache
|
||
.get_or_load(&req.index_name)
|
||
.await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("embeddings: {e}")))?;
|
||
let results = search::search(&query_vec, &embeddings, top_k);
|
||
Ok(Json(serde_json::json!({
|
||
"profile": profile.id,
|
||
"source": index_meta.source,
|
||
"method": "brute_force",
|
||
"results": results,
|
||
})))
|
||
}
|
||
}
|
||
|
||
// --- Phase 16: Promotion + autotune ---
|
||
|
||
#[derive(Deserialize)]
struct PromoteQuery {
    /// Who performed the promotion (operator name / automation id).
    /// Empty string when omitted.
    #[serde(default)]
    promoted_by: String,
    /// Optional free-form note recorded alongside the promotion.
    #[serde(default)]
    note: Option<String>,
}
|
||
|
||
async fn promote_trial(
|
||
State(state): State<VectorState>,
|
||
Path((index_name, trial_id)): Path<(String, String)>,
|
||
Query(q): Query<PromoteQuery>,
|
||
) -> impl IntoResponse {
|
||
// Pull the trial from the journal to get its config.
|
||
let trials = state
|
||
.trial_journal
|
||
.list(&index_name)
|
||
.await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
|
||
let trial = trials
|
||
.iter()
|
||
.find(|t| t.id == trial_id)
|
||
.ok_or_else(|| (StatusCode::NOT_FOUND, format!("trial not found: {trial_id}")))?;
|
||
|
||
let entry = promotion::PromotionEntry {
|
||
config: trial.config.clone(),
|
||
trial_id: trial.id.clone(),
|
||
promoted_at: chrono::Utc::now(),
|
||
promoted_by: q.promoted_by,
|
||
note: q.note,
|
||
};
|
||
match state.promotion_registry.promote(&index_name, entry).await {
|
||
Ok(file) => Ok(Json(file)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
async fn rollback_promotion(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
) -> impl IntoResponse {
|
||
match state.promotion_registry.rollback(&index_name).await {
|
||
Ok(file) => Ok(Json(file)),
|
||
Err(e) => Err((StatusCode::NOT_FOUND, e)),
|
||
}
|
||
}
|
||
|
||
async fn get_promoted(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
) -> impl IntoResponse {
|
||
match state.promotion_registry.load(&index_name).await {
|
||
Ok(file) => Ok(Json(file)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
async fn run_autotune_endpoint(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<autotune::AutotuneRequest>,
|
||
) -> impl IntoResponse {
|
||
match autotune::run_autotune(
|
||
req,
|
||
&state.store,
|
||
&state.catalog,
|
||
&state.ai_client,
|
||
&state.embedding_cache,
|
||
&state.hnsw_store,
|
||
&state.index_registry,
|
||
&state.trial_journal,
|
||
&state.promotion_registry,
|
||
&state.harness_store,
|
||
&state.job_tracker,
|
||
).await {
|
||
Ok(result) => Ok(Json(result)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
// --- Phase 16.2: autotune agent endpoints ---
|
||
|
||
async fn agent_status(State(state): State<VectorState>) -> impl IntoResponse {
|
||
Json(state.agent_handle.status().await)
|
||
}
|
||
|
||
async fn agent_stop(State(state): State<VectorState>) -> impl IntoResponse {
|
||
let stopped = state.agent_handle.stop().await;
|
||
Json(serde_json::json!({ "stopped": stopped }))
|
||
}
|
||
|
||
async fn agent_enqueue(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
) -> impl IntoResponse {
|
||
let event = agent::TriggerEvent::manual(index_name);
|
||
match state.agent_handle.enqueue(event).await {
|
||
Ok(()) => Ok(Json(serde_json::json!({ "enqueued": true }))),
|
||
Err(e) => Err((StatusCode::SERVICE_UNAVAILABLE, e)),
|
||
}
|
||
}
|
||
|
||
// --- ADR-019: Lance hybrid backend HTTP surface ---
|
||
//
|
||
// Lance routes operate on the same `index_name` as the Parquet/HNSW path,
|
||
// but materialize the data as a Lance dataset on disk under
|
||
// `{bucket_root}/lance/{index_name}/`. The two backends are independent:
|
||
// you can have an index in both formats simultaneously. `IndexMeta.vector_backend`
|
||
// records which one is canonical for that index.
|
||
|
||
#[derive(Deserialize)]
struct LanceMigrateRequest {
    /// Optional bucket override. Defaults to whatever the existing
    /// IndexMeta says, or "primary" for indexes that don't exist yet.
    #[serde(default)]
    bucket: Option<String>,
}
|
||
|
||
/// Read the existing Parquet vector file for `index_name` from object
|
||
/// storage, hand the bytes to vectord-lance, return migration stats.
|
||
/// The original Parquet file is left intact — both backends coexist
|
||
/// after migration.
|
||
async fn lance_migrate(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
Json(req): Json<LanceMigrateRequest>,
|
||
) -> impl IntoResponse {
|
||
let meta = state.index_registry.get(&index_name).await
|
||
.ok_or((StatusCode::NOT_FOUND, format!("index not found: {index_name}")))?;
|
||
let bucket = req.bucket.unwrap_or(meta.bucket.clone());
|
||
|
||
// Pull the Parquet bytes via storaged::ops — same path as the
|
||
// existing embedding loader uses.
|
||
let store = state.bucket_registry.get(&bucket)
|
||
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
||
let bytes = storaged::ops::get(&store, &meta.storage_key).await
|
||
.map_err(|e| (StatusCode::NOT_FOUND, format!("read parquet: {e}")))?;
|
||
|
||
let lance_store = state.lance.store_for_new(&index_name, &bucket).await
|
||
.map_err(|e| sanitize_lance_err(e, &index_name))?;
|
||
|
||
let stats = lance_store.migrate_from_parquet_bytes(&bytes).await
|
||
.map_err(|e| sanitize_lance_err(e, &index_name))?;
|
||
|
||
tracing::info!(
|
||
"lance migrate '{}': {} rows, {}d, {} bytes on disk, {:.2}s",
|
||
index_name, stats.rows_written, stats.dimensions,
|
||
stats.disk_bytes, stats.duration_secs,
|
||
);
|
||
|
||
// Auto-build the doc_id btree. The scalar index is what makes
|
||
// get_doc_by_id O(log n) instead of a full table scan; ADR-019
|
||
// calls this out as the load-bearing feature for hybrid lookup.
|
||
// Verified 2026-05-02: skipping this on a 10M-row dataset turns
|
||
// ~5ms doc-fetch into ~100ms (full scan over 35GB). Cheap to
|
||
// build (~1.2s on 10M, +269MB on disk) and only runs once per
|
||
// dataset since `has_scalar_index` short-circuits subsequent calls.
|
||
let scalar_stats = if !lance_store.has_scalar_index("doc_id").await.unwrap_or(false) {
|
||
match lance_store.build_scalar_index("doc_id").await {
|
||
Ok(s) => {
|
||
tracing::info!(
|
||
"lance migrate '{}': doc_id btree built in {:.2}s (+{} bytes)",
|
||
index_name, s.build_time_secs, s.disk_bytes_added,
|
||
);
|
||
Some(s)
|
||
}
|
||
Err(e) => {
|
||
// Don't fail the whole migrate over a missing btree —
|
||
// the dataset is still queryable, just slowly. Log it
|
||
// so it's debuggable.
|
||
tracing::warn!("lance migrate '{}': doc_id btree build failed (will fall back to scan): {e}", index_name);
|
||
None
|
||
}
|
||
}
|
||
} else {
|
||
None
|
||
};
|
||
|
||
Ok::<_, (StatusCode, String)>(Json(serde_json::json!({
|
||
"index_name": index_name,
|
||
"bucket": bucket,
|
||
"lance_path": lance_store.path(),
|
||
"stats": stats,
|
||
"scalar_index": scalar_stats,
|
||
})))
|
||
}
|
||
|
||
#[derive(Deserialize)]
struct LanceIndexRequest {
    /// IVF partition count. Default 316 (≈√100K for the reference set).
    #[serde(default = "default_partitions")]
    num_partitions: u32,
    /// PQ bits per code. Default 8.
    #[serde(default = "default_bits")]
    num_bits: u32,
    /// PQ subvector count. Default 48 (768/48 = 16 dims per subvector).
    #[serde(default = "default_subvectors")]
    num_sub_vectors: u32,
}
|
||
|
||
/// Default IVF partition count: ≈√100K — sane for the reference dataset.
fn default_partitions() -> u32 {
    316
}

/// Default PQ bits per code.
fn default_bits() -> u32 {
    8
}

/// Default PQ subvector count: 768/48 = 16 dims per subvector.
fn default_subvectors() -> u32 {
    48
}
|
||
|
||
/// Sanitize a Lance backend error before returning it to the HTTP
|
||
/// caller. Two responsibilities:
|
||
///
|
||
/// 1. Map "dataset not found" patterns to HTTP 404 instead of 500.
|
||
/// A missing index isn't an internal failure — it's a resource
|
||
/// lookup miss, and the response code should reflect that.
|
||
/// 2. Strip server-side filesystem paths and Rust crate registry
|
||
/// paths (`/root/.cargo/registry/src/index.crates.io-...`) from
|
||
/// the message body. An attacker probing the surface shouldn't
|
||
/// learn the server's directory layout or our exact dep versions.
|
||
///
|
||
/// Surfaced 2026-05-02 by the Lance backend audit: missing-index
|
||
/// search returned 500 + leaked the lakehouse data path AND the
|
||
/// .cargo/registry path with crate versions.
|
||
fn sanitize_lance_err(err: String, index_name: &str) -> (StatusCode, String) {
|
||
// 404 detection — narrowed across two 2026-05-02→03 scrum waves.
|
||
// First wave (opus WARN service.rs:1908): the original `lower.contains
|
||
// ("not found")` was too broad — caught "column not found" /
|
||
// "field not found in schema" which are real 500s. Second wave (opus
|
||
// WARN service.rs:1949): the looser `mentions_path_missing` branch I
|
||
// added would 404 on a registry-file error like "/root/.cargo/.../x.rs:
|
||
// no such file or directory" because it triggers without dataset
|
||
// context. Drop the standalone path-missing branch; require dataset
|
||
// context AND a missing-shape phrase. Lance's actual error format
|
||
// ("Dataset at path X was not found") satisfies this.
|
||
let lower = err.to_lowercase();
|
||
let mentions_dataset = lower.contains("dataset");
|
||
let lance_dataset_missing = mentions_dataset && (
|
||
lower.contains("not found") || lower.contains("does not exist")
|
||
);
|
||
// Excluded shapes — these contain "not found" but are real 500s.
|
||
let column_or_field = lower.contains("column not found")
|
||
|| lower.contains("field not found")
|
||
|| lower.contains("schema not found");
|
||
let is_not_found = lance_dataset_missing && !column_or_field;
|
||
if is_not_found {
|
||
return (StatusCode::NOT_FOUND, format!("lance dataset not found: {index_name}"));
|
||
}
|
||
|
||
// Path redaction — replace path-shaped substrings with [REDACTED]
|
||
// rather than truncating, per opus BLOCK at service.rs:1914 from the
|
||
// 2026-05-02 scrum. The previous `err.split("/home/").next()` returned
|
||
// Some("") when the error string STARTED with "/home/", erasing the
|
||
// entire message and falling back to a generic "lance backend error"
|
||
// that lost all real error context. Replacing keeps the structural
|
||
// error (the "what failed") while stripping the location.
|
||
let cleaned = redact_paths(&err)
|
||
.trim_end_matches([',', ' ', '\n', '\t'])
|
||
.to_string();
|
||
let msg = if cleaned.is_empty() {
|
||
format!("lance backend error on {index_name}")
|
||
} else {
|
||
cleaned
|
||
};
|
||
(StatusCode::INTERNAL_SERVER_ERROR, msg)
|
||
}
|
||
|
||
/// Replace absolute-path substrings (under known leak-prone roots) with
/// "[REDACTED]". Walks the input once, identifying path-shaped runs that
/// start with one of the configured prefixes and continue until a
/// path-terminating character (whitespace, quote, comma, paren, EOL).
///
/// Linear time, no regex dep. Catches multi-occurrence cases that
/// `String::split(p).next()` lost. The surface intentionally includes
/// /var, /tmp, /etc, /usr, /opt in addition to /home and /root/.cargo
/// because Lance/Arrow errors surface system paths as well as project
/// paths. Unmatched stretches are emitted as `&str` slices (never
/// byte-by-byte casts), so multi-byte UTF-8 passes through verbatim
/// (opus WARN service.rs:2018, 2026-05-03 re-scrum).
fn redact_paths(s: &str) -> String {
    // Two prefix sets:
    // - ABSOLUTE: paths starting with '/' (always safe to redact)
    // - RELATIVE: same bodies without the leading '/' (Lance occasionally
    //   strips it when echoing dataset paths, observed live 2026-05-02 —
    //   "Dataset at path home/profit/lakehouse/data/lance/x was not
    //   found"). Relative forms only match at a word boundary so tokens
    //   like "homecoming" or "etcetera" survive.
    const ABSOLUTE: &[&str] = &[
        "/root/.cargo", "/home", "/var", "/tmp", "/etc", "/usr", "/opt",
    ];
    const RELATIVE: &[&str] = &[
        "root/.cargo", "home/", "var/", "tmp/", "etc/", "usr/", "opt/",
    ];

    // A byte that ends a path run.
    fn terminates_path(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\n' | b'\r' | b'"' | b'\'' | b',' | b')' | b']' | b'}')
    }

    // True when position `at` begins a fresh token (start of input, or
    // preceded by a non-identifier byte).
    fn starts_token(bytes: &[u8], at: usize) -> bool {
        match at.checked_sub(1) {
            None => true,
            Some(prev) => {
                let b = bytes[prev];
                !(b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'-'))
            }
        }
    }

    // Byte length of the prefix matched at `at`, if any. Absolute
    // prefixes must be followed by '/', a terminator, or end-of-input
    // (so "/etcd" is not "/etc"); relative prefixes already end in '/'
    // and are only tried at word boundaries.
    fn prefix_at(bytes: &[u8], at: usize) -> Option<usize> {
        for p in ABSOLUTE {
            let pb = p.as_bytes();
            let end = at + pb.len();
            if bytes.len() >= end && &bytes[at..end] == pb {
                let ok_after = end == bytes.len()
                    || bytes[end] == b'/'
                    || terminates_path(bytes[end]);
                if ok_after {
                    return Some(pb.len());
                }
            }
        }
        if starts_token(bytes, at) {
            for p in RELATIVE {
                let pb = p.as_bytes();
                let end = at + pb.len();
                if bytes.len() >= end && &bytes[at..end] == pb {
                    return Some(pb.len());
                }
            }
        }
        None
    }

    let bytes = s.as_bytes();
    let mut redacted = String::with_capacity(s.len());
    let mut run_start = 0usize; // start of the current unmatched stretch
    let mut pos = 0usize;
    while pos < bytes.len() {
        if let Some(plen) = prefix_at(bytes, pos) {
            // Flush the pending unmatched run as a UTF-8-safe slice,
            // then swallow the prefix plus path body up to a terminator.
            redacted.push_str(&s[run_start..pos]);
            redacted.push_str("[REDACTED]");
            let mut end = pos + plen;
            while end < bytes.len() && !terminates_path(bytes[end]) {
                end += 1;
            }
            pos = end;
            run_start = end;
        } else {
            // Advance one CHAR so multi-byte UTF-8 sequences stay whole
            // in the eventual slice.
            pos += utf8_char_len(bytes, pos);
        }
    }
    redacted.push_str(&s[run_start..]);
    redacted
}

/// Length in bytes of the UTF-8 character starting at byte `i`. Callers
/// guarantee `i` sits on a character boundary; a continuation byte is
/// handled defensively by advancing one byte.
fn utf8_char_len(bytes: &[u8], i: usize) -> usize {
    match bytes[i] {
        0x00..=0x7F => 1,
        0x80..=0xBF => 1, // continuation byte — defensive, shouldn't start here
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        _ => 4,
    }
}
|
||
|
||
// Regression suite for sanitize_lance_err / redact_paths. Each test is
// pinned to a specific reviewer finding (opus BLOCK/WARN) from the
// 2026-05-02 and 2026-05-03 scrum waves — keep the provenance comments
// so the guarded behavior isn't "simplified" away later.
#[cfg(test)]
mod sanitize_tests {
    use super::*;

    #[test]
    fn redact_path_at_offset_zero() {
        // Regression: opus BLOCK 2026-05-02. Old impl returned Some("")
        // when err started with "/home/", erasing the whole message.
        let out = redact_paths("/home/profit/lakehouse/data/lance not a directory");
        assert_eq!(out, "[REDACTED] not a directory");
    }

    #[test]
    fn redact_keeps_pre_and_post_text() {
        let out = redact_paths("failed to open /home/profit/lakehouse/data/x for read: ENOENT");
        assert_eq!(out, "failed to open [REDACTED] for read: ENOENT");
    }

    #[test]
    fn redact_multiple_paths() {
        let out = redact_paths("at /root/.cargo/registry/src/index.crates.io-foo/lance-table-4.0.0/src/io/commit.rs:364:26 from /home/profit/lakehouse");
        assert!(!out.contains("/root/.cargo"));
        assert!(!out.contains("/home/"));
        assert!(out.contains("[REDACTED]"));
    }

    #[test]
    fn redact_preserves_quote_terminator() {
        let out = redact_paths("{\"path\":\"/home/profit/x\",\"err\":\"bad\"}");
        assert_eq!(out, "{\"path\":\"[REDACTED]\",\"err\":\"bad\"}");
    }

    #[test]
    fn is_not_found_narrow_dataset_only() {
        // Regression: opus WARN 2026-05-02. Old impl 404'd on any "not
        // found" — including legitimate column/field-not-found 500s.
        let (status, _) = sanitize_lance_err(
            "column not found: vector".into(), "test_idx",
        );
        assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);

        let (status, _) = sanitize_lance_err(
            "dataset not found at /home/profit/lakehouse/data/lance/missing".into(), "test_idx",
        );
        assert_eq!(status, StatusCode::NOT_FOUND);
    }

    #[test]
    fn redact_does_not_match_prefix_substring() {
        // /etcetera should NOT trigger /etc redaction.
        let out = redact_paths("etcetera and /etcd");
        assert_eq!(out, "etcetera and /etcd");
    }

    #[test]
    fn redact_relative_paths_lance_emits() {
        // 2026-05-02: live missing-index probe surfaced Lance error of the
        // form "Dataset at path home/profit/lakehouse/data/lance/x was not
        // found" — leading slash stripped. Need to redact the relative form
        // when preceded by a word boundary.
        let out = redact_paths("Dataset at path home/profit/lakehouse/data/lance/x was not found");
        assert!(!out.contains("home/profit"), "should redact: {out}");
        assert!(out.contains("Dataset at path"));
        assert!(out.contains("was not found"));
    }

    #[test]
    fn redact_does_not_eat_innocent_prefix_words() {
        // "homecoming" must NOT trigger "home/" redaction. "Etcetera" must
        // NOT trigger "etc/" redaction. The word-boundary guard handles this.
        let out = redact_paths("homecoming etcetera vary tmpfile");
        assert_eq!(out, "homecoming etcetera vary tmpfile");
    }

    #[test]
    fn is_not_found_lance_actual_phrasing() {
        // Lance's actual error format observed live: "Dataset at path X was
        // not found: Not found: ...". Must 404, not 500.
        let (status, _) = sanitize_lance_err(
            "Dataset at path home/profit/lakehouse/data/lance/x was not found".into(),
            "x",
        );
        assert_eq!(status, StatusCode::NOT_FOUND);
    }

    #[test]
    fn is_not_found_excludes_column_field_schema() {
        // Real 500s with the "not found" phrase that aren't dataset-missing.
        for err in [
            "column not found: vector",
            "field not found in schema: doc_id",
            "schema not found for dataset xyz",
        ] {
            let (status, _) = sanitize_lance_err(err.into(), "test_idx");
            assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR, "{err}");
        }
    }

    #[test]
    fn is_not_found_does_not_match_unrelated_path_missing() {
        // Regression: opus WARN at service.rs:1949 from the 2026-05-03
        // re-scrum. A registry-file error from inside a Lance internal
        // module should NOT be coerced to 404 just because it contains
        // "no such file or directory" — it's a real 500.
        let (status, _) = sanitize_lance_err(
            "/root/.cargo/registry/src/index.crates.io-foo/lance-table-4.0.0/src/io/commit.rs: no such file or directory".into(),
            "test_idx",
        );
        assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
        // (And the path is still redacted in the message.)
        let (_, msg) = sanitize_lance_err(
            "/root/.cargo/registry/src/lance-foo/x.rs: no such file or directory".into(),
            "test_idx",
        );
        assert!(!msg.contains("/root/.cargo"), "path leak: {msg}");
    }

    #[test]
    fn redact_preserves_multibyte_utf8() {
        // Regression: opus WARN at service.rs:2018 from the 2026-05-03
        // re-scrum. Old impl did `out.push(bytes[i] as char)` which
        // corrupted multi-byte UTF-8 (e.g. a path containing user-supplied
        // names with non-ASCII characters) into Latin-1 mojibake.
        let input = "Failed to open /home/profit/工作/data — café not found";
        let out = redact_paths(input);
        // The path is redacted...
        assert!(!out.contains("/home/profit"), "path leak: {out}");
        // ...AND the multi-byte characters elsewhere are preserved verbatim.
        assert!(out.contains("café"), "lost UTF-8: {out}");
        assert!(out.contains("not found"), "lost trailing context: {out}");
    }
}
|
||
|
||
/// Build the IVF_PQ index on the Lance dataset.
|
||
async fn lance_build_index(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
Json(req): Json<LanceIndexRequest>,
|
||
) -> impl IntoResponse {
|
||
let lance_store = state.lance.store_for(&index_name).await
|
||
.map_err(|e| sanitize_lance_err(e, &index_name))?;
|
||
match lance_store.build_index(req.num_partitions, req.num_bits, req.num_sub_vectors).await {
|
||
Ok(stats) => Ok(Json(stats)),
|
||
Err(e) => Err(sanitize_lance_err(e, &index_name)),
|
||
}
|
||
}
|
||
|
||
#[derive(Deserialize)]
struct LanceSearchRequest {
    /// Plain text query — embedded server-side for symmetry with the
    /// existing /vectors/search path.
    query: String,
    /// Number of hits to return. Defaults to 5.
    #[serde(default = "default_top_k")]
    top_k: usize,
    /// IVF partitions to probe. `None` uses Lance's built-in default of
    /// 1, which caps recall well below the index's real capability.
    /// Recommended: 5–10% of num_partitions (≈20 for a 316-partition
    /// index). Omitting it here picks the server-side default.
    #[serde(default)]
    nprobes: Option<usize>,
    /// Refine factor — re-rank `top_k * factor` PQ-approximate candidates
    /// with exact distances before returning `top_k`. Recovers recall
    /// lost to product quantization.
    #[serde(default)]
    refine_factor: Option<u32>,
}
|
||
|
||
/// Server-side defaults when the caller doesn't pin nprobes / refine
/// themselves. Tuned for the ~100K × 768d reference workload; see
/// docs/ADR-019-vector-storage.md for the recall / latency trade-off.
const LANCE_DEFAULT_NPROBES: usize = 20;
/// Companion default for PQ re-ranking (see above).
const LANCE_DEFAULT_REFINE_FACTOR: u32 = 5;
|
||
|
||
/// Default hit count when a request omits `top_k`.
fn default_top_k() -> usize {
    5
}
|
||
|
||
/// Vector search against a Lance dataset. Embeds the query text via the
|
||
/// sidecar then calls Lance's nearest-neighbor scanner.
|
||
async fn lance_search(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
Json(req): Json<LanceSearchRequest>,
|
||
) -> impl IntoResponse {
|
||
let embed_resp = state.ai_client
|
||
.embed(EmbedRequest { texts: vec![req.query.clone()], model: None })
|
||
.await
|
||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("embed: {e}")))?;
|
||
if embed_resp.embeddings.is_empty() {
|
||
return Err((StatusCode::BAD_GATEWAY, "no embedding returned".into()));
|
||
}
|
||
let qv: Vec<f32> = embed_resp.embeddings[0].iter().map(|&x| x as f32).collect();
|
||
|
||
let lance_store = state.lance.store_for(&index_name).await
|
||
.map_err(|e| sanitize_lance_err(e, &index_name))?;
|
||
|
||
let t0 = std::time::Instant::now();
|
||
let nprobes = req.nprobes.or(Some(LANCE_DEFAULT_NPROBES));
|
||
let refine = req.refine_factor.or(Some(LANCE_DEFAULT_REFINE_FACTOR));
|
||
let hits = lance_store.search(&qv, req.top_k, nprobes, refine).await
|
||
.map_err(|e| sanitize_lance_err(e, &index_name))?;
|
||
|
||
Ok(Json(serde_json::json!({
|
||
"index_name": index_name,
|
||
"query": req.query,
|
||
"method": "lance_ivf_pq",
|
||
"latency_us": t0.elapsed().as_micros() as u64,
|
||
"results": hits,
|
||
})))
|
||
}
|
||
|
||
/// Random-access fetch by doc_id — the O(1) lookup that's basically
|
||
/// impossible in our Parquet path without scanning the whole file.
|
||
async fn lance_get_doc(
|
||
State(state): State<VectorState>,
|
||
Path((index_name, doc_id)): Path<(String, String)>,
|
||
) -> impl IntoResponse {
|
||
let lance_store = state.lance.store_for(&index_name).await
|
||
.map_err(|e| sanitize_lance_err(e, &index_name))?;
|
||
let t0 = std::time::Instant::now();
|
||
match lance_store.get_by_doc_id(&doc_id).await {
|
||
Ok(Some(row)) => Ok(Json(serde_json::json!({
|
||
"index_name": index_name,
|
||
"doc_id": doc_id,
|
||
"latency_us": t0.elapsed().as_micros() as u64,
|
||
"row": row,
|
||
}))),
|
||
Ok(None) => Err((StatusCode::NOT_FOUND, format!("doc_id not found: {doc_id}"))),
|
||
Err(e) => Err(sanitize_lance_err(e, &index_name)),
|
||
}
|
||
}
|
||
|
||
#[derive(Deserialize)]
struct LanceAppendRequest {
    /// Optional source tag — set on every appended row.
    #[serde(default)]
    source: Option<String>,
    /// Rows to append. Must be non-empty (enforced by the handler).
    rows: Vec<LanceAppendRow>,
}
|
||
|
||
#[derive(Deserialize)]
struct LanceAppendRow {
    /// Document identifier for later random-access fetch.
    doc_id: String,
    /// Chunk ordinal within the document. Defaults to 0 when omitted.
    #[serde(default)]
    chunk_idx: Option<i32>,
    /// Raw chunk text stored alongside the vector.
    chunk_text: String,
    /// Pre-computed embedding. Caller is responsible for ensuring it
    /// matches the dataset's dimensions and embedding model.
    vector: Vec<f32>,
}
|
||
|
||
async fn lance_append(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
Json(req): Json<LanceAppendRequest>,
|
||
) -> impl IntoResponse {
|
||
if req.rows.is_empty() {
|
||
return Err((StatusCode::BAD_REQUEST, "rows array is empty".into()));
|
||
}
|
||
let lance_store = state.lance.store_for(&index_name).await
|
||
.map_err(|e| sanitize_lance_err(e, &index_name))?;
|
||
|
||
let mut doc_ids = Vec::with_capacity(req.rows.len());
|
||
let mut chunk_idxs = Vec::with_capacity(req.rows.len());
|
||
let mut chunk_texts = Vec::with_capacity(req.rows.len());
|
||
let mut vectors = Vec::with_capacity(req.rows.len());
|
||
for r in req.rows {
|
||
doc_ids.push(r.doc_id);
|
||
chunk_idxs.push(r.chunk_idx.unwrap_or(0));
|
||
chunk_texts.push(r.chunk_text);
|
||
vectors.push(r.vector);
|
||
}
|
||
|
||
match lance_store.append(req.source, doc_ids, chunk_idxs, chunk_texts, vectors).await {
|
||
Ok(stats) => Ok(Json(stats)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
async fn lance_stats(
|
||
State(state): State<VectorState>,
|
||
Path(index_name): Path<String>,
|
||
) -> impl IntoResponse {
|
||
let lance_store = state.lance.store_for(&index_name).await
|
||
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
||
match lance_store.stats().await {
|
||
Ok(s) => Ok(Json(s)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// Run an existing harness against Lance IVF_PQ and measure recall@k.
/// Uses the same ground truth computed by brute-force cosine (the HNSW
/// eval path). This closes ADR-019's explicit gap: "IVF_PQ recall not
/// measured."
#[derive(Deserialize)]
struct LanceRecallRequest {
    /// Name of the harness whose queries/ground truth to replay.
    harness: String,
    /// Hits per query; defaults to 5.
    #[serde(default = "default_top_k")]
    top_k: usize,
    /// Override server defaults so operators can sweep nprobes /
    /// refine_factor to chart the recall-vs-latency curve.
    #[serde(default)]
    nprobes: Option<usize>,
    /// Optional refine-factor override (see LanceSearchRequest).
    #[serde(default)]
    refine_factor: Option<u32>,
}
|
||
|
||
/// Aggregate recall measurement for one harness run against Lance.
#[derive(serde::Serialize)]
struct LanceRecallResult {
    index_name: String,
    harness: String,
    /// Number of queries evaluated.
    queries: usize,
    top_k: usize,
    /// Mean recall@top_k across all queries.
    mean_recall: f32,
    /// Per-query breakdown, in harness order.
    per_query: Vec<LanceRecallQuery>,
    latency_p50_us: f32,
    latency_p95_us: f32,
    total_duration_secs: f32,
}
|
||
|
||
/// One query's recall + latency within a LanceRecallResult.
#[derive(serde::Serialize)]
struct LanceRecallQuery {
    query_id: String,
    /// recall@top_k for this query against the brute-force ground truth.
    recall: f32,
    latency_us: f32,
    /// Hits the index actually returned (may be < top_k).
    hits_returned: usize,
}
|
||
|
||
// --- Phase 19: playbook memory endpoints ---
|
||
|
||
// NOTE(review): the paragraph that used to sit here ("Extract (name,
// city, state) from a chunk formatted like ...") described
// parse_worker_chunk, not this function — it was stacked onto this doc
// comment by mistake and has been moved off.
/// Extract role from an SQL filter matching `role = 'Welder'` style.
/// Case-insensitive on the column name. Quoted value; quotes not
/// included in returned string. Returns None when no such equality
/// clause is present (see `grab_eq_value`).
fn extract_target_role(sql_filter: &str) -> Option<String> {
    grab_eq_value(sql_filter, "role")
}
|
||
|
||
/// Shared equality-value extractor for (city, state, role) lookups.
///
/// Scans `src` for `col = 'value'` (whitespace around `=` optional,
/// column name matched case-insensitively) and returns the first
/// non-empty quoted value. Returns None when no complete match exists.
/// Hand-rolled byte scanner — no regex dep.
fn grab_eq_value(src: &str, col: &str) -> Option<String> {
    let lower = src.to_ascii_lowercase();
    let col_lower = col.to_ascii_lowercase();
    let mut search_from = 0usize;
    // Each iteration finds the next occurrence of the column name and
    // tries to parse `= '…'` after it; on any mismatch we resume the
    // search just past this occurrence (monotonic progress, no backtrack).
    while let Some(off) = lower[search_from..].find(&col_lower) {
        let pos = search_from + off;
        // Reject matches embedded in a longer identifier, e.g. `user_role`.
        // Precedence note: `&&` binds tighter than `||`, so this reads
        // `pos == 0 || (!alnum && != '_')` — the prior byte must be
        // neither alphanumeric nor underscore.
        let prior_ok = pos == 0
            || !lower.as_bytes()[pos - 1].is_ascii_alphanumeric()
            && lower.as_bytes()[pos - 1] != b'_';
        let after = pos + col_lower.len();
        if !prior_ok || after >= src.len() {
            search_from = pos + col_lower.len();
            continue;
        }
        // A trailing identifier char (`roles`, `role_x`) fails naturally
        // below: the next non-space byte won't be '='.
        let mut i = after;
        while i < src.len() && src.as_bytes()[i] == b' ' { i += 1; }
        if i >= src.len() || src.as_bytes()[i] != b'=' { search_from = pos + col_lower.len(); continue; }
        i += 1;
        while i < src.len() && src.as_bytes()[i] == b' ' { i += 1; }
        if i >= src.len() || src.as_bytes()[i] != b'\'' { search_from = pos + col_lower.len(); continue; }
        i += 1;
        let start = i;
        // Consume up to the closing quote; the value is returned from the
        // ORIGINAL string so its case is preserved.
        while i < src.len() && src.as_bytes()[i] != b'\'' { i += 1; }
        if i > start {
            return Some(src[start..i].to_string());
        }
        // Empty value ('') — keep scanning for a later occurrence.
        search_from = pos + col_lower.len();
    }
    None
}
|
||
|
||
/// Pull (city, state) out of a SQL filter that uses
|
||
/// `city = 'Toledo' AND state = 'OH'` style equality. Returns None if
|
||
/// either is missing — the caller keeps the original global boost map
|
||
/// behavior (no geo narrowing). Case-insensitive on the column name
|
||
/// so `CITY=` or `City =` also work.
|
||
fn extract_target_geo(sql_filter: &str) -> Option<(String, String)> {
|
||
let city = grab_eq_value(sql_filter, "city")?;
|
||
let state = grab_eq_value(sql_filter, "state")?;
|
||
Some((city, state))
|
||
}
|
||
|
||
/// Extract (name, city, state) from a chunk formatted like
/// "{Name} — {Role} in {City}, {State}. Skills: …".
/// Returns None if the chunk doesn't match that shape; callers simply
/// skip the boost for that hit.
fn parse_worker_chunk(chunk: &str) -> Option<(String, String, String)> {
    // Split on the em dash, then " in ", then the first comma.
    let (raw_name, tail) = chunk.split_once('—')?;
    let (_role, raw_loc) = tail.trim().split_once(" in ")?;
    let (raw_city, after_comma) = raw_loc.trim().split_once(',')?;
    // State is the leading run of ASCII letters after the comma
    // ("OH." → "OH").
    let state: String = after_comma
        .trim()
        .chars()
        .take_while(|c| c.is_ascii_alphabetic())
        .collect();
    let name = raw_name.trim();
    let city = raw_city.trim();
    if name.is_empty() || city.is_empty() || state.is_empty() {
        None
    } else {
        Some((name.to_string(), city.to_string(), state.to_string()))
    }
}
|
||
|
||
#[derive(Deserialize)]
struct SeedPlaybookRequest {
    /// One playbook with {operation, approach, context, endorsed_names}.
    /// City + state are parsed from the operation text.
    operation: String,
    /// Free-text description of the approach taken. Empty when omitted.
    #[serde(default)]
    approach: String,
    /// Free-text situational context. Empty when omitted.
    #[serde(default)]
    context: String,
    /// Worker names endorsed by this playbook.
    endorsed_names: Vec<String>,
    /// Append to the existing memory rather than replacing. Default true —
    /// seeding is a bootstrap/demo tool, not a rebuild substitute.
    #[serde(default = "default_true")]
    append: bool,
    /// Phase 25 — optional schema_fingerprint captured at seed time.
    /// When the underlying dataset's schema changes, any entry whose
    /// fingerprint doesn't match the new one is auto-retired via
    /// retire_on_schema_drift. Caller-provided so the producer (the
    /// scenario driver, the orchestrator) can pass the live fingerprint
    /// without the gateway needing a second catalogd round trip.
    #[serde(default)]
    schema_fingerprint: Option<String>,
    /// Phase 25 — optional hard expiry. RFC3339 timestamp. After this
    /// moment the entry is skipped during boost computation (not
    /// retired, just inactive). Useful for seasonal/temp contracts.
    #[serde(default)]
    valid_until: Option<String>,
    /// Phase 45 — optional external doc references captured at seal
    /// time. Each entry names a tool + version_seen; context7-driven
    /// drift check compares against current versions later. None or
    /// empty = no drift signal (never flagged).
    #[serde(default)]
    doc_refs: Option<Vec<playbook_memory::DocRef>>,
}
|
||
|
||
/// Bootstrap / test-only: inject a playbook entry directly into
/// `playbook_memory` without going through `successful_playbooks`. Useful
/// when the source dataset has stale or phantom entries (as the initial
/// staffing seed did — names that don't correspond to real workers), and
/// you want to demonstrate the feedback loop with a known-good fixture.
///
/// Production path is always `/rebuild` — this endpoint is for operators
/// who need to prime the memory before real playbooks accumulate.
///
/// Errors: 500 on semaphore/persist failure, 502 on embed failure, 400
/// when `operation` does not parse into a city + state.
async fn seed_playbook_memory(
    State(state): State<VectorState>,
    Json(req): Json<SeedPlaybookRequest>,
) -> impl IntoResponse {
    // Serialize embed calls to avoid concurrent socket collisions.
    // The permit is held for the whole handler; `?` surfaces a closed
    // semaphore as a 500.
    let _permit = state.embed_semaphore.acquire().await.map_err(|e|
        (StatusCode::INTERNAL_SERVER_ERROR, format!("semaphore error: {e}")))?;

    // Embed the entry through the same text shape `rebuild` uses so
    // similarity math is comparable across seed + real entries.
    let tmp_entry = playbook_memory::PlaybookEntry {
        operation: req.operation.clone(),
        approach: req.approach.clone(),
        context: req.context.clone(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        endorsed_names: req.endorsed_names.clone(),
        ..Default::default()
    };
    let text = format!(
        "{} | {} | {} | fills: {}",
        tmp_entry.operation, tmp_entry.approach, tmp_entry.context,
        tmp_entry.endorsed_names.join(", "),
    );
    let resp = match state.ai_client.embed(EmbedRequest { texts: vec![text], model: None }).await {
        Ok(r) => r,
        Err(e) => return Err((StatusCode::BAD_GATEWAY, format!("embed seed: {e}"))),
    };
    if resp.embeddings.is_empty() {
        return Err((StatusCode::BAD_GATEWAY, "embed returned nothing".into()));
    }
    // Downcast to f32 — embeddings arrive as f64 from the bridge.
    let emb: Vec<f32> = resp.embeddings[0].iter().map(|&x| x as f32).collect();

    // Parse city/state from the operation ("fill: Role xN in City, ST").
    // Parser lives in playbook_memory::rebuild — expose via a tiny helper
    // or inline the same logic here; duplicated briefly since this seed
    // path is stable but infrequently called.
    let (city, state_) = {
        let after_in = req.operation.split(" in ").nth(1).unwrap_or("");
        let mut parts = after_in.splitn(2, ',');
        let city = parts.next().map(|s| s.trim().to_string()).filter(|s| !s.is_empty());
        // State code = leading ASCII letters after the comma ("OH." → "OH").
        let state = parts.next().map(|s| s.trim().chars().take_while(|c| c.is_ascii_alphabetic()).collect::<String>()).filter(|s| !s.is_empty());
        (city, state)
    };
    if city.is_none() || state_.is_none() {
        return Err((StatusCode::BAD_REQUEST,
            "operation must match 'fill: Role xN in City, ST' shape".into()));
    }

    // Stable id: hash of timestamp + operation. Callers get the id back
    // so they can reference it in citations.
    let ts = chrono::Utc::now().to_rfc3339();
    use sha2::{Digest, Sha256};
    let mut h = Sha256::new();
    h.update(ts.as_bytes());
    h.update(b"|");
    h.update(req.operation.as_bytes());
    let bytes = h.finalize();
    // First 8 digest bytes, hex-encoded → 16-char suffix.
    let pid = format!("pb-seed-{}", bytes.iter().take(8).map(|b| format!("{b:02x}")).collect::<String>());

    let new_entry = playbook_memory::PlaybookEntry {
        playbook_id: pid.clone(),
        operation: req.operation,
        approach: req.approach,
        context: req.context,
        timestamp: ts,
        endorsed_names: req.endorsed_names,
        city, state: state_,
        embedding: Some(emb),
        // Phase 25 — seed request may carry a fingerprint; if not, we
        // default to None and the entry degrades to "no expiry signal"
        // (never auto-retired on drift, but manual retirement still
        // works). valid_until + retired_at start None.
        schema_fingerprint: req.schema_fingerprint.clone(),
        valid_until: req.valid_until.clone(),
        // Phase 45 — seed request may also carry doc_refs; defaults
        // empty so pre-Phase-45 callers still work and the entry
        // degrades to "no drift signal" (never flagged).
        doc_refs: req.doc_refs.clone().unwrap_or_default(),
        ..Default::default()
    };

    // Phase 26 — when append=true (default), route through upsert so
    // same-day re-seeds of the same operation merge instead of
    // appending duplicates. When append=false, retain the old
    // replace-all semantics for callers that want a hard reset.
    if req.append {
        match state.playbook_memory.upsert_entry(new_entry).await {
            Ok(outcome) => {
                let entries_after = state.playbook_memory.entry_count().await;
                Ok(Json(serde_json::json!({
                    "outcome": outcome,
                    "entries_after": entries_after,
                })))
            }
            Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, format!("upsert: {e}"))),
        }
    } else {
        // Hard reset: the single seeded entry becomes the entire memory.
        if let Err(e) = state.playbook_memory.set_entries(vec![new_entry]).await {
            return Err((StatusCode::INTERNAL_SERVER_ERROR, format!("persist: {e}")));
        }
        Ok(Json(serde_json::json!({
            "outcome": { "mode": "replaced", "playbook_id": pid },
            "entries_after": state.playbook_memory.entry_count().await,
        })))
    }
}
|
||
|
||
async fn rebuild_playbook_memory(
|
||
State(state): State<VectorState>,
|
||
) -> impl IntoResponse {
|
||
match playbook_memory::rebuild(
|
||
&state.playbook_memory,
|
||
&state.ai_client,
|
||
&state.catalog,
|
||
&state.bucket_registry,
|
||
).await {
|
||
Ok(report) => Ok(Json(report)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
// Path 2 foundation — dump in-memory playbook_memory state to a fresh
|
||
// `successful_playbooks_live` dataset. Cheap to call (writes one parquet,
|
||
// updates one manifest), so /log can call it after every seed to keep the
|
||
// SQL-queryable surface honest without the destructive REPLACE bug that
|
||
// /ingest/file has.
|
||
async fn persist_playbook_memory_sql(
|
||
State(state): State<VectorState>,
|
||
) -> impl IntoResponse {
|
||
match playbook_memory::persist_to_sql(&state.playbook_memory, &state.catalog).await {
|
||
Ok(report) => Ok(Json(report)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// Request body for the pattern-discovery endpoint
/// (`discover_playbook_patterns`).
#[derive(Deserialize)]
struct PatternsRequest {
    // Query text forwarded to `playbook_memory::discover_patterns`.
    query: String,
    /// Upper bound on playbooks examined; serde default 10
    /// (via `default_pattern_k`).
    #[serde(default = "default_pattern_k")]
    top_k_playbooks: usize,
    /// Minimum frequency (0.0-1.0) for a trait to make the report.
    /// Default 0.4 — at least 40% of examined workers must share it.
    #[serde(default = "default_pattern_min_freq")]
    min_trait_frequency: f32,
}
|
||
/// Serde default for `PatternsRequest::top_k_playbooks`.
fn default_pattern_k() -> usize {
    10
}

/// Serde default for `PatternsRequest::min_trait_frequency`.
fn default_pattern_min_freq() -> f32 {
    0.4
}
|
||
|
||
// Path 2 — meta-index discovery surface. "What did past similar fills
|
||
// have in common that I didn't ask about?" — surfaces signals like
|
||
// recurring certifications, skill clusters, archetype tendencies.
|
||
async fn discover_playbook_patterns(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<PatternsRequest>,
|
||
) -> impl IntoResponse {
|
||
match playbook_memory::discover_patterns(
|
||
&state.playbook_memory,
|
||
&state.ai_client,
|
||
&state.catalog,
|
||
&state.bucket_registry,
|
||
&req.query,
|
||
req.top_k_playbooks,
|
||
req.min_trait_frequency,
|
||
).await {
|
||
Ok(report) => Ok(Json(report)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// Request body for `mark_playbook_failed` — records workers who did
/// not deliver on a fill. City + state are parsed from `operation`.
#[derive(Deserialize)]
struct MarkFailedRequest {
    /// Operation text, same shape as seed: "fill: Role xN in City, ST"
    operation: String,
    /// Names of workers who didn't deliver on the fill.
    failed_names: Vec<String>,
    /// Short reason (no-show, fired, unreliable). Stored verbatim.
    #[serde(default)]
    reason: String,
}
|
||
|
||
async fn mark_playbook_failed(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<MarkFailedRequest>,
|
||
) -> impl IntoResponse {
|
||
// Parse city + state from the operation — mirrors seed's parser.
|
||
let after_in = req.operation.split(" in ").nth(1).unwrap_or("");
|
||
let mut parts = after_in.splitn(2, ',');
|
||
let city = parts.next().map(|s| s.trim().to_string()).filter(|s| !s.is_empty());
|
||
let state_ = parts.next().map(|s|
|
||
s.trim().chars().take_while(|c| c.is_ascii_alphabetic()).collect::<String>()
|
||
).filter(|s| !s.is_empty());
|
||
let (Some(city), Some(state_code)) = (city, state_) else {
|
||
return Err((StatusCode::BAD_REQUEST,
|
||
"operation must match 'fill: Role xN in City, ST' shape".into()));
|
||
};
|
||
|
||
let ts = chrono::Utc::now().to_rfc3339();
|
||
let records: Vec<playbook_memory::FailureRecord> = req.failed_names.iter()
|
||
.map(|n| playbook_memory::FailureRecord {
|
||
city: city.clone(), state: state_code.clone(), name: n.clone(),
|
||
reason: req.reason.clone(), timestamp: ts.clone(),
|
||
})
|
||
.collect();
|
||
|
||
match state.playbook_memory.mark_failures(records).await {
|
||
Ok(added) => Ok(Json(serde_json::json!({ "added": added, "city": city, "state": state_code }))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
async fn playbook_memory_stats(
|
||
State(state): State<VectorState>,
|
||
) -> impl IntoResponse {
|
||
let entries = state.playbook_memory.snapshot().await;
|
||
Json(serde_json::json!({
|
||
"entries": entries.len(),
|
||
"total_names_endorsed": entries.iter().map(|e| e.endorsed_names.len()).sum::<usize>(),
|
||
"entries_with_embeddings": entries.iter().filter(|e| e.embedding.is_some()).count(),
|
||
"sample": entries.iter().take(3).map(|e| serde_json::json!({
|
||
"id": e.playbook_id,
|
||
"operation": e.operation,
|
||
"city": e.city,
|
||
"state": e.state,
|
||
"endorsed": e.endorsed_names,
|
||
})).collect::<Vec<_>>(),
|
||
}))
|
||
}
|
||
|
||
/// Request body for `retire_playbook_memory`. Exactly one of the two
/// modes should be populated: by id, or by (city, state, fingerprint).
#[derive(Deserialize)]
struct RetirePlaybookRequest {
    /// Retire by playbook_id — exact match, single entry. Used for
    /// manual operator retirement via the UI.
    #[serde(default)]
    playbook_id: Option<String>,
    /// Retire by scope — city + state required, with a fingerprint
    /// that entries must match to survive. Fingerprint mismatch → retire.
    /// Use when a schema migration produces a new fingerprint and
    /// historical playbooks need to be auto-retired.
    #[serde(default)]
    city: Option<String>,
    #[serde(default)]
    state: Option<String>,
    #[serde(default)]
    current_schema_fingerprint: Option<String>,
    /// Human-readable reason stored on the retired entry.
    reason: String,
}
|
||
|
||
/// Phase 25 retirement endpoint. Two modes:
|
||
/// {playbook_id, reason} → retire one
|
||
/// {city, state, current_schema_fingerprint, reason} → retire all
|
||
/// entries in scope whose
|
||
/// fingerprint differs
|
||
async fn retire_playbook_memory(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<RetirePlaybookRequest>,
|
||
) -> impl IntoResponse {
|
||
if let Some(id) = &req.playbook_id {
|
||
return match state.playbook_memory.retire_one(id, &req.reason).await {
|
||
Ok(found) => Ok(Json(serde_json::json!({ "mode": "by_id", "retired": if found { 1 } else { 0 } }))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
};
|
||
}
|
||
if let (Some(city), Some(state_code), Some(fp)) = (&req.city, &req.state, &req.current_schema_fingerprint) {
|
||
return match state.playbook_memory.retire_on_schema_drift(city, state_code, fp, &req.reason).await {
|
||
Ok(n) => Ok(Json(serde_json::json!({ "mode": "schema_drift", "retired": n, "city": city, "state": state_code }))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
};
|
||
}
|
||
Err((StatusCode::BAD_REQUEST,
|
||
"supply either {playbook_id, reason} or {city, state, current_schema_fingerprint, reason}".into()))
|
||
}
|
||
|
||
/// Phase 45 slice 3 — POST /playbook_memory/doc_drift/check/{id}
///
/// Iterates the playbook's `doc_refs`, asks the context7 bridge whether
/// each one drifted against the recorded snippet_hash. If any tool
/// returned `drifted: true`, stamps `doc_drift_flagged_at` on the
/// entry — which excludes it from boost (via the filter in
/// `compute_boost_for_filtered_with_role`) until a human reviews and
/// resolves.
///
/// Unknown outcomes (bridge down, tool not in context7, no snippet
/// hash) are explicitly NOT enough to flag. Only a positive drifted=true
/// from the bridge flips the flag.
///
/// Errors: 404 when the id is unknown, 500 when flagging fails.
async fn check_doc_drift(
    State(state): State<VectorState>,
    axum::extract::Path(id): axum::extract::Path<String>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
    use crate::doc_drift::{check_all_refs, DriftCheckerConfig, DriftOutcome};

    let entry = state.playbook_memory.get_entry(&id).await
        .ok_or((StatusCode::NOT_FOUND, format!("playbook not found: {id}")))?;

    // No refs → nothing to check; report success without touching state.
    if entry.doc_refs.is_empty() {
        return Ok(Json(serde_json::json!({
            "playbook_id": id,
            "checked_tools": [],
            "drifted": false,
            "flagged": false,
            "reason": "entry has no doc_refs — nothing to check",
        })));
    }

    let cfg = DriftCheckerConfig::default();
    let results = check_all_refs(&cfg, &entry.doc_refs).await;

    // Flatten each outcome into a uniform JSON row: Drifted carries the
    // new hash + source url; Unknown carries only its reason; Unchanged
    // carries neither.
    let per_tool: Vec<serde_json::Value> = results.iter().map(|r| {
        let (drifted, current, src, reason) = match &r.outcome {
            DriftOutcome::Drifted { current_snippet_hash, source_url } =>
                (true, Some(current_snippet_hash.clone()), source_url.clone(), None),
            DriftOutcome::Unchanged =>
                (false, None, None, None),
            DriftOutcome::Unknown { reason } =>
                (false, None, None, Some(reason.clone())),
        };
        serde_json::json!({
            "tool": r.tool,
            "version_seen": r.version_seen,
            "drifted": drifted,
            "current_snippet_hash": current,
            "source_url": src,
            "unknown_reason": reason,
        })
    }).collect();

    let any_drifted = results.iter().any(|r| matches!(r.outcome, DriftOutcome::Drifted { .. }));

    // Only a positive drift triggers the flag write; a flag failure is a
    // hard 500 so the caller knows state was NOT updated.
    let flagged = if any_drifted {
        state.playbook_memory.flag_doc_drift(&id).await
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("flag: {e}")))?
    } else {
        false
    };

    Ok(Json(serde_json::json!({
        "playbook_id": id,
        "checked_tools": results.iter().map(|r| &r.tool).collect::<Vec<_>>(),
        "drifted": any_drifted,
        "flagged": flagged,
        "per_tool": per_tool,
    })))
}
|
||
|
||
/// Phase 45 closure (2026-04-27) — POST /playbook_memory/doc_drift/scan
|
||
///
|
||
/// Iterates all active playbooks (non-retired, has doc_refs), runs
|
||
/// drift check against context7 for each, flags drifted entries via
|
||
/// PlaybookMemory::flag_doc_drift, and appends a row to
|
||
/// data/_kb/doc_drift_corrections.jsonl for each drift detected.
|
||
///
|
||
/// Returns aggregate stats so an operator can see at-a-glance how
|
||
/// many playbooks drifted and which tools moved.
|
||
///
|
||
/// Honors entries already flagged: they're counted in `already_flagged`
|
||
/// (no double-flag, no duplicate corrections.jsonl row).
|
||
async fn scan_doc_drift(
|
||
State(state): State<VectorState>,
|
||
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
|
||
use crate::doc_drift::{check_all_refs, DriftCheckerConfig, DriftOutcome};
|
||
|
||
let entries = state.playbook_memory.snapshot().await;
|
||
let now = chrono::Utc::now().to_rfc3339();
|
||
let cfg = DriftCheckerConfig::default();
|
||
|
||
let mut scanned = 0usize;
|
||
let mut newly_flagged = 0usize;
|
||
let mut already_flagged = 0usize;
|
||
let mut skipped_no_refs = 0usize;
|
||
let mut skipped_retired = 0usize;
|
||
let mut tool_counts: std::collections::HashMap<String, usize> = Default::default();
|
||
let mut corrections_rows: Vec<String> = vec![];
|
||
|
||
for e in entries.iter() {
|
||
if e.retired_at.is_some() { skipped_retired += 1; continue; }
|
||
if e.doc_refs.is_empty() { skipped_no_refs += 1; continue; }
|
||
if e.doc_drift_flagged_at.is_some() && e.doc_drift_reviewed_at.is_none() {
|
||
already_flagged += 1;
|
||
continue;
|
||
}
|
||
scanned += 1;
|
||
let results = check_all_refs(&cfg, &e.doc_refs).await;
|
||
let drifted_tools: Vec<&str> = results.iter()
|
||
.filter(|r| matches!(r.outcome, DriftOutcome::Drifted { .. }))
|
||
.map(|r| r.tool.as_str())
|
||
.collect();
|
||
if drifted_tools.is_empty() { continue; }
|
||
|
||
// Flag the entry.
|
||
let flagged = state.playbook_memory.flag_doc_drift(&e.playbook_id).await
|
||
.unwrap_or(false);
|
||
if flagged { newly_flagged += 1; }
|
||
for t in &drifted_tools {
|
||
*tool_counts.entry(t.to_string()).or_insert(0) += 1;
|
||
}
|
||
|
||
// Build corrections.jsonl row — one per drifted playbook with
|
||
// the tool list inline. Downstream consumers (overview model,
|
||
// operator dashboard) read this to decide reviews + revisions.
|
||
let row = serde_json::json!({
|
||
"playbook_id": e.playbook_id,
|
||
"scanned_at": now,
|
||
"drifted_tools": drifted_tools,
|
||
"per_tool": results.iter().map(|r| {
|
||
let (drifted, current, src) = match &r.outcome {
|
||
DriftOutcome::Drifted { current_snippet_hash, source_url } =>
|
||
(true, Some(current_snippet_hash.clone()), source_url.clone()),
|
||
_ => (false, None, None),
|
||
};
|
||
serde_json::json!({
|
||
"tool": r.tool, "version_seen": r.version_seen,
|
||
"drifted": drifted, "current_snippet_hash": current, "source_url": src,
|
||
})
|
||
}).collect::<Vec<_>>(),
|
||
"recommended_action": "review-and-resolve",
|
||
});
|
||
corrections_rows.push(row.to_string());
|
||
}
|
||
|
||
// Persist corrections.jsonl row(s) for the operator/overview model.
|
||
if !corrections_rows.is_empty() {
|
||
let path = std::path::PathBuf::from("/home/profit/lakehouse/data/_kb/doc_drift_corrections.jsonl");
|
||
if let Some(parent) = path.parent() {
|
||
if let Err(e) = tokio::fs::create_dir_all(parent).await {
|
||
tracing::warn!(target: "vectord.doc_drift", "create_dir_all {parent:?}: {e}");
|
||
}
|
||
}
|
||
let body = corrections_rows.join("\n") + "\n";
|
||
if let Err(e) = tokio::fs::OpenOptions::new()
|
||
.create(true).append(true).open(&path).await
|
||
{
|
||
tracing::warn!(target: "vectord.doc_drift", "open {path:?}: {e}");
|
||
} else {
|
||
use tokio::io::AsyncWriteExt;
|
||
match tokio::fs::OpenOptions::new().create(true).append(true).open(&path).await {
|
||
Ok(mut f) => {
|
||
if let Err(e) = f.write_all(body.as_bytes()).await {
|
||
tracing::warn!(target: "vectord.doc_drift", "append {path:?}: {e}");
|
||
}
|
||
}
|
||
Err(e) => tracing::warn!(target: "vectord.doc_drift", "reopen {path:?}: {e}"),
|
||
}
|
||
}
|
||
}
|
||
|
||
Ok(Json(serde_json::json!({
|
||
"scanned_at": now,
|
||
"scanned": scanned,
|
||
"newly_flagged": newly_flagged,
|
||
"already_flagged": already_flagged,
|
||
"skipped_retired": skipped_retired,
|
||
"skipped_no_refs": skipped_no_refs,
|
||
"drifted_by_tool": tool_counts,
|
||
"corrections_written": corrections_rows.len(),
|
||
})))
|
||
}
|
||
|
||
/// Phase 45 slice 3 — POST /playbook_memory/doc_drift/resolve/{id}
|
||
///
|
||
/// Human-in-the-loop re-admission. Stamps `doc_drift_reviewed_at`.
|
||
/// Idempotent: returns `resolved: false` if nothing changed (entry
|
||
/// wasn't flagged, already reviewed, or doesn't exist).
|
||
async fn resolve_doc_drift(
|
||
State(state): State<VectorState>,
|
||
axum::extract::Path(id): axum::extract::Path<String>,
|
||
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
|
||
let resolved = state.playbook_memory.resolve_doc_drift(&id).await
|
||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("resolve: {e}")))?;
|
||
Ok(Json(serde_json::json!({
|
||
"playbook_id": id,
|
||
"resolved": resolved,
|
||
})))
|
||
}
|
||
|
||
/// Phase 27 — request body for `POST /playbook_memory/revise`. Same
/// shape as a seed request minus `append` (revise is always
/// append-semantics for a specific parent) plus `parent_id`. The new
/// version's `playbook_id` is derived deterministically so callers get
/// the same id back from repeated revises with identical content —
/// useful for idempotent retry paths.
#[derive(Deserialize)]
struct RevisePlaybookRequest {
    // Id of the entry being revised; must be the tip of its chain.
    parent_id: String,
    // Required (no serde default), unlike the seed request where
    // approach/context default to empty.
    operation: String,
    approach: String,
    context: String,
    endorsed_names: Vec<String>,
    #[serde(default)]
    schema_fingerprint: Option<String>,
    #[serde(default)]
    valid_until: Option<String>,
    /// Phase 45 — updated doc references. Typically a revise happens
    /// BECAUSE docs drifted; pass the new versions seen so the revised
    /// entry starts with fresh drift signal.
    #[serde(default)]
    doc_refs: Option<Vec<playbook_memory::DocRef>>,
}
|
||
|
||
/// Phase 27 — create a new version of an existing playbook. The parent
/// is marked superseded; the new entry inherits the chain via
/// `parent_id` and carries `version = parent.version + 1`. Errors with
/// 400 on a retired or already-superseded parent (must revise the tip
/// of the chain). Embeds the new text through the same shape as
/// `/seed` so cosine similarity stays comparable across rebuild + seed
/// + revise entries.
///
/// Errors: 502 on embed failure, 400 on an unparseable operation or a
/// rejected parent.
async fn revise_playbook_memory(
    State(state): State<VectorState>,
    Json(req): Json<RevisePlaybookRequest>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
    // Same "op | approach | context | fills: …" text shape as /seed.
    let text = format!(
        "{} | {} | {} | fills: {}",
        req.operation, req.approach, req.context,
        req.endorsed_names.join(", "),
    );
    let resp = state.ai_client.embed(EmbedRequest { texts: vec![text], model: None })
        .await
        .map_err(|e| (StatusCode::BAD_GATEWAY, format!("embed revise: {e}")))?;
    if resp.embeddings.is_empty() {
        return Err((StatusCode::BAD_GATEWAY, "embed returned nothing".into()));
    }
    // Downcast to f32 for storage.
    let emb: Vec<f32> = resp.embeddings[0].iter().map(|&x| x as f32).collect();

    // Parse city/state from the operation — same inline parser as /seed.
    let (city, state_) = {
        let after_in = req.operation.split(" in ").nth(1).unwrap_or("");
        let mut parts = after_in.splitn(2, ',');
        let city = parts.next().map(|s| s.trim().to_string()).filter(|s| !s.is_empty());
        let state = parts.next()
            .map(|s| s.trim().chars().take_while(|c| c.is_ascii_alphabetic()).collect::<String>())
            .filter(|s| !s.is_empty());
        (city, state)
    };
    if city.is_none() || state_.is_none() {
        return Err((StatusCode::BAD_REQUEST,
            "operation must match 'fill: Role xN in City, ST' shape".into()));
    }

    // Phase 27 — deterministic pid derived ONLY from content-shaped
    // inputs (parent_id + operation + approach + context + sorted
    // endorsed_names). Excluding wall-clock ts means two revise calls
    // with identical payloads produce the same pid, which is the
    // contract the docstring promises. On retry the caller gets back
    // the same id and the pre-flight idempotency check below short-
    // circuits to the existing entry instead of re-appending.
    let mut names_sorted = req.endorsed_names.clone();
    names_sorted.sort();
    use sha2::{Digest, Sha256};
    let mut h = Sha256::new();
    h.update(req.parent_id.as_bytes()); h.update(b"|");
    h.update(req.operation.as_bytes()); h.update(b"|");
    h.update(req.approach.as_bytes()); h.update(b"|");
    h.update(req.context.as_bytes()); h.update(b"|");
    h.update(names_sorted.join(",").as_bytes());
    let bytes = h.finalize();
    let pid = format!("pb-rev-{}", bytes.iter().take(8).map(|b| format!("{b:02x}")).collect::<String>());

    // Idempotency short-circuit — if this exact pid already exists in
    // memory (from a prior successful revise with the same content),
    // return it directly rather than re-appending or 400ing on the
    // superseded-parent rejection. Walks the parent's chain via
    // history() because the parent may itself have been superseded by
    // our prior successful call.
    let chain = state.playbook_memory.history(&req.parent_id).await;
    if let Some(existing) = chain.iter().find(|e| e.playbook_id == pid) {
        return Ok(Json(serde_json::json!({
            "outcome": {
                "parent_id": req.parent_id,
                "parent_version": existing.version.saturating_sub(1),
                "new_playbook_id": existing.playbook_id,
                "new_version": existing.version,
                "superseded_at": existing.superseded_at.clone().unwrap_or_default(),
                "idempotent_return": true,
            },
            "entries_after": state.playbook_memory.entry_count().await,
        })));
    }

    let ts = chrono::Utc::now().to_rfc3339();
    let new_entry = playbook_memory::PlaybookEntry {
        playbook_id: pid.clone(),
        operation: req.operation,
        approach: req.approach,
        context: req.context,
        timestamp: ts,
        endorsed_names: req.endorsed_names,
        city, state: state_,
        embedding: Some(emb),
        schema_fingerprint: req.schema_fingerprint,
        valid_until: req.valid_until,
        // Phase 45 — doc_refs may be provided on revise too.
        doc_refs: req.doc_refs.clone().unwrap_or_default(),
        // revise_entry overwrites version / parent_id / supersession
        // from the parent; other fields keep defaults.
        ..Default::default()
    };

    // revise_entry validates the parent (must be active tip of chain);
    // its error text goes straight back as the 400 body.
    let outcome = state.playbook_memory.revise_entry(&req.parent_id, new_entry)
        .await
        .map_err(|e| (StatusCode::BAD_REQUEST, e))?;
    Ok(Json(serde_json::json!({
        "outcome": outcome,
        "entries_after": state.playbook_memory.entry_count().await,
    })))
}
|
||
|
||
/// Phase 27 — return the full version chain containing `playbook_id`,
|
||
/// ordered root → tip. 404 if the id isn't present. The walker is
|
||
/// cycle-safe by construction (visited set per direction).
|
||
async fn playbook_memory_history(
|
||
State(state): State<VectorState>,
|
||
Path(playbook_id): Path<String>,
|
||
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
|
||
let chain = state.playbook_memory.history(&playbook_id).await;
|
||
if chain.is_empty() {
|
||
return Err((StatusCode::NOT_FOUND, format!("no playbook with id '{playbook_id}'")));
|
||
}
|
||
Ok(Json(serde_json::json!({
|
||
"playbook_id": playbook_id,
|
||
"versions": chain.len(),
|
||
"chain": chain,
|
||
})))
|
||
}
|
||
|
||
/// Phase 25 status endpoint — reports retirement counts so dashboards
|
||
/// can show "N playbooks retired (12 from 2026-05 schema migration)".
|
||
/// Phase 27 added `superseded` as a distinct counter.
|
||
async fn playbook_memory_status(
|
||
State(state): State<VectorState>,
|
||
) -> impl IntoResponse {
|
||
let (total, retired, superseded, failures) = state.playbook_memory.status_counts().await;
|
||
// `active` = entries eligible for boost. Retired and superseded are
|
||
// distinct exclusion reasons; subtract both. An entry can in principle
|
||
// be both retired AND superseded (e.g. revised then retired) so
|
||
// saturating_sub guards against underflow if that pathological case
|
||
// ever lands.
|
||
let inactive = retired + superseded;
|
||
Json(serde_json::json!({
|
||
"total": total,
|
||
"retired": retired,
|
||
"superseded": superseded,
|
||
"active": total.saturating_sub(inactive),
|
||
"failures": failures,
|
||
}))
|
||
}
|
||
|
||
/// Replay a stored query harness against a Lance index and report
/// recall@k per query plus aggregate latency percentiles.
///
/// Requires the harness to have ground truth already built (404 on a
/// missing harness, 400 when ground truth is absent). Queries without
/// an embedding or ground truth are silently skipped.
async fn lance_recall_harness(
    State(state): State<VectorState>,
    Path(index_name): Path<String>,
    Json(req): Json<LanceRecallRequest>,
) -> impl IntoResponse {
    let t0 = std::time::Instant::now();

    let harness_set = state.harness_store.load_for_index(&index_name, &req.harness).await
        .map_err(|e| (StatusCode::NOT_FOUND, format!("harness: {e}")))?;
    if !harness_set.ground_truth_built {
        return Err((StatusCode::BAD_REQUEST,
            "harness has no ground truth — run a regular /hnsw/trial first to compute it".into()));
    }

    let lance_store = state.lance.store_for(&index_name).await
        .map_err(|e| (StatusCode::BAD_REQUEST, e))?;

    let k = req.top_k;
    let mut per_query = Vec::with_capacity(harness_set.queries.len());
    let mut latencies: Vec<f32> = Vec::with_capacity(harness_set.queries.len());
    let mut recalls: Vec<f32> = Vec::with_capacity(harness_set.queries.len());

    for q in &harness_set.queries {
        // Skip queries that can't be scored — no embedding or no truth.
        let qv = match &q.query_embedding {
            Some(v) => v,
            None => continue,
        };
        let gt = match &q.ground_truth {
            Some(gt) => gt,
            None => continue,
        };

        // Per-query latency covers only the index search, not scoring.
        let qt0 = std::time::Instant::now();
        let hits = lance_store.search(
            qv,
            k,
            Some(req.nprobes.unwrap_or(LANCE_DEFAULT_NPROBES)),
            Some(req.refine_factor.unwrap_or(LANCE_DEFAULT_REFINE_FACTOR)),
        ).await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("search: {e}")))?;
        let lat_us = qt0.elapsed().as_micros() as f32;

        let predicted: Vec<String> = hits.iter().map(|h| h.doc_id.clone()).collect();
        let recall = harness::recall_at_k(&predicted, gt, k);

        per_query.push(LanceRecallQuery {
            query_id: q.id.clone(),
            recall,
            latency_us: lat_us,
            hits_returned: hits.len(),
        });
        latencies.push(lat_us);
        recalls.push(recall);
    }

    let mean_recall = if recalls.is_empty() { 0.0 } else {
        recalls.iter().sum::<f32>() / recalls.len() as f32
    };
    // NaN-tolerant ascending sort; percentile closure uses nearest-rank
    // on the sorted latencies and returns 0.0 for an empty set.
    latencies.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let p = |pct: f32| -> f32 {
        if latencies.is_empty() { return 0.0; }
        let idx = ((latencies.len() as f32 - 1.0) * pct).round() as usize;
        latencies[idx.min(latencies.len() - 1)]
    };

    Ok(Json(LanceRecallResult {
        index_name,
        harness: req.harness,
        queries: per_query.len(),
        top_k: k,
        mean_recall,
        per_query,
        latency_p50_us: p(0.50),
        latency_p95_us: p(0.95),
        total_duration_secs: t0.elapsed().as_secs_f32(),
    }))
}
|
||
|
||
/// Build a scalar btree index on a column (typically `doc_id`). Makes
|
||
/// filter-pushdown queries O(log N) instead of full-fragment scan.
|
||
async fn lance_build_scalar_index(
|
||
State(state): State<VectorState>,
|
||
Path((index_name, column)): Path<(String, String)>,
|
||
) -> impl IntoResponse {
|
||
let lance_store = state.lance.store_for(&index_name).await
|
||
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
||
match lance_store.build_scalar_index(&column).await {
|
||
Ok(stats) => Ok(Json(stats)),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
// ─── Pathway memory handlers ──────────────────────────────────────────
//
// Thin wrappers around pathway_memory::PathwayMemory. HTTP surface is
// deliberately small — four endpoints cover the full lifecycle:
// insert at end-of-review, query before running the ladder,
// record_replay after a hot-swap, and stats for the VCP UI.
/// Request body for the hot-swap pathway lookup (`pathway_query`):
/// the task context plus the embedding to match against stored traces.
#[derive(Deserialize)]
struct PathwayQueryRequest {
    // Task classification to match (passed to query_hot_swap).
    task_class: String,
    // File the task touches; scopes the lookup.
    file_path: String,
    // Optional signal class; None widens the match.
    signal_class: Option<String>,
    // Embedding vector used for nearest-pathway search.
    query_vec: Vec<f32>,
}
async fn pathway_insert(
|
||
State(state): State<VectorState>,
|
||
Json(trace): Json<pathway_memory::PathwayTrace>,
|
||
) -> impl IntoResponse {
|
||
match state.pathway_memory.insert(trace).await {
|
||
Ok(()) => Ok(Json(json!({"ok": true}))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
async fn pathway_query(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<PathwayQueryRequest>,
|
||
) -> impl IntoResponse {
|
||
let cand = state
|
||
.pathway_memory
|
||
.query_hot_swap(
|
||
&req.task_class,
|
||
&req.file_path,
|
||
req.signal_class.as_deref(),
|
||
&req.query_vec,
|
||
)
|
||
.await;
|
||
// 200 with null candidate means "no hot-swap"; this is a normal
|
||
// path, not an error — callers should proceed with the full ladder.
|
||
Json(json!({ "candidate": cand }))
|
||
}
|
||
|
||
/// Request body for `pathway_record_replay`: which pathway was
/// replayed and whether the replay succeeded.
#[derive(Deserialize)]
struct PathwayReplayRequest {
    // Identifier of the replayed pathway (looked up by record_replay_outcome).
    pathway_id: String,
    // Outcome of the replay attempt.
    succeeded: bool,
}
async fn pathway_record_replay(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<PathwayReplayRequest>,
|
||
) -> impl IntoResponse {
|
||
match state
|
||
.pathway_memory
|
||
.record_replay_outcome(&req.pathway_id, req.succeeded)
|
||
.await
|
||
{
|
||
Ok(()) => Ok(Json(json!({"ok": true}))),
|
||
Err(e) => Err((StatusCode::NOT_FOUND, e)),
|
||
}
|
||
}
|
||
|
||
async fn pathway_stats(State(state): State<VectorState>) -> impl IntoResponse {
|
||
Json(state.pathway_memory.stats().await)
|
||
}
|
||
|
||
/// Request body for `pathway_bug_fingerprints`: task context plus an
/// optional result cap.
#[derive(Deserialize)]
struct PathwayBugFingerprintsRequest {
    // Task classification to scope the fingerprint lookup.
    task_class: String,
    // File the task touches; scopes the lookup.
    file_path: String,
    // Optional signal class; None widens the match.
    signal_class: Option<String>,
    // Max fingerprints to return; the handler defaults this to 5.
    limit: Option<usize>,
}
async fn pathway_bug_fingerprints(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<PathwayBugFingerprintsRequest>,
|
||
) -> impl IntoResponse {
|
||
let fps = state
|
||
.pathway_memory
|
||
.bug_fingerprints_for(
|
||
&req.task_class,
|
||
&req.file_path,
|
||
req.signal_class.as_deref(),
|
||
req.limit.unwrap_or(5),
|
||
)
|
||
.await;
|
||
Json(json!({ "fingerprints": fps }))
|
||
}
|
||
|
||
// ─── Mem0 ops endpoints (J 2026-04-25) ───
async fn pathway_upsert(
|
||
State(state): State<VectorState>,
|
||
Json(trace): Json<pathway_memory::PathwayTrace>,
|
||
) -> impl IntoResponse {
|
||
match state.pathway_memory.upsert(trace).await {
|
||
Ok(outcome) => Ok(Json(json!({"ok": true, "outcome": outcome}))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// Request body for `pathway_retire`: which trace to retire and why.
#[derive(Deserialize)]
struct PathwayRetireRequest {
    // Unique id of the trace to retire.
    trace_uid: String,
    // Human-readable retirement reason, recorded by the memory layer.
    reason: String,
}
async fn pathway_retire(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<PathwayRetireRequest>,
|
||
) -> impl IntoResponse {
|
||
match state.pathway_memory.retire(&req.trace_uid, &req.reason).await {
|
||
Ok(touched) => Ok(Json(json!({"ok": true, "retired": touched}))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
/// Request body for `pathway_revise`: the trace being superseded and
/// its replacement.
#[derive(Deserialize)]
struct PathwayReviseRequest {
    // Unique id of the trace the revision supersedes.
    parent_trace_uid: String,
    // Replacement trace to store in its place.
    new_trace: pathway_memory::PathwayTrace,
}
async fn pathway_revise(
|
||
State(state): State<VectorState>,
|
||
Json(req): Json<PathwayReviseRequest>,
|
||
) -> impl IntoResponse {
|
||
match state.pathway_memory.revise(&req.parent_trace_uid, req.new_trace).await {
|
||
Ok(outcome) => Ok(Json(json!({"ok": true, "outcome": outcome}))),
|
||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||
}
|
||
}
|
||
|
||
async fn pathway_history(
|
||
State(state): State<VectorState>,
|
||
axum::extract::Path(trace_uid): axum::extract::Path<String>,
|
||
) -> impl IntoResponse {
|
||
let chain = state.pathway_memory.history(&trace_uid).await;
|
||
Json(json!({"trace_uid": trace_uid, "chain_len": chain.len(), "chain": chain}))
|
||
}
|
||
|
||
#[cfg(test)]
mod extractor_tests {
    use super::*;

    // Happy path: both city and state present in a compound filter.
    #[test]
    fn extract_target_geo_basic() {
        let filter = "role = 'Welder' AND city = 'Toledo' AND state = 'OH' AND CAST(availability AS DOUBLE) > 0.5";
        let got = extract_target_geo(filter);
        assert_eq!(got, Some(("Toledo".into(), "OH".into())));
    }

    // City without state must not produce a partial match.
    #[test]
    fn extract_target_geo_missing_state_returns_none() {
        let filter = "role = 'Welder' AND city = 'Toledo'";
        assert_eq!(extract_target_geo(filter), None);
    }

    // "civilian" contains "city" as a substring — must not match.
    #[test]
    fn extract_target_geo_word_boundary() {
        let filter = "civilian_rank = 1 AND city = 'Toledo' AND state = 'OH'";
        let got = extract_target_geo(filter);
        assert_eq!(got, Some(("Toledo".into(), "OH".into())));
    }

    // Happy path for the role extractor.
    #[test]
    fn extract_target_role_basic() {
        let filter = "role = 'Welder' AND city = 'Toledo'";
        assert_eq!(extract_target_role(filter), Some("Welder".into()));
    }

    // No role predicate → None.
    #[test]
    fn extract_target_role_none_when_absent() {
        let filter = "city = 'Toledo' AND state = 'OH'";
        assert_eq!(extract_target_role(filter), None);
    }

    // Role values may contain spaces; the full quoted value is returned.
    #[test]
    fn extract_target_role_multi_word() {
        let filter = "role = 'Warehouse Associate' AND city = 'Chicago'";
        let got = extract_target_role(filter);
        assert_eq!(got, Some("Warehouse Associate".into()));
    }
}