commit 5b1fcf6d27 Phase 28-36 body of work
Accumulated since a6f12e2 (Phase 21 Rust port + Phase 27 versioning):

- Phase 36: embed_semaphore on VectorState (permits=1) serializes
  seed embed calls — prevents sidecar socket collisions under
  concurrent /seed stress load
- Phase 31+: run_stress.ts 6-task diverse stress scaffolding;
  run_e2e_rated.ts + orchestrator.ts tightening
- Catalog dedupe cleanup: 16 duplicate manifests removed; canonical
  candidates.parquet (10.5MB -> 76KB) + placements.parquet (1.2MB ->
  11KB) regenerated post-dedupe; fresh manifests for active datasets
- vectord: harness EvalSet refinements (+181), agent portfolio
  rotation + ingest triggers (+158), autotune + rag adjustments
- catalogd/storaged/ingestd/mcp-server: misc tightening
- docs: Phase 28-36 PRD entries + DECISIONS ADR additions;
  control-plane pivot banner added to top of docs/PRD.md (pointing
  at docs/CONTROL_PLANE_PRD.md which lands in next commit)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 02:41:15 -05:00

790 lines
30 KiB
Rust

//! Phase 16.2 + 16.5 — The autotune agent.
//!
//! A long-running tokio task that watches the trial journal and
//! autonomously proposes + runs new HNSW configs. Distinct from
//! `autotune::run_autotune` which is synchronous (one HTTP call, grid
//! of trials, done). The agent is the continuous version: it sleeps,
//! wakes on triggers, proposes configs based on prior trial history,
//! runs them one at a time, and auto-promotes when it finds an
//! improvement.
//!
//! Design invariants:
//! - Trials are data (ADR-018). Every proposal reads the journal; every
//! attempt appends to it. The journal is the agent's memory.
//! - One trial at a time. Bounded Ollama load — the agent never fires
//! multiple parallel embeddings and respects `cooldown_between_trials_secs`.
//! - Rate-limited. `max_trials_per_hour` is a hard ceiling so a
//! misbehaving proposal function can't saturate the system.
//! - Never promotes below `min_recall`. Same safety gate as
//! `run_autotune` — we will not make the index worse.
//! - Triggered OR periodic. Ingest enqueues a `DatasetAppended` event
//! when a new batch lands; the agent also wakes periodically to keep
//! exploring even when nothing changed externally.
//! - Graceful shutdown via the `stop_tx` signal — the handle's Drop
//! doesn't force-kill, but `stop()` requests a clean exit after the
//! current trial.
use chrono::{DateTime, Utc};
use object_store::ObjectStore;
use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use std::sync::Arc;
use tokio::sync::{Mutex, RwLock, mpsc, oneshot};
use aibridge::client::AiClient;
use catalogd::registry::Registry as CatalogRegistry;
use crate::embedding_cache::EmbeddingCache;
use crate::harness;
use crate::hnsw::HnswStore;
use crate::index_registry::IndexRegistry;
use crate::promotion::{PromotionEntry, PromotionRegistry};
use crate::trial::{HnswConfig, Trial, TrialJournal, TrialMetrics};
// -------- Public-facing types --------
/// Runtime configuration for the agent. Mirrored in shared::config under
/// `[agent]`. Defaults are conservative — designed to tune slowly in the
/// background without fighting real workloads for GPU time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentConfig {
    /// Master switch. When false, `spawn` returns a handle but the loop
    /// doesn't run (a drain task still consumes the trigger channel so
    /// sends don't back up).
    pub enabled: bool,
    /// Periodic wake-up — even if the trigger queue is empty, every N
    /// seconds the agent picks an index with trials and proposes one
    /// more config. Keeps exploration alive on idle indexes.
    pub cycle_interval_secs: u64,
    /// Minimum gap between two trials on the SAME index. Prevents the
    /// agent from hammering Ollama when a hot index has many pending
    /// triggers in a row. (The current gate in `cooling_down` is global
    /// across indexes, not per-index — see the note there.)
    pub cooldown_between_trials_secs: u64,
    /// Below this recall, a proposal is never promoted — even if it
    /// beats the champion on latency.
    pub min_recall: f32,
    /// Budget cap: hard ceiling on trials per hour across all indexes.
    /// When hit, the agent idles until the hour window rolls.
    pub max_trials_per_hour: u32,
}
impl Default for AgentConfig {
fn default() -> Self {
Self {
enabled: false, // opt-in — don't auto-start until J turns it on
cycle_interval_secs: 60,
cooldown_between_trials_secs: 30,
min_recall: 0.9,
max_trials_per_hour: 30,
}
}
}
/// What caused the agent to look at a particular index. Recorded in the
/// trial's note field (see the note construction in `run_one_cycle`) so
/// we can tell "new data arrived" trials from "periodic exploration"
/// trials in the journal.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TriggerReason {
    /// Ingest just appended to a dataset that has attached HNSW indexes.
    DatasetAppended { dataset: String },
    /// A human or another agent hit `/agent/enqueue/{index}`.
    Manual,
    /// Periodic wake — no external event, just keep exploring.
    Periodic,
}
/// One unit of work for the agent.
#[derive(Debug, Clone)]
pub struct TriggerEvent {
    /// Which HNSW index to (re)tune.
    pub index_name: String,
    /// What prompted this event — ends up in the trial's note field.
    pub reason: TriggerReason,
    /// Stamped at construction time by the `TriggerEvent` constructors.
    pub enqueued_at: DateTime<Utc>,
}
impl TriggerEvent {
pub fn manual(index_name: impl Into<String>) -> Self {
Self { index_name: index_name.into(), reason: TriggerReason::Manual, enqueued_at: Utc::now() }
}
pub fn dataset_appended(index_name: impl Into<String>, dataset: impl Into<String>) -> Self {
Self {
index_name: index_name.into(),
reason: TriggerReason::DatasetAppended { dataset: dataset.into() },
enqueued_at: Utc::now(),
}
}
pub fn periodic(index_name: impl Into<String>) -> Self {
Self { index_name: index_name.into(), reason: TriggerReason::Periodic, enqueued_at: Utc::now() }
}
}
/// Observable snapshot of the agent's state — what `/agent/status` returns.
#[derive(Debug, Clone, Serialize)]
pub struct AgentStatus {
    /// True while the loop is alive; set from `config.enabled` at spawn
    /// and cleared when the loop exits on the stop signal.
    pub running: bool,
    /// The configuration the agent was spawned with.
    pub config: AgentConfig,
    /// Mirror of the trigger channel depth (see `AgentInner::queue_len`).
    pub queue_depth: usize,
    /// Completed trials since spawn. Errored cycles are not counted.
    pub trials_run: u64,
    /// Trials auto-promoted to champion since spawn.
    pub promotions: u64,
    /// Rolling-hour trial count — recomputed from the timestamp ring on
    /// every `AgentHandle::status()` call, not stored incrementally.
    pub trials_in_last_hour: u32,
    /// Most recent event — for "why didn't it do anything?" debugging.
    pub last_event: Option<AgentEvent>,
    /// When the loop started; None when spawned with `enabled = false`.
    pub started_at: Option<DateTime<Utc>>,
}
/// Last thing that happened — useful for "why didn't it do anything?" debugging.
#[derive(Debug, Clone, Serialize)]
pub struct AgentEvent {
    /// When the event was recorded.
    pub at: DateTime<Utc>,
    /// Event kind emitted by the loop: "trial_completed" | "promoted" |
    /// "trial_error" | "skipped_rate_limit" | "skipped_cooldown".
    pub kind: String,
    /// Index involved, when the event concerns a specific one.
    pub index_name: Option<String>,
    /// Human-readable detail line (also emitted via tracing).
    pub detail: String,
}
/// Handle returned by `spawn`. Holds the trigger sender + shared status +
/// stop signal. Cloneable — every clone talks to the same agent task.
#[derive(Clone)]
pub struct AgentHandle {
    // Producer side of the bounded (capacity 256) trigger queue.
    trigger_tx: mpsc::Sender<TriggerEvent>,
    // State shared with the loop task: status, stop signal, rate-limit ring.
    inner: Arc<AgentInner>,
}
/// State shared between every `AgentHandle` clone and the loop task.
struct AgentInner {
    /// Observable snapshot served by `/agent/status`.
    status: RwLock<AgentStatus>,
    /// Consumed by the first `stop()` call; None afterwards.
    stop_tx: Mutex<Option<oneshot::Sender<()>>>,
    queue_len: Mutex<usize>, // mirror of the channel capacity — for status reporting
    recent_trials: Mutex<VecDeque<DateTime<Utc>>>, // ring of recent trial timestamps for rate limit
}
impl AgentHandle {
    /// Enqueue a trigger. Returns Err if the agent isn't running or the
    /// queue is full (backpressure — dropping events is correct here
    /// since periodic exploration will pick up the slack).
    pub async fn enqueue(&self, event: TriggerEvent) -> Result<(), String> {
        self.trigger_tx.try_send(event).map_err(|e| format!("enqueue: {e}"))?;
        // Lock order here is queue_len → status, the same order the
        // receive path in `run_loop` uses — so the two can't deadlock
        // against each other.
        let mut guard = self.inner.queue_len.lock().await;
        *guard = guard.saturating_add(1);
        // Update queue_depth in status for observability.
        let mut s = self.inner.status.write().await;
        s.queue_depth = *guard;
        Ok(())
    }

    /// Snapshot the agent's observable state. `trials_in_last_hour` is
    /// recomputed from the timestamp ring on every call rather than
    /// trusting the stored value, so it is always current.
    pub async fn status(&self) -> AgentStatus {
        let mut s = self.inner.status.read().await.clone();
        // Refresh rate-limit window from ring buffer.
        let cutoff = Utc::now() - chrono::Duration::hours(1);
        let ring = self.inner.recent_trials.lock().await;
        s.trials_in_last_hour = ring.iter().filter(|t| **t >= cutoff).count() as u32;
        s
    }

    /// Request a graceful stop. Returns immediately — the loop exits
    /// after its current trial completes. Returns false if a stop was
    /// already requested (the oneshot sender is consumed on first use).
    pub async fn stop(&self) -> bool {
        let mut guard = self.inner.stop_tx.lock().await;
        if let Some(tx) = guard.take() {
            let _ = tx.send(());
            true
        } else {
            false
        }
    }
}
// -------- Agent state holder --------
/// Everything the agent needs to run a trial. Mirrors the fields of
/// `VectorState` the agent actually uses. Kept separate so the service
/// layer builds it explicitly — no clone of unneeded state.
#[derive(Clone)]
pub struct AgentDeps {
    /// Object store handle (not referenced directly in this module).
    pub store: Arc<dyn ObjectStore>,
    /// Passed to `harness::compute_ground_truth` when building ground truth.
    pub ai_client: AiClient,
    /// Catalog registry (not referenced directly in this module).
    pub catalog: CatalogRegistry,
    /// Lists live indexes — used by `pick_periodic_target`.
    pub index_registry: IndexRegistry,
    /// Builds and benches throwaway trial indexes.
    pub hnsw_store: HnswStore,
    /// Per-index embeddings, loaded via `get_or_load`.
    pub embedding_cache: EmbeddingCache,
    /// The agent's memory: trial history is read before, and appended
    /// after, every cycle (ADR-018).
    pub trial_journal: TrialJournal,
    /// Champion configs; written on auto-promotion.
    pub promotion_registry: PromotionRegistry,
    /// Eval harness load/save; auto-harness bootstrap goes through here.
    pub harness_store: crate::harness::HarnessStore,
}
// -------- Spawn --------
/// Start the agent loop in a background tokio task. Returns a handle
/// the caller uses to enqueue events and read status.
///
/// When `config.enabled` is false, no loop runs — a drain task consumes
/// the trigger channel instead so `enqueue` calls never back up.
pub fn spawn(config: AgentConfig, deps: AgentDeps) -> AgentHandle {
    // Bounded queue: `enqueue` uses try_send, so triggers beyond 256
    // pending are dropped (backpressure by design).
    let (trigger_tx, trigger_rx) = mpsc::channel::<TriggerEvent>(256);
    let (stop_tx, stop_rx) = oneshot::channel::<()>();
    let status = AgentStatus {
        running: config.enabled,
        config: config.clone(),
        queue_depth: 0,
        trials_run: 0,
        promotions: 0,
        trials_in_last_hour: 0,
        last_event: None,
        started_at: if config.enabled { Some(Utc::now()) } else { None },
    };
    let inner = Arc::new(AgentInner {
        status: RwLock::new(status),
        stop_tx: Mutex::new(Some(stop_tx)),
        queue_len: Mutex::new(0),
        recent_trials: Mutex::new(VecDeque::with_capacity(64)),
    });
    if config.enabled {
        tracing::info!(
            "autotune agent started (cycle={}s, cooldown={}s, cap={}/hr, min_recall={})",
            config.cycle_interval_secs, config.cooldown_between_trials_secs,
            config.max_trials_per_hour, config.min_recall,
        );
        let loop_inner = inner.clone();
        let loop_deps = deps.clone();
        let loop_config = config.clone();
        tokio::spawn(async move {
            run_loop(loop_config, loop_deps, trigger_rx, stop_rx, loop_inner).await;
        });
    } else {
        // Agent disabled — still drain the channel so sends don't back up.
        // NOTE(review): this drain path never decrements `queue_len`, so a
        // disabled agent's reported `queue_depth` only ever grows as
        // `enqueue` is called — confirm that's acceptable for status
        // reporting.
        tokio::spawn(async move {
            let mut rx = trigger_rx;
            while rx.recv().await.is_some() {}
        });
        tracing::info!("autotune agent configured but disabled (set [agent].enabled=true)");
    }
    AgentHandle { trigger_tx, inner }
}
// -------- Main loop --------
/// The agent's main loop: wait for a trigger (or a periodic tick, or the
/// stop signal), apply the rate-limit and cooldown gates, then run at
/// most one trial cycle. Trials are strictly sequential — the loop never
/// awaits two cycles concurrently.
///
/// Exits when `stop_rx` fires or the trigger channel closes; both exit
/// paths clear `status.running` so `/agent/status` reflects reality.
async fn run_loop(
    config: AgentConfig,
    deps: AgentDeps,
    mut trigger_rx: mpsc::Receiver<TriggerEvent>,
    mut stop_rx: oneshot::Receiver<()>,
    inner: Arc<AgentInner>,
) {
    let mut periodic = tokio::time::interval(std::time::Duration::from_secs(config.cycle_interval_secs));
    // First tick fires immediately — skip it so we don't double-fire on startup.
    periodic.tick().await;
    loop {
        let event = tokio::select! {
            _ = &mut stop_rx => {
                tracing::info!("autotune agent: stop signal received");
                let mut s = inner.status.write().await;
                s.running = false;
                return;
            }
            maybe = trigger_rx.recv() => match maybe {
                Some(ev) => {
                    // Mirror the channel depth for /agent/status. Lock
                    // order (queue_len → status) matches `AgentHandle::enqueue`.
                    let mut guard = inner.queue_len.lock().await;
                    *guard = guard.saturating_sub(1);
                    let mut s = inner.status.write().await;
                    s.queue_depth = *guard;
                    ev
                }
                None => {
                    // All senders dropped. FIX: this path previously
                    // returned without clearing `running`, leaving status
                    // stuck at running=true after the loop was gone.
                    tracing::info!("autotune agent: trigger channel closed");
                    let mut s = inner.status.write().await;
                    s.running = false;
                    return;
                }
            },
            _ = periodic.tick() => {
                // Periodic wake — pick an index with existing trials.
                // If nothing's been tuned yet there's nothing to propose.
                match pick_periodic_target(&deps).await {
                    Some(idx) => TriggerEvent::periodic(idx),
                    None => continue,
                }
            }
        };
        // Budget gate: hard cap on trials per rolling hour.
        if over_rate_limit(&inner, config.max_trials_per_hour).await {
            record_event(&inner, "skipped_rate_limit", Some(&event.index_name),
                format!("hit cap of {}/hour", config.max_trials_per_hour)).await;
            continue;
        }
        // Cooldown gate (currently global, not per-index — see `cooling_down`).
        if cooling_down(&inner, &event.index_name, config.cooldown_between_trials_secs).await {
            record_event(&inner, "skipped_cooldown", Some(&event.index_name),
                format!("last trial too recent (<{}s)", config.cooldown_between_trials_secs)).await;
            continue;
        }
        // Run one trial.
        match run_one_cycle(&event, &deps, config.min_recall).await {
            Ok(outcome) => {
                mark_recent_trial(&inner).await;
                {
                    let mut s = inner.status.write().await;
                    s.trials_run += 1;
                    if outcome.promoted { s.promotions += 1; }
                }
                record_event(&inner, if outcome.promoted { "promoted" } else { "trial_completed" },
                    Some(&event.index_name),
                    format!("config=ec{}/es{} recall={:.3} p50={:.0}us {}",
                        outcome.trial.config.ef_construction,
                        outcome.trial.config.ef_search,
                        outcome.trial.metrics.recall_at_k,
                        outcome.trial.metrics.search_latency_p50_us,
                        if outcome.promoted { "★ PROMOTED" } else { "" })).await;
            }
            Err(e) => {
                // NOTE(review): failed cycles never call `mark_recent_trial`,
                // so they bypass both the rate limit and the cooldown — a
                // persistently failing index can retry on every trigger.
                // Confirm this is intended before treating the hourly cap
                // as a hard bound on work attempted.
                record_event(&inner, "trial_error", Some(&event.index_name), e).await;
            }
        }
    }
}
/// Result of one cycle — ran a trial, maybe promoted it.
struct CycleOutcome {
    /// The completed trial (already appended to the journal).
    trial: Trial,
    /// True when the trial passed the recall gate and beat the champion.
    promoted: bool,
}
/// Core cycle: propose → build → bench → record → maybe promote.
async fn run_one_cycle(
event: &TriggerEvent,
deps: &AgentDeps,
min_recall: f32,
) -> Result<CycleOutcome, String> {
// Read history.
let history = deps.trial_journal.list(&event.index_name).await
.map_err(|e| format!("read journal: {e}"))?;
// Bootstrap mode: first ever visit to this index. Auto-generate a
// harness (if missing) and seed with the default config — the Phase 15
// known-good ec=80/es=30. Subsequent visits go through the proposer.
let is_bootstrap = history.is_empty();
// Current champion (if any) is the promoted config.
let champion = deps.promotion_registry.get_current(&event.index_name).await;
let champion_trial = champion.as_ref().and_then(|p| {
history.iter().find(|t| t.id == p.trial_id).cloned()
});
let (next_config, harness_name) = if is_bootstrap {
let name = ensure_auto_harness(&event.index_name, deps).await?;
(HnswConfig::default(), name)
} else {
let Some(cfg) = propose_next_config(&history, champion_trial.as_ref()) else {
return Err("proposer returned None — search space exhausted".into());
};
// Validate bounds defensively.
if !(10..=400).contains(&cfg.ef_construction) {
return Err(format!("proposed ef_construction={} out of bounds", cfg.ef_construction));
}
if !(10..=200).contains(&cfg.ef_search) {
return Err(format!("proposed ef_search={} out of bounds", cfg.ef_search));
}
// A future refinement: remember per-index "canonical harness" on
// the index metadata. For now: latest wins.
let hname = history.last().unwrap().eval_set.clone();
(cfg, hname)
};
let mut harness_set = deps.harness_store.load_for_index(&event.index_name, &harness_name).await
.map_err(|e| format!("load harness '{harness_name}': {e}"))?;
let embeddings = deps.embedding_cache.get_or_load(&event.index_name).await
.map_err(|e| format!("embeddings: {e}"))?;
if !harness_set.ground_truth_built {
harness::compute_ground_truth(&mut harness_set, &embeddings, &deps.ai_client).await
.map_err(|e| format!("ground truth: {e}"))?;
deps.harness_store.save(&harness_set).await.ok();
}
// Build + bench.
let trial_id = Trial::new_id();
let slot = format!("{}__{}", event.index_name, trial_id);
let build = deps.hnsw_store
.build_index_with_config(&slot, (*embeddings).clone(), &next_config)
.await?;
let query_vectors: Vec<Vec<f32>> = harness_set.queries
.iter().filter_map(|q| q.query_embedding.clone()).collect();
let bench = deps.hnsw_store.bench_search(&slot, &query_vectors, harness_set.k).await?;
let mut recalls = Vec::with_capacity(harness_set.queries.len());
for (q, hits) in harness_set.queries.iter().zip(bench.retrieved.iter()) {
if let Some(gt) = &q.ground_truth {
recalls.push(harness::recall_at_k(hits, gt, harness_set.k));
}
}
let mean_recall = if recalls.is_empty() { 0.0 } else {
recalls.iter().sum::<f32>() / recalls.len() as f32
};
let mut lats = bench.latencies_us.clone();
lats.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let p = |pct: f32| -> f32 {
if lats.is_empty() { return 0.0; }
let idx = ((lats.len() as f32 - 1.0) * pct).round() as usize;
lats[idx.min(lats.len() - 1)]
};
let dims = embeddings.first().map(|e| e.vector.len()).unwrap_or(0);
let memory_bytes = (embeddings.len() * dims * std::mem::size_of::<f32>() + embeddings.len() * 128) as u64;
let note = {
let base = match &event.reason {
TriggerReason::DatasetAppended { dataset } => format!("agent: dataset_appended({dataset})"),
TriggerReason::Manual => "agent: manual".to_string(),
TriggerReason::Periodic => "agent: periodic".to_string(),
};
if is_bootstrap { format!("{base} bootstrap") } else { base }
};
let trial = Trial {
id: trial_id,
index_name: event.index_name.clone(),
eval_set: harness_set.name.clone(),
config: next_config.clone(),
metrics: TrialMetrics {
build_time_secs: build.build_time_secs,
search_latency_p50_us: p(0.50),
search_latency_p95_us: p(0.95),
search_latency_p99_us: p(0.99),
recall_at_k: mean_recall,
memory_bytes,
vectors: build.vectors,
eval_queries: harness_set.queries.len(),
brute_force_latency_us: 0.0,
},
created_at: Utc::now(),
note: Some(note),
};
deps.trial_journal.append(&trial).await.ok();
deps.hnsw_store.drop(&slot).await;
// Promotion decision: the new trial must meet recall gate AND beat
// the current champion (higher recall OR same recall + lower p50).
let promoted = if trial.metrics.recall_at_k < min_recall {
false
} else {
let beats = match &champion_trial {
None => true, // no champion yet — anything passing the gate wins
Some(c) => beats_champion(&trial, c),
};
if beats {
let entry = PromotionEntry {
config: trial.config.clone(),
trial_id: trial.id.clone(),
promoted_at: Utc::now(),
promoted_by: "agent".to_string(),
note: Some(format!(
"auto-promote: recall={:.3} p50={:.0}us (was {:.3}/{:.0}us)",
trial.metrics.recall_at_k, trial.metrics.search_latency_p50_us,
champion_trial.as_ref().map(|t| t.metrics.recall_at_k).unwrap_or(0.0),
champion_trial.as_ref().map(|t| t.metrics.search_latency_p50_us).unwrap_or(0.0),
)),
};
deps.promotion_registry.promote(&event.index_name, entry).await.is_ok()
} else {
false
}
};
Ok(CycleOutcome { trial, promoted })
}
/// Champion-beat test: strictly higher recall, OR recall within 1e-4 of
/// the champion's with a strictly lower p50. Same rule as
/// `autotune::pick_winner` — kept consistent so the agent and the
/// synchronous autotune agree on what "better" means.
fn beats_champion(candidate: &Trial, champion: &Trial) -> bool {
    let (cand, champ) = (&candidate.metrics, &champion.metrics);
    if cand.recall_at_k > champ.recall_at_k {
        true
    } else {
        // Tie-break: treat recall within 1e-4 as equal, then prefer latency.
        let recall_tied = (cand.recall_at_k - champ.recall_at_k).abs() < 1e-4;
        recall_tied && cand.search_latency_p50_us < champ.search_latency_p50_us
    }
}
/// Propose the next HnswConfig given trial history and the current
/// champion. Returns `None` when nothing is worth trying — the agent
/// logs "search space exhausted" and moves on.
///
/// Inputs: `history` is every trial ever run on this index (oldest
/// first); `champion` is the currently-promoted trial, if any.
///
/// Strategy: ε-greedy around the champion, dedup-aware.
/// - With probability ε (25%), sample a random config from the full
///   legal box: ef_construction ∈ [10, 400], ef_search ∈ [10, 200] —
///   the same bounds the caller re-checks defensively.
/// - Otherwise perturb the base config with a signed step on BOTH axes,
///   smaller steps listed first so recall stays near the current level.
/// - Configs already present in history are skipped — re-running the
///   same (ec, es) pair is wasted budget.
/// - Deterministic per journal state: the RNG is seeded from history
///   length, so the same journal always proposes the same next config.
///   Makes tests + offline replay reproducible.
pub fn propose_next_config(history: &[Trial], champion: Option<&Trial>) -> Option<HnswConfig> {
    // Perturbation base: champion first, else most recent trial, else defaults.
    let base = match champion {
        Some(t) => t.config.clone(),
        None => history.last().map(|t| t.config.clone()).unwrap_or_default(),
    };
    let already_tried = |ec: usize, es: usize| -> bool {
        history.iter().any(|t| t.config.ef_construction == ec && t.config.ef_search == es)
    };
    let bounded = |ec: i32, es: i32| -> (usize, usize) {
        (ec.clamp(10, 400) as usize, es.clamp(10, 200) as usize)
    };
    // Tiny xorshift64 — no rand crate dep. Seeding from history length
    // (| 1 keeps the state nonzero) makes the proposer a pure function
    // of the journal.
    let mut state = (history.len() as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15) | 1;
    let mut draw = move || {
        state ^= state << 13;
        state ^= state >> 7;
        state ^= state << 17;
        state
    };
    const EC_STEPS: [i32; 6] = [-40, -20, -10, 10, 20, 40];
    const ES_STEPS: [i32; 6] = [-20, -10, -5, 5, 10, 20];
    for _ in 0..32 {
        let (ec, es) = if draw() % 100 < 25 {
            // Explore: uniform sample over the full legal box. ε = 0.25.
            bounded(10 + (draw() % 391) as i32, 10 + (draw() % 191) as i32)
        } else {
            // Exploit: symmetric signed step on each axis around the base.
            let step_ec = EC_STEPS[(draw() % 6) as usize];
            let step_es = ES_STEPS[(draw() % 6) as usize];
            bounded(base.ef_construction as i32 + step_ec, base.ef_search as i32 + step_es)
        };
        if !already_tried(ec, es) {
            return Some(HnswConfig { ef_construction: ec, ef_search: es, seed: Some(42) });
        }
    }
    None // 32 attempts all landed on duplicates — likely saturated
}
// -------- Helpers --------
/// Minimum vectors for an index to be worth auto-tuning. Below this,
/// HNSW's win over brute-force is too small to matter and trial budget
/// is better spent on bigger indexes. Compared against `chunk_count`
/// when filtering candidates in `pick_periodic_target`.
const AUTOTUNE_MIN_VECTORS: usize = 1_000;
/// Find an index to poke on a periodic wake. Strategy: least-recently-tuned
/// wins — scan every live index (from `IndexRegistry`, not just promoted
/// ones) and pick the one whose most recent trial is oldest. Never-trialed
/// indexes (no journal entries → `None`, and `None < Some(_)`) sort first,
/// which is exactly what we want: bootstrap them on their first visit.
///
/// Why not "most recently promoted" (the original strategy): a converged
/// index sits at recall=1.0 and can't be improved — but it was always the
/// freshest promotion, so the agent burned every trial on it while the
/// rest of the portfolio got zero attention. Rotating by last-trial-time
/// fixes that without explicit convergence detection.
///
/// Why IndexRegistry instead of PromotionRegistry: on a fresh system only
/// a handful of indexes have ever been promoted, so promotion-based
/// picking starves new ones. Filtering by `AUTOTUNE_MIN_VECTORS` keeps
/// the proposer off indexes where graph config doesn't matter.
///
/// Cost: one journal read per eligible index per periodic tick. Fine at
/// dozens; cache `last_trial_at` on IndexMeta if the portfolio grows large.
async fn pick_periodic_target(deps: &AgentDeps) -> Option<String> {
    let eligible: Vec<String> = deps
        .index_registry
        .list(None, None)
        .await
        .into_iter()
        .filter_map(|m| (m.chunk_count >= AUTOTUNE_MIN_VECTORS).then_some(m.index_name))
        .collect();
    // Manual min-scan keyed by last trial time.
    let mut winner: Option<(String, Option<DateTime<Utc>>)> = None;
    for name in eligible {
        let last_trial_at = deps
            .trial_journal
            .list(&name)
            .await
            .ok()
            .and_then(|trials| trials.into_iter().map(|t| t.created_at).max());
        // `<=` (not `<`) keeps the LAST minimal element on ties — the
        // same behavior as `Iterator::min_by_key`.
        let take_it = match &winner {
            None => true,
            Some((_, current_best)) => last_trial_at <= *current_best,
        };
        if take_it {
            winner = Some((name, last_trial_at));
        }
    }
    winner.map(|(name, _)| name)
}
/// On the first visit to an index, load or synthesize an eval harness so
/// bootstrap trials have something to measure recall against. Returns the
/// harness name (`{index}_auto`). Synthetic harnesses sample 20 chunks as
/// self-queries with k=10; ground truth comes from brute-force cosine via
/// `harness::compute_ground_truth`, so the recall numbers are real.
async fn ensure_auto_harness(index_name: &str, deps: &AgentDeps) -> Result<String, String> {
    let harness_name = format!("{index_name}_auto");
    // Already bootstrapped on an earlier visit — reuse it.
    if deps.harness_store.load_for_index(index_name, &harness_name).await.is_ok() {
        return Ok(harness_name);
    }
    let embeddings = deps
        .embedding_cache
        .get_or_load(index_name)
        .await
        .map_err(|e| format!("load embeddings for auto-harness: {e}"))?;
    if embeddings.is_empty() {
        return Err(format!("index '{index_name}' has no embeddings — cannot bootstrap"));
    }
    // 20 self-queries, k=10.
    let mut eval_set = harness::synthetic_from_chunks(&harness_name, index_name, &embeddings, 20, 10);
    harness::compute_ground_truth(&mut eval_set, &embeddings, &deps.ai_client)
        .await
        .map_err(|e| format!("auto-harness ground truth: {e}"))?;
    deps.harness_store.save(&eval_set).await
        .map_err(|e| format!("save auto-harness: {e}"))?;
    tracing::info!(
        "agent: bootstrapped harness '{}' for index '{}' (20 self-queries, k=10)",
        harness_name, index_name
    );
    Ok(harness_name)
}
/// True when the number of trials in the trailing hour has reached `cap`.
async fn over_rate_limit(inner: &Arc<AgentInner>, cap: u32) -> bool {
    let one_hour_ago = Utc::now() - chrono::Duration::hours(1);
    let ring = inner.recent_trials.lock().await;
    let in_window = ring.iter().filter(|t| **t >= one_hour_ago).count();
    in_window as u32 >= cap
}
/// True when the most recent trial (on ANY index) is newer than the
/// cooldown window.
///
/// Minimal impl: gates on the global most-recent trial rather than
/// per-index (`_index` is unused). Per-index cooldown would be easy to
/// add — a HashMap<String, DateTime> — but for Phase 16.2 MVP, global is
/// fine: Ollama is the shared resource being protected.
async fn cooling_down(inner: &Arc<AgentInner>, _index: &str, cooldown_secs: u64) -> bool {
    let ring = inner.recent_trials.lock().await;
    match ring.back() {
        Some(last) => {
            let elapsed = Utc::now().signed_duration_since(*last);
            elapsed < chrono::Duration::seconds(cooldown_secs as i64)
        }
        None => false, // no trials yet — nothing to cool down from
    }
}
/// Record "a trial just finished" in the rate-limit ring.
async fn mark_recent_trial(inner: &Arc<AgentInner>) {
    let mut timestamps = inner.recent_trials.lock().await;
    timestamps.push_back(Utc::now());
    // Bound the ring at 256 entries — old timestamps age out of the
    // one-hour window anyway, this just caps memory.
    while timestamps.len() > 256 {
        timestamps.pop_front();
    }
}
/// Log an agent event via tracing and stash it as `last_event` so
/// `/agent/status` can answer "what happened last?".
async fn record_event(
    inner: &Arc<AgentInner>,
    kind: &str,
    index: Option<&str>,
    detail: String,
) {
    let index_tag = index.map(|i| format!("[{i}] ")).unwrap_or_default();
    tracing::info!("agent: {} {}{}", kind, index_tag, detail);
    let event = AgentEvent {
        at: Utc::now(),
        kind: kind.to_string(),
        index_name: index.map(String::from),
        detail,
    };
    inner.status.write().await.last_event = Some(event);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal `Trial` for proposer/promotion tests. Only the
    /// config and the recall/p50 metrics matter to the code under test;
    /// everything else is fixed filler.
    fn mk_trial(ec: usize, es: usize, recall: f32, p50: f32) -> Trial {
        Trial {
            id: format!("t-{ec}-{es}"),
            index_name: "test".into(),
            eval_set: "eval".into(),
            config: HnswConfig { ef_construction: ec, ef_search: es, seed: Some(42) },
            metrics: TrialMetrics {
                build_time_secs: 1.0,
                search_latency_p50_us: p50,
                search_latency_p95_us: p50 * 1.5,
                search_latency_p99_us: p50 * 2.0,
                recall_at_k: recall,
                memory_bytes: 0, vectors: 1000, eval_queries: 10,
                brute_force_latency_us: 100.0,
            },
            created_at: Utc::now(),
            note: None,
        }
    }

    /// The proposer must never re-propose an (ec, es) pair already in history.
    #[test]
    fn propose_skips_duplicates() {
        let hist = vec![
            mk_trial(80, 30, 1.0, 500.0),
            mk_trial(100, 30, 1.0, 520.0), // ec+20
        ];
        let next = propose_next_config(&hist, Some(&hist[0])).unwrap();
        // ec+20 is taken, so the proposer should skip it.
        assert!(next.ef_construction != 100 || next.ef_search != 30);
    }

    /// Strictly higher recall wins even at worse latency; lower recall
    /// never wins, even at much better latency.
    #[test]
    fn beats_champion_strict_recall() {
        let champ = mk_trial(80, 30, 0.95, 500.0);
        let better_recall = mk_trial(80, 30, 0.99, 600.0);
        let worse_recall = mk_trial(80, 30, 0.90, 100.0);
        assert!(beats_champion(&better_recall, &champ));
        assert!(!beats_champion(&worse_recall, &champ));
    }

    /// At equal recall, lower p50 latency breaks the tie.
    #[test]
    fn beats_champion_same_recall_lower_latency() {
        let champ = mk_trial(80, 30, 1.0, 500.0);
        let faster = mk_trial(60, 30, 1.0, 400.0);
        let slower = mk_trial(60, 30, 1.0, 600.0);
        assert!(beats_champion(&faster, &champ));
        assert!(!beats_champion(&slower, &champ));
    }
}