//! Phase 16: Autotune — the agent loop for HNSW tuning.
//!
//! Given an index and a harness, run a grid of (ef_construction,
//! ef_search) trials, pick the winner, and promote it.
//!
//! This is deliberately NOT a sophisticated optimizer — exhaustive
//! search over a small curated grid is good enough when the config
//! space is a dozen combinations and the work per trial is seconds,
//! not hours. Think of it as a "smart operator" rather than Bayesian
//! optimization.
//!
//! Score: we want high recall and low p50 latency. `pick_winner` below
//! selects lexicographically: highest recall first, breaking ties by
//! lower p50 latency.
//!
//! Safeguards (PRD risk mitigation):
//! - Never promote a trial with recall < `min_recall` (default 0.9)
//! - Hard config bounds: ef_construction ∈ [10, 400], ef_search ∈ [10, 200]
//! - Concurrency cap: one autotune per index at a time (tracked by
//!   JobTracker — caller is responsible for not firing in parallel
//!   for the same index)

use chrono::Utc;
use object_store::ObjectStore;
use serde::{Deserialize, Serialize};
use std::sync::Arc;

use aibridge::client::AiClient;
use catalogd::registry::Registry as CatalogRegistry;

use crate::embedding_cache::{Embedding, EmbeddingCache};
use crate::harness::{self, HarnessStore};
use crate::hnsw::HnswStore;
use crate::index_registry::IndexRegistry;
use crate::jobs::JobTracker;
use crate::promotion::{PromotionEntry, PromotionRegistry};
use crate::trial::{HnswConfig, Trial, TrialJournal, TrialMetrics};

#[derive(Debug, Clone, Deserialize)]
pub struct AutotuneRequest {
    pub index_name: String,
    pub harness: String,
    /// Minimum recall a trial must achieve to be eligible for promotion.
    /// Default 0.9 — below that, the tuning is not producing useful
    /// answers. Never promote a bad index.
    #[serde(default = "default_min_recall")]
    pub min_recall: f32,
    /// Override the default config grid (rarely needed). When omitted,
    /// a small curated grid runs.
    #[serde(default)]
    pub grid: Option<Vec<HnswConfig>>,
}

fn default_min_recall() -> f32 {
    0.9
}

#[derive(Debug, Clone, Serialize)]
pub struct AutotuneResult {
    pub index_name: String,
    pub trials_run: usize,
    pub trials_eligible: usize,
    pub winner: Option<WinnerReport>,
    pub promoted: bool,
    pub duration_secs: f32,
    pub skipped: Vec<String>,
}

#[derive(Debug, Clone, Serialize)]
pub struct WinnerReport {
    pub trial_id: String,
    pub config: HnswConfig,
    pub recall_at_k: f32,
    pub search_latency_p50_us: f32,
    pub build_time_secs: f32,
}

/// Default exploration grid. Covers the practical range of settings
/// from "fast build, OK recall" to "thorough build, great recall".
fn default_grid() -> Vec<HnswConfig> {
    vec![
        HnswConfig { ef_construction: 20, ef_search: 30, seed: Some(42) },
        HnswConfig { ef_construction: 40, ef_search: 30, seed: Some(42) },
        HnswConfig { ef_construction: 80, ef_search: 30, seed: Some(42) },
        HnswConfig { ef_construction: 80, ef_search: 60, seed: Some(42) },
        HnswConfig { ef_construction: 160, ef_search: 30, seed: Some(42) },
    ]
}
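// A minimal defaults check for `AutotuneRequest`. This is a sketch that
// assumes `serde_json` is available as a dev-dependency; the index and
// harness names are illustrative. It pins down the contract the doc
// comments promise: a caller may omit `min_recall` and `grid`, and serde
// fills in 0.9 and `None`, so the curated `default_grid()` runs.
#[cfg(test)]
mod request_defaults_tests {
    use super::*;

    #[test]
    fn omitted_fields_get_safe_defaults() {
        let req: AutotuneRequest =
            serde_json::from_str(r#"{"index_name": "docs", "harness": "smoke"}"#)
                .expect("minimal request should deserialize");
        assert_eq!(req.min_recall, 0.9);
        assert!(req.grid.is_none());
    }
}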
/// Config-space bounds. Rejects anything outside these before running a
/// trial — defends against an agent getting creative with absurd values.
fn sanitize_grid(grid: Vec<HnswConfig>) -> (Vec<HnswConfig>, Vec<String>) {
    let mut ok = Vec::new();
    let mut rejected = Vec::new();
    for cfg in grid {
        if !(10..=400).contains(&cfg.ef_construction) {
            rejected.push(format!(
                "ef_construction={} out of bounds [10,400]",
                cfg.ef_construction,
            ));
            continue;
        }
        if !(10..=200).contains(&cfg.ef_search) {
            rejected.push(format!(
                "ef_search={} out of bounds [10,200]",
                cfg.ef_search,
            ));
            continue;
        }
        ok.push(cfg);
    }
    (ok, rejected)
}

/// Winner selection: highest recall, then lowest p50 latency, among
/// eligible trials (recall >= min_recall).
fn pick_winner(trials: &[Trial], min_recall: f32) -> Option<&Trial> {
    trials
        .iter()
        .filter(|t| t.metrics.recall_at_k >= min_recall)
        .min_by(|a, b| {
            // Compare by (−recall, p50_latency) so the minimum is the
            // best trial: higher recall wins, then lower latency.
            let a_key = (-a.metrics.recall_at_k, a.metrics.search_latency_p50_us);
            let b_key = (-b.metrics.recall_at_k, b.metrics.search_latency_p50_us);
            a_key.partial_cmp(&b_key).unwrap_or(std::cmp::Ordering::Equal)
        })
}

/// Run the autotune loop. The function is async but does all its work
/// before returning; a caller that wants fire-and-forget behavior can
/// wrap it in `tokio::spawn`. Uses the existing trial flow so every
/// autotune step lands in the trial journal.
#[allow(clippy::too_many_arguments)]
pub async fn run_autotune(
    req: AutotuneRequest,
    _store: &Arc<dyn ObjectStore>,
    catalog: &CatalogRegistry,
    ai_client: &AiClient,
    embedding_cache: &EmbeddingCache,
    hnsw_store: &HnswStore,
    index_registry: &IndexRegistry,
    trial_journal: &TrialJournal,
    promotion_registry: &PromotionRegistry,
    harness_store: &HarnessStore,
    _job_tracker: &JobTracker,
) -> Result<AutotuneResult, String> {
    let t0 = std::time::Instant::now();

    // Sanity: the index has to exist.
    if index_registry.get(&req.index_name).await.is_none() {
        return Err(format!("index not found: {}", req.index_name));
    }
    let _ = catalog; // reserved for future audit emission

    // Load the harness once, compute ground truth once. The harness
    // resolves to the index's bucket via HarnessStore, with primary as
    // fallback for pre-federation evals.
    let mut harness_set = harness_store
        .load_for_index(&req.index_name, &req.harness)
        .await
        .map_err(|e| format!("load harness: {e}"))?;
    let embeddings = embedding_cache
        .get_or_load(&req.index_name)
        .await
        .map_err(|e| format!("embeddings: {e}"))?;
    if !harness_set.ground_truth_built {
        harness::compute_ground_truth(&mut harness_set, &embeddings, ai_client)
            .await
            .map_err(|e| format!("ground truth: {e}"))?;
        harness_store.save(&harness_set).await.ok();
    }

    let (grid, rejected) = sanitize_grid(req.grid.clone().unwrap_or_else(default_grid));
    if grid.is_empty() {
        return Err("all configs rejected by bounds check".into());
    }
    tracing::info!(
        "autotune '{}': {} trial configs, min_recall={}",
        req.index_name,
        grid.len(),
        req.min_recall,
    );

    // Run each trial through the existing pipeline so every run lands
    // in the trial journal and contributes to the long-term tuning
    // memory. This is the llms3.com-style "trials are data" pattern.
    let mut run_trials: Vec<Trial> = Vec::with_capacity(grid.len());
    for config in &grid {
        let trial = run_single_trial(
            &req.index_name,
            &harness_set,
            &embeddings,
            config,
            hnsw_store,
            trial_journal,
            ai_client,
        )
        .await;
        match trial {
            Ok(t) => run_trials.push(t),
            Err(e) => tracing::warn!(
                "autotune: trial ec={} es={} failed: {e}",
                config.ef_construction,
                config.ef_search,
            ),
        }
    }

    let trials_eligible = run_trials
        .iter()
        .filter(|t| t.metrics.recall_at_k >= req.min_recall)
        .count();
    let winner = pick_winner(&run_trials, req.min_recall);

    let mut promoted = false;
    let winner_report = if let Some(w) = winner {
        let entry = PromotionEntry {
            config: w.config.clone(),
            trial_id: w.id.clone(),
            promoted_at: Utc::now(),
            promoted_by: "autotune".to_string(),
            note: Some(format!(
                "autotune winner: recall={:.3} p50={:.0}us",
                w.metrics.recall_at_k, w.metrics.search_latency_p50_us,
            )),
        };
        if let Err(e) = promotion_registry.promote(&req.index_name, entry).await {
            tracing::warn!("autotune: promote failed: {e}");
        } else {
            promoted = true;
        }
        Some(WinnerReport {
            trial_id: w.id.clone(),
            config: w.config.clone(),
            recall_at_k: w.metrics.recall_at_k,
            search_latency_p50_us: w.metrics.search_latency_p50_us,
            build_time_secs: w.metrics.build_time_secs,
        })
    } else {
        tracing::warn!(
            "autotune '{}': no trial met min_recall={}; not promoting",
            req.index_name,
            req.min_recall,
        );
        None
    };

    Ok(AutotuneResult {
        index_name: req.index_name,
        trials_run: run_trials.len(),
        trials_eligible,
        winner: winner_report,
        promoted,
        duration_secs: t0.elapsed().as_secs_f32(),
        skipped: rejected,
    })
}

/// Run one trial and journal it. Returns the recorded Trial.
#[allow(clippy::too_many_arguments)]
async fn run_single_trial(
    index_name: &str,
    harness_set: &harness::EvalSet,
    embeddings: &Arc<Vec<Embedding>>,
    config: &HnswConfig,
    hnsw_store: &HnswStore,
    trial_journal: &TrialJournal,
    _ai_client: &AiClient,
) -> Result<Trial, String> {
    let trial_id = Trial::new_id();
    // Build into a throwaway slot so trials never touch the live index.
    let slot = format!("{}__{}", index_name, trial_id);
    let build = hnsw_store
        .build_index_with_config(&slot, (**embeddings).clone(), config)
        .await?;

    let query_vectors: Vec<Vec<f32>> = harness_set
        .queries
        .iter()
        .filter_map(|q| q.query_embedding.clone())
        .collect();
    let bench = hnsw_store
        .bench_search(&slot, &query_vectors, harness_set.k)
        .await?;

    // Mean recall@k over the queries that have ground truth.
    let mut recalls = Vec::with_capacity(harness_set.queries.len());
    for (q, hits) in harness_set.queries.iter().zip(bench.retrieved.iter()) {
        if let Some(gt) = &q.ground_truth {
            recalls.push(harness::recall_at_k(hits, gt, harness_set.k));
        }
    }
    let mean_recall = if recalls.is_empty() {
        0.0
    } else {
        recalls.iter().sum::<f32>() / recalls.len() as f32
    };

    // Nearest-rank percentiles over the sorted per-query latencies.
    let mut lats = bench.latencies_us.clone();
    lats.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let p = |pct: f32| -> f32 {
        if lats.is_empty() {
            return 0.0;
        }
        let idx = ((lats.len() as f32 - 1.0) * pct).round() as usize;
        lats[idx.min(lats.len() - 1)]
    };

    // Time one brute-force scan as a latency baseline for the report.
    let brute_us = if let Some(qv) = query_vectors.first() {
        let t = std::time::Instant::now();
        let _ = harness::brute_force_top_k(qv, embeddings, harness_set.k);
        t.elapsed().as_micros() as f32
    } else {
        0.0
    };

    // Rough memory estimate: raw vector payload plus ~128 bytes of graph
    // overhead per node.
    let dims = embeddings.first().map(|e| e.vector.len()).unwrap_or(0);
    let memory_bytes =
        (embeddings.len() * dims * std::mem::size_of::<f32>() + embeddings.len() * 128) as u64;

    let trial = Trial {
        id: trial_id.clone(),
        index_name: index_name.to_string(),
        eval_set: harness_set.name.clone(),
        config: config.clone(),
        metrics: TrialMetrics {
            build_time_secs: build.build_time_secs,
            search_latency_p50_us: p(0.50),
            search_latency_p95_us: p(0.95),
            search_latency_p99_us: p(0.99),
            recall_at_k: mean_recall,
            memory_bytes,
            vectors: build.vectors,
            eval_queries: harness_set.queries.len(),
            brute_force_latency_us: brute_us,
        },
        created_at: Utc::now(),
        note: Some("autotune".to_string()),
    };
    let _ = trial_journal.append(&trial).await;
    let _ = hnsw_store.drop(&slot).await;
    Ok(trial)
}
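// Sketches of unit tests for the pure helpers above, assuming the
// `Trial`/`TrialMetrics` field types implied by `run_single_trial`
// (counts as integers; latencies and recall as f32). They exercise the
// two safeguards from the module docs: the config bounds check and the
// "never promote below min_recall" rule, plus the recall-then-latency
// tie-break.
#[cfg(test)]
mod selection_tests {
    use super::*;

    /// Throwaway trial with only the fields `pick_winner` reads set to
    /// meaningful values.
    fn trial(recall: f32, p50_us: f32) -> Trial {
        Trial {
            id: Trial::new_id(),
            index_name: "test-index".to_string(),
            eval_set: "test-harness".to_string(),
            config: HnswConfig { ef_construction: 40, ef_search: 30, seed: Some(42) },
            metrics: TrialMetrics {
                build_time_secs: 1.0,
                search_latency_p50_us: p50_us,
                search_latency_p95_us: p50_us * 2.0,
                search_latency_p99_us: p50_us * 3.0,
                recall_at_k: recall,
                memory_bytes: 0,
                vectors: 0,
                eval_queries: 0,
                brute_force_latency_us: 0.0,
            },
            created_at: Utc::now(),
            note: None,
        }
    }

    #[test]
    fn highest_recall_wins_then_lowest_latency_breaks_ties() {
        let trials = vec![trial(0.95, 200.0), trial(0.99, 900.0), trial(0.99, 400.0)];
        let w = pick_winner(&trials, 0.9).expect("eligible trials exist");
        assert_eq!(w.metrics.recall_at_k, 0.99);
        assert_eq!(w.metrics.search_latency_p50_us, 400.0);
    }

    #[test]
    fn below_min_recall_is_never_eligible() {
        assert!(pick_winner(&[trial(0.5, 10.0)], 0.9).is_none());
    }

    #[test]
    fn out_of_bounds_configs_are_rejected_with_reasons() {
        let grid = vec![
            HnswConfig { ef_construction: 5, ef_search: 30, seed: None }, // below [10,400]
            HnswConfig { ef_construction: 80, ef_search: 500, seed: None }, // above [10,200]
            HnswConfig { ef_construction: 80, ef_search: 60, seed: None },  // in bounds
        ];
        let (ok, rejected) = sanitize_grid(grid);
        assert_eq!(ok.len(), 1);
        assert_eq!(rejected.len(), 2);
    }
}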