//! Phase 16: Promoted HNSW configs — the "active generation" pointer. //! //! An index's HNSW config used at build time normally defaults to the //! system-wide default (`HnswConfig::default()`). An operator or the //! autotune agent can *promote* a specific trial's config — subsequent //! HNSW builds against that index use the promoted config instead. //! //! Every promotion is history-tracked so `rollback` can revert. The //! history file lives at `primary://_hnsw_promotions/{index_name}.json` //! and is small (< few KB) so we rewrite it on every promotion rather //! than append-log. //! //! Not included here: //! - Atomic graph rebuild on promote — promotion only updates the sticky //! default. Next activation (or search that triggers lazy build) picks //! up the new config. That's "zero-downtime swap after build" which is //! what ADR-019 actually claimed; an instant-swap requires a //! pre-built graph pool which we don't have yet. //! - Agent loop — lives in `vectord::autotune`. use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::Arc; use storaged::ops; use storaged::registry::BucketRegistry; use tokio::sync::RwLock; use crate::index_registry::IndexRegistry; use crate::trial::HnswConfig; const PROMOTION_PREFIX: &str = "_hnsw_promotions"; /// One promotion record. The `trial_id` is the origin of the config — /// lets operators trace back "why was this config picked?" to the exact /// trial in the trial journal. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PromotionEntry { pub config: HnswConfig, pub trial_id: String, pub promoted_at: DateTime, #[serde(default)] pub promoted_by: String, #[serde(default)] pub note: Option, } /// Serialized form of an index's promotion history. #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct PromotionFile { pub index_name: String, pub current: Option, #[serde(default)] pub history: Vec, } #[derive(Clone)] pub struct PromotionRegistry { buckets: Arc, index_registry: IndexRegistry, cache: Arc>>, } impl PromotionRegistry { pub fn new(buckets: Arc, index_registry: IndexRegistry) -> Self { Self { buckets, index_registry, cache: Arc::new(RwLock::new(HashMap::new())), } } fn key(index_name: &str) -> String { // Sanitize for object-store safety. let safe: String = index_name .chars() .map(|c| if c.is_ascii_alphanumeric() || c == '_' || c == '-' { c } else { '_' }) .collect(); format!("{PROMOTION_PREFIX}/{safe}.json") } /// Resolve which bucket's store holds this index's promotion file. /// Same rules as TrialJournal::bucket_for — follows IndexMeta.bucket, /// defaults to primary when metadata is missing. async fn store_for(&self, index_name: &str) -> Result, String> { let bucket = self.index_registry .get(index_name) .await .map(|m| m.bucket) .unwrap_or_else(|| "primary".to_string()); self.buckets.get(&bucket) } /// Load (and cache) the promotion file for an index. pub async fn load(&self, index_name: &str) -> Result { if let Some(cached) = self.cache.read().await.get(index_name) { return Ok(cached.clone()); } let store = self.store_for(index_name).await?; let key = Self::key(index_name); let file = match ops::get(&store, &key).await { Ok(bytes) => serde_json::from_slice::(&bytes) .map_err(|e| format!("parse promotion file: {e}"))?, Err(_) => PromotionFile { index_name: index_name.to_string(), current: None, history: Vec::new(), }, }; self.cache.write().await.insert(index_name.to_string(), file.clone()); Ok(file) } /// Promote a config to the active slot. Pushes the current promotion /// (if any) onto the history stack. Persists before returning — the /// config is durable by the time this call completes. pub async fn promote( &self, index_name: &str, entry: PromotionEntry, ) -> Result { let mut file = self.load(index_name).await?; if let Some(prior) = file.current.take() { file.history.push(prior); // Cap history to something sensible so this file doesn't grow // unbounded. 50 entries = 50 promotions — way more than any // sane workflow needs. const HISTORY_CAP: usize = 50; if file.history.len() > HISTORY_CAP { let drop = file.history.len() - HISTORY_CAP; file.history.drain(0..drop); } } file.current = Some(entry); file.index_name = index_name.to_string(); let store = self.store_for(index_name).await?; let key = Self::key(index_name); let json = serde_json::to_vec_pretty(&file).map_err(|e| e.to_string())?; ops::put(&store, &key, json.into()).await?; self.cache.write().await.insert(index_name.to_string(), file.clone()); tracing::info!( "promoted '{}' to config {:?} (trial={})", index_name, file.current.as_ref().unwrap().config, file.current.as_ref().unwrap().trial_id, ); Ok(file) } /// Pop the latest promotion back onto the current slot (if any /// history exists). If current is set but history is empty, the /// current promotion is cleared — the index falls back to defaults. pub async fn rollback(&self, index_name: &str) -> Result { let mut file = self.load(index_name).await?; match file.history.pop() { Some(prev) => { file.current = Some(prev); } None => { if file.current.is_none() { return Err(format!("no promotion to rollback for '{index_name}'")); } file.current = None; } } let store = self.store_for(index_name).await?; let key = Self::key(index_name); let json = serde_json::to_vec_pretty(&file).map_err(|e| e.to_string())?; ops::put(&store, &key, json.into()).await?; self.cache.write().await.insert(index_name.to_string(), file.clone()); tracing::info!("rolled back promotion for '{}'", index_name); Ok(file) } /// Get the currently-promoted config (if any). Callers use this to /// pick the right HnswConfig at build time. pub async fn get_current(&self, index_name: &str) -> Option { self.load(index_name).await.ok().and_then(|f| f.current) } /// Convenience: return the promoted config or the provided default. pub async fn config_or(&self, index_name: &str, default: HnswConfig) -> HnswConfig { match self.get_current(index_name).await { Some(entry) => entry.config, None => default, } } /// List every index that has a promotion recorded (for operator UI). /// /// Federation: scans EVERY registered bucket for promotion files. /// Per-profile buckets each have their own `_hnsw_promotions/` so we /// aggregate across them. Dedups by index_name — if the same index /// somehow has promotion files in multiple buckets, the one from the /// bucket recorded in IndexMeta wins. pub async fn list_all(&self) -> Result, String> { let bucket_infos = self.buckets.list().await; let mut by_name: HashMap = HashMap::new(); for b in &bucket_infos { let store = match self.buckets.get(&b.name) { Ok(s) => s, Err(_) => continue, }; let keys = ops::list(&store, Some(&format!("{PROMOTION_PREFIX}/"))) .await.unwrap_or_default(); for key in keys { if !key.ends_with(".json") { continue; } let bytes = match ops::get(&store, &key).await { Ok(b) => b, Err(_) => continue, }; if let Ok(f) = serde_json::from_slice::(&bytes) { by_name.insert(f.index_name.clone(), f); } } } Ok(by_name.into_values().collect()) } }