lakehouse/crates/vectord/src/promotion.rs
root 12e615bb5d
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
ingestd/vectord: remove two fragile unwraps on Option paths
Both were technically safe — guarded above by map_or(true, ...) and
Some(entry) assignment respectively — but relied on multi-line
invariants that a future refactor could easily break.

  - ingestd/watcher.rs:80: path.file_name().unwrap() on a path that
    was already checked via map_or(true, ...) two lines up. Fix:
    let-else binds filename once, no double lookup, no unwrap.

  - vectord/promotion.rs:145: file.current.as_ref().unwrap() called
    TWICE on the same line to log config + trial_id. Guard via
    `if let Some(cur) = &file.current` so the log gracefully skips
    if the invariant ever breaks instead of panicking at runtime.

Both are drop-in semantically: happy path identical, error path now
graceful-skip instead of panic. Workspace warnings still at 0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 06:39:40 -05:00

228 lines
8.9 KiB
Rust

//! Phase 16: Promoted HNSW configs — the "active generation" pointer.
//!
//! An index's HNSW config used at build time normally defaults to the
//! system-wide default (`HnswConfig::default()`). An operator or the
//! autotune agent can *promote* a specific trial's config — subsequent
//! HNSW builds against that index use the promoted config instead.
//!
//! Every promotion is history-tracked so `rollback` can revert. The
//! history file lives at `primary://_hnsw_promotions/{index_name}.json`
//! and is small (< few KB) so we rewrite it on every promotion rather
//! than append-log.
//!
//! Not included here:
//! - Atomic graph rebuild on promote — promotion only updates the sticky
//! default. Next activation (or search that triggers lazy build) picks
//! up the new config. That's "zero-downtime swap after build" which is
//! what ADR-019 actually claimed; an instant-swap requires a
//! pre-built graph pool which we don't have yet.
//! - Agent loop — lives in `vectord::autotune`.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use storaged::ops;
use storaged::registry::BucketRegistry;
use tokio::sync::RwLock;
use crate::index_registry::IndexRegistry;
use crate::trial::HnswConfig;
const PROMOTION_PREFIX: &str = "_hnsw_promotions";
/// One promotion record. The `trial_id` is the origin of the config —
/// lets operators trace back "why was this config picked?" to the exact
/// trial in the trial journal.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PromotionEntry {
/// The HNSW build parameters being promoted.
pub config: HnswConfig,
/// Trial that produced `config`; key back into the trial journal.
pub trial_id: String,
/// When the promotion was recorded (UTC).
pub promoted_at: DateTime<Utc>,
/// Operator or agent that requested the promotion; empty string when
/// absent in older files (`serde(default)` keeps them parseable).
#[serde(default)]
pub promoted_by: String,
/// Optional free-form operator note.
#[serde(default)]
pub note: Option<String>,
}
/// Serialized form of an index's promotion history.
///
/// Persisted as pretty JSON under `_hnsw_promotions/`; small enough
/// that it is rewritten whole on every change rather than appended to.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PromotionFile {
/// Index this file belongs to (also encoded in the object key).
pub index_name: String,
/// The active promotion, if any. `None` means "use defaults".
pub current: Option<PromotionEntry>,
/// Prior promotions, oldest first; capped by the writer in `promote`.
#[serde(default)]
pub history: Vec<PromotionEntry>,
}
/// Tracks and persists promoted HNSW configs per index, with a
/// process-local read cache. `Clone` shares the underlying state:
/// `buckets` and `cache` are `Arc`-backed (IndexRegistry is presumably
/// a cheap handle too — it must be `Clone` for this derive to compile).
#[derive(Clone)]
pub struct PromotionRegistry {
/// Object-store handles, keyed by bucket name.
buckets: Arc<BucketRegistry>,
/// Resolves which bucket an index's metadata points at.
index_registry: IndexRegistry,
/// index_name -> last-loaded promotion file. Filled by `load`,
/// refreshed after every successful promote/rollback.
cache: Arc<RwLock<HashMap<String, PromotionFile>>>,
}
impl PromotionRegistry {
/// Construct a registry with an empty promotion cache.
///
/// `buckets` supplies object-store handles; `index_registry` resolves
/// which bucket each index's promotion file lives in.
pub fn new(buckets: Arc<BucketRegistry>, index_registry: IndexRegistry) -> Self {
    let cache = Arc::new(RwLock::new(HashMap::new()));
    Self {
        cache,
        buckets,
        index_registry,
    }
}
/// Object-store key for an index's promotion file.
///
/// Any character outside `[A-Za-z0-9_-]` is replaced with `_` so an
/// arbitrary index name can't escape the `_hnsw_promotions/` prefix or
/// produce an invalid object key.
fn key(index_name: &str) -> String {
    let mut safe = String::with_capacity(index_name.len());
    for c in index_name.chars() {
        match c {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' => safe.push(c),
            _ => safe.push('_'),
        }
    }
    format!("{PROMOTION_PREFIX}/{safe}.json")
}
/// Resolve which bucket's store holds this index's promotion file.
/// Same rules as TrialJournal::bucket_for — follows IndexMeta.bucket,
/// defaults to primary when metadata is missing.
async fn store_for(&self, index_name: &str) -> Result<Arc<dyn object_store::ObjectStore>, String> {
    // No metadata for this index => fall back to the primary bucket.
    let bucket = match self.index_registry.get(index_name).await {
        Some(meta) => meta.bucket,
        None => "primary".to_string(),
    };
    self.buckets.get(&bucket)
}
/// Load (and cache) the promotion file for an index.
pub async fn load(&self, index_name: &str) -> Result<PromotionFile, String> {
if let Some(cached) = self.cache.read().await.get(index_name) {
return Ok(cached.clone());
}
let store = self.store_for(index_name).await?;
let key = Self::key(index_name);
let file = match ops::get(&store, &key).await {
Ok(bytes) => serde_json::from_slice::<PromotionFile>(&bytes)
.map_err(|e| format!("parse promotion file: {e}"))?,
Err(_) => PromotionFile {
index_name: index_name.to_string(),
current: None,
history: Vec::new(),
},
};
self.cache.write().await.insert(index_name.to_string(), file.clone());
Ok(file)
}
/// Promote a config to the active slot. Pushes the current promotion
/// (if any) onto the history stack. Persists before returning — the
/// config is durable by the time this call completes.
pub async fn promote(
    &self,
    index_name: &str,
    entry: PromotionEntry,
) -> Result<PromotionFile, String> {
    // Bound the on-disk history so the file can't grow without limit;
    // 50 retained promotions is far beyond any sane workflow.
    const HISTORY_CAP: usize = 50;
    let mut file = self.load(index_name).await?;
    if let Some(prior) = file.current.take() {
        file.history.push(prior);
        let excess = file.history.len().saturating_sub(HISTORY_CAP);
        if excess > 0 {
            // Oldest entries sit at the front; drop those first.
            file.history.drain(..excess);
        }
    }
    file.current = Some(entry);
    file.index_name = index_name.to_string();
    // Persist first, then refresh the cache — other callers observe
    // the new promotion only once it is durable.
    let store = self.store_for(index_name).await?;
    let key = Self::key(index_name);
    let json = serde_json::to_vec_pretty(&file).map_err(|e| e.to_string())?;
    ops::put(&store, &key, json.into()).await?;
    self.cache
        .write()
        .await
        .insert(index_name.to_string(), file.clone());
    // Log via `if let` so a broken Some-invariant skips the log line
    // instead of panicking at runtime.
    if let Some(cur) = &file.current {
        tracing::info!(
            "promoted '{}' to config {:?} (trial={})",
            index_name, cur.config, cur.trial_id,
        );
    }
    Ok(file)
}
/// Pop the latest promotion back onto the current slot (if any
/// history exists). If current is set but history is empty, the
/// current promotion is cleared — the index falls back to defaults.
pub async fn rollback(&self, index_name: &str) -> Result<PromotionFile, String> {
    let mut file = self.load(index_name).await?;
    if let Some(prev) = file.history.pop() {
        file.current = Some(prev);
    } else if file.current.is_some() {
        // Nothing older to restore: clear the promotion entirely.
        file.current = None;
    } else {
        return Err(format!("no promotion to rollback for '{index_name}'"));
    }
    let store = self.store_for(index_name).await?;
    let key = Self::key(index_name);
    let json = serde_json::to_vec_pretty(&file).map_err(|e| e.to_string())?;
    ops::put(&store, &key, json.into()).await?;
    self.cache
        .write()
        .await
        .insert(index_name.to_string(), file.clone());
    tracing::info!("rolled back promotion for '{}'", index_name);
    Ok(file)
}
/// Get the currently-promoted config (if any). Callers use this to
/// pick the right HnswConfig at build time. Load failures are folded
/// into `None`.
pub async fn get_current(&self, index_name: &str) -> Option<PromotionEntry> {
    match self.load(index_name).await {
        Ok(file) => file.current,
        Err(_) => None,
    }
}
/// Convenience: return the promoted config or the provided default.
pub async fn config_or(&self, index_name: &str, default: HnswConfig) -> HnswConfig {
    self.get_current(index_name)
        .await
        .map(|entry| entry.config)
        .unwrap_or(default)
}
/// List every index that has a promotion recorded (for operator UI).
///
/// Federation: scans EVERY registered bucket for promotion files.
/// Per-profile buckets each have their own `_hnsw_promotions/` so we
/// aggregate across them. Dedups by index_name — if the same index
/// somehow has promotion files in multiple buckets, the file from the
/// last bucket scanned wins (iteration order of `buckets.list()`);
/// no IndexMeta-based tie-break is performed.
///
/// Best-effort: unreachable buckets, failed listings, failed reads,
/// and unparseable files are skipped silently rather than failing the
/// whole scan.
pub async fn list_all(&self) -> Result<Vec<PromotionFile>, String> {
    let bucket_infos = self.buckets.list().await;
    let mut by_name: HashMap<String, PromotionFile> = HashMap::new();
    for b in &bucket_infos {
        let store = match self.buckets.get(&b.name) {
            Ok(s) => s,
            Err(_) => continue,
        };
        // Failed listing => treat the bucket as having no promotions.
        let keys = ops::list(&store, Some(&format!("{PROMOTION_PREFIX}/")))
            .await
            .unwrap_or_default();
        for key in keys {
            if !key.ends_with(".json") {
                continue;
            }
            let bytes = match ops::get(&store, &key).await {
                Ok(b) => b,
                Err(_) => continue,
            };
            if let Ok(f) = serde_json::from_slice::<PromotionFile>(&bytes) {
                // Later buckets overwrite earlier ones for the same index.
                by_name.insert(f.index_name.clone(), f);
            }
        }
    }
    Ok(by_name.into_values().collect())
}
}