Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Both were technically safe — guarded above by map_or(true, ...) and
Some(entry) assignment respectively — but relied on multi-line
invariants that a future refactor could easily break.
- ingestd/watcher.rs:80: path.file_name().unwrap() on a path that
was already checked via map_or(true, ...) two lines up. Fix:
let-else binds filename once, no double lookup, no unwrap.
- vectord/promotion.rs:145: file.current.as_ref().unwrap() called
TWICE on the same line to log config + trial_id. Guard via
`if let Some(cur) = &file.current` so the log gracefully skips
if the invariant ever breaks instead of panicking at runtime.
Both are drop-in semantically: happy path identical, error path now
graceful-skip instead of panic. Workspace warnings still at 0.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
228 lines
8.9 KiB
Rust
228 lines
8.9 KiB
Rust
//! Phase 16: Promoted HNSW configs — the "active generation" pointer.
//!
//! An index's HNSW config used at build time normally defaults to the
//! system-wide default (`HnswConfig::default()`). An operator or the
//! autotune agent can *promote* a specific trial's config — subsequent
//! HNSW builds against that index use the promoted config instead.
//!
//! Every promotion is history-tracked so `rollback` can revert. The
//! history file lives at `primary://_hnsw_promotions/{index_name}.json`
//! and is small (< a few KB) so we rewrite it on every promotion rather
//! than append-log.
//!
//! Not included here:
//! - Atomic graph rebuild on promote — promotion only updates the sticky
//!   default. Next activation (or search that triggers lazy build) picks
//!   up the new config. That's "zero-downtime swap after build" which is
//!   what ADR-019 actually claimed; an instant-swap requires a
//!   pre-built graph pool which we don't have yet.
//! - Agent loop — lives in `vectord::autotune`.
|
use chrono::{DateTime, Utc};
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashMap;
|
|
use std::sync::Arc;
|
|
use storaged::ops;
|
|
use storaged::registry::BucketRegistry;
|
|
use tokio::sync::RwLock;
|
|
|
|
use crate::index_registry::IndexRegistry;
|
|
use crate::trial::HnswConfig;
|
|
|
|
const PROMOTION_PREFIX: &str = "_hnsw_promotions";
|
|
|
|
/// One promotion record. The `trial_id` is the origin of the config —
/// lets operators trace back "why was this config picked?" to the exact
/// trial in the trial journal.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PromotionEntry {
    // The HNSW build parameters this promotion activates.
    pub config: HnswConfig,
    // Trial that produced `config`; links back to the trial journal.
    pub trial_id: String,
    // When the promotion happened (UTC).
    pub promoted_at: DateTime<Utc>,
    // Who/what promoted (operator or agent id). `#[serde(default)]` keeps
    // files written before this field existed deserializable.
    #[serde(default)]
    pub promoted_by: String,
    // Optional free-form operator note; also defaulted for older files.
    #[serde(default)]
    pub note: Option<String>,
}
|
|
|
|
/// Serialized form of an index's promotion history.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PromotionFile {
    // Index this file belongs to; also the dedup key in `list_all`.
    pub index_name: String,
    // Currently-active promotion, if any. `None` => index falls back to
    // the system-wide default config.
    pub current: Option<PromotionEntry>,
    // Prior promotions, oldest first; `rollback` pops from the back.
    // Defaulted so files written before history existed still parse.
    #[serde(default)]
    pub history: Vec<PromotionEntry>,
}
|
|
|
|
/// Handle to promotion state: resolves the right object store per index,
/// caches loaded files, and persists mutations. Cheap to clone — all
/// state is behind `Arc`s, so clones share one cache.
#[derive(Clone)]
pub struct PromotionRegistry {
    // All registered object-store buckets; promotion files may live in any.
    buckets: Arc<BucketRegistry>,
    // Resolves which bucket an index's metadata points at.
    index_registry: IndexRegistry,
    // index_name -> last loaded/written file. Async RwLock: many readers
    // on the hot `load` path, exclusive writer on mutation.
    cache: Arc<RwLock<HashMap<String, PromotionFile>>>,
}
|
|
|
|
impl PromotionRegistry {
    /// Build a registry over the given buckets + index metadata, with an
    /// empty in-memory cache (files are loaded lazily on first access).
    pub fn new(buckets: Arc<BucketRegistry>, index_registry: IndexRegistry) -> Self {
        Self {
            buckets,
            index_registry,
            cache: Arc::new(RwLock::new(HashMap::new())),
        }
    }

    /// Object key for an index's promotion file.
    fn key(index_name: &str) -> String {
        // Sanitize for object-store safety.
        // NOTE(review): distinct index names can collapse to the same safe
        // key (e.g. "a/b" and "a.b" both map to "a_b"), so two indexes
        // could share one promotion file — confirm index-name rules
        // upstream rule this out.
        let safe: String = index_name
            .chars()
            .map(|c| if c.is_ascii_alphanumeric() || c == '_' || c == '-' { c } else { '_' })
            .collect();
        format!("{PROMOTION_PREFIX}/{safe}.json")
    }

    /// Resolve which bucket's store holds this index's promotion file.
    /// Same rules as TrialJournal::bucket_for — follows IndexMeta.bucket,
    /// defaults to primary when metadata is missing.
    async fn store_for(&self, index_name: &str) -> Result<Arc<dyn object_store::ObjectStore>, String> {
        let bucket = self.index_registry
            .get(index_name)
            .await
            .map(|m| m.bucket)
            .unwrap_or_else(|| "primary".to_string());
        self.buckets.get(&bucket)
    }

    /// Load (and cache) the promotion file for an index.
    ///
    /// Errors only on a store-resolution failure or an unparseable file;
    /// a missing file yields an empty `PromotionFile` for this index.
    pub async fn load(&self, index_name: &str) -> Result<PromotionFile, String> {
        // Fast path: serve from cache under the read lock only.
        if let Some(cached) = self.cache.read().await.get(index_name) {
            return Ok(cached.clone());
        }
        let store = self.store_for(index_name).await?;
        let key = Self::key(index_name);
        let file = match ops::get(&store, &key).await {
            Ok(bytes) => serde_json::from_slice::<PromotionFile>(&bytes)
                .map_err(|e| format!("parse promotion file: {e}"))?,
            // NOTE(review): ANY get error — not just not-found — is treated
            // as "no file yet". A transient store failure here, followed by
            // a promote, rewrites the file and silently drops prior
            // history. Worth distinguishing not-found from other errors if
            // ops::get's error type allows it.
            Err(_) => PromotionFile {
                index_name: index_name.to_string(),
                current: None,
                history: Vec::new(),
            },
        };
        self.cache.write().await.insert(index_name.to_string(), file.clone());
        Ok(file)
    }

    /// Promote a config to the active slot. Pushes the current promotion
    /// (if any) onto the history stack. Persists before returning — the
    /// config is durable by the time this call completes.
    pub async fn promote(
        &self,
        index_name: &str,
        entry: PromotionEntry,
    ) -> Result<PromotionFile, String> {
        let mut file = self.load(index_name).await?;
        // Demote the prior current entry onto the history stack.
        if let Some(prior) = file.current.take() {
            file.history.push(prior);
            // Cap history to something sensible so this file doesn't grow
            // unbounded. 50 entries = 50 promotions — way more than any
            // sane workflow needs.
            const HISTORY_CAP: usize = 50;
            if file.history.len() > HISTORY_CAP {
                // Drop the oldest entries (history is oldest-first).
                let drop = file.history.len() - HISTORY_CAP;
                file.history.drain(0..drop);
            }
        }
        // Bind `entry` ref-captured for the log line below so the log
        // doesn't double-unwrap file.current — entry is Some-by-construction
        // at the function boundary; past versions reached in via
        // `.as_ref().unwrap()` twice, which compiled but would panic if
        // the construction above ever changed.
        file.current = Some(entry);
        // Keep index_name authoritative even if a stale/foreign file was
        // loaded under this key.
        file.index_name = index_name.to_string();

        // Persist first, then update the cache — the cache never reflects
        // a state that isn't durable.
        let store = self.store_for(index_name).await?;
        let key = Self::key(index_name);
        let json = serde_json::to_vec_pretty(&file).map_err(|e| e.to_string())?;
        ops::put(&store, &key, json.into()).await?;

        self.cache.write().await.insert(index_name.to_string(), file.clone());
        // Graceful-skip if the Some-invariant above ever breaks, rather
        // than panicking in a log statement.
        if let Some(cur) = &file.current {
            tracing::info!(
                "promoted '{}' to config {:?} (trial={})",
                index_name, cur.config, cur.trial_id,
            );
        }
        Ok(file)
    }

    /// Pop the latest promotion back onto the current slot (if any
    /// history exists). If current is set but history is empty, the
    /// current promotion is cleared — the index falls back to defaults.
    ///
    /// Errors when there is nothing at all to roll back.
    pub async fn rollback(&self, index_name: &str) -> Result<PromotionFile, String> {
        let mut file = self.load(index_name).await?;
        match file.history.pop() {
            Some(prev) => {
                // Restore the most recent prior promotion.
                file.current = Some(prev);
            }
            None => {
                // No history: either clear the lone current promotion or
                // report that there was never anything promoted.
                if file.current.is_none() {
                    return Err(format!("no promotion to rollback for '{index_name}'"));
                }
                file.current = None;
            }
        }
        // Same durability ordering as promote: persist, then cache.
        let store = self.store_for(index_name).await?;
        let key = Self::key(index_name);
        let json = serde_json::to_vec_pretty(&file).map_err(|e| e.to_string())?;
        ops::put(&store, &key, json.into()).await?;
        self.cache.write().await.insert(index_name.to_string(), file.clone());
        tracing::info!("rolled back promotion for '{}'", index_name);
        Ok(file)
    }

    /// Get the currently-promoted config (if any). Callers use this to
    /// pick the right HnswConfig at build time. Load errors collapse to
    /// `None` — a broken promotion file must not block builds.
    pub async fn get_current(&self, index_name: &str) -> Option<PromotionEntry> {
        self.load(index_name).await.ok().and_then(|f| f.current)
    }

    /// Convenience: return the promoted config or the provided default.
    pub async fn config_or(&self, index_name: &str, default: HnswConfig) -> HnswConfig {
        match self.get_current(index_name).await {
            Some(entry) => entry.config,
            None => default,
        }
    }

    /// List every index that has a promotion recorded (for operator UI).
    ///
    /// Federation: scans EVERY registered bucket for promotion files.
    /// Per-profile buckets each have their own `_hnsw_promotions/` so we
    /// aggregate across them. Dedups by index_name — if the same index
    /// somehow has promotion files in multiple buckets, the one from the
    /// bucket recorded in IndexMeta wins.
    pub async fn list_all(&self) -> Result<Vec<PromotionFile>, String> {
        let bucket_infos = self.buckets.list().await;
        let mut by_name: HashMap<String, PromotionFile> = HashMap::new();

        for b in &bucket_infos {
            // Best-effort per bucket: an unresolvable store, a failed
            // list/get, or an unparseable file skips that entry rather
            // than failing the whole listing.
            let store = match self.buckets.get(&b.name) {
                Ok(s) => s,
                Err(_) => continue,
            };
            let keys = ops::list(&store, Some(&format!("{PROMOTION_PREFIX}/")))
                .await.unwrap_or_default();
            for key in keys {
                if !key.ends_with(".json") { continue; }
                let bytes = match ops::get(&store, &key).await {
                    Ok(b) => b,
                    Err(_) => continue,
                };
                if let Ok(f) = serde_json::from_slice::<PromotionFile>(&bytes) {
                    // NOTE(review): "IndexMeta bucket wins" relies on the
                    // iteration order of `buckets.list()` putting the
                    // meta-recorded bucket last — later inserts overwrite
                    // earlier ones. Confirm list() guarantees that order.
                    by_name.insert(f.index_name.clone(), f);
                }
            }
        }
        Ok(by_name.into_values().collect())
    }
}
|