/// In-memory cache for StoredEmbedding vectors. /// /// Rationale: loading 100K embeddings from Parquet takes ~2-5s. When an AI agent /// iterates on HNSW parameters, each trial would repeat that cost. The cache /// pins embeddings in memory so trials reuse them. /// /// This is a pure performance layer — the Parquet file is still the source of /// truth (ADR-008). Eviction is safe; worst case is one slow reload. use object_store::ObjectStore; use serde::Serialize; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::RwLock; use crate::store::{self, StoredEmbedding}; #[derive(Clone)] pub struct EmbeddingCache { store: Arc, cache: Arc>>>>, } #[derive(Debug, Clone, Serialize)] pub struct CacheEntry { pub index_name: String, pub vectors: usize, pub dimensions: usize, pub memory_bytes: u64, } #[derive(Debug, Clone, Serialize)] pub struct CacheStats { pub entries: Vec, pub total_memory_bytes: u64, } impl EmbeddingCache { pub fn new(store: Arc) -> Self { Self { store, cache: Arc::new(RwLock::new(HashMap::new())), } } /// Return cached embeddings, loading from object storage on first request. pub async fn get_or_load( &self, index_name: &str, ) -> Result>, String> { if let Some(cached) = self.cache.read().await.get(index_name) { return Ok(cached.clone()); } // Load under a write lock so concurrent callers only hit disk once. let mut guard = self.cache.write().await; if let Some(cached) = guard.get(index_name) { return Ok(cached.clone()); } tracing::info!("embedding_cache: loading '{index_name}' from object storage"); let t0 = std::time::Instant::now(); let loaded = store::load_embeddings(&self.store, index_name).await?; let n = loaded.len(); let arc = Arc::new(loaded); guard.insert(index_name.to_string(), arc.clone()); tracing::info!( "embedding_cache: loaded '{index_name}' — {n} vectors in {:.2}s", t0.elapsed().as_secs_f32() ); Ok(arc) } pub async fn evict(&self, index_name: &str) -> bool { self.cache.write().await.remove(index_name).is_some() } pub async fn stats(&self) -> CacheStats { let cache = self.cache.read().await; let mut entries = Vec::with_capacity(cache.len()); let mut total: u64 = 0; for (name, embs) in cache.iter() { let dims = embs.first().map(|e| e.vector.len()).unwrap_or(0); // Rough estimate: vector data + chunk_text + metadata overhead. let vec_bytes = (embs.len() * dims * std::mem::size_of::()) as u64; let text_bytes: u64 = embs.iter().map(|e| e.chunk_text.len() as u64).sum(); let overhead = (embs.len() * 128) as u64; // strings + struct overhead let mem = vec_bytes + text_bytes + overhead; total += mem; entries.push(CacheEntry { index_name: name.clone(), vectors: embs.len(), dimensions: dims, memory_bytes: mem, }); } CacheStats { entries, total_memory_bytes: total } } }