commit 5b1fcf6d27 Phase 28-36 body of work
Accumulated since a6f12e2 (Phase 21 Rust port + Phase 27 versioning):

- Phase 36: embed_semaphore on VectorState (permits=1) serializes
  seed embed calls — prevents sidecar socket collisions under
  concurrent /seed stress load
- Phase 31+: run_stress.ts 6-task diverse stress scaffolding;
  run_e2e_rated.ts + orchestrator.ts tightening
- Catalog dedupe cleanup: 16 duplicate manifests removed; canonical
  candidates.parquet (10.5MB -> 76KB) + placements.parquet (1.2MB ->
  11KB) regenerated post-dedupe; fresh manifests for active datasets
- vectord: harness EvalSet refinements (+181), agent portfolio
  rotation + ingest triggers (+158), autotune + rag adjustments
- catalogd/storaged/ingestd/mcp-server: misc tightening
- docs: Phase 28-36 PRD entries + DECISIONS ADR additions;
  control-plane pivot banner added to top of docs/PRD.md (pointing
  at docs/CONTROL_PLANE_PRD.md which lands in next commit)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 02:41:15 -05:00

790 lines
30 KiB
Rust

//! Phase 16.2 + 16.5 — The autotune agent.
//!
//! A long-running tokio task that watches the trial journal and
//! autonomously proposes + runs new HNSW configs. Distinct from
//! `autotune::run_autotune` which is synchronous (one HTTP call, grid
//! of trials, done). The agent is the continuous version: it sleeps,
//! wakes on triggers, proposes configs based on prior trial history,
//! runs them one at a time, and auto-promotes when it finds an
//! improvement.
//!
//! Design invariants:
//! - Trials are data (ADR-018). Every proposal reads the journal; every
//! attempt appends to it. The journal is the agent's memory.
//! - One trial at a time. Bounded Ollama load — the agent never fires
//! multiple parallel embeddings and respects `cooldown_between_trials_secs`.
//! - Rate-limited. `max_trials_per_hour` is a hard ceiling so a
//! misbehaving proposal function can't saturate the system.
//! - Never promotes below `min_recall`. Same safety gate as
//! `run_autotune` — we will not make the index worse.
//! - Triggered OR periodic. Ingest enqueues a `DatasetAppended` event
//! when a new batch lands; the agent also wakes periodically to keep
//! exploring even when nothing changed externally.
//! - Graceful shutdown via the `stop_tx` signal — the handle's Drop
//! doesn't force-kill, but `stop()` requests a clean exit after the
//! current trial.
use chrono::{DateTime, Utc};
use object_store::ObjectStore;
use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use std::sync::Arc;
use tokio::sync::{Mutex, RwLock, mpsc, oneshot};
use aibridge::client::AiClient;
use catalogd::registry::Registry as CatalogRegistry;
use crate::embedding_cache::EmbeddingCache;
use crate::harness;
use crate::hnsw::HnswStore;
use crate::index_registry::IndexRegistry;
use crate::promotion::{PromotionEntry, PromotionRegistry};
use crate::trial::{HnswConfig, Trial, TrialJournal, TrialMetrics};
// -------- Public-facing types --------
/// Runtime configuration for the agent. Mirrored in shared::config under
/// `[agent]`. Defaults are conservative — designed to tune slowly in the
/// background without fighting real workloads for GPU time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentConfig {
    /// Master switch. When false, `spawn` returns a handle but the loop
    /// doesn't run (a drain task still consumes the trigger channel so
    /// sends don't back up).
    pub enabled: bool,
    /// Periodic wake-up — even if the trigger queue is empty, every N
    /// seconds the agent picks an index with trials and proposes one
    /// more config. Keeps exploration alive on idle indexes.
    pub cycle_interval_secs: u64,
    /// Minimum gap between two trials on the SAME index. Prevents the
    /// agent from hammering Ollama when a hot index has many pending
    /// triggers in a row. (The current gate in `cooling_down` is global
    /// across indexes, not per-index — see the note there.)
    pub cooldown_between_trials_secs: u64,
    /// Below this recall, a proposal is never promoted — even if it
    /// beats the champion on latency.
    pub min_recall: f32,
    /// Budget cap: hard ceiling on trials per hour across all indexes.
    /// When hit, the agent idles until the hour window rolls.
    pub max_trials_per_hour: u32,
}
impl Default for AgentConfig {
fn default() -> Self {
Self {
enabled: false, // opt-in — don't auto-start until J turns it on
cycle_interval_secs: 60,
cooldown_between_trials_secs: 30,
min_recall: 0.9,
max_trials_per_hour: 30,
}
}
}
/// What caused the agent to look at a particular index. Recorded in the
/// trial's note field (see the note construction in `run_one_cycle`) so
/// we can tell "new data arrived" trials from "periodic exploration"
/// trials in the journal.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TriggerReason {
    /// Ingest just appended to a dataset that has attached HNSW indexes.
    DatasetAppended { dataset: String },
    /// A human or another agent hit `/agent/enqueue/{index}`.
    Manual,
    /// Periodic wake — no external event, just keep exploring.
    Periodic,
}
/// One unit of work for the agent.
#[derive(Debug, Clone)]
pub struct TriggerEvent {
    /// Which HNSW index to (re)tune.
    pub index_name: String,
    /// What prompted this event — ends up in the trial's note field.
    pub reason: TriggerReason,
    /// Stamped at construction time by the `TriggerEvent` constructors.
    pub enqueued_at: DateTime<Utc>,
}
impl TriggerEvent {
pub fn manual(index_name: impl Into<String>) -> Self {
Self { index_name: index_name.into(), reason: TriggerReason::Manual, enqueued_at: Utc::now() }
}
pub fn dataset_appended(index_name: impl Into<String>, dataset: impl Into<String>) -> Self {
Self {
index_name: index_name.into(),
reason: TriggerReason::DatasetAppended { dataset: dataset.into() },
enqueued_at: Utc::now(),
}
}
pub fn periodic(index_name: impl Into<String>) -> Self {
Self { index_name: index_name.into(), reason: TriggerReason::Periodic, enqueued_at: Utc::now() }
}
}
/// Observable snapshot of the agent's state — what `/agent/status` returns.
#[derive(Debug, Clone, Serialize)]
pub struct AgentStatus {
    /// True while the loop is alive; set from `config.enabled` at spawn
    /// and cleared when the loop exits on the stop signal.
    pub running: bool,
    /// The configuration the agent was spawned with.
    pub config: AgentConfig,
    /// Mirror of the trigger channel depth (see `AgentInner::queue_len`).
    pub queue_depth: usize,
    /// Completed trials since spawn. Errored cycles are not counted.
    pub trials_run: u64,
    /// Trials auto-promoted to champion since spawn.
    pub promotions: u64,
    /// Rolling-hour trial count — recomputed from the timestamp ring on
    /// every `AgentHandle::status()` call, not stored incrementally.
    pub trials_in_last_hour: u32,
    /// Most recent event — for "why didn't it do anything?" debugging.
    pub last_event: Option<AgentEvent>,
    /// When the loop started; None when spawned with `enabled = false`.
    pub started_at: Option<DateTime<Utc>>,
}
/// Last thing that happened — useful for "why didn't it do anything?" debugging.
#[derive(Debug, Clone, Serialize)]
pub struct AgentEvent {
    /// When the event was recorded.
    pub at: DateTime<Utc>,
    /// Event kind emitted by the loop: "trial_completed" | "promoted" |
    /// "trial_error" | "skipped_rate_limit" | "skipped_cooldown".
    pub kind: String,
    /// Index involved, when the event concerns a specific one.
    pub index_name: Option<String>,
    /// Human-readable detail line (also emitted via tracing).
    pub detail: String,
}
/// Handle returned by `spawn`. Holds the trigger sender + shared status +
/// stop signal. Cloneable — every clone talks to the same agent task.
#[derive(Clone)]
pub struct AgentHandle {
    // Producer side of the bounded (capacity 256) trigger queue.
    trigger_tx: mpsc::Sender<TriggerEvent>,
    // State shared with the loop task: status, stop signal, rate-limit ring.
    inner: Arc<AgentInner>,
}
/// State shared between every `AgentHandle` clone and the loop task.
struct AgentInner {
    /// Observable snapshot served by `/agent/status`.
    status: RwLock<AgentStatus>,
    /// Consumed by the first `stop()` call; None afterwards.
    stop_tx: Mutex<Option<oneshot::Sender<()>>>,
    queue_len: Mutex<usize>, // mirror of the channel capacity — for status reporting
    recent_trials: Mutex<VecDeque<DateTime<Utc>>>, // ring of recent trial timestamps for rate limit
}
impl AgentHandle {
    /// Enqueue a trigger. Returns Err if the agent isn't running or the
    /// queue is full (backpressure — dropping events is correct here
    /// since periodic exploration will pick up the slack).
    pub async fn enqueue(&self, event: TriggerEvent) -> Result<(), String> {
        self.trigger_tx.try_send(event).map_err(|e| format!("enqueue: {e}"))?;
        // Lock order here is queue_len → status, the same order the
        // receive path in `run_loop` uses — so the two can't deadlock
        // against each other.
        let mut guard = self.inner.queue_len.lock().await;
        *guard = guard.saturating_add(1);
        // Update queue_depth in status for observability.
        let mut s = self.inner.status.write().await;
        s.queue_depth = *guard;
        Ok(())
    }

    /// Snapshot the agent's observable state. `trials_in_last_hour` is
    /// recomputed from the timestamp ring on every call rather than
    /// trusting the stored value, so it is always current.
    pub async fn status(&self) -> AgentStatus {
        let mut s = self.inner.status.read().await.clone();
        // Refresh rate-limit window from ring buffer.
        let cutoff = Utc::now() - chrono::Duration::hours(1);
        let ring = self.inner.recent_trials.lock().await;
        s.trials_in_last_hour = ring.iter().filter(|t| **t >= cutoff).count() as u32;
        s
    }

    /// Request a graceful stop. Returns immediately — the loop exits
    /// after its current trial completes. Returns false if a stop was
    /// already requested (the oneshot sender is consumed on first use).
    pub async fn stop(&self) -> bool {
        let mut guard = self.inner.stop_tx.lock().await;
        if let Some(tx) = guard.take() {
            let _ = tx.send(());
            true
        } else {
            false
        }
    }
}
// -------- Agent state holder --------
/// Everything the agent needs to run a trial. Mirrors the fields of
/// `VectorState` the agent actually uses. Kept separate so the service
/// layer builds it explicitly — no clone of unneeded state.
#[derive(Clone)]
pub struct AgentDeps {
    /// Object store handle (not referenced directly in this module).
    pub store: Arc<dyn ObjectStore>,
    /// Passed to `harness::compute_ground_truth` when building ground truth.
    pub ai_client: AiClient,
    /// Catalog registry (not referenced directly in this module).
    pub catalog: CatalogRegistry,
    /// Lists live indexes — used by `pick_periodic_target`.
    pub index_registry: IndexRegistry,
    /// Builds and benches throwaway trial indexes.
    pub hnsw_store: HnswStore,
    /// Per-index embeddings, loaded via `get_or_load`.
    pub embedding_cache: EmbeddingCache,
    /// The agent's memory: trial history is read before, and appended
    /// after, every cycle (ADR-018).
    pub trial_journal: TrialJournal,
    /// Champion configs; written on auto-promotion.
    pub promotion_registry: PromotionRegistry,
    /// Eval harness load/save; auto-harness bootstrap goes through here.
    pub harness_store: crate::harness::HarnessStore,
}
// -------- Spawn --------
/// Start the agent loop in a background tokio task. Returns a handle
/// the caller uses to enqueue events and read status.
///
/// When `config.enabled` is false, no loop runs — a drain task consumes
/// the trigger channel instead so `enqueue` calls never back up.
pub fn spawn(config: AgentConfig, deps: AgentDeps) -> AgentHandle {
    // Bounded queue: `enqueue` uses try_send, so triggers beyond 256
    // pending are dropped (backpressure by design).
    let (trigger_tx, trigger_rx) = mpsc::channel::<TriggerEvent>(256);
    let (stop_tx, stop_rx) = oneshot::channel::<()>();
    let status = AgentStatus {
        running: config.enabled,
        config: config.clone(),
        queue_depth: 0,
        trials_run: 0,
        promotions: 0,
        trials_in_last_hour: 0,
        last_event: None,
        started_at: if config.enabled { Some(Utc::now()) } else { None },
    };
    let inner = Arc::new(AgentInner {
        status: RwLock::new(status),
        stop_tx: Mutex::new(Some(stop_tx)),
        queue_len: Mutex::new(0),
        recent_trials: Mutex::new(VecDeque::with_capacity(64)),
    });
    if config.enabled {
        tracing::info!(
            "autotune agent started (cycle={}s, cooldown={}s, cap={}/hr, min_recall={})",
            config.cycle_interval_secs, config.cooldown_between_trials_secs,
            config.max_trials_per_hour, config.min_recall,
        );
        let loop_inner = inner.clone();
        let loop_deps = deps.clone();
        let loop_config = config.clone();
        tokio::spawn(async move {
            run_loop(loop_config, loop_deps, trigger_rx, stop_rx, loop_inner).await;
        });
    } else {
        // Agent disabled — still drain the channel so sends don't back up.
        // NOTE(review): this drain path never decrements `queue_len`, so a
        // disabled agent's reported `queue_depth` only ever grows as
        // `enqueue` is called — confirm that's acceptable for status
        // reporting.
        tokio::spawn(async move {
            let mut rx = trigger_rx;
            while rx.recv().await.is_some() {}
        });
        tracing::info!("autotune agent configured but disabled (set [agent].enabled=true)");
    }
    AgentHandle { trigger_tx, inner }
}
// -------- Main loop --------
/// The agent's main loop: wait for a trigger (or a periodic tick, or the
/// stop signal), apply the rate-limit and cooldown gates, then run at
/// most one trial cycle. Trials are strictly sequential — the loop never
/// awaits two cycles concurrently.
///
/// Exits when `stop_rx` fires or the trigger channel closes; both exit
/// paths clear `status.running` so `/agent/status` reflects reality.
async fn run_loop(
    config: AgentConfig,
    deps: AgentDeps,
    mut trigger_rx: mpsc::Receiver<TriggerEvent>,
    mut stop_rx: oneshot::Receiver<()>,
    inner: Arc<AgentInner>,
) {
    let mut periodic = tokio::time::interval(std::time::Duration::from_secs(config.cycle_interval_secs));
    // First tick fires immediately — skip it so we don't double-fire on startup.
    periodic.tick().await;
    loop {
        let event = tokio::select! {
            _ = &mut stop_rx => {
                tracing::info!("autotune agent: stop signal received");
                let mut s = inner.status.write().await;
                s.running = false;
                return;
            }
            maybe = trigger_rx.recv() => match maybe {
                Some(ev) => {
                    // Mirror the channel depth for /agent/status. Lock
                    // order (queue_len → status) matches `AgentHandle::enqueue`.
                    let mut guard = inner.queue_len.lock().await;
                    *guard = guard.saturating_sub(1);
                    let mut s = inner.status.write().await;
                    s.queue_depth = *guard;
                    ev
                }
                None => {
                    // All senders dropped. FIX: this path previously
                    // returned without clearing `running`, leaving status
                    // stuck at running=true after the loop was gone.
                    tracing::info!("autotune agent: trigger channel closed");
                    let mut s = inner.status.write().await;
                    s.running = false;
                    return;
                }
            },
            _ = periodic.tick() => {
                // Periodic wake — pick an index with existing trials.
                // If nothing's been tuned yet there's nothing to propose.
                match pick_periodic_target(&deps).await {
                    Some(idx) => TriggerEvent::periodic(idx),
                    None => continue,
                }
            }
        };
        // Budget gate: hard cap on trials per rolling hour.
        if over_rate_limit(&inner, config.max_trials_per_hour).await {
            record_event(&inner, "skipped_rate_limit", Some(&event.index_name),
                format!("hit cap of {}/hour", config.max_trials_per_hour)).await;
            continue;
        }
        // Cooldown gate (currently global, not per-index — see `cooling_down`).
        if cooling_down(&inner, &event.index_name, config.cooldown_between_trials_secs).await {
            record_event(&inner, "skipped_cooldown", Some(&event.index_name),
                format!("last trial too recent (<{}s)", config.cooldown_between_trials_secs)).await;
            continue;
        }
        // Run one trial.
        match run_one_cycle(&event, &deps, config.min_recall).await {
            Ok(outcome) => {
                mark_recent_trial(&inner).await;
                {
                    let mut s = inner.status.write().await;
                    s.trials_run += 1;
                    if outcome.promoted { s.promotions += 1; }
                }
                record_event(&inner, if outcome.promoted { "promoted" } else { "trial_completed" },
                    Some(&event.index_name),
                    format!("config=ec{}/es{} recall={:.3} p50={:.0}us {}",
                        outcome.trial.config.ef_construction,
                        outcome.trial.config.ef_search,
                        outcome.trial.metrics.recall_at_k,
                        outcome.trial.metrics.search_latency_p50_us,
                        if outcome.promoted { "★ PROMOTED" } else { "" })).await;
            }
            Err(e) => {
                // NOTE(review): failed cycles never call `mark_recent_trial`,
                // so they bypass both the rate limit and the cooldown — a
                // persistently failing index can retry on every trigger.
                // Confirm this is intended before treating the hourly cap
                // as a hard bound on work attempted.
                record_event(&inner, "trial_error", Some(&event.index_name), e).await;
            }
        }
    }
}
/// Result of one cycle — ran a trial, maybe promoted it.
struct CycleOutcome {
    /// The completed trial (already appended to the journal).
    trial: Trial,
    /// True when the trial passed the recall gate and beat the champion.
    promoted: bool,
}
/// Core cycle: propose → build → bench → record → maybe promote.
async fn run_one_cycle(
event: &TriggerEvent,
deps: &AgentDeps,
min_recall: f32,
) -> Result<CycleOutcome, String> {
// Read history.
let history = deps.trial_journal.list(&event.index_name).await
.map_err(|e| format!("read journal: {e}"))?;
// Bootstrap mode: first ever visit to this index. Auto-generate a
// harness (if missing) and seed with the default config — the Phase 15
// known-good ec=80/es=30. Subsequent visits go through the proposer.
let is_bootstrap = history.is_empty();
// Current champion (if any) is the promoted config.
let champion = deps.promotion_registry.get_current(&event.index_name).await;
let champion_trial = champion.as_ref().and_then(|p| {
history.iter().find(|t| t.id == p.trial_id).cloned()
});
let (next_config, harness_name) = if is_bootstrap {
let name = ensure_auto_harness(&event.index_name, deps).await?;
(HnswConfig::default(), name)
} else {
let Some(cfg) = propose_next_config(&history, champion_trial.as_ref()) else {
return Err("proposer returned None — search space exhausted".into());
};
// Validate bounds defensively.
if !(10..=400).contains(&cfg.ef_construction) {
return Err(format!("proposed ef_construction={} out of bounds", cfg.ef_construction));
}
if !(10..=200).contains(&cfg.ef_search) {
return Err(format!("proposed ef_search={} out of bounds", cfg.ef_search));
}
// A future refinement: remember per-index "canonical harness" on
// the index metadata. For now: latest wins.
let hname = history.last().unwrap().eval_set.clone();
(cfg, hname)
};
let mut harness_set = deps.harness_store.load_for_index(&event.index_name, &harness_name).await
.map_err(|e| format!("load harness '{harness_name}': {e}"))?;
let embeddings = deps.embedding_cache.get_or_load(&event.index_name).await
.map_err(|e| format!("embeddings: {e}"))?;
if !harness_set.ground_truth_built {
harness::compute_ground_truth(&mut harness_set, &embeddings, &deps.ai_client).await
.map_err(|e| format!("ground truth: {e}"))?;
deps.harness_store.save(&harness_set).await.ok();
}
// Build + bench.
let trial_id = Trial::new_id();
let slot = format!("{}__{}", event.index_name, trial_id);
let build = deps.hnsw_store
.build_index_with_config(&slot, (*embeddings).clone(), &next_config)
.await?;
let query_vectors: Vec<Vec<f32>> = harness_set.queries
.iter().filter_map(|q| q.query_embedding.clone()).collect();
let bench = deps.hnsw_store.bench_search(&slot, &query_vectors, harness_set.k).await?;
let mut recalls = Vec::with_capacity(harness_set.queries.len());
for (q, hits) in harness_set.queries.iter().zip(bench.retrieved.iter()) {
if let Some(gt) = &q.ground_truth {
recalls.push(harness::recall_at_k(hits, gt, harness_set.k));
}
}
let mean_recall = if recalls.is_empty() { 0.0 } else {
recalls.iter().sum::<f32>() / recalls.len() as f32
};
let mut lats = bench.latencies_us.clone();
lats.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let p = |pct: f32| -> f32 {
if lats.is_empty() { return 0.0; }
let idx = ((lats.len() as f32 - 1.0) * pct).round() as usize;
lats[idx.min(lats.len() - 1)]
};
let dims = embeddings.first().map(|e| e.vector.len()).unwrap_or(0);
let memory_bytes = (embeddings.len() * dims * std::mem::size_of::<f32>() + embeddings.len() * 128) as u64;
let note = {
let base = match &event.reason {
TriggerReason::DatasetAppended { dataset } => format!("agent: dataset_appended({dataset})"),
TriggerReason::Manual => "agent: manual".to_string(),
TriggerReason::Periodic => "agent: periodic".to_string(),
};
if is_bootstrap { format!("{base} bootstrap") } else { base }
};
let trial = Trial {
id: trial_id,
index_name: event.index_name.clone(),
eval_set: harness_set.name.clone(),
config: next_config.clone(),
metrics: TrialMetrics {
build_time_secs: build.build_time_secs,
search_latency_p50_us: p(0.50),
search_latency_p95_us: p(0.95),
search_latency_p99_us: p(0.99),
recall_at_k: mean_recall,
memory_bytes,
vectors: build.vectors,
eval_queries: harness_set.queries.len(),
brute_force_latency_us: 0.0,
},
created_at: Utc::now(),
note: Some(note),
};
deps.trial_journal.append(&trial).await.ok();
deps.hnsw_store.drop(&slot).await;
// Promotion decision: the new trial must meet recall gate AND beat
// the current champion (higher recall OR same recall + lower p50).
let promoted = if trial.metrics.recall_at_k < min_recall {
false
} else {
let beats = match &champion_trial {
None => true, // no champion yet — anything passing the gate wins
Some(c) => beats_champion(&trial, c),
};
if beats {
let entry = PromotionEntry {
config: trial.config.clone(),
trial_id: trial.id.clone(),
promoted_at: Utc::now(),
promoted_by: "agent".to_string(),
note: Some(format!(
"auto-promote: recall={:.3} p50={:.0}us (was {:.3}/{:.0}us)",
trial.metrics.recall_at_k, trial.metrics.search_latency_p50_us,
champion_trial.as_ref().map(|t| t.metrics.recall_at_k).unwrap_or(0.0),
champion_trial.as_ref().map(|t| t.metrics.search_latency_p50_us).unwrap_or(0.0),
)),
};
deps.promotion_registry.promote(&event.index_name, entry).await.is_ok()
} else {
false
}
};
Ok(CycleOutcome { trial, promoted })
}
/// Champion-beat test: strictly higher recall, OR recall within 1e-4 of
/// the champion's with a strictly lower p50. Same rule as
/// `autotune::pick_winner` — kept consistent so the agent and the
/// synchronous autotune agree on what "better" means.
fn beats_champion(candidate: &Trial, champion: &Trial) -> bool {
    let (cand, champ) = (&candidate.metrics, &champion.metrics);
    if cand.recall_at_k > champ.recall_at_k {
        true
    } else {
        // Tie-break: treat recall within 1e-4 as equal, then prefer latency.
        let recall_tied = (cand.recall_at_k - champ.recall_at_k).abs() < 1e-4;
        recall_tied && cand.search_latency_p50_us < champ.search_latency_p50_us
    }
}
/// Propose the next HnswConfig given trial history and the current
/// champion. Returns `None` when nothing is worth trying — the agent
/// logs "search space exhausted" and moves on.
///
/// Inputs: `history` is every trial ever run on this index (oldest
/// first); `champion` is the currently-promoted trial, if any.
///
/// Strategy: ε-greedy around the champion, dedup-aware.
/// - With probability ε (25%), sample a random config from the full
///   legal box: ef_construction ∈ [10, 400], ef_search ∈ [10, 200] —
///   the same bounds the caller re-checks defensively.
/// - Otherwise perturb the base config with a signed step on BOTH axes,
///   smaller steps listed first so recall stays near the current level.
/// - Configs already present in history are skipped — re-running the
///   same (ec, es) pair is wasted budget.
/// - Deterministic per journal state: the RNG is seeded from history
///   length, so the same journal always proposes the same next config.
///   Makes tests + offline replay reproducible.
pub fn propose_next_config(history: &[Trial], champion: Option<&Trial>) -> Option<HnswConfig> {
    // Perturbation base: champion first, else most recent trial, else defaults.
    let base = match champion {
        Some(t) => t.config.clone(),
        None => history.last().map(|t| t.config.clone()).unwrap_or_default(),
    };
    let already_tried = |ec: usize, es: usize| -> bool {
        history.iter().any(|t| t.config.ef_construction == ec && t.config.ef_search == es)
    };
    let bounded = |ec: i32, es: i32| -> (usize, usize) {
        (ec.clamp(10, 400) as usize, es.clamp(10, 200) as usize)
    };
    // Tiny xorshift64 — no rand crate dep. Seeding from history length
    // (| 1 keeps the state nonzero) makes the proposer a pure function
    // of the journal.
    let mut state = (history.len() as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15) | 1;
    let mut draw = move || {
        state ^= state << 13;
        state ^= state >> 7;
        state ^= state << 17;
        state
    };
    const EC_STEPS: [i32; 6] = [-40, -20, -10, 10, 20, 40];
    const ES_STEPS: [i32; 6] = [-20, -10, -5, 5, 10, 20];
    for _ in 0..32 {
        let (ec, es) = if draw() % 100 < 25 {
            // Explore: uniform sample over the full legal box. ε = 0.25.
            bounded(10 + (draw() % 391) as i32, 10 + (draw() % 191) as i32)
        } else {
            // Exploit: symmetric signed step on each axis around the base.
            let step_ec = EC_STEPS[(draw() % 6) as usize];
            let step_es = ES_STEPS[(draw() % 6) as usize];
            bounded(base.ef_construction as i32 + step_ec, base.ef_search as i32 + step_es)
        };
        if !already_tried(ec, es) {
            return Some(HnswConfig { ef_construction: ec, ef_search: es, seed: Some(42) });
        }
    }
    None // 32 attempts all landed on duplicates — likely saturated
}
// -------- Helpers --------
/// Minimum vectors for an index to be worth auto-tuning. Below this,
/// HNSW's win over brute-force is too small to matter and trial budget
/// is better spent on bigger indexes. Compared against `chunk_count`
/// when filtering candidates in `pick_periodic_target`.
const AUTOTUNE_MIN_VECTORS: usize = 1_000;
/// Find an index to poke on a periodic wake. Strategy: least-recently-tuned
/// wins — scan every live index (from `IndexRegistry`, not just promoted
/// ones) and pick the one whose most recent trial is oldest. Never-trialed
/// indexes (no journal entries → `None`, and `None < Some(_)`) sort first,
/// which is exactly what we want: bootstrap them on their first visit.
///
/// Why not "most recently promoted" (the original strategy): a converged
/// index sits at recall=1.0 and can't be improved — but it was always the
/// freshest promotion, so the agent burned every trial on it while the
/// rest of the portfolio got zero attention. Rotating by last-trial-time
/// fixes that without explicit convergence detection.
///
/// Why IndexRegistry instead of PromotionRegistry: on a fresh system only
/// a handful of indexes have ever been promoted, so promotion-based
/// picking starves new ones. Filtering by `AUTOTUNE_MIN_VECTORS` keeps
/// the proposer off indexes where graph config doesn't matter.
///
/// Cost: one journal read per eligible index per periodic tick. Fine at
/// dozens; cache `last_trial_at` on IndexMeta if the portfolio grows large.
async fn pick_periodic_target(deps: &AgentDeps) -> Option<String> {
    let eligible: Vec<String> = deps
        .index_registry
        .list(None, None)
        .await
        .into_iter()
        .filter_map(|m| (m.chunk_count >= AUTOTUNE_MIN_VECTORS).then_some(m.index_name))
        .collect();
    // Manual min-scan keyed by last trial time.
    let mut winner: Option<(String, Option<DateTime<Utc>>)> = None;
    for name in eligible {
        let last_trial_at = deps
            .trial_journal
            .list(&name)
            .await
            .ok()
            .and_then(|trials| trials.into_iter().map(|t| t.created_at).max());
        // `<=` (not `<`) keeps the LAST minimal element on ties — the
        // same behavior as `Iterator::min_by_key`.
        let take_it = match &winner {
            None => true,
            Some((_, current_best)) => last_trial_at <= *current_best,
        };
        if take_it {
            winner = Some((name, last_trial_at));
        }
    }
    winner.map(|(name, _)| name)
}
/// On the first visit to an index, load or synthesize an eval harness so
/// bootstrap trials have something to measure recall against. Returns the
/// harness name (`{index}_auto`). Synthetic harnesses sample 20 chunks as
/// self-queries with k=10; ground truth comes from brute-force cosine via
/// `harness::compute_ground_truth`, so the recall numbers are real.
async fn ensure_auto_harness(index_name: &str, deps: &AgentDeps) -> Result<String, String> {
    let harness_name = format!("{index_name}_auto");
    // Already bootstrapped on an earlier visit — reuse it.
    if deps.harness_store.load_for_index(index_name, &harness_name).await.is_ok() {
        return Ok(harness_name);
    }
    let embeddings = deps
        .embedding_cache
        .get_or_load(index_name)
        .await
        .map_err(|e| format!("load embeddings for auto-harness: {e}"))?;
    if embeddings.is_empty() {
        return Err(format!("index '{index_name}' has no embeddings — cannot bootstrap"));
    }
    // 20 self-queries, k=10.
    let mut eval_set = harness::synthetic_from_chunks(&harness_name, index_name, &embeddings, 20, 10);
    harness::compute_ground_truth(&mut eval_set, &embeddings, &deps.ai_client)
        .await
        .map_err(|e| format!("auto-harness ground truth: {e}"))?;
    deps.harness_store.save(&eval_set).await
        .map_err(|e| format!("save auto-harness: {e}"))?;
    tracing::info!(
        "agent: bootstrapped harness '{}' for index '{}' (20 self-queries, k=10)",
        harness_name, index_name
    );
    Ok(harness_name)
}
/// True when the number of trials in the trailing hour has reached `cap`.
async fn over_rate_limit(inner: &Arc<AgentInner>, cap: u32) -> bool {
    let one_hour_ago = Utc::now() - chrono::Duration::hours(1);
    let ring = inner.recent_trials.lock().await;
    let in_window = ring.iter().filter(|t| **t >= one_hour_ago).count();
    in_window as u32 >= cap
}
/// True when the most recent trial (on ANY index) is newer than the
/// cooldown window.
///
/// Minimal impl: gates on the global most-recent trial rather than
/// per-index (`_index` is unused). Per-index cooldown would be easy to
/// add — a HashMap<String, DateTime> — but for Phase 16.2 MVP, global is
/// fine: Ollama is the shared resource being protected.
async fn cooling_down(inner: &Arc<AgentInner>, _index: &str, cooldown_secs: u64) -> bool {
    let ring = inner.recent_trials.lock().await;
    match ring.back() {
        Some(last) => {
            let elapsed = Utc::now().signed_duration_since(*last);
            elapsed < chrono::Duration::seconds(cooldown_secs as i64)
        }
        None => false, // no trials yet — nothing to cool down from
    }
}
/// Record "a trial just finished" in the rate-limit ring.
async fn mark_recent_trial(inner: &Arc<AgentInner>) {
    let mut timestamps = inner.recent_trials.lock().await;
    timestamps.push_back(Utc::now());
    // Bound the ring at 256 entries — old timestamps age out of the
    // one-hour window anyway, this just caps memory.
    while timestamps.len() > 256 {
        timestamps.pop_front();
    }
}
/// Log an agent event via tracing and stash it as `last_event` so
/// `/agent/status` can answer "what happened last?".
async fn record_event(
    inner: &Arc<AgentInner>,
    kind: &str,
    index: Option<&str>,
    detail: String,
) {
    let index_tag = index.map(|i| format!("[{i}] ")).unwrap_or_default();
    tracing::info!("agent: {} {}{}", kind, index_tag, detail);
    let event = AgentEvent {
        at: Utc::now(),
        kind: kind.to_string(),
        index_name: index.map(String::from),
        detail,
    };
    inner.status.write().await.last_event = Some(event);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal `Trial` for proposer/promotion tests. Only the
    /// config and the recall/p50 metrics matter to the code under test;
    /// everything else is fixed filler.
    fn mk_trial(ec: usize, es: usize, recall: f32, p50: f32) -> Trial {
        Trial {
            id: format!("t-{ec}-{es}"),
            index_name: "test".into(),
            eval_set: "eval".into(),
            config: HnswConfig { ef_construction: ec, ef_search: es, seed: Some(42) },
            metrics: TrialMetrics {
                build_time_secs: 1.0,
                search_latency_p50_us: p50,
                search_latency_p95_us: p50 * 1.5,
                search_latency_p99_us: p50 * 2.0,
                recall_at_k: recall,
                memory_bytes: 0, vectors: 1000, eval_queries: 10,
                brute_force_latency_us: 100.0,
            },
            created_at: Utc::now(),
            note: None,
        }
    }

    /// The proposer must never re-propose an (ec, es) pair already in history.
    #[test]
    fn propose_skips_duplicates() {
        let hist = vec![
            mk_trial(80, 30, 1.0, 500.0),
            mk_trial(100, 30, 1.0, 520.0), // ec+20
        ];
        let next = propose_next_config(&hist, Some(&hist[0])).unwrap();
        // ec+20 is taken, so the proposer should skip it.
        assert!(next.ef_construction != 100 || next.ef_search != 30);
    }

    /// Strictly higher recall wins even at worse latency; lower recall
    /// never wins, even at much better latency.
    #[test]
    fn beats_champion_strict_recall() {
        let champ = mk_trial(80, 30, 0.95, 500.0);
        let better_recall = mk_trial(80, 30, 0.99, 600.0);
        let worse_recall = mk_trial(80, 30, 0.90, 100.0);
        assert!(beats_champion(&better_recall, &champ));
        assert!(!beats_champion(&worse_recall, &champ));
    }

    /// At equal recall, lower p50 latency breaks the tie.
    #[test]
    fn beats_champion_same_recall_lower_latency() {
        let champ = mk_trial(80, 30, 1.0, 500.0);
        let faster = mk_trial(60, 30, 1.0, 400.0);
        let slower = mk_trial(60, 30, 1.0, 600.0);
        assert!(beats_champion(&faster, &champ));
        assert!(!beats_champion(&slower, &champ));
    }
}