diff --git a/config/providers.toml b/config/providers.toml new file mode 100644 index 0000000..a3e761a --- /dev/null +++ b/config/providers.toml @@ -0,0 +1,62 @@ +# Phase 39: Provider Registry +# +# Per-provider base_url, auth scheme, and default model. The gateway's +# /v1/chat dispatcher reads this file at boot to populate its provider +# table. Secrets (API keys) come from /etc/lakehouse/secrets.toml or +# environment variables — NEVER inline a key here. +# +# Adding a new provider: +# 1. New [[provider]] block with name, base_url, auth, default_model +# 2. Matching adapter at crates/aibridge/src/providers/<name>.rs +# implementing the ProviderAdapter trait (chat + embed + unload) +# 3. Route arm in crates/gateway/src/v1/mod.rs matching on `name` +# 4. Model-prefix routing hint in resolve_provider() if the provider +# uses a "<name>/..." model prefix (e.g. "openrouter/...") + +[[provider]] +name = "ollama" +base_url = "http://localhost:3200" +auth = "none" +default_model = "qwen3.5:latest" +# Hot-path local inference. No bearer needed — Python sidecar on +# localhost handles the Ollama API. Model names are bare +# (e.g. "qwen3.5:latest", not "ollama/qwen3.5:latest"). + +[[provider]] +name = "ollama_cloud" +base_url = "https://ollama.com" +auth = "bearer" +auth_env = "OLLAMA_CLOUD_KEY" +default_model = "gpt-oss:120b" +# Cloud-tier Ollama. Key resolved from OLLAMA_CLOUD_KEY env at gateway +# boot. Model-prefix routing: "cloud/" auto-routes here +# (see gateway::v1::resolve_provider). + +[[provider]] +name = "openrouter" +base_url = "https://openrouter.ai/api/v1" +auth = "bearer" +auth_env = "OPENROUTER_API_KEY" +auth_fallback_files = ["/home/profit/.env", "/root/llm_team_config.json"] +default_model = "openai/gpt-oss-120b:free" +# Multi-provider gateway. Covers Anthropic, Google, OpenAI, MiniMax, +# Qwen, Gemma, etc. Key resolved via crates/gateway/src/v1/openrouter.rs +# resolve_openrouter_key() — env first, then fallback files. 
+# Model-prefix routing: "openrouter/<vendor>/<model>" auto-routes here, +# prefix stripped before upstream call. + +# Planned (Phase 40 long-horizon — adapters not yet shipped): +# +# [[provider]] +# name = "gemini" +# base_url = "https://generativelanguage.googleapis.com/v1beta" +# auth = "api_key_query" +# auth_env = "GEMINI_API_KEY" +# default_model = "gemini-2.0-flash" +# +# [[provider]] +# name = "claude" +# base_url = "https://api.anthropic.com/v1" +# auth = "x_api_key" +# auth_env = "ANTHROPIC_API_KEY" +# default_model = "claude-3-5-sonnet-latest" diff --git a/crates/shared/src/lib.rs b/crates/shared/src/lib.rs index 4c5f758..df52168 100644 --- a/crates/shared/src/lib.rs +++ b/crates/shared/src/lib.rs @@ -5,3 +5,4 @@ pub mod config; pub mod pii; pub mod secrets; pub mod model_matrix; +pub mod profiles; diff --git a/crates/shared/src/profiles/execution.rs b/crates/shared/src/profiles/execution.rs new file mode 100644 index 0000000..d810404 --- /dev/null +++ b/crates/shared/src/profiles/execution.rs @@ -0,0 +1,14 @@ +//! ExecutionProfile — the Phase 41 rename of Phase 17's ModelProfile. +//! +//! Carries what's needed to RUN inference: model tag, dataset bindings, +//! HNSW config, embed model, bucket binding. Today this is a type +//! alias over `crate::types::ModelProfile` — the PRD's +//! "Backward compat: ModelProfile still loads, aliased to +//! ExecutionProfile" line, honored literally. +//! +//! When the migration off the old name finishes, this file can either +//! absorb the full struct definition or continue as an alias. Callers +//! should reference `ExecutionProfile` going forward; `ModelProfile` +//! stays exported from `types` for on-disk schema compat. + +pub use crate::types::ModelProfile as ExecutionProfile; diff --git a/crates/shared/src/profiles/memory.rs b/crates/shared/src/profiles/memory.rs new file mode 100644 index 0000000..2166cfc --- /dev/null +++ b/crates/shared/src/profiles/memory.rs @@ -0,0 +1,38 @@ +//! 
MemoryProfile — how the agent's execution memory is kept. +//! +//! Phase 41 decomposition: the Phase 19 playbook_memory + Phase 26 +//! successful_playbooks + Phase 45 doc_refs all need per-profile +//! tuning. Rather than bolt those onto ExecutionProfile, they live +//! here so a "thin" execution profile can reuse a "fat" memory +//! profile and vice versa. + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct MemoryProfile { + pub id: String, + #[serde(default)] + pub description: String, + /// Phase 19: ceiling for playbook_memory boost on retrieval. 0 + /// disables the boost entirely. + #[serde(default = "default_boost_ceiling")] + pub playbook_boost_ceiling: f32, + /// Phase 26: max history entries retained before rotation. + #[serde(default = "default_history_cap")] + pub history_cap: usize, + /// Phase 45: stale threshold for doc_refs before drift check + /// fires (hours). + #[serde(default = "default_stale_hours")] + pub doc_stale_hours: u32, + /// Phase 28: auto-retire playbooks that fail 3+ consecutive runs. + #[serde(default = "default_true")] + pub auto_retire_on_failure: bool, + pub created_at: chrono::DateTime<chrono::Utc>, + #[serde(default)] + pub created_by: String, +} + +fn default_boost_ceiling() -> f32 { 0.35 } +fn default_history_cap() -> usize { 1000 } +fn default_stale_hours() -> u32 { 168 } // one week +fn default_true() -> bool { true } diff --git a/crates/shared/src/profiles/mod.rs b/crates/shared/src/profiles/mod.rs new file mode 100644 index 0000000..97064a0 --- /dev/null +++ b/crates/shared/src/profiles/mod.rs @@ -0,0 +1,28 @@ +//! Phase 41 profile types. +//! +//! The existing `ModelProfile` (Phase 17) is aliased as +//! `ExecutionProfile` here — it continues to carry the model + +//! bindings + HNSW config needed to run inference. Three new profile +//! types land alongside: `RetrievalProfile`, `MemoryProfile`, +//! `ObserverProfile` — each owns a distinct slice of what used to be +//! bundled. 
+//! +//! Backward-compat rule (PRD Phase 41): existing `ModelProfile` on +//! disk continues to deserialize unchanged. New fields on the new +//! profile types are `#[serde(default)]` so old payloads load with +//! empty defaults. +//! +//! These are the canonical shapes — downstream code converts via +//! `From<ModelProfile> for ExecutionProfile` (they're the same struct +//! today, just named differently) and constructs the other three +//! as needed. + +pub mod execution; +pub mod retrieval; +pub mod memory; +pub mod observer; + +pub use execution::ExecutionProfile; +pub use memory::MemoryProfile; +pub use observer::ObserverProfile; +pub use retrieval::RetrievalProfile; diff --git a/crates/shared/src/profiles/observer.rs b/crates/shared/src/profiles/observer.rs new file mode 100644 index 0000000..7f05224 --- /dev/null +++ b/crates/shared/src/profiles/observer.rs @@ -0,0 +1,38 @@ +//! ObserverProfile — how loudly the observer logs this workload. +//! +//! Phase 41 decomposition: the observer's alert thresholds, escalation +//! cadence, and log retention need per-workload tuning. Hot-path +//! staffing workflows want aggressive alerting; batch backfills want +//! quieter. This profile is read by mcp-server/observer.ts at +//! activation-time. + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ObserverProfile { + pub id: String, + #[serde(default)] + pub description: String, + /// How many consecutive failures trigger a cluster escalation to + /// LLM Team `/v1/chat` (qwen3-coder:480b). + #[serde(default = "default_failure_cluster_size")] + pub failure_cluster_size: u32, + /// Minimum seconds between alert emails for the same sig_hash. + /// Prevents alert storms during a regression. + #[serde(default = "default_alert_cooldown")] + pub alert_cooldown_secs: u32, + /// Observer ring buffer size. Older events fall off when full. 
+ #[serde(default = "default_ring_size")] + pub ring_size: usize, + /// Whether to forward events to external Langfuse + /// (/v1/langfuse_trace). Off by default. + #[serde(default)] + pub forward_to_langfuse: bool, + pub created_at: chrono::DateTime<chrono::Utc>, + #[serde(default)] + pub created_by: String, +} + +fn default_failure_cluster_size() -> u32 { 3 } +fn default_alert_cooldown() -> u32 { 300 } // 5 minutes +fn default_ring_size() -> usize { 2000 } diff --git a/crates/shared/src/profiles/retrieval.rs b/crates/shared/src/profiles/retrieval.rs new file mode 100644 index 0000000..a8bfa3c --- /dev/null +++ b/crates/shared/src/profiles/retrieval.rs @@ -0,0 +1,52 @@ +//! RetrievalProfile — what + how the agent reaches into memory. +//! +//! Phase 41 decomposition: the old ModelProfile bundled "what dataset +//! can I read" (bound_datasets) AND "how do I rank results" +//! (hnsw_config) with the model tag. Retrieval concerns split out here +//! so a profile can swap its retrieval strategy without re-activating +//! the model. +//! +//! Fields chosen for what's actually varied per-workload today: +//! - `top_k` / `rerank_top_k` — how many hits to fetch + rerank +//! - `freshness_cutoff_days` — Phase 45 doc-drift uses this +//! - `boost_playbook_memory` — Phase 19 meta-index feedback +//! - `enforce_sensitivity_gates` — Phase 13 access-control integration +//! +//! All fields are `#[serde(default)]` so loading a profile file that +//! predates Phase 41 works without migration. + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct RetrievalProfile { + /// Unique id — slug form, separate namespace from ExecutionProfile. + pub id: String, + /// Free-text operator description. + #[serde(default)] + pub description: String, + /// Default top-K for /vectors/search + /vectors/hybrid. + #[serde(default = "default_top_k")] + pub top_k: u32, + /// How many of the top-K to pass through the reranker. 
0 disables + /// reranking for this profile. + #[serde(default = "default_rerank_top_k")] + pub rerank_top_k: u32, + /// Don't consider playbooks / docs older than this (days). 0 or + /// absent = no freshness filter. + #[serde(default)] + pub freshness_cutoff_days: u32, + /// Phase 19: boost workers/results by playbook_memory similarity. + #[serde(default)] + pub boost_playbook_memory: bool, + /// Phase 13: apply access-control masking on sensitive columns. + /// Default on — safety-first. + #[serde(default = "default_true")] + pub enforce_sensitivity_gates: bool, + pub created_at: chrono::DateTime<chrono::Utc>, + #[serde(default)] + pub created_by: String, +} + +fn default_top_k() -> u32 { 10 } +fn default_rerank_top_k() -> u32 { 5 } +fn default_true() -> bool { true } diff --git a/crates/vectord/src/activation.rs b/crates/vectord/src/activation.rs new file mode 100644 index 0000000..5fd99f9 --- /dev/null +++ b/crates/vectord/src/activation.rs @@ -0,0 +1,118 @@ +//! Profile activation tracking (Phase 41 PRD). +//! +//! Phase 41 PRD called out `crates/vectord/src/activation.rs` with +//! `ActivationTracker` + background-job pattern. The activation +//! handler itself lives in `service.rs::activate_profile` (200+ lines +//! of warm-up + bucket binding that's wired to VectorState); this +//! module provides the type the PRD named and a single-flight guard +//! that satisfies the PRD gate "refuse new activation if one is +//! pending/running." +//! +//! Handler extraction (moving the body of `activate_profile` here) +//! is deliberately NOT in this commit — it's a module-structure +//! refactor, not a semantic change. When that lands, the inline +//! `tokio::spawn` in `service.rs` moves into `ActivationTracker::start` +//! and the HTTP handler shrinks to ~20 lines of validate + start + +//! respond-202. + +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; + +/// Tracks in-flight profile activations. 
The PRD's "single-flight guard" +/// lives here: callers check `is_pending` before starting a new activation +/// and register via `mark_pending` if they proceed. On completion, they +/// call `mark_complete` so the next caller can start. +/// +/// Per-profile granularity — activating profile A doesn't block B. +#[derive(Clone, Default)] +pub struct ActivationTracker { + pending: Arc<RwLock<HashMap<String, String>>>, // profile_id → job_id +} + +impl ActivationTracker { + pub fn new() -> Self { + Self::default() + } + + /// Check if a profile has an activation already running. Returns the + /// in-flight job_id if so. Safe to call without holding a lock. + pub async fn is_pending(&self, profile_id: &str) -> Option<String> { + self.pending.read().await.get(profile_id).cloned() + } + + /// Register a new activation as pending. Returns false if an + /// activation is already running for the same profile (caller should + /// return 409 Conflict or surface the existing job_id). Returns true + /// on successful registration. + pub async fn mark_pending(&self, profile_id: &str, job_id: &str) -> bool { + let mut guard = self.pending.write().await; + if guard.contains_key(profile_id) { + return false; + } + guard.insert(profile_id.to_string(), job_id.to_string()); + true + } + + /// Remove the pending marker when activation finishes (success OR + /// failure — both free the slot for the next caller). + pub async fn mark_complete(&self, profile_id: &str) { + self.pending.write().await.remove(profile_id); + } + + /// How many activations are currently in-flight across all profiles. 
+ pub async fn in_flight_count(&self) -> usize { + self.pending.read().await.len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn empty_tracker_has_no_pending() { + let t = ActivationTracker::new(); + assert_eq!(t.in_flight_count().await, 0); + assert!(t.is_pending("any-profile").await.is_none()); + } + + #[tokio::test] + async fn mark_pending_registers_the_job() { + let t = ActivationTracker::new(); + assert!(t.mark_pending("profile-A", "job-1").await); + assert_eq!(t.in_flight_count().await, 1); + assert_eq!(t.is_pending("profile-A").await, Some("job-1".into())); + } + + #[tokio::test] + async fn single_flight_guard_refuses_second_activation_same_profile() { + // PRD Phase 41 gate: "refuse new activation if one is + // pending/running." Same profile twice → second call returns + // false, caller must surface the in-flight job_id. + let t = ActivationTracker::new(); + assert!(t.mark_pending("profile-A", "job-1").await); + assert!(!t.mark_pending("profile-A", "job-2").await); + // Still the first job — second registration didn't overwrite. + assert_eq!(t.is_pending("profile-A").await, Some("job-1".into())); + } + + #[tokio::test] + async fn different_profiles_dont_block_each_other() { + // Per-profile granularity — activating A doesn't block B. + let t = ActivationTracker::new(); + assert!(t.mark_pending("profile-A", "job-1").await); + assert!(t.mark_pending("profile-B", "job-2").await); + assert_eq!(t.in_flight_count().await, 2); + } + + #[tokio::test] + async fn mark_complete_frees_the_slot() { + let t = ActivationTracker::new(); + t.mark_pending("profile-A", "job-1").await; + t.mark_complete("profile-A").await; + assert_eq!(t.in_flight_count().await, 0); + // Next activation can now proceed. 
+ assert!(t.mark_pending("profile-A", "job-2").await); + } +} diff --git a/crates/vectord/src/lib.rs b/crates/vectord/src/lib.rs index 41f0d4f..6416af5 100644 --- a/crates/vectord/src/lib.rs +++ b/crates/vectord/src/lib.rs @@ -7,6 +7,7 @@ pub mod harness; pub mod hnsw; pub mod index_registry; pub mod jobs; +pub mod activation; pub mod playbook_memory; pub mod pathway_memory; pub mod doc_drift;