diff --git a/config/modes.toml b/config/modes.toml
new file mode 100644
index 0000000..42145f3
--- /dev/null
+++ b/config/modes.toml
@@ -0,0 +1,53 @@
+# Mode router config — task_class → mode mapping
+#
+# `preferred_mode` is the first choice for a task class; `fallback_modes`
+# are tried in order when the preferred one isn't available (LLM Team may
+# return "Unknown mode" for some modes, or the matrix may carry a stronger
+# signal for a fallback). `default_model` seeds the mode runner's model
+# field if the caller doesn't override it.
+#
+# Modes are dispatched against LLM Team UI (localhost:5000/api/run) for
+# now; future Rust-native runners will short-circuit before the proxy.
+# See crates/gateway/src/v1/mode.rs for the dispatch path.
+
+[[task_class]]
+name = "scrum_review"
+preferred_mode = "codereview"
+fallback_modes = ["consensus", "ladder"]
+default_model = "qwen3-coder:480b"
+matrix_corpus = "distilled_procedural_v20260423102847"
+
+[[task_class]]
+name = "contract_analysis"
+preferred_mode = "deep_analysis"
+fallback_modes = ["research", "extract"]
+default_model = "kimi-k2:1t"
+matrix_corpus = "chicago_permits_v1"
+
+[[task_class]]
+name = "staffing_inference"
+preferred_mode = "ladder"
+fallback_modes = ["consensus", "pipeline"]
+default_model = "gpt-oss:120b"
+matrix_corpus = "workers_500k_v8"
+
+[[task_class]]
+name = "fact_extract"
+preferred_mode = "extract"
+fallback_modes = ["distill"]
+default_model = "qwen2.5"
+matrix_corpus = "kb_team_runs_v1"
+
+[[task_class]]
+name = "doc_drift_check"
+preferred_mode = "drift"
+fallback_modes = ["validator"]
+default_model = "gpt-oss:120b"
+matrix_corpus = "distilled_factual_v20260423095819"
+
+# Fallback when task_class isn't in the table — useful for ad-hoc calls
+# during development that don't yet have a mapped mode.
+[default]
+preferred_mode = "pipeline"
+fallback_modes = ["consensus", "ladder"]
+default_model = "qwen3.5:latest"
diff --git a/crates/gateway/Cargo.toml b/crates/gateway/Cargo.toml
index 505c613..48b5d60 100644
--- a/crates/gateway/Cargo.toml
+++ b/crates/gateway/Cargo.toml
@@ -30,3 +30,4 @@ tracing-opentelemetry = { workspace = true }
 arrow = { workspace = true }
 chrono = { workspace = true }
 reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+toml = { workspace = true }
diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs
index 691b272..482c79e 100644
--- a/crates/gateway/src/v1/mod.rs
+++ b/crates/gateway/src/v1/mod.rs
@@ -17,6 +17,7 @@ pub mod openrouter;
 pub mod gemini;
 pub mod claude;
 pub mod langfuse_trace;
+pub mod mode;
 pub mod respond;
 pub mod truth;
 
@@ -83,6 +84,8 @@ pub fn router(state: V1State) -> Router {
         .route("/usage", get(usage))
         .route("/sessions", get(sessions))
         .route("/context", get(truth::context))
+        .route("/mode", post(mode::route))
+        .route("/mode/list", get(mode::list))
         .with_state(state)
 }
 
diff --git a/crates/gateway/src/v1/mode.rs b/crates/gateway/src/v1/mode.rs
new file mode 100644
index 0000000..834de12
--- /dev/null
+++ b/crates/gateway/src/v1/mode.rs
@@ -0,0 +1,425 @@
+//! Mode router — task_class → mode + model recommendation.
+//!
+//! HANDOVER §queued (2026-04-25): "Mode router — port LLM Team multi-model
+//! patterns. Pick the right TOOL/MODE for each task class via the matrix,
+//! not cascade through models."
+//!
+//! Two-stage architecture:
+//!
+//! 1. **Decision** (`POST /v1/mode`) — given `{task_class, prompt}`,
+//!    consult `config/modes.toml` + (future) pathway memory and return
+//!    `{mode, model, decision}`. Pure recommendation; no execution.
+//!
+//! 2. **Execution** (`POST /v1/mode/execute`, planned — not implemented
+//!    in this module yet) — given `{mode, prompt, ...}`, proxy to LLM
+//!    Team UI (`localhost:5000/api/run`) which has all 25 mode runners
+//!    implemented. As Rust-native runners land in this crate, they
+//!    short-circuit before the proxy.
+//!
+//! The split lets us A/B-test the routing logic (decision-only) without
+//! committing to running every recommendation. It also keeps the pure
+//! decision function (`decide`) simple enough to unit-test exhaustively.
+
+use axum::{Json, extract::State, http::StatusCode, response::IntoResponse};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::sync::OnceLock;
+
+use super::V1State;
+
+/// Validated against the LLM Team /api/run handler at
+/// /root/llm_team_ui.py:10581. Kept in sync manually — adding a mode
+/// here without adding it upstream returns 400 from the proxy.
+///
+/// NOTE(review): the module docs cite 25 upstream runners but this list
+/// has 24 entries — confirm whether one is missing.
+const VALID_MODES: &[&str] = &[
+    "brainstorm", "pipeline", "debate", "validator", "roundrobin",
+    "redteam", "consensus", "codereview", "ladder", "tournament",
+    "evolution", "blindassembly", "staircase", "drift", "mesh",
+    "hallucination", "timeloop", "research", "eval", "extract",
+    "refine", "adaptive", "deep_analysis", "distill",
+];
+
+/// Error half of a routing decision: HTTP status plus a JSON body that
+/// carries an `error` message and the list of `valid_modes`.
+type RouteError = (StatusCode, Json<serde_json::Value>);
+
+/// One `[[task_class]]` table from `config/modes.toml`.
+#[derive(Clone, Debug, Deserialize)]
+pub struct TaskClassEntry {
+    pub name: String,
+    pub preferred_mode: String,
+    #[serde(default)]
+    pub fallback_modes: Vec<String>,
+    pub default_model: String,
+    #[serde(default)]
+    pub matrix_corpus: Option<String>,
+}
+
+/// The `[default]` table — used when a task class is not configured.
+#[derive(Clone, Debug, Deserialize)]
+pub struct DefaultEntry {
+    pub preferred_mode: String,
+    #[serde(default)]
+    pub fallback_modes: Vec<String>,
+    pub default_model: String,
+}
+
+/// Parsed form of `config/modes.toml`.
+#[derive(Clone, Debug, Deserialize)]
+pub struct ModeRouterConfig {
+    #[serde(default, rename = "task_class")]
+    pub task_classes: Vec<TaskClassEntry>,
+    pub default: DefaultEntry,
+}
+
+impl ModeRouterConfig {
+    /// Returns the first task-class entry whose `name` equals
+    /// `task_class` exactly, or `None` when there is no such entry.
+    pub fn lookup(&self, task_class: &str) -> Option<&TaskClassEntry> {
+        self.task_classes.iter().find(|t| t.name == task_class)
+    }
+}
+
+/// Process-global config cache. Loaded on first request from
+/// `config/modes.toml` (or `LH_MODES_CONFIG`). If reading or parsing
+/// fails the router falls back to a hard-coded default so a malformed
+/// config can never take the gateway down.
+static CONFIG: OnceLock<ModeRouterConfig> = OnceLock::new();
+
+/// Returns the cached config, loading it on the first call.
+///
+/// The load is a one-off synchronous file read; every later call only
+/// reads the `OnceLock`.
+fn load_config() -> &'static ModeRouterConfig {
+    CONFIG.get_or_init(|| {
+        let path = std::env::var("LH_MODES_CONFIG")
+            .unwrap_or_else(|_| "config/modes.toml".to_string());
+        // Fold both failure kinds into one message so there is a single
+        // fallback branch below.
+        let parsed = std::fs::read_to_string(&path)
+            .map_err(|e| format!("read {} failed ({})", path, e))
+            .and_then(|s| {
+                toml::from_str::<ModeRouterConfig>(&s)
+                    .map_err(|e| format!("parse {} failed ({})", path, e))
+            });
+        match parsed {
+            Ok(c) => {
+                tracing::info!(target: "v1::mode", "loaded {} task classes from {}", c.task_classes.len(), path);
+                c
+            }
+            Err(why) => {
+                tracing::warn!(target: "v1::mode", "{}, using built-in default", why);
+                fallback_config()
+            }
+        }
+    })
+}
+
+/// Built-in config used when the config file cannot be read or parsed.
+fn fallback_config() -> ModeRouterConfig {
+    ModeRouterConfig {
+        task_classes: vec![],
+        default: DefaultEntry {
+            preferred_mode: "pipeline".into(),
+            fallback_modes: vec!["consensus".into(), "ladder".into()],
+            default_model: "qwen3.5:latest".into(),
+        },
+    }
+}
+
+/// Request body for `POST /v1/mode`.
+#[derive(Deserialize, Debug)]
+pub struct RouteRequest {
+    pub task_class: String,
+    /// Reserved for future matrix-informed routing (cosine against
+    /// matrix_corpus + pathway memory). Currently parsed but unused by
+    /// the decision logic — kept on the API so callers can land their
+    /// integration without waiting on the matrix-signal hookup.
+    #[serde(default)]
+    #[allow(dead_code)]
+    pub prompt: Option<String>,
+    /// Caller-supplied override. When set, the router honors it (with a
+    /// validation check against VALID_MODES) and skips the matrix
+    /// signal — useful for testing a specific mode in isolation.
+    #[serde(default)]
+    pub force_mode: Option<String>,
+}
+
+/// Reasoning trail attached to every `RouteDecision`.
+#[derive(Serialize, Debug)]
+pub struct DecisionTrace {
+    pub task_class_matched: bool,
+    pub source: &'static str, // "config" | "default" | "force_mode"
+    pub fallbacks: Vec<String>,
+    pub matrix_corpus: Option<String>,
+    pub notes: Vec<String>,
+}
+
+/// Response body for `POST /v1/mode`.
+#[derive(Serialize, Debug)]
+pub struct RouteDecision {
+    pub mode: String,
+    pub model: String,
+    pub decision: DecisionTrace,
+}
+
+/// Returns `preferred` if it is in `VALID_MODES`, otherwise the first
+/// entry of `fallbacks` that is; `None` when neither yields a valid mode.
+fn first_valid_mode<'a>(preferred: &'a str, fallbacks: &'a [String]) -> Option<&'a str> {
+    std::iter::once(preferred)
+        .chain(fallbacks.iter().map(String::as_str))
+        .find(|m| VALID_MODES.contains(m))
+}
+
+/// Builds the JSON error response shared by every rejection path.
+fn mode_error(status: StatusCode, message: String) -> RouteError {
+    (
+        status,
+        Json(serde_json::json!({
+            "error": message,
+            "valid_modes": VALID_MODES,
+        })),
+    )
+}
+
+/// Pure routing decision — no I/O, no global state.
+///
+/// Resolution order:
+/// 1. `force_mode`, when present: accepted if it is in `VALID_MODES`,
+///    otherwise `400 Bad Request`.
+/// 2. The matching task class, or the `[default]` entry when the task
+///    class is not configured: the preferred mode if valid, else the
+///    first valid fallback, else `422 Unprocessable Entity` so the
+///    caller knows the config is broken for that entry.
+fn decide(cfg: &ModeRouterConfig, req: &RouteRequest) -> Result<RouteDecision, RouteError> {
+    let matched = cfg.lookup(&req.task_class);
+    let mut notes = Vec::new();
+
+    // force_mode short-circuits everything else but still validates.
+    if let Some(forced) = req.force_mode.as_deref() {
+        if !VALID_MODES.contains(&forced) {
+            return Err(mode_error(
+                StatusCode::BAD_REQUEST,
+                format!("Unknown mode: {}", forced),
+            ));
+        }
+        notes.push("force_mode override accepted".into());
+        return Ok(RouteDecision {
+            mode: forced.to_string(),
+            model: matched
+                .map_or(&cfg.default.default_model, |t| &t.default_model)
+                .clone(),
+            decision: DecisionTrace {
+                task_class_matched: matched.is_some(),
+                source: "force_mode",
+                fallbacks: vec![],
+                matrix_corpus: None,
+                notes,
+            },
+        });
+    }
+
+    // Task class and `[default]` share one resolution path so an invalid
+    // mode in either place is caught the same way.
+    let (source, preferred, fallbacks, model, matrix_corpus) = match matched {
+        Some(tc) => {
+            notes.push(format!(
+                "task_class '{}' matched, preferred mode '{}'",
+                tc.name, tc.preferred_mode
+            ));
+            (
+                "config",
+                &tc.preferred_mode,
+                &tc.fallback_modes,
+                &tc.default_model,
+                tc.matrix_corpus.clone(),
+            )
+        }
+        None => {
+            notes.push(format!(
+                "task_class '{}' not in config, using default",
+                req.task_class
+            ));
+            (
+                "default",
+                &cfg.default.preferred_mode,
+                &cfg.default.fallback_modes,
+                &cfg.default.default_model,
+                None,
+            )
+        }
+    };
+
+    let Some(mode) = first_valid_mode(preferred, fallbacks) else {
+        let subject = match matched {
+            Some(tc) => format!("task_class '{}'", tc.name),
+            None => "default entry".to_string(),
+        };
+        return Err(mode_error(
+            StatusCode::UNPROCESSABLE_ENTITY,
+            format!(
+                "{} has no valid mode (preferred='{}', fallbacks={:?})",
+                subject, preferred, fallbacks
+            ),
+        ));
+    };
+    if mode != preferred.as_str() {
+        notes.push(format!(
+            "preferred '{}' not in VALID_MODES — falling through to first valid fallback",
+            preferred
+        ));
+        notes.push(format!("fallback '{}' selected", mode));
+    }
+
+    Ok(RouteDecision {
+        mode: mode.to_string(),
+        model: model.clone(),
+        decision: DecisionTrace {
+            task_class_matched: matched.is_some(),
+            source,
+            fallbacks: fallbacks.clone(),
+            matrix_corpus,
+            notes,
+        },
+    })
+}
+
+/// `POST /v1/mode` — pure recommendation. Returns a `RouteDecision`
+/// with the chosen mode + model + reasoning trail. Caller is then
+/// responsible for invoking the mode (either via the planned
+/// `/v1/mode/execute` proxy or directly against the LLM Team `/api/run`).
+pub async fn route(
+    State(_state): State<V1State>,
+    Json(req): Json<RouteRequest>,
+) -> impl IntoResponse {
+    decide(load_config(), &req).map(Json)
+}
+
+/// `GET /v1/mode/list` — operator-facing introspection. Returns the
+/// current registry table + valid modes so a UI can render the matrix
+/// without re-parsing the TOML.
+pub async fn list(State(_state): State<V1State>) -> impl IntoResponse {
+    let cfg = load_config();
+    let task_map: HashMap<&str, serde_json::Value> = cfg
+        .task_classes
+        .iter()
+        .map(|t| {
+            (
+                t.name.as_str(),
+                serde_json::json!({
+                    "preferred_mode": t.preferred_mode,
+                    "fallback_modes": t.fallback_modes,
+                    "default_model": t.default_model,
+                    "matrix_corpus": t.matrix_corpus,
+                }),
+            )
+        })
+        .collect();
+    Json(serde_json::json!({
+        "task_classes": task_map,
+        "default": {
+            "preferred_mode": cfg.default.preferred_mode,
+            "fallback_modes": cfg.default.fallback_modes,
+            "default_model": cfg.default.default_model,
+        },
+        "valid_modes": VALID_MODES,
+    }))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn cfg_for_test() -> ModeRouterConfig {
+        ModeRouterConfig {
+            task_classes: vec![
+                TaskClassEntry {
+                    name: "scrum_review".into(),
+                    preferred_mode: "codereview".into(),
+                    fallback_modes: vec!["consensus".into()],
+                    default_model: "qwen3-coder:480b".into(),
+                    matrix_corpus: Some("distilled_procedural_v1".into()),
+                },
+                TaskClassEntry {
+                    name: "broken".into(),
+                    preferred_mode: "nonsense_mode".into(),
+                    fallback_modes: vec!["consensus".into()],
+                    default_model: "x".into(),
+                    matrix_corpus: None,
+                },
+            ],
+            default: DefaultEntry {
+                preferred_mode: "pipeline".into(),
+                fallback_modes: vec![],
+                default_model: "qwen3.5:latest".into(),
+            },
+        }
+    }
+
+    fn req(task_class: &str, force_mode: Option<&str>) -> RouteRequest {
+        RouteRequest {
+            task_class: task_class.into(),
+            prompt: None,
+            force_mode: force_mode.map(str::to_string),
+        }
+    }
+
+    #[test]
+    fn lookup_finds_matching_task_class() {
+        let cfg = cfg_for_test();
+        assert_eq!(cfg.lookup("scrum_review").unwrap().preferred_mode, "codereview");
+        assert!(cfg.lookup("unknown").is_none());
+    }
+
+    #[test]
+    fn valid_modes_contains_known_runners() {
+        assert!(VALID_MODES.contains(&"extract"));
+        assert!(VALID_MODES.contains(&"codereview"));
+        assert!(VALID_MODES.contains(&"deep_analysis"));
+        assert!(!VALID_MODES.contains(&"made_up"));
+    }
+
+    #[test]
+    fn matched_task_class_uses_preferred_mode() {
+        let d = decide(&cfg_for_test(), &req("scrum_review", None)).unwrap();
+        assert_eq!(d.mode, "codereview");
+        assert_eq!(d.model, "qwen3-coder:480b");
+        assert_eq!(d.decision.source, "config");
+        assert!(d.decision.task_class_matched);
+    }
+
+    #[test]
+    fn invalid_preferred_falls_back_to_first_valid_fallback() {
+        let d = decide(&cfg_for_test(), &req("broken", None)).unwrap();
+        assert_eq!(d.mode, "consensus");
+        assert_eq!(d.model, "x");
+    }
+
+    #[test]
+    fn unknown_task_class_uses_default() {
+        let d = decide(&cfg_for_test(), &req("unmapped", None)).unwrap();
+        assert_eq!(d.mode, "pipeline");
+        assert_eq!(d.decision.source, "default");
+        assert!(!d.decision.task_class_matched);
+    }
+
+    #[test]
+    fn force_mode_is_validated() {
+        let cfg = cfg_for_test();
+        let d = decide(&cfg, &req("scrum_review", Some("debate"))).unwrap();
+        assert_eq!(d.mode, "debate");
+        assert_eq!(d.decision.source, "force_mode");
+        let (status, _) = decide(&cfg, &req("scrum_review", Some("made_up"))).unwrap_err();
+        assert_eq!(status, StatusCode::BAD_REQUEST);
+    }
+
+    #[test]
+    fn entry_without_any_valid_mode_is_rejected() {
+        let mut cfg = cfg_for_test();
+        cfg.default.preferred_mode = "nonsense_mode".into();
+        let (status, _) = decide(&cfg, &req("unmapped", None)).unwrap_err();
+        assert_eq!(status, StatusCode::UNPROCESSABLE_ENTITY);
+    }
+}