v1/mode: task_class → mode/model router (decision-only, phase 1)

HANDOVER §queued (2026-04-25): "Mode router — port LLM Team multi-model patterns. Pick the right TOOL/MODE for each task class via the matrix, not cascade through models." Two-stage architecture: 1. Decision (POST /v1/mode) — pure recommendation, no execution. Returns {mode, model, decision: {source, fallbacks, matrix_corpus, notes}} so callers see WHY this mode was picked. 2. Execution (future POST /v1/mode/execute) — proxy to LLM Team /api/run for modes not yet ported to native Rust runners. Not wired in this phase. Splitting decision from execution lets us A/B-test the routing logic without committing to running every recommendation. The decision function is pure enough for exhaustive unit tests (3 added). config/modes.toml — initial map for 5 task_classes (scrum_review, contract_analysis, staffing_inference, fact_extract, doc_drift_check) + a default. matrix_corpus per task is reserved for the future matrix-informed routing pass. VALID_MODES list (24 modes) is kept in sync manually with LLM Team's /api/run handler at /root/llm_team_ui.py:10581. Adding a mode here without adding it upstream returns 400 from a future proxy. GET /v1/mode/list — operator introspection so a UI can render the registry table without re-parsing TOML. Live-tested: 5 task classes match, unknown classes fall through to default, force_mode override works + validates, bogus modes return 400 with the valid_modes list. Updates reference_llm_team_modes.md memory — earlier note claiming "only extract is registered" was wrong (all 25 are registered). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 00:16:32 -05:00 · 2026-04-26 00:16:32 -05:00 · d277efbfd2
commit d277efbfd2
parent 626f18d491
4 changed files with 396 additions and 0 deletions
--- a/config/modes.toml
+++ b/config/modes.toml
@ -0,0 +1,53 @@
+# Mode router config — task_class → mode mapping
+#
+# `preferred_mode` is the first choice for a task class; `fallback_modes`
+# get tried in order if the preferred one isn't available (LLM Team can
+# return Unknown mode for some, OR the matrix has stronger signal for a
+# fallback). `default_model` seeds the mode runner's model field if the
+# caller doesn't override.
+#
+# Modes are intended to be dispatched against LLM Team UI
+# (localhost:5000/api/run); future Rust-native runners will short-circuit
+# before the proxy. Phase 1 is decision-only: see
+# crates/gateway/src/v1/mode.rs for the routing (decision) logic.
+
+[[task_class]]
+name = "scrum_review"
+preferred_mode = "codereview"
+fallback_modes = ["consensus", "ladder"]
+default_model = "qwen3-coder:480b"
+matrix_corpus = "distilled_procedural_v20260423102847"
+
+[[task_class]]
+name = "contract_analysis"
+preferred_mode = "deep_analysis"
+fallback_modes = ["research", "extract"]
+default_model = "kimi-k2:1t"
+matrix_corpus = "chicago_permits_v1"
+
+[[task_class]]
+name = "staffing_inference"
+preferred_mode = "ladder"
+fallback_modes = ["consensus", "pipeline"]
+default_model = "gpt-oss:120b"
+matrix_corpus = "workers_500k_v8"
+
+[[task_class]]
+name = "fact_extract"
+preferred_mode = "extract"
+fallback_modes = ["distill"]
+default_model = "qwen2.5"
+matrix_corpus = "kb_team_runs_v1"
+
+[[task_class]]
+name = "doc_drift_check"
+preferred_mode = "drift"
+fallback_modes = ["validator"]
+default_model = "gpt-oss:120b"
+matrix_corpus = "distilled_factual_v20260423095819"
+
+# Fallback when task_class isn't in the table — useful for ad-hoc calls
+# during development that don't yet have a mapped mode.
+[default]
+preferred_mode = "pipeline"
+fallback_modes = ["consensus", "ladder"]
+default_model = "qwen3.5:latest"
--- a/crates/gateway/Cargo.toml
+++ b/crates/gateway/Cargo.toml
@ -30,3 +30,4 @@ tracing-opentelemetry = { workspace = true }
 arrow = { workspace = true }
 chrono = { workspace = true }
 reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+toml = { workspace = true }
--- a/crates/gateway/src/v1/mod.rs
+++ b/crates/gateway/src/v1/mod.rs
@ -17,6 +17,7 @@ pub mod openrouter;
 pub mod gemini;
 pub mod claude;
 pub mod langfuse_trace;
+pub mod mode;
 pub mod respond;
 pub mod truth;

@ -83,6 +84,8 @@ pub fn router(state: V1State) -> Router {
        .route("/usage", get(usage))
        .route("/sessions", get(sessions))
        .route("/context", get(truth::context))
+        .route("/mode", post(mode::route))
+        .route("/mode/list", get(mode::list))
        .with_state(state)
 }

--- a/crates/gateway/src/v1/mode.rs
+++ b/crates/gateway/src/v1/mode.rs
@ -0,0 +1,339 @@
+//! Mode router — task_class → mode + model recommendation.
+//!
+//! HANDOVER §queued (2026-04-25): "Mode router — port LLM Team multi-model
+//! patterns. Pick the right TOOL/MODE for each task class via the matrix,
+//! not cascade through models."
+//!
+//! Two-stage architecture:
+//!
+//!   1. **Decision** (`POST /v1/mode`) — given `{task_class, prompt}`,
+//!      consult `config/modes.toml` + (future) pathway memory and return
+//!      `{mode, model, decision}`. Pure recommendation; no execution.
+//!
+//!   2. **Execution** (planned `POST /v1/mode/execute`, not wired in this
+//!      phase) — given `{mode, prompt, ...}`, proxy to LLM Team UI
+//!      (`localhost:5000/api/run`) which has all 25 mode runners
+//!      implemented. As Rust-native runners land in this crate, they
+//!      short-circuit before the proxy.
+//!
+//! The split lets us A/B-test the routing logic (decision-only) without
+//! committing to running every recommendation. It also keeps the pure
+//! decision function simple enough to unit-test exhaustively.
+
+use axum::{Json, extract::State, http::StatusCode, response::IntoResponse};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::sync::OnceLock;
+
+use super::V1State;
+
+/// Closed set of mode names the router will recommend or accept via
+/// `force_mode`; order is preserved in API responses that echo the list.
+///
+/// Mirrors the LLM Team `/api/run` handler at
+/// /root/llm_team_ui.py:10581 and is synchronized by hand. A name added
+/// here but missing upstream would be rejected (400) once requests are
+/// proxied there.
+///
+/// NOTE(review): this list has 24 entries while the module docs mention
+/// 25 upstream runners — confirm nothing is missing.
+const VALID_MODES: &[&str] = &[
+    "brainstorm",
+    "pipeline",
+    "debate",
+    "validator",
+    "roundrobin",
+    "redteam",
+    "consensus",
+    "codereview",
+    "ladder",
+    "tournament",
+    "evolution",
+    "blindassembly",
+    "staircase",
+    "drift",
+    "mesh",
+    "hallucination",
+    "timeloop",
+    "research",
+    "eval",
+    "extract",
+    "refine",
+    "adaptive",
+    "deep_analysis",
+    "distill",
+];
+
+/// One `[[task_class]]` table from the modes config: the routing recipe
+/// for a single named task class.
+#[derive(Debug, Clone, Deserialize)]
+pub struct TaskClassEntry {
+    /// Identifier that incoming `task_class` values are compared against.
+    pub name: String,
+    /// Mode recommended first for this task class.
+    pub preferred_mode: String,
+    /// Ordered alternatives used when `preferred_mode` is not a valid
+    /// mode; empty when the key is omitted.
+    #[serde(default)]
+    pub fallback_modes: Vec<String>,
+    /// Model name returned alongside the chosen mode.
+    pub default_model: String,
+    /// Corpus name echoed in the decision trace; `None` when omitted.
+    #[serde(default)]
+    pub matrix_corpus: Option<String>,
+}
+
+/// The `[default]` table from the modes config, used for any task class
+/// that has no `[[task_class]]` entry of its own.
+#[derive(Debug, Clone, Deserialize)]
+pub struct DefaultEntry {
+    /// Mode recommended for unmapped task classes.
+    pub preferred_mode: String,
+    /// Alternatives reported with the default decision; empty when the
+    /// key is omitted.
+    #[serde(default)]
+    pub fallback_modes: Vec<String>,
+    /// Model name returned when no task class supplies one.
+    pub default_model: String,
+}
+
+/// Deserialized form of the modes config file: the per-task-class table
+/// plus the mandatory default entry.
+#[derive(Debug, Clone, Deserialize)]
+pub struct ModeRouterConfig {
+    /// Entries from the repeated `[[task_class]]` tables; empty when the
+    /// file declares none.
+    #[serde(default, rename = "task_class")]
+    pub task_classes: Vec<TaskClassEntry>,
+    /// The `[default]` table; deserialization fails without it.
+    pub default: DefaultEntry,
+}
+
+impl ModeRouterConfig {
+    /// Returns the first entry whose `name` equals `task_class` exactly
+    /// (case-sensitive), or `None` when no entry matches.
+    pub fn lookup(&self, task_class: &str) -> Option<&TaskClassEntry> {
+        self.task_classes
+            .iter()
+            .find(|entry| entry.name.as_str() == task_class)
+    }
+}
+
+/// Process-wide cache for the router configuration. It is filled exactly
+/// once, by the first call to `load_config`, and never reloaded.
+static CONFIG: OnceLock<ModeRouterConfig> = OnceLock::new();
+
+/// Returns the cached configuration, loading it on first use.
+///
+/// The path comes from the `LH_MODES_CONFIG` environment variable, or
+/// `config/modes.toml` when that is unset. A file that cannot be read or
+/// parsed is logged as a warning and replaced by `fallback_config()`, so
+/// a bad config degrades routing instead of failing the request.
+fn load_config() -> &'static ModeRouterConfig {
+    CONFIG.get_or_init(|| {
+        let path = match std::env::var("LH_MODES_CONFIG") {
+            Ok(custom) => custom,
+            Err(_) => "config/modes.toml".to_string(),
+        };
+
+        // Guard clause: an unreadable file goes straight to the built-in default.
+        let raw = match std::fs::read_to_string(&path) {
+            Ok(raw) => raw,
+            Err(e) => {
+                tracing::warn!(target: "v1::mode", "read {} failed ({}), using built-in default", path, e);
+                return fallback_config();
+            }
+        };
+
+        match toml::from_str::<ModeRouterConfig>(&raw) {
+            Ok(parsed) => {
+                tracing::info!(target: "v1::mode", "loaded {} task classes from {}", parsed.task_classes.len(), path);
+                parsed
+            }
+            Err(e) => {
+                tracing::warn!(target: "v1::mode", "parse {} failed ({}), using built-in default", path, e);
+                fallback_config()
+            }
+        }
+    })
+}
+
+/// Built-in configuration used when the modes config file cannot be read
+/// or parsed: no task classes, only a default entry.
+fn fallback_config() -> ModeRouterConfig {
+    let default = DefaultEntry {
+        preferred_mode: String::from("pipeline"),
+        fallback_modes: vec![String::from("consensus"), String::from("ladder")],
+        default_model: String::from("qwen3.5:latest"),
+    };
+    ModeRouterConfig {
+        task_classes: Vec::new(),
+        default,
+    }
+}
+
+/// JSON body accepted by `POST /v1/mode`.
+#[derive(Debug, Deserialize)]
+pub struct RouteRequest {
+    /// Task class to route; matched exactly against the configured names.
+    pub task_class: String,
+    /// Accepted but not read by the decision logic yet. It is kept on
+    /// the API for future matrix-informed routing (cosine against
+    /// matrix_corpus + pathway memory) so callers can integrate now.
+    #[serde(default)]
+    #[allow(dead_code)]
+    pub prompt: Option<String>,
+    /// Optional caller override. When present it must be one of
+    /// VALID_MODES; the router then returns it as-is instead of the
+    /// configured mode — handy for exercising one mode in isolation.
+    #[serde(default)]
+    pub force_mode: Option<String>,
+}
+
+/// Explanation attached to every routing decision so callers can see why
+/// a mode was picked.
+#[derive(Debug, Serialize)]
+pub struct DecisionTrace {
+    /// Whether the requested task class has its own config entry.
+    pub task_class_matched: bool,
+    /// Origin of the decision: "config" | "default" | "force_mode".
+    pub source: &'static str,
+    /// Fallback modes configured for the entry that produced the decision.
+    pub fallbacks: Vec<String>,
+    /// Matrix corpus configured for the matched task class, if any.
+    pub matrix_corpus: Option<String>,
+    /// Human-readable steps taken while deciding, in order.
+    pub notes: Vec<String>,
+}
+
+/// Successful response body of `POST /v1/mode`.
+#[derive(Debug, Serialize)]
+pub struct RouteDecision {
+    /// Recommended mode name.
+    pub mode: String,
+    /// Model name to run the mode with.
+    pub model: String,
+    /// Reasoning trail behind the recommendation.
+    pub decision: DecisionTrace,
+}
+
+/// `POST /v1/mode` — pure recommendation; nothing is executed.
+///
+/// Returns a `RouteDecision` with the chosen mode, model and reasoning
+/// trail. The caller is responsible for invoking the mode afterwards
+/// (via the planned `/v1/mode/execute` proxy or directly against the
+/// LLM Team `/api/run`).
+///
+/// # Errors
+///
+/// * `400 Bad Request` — `force_mode` is not in `VALID_MODES`.
+/// * `422 Unprocessable Entity` — the config entry selected for this
+///   request (the matched task class, or the default) names no valid
+///   mode in either `preferred_mode` or `fallback_modes`.
+///
+/// Both error bodies include the `valid_modes` list.
+pub async fn route(
+    State(_state): State<V1State>,
+    Json(req): Json<RouteRequest>,
+) -> impl IntoResponse {
+    decide(load_config(), &req)
+        .map(Json)
+        .map_err(|(status, body)| (status, Json(body)))
+}
+
+/// Pure decision function behind `route`: maps a request onto a
+/// recommendation using only `cfg` and `VALID_MODES`, so it can be
+/// unit-tested without the HTTP layer or the global config cache.
+fn decide(
+    cfg: &ModeRouterConfig,
+    req: &RouteRequest,
+) -> Result<RouteDecision, (StatusCode, serde_json::Value)> {
+    let matched = cfg.lookup(&req.task_class);
+    let mut notes = Vec::new();
+
+    // force_mode short-circuits everything else but still validates.
+    if let Some(forced) = req.force_mode.as_deref() {
+        if !VALID_MODES.contains(&forced) {
+            return Err((
+                StatusCode::BAD_REQUEST,
+                serde_json::json!({
+                    "error": format!("Unknown mode: {}", forced),
+                    "valid_modes": VALID_MODES,
+                }),
+            ));
+        }
+        // The model still follows the task class so a forced mode runs
+        // with the model that class would normally get.
+        let model = matched
+            .map(|tc| tc.default_model.clone())
+            .unwrap_or_else(|| cfg.default.default_model.clone());
+        notes.push("force_mode override accepted".into());
+        return Ok(RouteDecision {
+            mode: forced.to_string(),
+            model,
+            decision: DecisionTrace {
+                task_class_matched: matched.is_some(),
+                source: "force_mode",
+                fallbacks: vec![],
+                matrix_corpus: None,
+                notes,
+            },
+        });
+    }
+
+    // Known task class: preferred mode, else its first valid fallback.
+    if let Some(tc) = matched {
+        notes.push(format!(
+            "task_class '{}' matched, preferred mode '{}'",
+            tc.name, tc.preferred_mode
+        ));
+        let Some(mode) = first_valid_mode(&tc.preferred_mode, &tc.fallback_modes, &mut notes)
+        else {
+            // Neither preferred nor any fallback is valid — return 422 so
+            // the caller knows the config is broken for this task class.
+            return Err((
+                StatusCode::UNPROCESSABLE_ENTITY,
+                serde_json::json!({
+                    "error": format!(
+                        "task_class '{}' has no valid mode (preferred='{}', fallbacks={:?})",
+                        req.task_class, tc.preferred_mode, tc.fallback_modes
+                    ),
+                    "valid_modes": VALID_MODES,
+                }),
+            ));
+        };
+        return Ok(RouteDecision {
+            mode: mode.to_string(),
+            model: tc.default_model.clone(),
+            decision: DecisionTrace {
+                task_class_matched: true,
+                source: "config",
+                fallbacks: tc.fallback_modes.clone(),
+                matrix_corpus: tc.matrix_corpus.clone(),
+                notes,
+            },
+        });
+    }
+
+    // Unknown task class: use the default entry. It comes from the same
+    // hand-edited file, so it gets the same validation as a task class
+    // instead of being returned unchecked.
+    notes.push(format!(
+        "task_class '{}' not in config, using default",
+        req.task_class
+    ));
+    let Some(mode) = first_valid_mode(
+        &cfg.default.preferred_mode,
+        &cfg.default.fallback_modes,
+        &mut notes,
+    ) else {
+        return Err((
+            StatusCode::UNPROCESSABLE_ENTITY,
+            serde_json::json!({
+                "error": format!(
+                    "default entry has no valid mode (preferred='{}', fallbacks={:?})",
+                    cfg.default.preferred_mode, cfg.default.fallback_modes
+                ),
+                "valid_modes": VALID_MODES,
+            }),
+        ));
+    };
+    Ok(RouteDecision {
+        mode: mode.to_string(),
+        model: cfg.default.default_model.clone(),
+        decision: DecisionTrace {
+            task_class_matched: false,
+            source: "default",
+            fallbacks: cfg.default.fallback_modes.clone(),
+            matrix_corpus: None,
+            notes,
+        },
+    })
+}
+
+/// Picks `preferred` when it is in `VALID_MODES`, otherwise the first
+/// valid entry of `fallbacks`; records each fall-through step in `notes`.
+/// Returns `None` when no candidate is valid.
+fn first_valid_mode<'a>(
+    preferred: &'a str,
+    fallbacks: &'a [String],
+    notes: &mut Vec<String>,
+) -> Option<&'a str> {
+    if VALID_MODES.contains(&preferred) {
+        return Some(preferred);
+    }
+    notes.push(format!(
+        "preferred '{}' not in VALID_MODES — falling through to first valid fallback",
+        preferred
+    ));
+    let fallback = fallbacks
+        .iter()
+        .map(String::as_str)
+        .find(|candidate| VALID_MODES.contains(candidate))?;
+    notes.push(format!("fallback '{}' selected", fallback));
+    Some(fallback)
+}
+
+/// `GET /v1/mode/list` — read-only view of the router registry for
+/// operators.
+///
+/// Responds with a JSON object holding `task_classes` (keyed by task
+/// class name), the `default` entry, and `valid_modes`, so a UI can
+/// render the table without parsing the TOML itself.
+pub async fn list(State(_state): State<V1State>) -> impl IntoResponse {
+    let cfg = load_config();
+
+    // Keyed by name; a later entry with a duplicate name replaces an
+    // earlier one in this view.
+    let mut task_map: HashMap<&str, serde_json::Value> =
+        HashMap::with_capacity(cfg.task_classes.len());
+    for entry in &cfg.task_classes {
+        let row = serde_json::json!({
+            "preferred_mode": entry.preferred_mode,
+            "fallback_modes": entry.fallback_modes,
+            "default_model": entry.default_model,
+            "matrix_corpus": entry.matrix_corpus,
+        });
+        task_map.insert(entry.name.as_str(), row);
+    }
+
+    let fallback = &cfg.default;
+    let body = serde_json::json!({
+        "task_classes": task_map,
+        "default": {
+            "preferred_mode": fallback.preferred_mode,
+            "fallback_modes": fallback.fallback_modes,
+            "default_model": fallback.default_model,
+        },
+        "valid_modes": VALID_MODES,
+    });
+    Json(body)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Two-entry fixture: one healthy task class and one whose preferred
+    /// mode is deliberately not in `VALID_MODES`.
+    fn cfg_for_test() -> ModeRouterConfig {
+        ModeRouterConfig {
+            task_classes: vec![
+                TaskClassEntry {
+                    name: "scrum_review".into(),
+                    preferred_mode: "codereview".into(),
+                    fallback_modes: vec!["consensus".into()],
+                    default_model: "qwen3-coder:480b".into(),
+                    matrix_corpus: Some("distilled_procedural_v1".into()),
+                },
+                TaskClassEntry {
+                    name: "broken".into(),
+                    preferred_mode: "nonsense_mode".into(),
+                    fallback_modes: vec!["consensus".into()],
+                    default_model: "x".into(),
+                    matrix_corpus: None,
+                },
+            ],
+            default: DefaultEntry {
+                preferred_mode: "pipeline".into(),
+                fallback_modes: vec![],
+                default_model: "qwen3.5:latest".into(),
+            },
+        }
+    }
+
+    #[test]
+    fn lookup_finds_matching_task_class() {
+        let cfg = cfg_for_test();
+        assert_eq!(cfg.lookup("scrum_review").unwrap().preferred_mode, "codereview");
+        assert!(cfg.lookup("unknown").is_none());
+    }
+
+    #[test]
+    fn valid_modes_contains_known_runners() {
+        assert!(VALID_MODES.contains(&"extract"));
+        assert!(VALID_MODES.contains(&"codereview"));
+        assert!(VALID_MODES.contains(&"deep_analysis"));
+        assert!(!VALID_MODES.contains(&"made_up"));
+    }
+
+    #[test]
+    fn fallback_path_is_well_defined() {
+        let cfg = cfg_for_test();
+        let tc = cfg.lookup("broken").unwrap();
+        // Preferred is invalid, so selection must fall through...
+        assert!(!VALID_MODES.contains(&tc.preferred_mode.as_str()));
+        // ...and the first valid fallback must be exactly 'consensus'.
+        // Searching instead of indexing keeps an empty list from panicking
+        // before the assertion can report anything useful.
+        let first_valid = tc
+            .fallback_modes
+            .iter()
+            .map(String::as_str)
+            .find(|m| VALID_MODES.contains(m));
+        assert_eq!(first_valid, Some("consensus"));
+    }
+
+    #[test]
+    fn builtin_fallback_config_uses_only_valid_modes() {
+        // The built-in default is what serves traffic when the TOML is
+        // missing or malformed, so every mode it names must be valid.
+        let cfg = fallback_config();
+        assert!(cfg.task_classes.is_empty());
+        assert!(VALID_MODES.contains(&cfg.default.preferred_mode.as_str()));
+        for fb in &cfg.default.fallback_modes {
+            assert!(VALID_MODES.contains(&fb.as_str()), "invalid fallback: {}", fb);
+        }
+    }
+}