v1/mode: task_class → mode/model router (decision-only, phase 1)
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts

HANDOVER §queued (2026-04-25): "Mode router — port LLM Team multi-model
patterns. Pick the right TOOL/MODE for each task class via the matrix,
not cascade through models."

Two-stage architecture:
  1. Decision (POST /v1/mode) — pure recommendation, no execution.
     Returns {mode, model, decision: {source, fallbacks, matrix_corpus,
     notes}} so callers see WHY this mode was picked.
  2. Execution (future POST /v1/mode/execute) — proxy to LLM Team
     /api/run for modes not yet ported to native Rust runners. Not
     wired in this phase.

Splitting decision from execution lets us A/B-test the routing logic
without committing to running every recommendation. The decision
function is pure enough for exhaustive unit tests (3 added).

config/modes.toml — initial map for 5 task_classes (scrum_review,
contract_analysis, staffing_inference, fact_extract, doc_drift_check)
+ a default. matrix_corpus per task is reserved for the future
matrix-informed routing pass.

VALID_MODES list (24 modes) is kept in sync manually with LLM Team's
/api/run handler at /root/llm_team_ui.py:10581. A mode added here
without a matching upstream runner will make the future execution
proxy return 400.

GET /v1/mode/list — operator introspection so a UI can render the
registry table without re-parsing TOML.

Live-tested: 5 task classes match, unknown classes fall through to
default, force_mode override works + validates, bogus modes return
400 with the valid_modes list.

Updates reference_llm_team_modes.md memory — earlier note claiming
"only extract is registered" was wrong (all 25 are registered).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-26 00:16:32 -05:00
parent 626f18d491
commit d277efbfd2
4 changed files with 396 additions and 0 deletions

53
config/modes.toml Normal file
View File

@ -0,0 +1,53 @@
# Mode router config — task_class → mode mapping
#
# `preferred_mode` is the first choice for a task class; `fallback_modes`
# get tried in order if the preferred one isn't available (LLM Team can
# return Unknown mode for some, OR the matrix has stronger signal for a
# fallback). `default_model` seeds the mode runner's model field if the
# caller doesn't override.
#
# Every mode named here must also appear in VALID_MODES in
# crates/gateway/src/v1/mode.rs, or routing to it returns an error.
#
# Modes are dispatched against LLM Team UI (localhost:5000/api/run) for
# now; future Rust-native runners will short-circuit before the proxy.
# See crates/gateway/src/v1/mode.rs for the dispatch path.
# Scrum pipeline output reviewed via the codereview mode.
[[task_class]]
name = "scrum_review"
preferred_mode = "codereview"
fallback_modes = ["consensus", "ladder"]
default_model = "qwen3-coder:480b"
matrix_corpus = "distilled_procedural_v20260423102847"
[[task_class]]
name = "contract_analysis"
preferred_mode = "deep_analysis"
fallback_modes = ["research", "extract"]
default_model = "kimi-k2:1t"
matrix_corpus = "chicago_permits_v1"
[[task_class]]
name = "staffing_inference"
preferred_mode = "ladder"
fallback_modes = ["consensus", "pipeline"]
default_model = "gpt-oss:120b"
matrix_corpus = "workers_500k_v8"
[[task_class]]
name = "fact_extract"
preferred_mode = "extract"
fallback_modes = ["distill"]
# NOTE(review): "qwen2.5" carries no size/variant tag unlike every other
# default_model in this file — confirm this is the intended model id.
default_model = "qwen2.5"
matrix_corpus = "kb_team_runs_v1"
[[task_class]]
name = "doc_drift_check"
preferred_mode = "drift"
fallback_modes = ["validator"]
default_model = "gpt-oss:120b"
matrix_corpus = "distilled_factual_v20260423095819"
# Fallback when task_class isn't in the table — useful for ad-hoc calls
# during development that don't yet have a mapped mode.
# (No matrix_corpus here: the default route carries no corpus signal.)
[default]
preferred_mode = "pipeline"
fallback_modes = ["consensus", "ladder"]
default_model = "qwen3.5:latest"

View File

@ -30,3 +30,4 @@ tracing-opentelemetry = { workspace = true }
arrow = { workspace = true }
chrono = { workspace = true }
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
toml = { workspace = true }

View File

@ -17,6 +17,7 @@ pub mod openrouter;
pub mod gemini;
pub mod claude;
pub mod langfuse_trace;
pub mod mode;
pub mod respond;
pub mod truth;
@ -83,6 +84,8 @@ pub fn router(state: V1State) -> Router {
.route("/usage", get(usage))
.route("/sessions", get(sessions))
.route("/context", get(truth::context))
.route("/mode", post(mode::route))
.route("/mode/list", get(mode::list))
.with_state(state)
}

View File

@ -0,0 +1,339 @@
//! Mode router — task_class → mode + model recommendation.
//!
//! HANDOVER §queued (2026-04-25): "Mode router — port LLM Team multi-model
//! patterns. Pick the right TOOL/MODE for each task class via the matrix,
//! not cascade through models."
//!
//! Two-stage architecture:
//!
//! 1. **Decision** (`POST /v1/mode`) — given `{task_class, prompt}`,
//! consult `config/modes.toml` + (future) pathway memory and return
//! `{mode, model, decision_trace}`. Pure recommendation; no execution.
//!
//! 2. **Execution** (`POST /v1/mode/execute`) — given `{mode, prompt, ...}`,
//! proxy to LLM Team UI (`localhost:5000/api/run`) which has all 25
//! mode runners implemented. As Rust-native runners land in this
//! crate, they short-circuit before the proxy.
//!
//! The split lets us A/B-test the routing logic (decision-only) without
//! committing to running every recommendation. It also keeps the pure
//! decision function simple enough to unit-test exhaustively.
use axum::{Json, extract::State, http::StatusCode, response::IntoResponse};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::OnceLock;
use super::V1State;
/// Every mode name the router will recommend or accept via `force_mode`.
///
/// Mirrors the LLM Team /api/run handler at /root/llm_team_ui.py:10581
/// and is kept in sync by hand — a mode added here without a matching
/// upstream runner returns 400 from the proxy.
///
/// Order matters: this slice is serialized verbatim into the
/// `valid_modes` field of error responses and `/v1/mode/list`.
const VALID_MODES: &[&str] = &[
    "brainstorm",
    "pipeline",
    "debate",
    "validator",
    "roundrobin",
    "redteam",
    "consensus",
    "codereview",
    "ladder",
    "tournament",
    "evolution",
    "blindassembly",
    "staircase",
    "drift",
    "mesh",
    "hallucination",
    "timeloop",
    "research",
    "eval",
    "extract",
    "refine",
    "adaptive",
    "deep_analysis",
    "distill",
];
/// One `[[task_class]]` entry deserialized from `config/modes.toml`:
/// maps a named task class to its preferred mode, ordered fallbacks,
/// and the model used when the caller doesn't override.
#[derive(Clone, Debug, Deserialize)]
pub struct TaskClassEntry {
    /// Task-class name matched (exactly) against `RouteRequest::task_class`.
    pub name: String,
    /// First-choice mode; must be present in `VALID_MODES` to be usable.
    pub preferred_mode: String,
    /// Tried in order when `preferred_mode` is not in `VALID_MODES`.
    #[serde(default)]
    pub fallback_modes: Vec<String>,
    /// Model recommended alongside the chosen mode.
    pub default_model: String,
    /// Reserved tag for future matrix-informed routing; surfaced in the
    /// decision trace but not consulted by the current decision logic.
    #[serde(default)]
    pub matrix_corpus: Option<String>,
}
/// The `[default]` entry from `config/modes.toml` — used when a request's
/// task_class has no `[[task_class]]` mapping. Same shape as
/// `TaskClassEntry` minus `name`/`matrix_corpus`.
#[derive(Clone, Debug, Deserialize)]
pub struct DefaultEntry {
    /// Mode recommended for unmapped task classes.
    pub preferred_mode: String,
    /// Reported in the decision trace for unmapped task classes.
    #[serde(default)]
    pub fallback_modes: Vec<String>,
    /// Model recommended for unmapped task classes.
    pub default_model: String,
}
/// Root of `config/modes.toml`: the list of task-class mappings plus the
/// mandatory `[default]` fallback entry.
#[derive(Clone, Debug, Deserialize)]
pub struct ModeRouterConfig {
    /// All `[[task_class]]` tables (TOML array-of-tables, hence the rename).
    #[serde(default, rename = "task_class")]
    pub task_classes: Vec<TaskClassEntry>,
    /// Required `[default]` table — parsing fails without it, which then
    /// triggers the built-in fallback config in `load_config`.
    pub default: DefaultEntry,
}
impl ModeRouterConfig {
    /// Return the `[[task_class]]` entry whose `name` equals `task_class`,
    /// or `None` when the class is unmapped (caller then uses `[default]`).
    pub fn lookup(&self, task_class: &str) -> Option<&TaskClassEntry> {
        for entry in &self.task_classes {
            if entry.name == task_class {
                return Some(entry);
            }
        }
        None
    }
}
/// Process-global config cache. Loaded on first request from
/// `config/modes.toml` (or the path in `LH_MODES_CONFIG`). If reading or
/// parsing fails the router falls back to a hard-coded default so a
/// malformed config can never take the gateway down.
static CONFIG: OnceLock<ModeRouterConfig> = OnceLock::new();

/// Load-and-cache accessor for `CONFIG`; never fails, only degrades.
fn load_config() -> &'static ModeRouterConfig {
    CONFIG.get_or_init(|| {
        let path = std::env::var("LH_MODES_CONFIG")
            .unwrap_or_else(|_| "config/modes.toml".to_string());
        // Read first; bail out to the built-in default on I/O failure.
        let raw = match std::fs::read_to_string(&path) {
            Ok(contents) => contents,
            Err(e) => {
                tracing::warn!(target: "v1::mode", "read {} failed ({}), using built-in default", path, e);
                return fallback_config();
            }
        };
        // Then parse; a TOML error likewise degrades instead of panicking.
        match toml::from_str::<ModeRouterConfig>(&raw) {
            Ok(c) => {
                tracing::info!(target: "v1::mode", "loaded {} task classes from {}", c.task_classes.len(), path);
                c
            }
            Err(e) => {
                tracing::warn!(target: "v1::mode", "parse {} failed ({}), using built-in default", path, e);
                fallback_config()
            }
        }
    })
}
/// Built-in safety net returned when `config/modes.toml` is missing or
/// malformed: no task-class mappings, so every request routes through
/// this default entry.
fn fallback_config() -> ModeRouterConfig {
    let default = DefaultEntry {
        preferred_mode: String::from("pipeline"),
        fallback_modes: vec![String::from("consensus"), String::from("ladder")],
        default_model: String::from("qwen3.5:latest"),
    };
    ModeRouterConfig {
        task_classes: Vec::new(),
        default,
    }
}
/// Request body for `POST /v1/mode`.
#[derive(Deserialize, Debug)]
pub struct RouteRequest {
    /// Task class to route; looked up against `config/modes.toml`.
    pub task_class: String,
    /// Reserved for future matrix-informed routing (cosine against
    /// matrix_corpus + pathway memory). Currently parsed but unused by
    /// the decision logic — kept on the API so callers can land their
    /// integration without waiting on the matrix-signal hookup.
    #[serde(default)]
    #[allow(dead_code)]
    pub prompt: Option<String>,
    /// Caller-supplied override. When set, the router honors it (with a
    /// validation check against VALID_MODES) and skips the matrix
    /// signal — useful for testing a specific mode in isolation.
    #[serde(default)]
    pub force_mode: Option<String>,
}
/// Explains *why* a `RouteDecision` picked its mode — returned to the
/// caller so routing logic can be audited without executing anything.
#[derive(Serialize, Debug)]
pub struct DecisionTrace {
    /// Whether `task_class` matched a `[[task_class]]` config entry.
    pub task_class_matched: bool,
    /// Which branch produced the decision.
    pub source: &'static str, // "config" | "default" | "force_mode"
    /// Fallback modes of the winning entry (empty for force_mode).
    pub fallbacks: Vec<String>,
    /// Matrix corpus tag of the matched entry, if any.
    pub matrix_corpus: Option<String>,
    /// Human-readable log of the decision steps, in order taken.
    pub notes: Vec<String>,
}
/// Response body for `POST /v1/mode`: the recommendation plus its trace.
#[derive(Serialize, Debug)]
pub struct RouteDecision {
    /// Recommended mode; always a member of `VALID_MODES`.
    pub mode: String,
    /// Recommended model for that mode.
    pub model: String,
    /// Why this mode/model pair was chosen.
    pub decision: DecisionTrace,
}
/// `POST /v1/mode` — pure recommendation. Returns a `RouteDecision`
/// with the chosen mode + model + reasoning trail. Caller is then
/// responsible for invoking the mode (either via `/v1/mode/execute`
/// proxy or directly against the LLM Team `/api/run`).
///
/// Decision order:
/// 1. `force_mode`, validated against `VALID_MODES` (400 on unknown),
///    wins outright; the model still comes from the task-class entry
///    when one exists.
/// 2. A matched `[[task_class]]` entry supplies `preferred_mode`, or the
///    first fallback present in `VALID_MODES` if the preferred mode is
///    not (422 when neither preferred nor any fallback is valid).
/// 3. An unmapped task class falls through to the `[default]` entry.
pub async fn route(
    State(_state): State<V1State>,
    Json(req): Json<RouteRequest>,
) -> impl IntoResponse {
    let cfg = load_config();
    let mut notes = Vec::new();
    // Resolve the task class exactly once — both the force_mode path and
    // the config path need it (previously looked up twice on force_mode).
    let matched = cfg.lookup(&req.task_class);

    // force_mode short-circuits everything else but still validates.
    if let Some(forced) = req.force_mode.as_deref() {
        if !VALID_MODES.contains(&forced) {
            return Err((
                StatusCode::BAD_REQUEST,
                Json(serde_json::json!({
                    "error": format!("Unknown mode: {}", forced),
                    "valid_modes": VALID_MODES,
                })),
            ));
        }
        // A forced mode still runs on the task class's default model when
        // the class is mapped; otherwise on the [default] model.
        let model = matched
            .map(|t| t.default_model.clone())
            .unwrap_or_else(|| cfg.default.default_model.clone());
        notes.push("force_mode override accepted".into());
        return Ok(Json(RouteDecision {
            mode: forced.to_string(),
            model,
            decision: DecisionTrace {
                task_class_matched: matched.is_some(),
                source: "force_mode",
                fallbacks: vec![],
                matrix_corpus: None,
                notes,
            },
        }));
    }

    // Config-mapped task class: preferred mode first, then fallbacks.
    if let Some(tc) = matched {
        notes.push(format!(
            "task_class '{}' matched, preferred mode '{}'",
            tc.name, tc.preferred_mode
        ));
        if !VALID_MODES.contains(&tc.preferred_mode.as_str()) {
            notes.push(format!(
                "preferred '{}' not in VALID_MODES — falling through to first valid fallback",
                tc.preferred_mode
            ));
            if let Some(fb) = tc
                .fallback_modes
                .iter()
                .find(|fb| VALID_MODES.contains(&fb.as_str()))
            {
                notes.push(format!("fallback '{}' selected", fb));
                return Ok(Json(RouteDecision {
                    mode: fb.clone(),
                    model: tc.default_model.clone(),
                    decision: DecisionTrace {
                        task_class_matched: true,
                        source: "config",
                        fallbacks: tc.fallback_modes.clone(),
                        matrix_corpus: tc.matrix_corpus.clone(),
                        notes,
                    },
                }));
            }
            // No fallback was valid either — return 422 so the caller
            // knows the config is broken for this task class.
            return Err((
                StatusCode::UNPROCESSABLE_ENTITY,
                Json(serde_json::json!({
                    "error": format!(
                        "task_class '{}' has no valid mode (preferred='{}', fallbacks={:?})",
                        req.task_class, tc.preferred_mode, tc.fallback_modes
                    ),
                    "valid_modes": VALID_MODES,
                })),
            ));
        }
        return Ok(Json(RouteDecision {
            mode: tc.preferred_mode.clone(),
            model: tc.default_model.clone(),
            decision: DecisionTrace {
                task_class_matched: true,
                source: "config",
                fallbacks: tc.fallback_modes.clone(),
                matrix_corpus: tc.matrix_corpus.clone(),
                notes,
            },
        }));
    }

    // Unmapped task class: recommend the [default] entry.
    notes.push(format!(
        "task_class '{}' not in config, using default",
        req.task_class
    ));
    Ok(Json(RouteDecision {
        mode: cfg.default.preferred_mode.clone(),
        model: cfg.default.default_model.clone(),
        decision: DecisionTrace {
            task_class_matched: false,
            source: "default",
            fallbacks: cfg.default.fallback_modes.clone(),
            matrix_corpus: None,
            notes,
        },
    }))
}
/// `GET /v1/mode/list` — operator-facing introspection. Returns the
/// current registry table + valid modes so a UI can render the matrix
/// without re-parsing the TOML.
pub async fn list(State(_state): State<V1State>) -> impl IntoResponse {
    // BTreeMap (not HashMap) so task classes serialize sorted by name:
    // HashMap iteration order is randomized per process, which made the
    // JSON output unstable across gateway restarts.
    use std::collections::BTreeMap;

    let cfg = load_config();
    let task_map: BTreeMap<&str, serde_json::Value> = cfg
        .task_classes
        .iter()
        .map(|t| {
            (
                t.name.as_str(),
                serde_json::json!({
                    "preferred_mode": t.preferred_mode,
                    "fallback_modes": t.fallback_modes,
                    "default_model": t.default_model,
                    "matrix_corpus": t.matrix_corpus,
                }),
            )
        })
        .collect();
    Json(serde_json::json!({
        "task_classes": task_map,
        "default": {
            "preferred_mode": cfg.default.preferred_mode,
            "fallback_modes": cfg.default.fallback_modes,
            "default_model": cfg.default.default_model,
        },
        "valid_modes": VALID_MODES,
    }))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Small in-memory config: one healthy entry ("scrum_review") plus
    /// one ("broken") whose preferred mode is deliberately absent from
    /// VALID_MODES, to exercise the fallback path.
    fn cfg_for_test() -> ModeRouterConfig {
        ModeRouterConfig {
            task_classes: vec![
                TaskClassEntry {
                    name: "scrum_review".into(),
                    preferred_mode: "codereview".into(),
                    fallback_modes: vec!["consensus".into()],
                    default_model: "qwen3-coder:480b".into(),
                    matrix_corpus: Some("distilled_procedural_v1".into()),
                },
                TaskClassEntry {
                    name: "broken".into(),
                    preferred_mode: "nonsense_mode".into(),
                    fallback_modes: vec!["consensus".into()],
                    default_model: "x".into(),
                    matrix_corpus: None,
                },
            ],
            default: DefaultEntry {
                preferred_mode: "pipeline".into(),
                fallback_modes: vec![],
                default_model: "qwen3.5:latest".into(),
            },
        }
    }

    // Exact-name lookup hits; unknown names miss (caller uses [default]).
    #[test]
    fn lookup_finds_matching_task_class() {
        let cfg = cfg_for_test();
        assert_eq!(cfg.lookup("scrum_review").unwrap().preferred_mode, "codereview");
        assert!(cfg.lookup("unknown").is_none());
    }

    // Spot-checks the hand-maintained VALID_MODES registry.
    #[test]
    fn valid_modes_contains_known_runners() {
        assert!(VALID_MODES.contains(&"extract"));
        assert!(VALID_MODES.contains(&"codereview"));
        assert!(VALID_MODES.contains(&"deep_analysis"));
        assert!(!VALID_MODES.contains(&"made_up"));
    }

    // Checks the data precondition the fallback branch in `route`
    // relies on: an invalid preferred mode with a valid first fallback.
    #[test]
    fn fallback_path_is_well_defined() {
        let cfg = cfg_for_test();
        let tc = cfg.lookup("broken").unwrap();
        // Preferred is invalid; first valid fallback should be 'consensus'.
        assert!(!VALID_MODES.contains(&tc.preferred_mode.as_str()));
        assert!(VALID_MODES.contains(&tc.fallback_modes[0].as_str()));
    }
}