v1/mode: task_class → mode/model router (decision-only, phase 1)
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
HANDOVER §queued (2026-04-25): "Mode router — port LLM Team multi-model
patterns. Pick the right TOOL/MODE for each task class via the matrix,
not cascade through models."
Two-stage architecture:
1. Decision (POST /v1/mode) — pure recommendation, no execution.
Returns {mode, model, decision: {source, fallbacks, matrix_corpus,
notes}} so callers see WHY this mode was picked.
2. Execution (future POST /v1/mode/execute) — proxy to LLM Team
/api/run for modes not yet ported to native Rust runners. Not
wired in this phase.
Splitting decision from execution lets us A/B-test the routing logic
without committing to running every recommendation. The decision
function is pure enough for exhaustive unit tests (3 added).
config/modes.toml — initial map for 5 task_classes (scrum_review,
contract_analysis, staffing_inference, fact_extract, doc_drift_check)
+ a default. matrix_corpus per task is reserved for the future
matrix-informed routing pass.
VALID_MODES list (24 modes) is kept in sync manually with LLM Team's
/api/run handler at /root/llm_team_ui.py:10581. Adding a mode here
without adding it upstream returns 400 from a future proxy.
GET /v1/mode/list — operator introspection so a UI can render the
registry table without re-parsing TOML.
Live-tested: 5 task classes match, unknown classes fall through to
default, force_mode override works + validates, bogus modes return
400 with the valid_modes list.
Updates reference_llm_team_modes.md memory — earlier note claiming
"only extract is registered" was wrong (all 25 are registered).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
626f18d491
commit
d277efbfd2
53
config/modes.toml
Normal file
53
config/modes.toml
Normal file
@ -0,0 +1,53 @@
|
||||
# Mode router config — task_class → mode mapping
|
||||
#
|
||||
# `preferred_mode` is the first choice for a task class; `fallback_modes`
# are tried in order if the preferred one isn't available (LLM Team may
# return "Unknown mode" for some modes, or the matrix may have a stronger
# signal for a fallback). `default_model` seeds the mode runner's model
# field if the caller doesn't override it.
|
||||
#
|
||||
# Modes are meant to be dispatched against LLM Team UI
# (localhost:5000/api/run) for now; future Rust-native runners will
# short-circuit before the proxy. Phase 1 only recommends a mode and does
# not execute it — see crates/gateway/src/v1/mode.rs for the decision path.
|
||||
|
||||
[[task_class]]
|
||||
name = "scrum_review"
|
||||
preferred_mode = "codereview"
|
||||
fallback_modes = ["consensus", "ladder"]
|
||||
default_model = "qwen3-coder:480b"
|
||||
matrix_corpus = "distilled_procedural_v20260423102847"
|
||||
|
||||
[[task_class]]
|
||||
name = "contract_analysis"
|
||||
preferred_mode = "deep_analysis"
|
||||
fallback_modes = ["research", "extract"]
|
||||
default_model = "kimi-k2:1t"
|
||||
matrix_corpus = "chicago_permits_v1"
|
||||
|
||||
[[task_class]]
|
||||
name = "staffing_inference"
|
||||
preferred_mode = "ladder"
|
||||
fallback_modes = ["consensus", "pipeline"]
|
||||
default_model = "gpt-oss:120b"
|
||||
matrix_corpus = "workers_500k_v8"
|
||||
|
||||
[[task_class]]
|
||||
name = "fact_extract"
|
||||
preferred_mode = "extract"
|
||||
fallback_modes = ["distill"]
|
||||
default_model = "qwen2.5"
|
||||
matrix_corpus = "kb_team_runs_v1"
|
||||
|
||||
[[task_class]]
|
||||
name = "doc_drift_check"
|
||||
preferred_mode = "drift"
|
||||
fallback_modes = ["validator"]
|
||||
default_model = "gpt-oss:120b"
|
||||
matrix_corpus = "distilled_factual_v20260423095819"
|
||||
|
||||
# Fallback when task_class isn't in the table — useful for ad-hoc calls
|
||||
# during development that don't yet have a mapped mode.
|
||||
[default]
|
||||
preferred_mode = "pipeline"
|
||||
fallback_modes = ["consensus", "ladder"]
|
||||
default_model = "qwen3.5:latest"
|
||||
@ -30,3 +30,4 @@ tracing-opentelemetry = { workspace = true }
|
||||
arrow = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
toml = { workspace = true }
|
||||
|
||||
@ -17,6 +17,7 @@ pub mod openrouter;
|
||||
pub mod gemini;
|
||||
pub mod claude;
|
||||
pub mod langfuse_trace;
|
||||
pub mod mode;
|
||||
pub mod respond;
|
||||
pub mod truth;
|
||||
|
||||
@ -83,6 +84,8 @@ pub fn router(state: V1State) -> Router {
|
||||
.route("/usage", get(usage))
|
||||
.route("/sessions", get(sessions))
|
||||
.route("/context", get(truth::context))
|
||||
.route("/mode", post(mode::route))
|
||||
.route("/mode/list", get(mode::list))
|
||||
.with_state(state)
|
||||
}
|
||||
|
||||
|
||||
339
crates/gateway/src/v1/mode.rs
Normal file
339
crates/gateway/src/v1/mode.rs
Normal file
@ -0,0 +1,339 @@
|
||||
//! Mode router — task_class → mode + model recommendation.
|
||||
//!
|
||||
//! HANDOVER §queued (2026-04-25): "Mode router — port LLM Team multi-model
|
||||
//! patterns. Pick the right TOOL/MODE for each task class via the matrix,
|
||||
//! not cascade through models."
|
||||
//!
|
||||
//! Two-stage architecture:
|
||||
//!
|
||||
//! 1. **Decision** (`POST /v1/mode`) — given `{task_class, prompt}`,
//!    consult `config/modes.toml` + (future) pathway memory and return
//!    `{mode, model, decision}`, where `decision` is the reasoning trace.
//!    Pure recommendation; no execution.
|
||||
//!
|
||||
//! 2. **Execution** (`POST /v1/mode/execute`, not wired in this phase) —
//!    given `{mode, prompt, ...}`, proxy to LLM Team UI
//!    (`localhost:5000/api/run`), which has all 25 mode runners
//!    implemented. As Rust-native runners land in this crate, they
//!    short-circuit before the proxy.
|
||||
//!
|
||||
//! The split lets us A/B-test the routing logic (decision-only) without
|
||||
//! committing to running every recommendation. It also keeps the pure
|
||||
//! decision function simple enough to unit-test exhaustively.
|
||||
|
||||
use axum::{Json, extract::State, http::StatusCode, response::IntoResponse};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use super::V1State;
|
||||
|
||||
/// Mode names the router is willing to recommend.
///
/// Mirrors the modes accepted by the LLM Team `/api/run` handler at
/// /root/llm_team_ui.py:10581. The two lists are kept in sync by hand —
/// a mode added here without being added upstream returns 400 from the
/// proxy.
const VALID_MODES: &[&str] = &[
    "brainstorm",
    "pipeline",
    "debate",
    "validator",
    "roundrobin",
    "redteam",
    "consensus",
    "codereview",
    "ladder",
    "tournament",
    "evolution",
    "blindassembly",
    "staircase",
    "drift",
    "mesh",
    "hallucination",
    "timeloop",
    "research",
    "eval",
    "extract",
    "refine",
    "adaptive",
    "deep_analysis",
    "distill",
];
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct TaskClassEntry {
|
||||
pub name: String,
|
||||
pub preferred_mode: String,
|
||||
#[serde(default)]
|
||||
pub fallback_modes: Vec<String>,
|
||||
pub default_model: String,
|
||||
#[serde(default)]
|
||||
pub matrix_corpus: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct DefaultEntry {
|
||||
pub preferred_mode: String,
|
||||
#[serde(default)]
|
||||
pub fallback_modes: Vec<String>,
|
||||
pub default_model: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct ModeRouterConfig {
|
||||
#[serde(default, rename = "task_class")]
|
||||
pub task_classes: Vec<TaskClassEntry>,
|
||||
pub default: DefaultEntry,
|
||||
}
|
||||
|
||||
impl ModeRouterConfig {
|
||||
pub fn lookup(&self, task_class: &str) -> Option<&TaskClassEntry> {
|
||||
self.task_classes.iter().find(|t| t.name == task_class)
|
||||
}
|
||||
}
|
||||
|
||||
/// Process-global config cache. Loaded on first request from
|
||||
/// `config/modes.toml` (or `LH_MODES_CONFIG`). If parsing fails the
|
||||
/// router falls back to a hard-coded default so a malformed config can
|
||||
/// never take the gateway down.
|
||||
static CONFIG: OnceLock<ModeRouterConfig> = OnceLock::new();
|
||||
|
||||
fn load_config() -> &'static ModeRouterConfig {
|
||||
CONFIG.get_or_init(|| {
|
||||
let path = std::env::var("LH_MODES_CONFIG")
|
||||
.unwrap_or_else(|_| "config/modes.toml".to_string());
|
||||
match std::fs::read_to_string(&path) {
|
||||
Ok(s) => match toml::from_str::<ModeRouterConfig>(&s) {
|
||||
Ok(c) => {
|
||||
tracing::info!(target: "v1::mode", "loaded {} task classes from {}", c.task_classes.len(), path);
|
||||
c
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(target: "v1::mode", "parse {} failed ({}), using built-in default", path, e);
|
||||
fallback_config()
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(target: "v1::mode", "read {} failed ({}), using built-in default", path, e);
|
||||
fallback_config()
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn fallback_config() -> ModeRouterConfig {
|
||||
ModeRouterConfig {
|
||||
task_classes: vec![],
|
||||
default: DefaultEntry {
|
||||
preferred_mode: "pipeline".into(),
|
||||
fallback_modes: vec!["consensus".into(), "ladder".into()],
|
||||
default_model: "qwen3.5:latest".into(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct RouteRequest {
|
||||
pub task_class: String,
|
||||
/// Reserved for future matrix-informed routing (cosine against
|
||||
/// matrix_corpus + pathway memory). Currently parsed but unused by
|
||||
/// the decision logic — kept on the API so callers can land their
|
||||
/// integration without waiting on the matrix-signal hookup.
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)]
|
||||
pub prompt: Option<String>,
|
||||
/// Caller-supplied override. When set, the router honors it (with a
|
||||
/// validation check against VALID_MODES) and skips the matrix
|
||||
/// signal — useful for testing a specific mode in isolation.
|
||||
#[serde(default)]
|
||||
pub force_mode: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct DecisionTrace {
|
||||
pub task_class_matched: bool,
|
||||
pub source: &'static str, // "config" | "default" | "force_mode"
|
||||
pub fallbacks: Vec<String>,
|
||||
pub matrix_corpus: Option<String>,
|
||||
pub notes: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct RouteDecision {
|
||||
pub mode: String,
|
||||
pub model: String,
|
||||
pub decision: DecisionTrace,
|
||||
}
|
||||
|
||||
/// `POST /v1/mode` — pure recommendation. Returns a `RouteDecision`
|
||||
/// with the chosen mode + model + reasoning trail. Caller is then
|
||||
/// responsible for invoking the mode (either via `/v1/mode/execute`
|
||||
/// proxy or directly against the LLM Team `/api/run`).
|
||||
pub async fn route(
|
||||
State(_state): State<V1State>,
|
||||
Json(req): Json<RouteRequest>,
|
||||
) -> impl IntoResponse {
|
||||
let cfg = load_config();
|
||||
let mut notes = Vec::new();
|
||||
|
||||
// force_mode short-circuits everything else but still validates.
|
||||
if let Some(forced) = req.force_mode.as_deref() {
|
||||
if !VALID_MODES.contains(&forced) {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({
|
||||
"error": format!("Unknown mode: {}", forced),
|
||||
"valid_modes": VALID_MODES,
|
||||
})),
|
||||
));
|
||||
}
|
||||
let model = cfg
|
||||
.lookup(&req.task_class)
|
||||
.map(|t| t.default_model.clone())
|
||||
.unwrap_or_else(|| cfg.default.default_model.clone());
|
||||
notes.push("force_mode override accepted".into());
|
||||
return Ok(Json(RouteDecision {
|
||||
mode: forced.to_string(),
|
||||
model,
|
||||
decision: DecisionTrace {
|
||||
task_class_matched: cfg.lookup(&req.task_class).is_some(),
|
||||
source: "force_mode",
|
||||
fallbacks: vec![],
|
||||
matrix_corpus: None,
|
||||
notes,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
// Lookup task class; fall through to default if absent.
|
||||
if let Some(tc) = cfg.lookup(&req.task_class) {
|
||||
notes.push(format!(
|
||||
"task_class '{}' matched, preferred mode '{}'",
|
||||
tc.name, tc.preferred_mode
|
||||
));
|
||||
if !VALID_MODES.contains(&tc.preferred_mode.as_str()) {
|
||||
notes.push(format!(
|
||||
"preferred '{}' not in VALID_MODES — falling through to first valid fallback",
|
||||
tc.preferred_mode
|
||||
));
|
||||
for fb in &tc.fallback_modes {
|
||||
if VALID_MODES.contains(&fb.as_str()) {
|
||||
notes.push(format!("fallback '{}' selected", fb));
|
||||
return Ok(Json(RouteDecision {
|
||||
mode: fb.clone(),
|
||||
model: tc.default_model.clone(),
|
||||
decision: DecisionTrace {
|
||||
task_class_matched: true,
|
||||
source: "config",
|
||||
fallbacks: tc.fallback_modes.clone(),
|
||||
matrix_corpus: tc.matrix_corpus.clone(),
|
||||
notes,
|
||||
},
|
||||
}));
|
||||
}
|
||||
}
|
||||
// No fallback was valid either — return 422 so the caller
|
||||
// knows the config is broken for this task class.
|
||||
return Err((
|
||||
StatusCode::UNPROCESSABLE_ENTITY,
|
||||
Json(serde_json::json!({
|
||||
"error": format!(
|
||||
"task_class '{}' has no valid mode (preferred='{}', fallbacks={:?})",
|
||||
req.task_class, tc.preferred_mode, tc.fallback_modes
|
||||
),
|
||||
"valid_modes": VALID_MODES,
|
||||
})),
|
||||
));
|
||||
}
|
||||
return Ok(Json(RouteDecision {
|
||||
mode: tc.preferred_mode.clone(),
|
||||
model: tc.default_model.clone(),
|
||||
decision: DecisionTrace {
|
||||
task_class_matched: true,
|
||||
source: "config",
|
||||
fallbacks: tc.fallback_modes.clone(),
|
||||
matrix_corpus: tc.matrix_corpus.clone(),
|
||||
notes,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
notes.push(format!(
|
||||
"task_class '{}' not in config, using default",
|
||||
req.task_class
|
||||
));
|
||||
Ok(Json(RouteDecision {
|
||||
mode: cfg.default.preferred_mode.clone(),
|
||||
model: cfg.default.default_model.clone(),
|
||||
decision: DecisionTrace {
|
||||
task_class_matched: false,
|
||||
source: "default",
|
||||
fallbacks: cfg.default.fallback_modes.clone(),
|
||||
matrix_corpus: None,
|
||||
notes,
|
||||
},
|
||||
}))
|
||||
}
|
||||
|
||||
/// `GET /v1/mode/list` — operator-facing introspection. Returns the
|
||||
/// current registry table + valid modes so a UI can render the matrix
|
||||
/// without re-parsing the TOML.
|
||||
pub async fn list(State(_state): State<V1State>) -> impl IntoResponse {
|
||||
let cfg = load_config();
|
||||
let task_map: HashMap<&str, serde_json::Value> = cfg
|
||||
.task_classes
|
||||
.iter()
|
||||
.map(|t| {
|
||||
(
|
||||
t.name.as_str(),
|
||||
serde_json::json!({
|
||||
"preferred_mode": t.preferred_mode,
|
||||
"fallback_modes": t.fallback_modes,
|
||||
"default_model": t.default_model,
|
||||
"matrix_corpus": t.matrix_corpus,
|
||||
}),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
Json(serde_json::json!({
|
||||
"task_classes": task_map,
|
||||
"default": {
|
||||
"preferred_mode": cfg.default.preferred_mode,
|
||||
"fallback_modes": cfg.default.fallback_modes,
|
||||
"default_model": cfg.default.default_model,
|
||||
},
|
||||
"valid_modes": VALID_MODES,
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Two task classes — one healthy, one whose preferred mode is
    /// unknown — plus a default entry.
    fn cfg_for_test() -> ModeRouterConfig {
        let healthy = TaskClassEntry {
            name: String::from("scrum_review"),
            preferred_mode: String::from("codereview"),
            fallback_modes: vec![String::from("consensus")],
            default_model: String::from("qwen3-coder:480b"),
            matrix_corpus: Some(String::from("distilled_procedural_v1")),
        };
        let broken = TaskClassEntry {
            name: String::from("broken"),
            preferred_mode: String::from("nonsense_mode"),
            fallback_modes: vec![String::from("consensus")],
            default_model: String::from("x"),
            matrix_corpus: None,
        };
        ModeRouterConfig {
            task_classes: vec![healthy, broken],
            default: DefaultEntry {
                preferred_mode: String::from("pipeline"),
                fallback_modes: Vec::new(),
                default_model: String::from("qwen3.5:latest"),
            },
        }
    }

    #[test]
    fn lookup_finds_matching_task_class() {
        let cfg = cfg_for_test();
        let hit = cfg.lookup("scrum_review").unwrap();
        assert_eq!(hit.preferred_mode, "codereview");
        assert!(cfg.lookup("unknown").is_none());
    }

    #[test]
    fn valid_modes_contains_known_runners() {
        for known in ["extract", "codereview", "deep_analysis"].iter() {
            assert!(VALID_MODES.contains(known));
        }
        assert!(!VALID_MODES.contains(&"made_up"));
    }

    #[test]
    fn fallback_path_is_well_defined() {
        let cfg = cfg_for_test();
        let tc = cfg.lookup("broken").unwrap();
        // Preferred is invalid; first valid fallback should be 'consensus'.
        let preferred = tc.preferred_mode.as_str();
        let first_fallback = tc.fallback_modes.first().unwrap().as_str();
        assert!(!VALID_MODES.contains(&preferred));
        assert!(VALID_MODES.contains(&first_fallback));
    }
}
|
||||
Loading…
x
Reference in New Issue
Block a user