gateway/v1: model-prefix routing closes Phase 39 PRD gate
Some checks failed
lakehouse/auditor 4 blocking issues: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Some checks failed
lakehouse/auditor 4 blocking issues: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Phase 39 PRD (docs/CONTROL_PLANE_PRD.md:62) promised:
"/v1/chat routes by `model` field: prefix match
(e.g. openrouter/anthropic/claude-3.5-sonnet → OpenRouter;
bare names → Ollama)"
Actual behavior required clients to pass `provider: "openrouter"`
explicitly. Bare `model: "openrouter/..."` would fall through to the
"unknown provider ''" error. PRD gate never actually passed.
Fix: resolve_provider(&ChatRequest) picks (provider, effective_model):
- explicit `req.provider` wins, model passes through unchanged
- else strip "openrouter/" prefix → provider="openrouter", model
without prefix (OpenRouter API expects "openai/gpt-4o-mini",
not "openrouter/openai/gpt-4o-mini")
- else strip "cloud/" prefix → provider="ollama_cloud"
- else default provider="ollama"
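Adapter calls use Cow<ChatRequest>: borrowed when no strip is needed
(the request is not cloned), owned when we need to build a new model
string. This keeps the common case from cloning the whole ChatRequest,
messages included; resolve_provider itself still allocates the small
provider and model Strings it returns.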
ChatRequest gains #[derive(Clone)] — needed for the Owned variant.
5 new tests pin the resolution semantics including the
"explicit provider + prefixed model" corner case (trust the caller,
don't double-strip).
Workspace warnings unchanged at 0.
Still not shipped from Phase 39: config/providers.toml — hardcoded
match arms work fine in practice, centralizing them is cosmetic.
Flag as a follow-up if a 4th provider lands.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0cf1b7c45a
commit
999abd6999
@ -84,7 +84,7 @@ pub struct Message {
|
|||||||
pub content: String,
|
pub content: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
pub struct ChatRequest {
|
pub struct ChatRequest {
|
||||||
pub model: String,
|
pub model: String,
|
||||||
pub messages: Vec<Message>,
|
pub messages: Vec<Message>,
|
||||||
@ -140,6 +140,78 @@ pub struct UsageBlock {
|
|||||||
|
|
||||||
// -- Handlers --
|
// -- Handlers --
|
||||||
|
|
||||||
|
/// Phase 39: resolve (provider, effective_model) from a ChatRequest.
|
||||||
|
///
|
||||||
|
/// Explicit `req.provider` wins. If absent, infer from a model-name
|
||||||
|
/// prefix: "openrouter/..." → openrouter (strip prefix), "cloud/..." →
|
||||||
|
/// ollama_cloud (strip prefix). Bare names default to "ollama".
|
||||||
|
///
|
||||||
|
/// The stripped model is what the upstream adapter expects:
|
||||||
|
/// OpenRouter's API wants "openai/gpt-4o-mini", not
|
||||||
|
/// "openrouter/openai/gpt-4o-mini".
|
||||||
|
fn resolve_provider(req: &ChatRequest) -> (String, String) {
|
||||||
|
if let Some(p) = req.provider.as_deref() {
|
||||||
|
return (p.to_ascii_lowercase(), req.model.clone());
|
||||||
|
}
|
||||||
|
if let Some(rest) = req.model.strip_prefix("openrouter/") {
|
||||||
|
return ("openrouter".to_string(), rest.to_string());
|
||||||
|
}
|
||||||
|
if let Some(rest) = req.model.strip_prefix("cloud/") {
|
||||||
|
return ("ollama_cloud".to_string(), rest.to_string());
|
||||||
|
}
|
||||||
|
("ollama".to_string(), req.model.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod resolve_provider_tests {
    //! Pins the provider/model resolution semantics of `resolve_provider`.

    use super::*;

    /// Builds a request carrying only the given `provider` and `model`;
    /// every other field is left empty or `None`.
    fn mk_req(provider: Option<&str>, model: &str) -> ChatRequest {
        ChatRequest {
            provider: provider.map(str::to_owned),
            model: model.to_owned(),
            messages: Vec::new(),
            temperature: None,
            max_tokens: None,
            stream: None,
            think: None,
        }
    }

    /// Asserts that resolving (`provider`, `model`) yields exactly
    /// (`want_provider`, `want_model`).
    fn assert_route(
        provider: Option<&str>,
        model: &str,
        want_provider: &str,
        want_model: &str,
    ) {
        let request = mk_req(provider, model);
        let expected = (want_provider.to_owned(), want_model.to_owned());
        assert_eq!(resolve_provider(&request), expected);
    }

    #[test]
    fn explicit_provider_wins() {
        assert_route(
            Some("openrouter"),
            "qwen3.5:latest",
            "openrouter",
            "qwen3.5:latest",
        );
    }

    #[test]
    fn bare_model_defaults_to_ollama() {
        assert_route(None, "qwen3.5:latest", "ollama", "qwen3.5:latest");
    }

    #[test]
    fn openrouter_prefix_infers_and_strips() {
        assert_route(
            None,
            "openrouter/openai/gpt-4o-mini",
            "openrouter",
            "openai/gpt-4o-mini",
        );
    }

    #[test]
    fn cloud_prefix_infers_and_strips() {
        assert_route(None, "cloud/kimi-k2:1t", "ollama_cloud", "kimi-k2:1t");
    }

    #[test]
    fn explicit_provider_preserves_full_model_even_with_prefix() {
        // When the caller names a provider, the model is taken verbatim:
        // a routing prefix in the model string is left in place and
        // handed to the adapter as-is.
        assert_route(
            Some("openrouter"),
            "openrouter/openai/gpt-4o-mini",
            "openrouter",
            "openrouter/openai/gpt-4o-mini",
        );
    }
}
|
||||||
|
|
||||||
async fn chat(
|
async fn chat(
|
||||||
State(state): State<V1State>,
|
State(state): State<V1State>,
|
||||||
Json(req): Json<ChatRequest>,
|
Json(req): Json<ChatRequest>,
|
||||||
@ -151,13 +223,29 @@ async fn chat(
|
|||||||
tracing::warn!("/v1/chat: stream=true requested but Phase 38 returns non-streaming");
|
tracing::warn!("/v1/chat: stream=true requested but Phase 38 returns non-streaming");
|
||||||
}
|
}
|
||||||
|
|
||||||
let provider = req.provider.as_deref().unwrap_or("ollama").to_ascii_lowercase();
|
// Provider resolution: explicit `req.provider` wins; otherwise
|
||||||
|
// infer from a model-name prefix. Phase 39 PRD gate example:
|
||||||
|
// `model: "openrouter/openai/gpt-4o-mini"` → provider "openrouter",
|
||||||
|
// adapter gets the stripped "openai/gpt-4o-mini".
|
||||||
|
let (provider, effective_model) = resolve_provider(&req);
|
||||||
let start_time = chrono::Utc::now();
|
let start_time = chrono::Utc::now();
|
||||||
let start_instant = std::time::Instant::now();
|
let start_instant = std::time::Instant::now();
|
||||||
|
|
||||||
|
// If we stripped a prefix, clone req with the effective model so
|
||||||
|
// the adapter sees what the upstream provider expects (OpenRouter
|
||||||
|
// wants "openai/gpt-4o-mini", not "openrouter/openai/gpt-4o-mini").
|
||||||
|
let req_for_adapter: std::borrow::Cow<'_, ChatRequest> =
|
||||||
|
if effective_model == req.model {
|
||||||
|
std::borrow::Cow::Borrowed(&req)
|
||||||
|
} else {
|
||||||
|
let mut cloned = req.clone();
|
||||||
|
cloned.model = effective_model.clone();
|
||||||
|
std::borrow::Cow::Owned(cloned)
|
||||||
|
};
|
||||||
|
|
||||||
let (resp, used_provider) = match provider.as_str() {
|
let (resp, used_provider) = match provider.as_str() {
|
||||||
"ollama" | "local" | "" => {
|
"ollama" | "local" | "" => {
|
||||||
let r = ollama::chat(&state.ai_client, &req)
|
let r = ollama::chat(&state.ai_client, &*req_for_adapter)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama local: {e}")))?;
|
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama local: {e}")))?;
|
||||||
(r, "ollama".to_string())
|
(r, "ollama".to_string())
|
||||||
@ -167,7 +255,7 @@ async fn chat(
|
|||||||
StatusCode::SERVICE_UNAVAILABLE,
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
"OLLAMA_CLOUD_KEY not configured".to_string(),
|
"OLLAMA_CLOUD_KEY not configured".to_string(),
|
||||||
))?;
|
))?;
|
||||||
let r = ollama_cloud::chat(key, &req)
|
let r = ollama_cloud::chat(key, &*req_for_adapter)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama cloud: {e}")))?;
|
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama cloud: {e}")))?;
|
||||||
(r, "ollama_cloud".to_string())
|
(r, "ollama_cloud".to_string())
|
||||||
@ -180,7 +268,7 @@ async fn chat(
|
|||||||
StatusCode::SERVICE_UNAVAILABLE,
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
"OPENROUTER_API_KEY not configured".to_string(),
|
"OPENROUTER_API_KEY not configured".to_string(),
|
||||||
))?;
|
))?;
|
||||||
let r = openrouter::chat(key, &req)
|
let r = openrouter::chat(key, &*req_for_adapter)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?;
|
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?;
|
||||||
(r, "openrouter".to_string())
|
(r, "openrouter".to_string())
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user