diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs
index 793e781..f53c7c3 100644
--- a/crates/gateway/src/v1/mod.rs
+++ b/crates/gateway/src/v1/mod.rs
@@ -84,7 +84,7 @@ pub struct Message {
     pub content: String,
 }
 
-#[derive(Deserialize, Debug)]
+#[derive(Deserialize, Debug, Clone)]
 pub struct ChatRequest {
     pub model: String,
     pub messages: Vec<Message>,
@@ -140,6 +140,78 @@ pub struct UsageBlock {
 
 // -- Handlers --
 
+/// Phase 39: resolve (provider, effective_model) from a ChatRequest.
+///
+/// Explicit `req.provider` wins. If absent, infer from a model-name
+/// prefix: "openrouter/..." → openrouter (strip prefix), "cloud/..." →
+/// ollama_cloud (strip prefix). Bare names default to "ollama".
+///
+/// The stripped model is what the upstream adapter expects:
+/// OpenRouter's API wants "openai/gpt-4o-mini", not
+/// "openrouter/openai/gpt-4o-mini".
+fn resolve_provider(req: &ChatRequest) -> (String, String) {
+    if let Some(p) = req.provider.as_deref() {
+        return (p.to_ascii_lowercase(), req.model.clone());
+    }
+    if let Some(rest) = req.model.strip_prefix("openrouter/") {
+        return ("openrouter".to_string(), rest.to_string());
+    }
+    if let Some(rest) = req.model.strip_prefix("cloud/") {
+        return ("ollama_cloud".to_string(), rest.to_string());
+    }
+    ("ollama".to_string(), req.model.clone())
+}
+
+#[cfg(test)]
+mod resolve_provider_tests {
+    use super::*;
+
+    fn mk_req(provider: Option<&str>, model: &str) -> ChatRequest {
+        ChatRequest {
+            model: model.to_string(),
+            messages: vec![],
+            temperature: None,
+            max_tokens: None,
+            stream: None,
+            think: None,
+            provider: provider.map(|s| s.to_string()),
+        }
+    }
+
+    #[test]
+    fn explicit_provider_wins() {
+        let r = mk_req(Some("openrouter"), "qwen3.5:latest");
+        assert_eq!(resolve_provider(&r), ("openrouter".into(), "qwen3.5:latest".into()));
+    }
+
+    #[test]
+    fn bare_model_defaults_to_ollama() {
+        let r = mk_req(None, "qwen3.5:latest");
+        assert_eq!(resolve_provider(&r), ("ollama".into(), "qwen3.5:latest".into()));
+    }
+
+    #[test]
+    fn openrouter_prefix_infers_and_strips() {
+        let r = mk_req(None, "openrouter/openai/gpt-4o-mini");
+        assert_eq!(resolve_provider(&r), ("openrouter".into(), "openai/gpt-4o-mini".into()));
+    }
+
+    #[test]
+    fn cloud_prefix_infers_and_strips() {
+        let r = mk_req(None, "cloud/kimi-k2:1t");
+        assert_eq!(resolve_provider(&r), ("ollama_cloud".into(), "kimi-k2:1t".into()));
+    }
+
+    #[test]
+    fn explicit_provider_preserves_full_model_even_with_prefix() {
+        // If caller provides both provider and a model with a prefix,
+        // trust them — don't strip. The adapter will get the full model
+        // string as-is.
+        let r = mk_req(Some("openrouter"), "openrouter/openai/gpt-4o-mini");
+        assert_eq!(resolve_provider(&r), ("openrouter".into(), "openrouter/openai/gpt-4o-mini".into()));
+    }
+}
+
 async fn chat(
     State(state): State<AppState>,
     Json(req): Json<ChatRequest>,
@@ -151,13 +223,29 @@ async fn chat(
         tracing::warn!("/v1/chat: stream=true requested but Phase 38 returns non-streaming");
     }
 
-    let provider = req.provider.as_deref().unwrap_or("ollama").to_ascii_lowercase();
+    // Provider resolution: explicit `req.provider` wins; otherwise
+    // infer from a model-name prefix. Phase 39 PRD gate example:
+    // `model: "openrouter/openai/gpt-4o-mini"` → provider "openrouter",
+    // adapter gets the stripped "openai/gpt-4o-mini".
+    let (provider, effective_model) = resolve_provider(&req);
     let start_time = chrono::Utc::now();
     let start_instant = std::time::Instant::now();
 
+    // If we stripped a prefix, clone req with the effective model so
+    // the adapter sees what the upstream provider expects (OpenRouter
+    // wants "openai/gpt-4o-mini", not "openrouter/openai/gpt-4o-mini").
+    let req_for_adapter: std::borrow::Cow<'_, ChatRequest> =
+        if effective_model == req.model {
+            std::borrow::Cow::Borrowed(&req)
+        } else {
+            let mut cloned = req.clone();
+            cloned.model = effective_model.clone();
+            std::borrow::Cow::Owned(cloned)
+        };
+
     let (resp, used_provider) = match provider.as_str() {
         "ollama" | "local" | "" => {
-            let r = ollama::chat(&state.ai_client, &req)
+            let r = ollama::chat(&state.ai_client, &*req_for_adapter)
                 .await
                 .map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama local: {e}")))?;
             (r, "ollama".to_string())
@@ -167,7 +255,7 @@
                 StatusCode::SERVICE_UNAVAILABLE,
                 "OLLAMA_CLOUD_KEY not configured".to_string(),
             ))?;
-            let r = ollama_cloud::chat(key, &req)
+            let r = ollama_cloud::chat(key, &*req_for_adapter)
                 .await
                 .map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama cloud: {e}")))?;
             (r, "ollama_cloud".to_string())
@@ -180,7 +268,7 @@
                 StatusCode::SERVICE_UNAVAILABLE,
                 "OPENROUTER_API_KEY not configured".to_string(),
             ))?;
-            let r = openrouter::chat(key, &req)
+            let r = openrouter::chat(key, &*req_for_adapter)
                 .await
                 .map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?;
             (r, "openrouter".to_string())