phase-40: Gemini + Claude provider adapters
Phase 40 PRD (docs/CONTROL_PLANE_PRD.md:82-83) listed: - crates/aibridge/src/providers/gemini.rs - crates/aibridge/src/providers/claude.rs Neither existed. Landing both now, in gateway/src/v1/ (matches the existing ollama.rs + openrouter.rs sibling pattern — aibridge's providers/ is for the adapter *trait* abstractions, v1/ holds the concrete /v1/chat dispatchers that know the wire format). gemini.rs: - POST https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key=<API_KEY> - Auth: query-string key (not bearer) - Maps messages → contents+parts (Gemini's wire shape), extracts from candidates[0].content.parts[0].text - 3 tests: key resolution, body serialization (camelCase generationConfig + maxOutputTokens), prefix-strip claude.rs: - POST https://api.anthropic.com/v1/messages - Auth: x-api-key header + anthropic-version: 2023-06-01 - Carries system prompt in top-level `system` field (not messages[]). Extracts from content[0].text where type=="text" - 4 tests: key resolution, body serialization with/without system field, prefix-strip v1/mod.rs: + V1State.gemini_key + claude_key Option<String> + resolve_provider() strips "gemini/" and "claude/" prefixes + /v1/chat dispatcher handles "gemini" + "claude"/"anthropic" + 2 new resolve_provider tests (prefix + strip per adapter) main.rs: + Construct both keys at startup via resolve_*_key() helpers. Missing keys log at debug (not warn) since these are optional providers — unlike OpenRouter which is the rescue rung. Every /v1/chat error path mirrors the existing pattern: - 503 SERVICE_UNAVAILABLE when key isn't configured - 502 BAD_GATEWAY with the provider's error text when the upstream call fails - Response shape always the OpenAI-compatible ChatResponse Workspace warnings still at 0. 9 new tests pass. Pre-existing test failure `executor_prompt_includes_surfaced_candidates` at execution_loop/mod.rs:1550 is unrelated (fails on pristine HEAD too — PR fixture divergence). 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b5b0c00efe
commit
0b3bd28cf8
11
Cargo.lock
generated
11
Cargo.lock
generated
@ -8906,6 +8906,17 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "validator"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.1"
|
||||
|
||||
@ -229,6 +229,26 @@ async fn main() {
|
||||
}
|
||||
k
|
||||
},
|
||||
gemini_key: {
|
||||
// Phase 40 provider. GEMINI_API_KEY in env or .env.
|
||||
let k = v1::gemini::resolve_gemini_key();
|
||||
if k.is_some() {
|
||||
tracing::info!("v1: Gemini key loaded — /v1/chat provider=gemini enabled");
|
||||
} else {
|
||||
tracing::debug!("v1: no Gemini key — provider=gemini will 503");
|
||||
}
|
||||
k
|
||||
},
|
||||
claude_key: {
|
||||
// Phase 40 provider. ANTHROPIC_API_KEY in env or .env.
|
||||
let k = v1::claude::resolve_claude_key();
|
||||
if k.is_some() {
|
||||
tracing::info!("v1: Claude key loaded — /v1/chat provider=claude enabled");
|
||||
} else {
|
||||
tracing::debug!("v1: no Claude key — provider=claude will 503");
|
||||
}
|
||||
k
|
||||
},
|
||||
// Phase 40 early deliverable — Langfuse trace emitter.
|
||||
// Defaults match mcp-server/tracing.ts conventions so
|
||||
// gateway traces land in the same staffing project.
|
||||
|
||||
222
crates/gateway/src/v1/claude.rs
Normal file
222
crates/gateway/src/v1/claude.rs
Normal file
@ -0,0 +1,222 @@
|
||||
//! Claude (Anthropic) adapter.
|
||||
//!
|
||||
//! POST `https://api.anthropic.com/v1/messages`. Auth via `x-api-key`
|
||||
//! header (not bearer) + required `anthropic-version` header. Payload
|
||||
//! is NOT OpenAI-compatible — response text lives at
|
||||
//! `content[0].text`. Phase 40 deliverable. System prompts travel in
|
||||
//! a top-level `system` field, separate from the `messages` array.
|
||||
|
||||
use std::time::Duration;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
|
||||
|
||||
/// Anthropic API root; the Messages endpoint lives under it.
const CLAUDE_BASE_URL: &str = "https://api.anthropic.com/v1";
/// Value for the required `anthropic-version` header (date-pinned API rev).
const CLAUDE_API_VERSION: &str = "2023-06-01";
/// Upstream request timeout in seconds — generous, since long
/// generations can take minutes.
const CLAUDE_TIMEOUT_SECS: u64 = 180;
|
||||
|
||||
/// Resolve the Anthropic API key.
///
/// Order: `ANTHROPIC_API_KEY` in the process environment, then a scan of
/// the operator-managed `.env` files. Returns `None` when no non-empty
/// key is found — the startup wiring treats that as "provider disabled"
/// (provider="claude" calls 503) rather than an error.
pub fn resolve_claude_key() -> Option<String> {
    if let Ok(k) = std::env::var("ANTHROPIC_API_KEY") {
        let k = k.trim();
        if !k.is_empty() {
            return Some(k.to_string());
        }
    }
    for path in ["/home/profit/.env", "/root/.env"] {
        if let Ok(raw) = std::fs::read_to_string(path) {
            for line in raw.lines() {
                // Tolerate common dotenv variants: leading whitespace,
                // an `export ` prefix, and single- or double-quoted
                // values. Commented-out lines (`# KEY=...`) never match
                // because the `#` survives the trim.
                let line = line.trim_start();
                let line = line.strip_prefix("export ").unwrap_or(line);
                if let Some(rest) = line.strip_prefix("ANTHROPIC_API_KEY=") {
                    let k = rest.trim().trim_matches('"').trim_matches('\'');
                    if !k.is_empty() {
                        return Some(k.to_string());
                    }
                }
            }
        }
    }
    None
}
|
||||
|
||||
/// Call Anthropic's Messages API and adapt the reply to the
/// OpenAI-compatible `ChatResponse` shape served by `/v1/chat`.
///
/// * `key` — Anthropic API key, sent via the `x-api-key` header
///   (plus the required `anthropic-version` header).
/// * `req` — OpenAI-style chat request; system-role messages are
///   hoisted into Anthropic's top-level `system` field.
///
/// Errors come back as human-readable strings; the dispatcher maps
/// them to 502 BAD_GATEWAY.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "claude/" prefix if the caller used the namespaced form.
    // (The dispatcher usually strips it already; this is a harmless
    // second line of defense for direct callers.)
    let model = req.model.strip_prefix("claude/").unwrap_or(&req.model).to_string();

    // Anthropic carries system prompts outside the messages array.
    // Concatenate any system-role messages into a single system string;
    // keep user + assistant messages in `messages`.
    let mut system_parts: Vec<String> = Vec::new();
    let mut msgs: Vec<AnMessage> = Vec::new();
    for m in &req.messages {
        if m.role == "system" {
            system_parts.push(m.content.clone());
        } else {
            // Anthropic expects strictly "user" or "assistant"; anything
            // else we normalize to "user".
            let role = if m.role == "assistant" { "assistant" } else { "user" };
            msgs.push(AnMessage { role: role.to_string(), content: m.content.clone() });
        }
    }
    let system = if system_parts.is_empty() {
        None
    } else {
        Some(system_parts.join("\n\n"))
    };

    // Defaults mirror the other adapters: 800 output tokens, temp 0.3.
    let body = AnChatBody {
        model: model.clone(),
        messages: msgs,
        max_tokens: req.max_tokens.unwrap_or(800),
        temperature: req.temperature.unwrap_or(0.3),
        system,
    };

    // NOTE(review): builds a fresh reqwest::Client per call — fine at
    // current traffic, but a shared client would reuse connections.
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(CLAUDE_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;

    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/messages", CLAUDE_BASE_URL))
        .header("x-api-key", key)
        .header("anthropic-version", CLAUDE_API_VERSION)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("api.anthropic.com unreachable: {e}"))?;

    // Surface the provider's own error text on non-2xx so the 502 body
    // is actionable.
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("claude {}: {}", status, body));
    }

    let parsed: AnChatResponse = resp.json().await
        .map_err(|e| format!("invalid claude response: {e}"))?;

    let latency_ms = t0.elapsed().as_millis();
    // Take the first "text" content block; non-text blocks (if any)
    // are skipped, and a missing text block yields an empty string.
    let text = parsed.content.into_iter()
        .find(|b| b.block_type == "text")
        .map(|b| b.text)
        .unwrap_or_default();

    // Prefer the provider's token accounting; fall back to a rough
    // ~4-chars-per-token estimate when usage is absent.
    // NOTE(review): the usize→u32 casts could truncate for absurdly
    // large inputs — acceptable for an estimate, but worth confirming.
    let prompt_tokens = parsed.usage.as_ref().map(|u| u.input_tokens).unwrap_or_else(|| {
        let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.usage.as_ref().map(|u| u.output_tokens).unwrap_or_else(|| {
        ((text.chars().count() + 3) / 4) as u32
    });

    tracing::info!(
        target: "v1.chat",
        provider = "claude",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "claude chat completed",
    );

    // Adapt to the OpenAI-compatible response shape; the id is a
    // timestamp-derived synthetic since Anthropic's id format differs.
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: text },
            finish_reason: parsed.stop_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
|
||||
|
||||
// -- Anthropic Messages API wire shapes --

/// Request body for POST /v1/messages.
#[derive(Serialize)]
struct AnChatBody {
    model: String,
    messages: Vec<AnMessage>,
    // Required by the Messages API — there is no server-side default.
    max_tokens: u32,
    temperature: f64,
    // Top-level system prompt; omitted entirely when None so the API
    // never sees an explicit null.
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<String>,
}

/// One conversation turn; role is strictly "user" or "assistant".
#[derive(Serialize)]
struct AnMessage { role: String, content: String }

/// Subset of the Messages API response we actually consume.
#[derive(Deserialize)]
struct AnChatResponse {
    content: Vec<AnContentBlock>,
    #[serde(default, rename = "stop_reason")]
    stop_reason: Option<String>,
    // Defaulted to None when the provider omits usage accounting.
    #[serde(default)]
    usage: Option<AnUsage>,
}

/// One response content block; callers only read blocks whose
/// `type` is "text".
#[derive(Deserialize)]
struct AnContentBlock {
    // `type` is a Rust keyword, hence the rename.
    #[serde(rename = "type")]
    block_type: String,
    // Defaults to "" when the block carries no text field.
    #[serde(default)]
    text: String,
}

/// Token accounting as reported by Anthropic.
#[derive(Deserialize)]
struct AnUsage { input_tokens: u32, output_tokens: u32 }
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a minimal single-turn request body with an
    /// optional system prompt.
    fn single_turn_body(system: Option<String>) -> AnChatBody {
        AnChatBody {
            model: "claude-3-5-sonnet-latest".into(),
            messages: vec![AnMessage { role: "user".into(), content: "hi".into() }],
            max_tokens: 800,
            temperature: 0.3,
            system,
        }
    }

    #[test]
    fn resolve_claude_key_does_not_panic() {
        // Key resolution must be safe to call in any environment.
        let _ = resolve_claude_key();
    }

    #[test]
    fn chat_body_serializes_with_separate_system() {
        let json = serde_json::to_string(&single_turn_body(Some("You are helpful.".into()))).unwrap();
        for needle in [
            "\"system\":\"You are helpful.\"",
            "\"messages\"",
            "\"max_tokens\":800",
        ] {
            assert!(json.contains(needle), "missing {needle} in {json}");
        }
    }

    #[test]
    fn body_omits_system_when_none() {
        let json = serde_json::to_string(&single_turn_body(None)).unwrap();
        assert!(!json.contains("\"system\""), "system field should be skipped when None: {json}");
    }

    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        for (input, expected) in [
            ("claude/claude-3-5-sonnet-latest", "claude-3-5-sonnet-latest"),
            ("claude-3-5-sonnet-latest", "claude-3-5-sonnet-latest"),
        ] {
            assert_eq!(input.strip_prefix("claude/").unwrap_or(input), expected);
        }
    }
}
|
||||
230
crates/gateway/src/v1/gemini.rs
Normal file
230
crates/gateway/src/v1/gemini.rs
Normal file
@ -0,0 +1,230 @@
|
||||
//! Gemini adapter — Google's Generative Language API.
|
||||
//!
|
||||
//! POST `https://generativelanguage.googleapis.com/v1beta/models/
|
||||
//! {model}:generateContent?key=<API_KEY>`. Auth via query-string key
|
||||
//! (not bearer). Payload shape is NOT OpenAI-compatible — we map
|
||||
//! messages → contents + parts, extract response from `candidates[0]
|
||||
//! .content.parts[0].text`. Phase 40 deliverable; gate: `/v1/chat`
|
||||
//! with a prefixed or explicit gemini model returns normally.
|
||||
|
||||
use std::time::Duration;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
|
||||
|
||||
/// Google Generative Language API root (v1beta surface).
const GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
/// Upstream request timeout in seconds — generous, since long
/// generations can take minutes.
const GEMINI_TIMEOUT_SECS: u64 = 180;
|
||||
|
||||
/// Resolve the Gemini API key.
///
/// Order: `GEMINI_API_KEY` in the process environment, then a scan of
/// the operator-managed `.env` files. Returns `None` when no non-empty
/// key is found — the startup wiring treats that as "provider disabled"
/// (provider="gemini" calls 503) rather than an error.
pub fn resolve_gemini_key() -> Option<String> {
    if let Ok(k) = std::env::var("GEMINI_API_KEY") {
        let k = k.trim();
        if !k.is_empty() {
            return Some(k.to_string());
        }
    }
    for path in ["/home/profit/.env", "/root/.env"] {
        if let Ok(raw) = std::fs::read_to_string(path) {
            for line in raw.lines() {
                // Tolerate common dotenv variants: leading whitespace,
                // an `export ` prefix, and single- or double-quoted
                // values. Commented-out lines (`# KEY=...`) never match
                // because the `#` survives the trim.
                let line = line.trim_start();
                let line = line.strip_prefix("export ").unwrap_or(line);
                if let Some(rest) = line.strip_prefix("GEMINI_API_KEY=") {
                    let k = rest.trim().trim_matches('"').trim_matches('\'');
                    if !k.is_empty() {
                        return Some(k.to_string());
                    }
                }
            }
        }
    }
    None
}
|
||||
|
||||
/// Call Google's Generative Language API (`generateContent`) and adapt
/// the reply to the OpenAI-compatible `ChatResponse` shape served by
/// `/v1/chat`.
///
/// * `key` — Gemini API key, passed as a query-string parameter
///   (Gemini does not use bearer auth for this endpoint).
/// * `req` — OpenAI-style chat request, remapped to Gemini's
///   contents + parts wire shape.
///
/// Errors come back as human-readable strings; the dispatcher maps
/// them to 502 BAD_GATEWAY.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "gemini/" prefix if the caller used the namespaced form.
    // (The dispatcher usually strips it already; harmless second line of
    // defense for direct callers.)
    let model = req.model.strip_prefix("gemini/").unwrap_or(&req.model).to_string();

    // Gemini has no system role in `contents`: each system-role message
    // is sent as an ordinary "user" turn, assistant turns become role
    // "model" (Gemini's name for the assistant side), and every message
    // gets its own contents entry. Covers the common single-turn case
    // the scrum pipeline uses.
    // NOTE(review): the API also offers a `systemInstruction` field —
    // confirm whether multi-turn system handling warrants using it.
    let mut contents: Vec<GmContent> = Vec::new();
    for m in &req.messages {
        let role = match m.role.as_str() {
            "system" | "user" => "user",
            _ => "model",
        };
        contents.push(GmContent {
            role: role.to_string(),
            parts: vec![GmPart { text: m.content.clone() }],
        });
    }

    // Defaults mirror the other adapters: 800 output tokens, temp 0.3.
    let body = GmChatBody {
        contents,
        generation_config: GmGenerationConfig {
            temperature: req.temperature.unwrap_or(0.3),
            max_output_tokens: req.max_tokens.unwrap_or(800),
        },
    };

    // NOTE(review): builds a fresh reqwest::Client per call — fine at
    // current traffic, but a shared client would reuse connections.
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(GEMINI_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;

    // Auth travels in the URL query string; avoid logging this URL.
    let url = format!("{}/models/{}:generateContent?key={}", GEMINI_BASE_URL, model, key);
    let t0 = std::time::Instant::now();
    let resp = client
        .post(&url)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("generativelanguage.googleapis.com unreachable: {e}"))?;

    // Surface the provider's own error text on non-2xx so the 502 body
    // is actionable.
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("gemini {}: {}", status, body));
    }

    let parsed: GmChatResponse = resp.json().await
        .map_err(|e| format!("invalid gemini response: {e}"))?;

    let latency_ms = t0.elapsed().as_millis();
    // Empty candidates is an error (nothing to return); an empty parts
    // list degrades to an empty string instead.
    let candidate = parsed.candidates.into_iter().next()
        .ok_or_else(|| "gemini returned no candidates".to_string())?;
    let text = candidate.content.parts.into_iter()
        .next()
        .map(|p| p.text)
        .unwrap_or_default();

    // Prefer the provider's token accounting; fall back to a rough
    // ~4-chars-per-token estimate when usageMetadata is absent.
    // NOTE(review): the usize→u32 casts could truncate for absurdly
    // large inputs — acceptable for an estimate, but worth confirming.
    let prompt_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.prompt_token_count)
        .unwrap_or_else(|| {
            let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
            ((chars + 3) / 4) as u32
        });
    let completion_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.candidates_token_count)
        .unwrap_or_else(|| ((text.chars().count() + 3) / 4) as u32);

    tracing::info!(
        target: "v1.chat",
        provider = "gemini",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "gemini chat completed",
    );

    // Adapt to the OpenAI-compatible response shape; the id is a
    // timestamp-derived synthetic since Gemini has no comparable id.
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: text },
            finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
|
||||
|
||||
// -- Gemini wire shapes --

/// Request body for `models/{model}:generateContent`.
#[derive(Serialize)]
struct GmChatBody {
    contents: Vec<GmContent>,
    // Serialized as `generationConfig` per the API's camelCase convention.
    #[serde(rename = "generationConfig")]
    generation_config: GmGenerationConfig,
}

/// One conversation turn; Gemini's roles are "user" and "model"
/// (not "assistant").
#[derive(Serialize)]
struct GmContent {
    role: String,
    parts: Vec<GmPart>,
}

/// A single text part inside a turn.
#[derive(Serialize)]
struct GmPart { text: String }

/// Sampling knobs; camelCase so `max_output_tokens` serializes as
/// `maxOutputTokens`.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GmGenerationConfig {
    temperature: f64,
    max_output_tokens: u32,
}

/// Subset of the generateContent response we actually consume.
#[derive(Deserialize)]
struct GmChatResponse {
    candidates: Vec<GmCandidate>,
    // Defaulted to None when the provider omits usage accounting.
    #[serde(default, rename = "usageMetadata")]
    usage_metadata: Option<GmUsage>,
}

/// One generated candidate; callers only read the first.
#[derive(Deserialize)]
struct GmCandidate {
    content: GmContentResp,
    #[serde(default, rename = "finishReason")]
    finish_reason: Option<String>,
}

/// Response-side content wrapper around the parts list.
#[derive(Deserialize)]
struct GmContentResp { parts: Vec<GmPartResp> }

/// Response text part; `text` defaults to "" when absent.
#[derive(Deserialize)]
struct GmPartResp { #[serde(default)] text: String }

/// Token accounting as reported by Gemini (camelCase on the wire).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct GmUsage {
    prompt_token_count: u32,
    candidates_token_count: u32,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn resolve_gemini_key_does_not_panic() {
        // Key resolution must be safe to call in any environment.
        let _ = resolve_gemini_key();
    }

    #[test]
    fn chat_body_serializes_to_gemini_shape() {
        let turn = GmContent {
            role: "user".into(),
            parts: vec![GmPart { text: "hello".into() }],
        };
        let body = GmChatBody {
            contents: vec![turn],
            generation_config: GmGenerationConfig {
                temperature: 0.3,
                max_output_tokens: 800,
            },
        };
        let json = serde_json::to_string(&body).unwrap();
        // Gemini's wire format nests contents/parts and uses camelCase
        // for the generation config.
        for needle in [
            "\"contents\"",
            "\"parts\"",
            "\"generationConfig\"",
            "\"maxOutputTokens\":800",
        ] {
            assert!(json.contains(needle), "missing {needle} in {json}");
        }
    }

    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        for (input, expected) in [
            ("gemini/gemini-2.0-flash", "gemini-2.0-flash"),
            ("gemini-2.0-flash", "gemini-2.0-flash"),
        ] {
            assert_eq!(input.strip_prefix("gemini/").unwrap_or(input), expected);
        }
    }
}
|
||||
@ -14,6 +14,8 @@
|
||||
pub mod ollama;
|
||||
pub mod ollama_cloud;
|
||||
pub mod openrouter;
|
||||
pub mod gemini;
|
||||
pub mod claude;
|
||||
pub mod langfuse_trace;
|
||||
pub mod respond;
|
||||
pub mod truth;
|
||||
@ -42,6 +44,14 @@ pub struct V1State {
|
||||
/// provider="openrouter" calls 503 rather than attempt. Same key
|
||||
/// sourcing as LLM Team UI so the two share one API quota.
|
||||
pub openrouter_key: Option<String>,
|
||||
/// Gemini API key (Google Generative Language). Loaded at startup
|
||||
/// via `gemini::resolve_gemini_key()`. None = provider="gemini"
|
||||
/// calls 503. Phase 40 deliverable.
|
||||
pub gemini_key: Option<String>,
|
||||
/// Anthropic Claude API key. Loaded at startup via
|
||||
/// `claude::resolve_claude_key()`. None = provider="claude" calls
|
||||
/// 503. Phase 40 deliverable.
|
||||
pub claude_key: Option<String>,
|
||||
/// Phase 40 early deliverable — Langfuse client. None = tracing
|
||||
/// disabled (keys missing or container unreachable). Traces are
|
||||
/// fire-and-forget: never block the response path.
|
||||
@ -159,6 +169,12 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
|
||||
if let Some(rest) = req.model.strip_prefix("cloud/") {
|
||||
return ("ollama_cloud".to_string(), rest.to_string());
|
||||
}
|
||||
if let Some(rest) = req.model.strip_prefix("gemini/") {
|
||||
return ("gemini".to_string(), rest.to_string());
|
||||
}
|
||||
if let Some(rest) = req.model.strip_prefix("claude/") {
|
||||
return ("claude".to_string(), rest.to_string());
|
||||
}
|
||||
("ollama".to_string(), req.model.clone())
|
||||
}
|
||||
|
||||
@ -210,6 +226,18 @@ mod resolve_provider_tests {
|
||||
let r = mk_req(Some("openrouter"), "openrouter/openai/gpt-4o-mini");
|
||||
assert_eq!(resolve_provider(&r), ("openrouter".into(), "openrouter/openai/gpt-4o-mini".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gemini_prefix_infers_and_strips() {
|
||||
let r = mk_req(None, "gemini/gemini-2.0-flash");
|
||||
assert_eq!(resolve_provider(&r), ("gemini".into(), "gemini-2.0-flash".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn claude_prefix_infers_and_strips() {
|
||||
let r = mk_req(None, "claude/claude-3-5-sonnet-latest");
|
||||
assert_eq!(resolve_provider(&r), ("claude".into(), "claude-3-5-sonnet-latest".into()));
|
||||
}
|
||||
}
|
||||
|
||||
async fn chat(
|
||||
@ -273,10 +301,34 @@ async fn chat(
|
||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?;
|
||||
(r, "openrouter".to_string())
|
||||
}
|
||||
"gemini" => {
|
||||
// Phase 40 provider adapter. Google Generative Language
|
||||
// API via query-string key auth (not bearer).
|
||||
let key = state.gemini_key.as_deref().ok_or((
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"GEMINI_API_KEY not configured".to_string(),
|
||||
))?;
|
||||
let r = gemini::chat(key, &*req_for_adapter)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("gemini: {e}")))?;
|
||||
(r, "gemini".to_string())
|
||||
}
|
||||
"claude" | "anthropic" => {
|
||||
// Phase 40 provider adapter. Anthropic Messages API via
|
||||
// x-api-key header + anthropic-version:2023-06-01.
|
||||
let key = state.claude_key.as_deref().ok_or((
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"ANTHROPIC_API_KEY not configured".to_string(),
|
||||
))?;
|
||||
let r = claude::chat(key, &*req_for_adapter)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("claude: {e}")))?;
|
||||
(r, "claude".to_string())
|
||||
}
|
||||
other => {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter"),
|
||||
format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude"),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user