diff --git a/Cargo.lock b/Cargo.lock index a2c981b..a3f2e3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8906,6 +8906,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "validator" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tracing", +] + [[package]] name = "valuable" version = "0.1.1" diff --git a/crates/gateway/src/main.rs b/crates/gateway/src/main.rs index a35a043..ac5f6e3 100644 --- a/crates/gateway/src/main.rs +++ b/crates/gateway/src/main.rs @@ -229,6 +229,26 @@ async fn main() { } k }, + gemini_key: { + // Phase 40 provider. GEMINI_API_KEY in env or .env. + let k = v1::gemini::resolve_gemini_key(); + if k.is_some() { + tracing::info!("v1: Gemini key loaded — /v1/chat provider=gemini enabled"); + } else { + tracing::debug!("v1: no Gemini key — provider=gemini will 503"); + } + k + }, + claude_key: { + // Phase 40 provider. ANTHROPIC_API_KEY in env or .env. + let k = v1::claude::resolve_claude_key(); + if k.is_some() { + tracing::info!("v1: Claude key loaded — /v1/chat provider=claude enabled"); + } else { + tracing::debug!("v1: no Claude key — provider=claude will 503"); + } + k + }, // Phase 40 early deliverable — Langfuse trace emitter. // Defaults match mcp-server/tracing.ts conventions so // gateway traces land in the same staffing project. diff --git a/crates/gateway/src/v1/claude.rs b/crates/gateway/src/v1/claude.rs new file mode 100644 index 0000000..ccbe8c3 --- /dev/null +++ b/crates/gateway/src/v1/claude.rs @@ -0,0 +1,222 @@ +//! Claude (Anthropic) adapter. +//! +//! POST `https://api.anthropic.com/v1/messages`. Auth via `x-api-key` +//! header (not bearer) + required `anthropic-version` header. Payload +//! is NOT OpenAI-compatible — response text lives at +//! `content[0].text`. Phase 40 deliverable. System prompts travel in +//! a top-level `system` field, separate from the `messages` array. 
+ +use std::time::Duration; +use serde::{Deserialize, Serialize}; + +use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock}; + +const CLAUDE_BASE_URL: &str = "https://api.anthropic.com/v1"; +const CLAUDE_API_VERSION: &str = "2023-06-01"; +const CLAUDE_TIMEOUT_SECS: u64 = 180; + +pub fn resolve_claude_key() -> Option<String> { + if let Ok(k) = std::env::var("ANTHROPIC_API_KEY") { + if !k.trim().is_empty() { return Some(k.trim().to_string()); } + } + for path in ["/home/profit/.env", "/root/.env"] { + if let Ok(raw) = std::fs::read_to_string(path) { + for line in raw.lines() { + if let Some(rest) = line.strip_prefix("ANTHROPIC_API_KEY=") { + let k = rest.trim().trim_matches('"').trim_matches('\''); + if !k.is_empty() { return Some(k.to_string()); } + } + } + } + } + None +} + +pub async fn chat( + key: &str, + req: &ChatRequest, +) -> Result<ChatResponse, String> { + // Strip the "claude/" prefix if the caller used the namespaced form. + let model = req.model.strip_prefix("claude/").unwrap_or(&req.model).to_string(); + + // Anthropic carries system prompts outside the messages array. + // Concatenate any system-role messages into a single system string; + // keep user + assistant messages in `messages`. + let mut system_parts: Vec<String> = Vec::new(); + let mut msgs: Vec<AnMessage> = Vec::new(); + for m in &req.messages { + if m.role == "system" { + system_parts.push(m.content.clone()); + } else { + // Anthropic expects strictly "user" or "assistant"; anything + // else we normalize to "user". 
+ let role = if m.role == "assistant" { "assistant" } else { "user" }; + msgs.push(AnMessage { role: role.to_string(), content: m.content.clone() }); + } + } + let system = if system_parts.is_empty() { + None + } else { + Some(system_parts.join("\n\n")) + }; + + let body = AnChatBody { + model: model.clone(), + messages: msgs, + max_tokens: req.max_tokens.unwrap_or(800), + temperature: req.temperature.unwrap_or(0.3), + system, + }; + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(CLAUDE_TIMEOUT_SECS)) + .build() + .map_err(|e| format!("build client: {e}"))?; + + let t0 = std::time::Instant::now(); + let resp = client + .post(format!("{}/messages", CLAUDE_BASE_URL)) + .header("x-api-key", key) + .header("anthropic-version", CLAUDE_API_VERSION) + .json(&body) + .send() + .await + .map_err(|e| format!("api.anthropic.com unreachable: {e}"))?; + + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_else(|_| "?".into()); + return Err(format!("claude {}: {}", status, body)); + } + + let parsed: AnChatResponse = resp.json().await + .map_err(|e| format!("invalid claude response: {e}"))?; + + let latency_ms = t0.elapsed().as_millis(); + let text = parsed.content.into_iter() + .find(|b| b.block_type == "text") + .map(|b| b.text) + .unwrap_or_default(); + + let prompt_tokens = parsed.usage.as_ref().map(|u| u.input_tokens).unwrap_or_else(|| { + let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum(); + ((chars + 3) / 4) as u32 + }); + let completion_tokens = parsed.usage.as_ref().map(|u| u.output_tokens).unwrap_or_else(|| { + ((text.chars().count() + 3) / 4) as u32 + }); + + tracing::info!( + target: "v1.chat", + provider = "claude", + model = %model, + prompt_tokens, + completion_tokens, + latency_ms = latency_ms as u64, + "claude chat completed", + ); + + Ok(ChatResponse { + id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)), + object: 
"chat.completion", + created: chrono::Utc::now().timestamp(), + model, + choices: vec![Choice { + index: 0, + message: Message { role: "assistant".into(), content: text }, + finish_reason: parsed.stop_reason.unwrap_or_else(|| "stop".into()), + }], + usage: UsageBlock { + prompt_tokens, + completion_tokens, + total_tokens: prompt_tokens + completion_tokens, + }, + }) +} + +// -- Anthropic Messages API wire shapes -- + +#[derive(Serialize)] +struct AnChatBody { + model: String, + messages: Vec<AnMessage>, + max_tokens: u32, + temperature: f64, + #[serde(skip_serializing_if = "Option::is_none")] + system: Option<String>, +} + +#[derive(Serialize)] +struct AnMessage { role: String, content: String } + +#[derive(Deserialize)] +struct AnChatResponse { + content: Vec<AnContentBlock>, + #[serde(default, rename = "stop_reason")] + stop_reason: Option<String>, + #[serde(default)] + usage: Option<AnUsage>, +} + +#[derive(Deserialize)] +struct AnContentBlock { + #[serde(rename = "type")] + block_type: String, + #[serde(default)] + text: String, +} + +#[derive(Deserialize)] +struct AnUsage { input_tokens: u32, output_tokens: u32 } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn resolve_claude_key_does_not_panic() { + let _ = resolve_claude_key(); + } + + #[test] + fn chat_body_serializes_with_separate_system() { + let body = AnChatBody { + model: "claude-3-5-sonnet-latest".into(), + messages: vec![ + AnMessage { role: "user".into(), content: "hi".into() }, + ], + max_tokens: 800, + temperature: 0.3, + system: Some("You are helpful.".into()), + }; + let json = serde_json::to_string(&body).unwrap(); + assert!(json.contains("\"system\":\"You are helpful.\"")); + assert!(json.contains("\"messages\"")); + assert!(json.contains("\"max_tokens\":800")); + } + + #[test] + fn body_omits_system_when_none() { + let body = AnChatBody { + model: "claude-3-5-sonnet-latest".into(), + messages: vec![AnMessage { role: "user".into(), content: "hi".into() }], + max_tokens: 800, + temperature: 0.3, + system: None, + }; + let json = 
serde_json::to_string(&body).unwrap(); + assert!(!json.contains("\"system\""), "system field should be skipped when None: {json}"); + } + + #[test] + fn model_prefix_strip_preserves_bare_names() { + let cases = [ + ("claude/claude-3-5-sonnet-latest", "claude-3-5-sonnet-latest"), + ("claude-3-5-sonnet-latest", "claude-3-5-sonnet-latest"), + ]; + for (input, expected) in cases { + let out = input.strip_prefix("claude/").unwrap_or(input); + assert_eq!(out, expected); + } + } +} diff --git a/crates/gateway/src/v1/gemini.rs b/crates/gateway/src/v1/gemini.rs new file mode 100644 index 0000000..99d4c98 --- /dev/null +++ b/crates/gateway/src/v1/gemini.rs @@ -0,0 +1,230 @@ +//! Gemini adapter — Google's Generative Language API. +//! +//! POST `https://generativelanguage.googleapis.com/v1beta/models/ +//! {model}:generateContent?key=KEY`. Auth via query-string key +//! (not bearer). Payload shape is NOT OpenAI-compatible — we map +//! messages → contents + parts, extract response from `candidates[0] +//! .content.parts[0].text`. Phase 40 deliverable; gate: `/v1/chat` +//! with a prefixed or explicit gemini model returns normally. 
+ +use std::time::Duration; +use serde::{Deserialize, Serialize}; + +use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock}; + +const GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta"; +const GEMINI_TIMEOUT_SECS: u64 = 180; + +pub fn resolve_gemini_key() -> Option<String> { + if let Ok(k) = std::env::var("GEMINI_API_KEY") { + if !k.trim().is_empty() { return Some(k.trim().to_string()); } + } + for path in ["/home/profit/.env", "/root/.env"] { + if let Ok(raw) = std::fs::read_to_string(path) { + for line in raw.lines() { + if let Some(rest) = line.strip_prefix("GEMINI_API_KEY=") { + let k = rest.trim().trim_matches('"').trim_matches('\''); + if !k.is_empty() { return Some(k.to_string()); } + } + } + } + } + None +} + +pub async fn chat( + key: &str, + req: &ChatRequest, +) -> Result<ChatResponse, String> { + // Strip the "gemini/" prefix if the caller used the namespaced form. + let model = req.model.strip_prefix("gemini/").unwrap_or(&req.model).to_string(); + + // Gemini splits system prompt from conversation differently. + // Simplest working mapping: concatenate any system messages at the + // top of a single user turn, then append user/assistant turns as + // separate contents entries. Covers the common single-turn case + // the scrum pipeline uses. 
+ let mut contents: Vec<GmContent> = Vec::new(); + for m in &req.messages { + let role = match m.role.as_str() { + "system" | "user" => "user", + _ => "model", + }; + contents.push(GmContent { + role: role.to_string(), + parts: vec![GmPart { text: m.content.clone() }], + }); + } + + let body = GmChatBody { + contents, + generation_config: GmGenerationConfig { + temperature: req.temperature.unwrap_or(0.3), + max_output_tokens: req.max_tokens.unwrap_or(800), + }, + }; + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(GEMINI_TIMEOUT_SECS)) + .build() + .map_err(|e| format!("build client: {e}"))?; + + let url = format!("{}/models/{}:generateContent?key={}", GEMINI_BASE_URL, model, key); + let t0 = std::time::Instant::now(); + let resp = client + .post(&url) + .json(&body) + .send() + .await + .map_err(|e| format!("generativelanguage.googleapis.com unreachable: {e}"))?; + + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_else(|_| "?".into()); + return Err(format!("gemini {}: {}", status, body)); + } + + let parsed: GmChatResponse = resp.json().await + .map_err(|e| format!("invalid gemini response: {e}"))?; + + let latency_ms = t0.elapsed().as_millis(); + let candidate = parsed.candidates.into_iter().next() + .ok_or_else(|| "gemini returned no candidates".to_string())?; + let text = candidate.content.parts.into_iter() + .next() + .map(|p| p.text) + .unwrap_or_default(); + + let prompt_tokens = parsed.usage_metadata.as_ref() + .map(|u| u.prompt_token_count) + .unwrap_or_else(|| { + let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum(); + ((chars + 3) / 4) as u32 + }); + let completion_tokens = parsed.usage_metadata.as_ref() + .map(|u| u.candidates_token_count) + .unwrap_or_else(|| ((text.chars().count() + 3) / 4) as u32); + + tracing::info!( + target: "v1.chat", + provider = "gemini", + model = %model, + prompt_tokens, + completion_tokens, + latency_ms = latency_ms as u64, + "gemini 
chat completed", + ); + + Ok(ChatResponse { + id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)), + object: "chat.completion", + created: chrono::Utc::now().timestamp(), + model, + choices: vec![Choice { + index: 0, + message: Message { role: "assistant".into(), content: text }, + finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()), + }], + usage: UsageBlock { + prompt_tokens, + completion_tokens, + total_tokens: prompt_tokens + completion_tokens, + }, + }) +} + +// -- Gemini wire shapes -- + +#[derive(Serialize)] +struct GmChatBody { + contents: Vec<GmContent>, + #[serde(rename = "generationConfig")] + generation_config: GmGenerationConfig, +} + +#[derive(Serialize)] +struct GmContent { + role: String, + parts: Vec<GmPart>, +} + +#[derive(Serialize)] +struct GmPart { text: String } + +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +struct GmGenerationConfig { + temperature: f64, + max_output_tokens: u32, +} + +#[derive(Deserialize)] +struct GmChatResponse { + candidates: Vec<GmCandidate>, + #[serde(default, rename = "usageMetadata")] + usage_metadata: Option<GmUsage>, +} + +#[derive(Deserialize)] +struct GmCandidate { + content: GmContentResp, + #[serde(default, rename = "finishReason")] + finish_reason: Option<String>, +} + +#[derive(Deserialize)] +struct GmContentResp { parts: Vec<GmPartResp> } + +#[derive(Deserialize)] +struct GmPartResp { #[serde(default)] text: String } + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct GmUsage { + prompt_token_count: u32, + candidates_token_count: u32, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn resolve_gemini_key_does_not_panic() { + let _ = resolve_gemini_key(); + } + + #[test] + fn chat_body_serializes_to_gemini_shape() { + let body = GmChatBody { + contents: vec![ + GmContent { + role: "user".into(), + parts: vec![GmPart { text: "hello".into() }], + }, + ], + generation_config: GmGenerationConfig { + temperature: 0.3, + max_output_tokens: 800, + }, + }; + let json = 
serde_json::to_string(&body).unwrap(); + assert!(json.contains("\"contents\"")); + assert!(json.contains("\"parts\"")); + // camelCase per Gemini API + assert!(json.contains("\"generationConfig\"")); + assert!(json.contains("\"maxOutputTokens\":800")); + } + + #[test] + fn model_prefix_strip_preserves_bare_names() { + let cases = [ + ("gemini/gemini-2.0-flash", "gemini-2.0-flash"), + ("gemini-2.0-flash", "gemini-2.0-flash"), + ]; + for (input, expected) in cases { + let out = input.strip_prefix("gemini/").unwrap_or(input); + assert_eq!(out, expected); + } + } +} diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs index f53c7c3..691b272 100644 --- a/crates/gateway/src/v1/mod.rs +++ b/crates/gateway/src/v1/mod.rs @@ -14,6 +14,8 @@ pub mod ollama; pub mod ollama_cloud; pub mod openrouter; +pub mod gemini; +pub mod claude; pub mod langfuse_trace; pub mod respond; pub mod truth; @@ -42,6 +44,14 @@ pub struct V1State { /// provider="openrouter" calls 503 rather than attempt. Same key /// sourcing as LLM Team UI so the two share one API quota. pub openrouter_key: Option<String>, + /// Gemini API key (Google Generative Language). Loaded at startup + /// via `gemini::resolve_gemini_key()`. None = provider="gemini" + /// calls 503. Phase 40 deliverable. + pub gemini_key: Option<String>, + /// Anthropic Claude API key. Loaded at startup via + /// `claude::resolve_claude_key()`. None = provider="claude" calls + /// 503. Phase 40 deliverable. + pub claude_key: Option<String>, /// Phase 40 early deliverable — Langfuse client. None = tracing /// disabled (keys missing or container unreachable). Traces are /// fire-and-forget: never block the response path. 
@@ -159,6 +169,12 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) { if let Some(rest) = req.model.strip_prefix("cloud/") { return ("ollama_cloud".to_string(), rest.to_string()); } + if let Some(rest) = req.model.strip_prefix("gemini/") { + return ("gemini".to_string(), rest.to_string()); + } + if let Some(rest) = req.model.strip_prefix("claude/") { + return ("claude".to_string(), rest.to_string()); + } ("ollama".to_string(), req.model.clone()) } @@ -210,6 +226,18 @@ mod resolve_provider_tests { let r = mk_req(Some("openrouter"), "openrouter/openai/gpt-4o-mini"); assert_eq!(resolve_provider(&r), ("openrouter".into(), "openrouter/openai/gpt-4o-mini".into())); } + + #[test] + fn gemini_prefix_infers_and_strips() { + let r = mk_req(None, "gemini/gemini-2.0-flash"); + assert_eq!(resolve_provider(&r), ("gemini".into(), "gemini-2.0-flash".into())); + } + + #[test] + fn claude_prefix_infers_and_strips() { + let r = mk_req(None, "claude/claude-3-5-sonnet-latest"); + assert_eq!(resolve_provider(&r), ("claude".into(), "claude-3-5-sonnet-latest".into())); + } } async fn chat( @@ -273,10 +301,34 @@ async fn chat( .map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?; (r, "openrouter".to_string()) } + "gemini" => { + // Phase 40 provider adapter. Google Generative Language + // API via query-string key auth (not bearer). + let key = state.gemini_key.as_deref().ok_or(( + StatusCode::SERVICE_UNAVAILABLE, + "GEMINI_API_KEY not configured".to_string(), + ))?; + let r = gemini::chat(key, &*req_for_adapter) + .await + .map_err(|e| (StatusCode::BAD_GATEWAY, format!("gemini: {e}")))?; + (r, "gemini".to_string()) + } + "claude" | "anthropic" => { + // Phase 40 provider adapter. Anthropic Messages API via + // x-api-key header + anthropic-version:2023-06-01. 
+ let key = state.claude_key.as_deref().ok_or(( + StatusCode::SERVICE_UNAVAILABLE, + "ANTHROPIC_API_KEY not configured".to_string(), + ))?; + let r = claude::chat(key, &*req_for_adapter) + .await + .map_err(|e| (StatusCode::BAD_GATEWAY, format!("claude: {e}")))?; + (r, "claude".to_string()) + } other => { return Err(( StatusCode::BAD_REQUEST, - format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter"), + format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude"), )); } };