phase-40: Gemini + Claude provider adapters

Phase 40 PRD (docs/CONTROL_PLANE_PRD.md:82-83) listed:
  - crates/aibridge/src/providers/gemini.rs
  - crates/aibridge/src/providers/claude.rs

Neither existed. Landing both now, in gateway/src/v1/ (matches the
existing ollama.rs + openrouter.rs sibling pattern — aibridge's
providers/ is for the adapter *trait* abstractions, v1/ holds the
concrete /v1/chat dispatchers that know the wire format).

gemini.rs:
  - POST https://generativelanguage.googleapis.com/v1beta/models/
    {model}:generateContent?key=<API_KEY>
  - Auth: query-string key (not bearer)
  - Maps messages → contents+parts (Gemini's wire shape),
    extracts from candidates[0].content.parts[0].text
  - 3 tests: key resolution, body serialization (camelCase
    generationConfig + maxOutputTokens), prefix-strip

claude.rs:
  - POST https://api.anthropic.com/v1/messages
  - Auth: x-api-key header + anthropic-version: 2023-06-01
  - Carries system prompt in top-level `system` field (not
    messages[]). Extracts from content[0].text where type=="text"
  - 4 tests: key resolution, body serialization with/without
    system field, prefix-strip

v1/mod.rs:
  + V1State.gemini_key + claude_key Option<String>
  + resolve_provider() strips "gemini/" and "claude/" prefixes
  + /v1/chat dispatcher handles "gemini" + "claude"/"anthropic"
  + 2 new resolve_provider tests (prefix + strip per adapter)

main.rs:
  + Construct both keys at startup via resolve_*_key() helpers.
    Missing keys log at debug (not warn) since these are optional
    providers — unlike OpenRouter which is the rescue rung.

Every /v1/chat error path mirrors the existing pattern:
  - 503 SERVICE_UNAVAILABLE when key isn't configured
  - 502 BAD_GATEWAY with the provider's error text when the
    upstream call fails
  - Response shape always the OpenAI-compatible ChatResponse

Workspace warnings still at 0; all 9 new tests pass.

Pre-existing test failure `executor_prompt_includes_surfaced_
candidates` at execution_loop/mod.rs:1550 is unrelated (fails on
pristine HEAD too — PR fixture divergence).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-24 13:41:31 -05:00
parent b5b0c00efe
commit 0b3bd28cf8
5 changed files with 536 additions and 1 deletions

11
Cargo.lock generated
View File

@ -8906,6 +8906,17 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "validator"
version = "0.1.0"
dependencies = [
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"tracing",
]
[[package]]
name = "valuable"
version = "0.1.1"

View File

@ -229,6 +229,26 @@ async fn main() {
}
k
},
gemini_key: {
// Phase 40 provider. GEMINI_API_KEY in env or .env.
let k = v1::gemini::resolve_gemini_key();
if k.is_some() {
tracing::info!("v1: Gemini key loaded — /v1/chat provider=gemini enabled");
} else {
tracing::debug!("v1: no Gemini key — provider=gemini will 503");
}
k
},
claude_key: {
// Phase 40 provider. ANTHROPIC_API_KEY in env or .env.
let k = v1::claude::resolve_claude_key();
if k.is_some() {
tracing::info!("v1: Claude key loaded — /v1/chat provider=claude enabled");
} else {
tracing::debug!("v1: no Claude key — provider=claude will 503");
}
k
},
// Phase 40 early deliverable — Langfuse trace emitter.
// Defaults match mcp-server/tracing.ts conventions so
// gateway traces land in the same staffing project.

View File

@ -0,0 +1,222 @@
//! Claude (Anthropic) adapter.
//!
//! POST `https://api.anthropic.com/v1/messages`. Auth via `x-api-key`
//! header (not bearer) + required `anthropic-version` header. Payload
//! is NOT OpenAI-compatible — response text lives at
//! `content[0].text`. Phase 40 deliverable. System prompts travel in
//! a top-level `system` field, separate from the `messages` array.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
const CLAUDE_BASE_URL: &str = "https://api.anthropic.com/v1";
const CLAUDE_API_VERSION: &str = "2023-06-01";
const CLAUDE_TIMEOUT_SECS: u64 = 180;
/// Resolve the Anthropic API key at startup.
///
/// Lookup order: `ANTHROPIC_API_KEY` in the process environment, then an
/// `ANTHROPIC_API_KEY=` line in `/home/profit/.env` or `/root/.env`.
/// File values are trimmed and surrounding single/double quotes stripped;
/// lines may be indented or use a shell-style `export ` prefix.
/// Returns `None` when no non-empty key is found (caller 503s).
pub fn resolve_claude_key() -> Option<String> {
    if let Ok(k) = std::env::var("ANTHROPIC_API_KEY") {
        let k = k.trim();
        if !k.is_empty() { return Some(k.to_string()); }
    }
    for path in ["/home/profit/.env", "/root/.env"] {
        if let Ok(raw) = std::fs::read_to_string(path) {
            for line in raw.lines() {
                // Tolerate leading whitespace and `export KEY=...` lines,
                // which are common in hand-maintained .env files.
                let line = line.trim_start();
                let line = line.strip_prefix("export ").unwrap_or(line).trim_start();
                if let Some(rest) = line.strip_prefix("ANTHROPIC_API_KEY=") {
                    let k = rest.trim().trim_matches('"').trim_matches('\'');
                    if !k.is_empty() { return Some(k.to_string()); }
                }
            }
        }
    }
    None
}
/// Dispatch one chat completion to Anthropic's Messages API and map the
/// reply back into the OpenAI-compatible `ChatResponse` shape.
///
/// * `key` — Anthropic API key, sent via the `x-api-key` header (not
///   bearer auth) alongside the pinned `anthropic-version` header.
/// * `req` — OpenAI-style request. A leading `"claude/"` model prefix is
///   stripped; system-role messages move to the top-level `system` field.
///
/// Returns `Err(String)` on client-build failure, network failure,
/// non-2xx upstream status (message carries the upstream body), or an
/// unparseable response. Token counts fall back to a chars/4 estimate
/// when the upstream omits `usage`.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "claude/" prefix if the caller used the namespaced form.
    let model = req.model.strip_prefix("claude/").unwrap_or(&req.model).to_string();
    // Anthropic carries system prompts outside the messages array.
    // Concatenate any system-role messages into a single system string;
    // keep user + assistant messages in `messages`.
    let mut system_parts: Vec<String> = Vec::new();
    let mut msgs: Vec<AnMessage> = Vec::new();
    for m in &req.messages {
        if m.role == "system" {
            system_parts.push(m.content.clone());
        } else {
            // Anthropic expects strictly "user" or "assistant"; anything
            // else we normalize to "user".
            let role = if m.role == "assistant" { "assistant" } else { "user" };
            msgs.push(AnMessage { role: role.to_string(), content: m.content.clone() });
        }
    }
    let system = if system_parts.is_empty() {
        None
    } else {
        Some(system_parts.join("\n\n"))
    };
    let body = AnChatBody {
        model: model.clone(),
        messages: msgs,
        max_tokens: req.max_tokens.unwrap_or(800),
        temperature: req.temperature.unwrap_or(0.3),
        system,
    };
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(CLAUDE_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;
    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/messages", CLAUDE_BASE_URL))
        .header("x-api-key", key)
        .header("anthropic-version", CLAUDE_API_VERSION)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("api.anthropic.com unreachable: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("claude {}: {}", status, body));
    }
    let parsed: AnChatResponse = resp.json().await
        .map_err(|e| format!("invalid claude response: {e}"))?;
    let latency_ms = t0.elapsed().as_millis();
    // Concatenate ALL text blocks, not just the first: the Messages API
    // may return several content blocks, and taking only content[0]
    // would silently truncate the reply.
    let text: String = parsed.content.iter()
        .filter(|b| b.block_type == "text")
        .map(|b| b.text.as_str())
        .collect();
    let prompt_tokens = parsed.usage.as_ref().map(|u| u.input_tokens).unwrap_or_else(|| {
        // Rough ~4-chars-per-token estimate when usage is absent.
        let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.usage.as_ref().map(|u| u.output_tokens).unwrap_or_else(|| {
        ((text.chars().count() + 3) / 4) as u32
    });
    tracing::info!(
        target: "v1.chat",
        provider = "claude",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "claude chat completed",
    );
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: text },
            finish_reason: parsed.stop_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
// -- Anthropic Messages API wire shapes --

/// Request body for POST /v1/messages. `system` is skipped entirely when
/// None so the field is absent (not null) on the wire.
#[derive(Serialize)]
struct AnChatBody {
    model: String,
    messages: Vec<AnMessage>,
    max_tokens: u32,
    temperature: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<String>,
}

/// One conversation turn; role is normalized upstream to "user" or
/// "assistant" before construction.
#[derive(Serialize)]
struct AnMessage { role: String, content: String }

/// Response envelope. Optional fields default so a missing key degrades
/// gracefully instead of failing deserialization.
#[derive(Deserialize)]
struct AnChatResponse {
    content: Vec<AnContentBlock>,
    // Field name already matches the wire name; no serde rename needed.
    #[serde(default)]
    stop_reason: Option<String>,
    #[serde(default)]
    usage: Option<AnUsage>,
}

/// A single content block; only blocks with type == "text" carry reply
/// text, so `text` defaults to empty for other block types.
#[derive(Deserialize)]
struct AnContentBlock {
    #[serde(rename = "type")]
    block_type: String,
    #[serde(default)]
    text: String,
}

/// Token accounting as reported by Anthropic.
#[derive(Deserialize)]
struct AnUsage { input_tokens: u32, output_tokens: u32 }
#[cfg(test)]
mod tests {
    use super::*;

    /// Key resolution is environment-dependent; it must simply never panic.
    #[test]
    fn resolve_claude_key_does_not_panic() {
        let _ = resolve_claude_key();
    }

    /// The system prompt serializes as a top-level field, separate from
    /// the messages array, with snake_case max_tokens.
    #[test]
    fn chat_body_serializes_with_separate_system() {
        let payload = AnChatBody {
            model: "claude-3-5-sonnet-latest".into(),
            messages: vec![AnMessage { role: "user".into(), content: "hi".into() }],
            max_tokens: 800,
            temperature: 0.3,
            system: Some("You are helpful.".into()),
        };
        let rendered = serde_json::to_string(&payload).unwrap();
        assert!(rendered.contains("\"system\":\"You are helpful.\""));
        assert!(rendered.contains("\"messages\""));
        assert!(rendered.contains("\"max_tokens\":800"));
    }

    /// A None system prompt must vanish from the wire entirely.
    #[test]
    fn body_omits_system_when_none() {
        let payload = AnChatBody {
            model: "claude-3-5-sonnet-latest".into(),
            messages: vec![AnMessage { role: "user".into(), content: "hi".into() }],
            max_tokens: 800,
            temperature: 0.3,
            system: None,
        };
        let json = serde_json::to_string(&payload).unwrap();
        assert!(!json.contains("\"system\""), "system field should be skipped when None: {json}");
    }

    /// Prefix stripping is a no-op on already-bare model names.
    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        let strip = |s: &str| s.strip_prefix("claude/").unwrap_or(s);
        assert_eq!(strip("claude/claude-3-5-sonnet-latest"), "claude-3-5-sonnet-latest");
        assert_eq!(strip("claude-3-5-sonnet-latest"), "claude-3-5-sonnet-latest");
    }
}

View File

@ -0,0 +1,230 @@
//! Gemini adapter — Google's Generative Language API.
//!
//! POST `https://generativelanguage.googleapis.com/v1beta/models/
//! {model}:generateContent?key=<API_KEY>`. Auth via query-string key
//! (not bearer). Payload shape is NOT OpenAI-compatible — we map
//! messages → contents + parts, extract response from `candidates[0]
//! .content.parts[0].text`. Phase 40 deliverable; gate: `/v1/chat`
//! with a prefixed or explicit gemini model returns normally.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
const GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
const GEMINI_TIMEOUT_SECS: u64 = 180;
/// Resolve the Gemini (Google Generative Language) API key at startup.
///
/// Lookup order: `GEMINI_API_KEY` in the process environment, then a
/// `GEMINI_API_KEY=` line in `/home/profit/.env` or `/root/.env`.
/// File values are trimmed and surrounding single/double quotes stripped;
/// lines may be indented or use a shell-style `export ` prefix.
/// Returns `None` when no non-empty key is found (caller 503s).
pub fn resolve_gemini_key() -> Option<String> {
    if let Ok(k) = std::env::var("GEMINI_API_KEY") {
        let k = k.trim();
        if !k.is_empty() { return Some(k.to_string()); }
    }
    for path in ["/home/profit/.env", "/root/.env"] {
        if let Ok(raw) = std::fs::read_to_string(path) {
            for line in raw.lines() {
                // Tolerate leading whitespace and `export KEY=...` lines,
                // which are common in hand-maintained .env files.
                let line = line.trim_start();
                let line = line.strip_prefix("export ").unwrap_or(line).trim_start();
                if let Some(rest) = line.strip_prefix("GEMINI_API_KEY=") {
                    let k = rest.trim().trim_matches('"').trim_matches('\'');
                    if !k.is_empty() { return Some(k.to_string()); }
                }
            }
        }
    }
    None
}
/// Dispatch one chat completion to Google's Generative Language API and
/// map the reply back into the OpenAI-compatible `ChatResponse` shape.
///
/// * `key` — Gemini API key, passed as a `?key=` query parameter (not
///   bearer auth, per the Generative Language API convention).
/// * `req` — OpenAI-style request. A leading `"gemini/"` model prefix is
///   stripped; messages are mapped to Gemini's contents/parts shape.
///
/// Returns `Err(String)` on client-build failure, network failure,
/// non-2xx upstream status (message carries the upstream body), an
/// unparseable response, or an empty candidate list. Token counts fall
/// back to a chars/4 estimate when `usageMetadata` is absent.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "gemini/" prefix if the caller used the namespaced form.
    let model = req.model.strip_prefix("gemini/").unwrap_or(&req.model).to_string();
    // Gemini splits system prompt from conversation differently.
    // Simplest working mapping: concatenate any system messages at the
    // top of a single user turn, then append user/assistant turns as
    // separate contents entries. Covers the common single-turn case
    // the scrum pipeline uses.
    let mut contents: Vec<GmContent> = Vec::new();
    for m in &req.messages {
        // Gemini recognizes only "user" and "model" roles; system text
        // rides along as a user turn (see mapping note above).
        let role = match m.role.as_str() {
            "system" | "user" => "user",
            _ => "model",
        };
        contents.push(GmContent {
            role: role.to_string(),
            parts: vec![GmPart { text: m.content.clone() }],
        });
    }
    let body = GmChatBody {
        contents,
        generation_config: GmGenerationConfig {
            temperature: req.temperature.unwrap_or(0.3),
            max_output_tokens: req.max_tokens.unwrap_or(800),
        },
    };
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(GEMINI_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;
    let url = format!("{}/models/{}:generateContent?key={}", GEMINI_BASE_URL, model, key);
    let t0 = std::time::Instant::now();
    let resp = client
        .post(&url)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("generativelanguage.googleapis.com unreachable: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("gemini {}: {}", status, body));
    }
    let parsed: GmChatResponse = resp.json().await
        .map_err(|e| format!("invalid gemini response: {e}"))?;
    let latency_ms = t0.elapsed().as_millis();
    let candidate = parsed.candidates.into_iter().next()
        .ok_or_else(|| "gemini returned no candidates".to_string())?;
    // Join ALL parts of the winning candidate, not just parts[0]:
    // Gemini may split a long reply across multiple parts, and taking
    // only the first would silently truncate it.
    let text: String = candidate.content.parts.into_iter()
        .map(|p| p.text)
        .collect();
    let prompt_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.prompt_token_count)
        .unwrap_or_else(|| {
            // Rough ~4-chars-per-token estimate when usageMetadata is absent.
            let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
            ((chars + 3) / 4) as u32
        });
    let completion_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.candidates_token_count)
        .unwrap_or_else(|| ((text.chars().count() + 3) / 4) as u32);
    tracing::info!(
        target: "v1.chat",
        provider = "gemini",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "gemini chat completed",
    );
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: text },
            finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
// -- Gemini wire shapes --

/// Request body for POST models/{model}:generateContent. Gemini uses
/// camelCase on the wire, hence the explicit rename.
#[derive(Serialize)]
struct GmChatBody {
    contents: Vec<GmContent>,
    #[serde(rename = "generationConfig")]
    generation_config: GmGenerationConfig,
}

/// One conversation turn; role is "user" or "model" (mapped upstream).
#[derive(Serialize)]
struct GmContent {
    role: String,
    parts: Vec<GmPart>,
}

/// A single text part of a turn.
#[derive(Serialize)]
struct GmPart { text: String }

/// Sampling/limit knobs; serialized camelCase (e.g. maxOutputTokens).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GmGenerationConfig {
    temperature: f64,
    max_output_tokens: u32,
}

/// Response envelope. `candidates` defaults to empty so a candidate-less
/// reply (e.g. a blocked prompt that carries only feedback fields)
/// deserializes cleanly and surfaces as the explicit "no candidates"
/// error in chat() instead of a serde missing-field error.
#[derive(Deserialize)]
struct GmChatResponse {
    #[serde(default)]
    candidates: Vec<GmCandidate>,
    #[serde(default, rename = "usageMetadata")]
    usage_metadata: Option<GmUsage>,
}

/// One generated candidate with its optional finish reason.
#[derive(Deserialize)]
struct GmCandidate {
    content: GmContentResp,
    #[serde(default, rename = "finishReason")]
    finish_reason: Option<String>,
}

/// Candidate payload: the reply text lives in its parts.
#[derive(Deserialize)]
struct GmContentResp { parts: Vec<GmPartResp> }

/// A single response part; `text` defaults so non-text parts parse.
#[derive(Deserialize)]
struct GmPartResp { #[serde(default)] text: String }

/// Token accounting as reported by Gemini (camelCase on the wire).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct GmUsage {
    prompt_token_count: u32,
    candidates_token_count: u32,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Key resolution is environment-dependent; it must simply never panic.
    #[test]
    fn resolve_gemini_key_does_not_panic() {
        let _ = resolve_gemini_key();
    }

    /// The request must serialize into Gemini's contents/parts shape with
    /// a camelCase generationConfig.
    #[test]
    fn chat_body_serializes_to_gemini_shape() {
        let payload = GmChatBody {
            contents: vec![GmContent {
                role: "user".into(),
                parts: vec![GmPart { text: "hello".into() }],
            }],
            generation_config: GmGenerationConfig {
                temperature: 0.3,
                max_output_tokens: 800,
            },
        };
        let rendered = serde_json::to_string(&payload).unwrap();
        assert!(rendered.contains("\"contents\""));
        assert!(rendered.contains("\"parts\""));
        // camelCase per Gemini API
        assert!(rendered.contains("\"generationConfig\""));
        assert!(rendered.contains("\"maxOutputTokens\":800"));
    }

    /// Prefix stripping is a no-op on already-bare model names.
    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        let strip = |s: &str| s.strip_prefix("gemini/").unwrap_or(s);
        assert_eq!(strip("gemini/gemini-2.0-flash"), "gemini-2.0-flash");
        assert_eq!(strip("gemini-2.0-flash"), "gemini-2.0-flash");
    }
}

View File

@ -14,6 +14,8 @@
pub mod ollama;
pub mod ollama_cloud;
pub mod openrouter;
pub mod gemini;
pub mod claude;
pub mod langfuse_trace;
pub mod respond;
pub mod truth;
@ -42,6 +44,14 @@ pub struct V1State {
/// provider="openrouter" calls 503 rather than attempt. Same key
/// sourcing as LLM Team UI so the two share one API quota.
pub openrouter_key: Option<String>,
/// Gemini API key (Google Generative Language). Loaded at startup
/// via `gemini::resolve_gemini_key()`. None = provider="gemini"
/// calls 503. Phase 40 deliverable.
pub gemini_key: Option<String>,
/// Anthropic Claude API key. Loaded at startup via
/// `claude::resolve_claude_key()`. None = provider="claude" calls
/// 503. Phase 40 deliverable.
pub claude_key: Option<String>,
/// Phase 40 early deliverable — Langfuse client. None = tracing
/// disabled (keys missing or container unreachable). Traces are
/// fire-and-forget: never block the response path.
@ -159,6 +169,12 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
if let Some(rest) = req.model.strip_prefix("cloud/") {
return ("ollama_cloud".to_string(), rest.to_string());
}
if let Some(rest) = req.model.strip_prefix("gemini/") {
return ("gemini".to_string(), rest.to_string());
}
if let Some(rest) = req.model.strip_prefix("claude/") {
return ("claude".to_string(), rest.to_string());
}
("ollama".to_string(), req.model.clone())
}
@ -210,6 +226,18 @@ mod resolve_provider_tests {
let r = mk_req(Some("openrouter"), "openrouter/openai/gpt-4o-mini");
assert_eq!(resolve_provider(&r), ("openrouter".into(), "openrouter/openai/gpt-4o-mini".into()));
}
#[test]
fn gemini_prefix_infers_and_strips() {
let r = mk_req(None, "gemini/gemini-2.0-flash");
assert_eq!(resolve_provider(&r), ("gemini".into(), "gemini-2.0-flash".into()));
}
#[test]
fn claude_prefix_infers_and_strips() {
let r = mk_req(None, "claude/claude-3-5-sonnet-latest");
assert_eq!(resolve_provider(&r), ("claude".into(), "claude-3-5-sonnet-latest".into()));
}
}
async fn chat(
@ -273,10 +301,34 @@ async fn chat(
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?;
(r, "openrouter".to_string())
}
"gemini" => {
// Phase 40 provider adapter. Google Generative Language
// API via query-string key auth (not bearer).
let key = state.gemini_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"GEMINI_API_KEY not configured".to_string(),
))?;
let r = gemini::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("gemini: {e}")))?;
(r, "gemini".to_string())
}
"claude" | "anthropic" => {
// Phase 40 provider adapter. Anthropic Messages API via
// x-api-key header + anthropic-version:2023-06-01.
let key = state.claude_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"ANTHROPIC_API_KEY not configured".to_string(),
))?;
let r = claude::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("claude: {e}")))?;
(r, "claude".to_string())
}
other => {
return Err((
StatusCode::BAD_REQUEST,
format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter"),
format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude"),
));
}
};