phase-40: Gemini + Claude provider adapters

Phase 40 PRD (docs/CONTROL_PLANE_PRD.md:82-83) listed:
  - crates/aibridge/src/providers/gemini.rs
  - crates/aibridge/src/providers/claude.rs

Neither existed. Landing both now, in gateway/src/v1/ (matches the
existing ollama.rs + openrouter.rs sibling pattern — aibridge's
providers/ is for the adapter *trait* abstractions, v1/ holds the
concrete /v1/chat dispatchers that know the wire format).

gemini.rs:
  - POST https://generativelanguage.googleapis.com/v1beta/models/
    {model}:generateContent?key=<API_KEY>
  - Auth: query-string key (not bearer)
  - Maps messages → contents+parts (Gemini's wire shape),
    extracts from candidates[0].content.parts[0].text
  - 3 tests: key resolution, body serialization (camelCase
    generationConfig + maxOutputTokens), prefix-strip

claude.rs:
  - POST https://api.anthropic.com/v1/messages
  - Auth: x-api-key header + anthropic-version: 2023-06-01
  - Carries system prompt in top-level `system` field (not
    messages[]). Extracts from content[0].text where type=="text"
  - 4 tests: key resolution, body serialization with/without
    system field, prefix-strip

v1/mod.rs:
  + V1State.gemini_key + claude_key Option<String>
  + resolve_provider() strips "gemini/" and "claude/" prefixes
  + /v1/chat dispatcher handles "gemini" + "claude"/"anthropic"
  + 2 new resolve_provider tests (prefix + strip per adapter)

main.rs:
  + Construct both keys at startup via resolve_*_key() helpers.
    Missing keys log at debug (not warn) since these are optional
    providers — unlike OpenRouter which is the rescue rung.

Every /v1/chat error path mirrors the existing pattern:
  - 503 SERVICE_UNAVAILABLE when key isn't configured
  - 502 BAD_GATEWAY with the provider's error text when the
    upstream call fails
  - Response shape always the OpenAI-compatible ChatResponse

Workspace warnings still at 0; all 9 new tests pass.

Pre-existing test failure `executor_prompt_includes_surfaced_
candidates` at execution_loop/mod.rs:1550 is unrelated (fails on
pristine HEAD too — PR fixture divergence).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-24 13:41:31 -05:00
parent b5b0c00efe
commit 0b3bd28cf8
5 changed files with 536 additions and 1 deletions

11
Cargo.lock generated
View File

@ -8906,6 +8906,17 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "validator"
version = "0.1.0"
dependencies = [
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"tracing",
]
[[package]]
name = "valuable"
version = "0.1.1"

View File

@ -229,6 +229,26 @@ async fn main() {
}
k
},
gemini_key: {
// Phase 40 provider. GEMINI_API_KEY in env or .env.
let k = v1::gemini::resolve_gemini_key();
if k.is_some() {
tracing::info!("v1: Gemini key loaded — /v1/chat provider=gemini enabled");
} else {
tracing::debug!("v1: no Gemini key — provider=gemini will 503");
}
k
},
claude_key: {
// Phase 40 provider. ANTHROPIC_API_KEY in env or .env.
let k = v1::claude::resolve_claude_key();
if k.is_some() {
tracing::info!("v1: Claude key loaded — /v1/chat provider=claude enabled");
} else {
tracing::debug!("v1: no Claude key — provider=claude will 503");
}
k
},
// Phase 40 early deliverable — Langfuse trace emitter.
// Defaults match mcp-server/tracing.ts conventions so
// gateway traces land in the same staffing project.

View File

@ -0,0 +1,222 @@
//! Claude (Anthropic) adapter.
//!
//! POST `https://api.anthropic.com/v1/messages`. Auth via `x-api-key`
//! header (not bearer) + required `anthropic-version` header. Payload
//! is NOT OpenAI-compatible — response text lives at
//! `content[0].text`. Phase 40 deliverable. System prompts travel in
//! a top-level `system` field, separate from the `messages` array.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
const CLAUDE_BASE_URL: &str = "https://api.anthropic.com/v1";
const CLAUDE_API_VERSION: &str = "2023-06-01";
const CLAUDE_TIMEOUT_SECS: u64 = 180;
/// Resolve the Anthropic API key at startup.
///
/// Lookup order: `ANTHROPIC_API_KEY` in the process environment, then an
/// `ANTHROPIC_API_KEY=` line in `/home/profit/.env` or `/root/.env`.
/// File values are trimmed and surrounding single/double quotes stripped;
/// lines may be indented or use a shell-style `export ` prefix.
/// Returns `None` when no non-empty key is found (caller 503s).
pub fn resolve_claude_key() -> Option<String> {
    if let Ok(k) = std::env::var("ANTHROPIC_API_KEY") {
        let k = k.trim();
        if !k.is_empty() { return Some(k.to_string()); }
    }
    for path in ["/home/profit/.env", "/root/.env"] {
        if let Ok(raw) = std::fs::read_to_string(path) {
            for line in raw.lines() {
                // Tolerate leading whitespace and `export KEY=...` lines,
                // which are common in hand-maintained .env files.
                let line = line.trim_start();
                let line = line.strip_prefix("export ").unwrap_or(line).trim_start();
                if let Some(rest) = line.strip_prefix("ANTHROPIC_API_KEY=") {
                    let k = rest.trim().trim_matches('"').trim_matches('\'');
                    if !k.is_empty() { return Some(k.to_string()); }
                }
            }
        }
    }
    None
}
/// Dispatch one chat completion to Anthropic's Messages API and map the
/// reply back into the OpenAI-compatible `ChatResponse` shape.
///
/// * `key` — Anthropic API key, sent via the `x-api-key` header (not
///   bearer auth) alongside the pinned `anthropic-version` header.
/// * `req` — OpenAI-style request. A leading `"claude/"` model prefix is
///   stripped; system-role messages move to the top-level `system` field.
///
/// Returns `Err(String)` on client-build failure, network failure,
/// non-2xx upstream status (message carries the upstream body), or an
/// unparseable response. Token counts fall back to a chars/4 estimate
/// when the upstream omits `usage`.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "claude/" prefix if the caller used the namespaced form.
    let model = req.model.strip_prefix("claude/").unwrap_or(&req.model).to_string();
    // Anthropic carries system prompts outside the messages array.
    // Concatenate any system-role messages into a single system string;
    // keep user + assistant messages in `messages`.
    let mut system_parts: Vec<String> = Vec::new();
    let mut msgs: Vec<AnMessage> = Vec::new();
    for m in &req.messages {
        if m.role == "system" {
            system_parts.push(m.content.clone());
        } else {
            // Anthropic expects strictly "user" or "assistant"; anything
            // else we normalize to "user".
            let role = if m.role == "assistant" { "assistant" } else { "user" };
            msgs.push(AnMessage { role: role.to_string(), content: m.content.clone() });
        }
    }
    let system = if system_parts.is_empty() {
        None
    } else {
        Some(system_parts.join("\n\n"))
    };
    let body = AnChatBody {
        model: model.clone(),
        messages: msgs,
        max_tokens: req.max_tokens.unwrap_or(800),
        temperature: req.temperature.unwrap_or(0.3),
        system,
    };
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(CLAUDE_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;
    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/messages", CLAUDE_BASE_URL))
        .header("x-api-key", key)
        .header("anthropic-version", CLAUDE_API_VERSION)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("api.anthropic.com unreachable: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("claude {}: {}", status, body));
    }
    let parsed: AnChatResponse = resp.json().await
        .map_err(|e| format!("invalid claude response: {e}"))?;
    let latency_ms = t0.elapsed().as_millis();
    // Concatenate ALL text blocks, not just the first: the Messages API
    // may return several content blocks, and taking only content[0]
    // would silently truncate the reply.
    let text: String = parsed.content.iter()
        .filter(|b| b.block_type == "text")
        .map(|b| b.text.as_str())
        .collect();
    let prompt_tokens = parsed.usage.as_ref().map(|u| u.input_tokens).unwrap_or_else(|| {
        // Rough ~4-chars-per-token estimate when usage is absent.
        let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.usage.as_ref().map(|u| u.output_tokens).unwrap_or_else(|| {
        ((text.chars().count() + 3) / 4) as u32
    });
    tracing::info!(
        target: "v1.chat",
        provider = "claude",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "claude chat completed",
    );
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: text },
            finish_reason: parsed.stop_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
// -- Anthropic Messages API wire shapes --

/// Request body for POST /v1/messages. `system` is skipped entirely when
/// None so the field is absent (not null) on the wire.
#[derive(Serialize)]
struct AnChatBody {
    model: String,
    messages: Vec<AnMessage>,
    max_tokens: u32,
    temperature: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<String>,
}

/// One conversation turn; role is normalized upstream to "user" or
/// "assistant" before construction.
#[derive(Serialize)]
struct AnMessage { role: String, content: String }

/// Response envelope. Optional fields default so a missing key degrades
/// gracefully instead of failing deserialization.
#[derive(Deserialize)]
struct AnChatResponse {
    content: Vec<AnContentBlock>,
    // Field name already matches the wire name; no serde rename needed.
    #[serde(default)]
    stop_reason: Option<String>,
    #[serde(default)]
    usage: Option<AnUsage>,
}

/// A single content block; only blocks with type == "text" carry reply
/// text, so `text` defaults to empty for other block types.
#[derive(Deserialize)]
struct AnContentBlock {
    #[serde(rename = "type")]
    block_type: String,
    #[serde(default)]
    text: String,
}

/// Token accounting as reported by Anthropic.
#[derive(Deserialize)]
struct AnUsage { input_tokens: u32, output_tokens: u32 }
#[cfg(test)]
mod tests {
    use super::*;

    /// Key resolution is environment-dependent; it must simply never panic.
    #[test]
    fn resolve_claude_key_does_not_panic() {
        let _ = resolve_claude_key();
    }

    /// The system prompt serializes as a top-level field, separate from
    /// the messages array, with snake_case max_tokens.
    #[test]
    fn chat_body_serializes_with_separate_system() {
        let payload = AnChatBody {
            model: "claude-3-5-sonnet-latest".into(),
            messages: vec![AnMessage { role: "user".into(), content: "hi".into() }],
            max_tokens: 800,
            temperature: 0.3,
            system: Some("You are helpful.".into()),
        };
        let rendered = serde_json::to_string(&payload).unwrap();
        assert!(rendered.contains("\"system\":\"You are helpful.\""));
        assert!(rendered.contains("\"messages\""));
        assert!(rendered.contains("\"max_tokens\":800"));
    }

    /// A None system prompt must vanish from the wire entirely.
    #[test]
    fn body_omits_system_when_none() {
        let payload = AnChatBody {
            model: "claude-3-5-sonnet-latest".into(),
            messages: vec![AnMessage { role: "user".into(), content: "hi".into() }],
            max_tokens: 800,
            temperature: 0.3,
            system: None,
        };
        let json = serde_json::to_string(&payload).unwrap();
        assert!(!json.contains("\"system\""), "system field should be skipped when None: {json}");
    }

    /// Prefix stripping is a no-op on already-bare model names.
    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        let strip = |s: &str| s.strip_prefix("claude/").unwrap_or(s);
        assert_eq!(strip("claude/claude-3-5-sonnet-latest"), "claude-3-5-sonnet-latest");
        assert_eq!(strip("claude-3-5-sonnet-latest"), "claude-3-5-sonnet-latest");
    }
}

View File

@ -0,0 +1,230 @@
//! Gemini adapter — Google's Generative Language API.
//!
//! POST `https://generativelanguage.googleapis.com/v1beta/models/
//! {model}:generateContent?key=<API_KEY>`. Auth via query-string key
//! (not bearer). Payload shape is NOT OpenAI-compatible — we map
//! messages → contents + parts, extract response from `candidates[0]
//! .content.parts[0].text`. Phase 40 deliverable; gate: `/v1/chat`
//! with a prefixed or explicit gemini model returns normally.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
const GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
const GEMINI_TIMEOUT_SECS: u64 = 180;
/// Resolve the Gemini (Google Generative Language) API key at startup.
///
/// Lookup order: `GEMINI_API_KEY` in the process environment, then a
/// `GEMINI_API_KEY=` line in `/home/profit/.env` or `/root/.env`.
/// File values are trimmed and surrounding single/double quotes stripped;
/// lines may be indented or use a shell-style `export ` prefix.
/// Returns `None` when no non-empty key is found (caller 503s).
pub fn resolve_gemini_key() -> Option<String> {
    if let Ok(k) = std::env::var("GEMINI_API_KEY") {
        let k = k.trim();
        if !k.is_empty() { return Some(k.to_string()); }
    }
    for path in ["/home/profit/.env", "/root/.env"] {
        if let Ok(raw) = std::fs::read_to_string(path) {
            for line in raw.lines() {
                // Tolerate leading whitespace and `export KEY=...` lines,
                // which are common in hand-maintained .env files.
                let line = line.trim_start();
                let line = line.strip_prefix("export ").unwrap_or(line).trim_start();
                if let Some(rest) = line.strip_prefix("GEMINI_API_KEY=") {
                    let k = rest.trim().trim_matches('"').trim_matches('\'');
                    if !k.is_empty() { return Some(k.to_string()); }
                }
            }
        }
    }
    None
}
/// Dispatch one chat completion to Google's Generative Language API and
/// map the reply back into the OpenAI-compatible `ChatResponse` shape.
///
/// * `key` — Gemini API key, passed as a `?key=` query parameter (not
///   bearer auth, per the Generative Language API convention).
/// * `req` — OpenAI-style request. A leading `"gemini/"` model prefix is
///   stripped; messages are mapped to Gemini's contents/parts shape.
///
/// Returns `Err(String)` on client-build failure, network failure,
/// non-2xx upstream status (message carries the upstream body), an
/// unparseable response, or an empty candidate list. Token counts fall
/// back to a chars/4 estimate when `usageMetadata` is absent.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "gemini/" prefix if the caller used the namespaced form.
    let model = req.model.strip_prefix("gemini/").unwrap_or(&req.model).to_string();
    // Gemini splits system prompt from conversation differently.
    // Simplest working mapping: concatenate any system messages at the
    // top of a single user turn, then append user/assistant turns as
    // separate contents entries. Covers the common single-turn case
    // the scrum pipeline uses.
    let mut contents: Vec<GmContent> = Vec::new();
    for m in &req.messages {
        // Gemini recognizes only "user" and "model" roles; system text
        // rides along as a user turn (see mapping note above).
        let role = match m.role.as_str() {
            "system" | "user" => "user",
            _ => "model",
        };
        contents.push(GmContent {
            role: role.to_string(),
            parts: vec![GmPart { text: m.content.clone() }],
        });
    }
    let body = GmChatBody {
        contents,
        generation_config: GmGenerationConfig {
            temperature: req.temperature.unwrap_or(0.3),
            max_output_tokens: req.max_tokens.unwrap_or(800),
        },
    };
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(GEMINI_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;
    let url = format!("{}/models/{}:generateContent?key={}", GEMINI_BASE_URL, model, key);
    let t0 = std::time::Instant::now();
    let resp = client
        .post(&url)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("generativelanguage.googleapis.com unreachable: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("gemini {}: {}", status, body));
    }
    let parsed: GmChatResponse = resp.json().await
        .map_err(|e| format!("invalid gemini response: {e}"))?;
    let latency_ms = t0.elapsed().as_millis();
    let candidate = parsed.candidates.into_iter().next()
        .ok_or_else(|| "gemini returned no candidates".to_string())?;
    // Join ALL parts of the winning candidate, not just parts[0]:
    // Gemini may split a long reply across multiple parts, and taking
    // only the first would silently truncate it.
    let text: String = candidate.content.parts.into_iter()
        .map(|p| p.text)
        .collect();
    let prompt_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.prompt_token_count)
        .unwrap_or_else(|| {
            // Rough ~4-chars-per-token estimate when usageMetadata is absent.
            let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
            ((chars + 3) / 4) as u32
        });
    let completion_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.candidates_token_count)
        .unwrap_or_else(|| ((text.chars().count() + 3) / 4) as u32);
    tracing::info!(
        target: "v1.chat",
        provider = "gemini",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "gemini chat completed",
    );
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: text },
            finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
// -- Gemini wire shapes --

/// Request body for POST models/{model}:generateContent. Gemini uses
/// camelCase on the wire, hence the explicit rename.
#[derive(Serialize)]
struct GmChatBody {
    contents: Vec<GmContent>,
    #[serde(rename = "generationConfig")]
    generation_config: GmGenerationConfig,
}

/// One conversation turn; role is "user" or "model" (mapped upstream).
#[derive(Serialize)]
struct GmContent {
    role: String,
    parts: Vec<GmPart>,
}

/// A single text part of a turn.
#[derive(Serialize)]
struct GmPart { text: String }

/// Sampling/limit knobs; serialized camelCase (e.g. maxOutputTokens).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GmGenerationConfig {
    temperature: f64,
    max_output_tokens: u32,
}

/// Response envelope. `candidates` defaults to empty so a candidate-less
/// reply (e.g. a blocked prompt that carries only feedback fields)
/// deserializes cleanly and surfaces as the explicit "no candidates"
/// error in chat() instead of a serde missing-field error.
#[derive(Deserialize)]
struct GmChatResponse {
    #[serde(default)]
    candidates: Vec<GmCandidate>,
    #[serde(default, rename = "usageMetadata")]
    usage_metadata: Option<GmUsage>,
}

/// One generated candidate with its optional finish reason.
#[derive(Deserialize)]
struct GmCandidate {
    content: GmContentResp,
    #[serde(default, rename = "finishReason")]
    finish_reason: Option<String>,
}

/// Candidate payload: the reply text lives in its parts.
#[derive(Deserialize)]
struct GmContentResp { parts: Vec<GmPartResp> }

/// A single response part; `text` defaults so non-text parts parse.
#[derive(Deserialize)]
struct GmPartResp { #[serde(default)] text: String }

/// Token accounting as reported by Gemini (camelCase on the wire).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct GmUsage {
    prompt_token_count: u32,
    candidates_token_count: u32,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Key resolution is environment-dependent; it must simply never panic.
    #[test]
    fn resolve_gemini_key_does_not_panic() {
        let _ = resolve_gemini_key();
    }

    /// The request must serialize into Gemini's contents/parts shape with
    /// a camelCase generationConfig.
    #[test]
    fn chat_body_serializes_to_gemini_shape() {
        let payload = GmChatBody {
            contents: vec![GmContent {
                role: "user".into(),
                parts: vec![GmPart { text: "hello".into() }],
            }],
            generation_config: GmGenerationConfig {
                temperature: 0.3,
                max_output_tokens: 800,
            },
        };
        let rendered = serde_json::to_string(&payload).unwrap();
        assert!(rendered.contains("\"contents\""));
        assert!(rendered.contains("\"parts\""));
        // camelCase per Gemini API
        assert!(rendered.contains("\"generationConfig\""));
        assert!(rendered.contains("\"maxOutputTokens\":800"));
    }

    /// Prefix stripping is a no-op on already-bare model names.
    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        let strip = |s: &str| s.strip_prefix("gemini/").unwrap_or(s);
        assert_eq!(strip("gemini/gemini-2.0-flash"), "gemini-2.0-flash");
        assert_eq!(strip("gemini-2.0-flash"), "gemini-2.0-flash");
    }
}

View File

@ -14,6 +14,8 @@
pub mod ollama;
pub mod ollama_cloud;
pub mod openrouter;
pub mod gemini;
pub mod claude;
pub mod langfuse_trace;
pub mod respond;
pub mod truth;
@ -42,6 +44,14 @@ pub struct V1State {
/// provider="openrouter" calls 503 rather than attempt. Same key
/// sourcing as LLM Team UI so the two share one API quota.
pub openrouter_key: Option<String>,
/// Gemini API key (Google Generative Language). Loaded at startup
/// via `gemini::resolve_gemini_key()`. None = provider="gemini"
/// calls 503. Phase 40 deliverable.
pub gemini_key: Option<String>,
/// Anthropic Claude API key. Loaded at startup via
/// `claude::resolve_claude_key()`. None = provider="claude" calls
/// 503. Phase 40 deliverable.
pub claude_key: Option<String>,
/// Phase 40 early deliverable — Langfuse client. None = tracing
/// disabled (keys missing or container unreachable). Traces are
/// fire-and-forget: never block the response path.
@ -159,6 +169,12 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
if let Some(rest) = req.model.strip_prefix("cloud/") {
return ("ollama_cloud".to_string(), rest.to_string());
}
if let Some(rest) = req.model.strip_prefix("gemini/") {
return ("gemini".to_string(), rest.to_string());
}
if let Some(rest) = req.model.strip_prefix("claude/") {
return ("claude".to_string(), rest.to_string());
}
("ollama".to_string(), req.model.clone())
}
@ -210,6 +226,18 @@ mod resolve_provider_tests {
let r = mk_req(Some("openrouter"), "openrouter/openai/gpt-4o-mini");
assert_eq!(resolve_provider(&r), ("openrouter".into(), "openrouter/openai/gpt-4o-mini".into()));
}
#[test]
fn gemini_prefix_infers_and_strips() {
let r = mk_req(None, "gemini/gemini-2.0-flash");
assert_eq!(resolve_provider(&r), ("gemini".into(), "gemini-2.0-flash".into()));
}
#[test]
fn claude_prefix_infers_and_strips() {
let r = mk_req(None, "claude/claude-3-5-sonnet-latest");
assert_eq!(resolve_provider(&r), ("claude".into(), "claude-3-5-sonnet-latest".into()));
}
}
async fn chat(
@ -273,10 +301,34 @@ async fn chat(
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?;
(r, "openrouter".to_string())
}
"gemini" => {
// Phase 40 provider adapter. Google Generative Language
// API via query-string key auth (not bearer).
let key = state.gemini_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"GEMINI_API_KEY not configured".to_string(),
))?;
let r = gemini::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("gemini: {e}")))?;
(r, "gemini".to_string())
}
"claude" | "anthropic" => {
// Phase 40 provider adapter. Anthropic Messages API via
// x-api-key header + anthropic-version:2023-06-01.
let key = state.claude_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"ANTHROPIC_API_KEY not configured".to_string(),
))?;
let r = claude::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("claude: {e}")))?;
(r, "claude".to_string())
}
other => {
return Err((
StatusCode::BAD_REQUEST,
format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter"),
format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude"),
));
}
};