root 540a9a27ee
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
v1: accept OpenAI multimodal content shape (array-of-parts)
Modern OpenAI clients (pi-ai, openai SDK 6.x, langchain-js, the official
agents) send `messages[].content` as an array of content parts:
`[{type:"text", text:"..."}, {type:"image_url", ...}]`. Our gateway
typed `content` as plain `String` and 422'd those calls.

Fix: `Message.content` is now `serde_json::Value` so requests
deserialize regardless of shape. `Message::text()` flattens
content-parts arrays (concat'd `text` fields, non-text parts skipped)
for places that need a plain string — Ollama prompt assembly, char
counts, the assistant's own response synthesis. `Message::new_text()`
constructs string-content messages without writing the wrapper at
each call site. Forwarders (openrouter) clone content through
verbatim so providers see exactly what the client sent.

Verified end-to-end: Pi CLI (`pi --print --provider openrouter`)
landed a clean 1902-token request through `/v1/chat/completions`,
routed to OpenRouter as `openai/gpt-oss-120b:free`, response in
1.62s, Langfuse trace `v1.chat:openrouter` recorded with provider
tag. Same path that any tool using the official openai SDK takes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 17:56:46 -05:00

231 lines
7.0 KiB
Rust

//! Gemini adapter — Google's Generative Language API.
//!
//! POST `https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key=<API_KEY>`.
//! Auth via query-string key (not bearer). Payload shape is NOT
//! OpenAI-compatible — we map messages → contents + parts and extract the
//! response text from `candidates[0].content.parts`. Phase 40 deliverable;
//! gate: `/v1/chat` with a prefixed or explicit gemini model returns normally.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
const GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
const GEMINI_TIMEOUT_SECS: u64 = 180;
/// Look up the Gemini API key.
///
/// The `GEMINI_API_KEY` environment variable wins when it holds a non-blank
/// value (returned with surrounding whitespace trimmed). Otherwise
/// `/home/profit/.env` and `/root/.env` are read in that order, and the first
/// line starting with `GEMINI_API_KEY=` whose value is non-empty after
/// trimming whitespace and surrounding quotes is returned. Files that cannot
/// be read are skipped. Returns `None` when no key is found anywhere.
pub fn resolve_gemini_key() -> Option<String> {
    let from_env = std::env::var("GEMINI_API_KEY")
        .ok()
        .map(|raw| raw.trim().to_string())
        .filter(|key| !key.is_empty());
    if from_env.is_some() {
        return from_env;
    }

    // Lazy chain: the second file is only read when the first yields no key.
    ["/home/profit/.env", "/root/.env"]
        .iter()
        .filter_map(|path| std::fs::read_to_string(path).ok())
        .find_map(|raw| {
            raw.lines()
                .filter_map(|line| line.strip_prefix("GEMINI_API_KEY="))
                .map(|rest| rest.trim().trim_matches('"').trim_matches('\''))
                .find(|key| !key.is_empty())
                .map(str::to_string)
        })
}
/// Send a chat request to Gemini's `generateContent` endpoint and translate
/// the reply into the gateway's OpenAI-style [`ChatResponse`].
///
/// * `key` — Gemini API key; sent as the `key` query-string parameter.
/// * `req` — incoming chat request. A leading `gemini/` on `req.model` is
///   stripped. `temperature` defaults to `0.3` and `max_tokens` to `800`
///   when the request leaves them unset.
///
/// The reply text is the concatenation of every text part of the first
/// candidate. Token counts come from Gemini's usage metadata when present,
/// otherwise they are estimated at roughly four characters per token.
///
/// # Errors
///
/// Returns a human-readable `String` when the HTTP client cannot be built,
/// the request cannot be sent, Gemini answers with a non-success status (the
/// status and response body are included), the body cannot be decoded, or
/// the response contains no candidates. Transport and decode errors have
/// their URL stripped, because the URL carries the API key.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Accept both the namespaced ("gemini/<model>") and the bare model name.
    let model = req.model.strip_prefix("gemini/").unwrap_or(&req.model).to_string();

    // Role mapping: "system" and "user" messages are both sent as "user"
    // turns; every other role is sent as "model". Each message becomes its
    // own `contents` entry holding a single text part — system messages are
    // NOT merged into the following user turn.
    let contents: Vec<GmContent> = req
        .messages
        .iter()
        .map(|m| {
            let role = match m.role.as_str() {
                "system" | "user" => "user",
                _ => "model",
            };
            GmContent {
                role: role.to_string(),
                parts: vec![GmPart { text: m.text() }],
            }
        })
        .collect();

    let body = GmChatBody {
        contents,
        generation_config: GmGenerationConfig {
            temperature: req.temperature.unwrap_or(0.3),
            max_output_tokens: req.max_tokens.unwrap_or(800),
        },
    };

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(GEMINI_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;

    let url = format!("{}/models/{}:generateContent?key={}", GEMINI_BASE_URL, model, key);
    let t0 = std::time::Instant::now();

    // reqwest embeds the request URL in its error Display output, and the URL
    // contains the API key. Strip it before the error is turned into a string
    // that may be logged or returned to the caller.
    let resp = client
        .post(&url)
        .json(&body)
        .send()
        .await
        .map_err(|e| {
            format!(
                "generativelanguage.googleapis.com unreachable: {}",
                e.without_url()
            )
        })?;

    let status = resp.status();
    if !status.is_success() {
        let detail = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("gemini {}: {}", status, detail));
    }

    let parsed: GmChatResponse = resp
        .json()
        .await
        .map_err(|e| format!("invalid gemini response: {}", e.without_url()))?;
    let latency_ms = t0.elapsed().as_millis();

    let candidate = parsed
        .candidates
        .into_iter()
        .next()
        .ok_or_else(|| "gemini returned no candidates".to_string())?;

    // A candidate may carry several parts; keep all of their text instead of
    // silently dropping everything after the first one.
    let text: String = candidate
        .content
        .parts
        .into_iter()
        .map(|p| p.text)
        .collect();

    // Fallback estimate when Gemini reports no usage: ~4 characters per
    // token, rounded up.
    let estimate_tokens = |chars: usize| ((chars + 3) / 4) as u32;

    let prompt_tokens = parsed
        .usage_metadata
        .as_ref()
        .map(|u| u.prompt_token_count)
        .unwrap_or_else(|| {
            let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
            estimate_tokens(chars)
        });
    let completion_tokens = parsed
        .usage_metadata
        .as_ref()
        .map(|u| u.candidates_token_count)
        .unwrap_or_else(|| estimate_tokens(text.chars().count()));

    tracing::info!(
        target: "v1.chat",
        provider = "gemini",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "gemini chat completed",
    );

    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message::new_text("assistant", text),
            finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            // Saturate rather than overflow on pathological counts.
            total_tokens: prompt_tokens.saturating_add(completion_tokens),
        },
    })
}
// -- Gemini wire shapes --
/// Request payload for `generateContent`: the conversation turns plus the
/// generation settings. Fields serialize in camelCase, so
/// `generation_config` goes on the wire as `generationConfig`.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GmChatBody {
    /// Conversation turns, in request order.
    contents: Vec<GmContent>,
    /// Sampling / length settings for this call.
    generation_config: GmGenerationConfig,
}
/// One conversation turn in the outgoing request.
#[derive(Serialize)]
struct GmContent {
    /// Gemini role name; `chat` sends either "user" or "model".
    role: String,
    /// The pieces of content that make up this turn.
    parts: Vec<GmPart>,
}
/// A single text part of an outgoing turn.
#[derive(Serialize)]
struct GmPart {
    /// Plain text of the part.
    text: String,
}
/// Generation settings sent with every request. Serialized in camelCase
/// (`max_output_tokens` → `maxOutputTokens`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GmGenerationConfig {
    /// Sampling temperature.
    temperature: f64,
    /// Upper bound on the number of tokens Gemini may generate.
    max_output_tokens: u32,
}
/// Top-level `generateContent` response, limited to the fields this adapter
/// reads. Field names are camelCase on the wire (`usageMetadata`).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct GmChatResponse {
    /// Generated candidates. Defaults to an empty list when the key is
    /// absent, so a response without candidates deserializes and can be
    /// handled explicitly rather than failing with a "missing field" error.
    #[serde(default)]
    candidates: Vec<GmCandidate>,
    /// Token accounting; `None` when Gemini does not report it.
    #[serde(default)]
    usage_metadata: Option<GmUsage>,
}
/// One generated candidate. Field names are camelCase on the wire
/// (`finishReason`).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct GmCandidate {
    /// Generated content. Defaults to an empty content block when absent, so
    /// a candidate that carries only a finish reason still deserializes and
    /// its `finish_reason` reaches the caller.
    #[serde(default)]
    content: GmContentResp,
    /// Why generation stopped, as reported by Gemini; `None` when omitted.
    #[serde(default)]
    finish_reason: Option<String>,
}

/// Content block of a candidate.
#[derive(Default, Deserialize)]
struct GmContentResp {
    /// Parts of the reply. Defaults to empty when the key is absent.
    #[serde(default)]
    parts: Vec<GmPartResp>,
}
/// A single part of a candidate's content.
#[derive(Deserialize)]
struct GmPartResp {
    /// Text of the part; empty when the part has no `text` field.
    #[serde(default)]
    text: String,
}
/// Token accounting from `usageMetadata`. Field names are camelCase on the
/// wire (`promptTokenCount`, `candidatesTokenCount`).
///
/// Both counts default to 0 when absent, so a usage block that omits one of
/// them does not make the whole response fail to deserialize.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct GmUsage {
    /// Tokens consumed by the prompt.
    #[serde(default)]
    prompt_token_count: u32,
    /// Tokens generated across the returned candidates.
    #[serde(default)]
    candidates_token_count: u32,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: key lookup must not panic, whatever the environment and
    /// the `.env` files happen to contain.
    #[test]
    fn resolve_gemini_key_does_not_panic() {
        let _ = resolve_gemini_key();
    }

    /// The request body must serialize with Gemini's camelCase field names.
    #[test]
    fn chat_body_serializes_to_gemini_shape() {
        let turn = GmContent {
            role: "user".into(),
            parts: vec![GmPart { text: "hello".into() }],
        };
        let body = GmChatBody {
            contents: vec![turn],
            generation_config: GmGenerationConfig {
                temperature: 0.3,
                max_output_tokens: 800,
            },
        };

        let json = serde_json::to_string(&body).unwrap();

        let expected_fragments = [
            "\"contents\"",
            "\"parts\"",
            // camelCase per Gemini API
            "\"generationConfig\"",
            "\"maxOutputTokens\":800",
        ];
        for fragment in expected_fragments {
            assert!(json.contains(fragment));
        }
    }

    /// Stripping the `gemini/` namespace must leave bare model names alone.
    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        let cases = [
            ("gemini/gemini-2.0-flash", "gemini-2.0-flash"),
            ("gemini-2.0-flash", "gemini-2.0-flash"),
        ];
        cases.iter().for_each(|&(input, expected)| {
            let stripped = input.strip_prefix("gemini/").unwrap_or(input);
            assert_eq!(stripped, expected);
        });
    }
}