lakehouse/crates/gateway/src/v1/gemini.rs

//! Gemini adapter — Google's Generative Language API.
//!
//! POST
//! `https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key=<API_KEY>`.
//! Auth is via the query-string key (not a bearer token). The payload shape
//! is NOT OpenAI-compatible — we map messages → `contents` + `parts` and
//! extract the response from `candidates[0].content.parts[0].text`.
//! Phase 40 deliverable; gate: `/v1/chat` with a prefixed or explicit
//! gemini model returns normally.

use std::time::Duration;
use serde::{Deserialize, Serialize};

use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};

/// Base URL of the Generative Language API (`v1beta`); `chat` appends
/// `/models/{model}:generateContent` to it.
const GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
/// Timeout, in seconds, configured on the HTTP client that `chat` builds.
const GEMINI_TIMEOUT_SECS: u64 = 180;

/// Resolves the Gemini API key, or `None` when no non-empty key is found.
///
/// Lookup order:
/// 1. The `GEMINI_API_KEY` environment variable (surrounding whitespace
///    trimmed).
/// 2. The first non-empty `GEMINI_API_KEY=` entry in `/home/profit/.env`,
///    then in `/root/.env`.
///
/// Files that cannot be read are skipped silently; this is a best-effort
/// lookup and never fails.
pub fn resolve_gemini_key() -> Option<String> {
    if let Ok(value) = std::env::var("GEMINI_API_KEY") {
        let value = value.trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    // Lazy chain: the second file is only read when the first yields no key.
    ["/home/profit/.env", "/root/.env"]
        .iter()
        .filter_map(|path| std::fs::read_to_string(path).ok())
        .find_map(|raw| raw.lines().find_map(gemini_key_from_env_line))
}

/// Extracts a non-empty `GEMINI_API_KEY` value from a single `.env` line.
///
/// Accepts indented lines and a shell-style `export ` prefix. The value is
/// trimmed, stripped of surrounding double/single quotes, and trimmed again
/// so whitespace inside the quotes does not end up in the key.
fn gemini_key_from_env_line(line: &str) -> Option<String> {
    let line = line.trim_start();
    let line = line.strip_prefix("export ").map_or(line, str::trim_start);
    let value = line.strip_prefix("GEMINI_API_KEY=")?;
    let key = value.trim().trim_matches('"').trim_matches('\'').trim();
    if key.is_empty() {
        None
    } else {
        Some(key.to_string())
    }
}

/// Sends a chat request to Gemini's `generateContent` endpoint and maps the
/// reply into a [`ChatResponse`].
///
/// * `key` — API key, sent as the `key` query-string parameter.
/// * `req` — chat request; a leading `gemini/` on `req.model` is stripped
///   before the model name is placed in the URL path.
///
/// Token usage is taken from the response's usage metadata when present;
/// otherwise it is estimated as roughly one token per four characters.
///
/// # Errors
///
/// Returns a `String` describing the failure when the HTTP client cannot be
/// built, the request cannot be sent, Gemini answers with a non-success
/// status (status and body are included), the body cannot be decoded, or the
/// response contains no candidates.
///
/// Transport and decode errors are rendered with the request URL removed,
/// because the URL carries the API key and reqwest's error `Display` would
/// otherwise include it.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "gemini/" prefix if the caller used the namespaced form.
    let model = req.model.strip_prefix("gemini/").unwrap_or(&req.model).to_string();

    // Role mapping: `system` and `user` messages both become `user` turns;
    // every other role becomes `model`. Each message is sent as its own
    // `contents` entry, in request order.
    let contents: Vec<GmContent> = req
        .messages
        .iter()
        .map(|m| {
            let role = match m.role.as_str() {
                "system" | "user" => "user",
                _ => "model",
            };
            GmContent {
                role: role.to_string(),
                parts: vec![GmPart { text: m.text() }],
            }
        })
        .collect();

    let body = GmChatBody {
        contents,
        generation_config: GmGenerationConfig {
            temperature: req.temperature.unwrap_or(0.3),
            max_output_tokens: req.max_tokens.unwrap_or(800),
        },
    };

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(GEMINI_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;

    // SECURITY: this URL embeds the API key. Never log it, and strip it from
    // any reqwest error before formatting (see `without_url` below).
    let url = format!("{}/models/{}:generateContent?key={}", GEMINI_BASE_URL, model, key);
    let t0 = std::time::Instant::now();
    let resp = client
        .post(&url)
        .json(&body)
        .send()
        .await
        .map_err(|e| {
            format!(
                "generativelanguage.googleapis.com unreachable: {}",
                e.without_url()
            )
        })?;

    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("gemini {}: {}", status, body));
    }

    let parsed: GmChatResponse = resp.json().await
        .map_err(|e| format!("invalid gemini response: {}", e.without_url()))?;

    let latency_ms = t0.elapsed().as_millis();
    let candidate = parsed.candidates.into_iter().next()
        .ok_or_else(|| "gemini returned no candidates".to_string())?;
    // Only the first part's text is used; a candidate without parts yields
    // an empty assistant message.
    let text = candidate.content.parts.into_iter()
        .next()
        .map(|p| p.text)
        .unwrap_or_default();

    // Prefer the provider-reported counts; fall back to a chars/4 estimate
    // (rounded up) when usage metadata is absent.
    let prompt_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.prompt_token_count)
        .unwrap_or_else(|| {
            let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
            ((chars + 3) / 4) as u32
        });
    let completion_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.candidates_token_count)
        .unwrap_or_else(|| ((text.chars().count() + 3) / 4) as u32);

    tracing::info!(
        target: "v1.chat",
        provider = "gemini",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "gemini chat completed",
    );

    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message::new_text("assistant", text),
            // Gemini's `finishReason` is passed through unchanged; "stop" is
            // used only when the field is absent.
            finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}

// -- Gemini wire shapes --

/// Request body for `generateContent`; keys are serialized in camelCase
/// (`contents`, `generationConfig`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GmChatBody {
    /// Conversation turns, in the order they are sent.
    contents: Vec<GmContent>,
    /// Sampling / length settings for the request.
    generation_config: GmGenerationConfig,
}

/// One conversation turn: a role (`chat` emits "user" or "model") and its
/// parts.
#[derive(Serialize)]
struct GmContent {
    role: String,
    parts: Vec<GmPart>,
}

/// A single text part of a turn.
#[derive(Serialize)]
struct GmPart {
    text: String,
}

/// Generation settings; serialized as `temperature` and `maxOutputTokens`.
#[derive(Serialize)]
struct GmGenerationConfig {
    temperature: f64,
    #[serde(rename = "maxOutputTokens")]
    max_output_tokens: u32,
}

/// Top-level `generateContent` response; only the fields `chat` reads are
/// modelled, and unknown keys are ignored.
///
/// Every field tolerates being absent from the JSON. NOTE(review): the API
/// is expected to omit empty or zero-valued fields in some replies (e.g.
/// blocked prompts, safety-stopped candidates) — confirm against the API
/// reference. Without the defaults, such a reply fails deserialization
/// outright instead of reaching `chat`'s own handling.
#[derive(Deserialize)]
struct GmChatResponse {
    /// Empty when the key is missing, so `chat` can report "no candidates".
    #[serde(default)]
    candidates: Vec<GmCandidate>,
    #[serde(default, rename = "usageMetadata")]
    usage_metadata: Option<GmUsage>,
}

/// One generated candidate.
#[derive(Deserialize)]
struct GmCandidate {
    /// Falls back to an empty content (no parts) when the key is missing.
    #[serde(default)]
    content: GmContentResp,
    #[serde(default, rename = "finishReason")]
    finish_reason: Option<String>,
}

/// Candidate content; `parts` defaults to empty when missing.
#[derive(Deserialize, Default)]
struct GmContentResp {
    #[serde(default)]
    parts: Vec<GmPartResp>,
}

/// One response part; `text` defaults to an empty string when missing.
#[derive(Deserialize)]
struct GmPartResp {
    #[serde(default)]
    text: String,
}

/// Token accounting from `usageMetadata`; a missing count is read as 0.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct GmUsage {
    #[serde(default)]
    prompt_token_count: u32,
    #[serde(default)]
    candidates_token_count: u32,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: key resolution must never panic, whatever the host's
    /// environment and `.env` files look like.
    #[test]
    fn resolve_gemini_key_does_not_panic() {
        let _ = resolve_gemini_key();
    }

    /// The outbound body must use Gemini's key names.
    #[test]
    fn chat_body_serializes_to_gemini_shape() {
        let body = GmChatBody {
            contents: vec![
                GmContent {
                    role: "user".into(),
                    parts: vec![GmPart { text: "hello".into() }],
                },
            ],
            generation_config: GmGenerationConfig {
                temperature: 0.3,
                max_output_tokens: 800,
            },
        };
        let json = serde_json::to_string(&body).unwrap();
        assert!(json.contains("\"contents\""));
        assert!(json.contains("\"parts\""));
        // camelCase per Gemini API
        assert!(json.contains("\"generationConfig\""));
        assert!(json.contains("\"maxOutputTokens\":800"));
    }

    /// The inbound body must map onto the response wire structs, with
    /// unmodelled keys (`role`, `totalTokenCount`) ignored.
    #[test]
    fn response_deserializes_from_gemini_shape() {
        let raw = r#"{
            "candidates": [{
                "content": { "role": "model", "parts": [{ "text": "hi there" }] },
                "finishReason": "STOP"
            }],
            "usageMetadata": {
                "promptTokenCount": 3,
                "candidatesTokenCount": 2,
                "totalTokenCount": 5
            }
        }"#;
        let parsed: GmChatResponse = serde_json::from_str(raw).unwrap();
        assert_eq!(parsed.candidates.len(), 1);

        let candidate = &parsed.candidates[0];
        assert_eq!(candidate.content.parts.len(), 1);
        assert_eq!(candidate.content.parts[0].text, "hi there");
        assert_eq!(candidate.finish_reason.as_deref(), Some("STOP"));

        let usage = parsed
            .usage_metadata
            .as_ref()
            .expect("usageMetadata should be parsed");
        assert_eq!(usage.prompt_token_count, 3);
        assert_eq!(usage.candidates_token_count, 2);
    }

    /// Mirrors the prefix handling at the top of `chat`: the namespaced
    /// form is stripped, bare model names pass through unchanged.
    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        let cases = [
            ("gemini/gemini-2.0-flash", "gemini-2.0-flash"),
            ("gemini-2.0-flash", "gemini-2.0-flash"),
        ];
        for (input, expected) in cases {
            let out = input.strip_prefix("gemini/").unwrap_or(input);
            assert_eq!(out, expected);
        }
    }
}