lakehouse/crates/gateway/src/v1/openrouter.rs

//! OpenRouter adapter — free-tier rescue rung for /v1/chat.
//!
//! Direct HTTPS call to `https://openrouter.ai/api/v1/chat/completions`
//! with Bearer auth. Mirrors the OpenAI-compatible shape so the model
//! list can be expanded without code changes. Added 2026-04-24 after
//! iter 5 hit repeated Ollama Cloud 502s on kimi-k2:1t — OpenRouter
//! free-tier models give us a different provider backbone as fallback.
//!
//! Key sourcing priority:
//!   1. Env var `OPENROUTER_API_KEY`
//!   2. `/home/profit/.env`, then `/root/.env`   (LLM Team convention)
//!   3. `/root/llm_team_config.json` → providers.openrouter.api_key
//!
//! First hit wins. Key is resolved once at gateway startup and stored
//! on `V1State.openrouter_key`.

use std::time::Duration;
use serde::{Deserialize, Serialize};

use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};

/// Base URL of the OpenRouter API; `chat` appends `/chat/completions` to it.
const OR_BASE_URL: &str = "https://openrouter.ai/api/v1";
/// Timeout, in seconds, set on the HTTP client that `chat` builds.
const OR_TIMEOUT_SECS: u64 = 180;

/// Resolves the OpenRouter API key, returning the first non-empty value found.
///
/// Sources are tried in this order:
///   1. the `OPENROUTER_API_KEY` environment variable;
///   2. an `OPENROUTER_API_KEY=` line in `/home/profit/.env`, then `/root/.env`;
///   3. the `providers.openrouter.api_key` string in `/root/llm_team_config.json`.
///
/// Resolution is best-effort: an unset variable, an unreadable file,
/// malformed JSON or a blank value just moves on to the next source.
/// Returns `None` when no source yields a key.
pub fn resolve_openrouter_key() -> Option<String> {
    if let Some(key) = std::env::var("OPENROUTER_API_KEY").ok().as_deref().and_then(non_blank) {
        return Some(key);
    }

    // LLM Team UI writes its key to ~/.env on the host user — pick it up
    // from the same source so the free-tier rescue path works without
    // an explicit systemd Environment= line. The iterator chain is lazy,
    // so the second file is only read when the first yields no key.
    let from_dotenv = ["/home/profit/.env", "/root/.env"]
        .iter()
        .filter_map(|path| std::fs::read_to_string(path).ok())
        .find_map(|raw| dotenv_openrouter_key(&raw));
    if from_dotenv.is_some() {
        return from_dotenv;
    }

    let raw = std::fs::read_to_string("/root/llm_team_config.json").ok()?;
    let config: serde_json::Value = serde_json::from_str(&raw).ok()?;
    config
        .pointer("/providers/openrouter/api_key")
        .and_then(|value| value.as_str())
        .and_then(non_blank)
}

/// Trims `value` and returns it as an owned string, or `None` if nothing is left.
fn non_blank(value: &str) -> Option<String> {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}

/// Extracts the first non-empty `OPENROUTER_API_KEY` assignment from `.env` text.
///
/// Leading indentation and a shell-style `export ` prefix are tolerated,
/// surrounding single/double quotes are removed, and whitespace left inside
/// the quotes is trimmed so it cannot leak into the Bearer token.
fn dotenv_openrouter_key(raw: &str) -> Option<String> {
    raw.lines().find_map(|line| {
        let line = line.trim_start();
        // Only treat `export` as a prefix when followed by whitespace, so a
        // variable that merely starts with "export" is not misparsed.
        let line = match line.strip_prefix("export") {
            Some(rest) if rest.starts_with(char::is_whitespace) => rest.trim_start(),
            _ => line,
        };
        let value = line.strip_prefix("OPENROUTER_API_KEY=")?;
        non_blank(value.trim().trim_matches('"').trim_matches('\''))
    })
}

/// Sends a non-streaming chat completion to OpenRouter and maps the reply
/// onto the gateway's [`ChatResponse`].
///
/// * `key` — OpenRouter API key, sent as a Bearer token.
/// * `req` — the incoming request. An `openrouter/` prefix on `req.model`
///   is removed before forwarding; missing `max_tokens` / `temperature`
///   default to 800 / 0.3.
///
/// Only the first returned choice is used. When the response carries no
/// `usage` object, token counts are estimated at roughly four characters
/// per token (rounded up).
///
/// # Errors
///
/// Returns a human-readable `String` when the HTTP client cannot be built,
/// the request fails, OpenRouter answers with a non-success status (the
/// response body is included), the body does not deserialize, or the
/// response contains no choices.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "openrouter/" prefix if the caller used the namespaced
    // form so OpenRouter sees the raw model id (e.g. "openai/gpt-oss-120b:free").
    let model = req.model.strip_prefix("openrouter/").unwrap_or(&req.model).to_string();

    let body = ORChatBody {
        model: model.clone(),
        messages: req.messages.iter().map(|m| ORMessage {
            role: m.role.clone(),
            content: m.content.clone(),
        }).collect(),
        max_tokens: req.max_tokens.unwrap_or(800),
        temperature: req.temperature.unwrap_or(0.3),
        stream: false,
    };

    // NOTE(review): a fresh client is built on every call; sharing one
    // across calls may be worth considering if this path gets hot.
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(OR_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;

    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/chat/completions", OR_BASE_URL))
        .bearer_auth(key)
        // OpenRouter recommends Referer + Title for attribution; absent
        // headers do not fail the call but help us see our traffic in
        // their dashboard.
        .header("HTTP-Referer", "https://vcp.devop.live")
        .header("X-Title", "Lakehouse Scrum")
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("openrouter.ai unreachable: {e}"))?;

    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("openrouter.ai {}: {}", status, body));
    }

    let parsed: ORChatResponse = resp.json().await
        .map_err(|e| format!("invalid openrouter response: {e}"))?;

    let latency_ms = t0.elapsed().as_millis();
    let choice = parsed.choices.into_iter().next()
        .ok_or_else(|| "openrouter returned no choices".to_string())?;
    let text = choice.message.content;

    // Fallback estimate: ~4 characters per token, rounded up. Clamp to
    // u32::MAX instead of letting the narrowing cast truncate.
    let estimate_tokens =
        |chars: usize| (chars.saturating_add(3) / 4).min(u32::MAX as usize) as u32;
    let (prompt_tokens, completion_tokens) = match parsed.usage {
        Some(usage) => (usage.prompt_tokens, usage.completion_tokens),
        None => {
            let prompt_chars: usize =
                req.messages.iter().map(|m| m.content.chars().count()).sum();
            (estimate_tokens(prompt_chars), estimate_tokens(text.chars().count()))
        }
    };

    tracing::info!(
        target: "v1.chat",
        provider = "openrouter",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "openrouter chat completed",
    );

    // Read the clock once so `id` and `created` describe the same instant.
    let now = chrono::Utc::now();
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", now.timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: now.timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: text },
            finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            // Counts come from the upstream response; saturate rather than
            // overflow (panic in debug, wrap in release) on absurd values.
            total_tokens: prompt_tokens.saturating_add(completion_tokens),
        },
    })
}

// -- OpenRouter wire shapes (OpenAI-compatible) --

/// JSON request body that `chat` POSTs to `/chat/completions`.
#[derive(Serialize)]
struct ORChatBody {
    /// Model id sent upstream; `chat` strips any `openrouter/` prefix first.
    model: String,
    /// Conversation messages, copied from the incoming request.
    messages: Vec<ORMessage>,
    /// Completion token cap; `chat` falls back to 800 when the request has none.
    max_tokens: u32,
    /// Sampling temperature; `chat` falls back to 0.3 when the request has none.
    temperature: f64,
    /// Streaming flag; `chat` always sets this to `false`.
    stream: bool,
}

/// One outgoing chat message (`role` + `content`) inside [`ORChatBody`].
#[derive(Serialize)]
struct ORMessage { role: String, content: String }

/// The parts of the `/chat/completions` response body that `chat` reads.
#[derive(Deserialize)]
struct ORChatResponse {
    /// Returned choices; `chat` uses only the first and errors when empty.
    choices: Vec<ORChoice>,
    /// Token accounting. Deserializes to `None` when the field is absent,
    /// in which case `chat` estimates the counts from character lengths.
    #[serde(default)]
    usage: Option<ORUsage>,
}

/// A single completion choice from the response.
#[derive(Deserialize)]
struct ORChoice {
    /// The generated assistant message.
    message: ORMessageResp,
    /// Why generation stopped; `chat` substitutes `"stop"` when this is missing.
    #[serde(default)]
    finish_reason: Option<String>,
}

/// Message payload of a returned choice; only `content` is read.
///
/// NOTE(review): `content` is a required `String`, so a response whose
/// `content` is missing or `null` fails deserialization and surfaces as an
/// "invalid openrouter response" error from `chat`. OpenAI-compatible APIs
/// can presumably send `null` here (e.g. tool-call replies) — confirm
/// whether that case needs handling.
#[derive(Deserialize)]
struct ORMessageResp { content: String }

/// Token counts reported by the upstream response. Both fields are required
/// whenever a `usage` object is present.
#[derive(Deserialize)]
struct ORUsage { prompt_tokens: u32, completion_tokens: u32 }

#[cfg(test)]
mod tests {
    use super::*;

    /// Key resolution depends on the host (the env var and the files may or
    /// may not exist), so this only checks that the call returns cleanly.
    #[test]
    fn resolve_openrouter_key_does_not_panic() {
        let resolved = resolve_openrouter_key();
        drop(resolved);
    }

    /// The serialized request must carry the OpenAI-style field names and values.
    #[test]
    fn chat_body_serializes_to_openai_shape() {
        let user_message = ORMessage { role: "user".into(), content: "review this".into() };
        let request = ORChatBody {
            model: "openai/gpt-oss-120b:free".into(),
            messages: vec![user_message],
            max_tokens: 800,
            temperature: 0.3,
            stream: false,
        };

        let encoded = serde_json::to_string(&request).unwrap();
        let has = |needle: &str| encoded.contains(needle);

        assert!(has("\"model\":\"openai/gpt-oss-120b:free\""));
        assert!(has("\"messages\""));
        assert!(has("\"max_tokens\":800"));
        assert!(has("\"stream\":false"));
    }

    /// A namespaced "openrouter/..." id loses its prefix; any other id is
    /// passed through untouched.
    #[test]
    fn model_prefix_strip_preserves_unprefixed() {
        fn strip(model: &str) -> &str {
            model.strip_prefix("openrouter/").unwrap_or(model)
        }

        let cases = [
            ("openrouter/openai/gpt-oss-120b:free", "openai/gpt-oss-120b:free"),
            ("openai/gpt-oss-120b:free", "openai/gpt-oss-120b:free"),
            ("google/gemma-3-27b-it:free", "google/gemma-3-27b-it:free"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip(input), expected, "{input} should become {expected}");
        }
    }
}