diff --git a/config/providers.toml b/config/providers.toml
index 1a7473c..248d672 100644
--- a/config/providers.toml
+++ b/config/providers.toml
@@ -45,6 +45,25 @@
 default_model = "openai/gpt-oss-120b:free"
 # Model-prefix routing: "openrouter/<vendor>/<model>" auto-routes here,
 # prefix stripped before upstream call.
+[[provider]]
+name = "opencode"
+base_url = "https://opencode.ai/zen/v1"
+# Unified endpoint — covers BOTH Zen (pay-per-token Anthropic/OpenAI/
+# Gemini frontier) AND Go (flat-sub Kimi/GLM/DeepSeek/Qwen/Minimax).
+# Upstream bills per-model: Zen models hit the Zen balance, Go models
+# hit the Go subscription cap. /zen/go/v1 is the Go-only sub-path
+# (rejects Zen models with "Model not supported"); not used here.
+auth = "bearer"
+auth_env = "OPENCODE_API_KEY"
+default_model = "claude-opus-4-7"
+# One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro,
+# Kimi K2.6, DeepSeek, GLM, Qwen, plus 4 free-tier models.
+# OpenAI-compatible Chat Completions at /v1/chat/completions.
+# Model-prefix routing: "opencode/<model>" auto-routes here,
+# prefix stripped before upstream call.
+# Key file: /etc/lakehouse/opencode.env (loaded via systemd EnvironmentFile=).
+# Model catalog: curl -H "Authorization: Bearer ..." https://opencode.ai/zen/v1/models
+
 [[provider]]
 name = "kimi"
 base_url = "https://api.kimi.com/coding/v1"
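Once the provider block above is live, the gateway can be exercised end to end with a plain OpenAI-style request. A minimal sketch, assuming the gateway listens on localhost:8080 (the listen address is deployment-specific and not part of this diff):

    curl -s http://localhost:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "opencode/claude-opus-4-7",
           "messages": [{"role": "user", "content": "ping"}],
           "max_tokens": 64}'

The "opencode/" prefix picks the provider; the adapter strips it before the upstream call, so opencode.ai sees the bare "claude-opus-4-7".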
diff --git a/crates/gateway/src/main.rs b/crates/gateway/src/main.rs
index 06476ee..9d4072e 100644
--- a/crates/gateway/src/main.rs
+++ b/crates/gateway/src/main.rs
@@ -283,6 +283,18 @@ async fn main() {
             }
             k
         },
+        opencode_key: {
+            // OpenCode unified Zen + Go gateway — Claude Opus 4.7,
+            // GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
+            // Qwen + free-tier. Key from /etc/lakehouse/opencode.env.
+            let k = v1::opencode::resolve_opencode_key();
+            if k.is_some() {
+                tracing::info!("v1: OpenCode key loaded — /v1/chat provider=opencode enabled (40 models)");
+            } else {
+                tracing::debug!("v1: no OpenCode key — provider=opencode will 503");
+            }
+            k
+        },
         // Phase 40 early deliverable — Langfuse trace emitter.
         // Defaults match mcp-server/tracing.ts conventions so
         // gateway traces land in the same staffing project.
diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs
index 39d227a..dc0076e 100644
--- a/crates/gateway/src/v1/mod.rs
+++ b/crates/gateway/src/v1/mod.rs
@@ -17,6 +17,7 @@ pub mod openrouter;
 pub mod gemini;
 pub mod claude;
 pub mod kimi;
+pub mod opencode;
 pub mod langfuse_trace;
 pub mod mode;
 pub mod respond;
@@ -60,6 +61,13 @@ pub struct V1State {
     /// from `KIMI_API_KEY` env or `/etc/lakehouse/kimi.env`. None =
     /// provider="kimi" calls 503.
     pub kimi_key: Option<String>,
+    /// OpenCode (opencode.ai) bearer token — unified Zen + Go
+    /// multi-vendor gateway. One sk-* key reaches Claude Opus 4.7,
+    /// GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM, Qwen
+    /// + free-tier. Loaded at startup via `opencode::resolve_opencode_key()`
+    /// from `OPENCODE_API_KEY` env or `/etc/lakehouse/opencode.env`.
+    /// None = provider="opencode" calls 503.
+    pub opencode_key: Option<String>,
     /// Phase 40 early deliverable — Langfuse client. None = tracing
     /// disabled (keys missing or container unreachable). Traces are
     /// fire-and-forget: never block the response path.
@@ -234,6 +242,9 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
     if let Some(rest) = req.model.strip_prefix("kimi/") {
         return ("kimi".to_string(), rest.to_string());
     }
+    if let Some(rest) = req.model.strip_prefix("opencode/") {
+        return ("opencode".to_string(), rest.to_string());
+    }
     // Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`,
     // `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter.
     // This makes the gateway a drop-in OpenAI-compatible middleware:
@@ -433,10 +444,23 @@ async fn chat(
                 .map_err(|e| (StatusCode::BAD_GATEWAY, format!("kimi: {e}")))?;
             (r, "kimi".to_string())
         }
+        "opencode" => {
+            // OpenCode unified Zen + Go gateway — Claude Opus 4.7,
+            // GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
+            // Qwen, free-tier. OpenAI-compat at opencode.ai/zen/v1.
+            let key = state.opencode_key.as_deref().ok_or((
+                StatusCode::SERVICE_UNAVAILABLE,
+                "OPENCODE_API_KEY not configured".to_string(),
+            ))?;
+            let r = opencode::chat(key, &*req_for_adapter)
+                .await
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("opencode: {e}")))?;
+            (r, "opencode".to_string())
+        }
         other => {
             return Err((
                 StatusCode::BAD_REQUEST,
-                format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi"),
+                format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi, opencode"),
             ));
         }
     };
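For reference, the key file the adapter below reads. A minimal sketch of /etc/lakehouse/opencode.env (the key value is illustrative; `resolve_opencode_key()` accepts bare, single-quoted, or double-quoted values and trims surrounding whitespace):

    # /etc/lakehouse/opencode.env (referenced by the unit's EnvironmentFile=)
    OPENCODE_API_KEY=sk-...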
diff --git a/crates/gateway/src/v1/opencode.rs b/crates/gateway/src/v1/opencode.rs
new file mode 100644
index 0000000..d45abf7
--- /dev/null
+++ b/crates/gateway/src/v1/opencode.rs
@@ -0,0 +1,228 @@
+//! OpenCode adapter — multi-vendor curated gateway via the unified
+//! opencode.ai/zen/v1 endpoint (Zen + Go tiers).
+//!
+//! One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro,
+//! Kimi K2.6, DeepSeek, GLM, Qwen, plus 4 free-tier models.
+//! OpenAI-compatible Chat Completions; auth via Bearer.
+//!
+//! Why a separate adapter (vs reusing openrouter.rs):
+//! - Different account, different key, different base_url
+//! - No HTTP-Referer / X-Title headers (those are OpenRouter-specific)
+//! - Future-proofing for any opencode-only request shaping
+//!
+//! Key sourcing priority:
+//! 1. Env var `OPENCODE_API_KEY` (loaded from /etc/lakehouse/opencode.env
+//!    via systemd EnvironmentFile=)
+//! 2. /etc/lakehouse/opencode.env read directly (rescue path if the env
+//!    var is missing)
+//!
+//! Resolved once at gateway startup, stored on `V1State.opencode_key`.
+//! Model-prefix routing: "opencode/<model>" auto-routes here, prefix
+//! stripped before the upstream call.
+
+use std::time::Duration;
+use serde::{Deserialize, Serialize};
+
+use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
+
+// /zen/v1 is the unified OpenCode endpoint that covers BOTH the
+// Zen pay-per-token tier (Claude/GPT/Gemini frontier) AND the Go
+// subscription tier (Kimi/GLM/DeepSeek/Qwen/Minimax/mimo). When the
+// caller has both, opencode bills per-model: Zen models charge the
+// Zen balance, Go models charge against the Go subscription cap.
+//
+// /zen/go/v1 exists as a Go-only sub-path (rejects Zen models with
+// "Model not supported"); we use the unified /zen/v1 since the same
+// key works for both tiers with correct billing routing upstream.
+const OPENCODE_BASE_URL: &str = "https://opencode.ai/zen/v1";
+
+// 600s default — opencode upstream models include reasoning-heavy
+// variants (Claude Opus, Kimi K2.6, GLM-5.1) that legitimately take
+// 3-5 min on big audit prompts. Override via OPENCODE_TIMEOUT_SECS.
+const OPENCODE_TIMEOUT_SECS_DEFAULT: u64 = 600;
+
+fn opencode_timeout_secs() -> u64 {
+    std::env::var("OPENCODE_TIMEOUT_SECS")
+        .ok()
+        .and_then(|s| s.trim().parse::<u64>().ok())
+        .filter(|&n| n > 0)
+        .unwrap_or(OPENCODE_TIMEOUT_SECS_DEFAULT)
+}
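+// Worked examples of the chain above (illustrative, not exhaustive):
+//   OPENCODE_TIMEOUT_SECS=120        -> 120s
+//   unset, "", or "junk"             -> no valid parse -> 600s default
+//   OPENCODE_TIMEOUT_SECS=0          -> filter(|&n| n > 0) drops it -> 600s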
+
+pub fn resolve_opencode_key() -> Option<String> {
+    if let Ok(k) = std::env::var("OPENCODE_API_KEY") {
+        if !k.trim().is_empty() { return Some(k.trim().to_string()); }
+    }
+    if let Ok(raw) = std::fs::read_to_string("/etc/lakehouse/opencode.env") {
+        for line in raw.lines() {
+            if let Some(rest) = line.strip_prefix("OPENCODE_API_KEY=") {
+                let k = rest.trim().trim_matches('"').trim_matches('\'');
+                if !k.is_empty() { return Some(k.to_string()); }
+            }
+        }
+    }
+    None
+}
+
+pub async fn chat(
+    key: &str,
+    req: &ChatRequest,
+) -> Result<ChatResponse, String> {
+    // Strip the "opencode/" namespace prefix so the upstream sees the
+    // bare model id (e.g. "claude-opus-4-7", "kimi-k2.6").
+    let model = req.model.strip_prefix("opencode/").unwrap_or(&req.model).to_string();
+
+    // Anthropic models on opencode reject `temperature` with a 400
+    // "temperature is deprecated for this model" error. Strip the
+    // field for claude-* and the gpt-5.x / o-series reasoning
+    // lineages (Anthropic's and OpenAI's reasoning models have all
+    // moved away from temperature). Other models keep the caller's
+    // value or default to 0.3.
+    let drop_temp = model.starts_with("claude-")
+        || model.starts_with("gpt-5")
+        || model.starts_with("o1")
+        || model.starts_with("o3")
+        || model.starts_with("o4");
+    let body = OCChatBody {
+        model: model.clone(),
+        messages: req.messages.iter().map(|m| OCMessage {
+            role: m.role.clone(),
+            content: m.content.clone(),
+        }).collect(),
+        // filter(|&n| n > 0) catches Some(0) — the same trap that bit
+        // the Kimi adapter when callers passed an empty env var that
+        // parsed to 0.
+        max_tokens: req.max_tokens.filter(|&n| n > 0).unwrap_or(800),
+        temperature: if drop_temp { None } else { Some(req.temperature.unwrap_or(0.3)) },
+        stream: false,
+    };
+
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(opencode_timeout_secs()))
+        .build()
+        .map_err(|e| format!("build client: {e}"))?;
+
+    let t0 = std::time::Instant::now();
+    let resp = client
+        .post(format!("{}/chat/completions", OPENCODE_BASE_URL))
+        .bearer_auth(key)
+        .json(&body)
+        .send()
+        .await
+        .map_err(|e| format!("opencode.ai unreachable: {e}"))?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        let body = resp.text().await.unwrap_or_else(|_| "?".into());
+        return Err(format!("opencode.ai {}: {}", status, body));
+    }
+
+    let parsed: OCChatResponse = resp.json().await
+        .map_err(|e| format!("invalid opencode response: {e}"))?;
+
+    let latency_ms = t0.elapsed().as_millis();
+    let choice = parsed.choices.into_iter().next()
+        .ok_or_else(|| "opencode returned no choices".to_string())?;
+    let text = choice.message.content;
+
+    // Fall back to a chars/4 estimate when upstream omits usage.
+    let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
+        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
+        ((chars + 3) / 4) as u32
+    });
+    let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
+        ((text.chars().count() + 3) / 4) as u32
+    });
+
+    tracing::info!(
+        target: "v1.chat",
+        provider = "opencode",
+        model = %model,
+        prompt_tokens,
+        completion_tokens,
+        latency_ms = latency_ms as u64,
+        "opencode chat completed"
+    );
+
+    Ok(ChatResponse {
+        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
+        object: "chat.completion",
+        created: chrono::Utc::now().timestamp(),
+        model,
+        choices: vec![Choice {
+            index: 0,
+            message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
+            finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
+        }],
+        usage: UsageBlock {
+            prompt_tokens,
+            completion_tokens,
+            total_tokens: prompt_tokens + completion_tokens,
+        },
+    })
+}
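+
+// For orientation: the upstream reply consumed above deserializes from
+// a standard Chat Completions shape like this (values illustrative,
+// field set per the structs below):
+//
+//   {
+//     "choices": [
+//       { "message": { "content": "..." }, "finish_reason": "stop" }
+//     ],
+//     "usage": { "prompt_tokens": 12, "completion_tokens": 34 }
+//   }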
+
+// -- OpenCode wire shapes (OpenAI-compatible) --
+
+#[derive(Serialize)]
+struct OCChatBody {
+    model: String,
+    messages: Vec<OCMessage>,
+    max_tokens: u32,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    temperature: Option<f32>,
+    stream: bool,
+}
+
+#[derive(Serialize)]
+struct OCMessage { role: String, content: serde_json::Value }
+
+#[derive(Deserialize)]
+struct OCChatResponse {
+    choices: Vec<OCChoice>,
+    #[serde(default)]
+    usage: Option<OCUsage>,
+}
+
+#[derive(Deserialize)]
+struct OCChoice {
+    message: OCMessageResp,
+    #[serde(default)]
+    finish_reason: Option<String>,
+}
+
+#[derive(Deserialize)]
+struct OCMessageResp { content: String }
+
+#[derive(Deserialize)]
+struct OCUsage { prompt_tokens: u32, completion_tokens: u32 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn resolve_opencode_key_does_not_panic() {
+        let _ = resolve_opencode_key();
+    }
+
+    #[test]
+    fn model_prefix_strip() {
+        let cases = [
+            ("opencode/claude-opus-4-7", "claude-opus-4-7"),
+            ("opencode/kimi-k2.6", "kimi-k2.6"),
+            ("claude-opus-4-7", "claude-opus-4-7"),
+        ];
+        for (input, expected) in cases {
+            let out = input.strip_prefix("opencode/").unwrap_or(input);
+            assert_eq!(out, expected);
+        }
+    }
+
+    #[test]
+    fn max_tokens_filters_zero() {
+        // The trap: empty env -> Number("") -> 0 -> Some(0). The
+        // adapter must not pass 0 upstream; it should fall back to 800.
+        let some_zero: Option<u32> = Some(0);
+        let result = some_zero.filter(|&n| n > 0).unwrap_or(800);
+        assert_eq!(result, 800);
+        let some_real: Option<u32> = Some(4096);
+        assert_eq!(some_real.filter(|&n| n > 0).unwrap_or(800), 4096);
+        let none_val: Option<u32> = None;
+        assert_eq!(none_val.filter(|&n| n > 0).unwrap_or(800), 800);
+    }
+}
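Two quick checks after deploying this change. A hedged sketch: the crate is assumed to be named `gateway` (per crates/gateway/), and sourcing the env file is just one way to get the key into the shell:

    # 1. Confirm the key reaches the live catalog (endpoint from the config above)
    . /etc/lakehouse/opencode.env
    curl -s -H "Authorization: Bearer $OPENCODE_API_KEY" \
      https://opencode.ai/zen/v1/models

    # 2. Run the adapter's unit tests
    cargo test -p gateway opencode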