gateway: OpenCode (Zen + Go) provider adapter

Wires opencode.ai as a /v1/chat provider. One sk-* key reaches 40
models across Anthropic, OpenAI, Google, Moonshot, DeepSeek, Zhipu,
Alibaba, Minimax — billed against either the user's Zen balance
(pay-per-token premium models) or Go subscription (flat-rate
Kimi/GLM/DeepSeek/etc.). The unified /zen/v1 endpoint routes both;
upstream picks the billing tier based on model id.
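
A direct smoke of the unified endpoint, for reference (sketch only;
reqwest + serde_json; the prompt and token budget are illustrative and
not part of this change):

  use serde_json::json;

  #[tokio::main]
  async fn main() -> Result<(), Box<dyn std::error::Error>> {
      let key = std::env::var("OPENCODE_API_KEY")?;
      let client = reqwest::Client::new();
      // Zen-billed model and Go-billed model: same key, same endpoint;
      // upstream routes the charge by model id.
      for model in ["claude-opus-4-7", "kimi-k2.6"] {
          let resp: serde_json::Value = client
              .post("https://opencode.ai/zen/v1/chat/completions")
              .bearer_auth(&key)
              .json(&json!({
                  "model": model,
                  "messages": [{"role": "user", "content": "Say PONG."}],
                  "max_tokens": 32
              }))
              .send()
              .await?
              .json()
              .await?;
          println!("{model}: {}", resp["choices"][0]["message"]["content"]);
      }
      Ok(())
  }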

Notable adapter quirks:

- Strip "opencode/" prefix on outbound (mirrors openrouter/kimi
  pattern). Caller can use {provider:"opencode", model:"X"} or
  {model:"opencode/X"}.
- Drop temperature for claude-*, gpt-5*, and o1/o3/o4 models.
  Anthropic models and OpenAI's reasoning lineage reject temperature
  with a 400 "deprecated for this model". OCChatBody serializes
  temperature as Option<f64> with skip_serializing_if, so omitting it
  produces clean JSON (see the serde sketch after this list).
- max_tokens.filter(|&n| n > 0) catches Some(0) — defensive after
  the same trap bit kimi.rs (empty env -> Number("") -> 0 -> 503).
- 600s default upstream timeout; reasoning models on big audit
  prompts legitimately take 3-5 min. Override OPENCODE_TIMEOUT_SECS.
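
What the skip_serializing_if behavior looks like on the wire,
condensed from the OCChatBody shape added below (struct trimmed to the
two relevant fields, drop predicate shortened to two prefixes):

  use serde::Serialize;

  #[derive(Serialize)]
  struct Body {
      model: String,
      #[serde(skip_serializing_if = "Option::is_none")]
      temperature: Option<f64>,
  }

  fn main() {
      let drop_temp = |m: &str| m.starts_with("claude-") || m.starts_with("gpt-5");
      for model in ["claude-opus-4-7", "kimi-k2.6"] {
          let body = Body {
              model: model.into(),
              temperature: if drop_temp(model) { None } else { Some(0.3) },
          };
          // claude-opus-4-7 -> {"model":"claude-opus-4-7"}                 (no temperature key)
          // kimi-k2.6       -> {"model":"kimi-k2.6","temperature":0.3}
          println!("{}", serde_json::to_string(&body).unwrap());
      }
  }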

Key handling:
- /etc/lakehouse/opencode.env (0600 root) loaded via systemd
  EnvironmentFile. Same pattern as kimi.env.
- OPENCODE_API_KEY env first, file scrape as fallback.
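
Expected file/unit shape, for reference (key value elided; the
EnvironmentFile= line lives wherever the gateway unit already loads
kimi.env):

  # /etc/lakehouse/opencode.env   (root:root, 0600)
  OPENCODE_API_KEY=sk-...

  # gateway systemd unit, same pattern as kimi.env:
  EnvironmentFile=/etc/lakehouse/opencode.env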

Verified end-to-end:
  opencode/claude-opus-4-7   -> "I'm Claude, made by Anthropic."
  opencode/kimi-k2.6         -> PONG-K26-GO
  opencode/deepseek-v4-pro   -> PONG-DS-V4
  opencode/glm-5.1           -> PONG-GLM
  opencode/minimax-m2.5-free -> PONG-FREE

Pricing reference (per audit @ ~14k in / 6k out):
  claude-opus-4-7   ~$0.22  (Zen)
  claude-haiku-4-5  ~$0.04  (Zen)
  gpt-5.5-pro       ~$1.50  (Zen)
  gemini-3-flash    ~$0.03  (Zen)
  kimi-k2.6 / glm / deepseek / qwen / minimax / mimo: covered by Go
  subscription ($10/mo, $60/mo cap).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
root 2026-04-27 06:40:55 -05:00
parent ff5de76241
commit bc698eb6da
4 changed files with 286 additions and 1 deletion


@@ -45,6 +45,27 @@ default_model = "openai/gpt-oss-120b:free"
# Model-prefix routing: "openrouter/<vendor>/<model>" auto-routes here,
# prefix stripped before upstream call.
[[provider]]
name = "opencode"
base_url = "https://opencode.ai/zen/v1"
# Unified endpoint — covers BOTH Zen (pay-per-token Anthropic/OpenAI/
# Gemini frontier) AND Go (flat-sub Kimi/GLM/DeepSeek/Qwen/Minimax).
# Upstream bills per-model: Zen models hit Zen balance, Go models hit
# Go subscription cap. /zen/go/v1 is the Go-only sub-path (rejects
# Zen models), kept for reference but not used by this provider.
auth = "bearer"
auth_env = "OPENCODE_API_KEY"
default_model = "claude-opus-4-7"
# OpenCode (Zen + GO unified endpoint). One sk-* key reaches Claude
# Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
# Qwen, plus 4 free-tier models. OpenAI-compatible Chat Completions
# at /v1/chat/completions. Model-prefix routing: "opencode/<name>"
# auto-routes here, prefix stripped before upstream call.
# Key file: /etc/lakehouse/opencode.env (loaded via systemd EnvironmentFile).
# Model catalog: curl -H "Authorization: Bearer ..." https://opencode.ai/zen/v1/models
# Note: /zen/go/v1 is the GO-only sub-path (Kimi/GLM/DeepSeek tier);
# /zen/v1 covers everything including Anthropic (which /zen/go/v1 rejects).
[[provider]]
name = "kimi"
base_url = "https://api.kimi.com/coding/v1"


@@ -283,6 +283,18 @@ async fn main() {
            }
            k
        },
        opencode_key: {
            // OpenCode GO multi-vendor gateway — Claude Opus 4.7,
            // GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
            // Qwen + free-tier. Key from /etc/lakehouse/opencode.env.
            let k = v1::opencode::resolve_opencode_key();
            if k.is_some() {
                tracing::info!("v1: OpenCode key loaded — /v1/chat provider=opencode enabled (40 models)");
            } else {
                tracing::debug!("v1: no OpenCode key — provider=opencode will 503");
            }
            k
        },
        // Phase 40 early deliverable — Langfuse trace emitter.
        // Defaults match mcp-server/tracing.ts conventions so
        // gateway traces land in the same staffing project.


@@ -17,6 +17,7 @@ pub mod openrouter;
pub mod gemini;
pub mod claude;
pub mod kimi;
pub mod opencode;
pub mod langfuse_trace;
pub mod mode;
pub mod respond;
@@ -60,6 +61,13 @@ pub struct V1State {
    /// from `KIMI_API_KEY` env or `/etc/lakehouse/kimi.env`. None =
    /// provider="kimi" calls 503.
    pub kimi_key: Option<String>,
    /// OpenCode GO (opencode.ai) bearer token — multi-vendor curated
    /// gateway. One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro,
    /// Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM, Qwen + free-tier.
    /// Loaded at startup via `opencode::resolve_opencode_key()` from
    /// `OPENCODE_API_KEY` env or `/etc/lakehouse/opencode.env`. None =
    /// provider="opencode" calls 503.
    pub opencode_key: Option<String>,
    /// Phase 40 early deliverable — Langfuse client. None = tracing
    /// disabled (keys missing or container unreachable). Traces are
    /// fire-and-forget: never block the response path.
@@ -234,6 +242,9 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
    if let Some(rest) = req.model.strip_prefix("kimi/") {
        return ("kimi".to_string(), rest.to_string());
    }
    if let Some(rest) = req.model.strip_prefix("opencode/") {
        return ("opencode".to_string(), rest.to_string());
    }
    // Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`,
    // `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter.
    // This makes the gateway a drop-in OpenAI-compatible middleware:
@@ -433,10 +444,23 @@ async fn chat(
                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("kimi: {e}")))?;
            (r, "kimi".to_string())
        }
        "opencode" => {
            // OpenCode GO multi-vendor gateway — Claude Opus 4.7,
            // GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
            // Qwen, free-tier. OpenAI-compat at opencode.ai/zen/v1.
            let key = state.opencode_key.as_deref().ok_or((
                StatusCode::SERVICE_UNAVAILABLE,
                "OPENCODE_API_KEY not configured".to_string(),
            ))?;
            let r = opencode::chat(key, &*req_for_adapter)
                .await
                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("opencode: {e}")))?;
            (r, "opencode".to_string())
        }
        other => {
            return Err((
                StatusCode::BAD_REQUEST,
                format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi"),
                format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi, opencode"),
            ));
        }
    };


@@ -0,0 +1,228 @@
//! OpenCode GO adapter — multi-vendor curated gateway via the unified opencode.ai/zen/v1 endpoint.
//!
//! One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro,
//! Kimi K2.6, DeepSeek, GLM, Qwen, plus 4 free-tier models.
//! OpenAI-compatible Chat Completions; auth via Bearer.
//!
//! Why a separate adapter (vs reusing openrouter.rs):
//! - Different account, different key, different base_url
//! - No HTTP-Referer / X-Title headers (those are OpenRouter-specific)
//! - Future-proof for any opencode-only request shaping
//!
//! Key sourcing priority:
//! 1. Env var `OPENCODE_API_KEY` (loaded from /etc/lakehouse/opencode.env
//! via systemd EnvironmentFile=)
//! 2. /etc/lakehouse/opencode.env directly (rescue path if env missing)
//!
//! Resolved once at gateway startup, stored on `V1State.opencode_key`.
//! Model-prefix routing: "opencode/<model>" auto-routes here, prefix
//! stripped before upstream call.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
// /zen/v1 is the unified OpenCode endpoint that covers BOTH the
// Zen pay-per-token tier (Claude/GPT/Gemini frontier) AND the Go
// subscription tier (Kimi/GLM/DeepSeek/Qwen/Minimax/mimo). When the
// caller has both, opencode bills per-model: Zen models charge Zen
// balance, Go models charge against the Go subscription cap.
//
// /zen/go/v1 exists as a Go-only sub-path (rejects Zen models with
// "Model not supported"); we use the unified /zen/v1 since the same
// key works for both with correct billing routing upstream.
const OPENCODE_BASE_URL: &str = "https://opencode.ai/zen/v1";
// 600s default — opencode upstream models include reasoning-heavy
// variants (Claude Opus, Kimi K2.6, GLM-5.1) that legitimately take
// 3-5 min on big audit prompts. Override via OPENCODE_TIMEOUT_SECS.
const OPENCODE_TIMEOUT_SECS_DEFAULT: u64 = 600;
fn opencode_timeout_secs() -> u64 {
    std::env::var("OPENCODE_TIMEOUT_SECS")
        .ok()
        .and_then(|s| s.trim().parse::<u64>().ok())
        .filter(|&n| n > 0)
        .unwrap_or(OPENCODE_TIMEOUT_SECS_DEFAULT)
}
pub fn resolve_opencode_key() -> Option<String> {
    if let Ok(k) = std::env::var("OPENCODE_API_KEY") {
        if !k.trim().is_empty() { return Some(k.trim().to_string()); }
    }
    if let Ok(raw) = std::fs::read_to_string("/etc/lakehouse/opencode.env") {
        for line in raw.lines() {
            if let Some(rest) = line.strip_prefix("OPENCODE_API_KEY=") {
                let k = rest.trim().trim_matches('"').trim_matches('\'');
                if !k.is_empty() { return Some(k.to_string()); }
            }
        }
    }
    None
}
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "opencode/" namespace prefix so the upstream sees the
    // bare model id (e.g. "claude-opus-4-7", "kimi-k2.6").
    let model = req.model.strip_prefix("opencode/").unwrap_or(&req.model).to_string();
    // Anthropic models on opencode reject `temperature` with a 400
    // "temperature is deprecated for this model" error. Strip the
    // field for claude-* and the new gpt-5.x reasoning lineages
    // (Anthropic/OpenAI's reasoning models all moved away from temp).
    // Other models keep the caller's value or default to 0.3.
    let drop_temp = model.starts_with("claude-")
        || model.starts_with("gpt-5")
        || model.starts_with("o1")
        || model.starts_with("o3")
        || model.starts_with("o4");
    let body = OCChatBody {
        model: model.clone(),
        messages: req.messages.iter().map(|m| OCMessage {
            role: m.role.clone(),
            content: m.content.clone(),
        }).collect(),
        // filter(|&n| n > 0) catches Some(0) — same trap that bit the
        // Kimi adapter when callers passed empty-env-parsed-to-0.
        max_tokens: req.max_tokens.filter(|&n| n > 0).unwrap_or(800),
        temperature: if drop_temp { None } else { Some(req.temperature.unwrap_or(0.3)) },
        stream: false,
    };
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(opencode_timeout_secs()))
        .build()
        .map_err(|e| format!("build client: {e}"))?;
    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/chat/completions", OPENCODE_BASE_URL))
        .bearer_auth(key)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("opencode.ai unreachable: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("opencode.ai {}: {}", status, body));
    }
    let parsed: OCChatResponse = resp.json().await
        .map_err(|e| format!("invalid opencode response: {e}"))?;
    let latency_ms = t0.elapsed().as_millis();
    let choice = parsed.choices.into_iter().next()
        .ok_or_else(|| "opencode returned no choices".to_string())?;
    let text = choice.message.content;
    let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
        ((text.chars().count() + 3) / 4) as u32
    });
    tracing::info!(
        target: "v1.chat",
        provider = "opencode",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "opencode chat completed",
    );
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
            finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
// -- OpenCode wire shapes (OpenAI-compatible) --
#[derive(Serialize)]
struct OCChatBody {
    model: String,
    messages: Vec<OCMessage>,
    max_tokens: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    stream: bool,
}
#[derive(Serialize)]
struct OCMessage { role: String, content: serde_json::Value }
#[derive(Deserialize)]
struct OCChatResponse {
    choices: Vec<OCChoice>,
    #[serde(default)]
    usage: Option<OCUsage>,
}
#[derive(Deserialize)]
struct OCChoice {
    message: OCMessageResp,
    #[serde(default)]
    finish_reason: Option<String>,
}
#[derive(Deserialize)]
struct OCMessageResp { content: String }
#[derive(Deserialize)]
struct OCUsage { prompt_tokens: u32, completion_tokens: u32 }
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn resolve_opencode_key_does_not_panic() {
        let _ = resolve_opencode_key();
    }
    #[test]
    fn model_prefix_strip() {
        let cases = [
            ("opencode/claude-opus-4-7", "claude-opus-4-7"),
            ("opencode/kimi-k2.6", "kimi-k2.6"),
            ("claude-opus-4-7", "claude-opus-4-7"),
        ];
        for (input, expected) in cases {
            let out = input.strip_prefix("opencode/").unwrap_or(input);
            assert_eq!(out, expected);
        }
    }
    #[test]
    fn max_tokens_filters_zero() {
        // The trap: empty env -> Number("") -> 0 -> Some(0). Adapter
        // must not pass 0 upstream; should fall to 800.
        let some_zero: Option<u32> = Some(0);
        let result = some_zero.filter(|&n| n > 0).unwrap_or(800);
        assert_eq!(result, 800);
        let some_real: Option<u32> = Some(4096);
        assert_eq!(some_real.filter(|&n| n > 0).unwrap_or(800), 4096);
        let none_val: Option<u32> = None;
        assert_eq!(none_val.filter(|&n| n > 0).unwrap_or(800), 800);
    }
}