gateway: OpenCode (Zen + Go) provider adapter
Wires opencode.ai as a /v1/chat provider. One sk-* key reaches 40
models across Anthropic, OpenAI, Google, Moonshot, DeepSeek, Zhipu,
Alibaba, Minimax — billed against either the user's Zen balance
(pay-per-token premium models) or Go subscription (flat-rate
Kimi/GLM/DeepSeek/etc.). The unified /zen/v1 endpoint routes both;
upstream picks the billing tier based on model id.
Notable adapter quirks:
- Strip "opencode/" prefix on outbound (mirrors openrouter/kimi
pattern). Caller can use {provider:"opencode", model:"X"} or
{model:"opencode/X"}.
- Drop temperature for claude-*, gpt-5*, and o1/o3/o4 models.
Anthropic's and OpenAI's reasoning lineages reject temperature with a
400 "deprecated for this model". OCChatBody now serializes temperature
as Option<f64> with skip_serializing_if, so omitting it produces clean
JSON (see the sketch after this list).
- max_tokens.filter(|&n| n > 0) catches Some(0) — defensive after
the same trap bit kimi.rs (empty env -> Number("") -> 0 -> 503).
- 600s default upstream timeout; reasoning models on big audit
prompts legitimately take 3-5 min. Override OPENCODE_TIMEOUT_SECS.
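
The temperature fix is just serde's skip_serializing_if doing the work.
A minimal sketch of the omission behavior (struct trimmed to the one
relevant field; mirrors OCChatBody in the diff below):

    use serde::Serialize;

    #[derive(Serialize)]
    struct Body {
        model: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        temperature: Option<f64>,
    }

    fn main() {
        let dropped = Body { model: "claude-opus-4-7".into(), temperature: None };
        // serializes to {"model":"claude-opus-4-7"}: no "temperature":null to trip the 400
        assert!(!serde_json::to_string(&dropped).unwrap().contains("temperature"));

        let kept = Body { model: "kimi-k2.6".into(), temperature: Some(0.3) };
        assert_eq!(
            serde_json::to_string(&kept).unwrap(),
            r#"{"model":"kimi-k2.6","temperature":0.3}"#
        );
    }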
Key handling:
- /etc/lakehouse/opencode.env (0600 root) loaded via systemd
EnvironmentFile. Same pattern as kimi.env.
- OPENCODE_API_KEY env first, file scrape as fallback.
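
For reference, the expected wiring (the drop-in path below is an
assumption; use whichever systemd unit runs the gateway):

    # /etc/lakehouse/opencode.env (0600 root)
    OPENCODE_API_KEY=sk-...

    # e.g. /etc/systemd/system/gateway.service.d/opencode.conf
    [Service]
    EnvironmentFile=/etc/lakehouse/opencode.env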
Verified end-to-end:
opencode/claude-opus-4-7 -> "I'm Claude, made by Anthropic."
opencode/kimi-k2.6 -> PONG-K26-GO
opencode/deepseek-v4-pro -> PONG-DS-V4
opencode/glm-5.1 -> PONG-GLM
opencode/minimax-m2.5-free -> PONG-FREE
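
Each check is a plain POST against the gateway. A sketch of the kimi
ping (gateway host/port are assumptions, not part of this commit):

    use serde_json::json;

    #[tokio::main]
    async fn main() -> Result<(), reqwest::Error> {
        let resp = reqwest::Client::new()
            .post("http://127.0.0.1:8080/v1/chat")
            .json(&json!({
                "model": "opencode/kimi-k2.6",
                "messages": [{"role": "user", "content": "Reply with exactly: PONG-K26-GO"}],
                "max_tokens": 16
            }))
            .send()
            .await?;
        println!("{}", resp.text().await?);
        Ok(())
    }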
Pricing reference (per audit @ ~14k in / 6k out):
claude-opus-4-7 ~$0.22 (Zen)
claude-haiku-4-5 ~$0.04 (Zen)
gpt-5.5-pro ~$1.50 (Zen)
gemini-3-flash ~$0.03 (Zen)
kimi-k2.6 / glm / deepseek / qwen / minimax / mimo: covered by Go
subscription ($10/mo, $60/mo cap).
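
(The opus row checks out against illustrative per-token rates, not
published prices: assuming $5/M input and $25/M output,

    14,000/1M * $5.00 = $0.07
     6,000/1M * $25.0 = $0.15
                        ------
                        ~$0.22

per audit. The other Zen rows scale the same way from their rates.)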
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
commit bc698eb6da (parent ff5de76241)
@@ -45,6 +45,27 @@ default_model = "openai/gpt-oss-120b:free"
 # Model-prefix routing: "openrouter/<vendor>/<model>" auto-routes here,
 # prefix stripped before upstream call.
 
+[[provider]]
+name = "opencode"
+base_url = "https://opencode.ai/zen/v1"
+# Unified endpoint — covers BOTH Zen (pay-per-token Anthropic/OpenAI/
+# Gemini frontier) AND Go (flat-sub Kimi/GLM/DeepSeek/Qwen/Minimax).
+# Upstream bills per-model: Zen models hit Zen balance, Go models hit
+# Go subscription cap. /zen/go/v1 is the Go-only sub-path (rejects
+# Zen models), kept for reference but not used by this provider.
+auth = "bearer"
+auth_env = "OPENCODE_API_KEY"
+default_model = "claude-opus-4-7"
+# OpenCode (Zen + GO unified endpoint). One sk-* key reaches Claude
+# Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
+# Qwen, plus 4 free-tier models. OpenAI-compatible Chat Completions
+# at /v1/chat/completions. Model-prefix routing: "opencode/<name>"
+# auto-routes here, prefix stripped before upstream call.
+# Key file: /etc/lakehouse/opencode.env (loaded via systemd EnvironmentFile).
+# Model catalog: curl -H "Authorization: Bearer ..." https://opencode.ai/zen/v1/models
+# Note: /zen/go/v1 is the GO-only sub-path (Kimi/GLM/DeepSeek tier);
+# /zen/v1 covers everything including Anthropic (which /zen/go/v1 rejects).
+
 [[provider]]
 name = "kimi"
 base_url = "https://api.kimi.com/coding/v1"
@@ -283,6 +283,18 @@ async fn main() {
             }
             k
         },
+        opencode_key: {
+            // OpenCode GO multi-vendor gateway — Claude Opus 4.7,
+            // GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
+            // Qwen + free-tier. Key from /etc/lakehouse/opencode.env.
+            let k = v1::opencode::resolve_opencode_key();
+            if k.is_some() {
+                tracing::info!("v1: OpenCode key loaded — /v1/chat provider=opencode enabled (40 models)");
+            } else {
+                tracing::debug!("v1: no OpenCode key — provider=opencode will 503");
+            }
+            k
+        },
         // Phase 40 early deliverable — Langfuse trace emitter.
         // Defaults match mcp-server/tracing.ts conventions so
         // gateway traces land in the same staffing project.
@@ -17,6 +17,7 @@ pub mod openrouter;
 pub mod gemini;
 pub mod claude;
 pub mod kimi;
+pub mod opencode;
 pub mod langfuse_trace;
 pub mod mode;
 pub mod respond;

@@ -60,6 +61,13 @@ pub struct V1State {
     /// from `KIMI_API_KEY` env or `/etc/lakehouse/kimi.env`. None =
     /// provider="kimi" calls 503.
     pub kimi_key: Option<String>,
+    /// OpenCode GO (opencode.ai) bearer token — multi-vendor curated
+    /// gateway. One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro,
+    /// Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM, Qwen + free-tier.
+    /// Loaded at startup via `opencode::resolve_opencode_key()` from
+    /// `OPENCODE_API_KEY` env or `/etc/lakehouse/opencode.env`. None =
+    /// provider="opencode" calls 503.
+    pub opencode_key: Option<String>,
     /// Phase 40 early deliverable — Langfuse client. None = tracing
     /// disabled (keys missing or container unreachable). Traces are
     /// fire-and-forget: never block the response path.
@@ -234,6 +242,9 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
     if let Some(rest) = req.model.strip_prefix("kimi/") {
         return ("kimi".to_string(), rest.to_string());
     }
+    if let Some(rest) = req.model.strip_prefix("opencode/") {
+        return ("opencode".to_string(), rest.to_string());
+    }
     // Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`,
     // `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter.
     // This makes the gateway a drop-in OpenAI-compatible middleware:
@@ -433,10 +444,23 @@ async fn chat(
                 .map_err(|e| (StatusCode::BAD_GATEWAY, format!("kimi: {e}")))?;
             (r, "kimi".to_string())
         }
+        "opencode" => {
+            // OpenCode GO multi-vendor gateway — Claude Opus 4.7,
+            // GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
+            // Qwen, free-tier. OpenAI-compat at opencode.ai/zen/v1.
+            let key = state.opencode_key.as_deref().ok_or((
+                StatusCode::SERVICE_UNAVAILABLE,
+                "OPENCODE_API_KEY not configured".to_string(),
+            ))?;
+            let r = opencode::chat(key, &*req_for_adapter)
+                .await
+                .map_err(|e| (StatusCode::BAD_GATEWAY, format!("opencode: {e}")))?;
+            (r, "opencode".to_string())
+        }
         other => {
             return Err((
                 StatusCode::BAD_REQUEST,
-                format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi"),
+                format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi, opencode"),
             ));
         }
     };
crates/gateway/src/v1/opencode.rs (new file, 228 lines)
@@ -0,0 +1,228 @@
//! OpenCode (Zen + Go) adapter — multi-vendor curated gateway via
//! opencode.ai/zen/v1.
//!
//! One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro,
//! Kimi K2.6, DeepSeek, GLM, Qwen, plus 4 free-tier models.
//! OpenAI-compatible Chat Completions; auth via Bearer.
//!
//! Why a separate adapter (vs reusing openrouter.rs):
//! - Different account, different key, different base_url
//! - No HTTP-Referer / X-Title headers (those are OpenRouter-specific)
//! - Future-proof for any opencode-only request shaping
//!
//! Key sourcing priority:
//! 1. Env var `OPENCODE_API_KEY` (loaded from /etc/lakehouse/opencode.env
//!    via systemd EnvironmentFile=)
//! 2. /etc/lakehouse/opencode.env directly (rescue path if env missing)
//!
//! Resolved once at gateway startup, stored on `V1State.opencode_key`.
//! Model-prefix routing: "opencode/<model>" auto-routes here, prefix
//! stripped before upstream call.

use std::time::Duration;
use serde::{Deserialize, Serialize};

use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};

// /zen/v1 is the unified OpenCode endpoint that covers BOTH the
// Zen pay-per-token tier (Claude/GPT/Gemini frontier) AND the Go
// subscription tier (Kimi/GLM/DeepSeek/Qwen/Minimax/mimo). When the
// caller has both, opencode bills per-model: Zen models charge Zen
// balance, Go models charge against the Go subscription cap.
//
// /zen/go/v1 exists as a Go-only sub-path (rejects Zen models with
// "Model not supported"); we use the unified /zen/v1 since the same
// key works for both with correct billing routing upstream.
const OPENCODE_BASE_URL: &str = "https://opencode.ai/zen/v1";
// 600s default — opencode upstream models include reasoning-heavy
// variants (Claude Opus, Kimi K2.6, GLM-5.1) that legitimately take
// 3-5 min on big audit prompts. Override via OPENCODE_TIMEOUT_SECS.
const OPENCODE_TIMEOUT_SECS_DEFAULT: u64 = 600;

fn opencode_timeout_secs() -> u64 {
    std::env::var("OPENCODE_TIMEOUT_SECS")
        .ok()
        .and_then(|s| s.trim().parse::<u64>().ok())
        .filter(|&n| n > 0)
        .unwrap_or(OPENCODE_TIMEOUT_SECS_DEFAULT)
}

pub fn resolve_opencode_key() -> Option<String> {
    if let Ok(k) = std::env::var("OPENCODE_API_KEY") {
        if !k.trim().is_empty() { return Some(k.trim().to_string()); }
    }
    if let Ok(raw) = std::fs::read_to_string("/etc/lakehouse/opencode.env") {
        for line in raw.lines() {
            if let Some(rest) = line.strip_prefix("OPENCODE_API_KEY=") {
                let k = rest.trim().trim_matches('"').trim_matches('\'');
                if !k.is_empty() { return Some(k.to_string()); }
            }
        }
    }
    None
}

pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "opencode/" namespace prefix so the upstream sees the
    // bare model id (e.g. "claude-opus-4-7", "kimi-k2.6").
    let model = req.model.strip_prefix("opencode/").unwrap_or(&req.model).to_string();

    // Anthropic models on opencode reject `temperature` with a 400
    // "temperature is deprecated for this model" error. Strip the
    // field for claude-* and the new gpt-5.x reasoning lineages
    // (Anthropic/OpenAI's reasoning models all moved away from temp).
    // Other models keep the caller's value or default to 0.3.
    let drop_temp = model.starts_with("claude-")
        || model.starts_with("gpt-5")
        || model.starts_with("o1")
        || model.starts_with("o3")
        || model.starts_with("o4");
    let body = OCChatBody {
        model: model.clone(),
        messages: req.messages.iter().map(|m| OCMessage {
            role: m.role.clone(),
            content: m.content.clone(),
        }).collect(),
        // filter(|&n| n > 0) catches Some(0) — same trap that bit the
        // Kimi adapter when callers passed empty-env-parsed-to-0.
        max_tokens: req.max_tokens.filter(|&n| n > 0).unwrap_or(800),
        temperature: if drop_temp { None } else { Some(req.temperature.unwrap_or(0.3)) },
        stream: false,
    };

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(opencode_timeout_secs()))
        .build()
        .map_err(|e| format!("build client: {e}"))?;

    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/chat/completions", OPENCODE_BASE_URL))
        .bearer_auth(key)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("opencode.ai unreachable: {e}"))?;

    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("opencode.ai {}: {}", status, body));
    }

    let parsed: OCChatResponse = resp.json().await
        .map_err(|e| format!("invalid opencode response: {e}"))?;

    let latency_ms = t0.elapsed().as_millis();
    let choice = parsed.choices.into_iter().next()
        .ok_or_else(|| "opencode returned no choices".to_string())?;
    let text = choice.message.content;

    let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
        ((text.chars().count() + 3) / 4) as u32
    });

    tracing::info!(
        target: "v1.chat",
        provider = "opencode",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "opencode chat completed",
    );

    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
            finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}

// -- OpenCode wire shapes (OpenAI-compatible) --

#[derive(Serialize)]
struct OCChatBody {
    model: String,
    messages: Vec<OCMessage>,
    max_tokens: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    stream: bool,
}

#[derive(Serialize)]
struct OCMessage { role: String, content: serde_json::Value }

#[derive(Deserialize)]
struct OCChatResponse {
    choices: Vec<OCChoice>,
    #[serde(default)]
    usage: Option<OCUsage>,
}

#[derive(Deserialize)]
struct OCChoice {
    message: OCMessageResp,
    #[serde(default)]
    finish_reason: Option<String>,
}

#[derive(Deserialize)]
struct OCMessageResp { content: String }

#[derive(Deserialize)]
struct OCUsage { prompt_tokens: u32, completion_tokens: u32 }

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn resolve_opencode_key_does_not_panic() {
        let _ = resolve_opencode_key();
    }

    #[test]
    fn model_prefix_strip() {
        let cases = [
            ("opencode/claude-opus-4-7", "claude-opus-4-7"),
            ("opencode/kimi-k2.6", "kimi-k2.6"),
            ("claude-opus-4-7", "claude-opus-4-7"),
        ];
        for (input, expected) in cases {
            let out = input.strip_prefix("opencode/").unwrap_or(input);
            assert_eq!(out, expected);
        }
    }

    #[test]
    fn max_tokens_filters_zero() {
        // The trap: empty env -> Number("") -> 0 -> Some(0). Adapter
        // must not pass 0 upstream; should fall to 800.
        let some_zero: Option<u32> = Some(0);
        let result = some_zero.filter(|&n| n > 0).unwrap_or(800);
        assert_eq!(result, 800);
        let some_real: Option<u32> = Some(4096);
        assert_eq!(some_real.filter(|&n| n > 0).unwrap_or(800), 4096);
        let none_val: Option<u32> = None;
        assert_eq!(none_val.filter(|&n| n > 0).unwrap_or(800), 800);
    }
}