From 3a0b37ed93ad263166bbfea009dce9806c21e3c9 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 26 Apr 2026 17:49:37 -0500 Subject: [PATCH] =?UTF-8?q?v1:=20OpenAI-compat=20alias=20+=20smart=20provi?= =?UTF-8?q?der=20routing=20=E2=80=94=20gateway=20is=20now=20drop-in=20midd?= =?UTF-8?q?leware?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /v1/chat/completions route alias (same handler as /chat) lets any tool using the official `openai` SDK adopt the gateway via OPENAI_BASE_URL alone — no custom provider field needed. resolve_provider() extended: - bare `vendor/model` (slash) → openrouter (catches x-ai/grok-4.1-fast, moonshotai/kimi-k2, deepseek/deepseek-v4-flash, openai/gpt-oss-120b:free) - bare vendor model names (no slash, no colon) get auto-prefixed: gpt-* / o1-* / o3-* / o4-* (plus the bare names o1 / o3) → openai/ (OpenRouter form) claude-* → anthropic/ grok-* → x-ai/ Then routed to openrouter. Ollama models (with colon, no slash) keep default routing, as do bare names that match none of the prefixes above. Tools like pi-ai validate against an OpenAI-style catalog and send bare names — this lets them flow through cleanly. Verified end-to-end: - curl POST /v1/chat/completions {model: "gpt-4o-mini", ...} → 200, routed to openrouter as openai/gpt-4o-mini - openai SDK with baseURL=http://localhost:3100/v1 → 3 model variants all succeed (openai/gpt-4o-mini, gpt-4o-mini, x-ai/grok-4.1-fast) - Langfuse traces fire automatically on every call (v1.chat:openrouter, provider tagged in metadata) scripts/mode_pass5_variance_paid.ts gains LH_CONDITIONS env so subset runs (e.g. just isolation vs composed) finish in roughly half the time. Archon-on-Lakehouse integration: gateway side is done. Pi-ai's openai-responses backend uses /v1/responses (not /chat/completions) and its openrouter backend appears to bail in client-side validation before sending. Patching Pi locally to override baseUrl works for arch but the harness still rejects — needs more work in a follow-up. 
Direct openai SDK path (langchain-js / agents / patched Pi) works today. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/gateway/src/v1/mod.rs | 34 +++++++++++++++++++++++++++++ scripts/mode_pass5_variance_paid.ts | 11 +++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs index b477d86..875a077 100644 --- a/crates/gateway/src/v1/mod.rs +++ b/crates/gateway/src/v1/mod.rs @@ -80,6 +80,11 @@ pub struct ProviderUsage { pub fn router(state: V1State) -> Router { Router::new() .route("/chat", post(chat)) + // Canonical OpenAI path alias — lets any client built on the + // openai SDK (pi-ai, langchain-js, etc.) treat the gateway as + // a drop-in middleware via OPENAI_BASE_URL=http://gw/v1 alone. + // Same handler as /chat; same OpenAI-compatible request shape. + .route("/chat/completions", post(chat)) .route("/respond", post(respond::respond)) .route("/usage", get(usage)) .route("/sessions", get(sessions)) @@ -179,6 +184,35 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) { if let Some(rest) = req.model.strip_prefix("claude/") { return ("claude".to_string(), rest.to_string()); } + // Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`, + // `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter. + // This makes the gateway a drop-in OpenAI-compatible middleware: + // clients using the official `openai` SDK only set OPENAI_BASE_URL + // + a model name and get correct upstream routing without needing + // our custom `provider` field. Ollama models in J's stack use + // `model:tag` form with NO slash (`qwen3.5:latest`, `kimi-k2:1t`), + // so a slash here unambiguously means "namespaced provider/model". + if req.model.contains('/') { + return ("openrouter".to_string(), req.model.clone()); + } + // Vendor-bare model names (no slash, no colon) — `gpt-4o-mini`, + // `claude-3-5-sonnet-20241022`, etc. 
Tools like pi-ai validate + // models against an OpenAI-style catalog (no namespace prefix), + // so they send the bare name. Map to OpenRouter's namespaced form + // by inferring the vendor from the leading token. Falls through to + // ollama if no pattern matches — preserves existing behavior. + if !req.model.contains(':') && !req.model.contains('/') { + let m = req.model.as_str(); + if m.starts_with("gpt-") || m.starts_with("o1-") || m.starts_with("o3-") || m.starts_with("o4-") || m == "o1" || m == "o3" || m == "o4-mini" { + return ("openrouter".to_string(), format!("openai/{}", m)); + } + if m.starts_with("claude-") { + return ("openrouter".to_string(), format!("anthropic/{}", m)); + } + if m.starts_with("grok-") { + return ("openrouter".to_string(), format!("x-ai/{}", m)); + } + } ("ollama".to_string(), req.model.clone()) } diff --git a/scripts/mode_pass5_variance_paid.ts b/scripts/mode_pass5_variance_paid.ts index 2191747..47dbe29 100644 --- a/scripts/mode_pass5_variance_paid.ts +++ b/scripts/mode_pass5_variance_paid.ts @@ -34,13 +34,22 @@ interface Condition { corpus?: string | string[]; } -const CONDITIONS: Condition[] = [ +const ALL_CONDITIONS: Condition[] = [ { label: "isolation ", mode: "codereview_isolation" }, { label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" }, { label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" }, { label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ }, ]; +// Optional whitelist via env: LH_CONDITIONS=isolation,composed limits the +// run to a subset (matches against the trimmed `label`). Useful when only +// the head-to-head pair matters and saves ~50% latency on slow rungs. +const wantedLabels = (process.env.LH_CONDITIONS ?? "") + .split(",").map(s => s.trim().toLowerCase()).filter(Boolean); +const CONDITIONS: Condition[] = wantedLabels.length === 0 + ? 
ALL_CONDITIONS + : ALL_CONDITIONS.filter(c => wantedLabels.some(w => c.label.trim().toLowerCase().startsWith(w))); + async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> { const body: any = { task_class: "scrum_review",