From 3a0b37ed93ad263166bbfea009dce9806c21e3c9 Mon Sep 17 00:00:00 2001
From: root <root@island37.com>
Date: Sun, 26 Apr 2026 17:49:37 -0500
Subject: [PATCH] =?UTF-8?q?v1:=20OpenAI-compat=20alias=20+=20smart=20provi?=
 =?UTF-8?q?der=20routing=20=E2=80=94=20gateway=20is=20now=20drop-in=20midd?=
 =?UTF-8?q?leware?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

/v1/chat/completions route alias (same handler as /chat) lets any tool
using the official `openai` SDK adopt the gateway via OPENAI_BASE_URL
alone — no custom provider field needed.

resolve_provider() extended:
- bare `vendor/model` (slash) → openrouter (catches x-ai/grok-4.1-fast,
  moonshotai/kimi-k2, deepseek/deepseek-v4-flash, openai/gpt-oss-120b:free)
- bare vendor model names (no slash, no colon) get auto-prefixed:
  gpt-* / o1-* / o3-* / o4-* (plus exact `o1`, `o3`) → openai/<name>
  (OpenRouter form)
  claude-* → anthropic/<name>
  grok-* → x-ai/<name>
  and are then routed to openrouter. Everything else — including Ollama
  models (`model:tag`, colon but no slash) — keeps the default ollama
  routing. Tools like pi-ai validate against an OpenAI-style catalog and
  send bare names — this lets them flow through cleanly.

Verified end-to-end:
- curl POST /v1/chat/completions {model: "gpt-4o-mini", ...} → 200,
  routed to openrouter as openai/gpt-4o-mini
- openai SDK with baseURL=http://localhost:3100/v1 → 3 model variants all
  succeed (openai/gpt-4o-mini, gpt-4o-mini, x-ai/grok-4.1-fast)
- Langfuse traces fire automatically on every call
  (v1.chat:openrouter, provider tagged in metadata)

scripts/mode_pass5_variance_paid.ts gains an LH_CONDITIONS env var
(comma-separated, case-insensitive label prefixes) so subset runs
(e.g. just isolation vs. composed) take roughly half as long.

Archon-on-Lakehouse integration: the gateway side is done. Pi-ai's
openai-responses backend uses /v1/responses (not /chat/completions), and
its openrouter backend appears to bail out in client-side validation
before sending anything. Patching Pi locally to override baseUrl works
for arch, but the harness still rejects — this needs more work in a
follow-up. The direct openai SDK path (langchain-js / agents / patched
Pi) works today.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/gateway/src/v1/mod.rs        | 34 +++++++++++++++++++++++++++++
 scripts/mode_pass5_variance_paid.ts | 11 +++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs
index b477d86..875a077 100644
--- a/crates/gateway/src/v1/mod.rs
+++ b/crates/gateway/src/v1/mod.rs
@@ -80,6 +80,11 @@ pub struct ProviderUsage {
 pub fn router(state: V1State) -> Router {
     Router::new()
         .route("/chat", post(chat))
+        // Canonical OpenAI path alias — lets any client built on the
+        // openai SDK (pi-ai, langchain-js, etc.) treat the gateway as
+        // a drop-in middleware via OPENAI_BASE_URL=http://gw/v1 alone.
+        // Same handler as /chat; same OpenAI-compatible request shape.
+        .route("/chat/completions", post(chat))
         .route("/respond", post(respond::respond))
         .route("/usage", get(usage))
         .route("/sessions", get(sessions))
@@ -179,6 +184,35 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
     if let Some(rest) = req.model.strip_prefix("claude/") {
         return ("claude".to_string(), rest.to_string());
     }
+    // Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`,
+    // `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter.
+    // This makes the gateway a drop-in OpenAI-compatible middleware:
+    // clients using the official `openai` SDK only set OPENAI_BASE_URL
+    // + a model name and get correct upstream routing without needing
+    // our custom `provider` field. Ollama models in J's stack use
+    // `model:tag` form with NO slash (`qwen3.5:latest`, `kimi-k2:1t`),
+    // so a slash here unambiguously means "namespaced provider/model".
+    if req.model.contains('/') {
+        return ("openrouter".to_string(), req.model.clone());
+    }
+    // Vendor-bare model names (no slash, no colon) — `gpt-4o-mini`,
+    // `claude-3-5-sonnet-20241022`, etc. Tools like pi-ai validate
+    // models against an OpenAI-style catalog (no namespace prefix),
+    // so they send the bare name. Map to OpenRouter's namespaced form
+    // by inferring the vendor from the leading token. Falls through to
+    // ollama if no pattern matches — preserves existing behavior.
+    if !req.model.contains(':') && !req.model.contains('/') {
+        let m = req.model.as_str();
+        if m.starts_with("gpt-") || m.starts_with("o1-") || m.starts_with("o3-") || m.starts_with("o4-") || m == "o1" || m == "o3" || m == "o4-mini" {
+            return ("openrouter".to_string(), format!("openai/{}", m));
+        }
+        if m.starts_with("claude-") {
+            return ("openrouter".to_string(), format!("anthropic/{}", m));
+        }
+        if m.starts_with("grok-") {
+            return ("openrouter".to_string(), format!("x-ai/{}", m));
+        }
+    }
     ("ollama".to_string(), req.model.clone())
 }
 
diff --git a/scripts/mode_pass5_variance_paid.ts b/scripts/mode_pass5_variance_paid.ts
index 2191747..47dbe29 100644
--- a/scripts/mode_pass5_variance_paid.ts
+++ b/scripts/mode_pass5_variance_paid.ts
@@ -34,13 +34,22 @@ interface Condition {
   corpus?: string | string[];
 }
 
-const CONDITIONS: Condition[] = [
+const ALL_CONDITIONS: Condition[] = [
   { label: "isolation       ",  mode: "codereview_isolation"  },
   { label: "arch_only       ",  mode: "codereview_lakehouse",  corpus: "lakehouse_arch_v1" },
   { label: "symbols_only    ",  mode: "codereview_lakehouse",  corpus: "lakehouse_symbols_v1" },
   { label: "composed (A+C)  ",  mode: "codereview_lakehouse"  /* uses modes.toml default */ },
 ];
 
+// Optional filter via env: LH_CONDITIONS=isolation,composed keeps only
+// conditions whose trimmed `label` starts with an entry (case-insensitive).
+// Useful for a head-to-head pair; saves ~50% latency on slow rungs.
+const wantedLabels = (process.env.LH_CONDITIONS ?? "")
+  .split(",").map(s => s.trim().toLowerCase()).filter(Boolean);
+const CONDITIONS: Condition[] = wantedLabels.length === 0
+  ? ALL_CONDITIONS
+  : ALL_CONDITIONS.filter(c => wantedLabels.some(w => c.label.trim().toLowerCase().startsWith(w)));
+
 async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> {
   const body: any = {
     task_class: "scrum_review",