diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs
index b477d86..875a077 100644
--- a/crates/gateway/src/v1/mod.rs
+++ b/crates/gateway/src/v1/mod.rs
@@ -80,6 +80,11 @@ pub struct ProviderUsage {
 pub fn router(state: V1State) -> Router {
     Router::new()
         .route("/chat", post(chat))
+        // Canonical OpenAI path alias — lets any client built on the
+        // openai SDK (pi-ai, langchain-js, etc.) treat the gateway as
+        // a drop-in middleware via OPENAI_BASE_URL=http://gw/v1 alone.
+        // Same handler as /chat; same OpenAI-compatible request shape.
+        .route("/chat/completions", post(chat))
         .route("/respond", post(respond::respond))
         .route("/usage", get(usage))
         .route("/sessions", get(sessions))
@@ -179,6 +184,35 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
     if let Some(rest) = req.model.strip_prefix("claude/") {
         return ("claude".to_string(), rest.to_string());
     }
+    // Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`,
+    // `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter.
+    // This makes the gateway a drop-in OpenAI-compatible middleware:
+    // clients using the official `openai` SDK only set OPENAI_BASE_URL
+    // + a model name and get correct upstream routing without needing
+    // our custom `provider` field. Ollama models in J's stack use
+    // `model:tag` form with NO slash (`qwen3.5:latest`, `kimi-k2:1t`),
+    // so a slash here unambiguously means "namespaced provider/model".
+    if req.model.contains('/') {
+        return ("openrouter".to_string(), req.model.clone());
+    }
+    // Vendor-bare model names (no slash, no colon) — `gpt-4o-mini`,
+    // `claude-3-5-sonnet-20241022`, etc. Tools like pi-ai validate
+    // models against an OpenAI-style catalog (no namespace prefix),
+    // so they send the bare name. Map to OpenRouter's namespaced form
+    // by inferring the vendor from the leading token. Falls through to
+    // ollama if no pattern matches — preserves existing behavior.
+    if !req.model.contains(':') {
+        let m = req.model.as_str();
+        if m.starts_with("gpt-") || m.starts_with("o1-") || m.starts_with("o3-") || m.starts_with("o4-") || m == "o1" || m == "o3" || m == "o4-mini" {
+            return ("openrouter".to_string(), format!("openai/{}", m));
+        }
+        if m.starts_with("claude-") {
+            return ("openrouter".to_string(), format!("anthropic/{}", m));
+        }
+        if m.starts_with("grok-") {
+            return ("openrouter".to_string(), format!("x-ai/{}", m));
+        }
+    }
     ("ollama".to_string(), req.model.clone())
 }
 
diff --git a/scripts/mode_pass5_variance_paid.ts b/scripts/mode_pass5_variance_paid.ts
index 2191747..47dbe29 100644
--- a/scripts/mode_pass5_variance_paid.ts
+++ b/scripts/mode_pass5_variance_paid.ts
@@ -34,13 +34,22 @@ interface Condition {
   corpus?: string | string[];
 }
 
-const CONDITIONS: Condition[] = [
+const ALL_CONDITIONS: Condition[] = [
   { label: "isolation ", mode: "codereview_isolation" },
   { label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" },
   { label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" },
   { label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ },
 ];
 
+// Optional whitelist via env: LH_CONDITIONS=isolation,composed limits the
+// run to a subset (matches against the trimmed `label`). Useful when only
+// the head-to-head pair matters and saves ~50% latency on slow rungs.
+const wantedLabels = (process.env.LH_CONDITIONS ?? "")
+  .split(",").map(s => s.trim().toLowerCase()).filter(Boolean);
+const CONDITIONS: Condition[] = wantedLabels.length === 0
+  ? ALL_CONDITIONS
+  : ALL_CONDITIONS.filter(c => wantedLabels.some(w => c.label.trim().toLowerCase().startsWith(w)));
+
 async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> {
   const body: any = {
     task_class: "scrum_review",