Session infrastructure: OpenRouter + tree-split reducer + observer→LLM Team + scrum_applier #11
@ -80,6 +80,11 @@ pub struct ProviderUsage {
|
||||
pub fn router(state: V1State) -> Router {
|
||||
Router::new()
|
||||
.route("/chat", post(chat))
|
||||
// Canonical OpenAI path alias — lets any client built on the
|
||||
// openai SDK (pi-ai, langchain-js, etc.) treat the gateway as
|
||||
// a drop-in middleware via OPENAI_BASE_URL=http://gw/v1 alone.
|
||||
// Same handler as /chat; same OpenAI-compatible request shape.
|
||||
.route("/chat/completions", post(chat))
|
||||
.route("/respond", post(respond::respond))
|
||||
.route("/usage", get(usage))
|
||||
.route("/sessions", get(sessions))
|
||||
@ -179,6 +184,35 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
|
||||
if let Some(rest) = req.model.strip_prefix("claude/") {
|
||||
return ("claude".to_string(), rest.to_string());
|
||||
}
|
||||
// Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`,
|
||||
// `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter.
|
||||
// This makes the gateway a drop-in OpenAI-compatible middleware:
|
||||
// clients using the official `openai` SDK only set OPENAI_BASE_URL
|
||||
// + a model name and get correct upstream routing without needing
|
||||
// our custom `provider` field. Ollama models in J's stack use
|
||||
// `model:tag` form with NO slash (`qwen3.5:latest`, `kimi-k2:1t`),
|
||||
// so a slash here unambiguously means "namespaced provider/model".
|
||||
if req.model.contains('/') {
|
||||
return ("openrouter".to_string(), req.model.clone());
|
||||
}
|
||||
// Vendor-bare model names (no slash, no colon) — `gpt-4o-mini`,
|
||||
// `claude-3-5-sonnet-20241022`, etc. Tools like pi-ai validate
|
||||
// models against an OpenAI-style catalog (no namespace prefix),
|
||||
// so they send the bare name. Map to OpenRouter's namespaced form
|
||||
// by inferring the vendor from the leading token. Falls through to
|
||||
// ollama if no pattern matches — preserves existing behavior.
|
||||
if !req.model.contains(':') && !req.model.contains('/') {
|
||||
let m = req.model.as_str();
|
||||
if m.starts_with("gpt-") || m.starts_with("o1-") || m.starts_with("o3-") || m.starts_with("o4-") || m == "o1" || m == "o3" || m == "o4-mini" {
|
||||
return ("openrouter".to_string(), format!("openai/{}", m));
|
||||
}
|
||||
if m.starts_with("claude-") {
|
||||
return ("openrouter".to_string(), format!("anthropic/{}", m));
|
||||
}
|
||||
if m.starts_with("grok-") {
|
||||
return ("openrouter".to_string(), format!("x-ai/{}", m));
|
||||
}
|
||||
}
|
||||
("ollama".to_string(), req.model.clone())
|
||||
}
|
||||
|
||||
|
||||
@ -34,13 +34,22 @@ interface Condition {
|
||||
corpus?: string | string[];
|
||||
}
|
||||
|
||||
const CONDITIONS: Condition[] = [
|
||||
// Complete list of conditions this script knows about. `CONDITIONS` (declared
// further down) is either this whole list or the subset picked by LH_CONDITIONS.
// NOTE(review): the labels carry trailing whitespace, presumably padding for
// aligned output; the LH_CONDITIONS filter trims them before comparing, so
// confirm with the author before normalizing the strings.
const ALL_CONDITIONS: Condition[] = [
|
||||
{ label: "isolation ", mode: "codereview_isolation" },
|
||||
{ label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" },
|
||||
{ label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" },
|
||||
{ label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ },
|
||||
];
|
||||
|
||||
// Optional whitelist via env: LH_CONDITIONS=isolation,composed limits the
|
||||
// run to a subset (each entry is a case-insensitive prefix match on the trimmed `label`). Useful when only
|
||||
// the head-to-head pair matters and saves ~50% latency on slow rungs.
|
||||
// Whitelist entries parsed from LH_CONDITIONS: split on commas, trimmed,
// lower-cased, with empty entries discarded. Unset or blank env → empty list.
const wantedLabels = (process.env.LH_CONDITIONS ?? "")
  .split(",")
  .map(entry => entry.trim().toLowerCase())
  .filter(entry => entry.length > 0);

// Conditions that will actually run. With no whitelist every condition is
// kept; otherwise a condition is kept when its trimmed, lower-cased label
// starts with at least one whitelist entry.
const CONDITIONS: Condition[] = wantedLabels.length > 0
  ? ALL_CONDITIONS.filter(cond => {
      const normalizedLabel = cond.label.trim().toLowerCase();
      return wantedLabels.some(prefix => normalizedLabel.startsWith(prefix));
    })
  : ALL_CONDITIONS;
|
||||
|
||||
async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> {
|
||||
const body: any = {
|
||||
task_class: "scrum_review",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user