diff --git a/mcp-server/observer.ts b/mcp-server/observer.ts index 722b20d..a82389e 100644 --- a/mcp-server/observer.ts +++ b/mcp-server/observer.ts @@ -275,12 +275,18 @@ async function escalateFailureClusterToLLMTeam(sigHash: string, cluster: Observe const prompt = `${kbPreamble}sig_hash=${sigHash} · ${cluster.length} failures on the same signature:\n\n${context}\n\nReview this failure cluster. Identify:\n1. Likely root cause (single sentence).\n2. Files most likely responsible (path hints).\n3. Concrete fix direction (under 3 sentences).\n4. Confidence: NN%\n\nBe specific, not generic.`; try { + // 2026-04-26: switched from ollama_cloud/qwen3-coder:480b (weekly + // 429 quota was blocking escalations) to paid OpenRouter + // deepseek-v3.1-terminus — 671B reasoning specialist, $0.21 in / + // $0.79 out per M tokens (under the $0.85/M ceiling J set), 164K + // ctx. Per-escalation cost: ~$0.0003 (typical 500-token prompt + + // 300-token completion). const resp = await fetch(`${LAKEHOUSE}/v1/chat`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ - provider: "ollama_cloud", - model: "qwen3-coder:480b", + provider: "openrouter", + model: "deepseek/deepseek-v3.1-terminus", messages: [{ role: "user", content: prompt }], max_tokens: 800, temperature: 0.2, @@ -302,7 +308,7 @@ async function escalateFailureClusterToLLMTeam(sigHash: string, cluster: Observe const row = { ts: new Date().toISOString(), source: "observer_escalation", - mode: "direct_chat_qwen3_coder_480b", + mode: "direct_chat_deepseek_v3_1_terminus", sig_hash: sigHash, cluster_size: cluster.length, cluster_staffer: cluster[0]?.staffer_id,