// Gateway endpoint for all model calls; overridable for local testing.
const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
// On-disk artifact locations for verdicts and the audit KB log.
const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts";
const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl";
const REPO_ROOT = "/home/profit/lakehouse";
// 15 min budget. Bun's fetch has an intrinsic ~300s limit that our
// AbortController + setTimeout combo could not override; we use curl
// via Bun.spawn instead (callKimi below). Curl honors -m for max
// transfer time without a hard intrinsic ceiling.
const CALL_TIMEOUT_MS = 900_000;
// 24h — presumably the reuse window for cached verdicts; confirm at the
// cache read site (not visible in this chunk).
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
// Prompt-size caps; the diff is truncated to this many chars before it
// is embedded in the audit prompt (see ${truncatedDiff} below).
const MAX_DIFF_CHARS = 180_000;
const MAX_PRIOR_FINDINGS = 50;
// Default provider/model = ollama_cloud/kimi-k2.6. Pre-2026-04-27 we
// went direct to api.kimi.com, but Ollama Cloud Pro now exposes the
// same model legitimately, so we route there to avoid User-Agent
// gating. The api.kimi.com path (provider=kimi) remains wired in the
// gateway as a fallback for when Ollama Cloud is upstream-broken.
const KIMI_PROVIDER = process.env.LH_AUDITOR_KIMI_PROVIDER ?? "ollama_cloud";
const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-k2.6";
+// Override via LH_AUDITOR_KIMI_MAX_TOKENS only if you want to cap cost. +// +// Bug fix 2026-04-27 (caught by Kimi's own self-audit): empty env var +// like LH_AUDITOR_KIMI_MAX_TOKENS="" used to parse via Number("") → 0 +// because `??` only catches null/undefined. Use `||` so empty string, +// 0, or NaN all fall back to the default. +const MAX_TOKENS = Number(process.env.LH_AUDITOR_KIMI_MAX_TOKENS) || 128_000; export interface KimiArchitectContext { pr_number: number; @@ -166,33 +187,52 @@ ${truncatedDiff} async function callKimi(prompt: string): Promise<{ content: string; usage: any; finish_reason: string; latency_ms: number }> { const t0 = Date.now(); - const ctrl = new AbortController(); - const timer = setTimeout(() => ctrl.abort(), CALL_TIMEOUT_MS); - try { - const r = await fetch(`${GATEWAY}/v1/chat`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ - provider: "kimi", - model: KIMI_MODEL, - messages: [{ role: "user", content: prompt }], - max_tokens: MAX_TOKENS, - temperature: 0.2, - }), - signal: ctrl.signal, - }); - if (!r.ok) { - const body = await r.text(); - throw new Error(`/v1/chat ${r.status}: ${body.slice(0, 300)}`); - } - const j: any = await r.json(); - return { - content: j.choices?.[0]?.message?.content ?? "", - usage: j.usage ?? {}, - finish_reason: j.choices?.[0]?.finish_reason ?? "unknown", - latency_ms: Date.now() - t0, - }; - } finally { clearTimeout(timer); } + const body = JSON.stringify({ + provider: KIMI_PROVIDER, + model: KIMI_MODEL, + messages: [{ role: "user", content: prompt }], + max_tokens: MAX_TOKENS, + temperature: 0.2, + }); + // curl via Bun.spawn — bypasses Bun fetch's ~300s intrinsic ceiling. + // -m sets the max transfer time honored end-to-end. Body is piped via + // stdin to avoid argv length limits on big audit prompts (~50K+ tokens). 
+ const proc = Bun.spawn({ + cmd: [ + "curl", "-sS", "-X", "POST", + "-m", String(Math.ceil(CALL_TIMEOUT_MS / 1000)), + "-H", "content-type: application/json", + "--data-binary", "@-", + `${GATEWAY}/v1/chat`, + ], + stdin: "pipe", + stdout: "pipe", + stderr: "pipe", + }); + proc.stdin.write(body); + await proc.stdin.end(); + const [stdout, stderr, exitCode] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + proc.exited, + ]); + if (exitCode !== 0) { + throw new Error(`curl exit ${exitCode}: ${stderr.slice(0, 300)}`); + } + let j: any; + try { j = JSON.parse(stdout); } + catch (e) { + throw new Error(`bad response (${stdout.length} bytes): ${stdout.slice(0, 300)}`); + } + if (j.error || !j.choices) { + throw new Error(`gateway error: ${JSON.stringify(j).slice(0, 300)}`); + } + return { + content: j.choices?.[0]?.message?.content ?? "", + usage: j.usage ?? {}, + finish_reason: j.choices?.[0]?.finish_reason ?? "unknown", + latency_ms: Date.now() - t0, + }; } // Parse Kimi's markdown into Finding[]. Format expected (per buildPrompt):