// Kimi-architect check — second-pass senior architectural review using
// kimi-for-coding (Kimi K2.6) via /v1/chat provider=kimi.
//
// Runs AFTER the deepseek inference check (N=3 consensus) and the
// static/kb_query checks. Reads their findings as context and asks Kimi
// "what did everyone else miss?" — complementing the cheap-consensus
// voting with a sparse senior pass that catches load-bearing issues
// (compile errors, false telemetry, schema bypasses, etc.) which the
// voting structure can't see.
//
// Why Kimi here and not in the inner inference loop:
// - Cost: ~3min wall-clock per call vs ~30s for deepseek consensus.
// - TOS: api.kimi.com is User-Agent-gated (see crates/gateway/src/v1/
//   kimi.rs); cost-bounded calls only.
// - Value: experiment 2026-04-27 showed 7/7 grounding rate with full
//   files vs ~50% on truncated input. Best as a sparse complement, not
//   a replacement.
//
// Failure-isolated: any Kimi error returns a single info-level Finding
// "kimi_architect skipped — <reason>" so the existing audit pipeline
// is never blocked by a Kimi outage / TOS revocation / 429.
//
// Cost cap: if a kimi_verdicts/<pr>-<sha12>.json file exists less than
// 24h old, return cached findings without calling upstream. New commits
// produce new SHAs so this is per-head, not per-day.
//
// Off by default: caller checks LH_AUDITOR_KIMI=1 before invoking.
import { readFile, writeFile, mkdir, appendFile, stat, realpath } from "node:fs/promises";
import { existsSync, realpathSync } from "node:fs";
import { dirname, join, resolve } from "node:path";
import type { Finding, CheckKind } from "../types.ts";

// Gateway endpoint hosting /v1/chat; overridable for staging/tests.
const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
// Per-head verdict cache directory (one JSON file per PR + short SHA).
const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts";
// Append-only metrics log: one JSONL row per upstream audit call.
const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl";
// Anchor for grounding checks — cited file:line paths must resolve under here.
const REPO_ROOT = "/home/profit/lakehouse";

// Canonicalize at module load — REPO_ROOT itself may be a symlink in
// some environments (e.g. /home/profit is a bind-mount). Computing
// once at startup means the per-finding grounding loop can compare
// realpath(target) against this stable anchor.
const REPO_ROOT_REAL = (() => {
  try {
    return realpathSync(REPO_ROOT);
  } catch {
    return REPO_ROOT;
  }
})();

// 15 min budget. Bun's fetch has an intrinsic ~300s limit that our
// AbortController + setTimeout combo could not override; we use curl
// via Bun.spawn instead (callKimi below). Curl honors -m for max
// transfer time without a hard intrinsic ceiling.
const CALL_TIMEOUT_MS = 900_000;
// Verdict cache TTL — 24h per head SHA.
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
// Diff is truncated past this many chars before prompting.
const MAX_DIFF_CHARS = 180_000;
// At most this many prior (non-info) findings are echoed into the prompt.
const MAX_PRIOR_FINDINGS = 50;

// Default provider/model = ollama_cloud/kimi-k2.6. Pre-2026-04-27 we
// went direct to api.kimi.com, but Ollama Cloud Pro now exposes the
// same model legitimately, so we route there to avoid User-Agent
// gating. The api.kimi.com path (provider=kimi) remains wired in the
// gateway as a fallback for when Ollama Cloud is upstream-broken.
const KIMI_PROVIDER = process.env.LH_AUDITOR_KIMI_PROVIDER ?? "ollama_cloud";
const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-k2.6";

// Cross-lineage alternation. 2026-04-27 J's call: Opus is too
// expensive to auto-fire (~$0.30/audit). Kimi K2.6 via Go-sub is
// effectively free; Haiku 4.5 via Zen is ~$0.04. Alternate between
// them so we get cross-lineage signal (Moonshot vs Anthropic) on
// every PR's audit history without burning the budget.
//
// Default: Kimi K2.6 on even audits, Haiku 4.5 on odd. Each PR's
// audits flip between vendors as new SHAs come in.
//
// Frontier models (Opus 4.7, GPT-5.5, Gemini 3.1) are NOT in the
// auto path. Operator hands distilled findings to a frontier model
// manually when high-leverage decisions need it. Removing Opus from
// auto-promotion saves ~$1-3/day on the daemon at our cadence.
// // Override the alternation entirely with LH_AUDITOR_KIMI_MODEL // (forces one model regardless of audit count); set // LH_AUDITOR_KIMI_ALT_MODEL to the alternate. const ALT_MODEL = process.env.LH_AUDITOR_KIMI_ALT_MODEL ?? "claude-haiku-4-5"; const ALT_PROVIDER = process.env.LH_AUDITOR_KIMI_ALT_PROVIDER ?? "opencode"; const FORCE_DEFAULT = process.env.LH_AUDITOR_KIMI_MODEL !== undefined && process.env.LH_AUDITOR_KIMI_MODEL !== ""; function selectModel(diffLen: number, auditIndex: number = 0): { provider: string; model: string; promoted: boolean } { // Operator override — env-pinned model wins. if (FORCE_DEFAULT) { return { provider: KIMI_PROVIDER, model: KIMI_MODEL, promoted: false }; } // Alternate Kimi (default, even index) ↔ Haiku (alt, odd index). // diffLen kept in the signature for future "big diff → Haiku // anyway" logic; not used yet so we don't auto-burn on big PRs. void diffLen; if (auditIndex % 2 === 1) { return { provider: ALT_PROVIDER, model: ALT_MODEL, promoted: true }; } return { provider: KIMI_PROVIDER, model: KIMI_MODEL, promoted: false }; } // Model-aware max_tokens. Different upstream APIs cap at different // limits and reject requests that exceed them: // - Anthropic Opus 4.x: 32K output (with extended-output header) // - Anthropic Haiku 4.5: 8K output // - Kimi K2.6 (reasoning): 128K — needs headroom because // reasoning_content counts against the budget // - Default: 16K, conservative middle ground // // 2026-04-27 BLOCK from Opus self-audit: the prior single-default of // 128K worked silently (Anthropic clamps server-side) but was // technically invalid. Per-model caps make it explicit. Override via // LH_AUDITOR_KIMI_MAX_TOKENS to force a value (also fixes the empty- // env Number("") -> 0 trap by using `||` not `??`). 
const MAX_TOKENS_OVERRIDE = Number(process.env.LH_AUDITOR_KIMI_MAX_TOKENS) || 0; function maxTokensFor(model: string): number { if (MAX_TOKENS_OVERRIDE > 0) return MAX_TOKENS_OVERRIDE; if (model.startsWith("claude-opus")) return 32_000; if (model.startsWith("claude-haiku") || model.startsWith("claude-sonnet")) return 8_192; if (model.startsWith("kimi-")) return 128_000; if (model.startsWith("gpt-5") || model.startsWith("o1") || model.startsWith("o3") || model.startsWith("o4")) return 32_000; return 16_000; } export interface KimiArchitectContext { pr_number: number; head_sha: string; } interface KimiVerdictFile { pr_number: number; head_sha: string; cached_at: string; model: string; latency_ms: number; finish_reason: string; usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number }; raw_content: string; findings: Finding[]; grounding: { total: number; verified: number; rate: number }; } export async function runKimiArchitectCheck( diff: string, priorFindings: Finding[], ctx: KimiArchitectContext, ): Promise { const cachePath = join(KIMI_VERDICTS_DIR, `${ctx.pr_number}-${ctx.head_sha.slice(0, 12)}.json`); const outageSentinel = `${cachePath}.outage`; const OUTAGE_TTL_MS = 10 * 60 * 1000; // Outage negative-cache — if upstream failed within OUTAGE_TTL_MS, // skip this audit and return immediately. Prevents the daemon from // hammering a downed Kimi/Anthropic upstream every 90s. if (existsSync(outageSentinel)) { try { const s = await stat(outageSentinel); if (Date.now() - s.mtimeMs < OUTAGE_TTL_MS) { const note = JSON.parse(await readFile(outageSentinel, "utf8")); return [skipFinding(`upstream still down (cached ${Math.round((Date.now() - s.mtimeMs) / 1000)}s ago): ${String(note.reason).slice(0, 160)}`)]; } } catch { /* malformed sentinel — fall through to fresh call */ } } // Cost cap — return cached findings if a verdict for this exact head // SHA was generated within the TTL. 
const cached = await loadCachedVerdict(cachePath); if (cached) { return cached.findings.length > 0 ? cached.findings : [{ check: "kimi_architect" as CheckKind, severity: "info", summary: "kimi_architect cached — 0 findings", evidence: [`cache: ${cachePath}`] }]; } // Alternate model based on how many audits this PR has had — gives // cross-lineage signal (Kimi/Moonshot ↔ Haiku/Anthropic) on every // PR's audit history. Count is derived from existing kimi_verdicts // files for this PR; cheap O(N_PRs) directory read. let auditIndex = 0; try { const dir = "/home/profit/lakehouse/data/_auditor/kimi_verdicts"; if (existsSync(dir)) { const all = require("node:fs").readdirSync(dir) as string[]; auditIndex = all.filter((f) => f.startsWith(`${ctx.pr_number}-`)).length; } } catch { /* default 0 — Kimi */ } const selected = selectModel(diff.length, auditIndex); let response: { content: string; usage: any; finish_reason: string; latency_ms: number }; try { response = await callKimi(buildPrompt(diff, priorFindings, ctx), selected.provider, selected.model); } catch (e) { // Negative-cache for 10 min on outage (caught 2026-04-27 by Opus // self-audit): without this, every audit cycle within the 24h // TTL re-calls upstream while it's still down. Use a sentinel // file with mtime check rather than persisting a verdict so the // happy-path cache reader doesn't have to special-case it. 
const sentinel = `${cachePath}.outage`; try { await writeFile(sentinel, JSON.stringify({ at: new Date().toISOString(), reason: (e as Error).message.slice(0, 200) })); } catch {} return [skipFinding(`kimi call failed (${selected.model}): ${(e as Error).message.slice(0, 200)}`)]; } const findings = parseFindings(response.content); const grounding = await computeGrounding(findings); const verdict: KimiVerdictFile = { pr_number: ctx.pr_number, head_sha: ctx.head_sha, cached_at: new Date().toISOString(), model: selected.model, latency_ms: response.latency_ms, finish_reason: response.finish_reason, usage: { prompt_tokens: response.usage?.prompt_tokens ?? 0, completion_tokens: response.usage?.completion_tokens ?? 0, total_tokens: response.usage?.total_tokens ?? 0, }, raw_content: response.content, findings, grounding, }; // Cache-poisoning guard (caught 2026-04-27 by Opus self-audit): // when parseFindings returns 0 findings (Kimi rambled, prompt too // big, or the markdown shape changed and our regex missed every // block), persisting the empty verdict short-circuits all future // audits in the 24h TTL window with a useless cached "0 findings" // result. Better to leave no cache and re-call upstream next time. // Always append metrics — observability shouldn't depend on whether // findings parsed. 
await appendMetrics(verdict); if (findings.length > 0) { await persistVerdict(cachePath, verdict); return findings; } return [{ check: "kimi_architect" as CheckKind, severity: "info", summary: `kimi_architect produced 0 ranked findings (${response.finish_reason}, ${verdict.usage.completion_tokens} tokens) — not cached`, evidence: [`raw saved (no cache): see kimi_audits.jsonl ${verdict.cached_at}`], }]; } async function loadCachedVerdict(path: string): Promise { if (!existsSync(path)) return null; try { const s = await stat(path); if (Date.now() - s.mtimeMs > CACHE_TTL_MS) return null; return JSON.parse(await readFile(path, "utf8")) as KimiVerdictFile; } catch { return null; } } function buildPrompt(diff: string, priorFindings: Finding[], ctx: KimiArchitectContext): string { const truncatedDiff = diff.length > MAX_DIFF_CHARS ? diff.slice(0, MAX_DIFF_CHARS) + `\n\n... [truncated; original diff was ${diff.length} chars]` : diff; const priorBlock = priorFindings .filter(f => f.severity !== "info") .slice(0, MAX_PRIOR_FINDINGS) .map(f => `- [${f.check}/${f.severity}] ${f.summary}${f.evidence?.[0] ? ` — ${f.evidence[0].slice(0, 160)}` : ""}`) .join("\n"); return `You are a senior software architect doing a second-pass review on PR #${ctx.pr_number} (head ${ctx.head_sha.slice(0, 12)}). The team's automated auditor (deepseek-v3.1:671b, N=3 consensus) already produced findings. Your job is NOT to repeat what they found — your job is to catch what their voting structure CAN'T see: compile errors, type-system bypasses, false telemetry, silent determinism leaks, schema-bypass anti-patterns, load-bearing assumptions that look fine line-by-line. GROUNDING RULES (non-negotiable): - Cite file:line for EVERY finding. Lines you cite must actually contain what you claim. Confabulating a finding wastes more time than missing one. - If the diff is truncated and you can't verify a claim, say "diff-truncated, can't verify" — DO NOT guess. 
- Distinguish architectural concerns (no specific line) from concrete bugs (specific line). Don't dress one as the other. PRIOR FINDINGS FROM DEEPSEEK CONSENSUS (do not repeat these): ${priorBlock || "(none)"} OUTPUT FORMAT (markdown): - ## Verdict (one sentence) - ## Findings (5-10 items, each formatted EXACTLY as below) For each finding use this exact shape so a parser can lift them: ### F1: - **Severity:** block | warn | info - **File:** path/to/file.ext:LINE - **Rationale:** one or two sentences THE DIFF: ${truncatedDiff} `; } async function callKimi(prompt: string, provider: string, model: string): Promise<{ content: string; usage: any; finish_reason: string; latency_ms: number }> { const t0 = Date.now(); const body = JSON.stringify({ provider, model, messages: [{ role: "user", content: prompt }], max_tokens: maxTokensFor(model), temperature: 0.2, }); // curl via Bun.spawn — bypasses Bun fetch's ~300s intrinsic ceiling. // -m sets the max transfer time honored end-to-end. Body is piped via // stdin to avoid argv length limits on big audit prompts (~50K+ tokens). const proc = Bun.spawn({ cmd: [ "curl", "-sS", "-X", "POST", "-m", String(Math.ceil(CALL_TIMEOUT_MS / 1000)), "-H", "content-type: application/json", "--data-binary", "@-", `${GATEWAY}/v1/chat`, ], stdin: "pipe", stdout: "pipe", stderr: "pipe", }); proc.stdin.write(body); await proc.stdin.end(); const [stdout, stderr, exitCode] = await Promise.all([ new Response(proc.stdout).text(), new Response(proc.stderr).text(), proc.exited, ]); if (exitCode !== 0) { throw new Error(`curl exit ${exitCode}: ${stderr.slice(0, 300)}`); } let j: any; try { j = JSON.parse(stdout); } catch (e) { throw new Error(`bad response (${stdout.length} bytes): ${stdout.slice(0, 300)}`); } if (j.error || !j.choices) { throw new Error(`gateway error: ${JSON.stringify(j).slice(0, 300)}`); } return { content: j.choices?.[0]?.message?.content ?? "", usage: j.usage ?? {}, finish_reason: j.choices?.[0]?.finish_reason ?? 
"unknown", latency_ms: Date.now() - t0, }; } // Parse Kimi's markdown into Finding[]. Format expected (per buildPrompt): // ### F: // - **Severity:** block | warn | info // - **File:** path:line // - **Rationale:** ... function parseFindings(content: string): Finding[] { const findings: Finding[] = []; const blocks = content.split(/^###\s+F\d+:\s*/m).slice(1); for (const block of blocks) { const summary = (block.split("\n")[0] ?? "").trim(); if (!summary) continue; const sev = /\*\*Severity:\*\*\s*(block|warn|info)/i.exec(block)?.[1]?.toLowerCase(); const fileLine = /\*\*File:\*\*\s*(\S+)/i.exec(block)?.[1] ?? "unknown"; const rationale = /\*\*Rationale:\*\*\s*([\s\S]+?)(?=\n###|\n\*\*|$)/i.exec(block)?.[1]?.trim() ?? ""; const severity: Finding["severity"] = sev === "block" ? "block" : sev === "warn" ? "warn" : "info"; findings.push({ check: "kimi_architect" as CheckKind, severity, summary: summary.slice(0, 240), evidence: [fileLine, rationale.slice(0, 360)].filter(Boolean), }); } return findings; } // For each finding's cited file:line, grep the actual file to verify // the line exists. Returns total + verified counts; per-finding metadata // is appended into the evidence array so the reader can see which // citations were verified. async function computeGrounding(findings: Finding[]): Promise<{ total: number; verified: number; rate: number }> { // readFile (async) instead of readFileSync — caught 2026-04-27 by // Kimi's self-audit. Sync I/O in an async fn blocks the event loop // for every cited file; doesn't matter at 10 findings, would matter // at 100+. const checks = await Promise.all(findings.map(async (f) => { const cite = f.evidence[0] ?? ""; const m = /^(\S+?):(\d+)/.exec(cite); if (!m) return false; const [, relpath, lineStr] = m; const line = Number(lineStr); if (!line || !relpath) return false; // Path-traversal guard, two-layer (caught 2026-04-27 by Kimi // self-audits on dd77632 then 2d9cb12). 
// // Layer 1 (lexical): resolve() normalizes `..` segments. Refuse // any path that doesn't anchor under REPO_ROOT. // // Layer 2 (symlink): even if the lexical path is anchored, it // could be a symlink whose target escapes. realpath() resolves // symlinks; compare the real path against REPO_ROOT_REAL. // // Both layers exist because attackers might bypass either alone: // raw `../etc/passwd` triggers layer 1; a planted symlink at // ./safe-looking-name → /etc/passwd triggers layer 2. const abs = resolve(REPO_ROOT, relpath); if (!abs.startsWith(REPO_ROOT + "/") && abs !== REPO_ROOT) { f.evidence.push(`[grounding: path escapes repo root, refusing]`); return false; } if (!existsSync(abs)) { f.evidence.push("[grounding: file not found]"); return false; } try { // Symlink-resolution check before any read. realpath() throws // if the file doesn't exist; existsSync above shields the // common case but a TOCTOU race could still error here — the // outer catch handles it. const realPath = await realpath(abs); if (!realPath.startsWith(REPO_ROOT_REAL + "/") && realPath !== REPO_ROOT_REAL) { f.evidence.push(`[grounding: symlink target escapes repo root, refusing]`); return false; } const lines = (await readFile(realPath, "utf8")).split("\n"); if (line < 1 || line > lines.length) { f.evidence.push(`[grounding: line ${line} > EOF (${lines.length})]`); return false; } f.evidence.push(`[grounding: verified at ${relpath}:${line}]`); return true; } catch (e) { f.evidence.push(`[grounding: read failed: ${(e as Error).message.slice(0, 80)}]`); return false; } })); const verified = checks.filter(Boolean).length; const total = findings.length; return { total, verified, rate: total === 0 ? 
0 : verified / total }; } async function persistVerdict(path: string, v: KimiVerdictFile): Promise { await mkdir(KIMI_VERDICTS_DIR, { recursive: true }); await writeFile(path, JSON.stringify(v, null, 2)); } async function appendMetrics(v: KimiVerdictFile): Promise { // dirname() instead of join(path, "..") — caught 2026-04-27 by both // Haiku and Opus self-audits. The "/.." idiom resolves correctly // via Node path normalization but is non-idiomatic + breaks if the // path ever has trailing dots. await mkdir(dirname(KIMI_AUDITS_JSONL), { recursive: true }); await appendFile(KIMI_AUDITS_JSONL, JSON.stringify({ pr_number: v.pr_number, head_sha: v.head_sha, audited_at: v.cached_at, model: v.model, latency_ms: v.latency_ms, finish_reason: v.finish_reason, prompt_tokens: v.usage.prompt_tokens, completion_tokens: v.usage.completion_tokens, findings_total: v.findings.length, findings_block: v.findings.filter(f => f.severity === "block").length, findings_warn: v.findings.filter(f => f.severity === "warn").length, grounding_verified: v.grounding.verified, grounding_rate: Number(v.grounding.rate.toFixed(3)), }) + "\n"); } function skipFinding(why: string): Finding { return { check: "kimi_architect" as CheckKind, severity: "info", summary: `kimi_architect skipped — ${why}`, evidence: [why], }; }