From bfe1ea9d1c9f84031d967cfe3a83f0488f386f50 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 07:26:31 -0500 Subject: [PATCH] =?UTF-8?q?auditor:=20alternate=20Kimi=20K2.6=20=E2=86=94?= =?UTF-8?q?=20Haiku=204.5,=20drop=20Opus=20from=20auto-promotion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator can't sustain Opus's ~$0.30/audit on the daemon. New strategy: - Even-numbered audits per PR use kimi-k2.6 via ollama_cloud (effectively free under the Ollama Pro flat subscription) - Odd-numbered audits use claude-haiku-4-5 via opencode/Zen (~$0.04/audit) - Frontier models (Opus, GPT-5.5-pro, Gemini 3.1-pro) are NOT in auto-promotion. Operator hands distilled findings to a frontier model manually when a load-bearing decision needs it. Mirrors the lakehouse playbook-memory pattern: cheap models do the volume, the validated subset compounds, only the compounded bundle gets handed to a frontier model. Same logic at the auditor layer. Audit-index derivation: count of existing kimi_verdicts files for the PR. So if the dir has 4 verdicts for PR #11 already, the 5th audit is index 4 (even) → Kimi, the 6th is index 5 (odd) → Haiku. Across an active PR's lifetime the audits naturally interleave the two lineages. Cost projection at observed cadence (5-10 pushes/day): - Old (Haiku default + Opus auto on big diffs): $1-3/day - New (Kimi/Haiku alternating, no Opus): $0.10-0.40/day - $31.68 budget lasts: ~3 months instead of ~10 days Override knobs: LH_AUDITOR_KIMI_MODEL= pins to model X (no alternation) LH_AUDITOR_KIMI_PROVIDER=
provider for default model LH_AUDITOR_KIMI_ALT_MODEL= sets the odd-index alternate LH_AUDITOR_KIMI_ALT_PROVIDER=
provider for alternate The OPUS_THRESHOLD env knobs from the prior auto-promotion commit are now no-ops (unset, no longer referenced). Verification: bun build auditor/checks/kimi_architect.ts compiles systemctl restart lakehouse-auditor active systemctl show env Haiku pin removed, Kimi default + cap=3 set Co-Authored-By: Claude Opus 4.7 (1M context) --- auditor/checks/kimi_architect.ts | 59 ++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/auditor/checks/kimi_architect.ts b/auditor/checks/kimi_architect.ts index be08c1f..c5562fa 100644 --- a/auditor/checks/kimi_architect.ts +++ b/auditor/checks/kimi_architect.ts @@ -50,22 +50,38 @@ const MAX_PRIOR_FINDINGS = 50; // gateway as a fallback for when Ollama Cloud is upstream-broken. const KIMI_PROVIDER = process.env.LH_AUDITOR_KIMI_PROVIDER ?? "ollama_cloud"; const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-k2.6"; -// Big-diff promotion: when the diff exceeds OPUS_THRESHOLD_CHARS, swap -// to OPUS_MODEL for that audit. 2026-04-27 3-way bake-off (Kimi vs -// Haiku vs Opus on a 32K diff) showed Opus is the only model that -// catches cross-file ramifications + escalates `block` severity on -// real architectural risks. ~5x the spend per audit, only worth it -// when the diff is big enough to have those risks. +// Cross-lineage alternation. 2026-04-27 J's call: Opus is too +// expensive to auto-fire (~$0.30/audit). Kimi K2.6 via Go-sub is +// effectively free; Haiku 4.5 via Zen is ~$0.04. Alternate between +// them so we get cross-lineage signal (Moonshot vs Anthropic) on +// every PR's audit history without burning the budget. // -// Defaults: Haiku for normal diffs (fast, cheap, ~$0.02), Opus for -// > 100k chars. Disable promotion: set OPUS_THRESHOLD_CHARS very high. -const OPUS_MODEL = process.env.LH_AUDITOR_KIMI_OPUS_MODEL ?? "claude-opus-4-7"; -const OPUS_PROVIDER = process.env.LH_AUDITOR_KIMI_OPUS_PROVIDER ?? 
"opencode"; -const OPUS_THRESHOLD_CHARS = Number(process.env.LH_AUDITOR_KIMI_OPUS_THRESHOLD_CHARS) || 100_000; +// Default: Kimi K2.6 on even audits, Haiku 4.5 on odd. Each PR's +// audits flip between vendors as new SHAs come in. +// +// Frontier models (Opus 4.7, GPT-5.5, Gemini 3.1) are NOT in the +// auto path. Operator hands distilled findings to a frontier model +// manually when high-leverage decisions need it. Removing Opus from +// auto-promotion saves ~$1-3/day on the daemon at our cadence. +// +// Override the alternation entirely with LH_AUDITOR_KIMI_MODEL +// (forces one model regardless of audit count); set +// LH_AUDITOR_KIMI_ALT_MODEL to the alternate. +const ALT_MODEL = process.env.LH_AUDITOR_KIMI_ALT_MODEL ?? "claude-haiku-4-5"; +const ALT_PROVIDER = process.env.LH_AUDITOR_KIMI_ALT_PROVIDER ?? "opencode"; +const FORCE_DEFAULT = process.env.LH_AUDITOR_KIMI_MODEL !== undefined && process.env.LH_AUDITOR_KIMI_MODEL !== ""; -function selectModel(diffLen: number): { provider: string; model: string; promoted: boolean } { - if (diffLen > OPUS_THRESHOLD_CHARS) { - return { provider: OPUS_PROVIDER, model: OPUS_MODEL, promoted: true }; +function selectModel(diffLen: number, auditIndex: number = 0): { provider: string; model: string; promoted: boolean } { + // Operator override — env-pinned model wins. + if (FORCE_DEFAULT) { + return { provider: KIMI_PROVIDER, model: KIMI_MODEL, promoted: false }; + } + // Alternate Kimi (default, even index) ↔ Haiku (alt, odd index). + // diffLen kept in the signature for future "big diff → Haiku + // anyway" logic; not used yet so we don't auto-burn on big PRs. 
+ void diffLen; + if (auditIndex % 2 === 1) { + return { provider: ALT_PROVIDER, model: ALT_MODEL, promoted: true }; } return { provider: KIMI_PROVIDER, model: KIMI_MODEL, promoted: false }; } @@ -141,7 +157,20 @@ export async function runKimiArchitectCheck( : [{ check: "kimi_architect" as CheckKind, severity: "info", summary: "kimi_architect cached — 0 findings", evidence: [`cache: ${cachePath}`] }]; } - const selected = selectModel(diff.length); + // Alternate model based on how many audits this PR has had — gives + // cross-lineage signal (Kimi/Moonshot ↔ Haiku/Anthropic) on every + // PR's audit history. Count is derived from existing kimi_verdicts + // files for this PR; cheap O(N_PRs) directory read. + let auditIndex = 0; + try { + const dir = "/home/profit/lakehouse/data/_auditor/kimi_verdicts"; + if (existsSync(dir)) { + const all = require("node:fs").readdirSync(dir) as string[]; + auditIndex = all.filter((f) => f.startsWith(`${ctx.pr_number}-`)).length; + } + } catch { /* default 0 — Kimi */ } + + const selected = selectModel(diff.length, auditIndex); let response: { content: string; usage: any; finish_reason: string; latency_ms: number }; try { response = await callKimi(buildPrompt(diff, priorFindings, ctx), selected.provider, selected.model);