auditor: route kimi_architect through ollama_cloud/kimi-k2.6 (TOS-clean primary)
Four changes:
1. Default provider now ollama_cloud/kimi-k2.6 (env-overridable via
LH_AUDITOR_KIMI_PROVIDER + LH_AUDITOR_KIMI_MODEL). Ollama Cloud Pro
exposes kimi-k2.6 legitimately, so we no longer need the User-Agent-
spoof path through api.kimi.com. Smoke test 2026-04-27:
  api.kimi.com  368s   8 findings   8/8 grounded
  ollama_cloud   54s  10 findings  10/10 grounded
The kimi.rs adapter (provider=kimi) stays wired as a fallback when
Ollama Cloud is upstream-broken.
2. Switched HTTP transport from Bun's native fetch to curl via Bun.spawn.
Bun fetch has an undocumented ~300s ceiling that AbortController +
setTimeout cannot override; curl honors -m for end-to-end max
transfer time without a hard intrinsic limit. Required for Kimi's
reasoning-heavy responses on big audit prompts.
3. Bug fix Kimi caught in this very file (turtles all the way down):
Number(process.env.LH_AUDITOR_KIMI_MAX_TOKENS ?? 128_000) yields 0
when the env var is set to an empty string, because `??` only catches
null/undefined. Switched to Number(env) || 128_000 so empty string, 0,
and NaN all fall back (see the sketch after this list). The same
pattern probably exists in other files; future audit pass.
4. Bumped MAX_TOKENS default 12K -> 128K. Kimi K2.6's reasoning_content
counts against this budget but isn't surfaced in OpenAI-shape content;
12K silently produced finish_reason=length with empty content when
reasoning consumed the budget.
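For reference, a minimal sketch of the parsing pitfall from change 3
(plain JS/Bun number coercion, no project code assumed):

    // `??` only falls back on null/undefined, so an empty env var slips
    // through and Number("") coerces to 0:
    const raw = "";                         // e.g. LH_AUDITOR_KIMI_MAX_TOKENS=""
    const broken = Number(raw ?? 128_000);  // -> 0
    const fixed = Number(raw) || 128_000;   // "" -> 0 -> falls back to 128_000
    // `||` also catches an explicit "0" and non-numeric values (NaN).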
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent 8d02c7f441
commit 3eaac413e6
@@ -35,12 +35,33 @@ const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
 const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts";
 const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl";
 const REPO_ROOT = "/home/profit/lakehouse";
-const CALL_TIMEOUT_MS = 360_000; // 6min — kimi reasoning + audit prompt
+// 15 min budget. Bun's fetch has an intrinsic ~300s limit that our
+// AbortController + setTimeout combo could not override; we use curl
+// via Bun.spawn instead (callKimi below). Curl honors -m for max
+// transfer time without a hard intrinsic ceiling.
+const CALL_TIMEOUT_MS = 900_000;
 const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
 const MAX_DIFF_CHARS = 180_000;
 const MAX_PRIOR_FINDINGS = 50;
-const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-for-coding";
-const MAX_TOKENS = Number(process.env.LH_AUDITOR_KIMI_MAX_TOKENS ?? 12_000);
+// Default provider/model = ollama_cloud/kimi-k2.6. Pre-2026-04-27 we
+// went direct to api.kimi.com, but Ollama Cloud Pro now exposes the
+// same model legitimately, so we route there to avoid User-Agent
+// gating. The api.kimi.com path (provider=kimi) remains wired in the
+// gateway as a fallback for when Ollama Cloud is upstream-broken.
+const KIMI_PROVIDER = process.env.LH_AUDITOR_KIMI_PROVIDER ?? "ollama_cloud";
+const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-k2.6";
+// 128K — Kimi K2.6 emits reasoning_content that counts against this
+// budget but isn't surfaced in the OpenAI-shape `content` field.
+// Capping low silently produces empty content with finish_reason=length
+// when reasoning consumes the budget. 128K leaves ample room for both
+// reasoning and visible findings on any audit prompt we throw at it.
+// Override via LH_AUDITOR_KIMI_MAX_TOKENS only if you want to cap cost.
+//
+// Bug fix 2026-04-27 (caught by Kimi's own self-audit): empty env var
+// like LH_AUDITOR_KIMI_MAX_TOKENS="" used to parse via Number("") → 0
+// because `??` only catches null/undefined. Use `||` so empty string,
+// 0, or NaN all fall back to the default.
+const MAX_TOKENS = Number(process.env.LH_AUDITOR_KIMI_MAX_TOKENS) || 128_000;
 
 export interface KimiArchitectContext {
   pr_number: number;
@@ -166,33 +187,52 @@ ${truncatedDiff}
 
 async function callKimi(prompt: string): Promise<{ content: string; usage: any; finish_reason: string; latency_ms: number }> {
   const t0 = Date.now();
-  const ctrl = new AbortController();
-  const timer = setTimeout(() => ctrl.abort(), CALL_TIMEOUT_MS);
-  try {
-    const r = await fetch(`${GATEWAY}/v1/chat`, {
-      method: "POST",
-      headers: { "content-type": "application/json" },
-      body: JSON.stringify({
-        provider: "kimi",
-        model: KIMI_MODEL,
-        messages: [{ role: "user", content: prompt }],
-        max_tokens: MAX_TOKENS,
-        temperature: 0.2,
-      }),
-      signal: ctrl.signal,
-    });
-    if (!r.ok) {
-      const body = await r.text();
-      throw new Error(`/v1/chat ${r.status}: ${body.slice(0, 300)}`);
-    }
-    const j: any = await r.json();
-    return {
-      content: j.choices?.[0]?.message?.content ?? "",
-      usage: j.usage ?? {},
-      finish_reason: j.choices?.[0]?.finish_reason ?? "unknown",
-      latency_ms: Date.now() - t0,
-    };
-  } finally { clearTimeout(timer); }
+  const body = JSON.stringify({
+    provider: KIMI_PROVIDER,
+    model: KIMI_MODEL,
+    messages: [{ role: "user", content: prompt }],
+    max_tokens: MAX_TOKENS,
+    temperature: 0.2,
+  });
+  // curl via Bun.spawn — bypasses Bun fetch's ~300s intrinsic ceiling.
+  // -m sets the max transfer time honored end-to-end. Body is piped via
+  // stdin to avoid argv length limits on big audit prompts (~50K+ tokens).
+  const proc = Bun.spawn({
+    cmd: [
+      "curl", "-sS", "-X", "POST",
+      "-m", String(Math.ceil(CALL_TIMEOUT_MS / 1000)),
+      "-H", "content-type: application/json",
+      "--data-binary", "@-",
+      `${GATEWAY}/v1/chat`,
+    ],
+    stdin: "pipe",
+    stdout: "pipe",
+    stderr: "pipe",
+  });
+  proc.stdin.write(body);
+  await proc.stdin.end();
+  const [stdout, stderr, exitCode] = await Promise.all([
+    new Response(proc.stdout).text(),
+    new Response(proc.stderr).text(),
+    proc.exited,
+  ]);
+  if (exitCode !== 0) {
+    throw new Error(`curl exit ${exitCode}: ${stderr.slice(0, 300)}`);
+  }
+  let j: any;
+  try { j = JSON.parse(stdout); }
+  catch (e) {
+    throw new Error(`bad response (${stdout.length} bytes): ${stdout.slice(0, 300)}`);
+  }
+  if (j.error || !j.choices) {
+    throw new Error(`gateway error: ${JSON.stringify(j).slice(0, 300)}`);
+  }
+  return {
+    content: j.choices?.[0]?.message?.content ?? "",
+    usage: j.usage ?? {},
+    finish_reason: j.choices?.[0]?.finish_reason ?? "unknown",
+    latency_ms: Date.now() - t0,
+  };
 }
 
 // Parse Kimi's markdown into Finding[]. Format expected (per buildPrompt):
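For illustration only, a caller-side guard for the truncation mode
described in change 4, assuming the { content, finish_reason } shape
callKimi returns above; the helper and its name are a sketch, not part
of this commit:

    // If the model spends the whole max_tokens budget on reasoning_content,
    // the visible content comes back empty with finish_reason === "length".
    function assertVisibleReply(r: { content: string; finish_reason: string }): string {
      if (r.finish_reason === "length" && r.content.trim() === "") {
        throw new Error("kimi reply truncated: raise LH_AUDITOR_KIMI_MAX_TOKENS");
      }
      return r.content;
    }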