Session infrastructure: OpenRouter + tree-split reducer + observer→LLM Team + scrum_applier #11

Merged
profit merged 118 commits from scrum/auto-apply-19814 into main 2026-04-27 15:55:24 +00:00
Showing only changes of commit 3eaac413e6 - Show all commits

View File

@ -35,12 +35,33 @@ const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts";
const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl";
const REPO_ROOT = "/home/profit/lakehouse";
// 15 min budget. Bun's fetch has an intrinsic ~300s limit that our
// AbortController + setTimeout combo could not override; we use curl
// via Bun.spawn instead (callKimi below). Curl honors -m for max
// transfer time without a hard intrinsic ceiling.
const CALL_TIMEOUT_MS = 900_000;
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
const MAX_DIFF_CHARS = 180_000;
const MAX_PRIOR_FINDINGS = 50;
// Default provider/model = ollama_cloud/kimi-k2.6. Pre-2026-04-27 we
// went direct to api.kimi.com, but Ollama Cloud Pro now exposes the
// same model legitimately, so we route there to avoid User-Agent
// gating. The api.kimi.com path (provider=kimi) remains wired in the
// gateway as a fallback for when Ollama Cloud is upstream-broken.
//
// `||` rather than `??`: an env var that is set but empty ("") must
// fall back to the default instead of sending an empty provider/model
// to the gateway (`??` only catches null/undefined).
const KIMI_PROVIDER = process.env.LH_AUDITOR_KIMI_PROVIDER || "ollama_cloud";
const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL || "kimi-k2.6";

/**
 * Parse an environment-variable string as a strictly positive integer.
 *
 * @param raw       Raw env value; may be undefined or empty.
 * @param fallback  Value returned when `raw` is missing, empty, non-numeric,
 *                  non-integer, zero, negative, or infinite.
 * @returns The parsed integer, or `fallback`.
 */
function parsePositiveInt(raw: string | undefined, fallback: number): number {
  // Number(undefined) is NaN and Number("") is 0; both fail the checks below.
  const parsed = Number(raw);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// 128K — Kimi K2.6 emits reasoning_content that counts against this
// budget but isn't surfaced in the OpenAI-shape `content` field.
// Capping low silently produces empty content with finish_reason=length
// when reasoning consumes the budget. 128K leaves ample room for both
// reasoning and visible findings on any audit prompt we throw at it.
// Override via LH_AUDITOR_KIMI_MAX_TOKENS only if you want to cap cost.
//
// Bug fix 2026-04-27 (caught by Kimi's own self-audit): empty env var
// like LH_AUDITOR_KIMI_MAX_TOKENS="" used to parse via Number("") → 0
// because `??` only catches null/undefined. Empty string, 0, NaN, and
// also negative / fractional / infinite values now fall back to the
// default.
const MAX_TOKENS = parsePositiveInt(process.env.LH_AUDITOR_KIMI_MAX_TOKENS, 128_000);
export interface KimiArchitectContext {
pr_number: number;
@ -166,33 +187,52 @@ ${truncatedDiff}
/**
 * Send one audit prompt to the gateway's `/v1/chat` endpoint and return the
 * model's reply.
 *
 * The request is made by a `curl` child process (via `Bun.spawn`) instead of
 * `fetch`, because Bun's fetch has a ~300s intrinsic ceiling that is shorter
 * than CALL_TIMEOUT_MS. curl's `-m` enforces the overall time budget.
 *
 * @param prompt  Full audit prompt, sent as a single user message.
 * @returns The first choice's `content` ("" when absent), the gateway's
 *          `usage` object ({} when absent), the first choice's
 *          `finish_reason` ("unknown" when absent), and the wall-clock
 *          latency of the whole call in milliseconds.
 * @throws Error when curl exits non-zero, when the response body is not
 *         valid JSON, or when the parsed response is null, carries an
 *         `error` field, or has no `choices`.
 */
async function callKimi(prompt: string): Promise<{ content: string; usage: any; finish_reason: string; latency_ms: number }> {
  const t0 = Date.now();
  const body = JSON.stringify({
    provider: KIMI_PROVIDER,
    model: KIMI_MODEL,
    messages: [{ role: "user", content: prompt }],
    max_tokens: MAX_TOKENS,
    temperature: 0.2,
  });

  // curl via Bun.spawn — bypasses Bun fetch's ~300s intrinsic ceiling.
  // -m sets the max transfer time honored end-to-end. Body is piped via
  // stdin to avoid argv length limits on big audit prompts (~50K+ tokens).
  const proc = Bun.spawn({
    cmd: [
      "curl", "-sS", "-X", "POST",
      "-m", String(Math.ceil(CALL_TIMEOUT_MS / 1000)),
      "-H", "content-type: application/json",
      "--data-binary", "@-",
      `${GATEWAY}/v1/chat`,
    ],
    stdin: "pipe",
    stdout: "pipe",
    stderr: "pipe",
  });
  proc.stdin.write(body);
  await proc.stdin.end();

  // Drain both pipes while waiting for exit so neither can fill and stall curl.
  const [stdout, stderr, exitCode] = await Promise.all([
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  if (exitCode !== 0) {
    throw new Error(`curl exit ${exitCode}: ${stderr.slice(0, 300)}`);
  }

  let j: any;
  try {
    j = JSON.parse(stdout);
  } catch {
    throw new Error(`bad response (${stdout.length} bytes): ${stdout.slice(0, 300)}`);
  }
  // `j == null` covers a literal JSON `null` body, which would otherwise
  // surface as an opaque TypeError on the property reads below.
  if (j == null || j.error || !j.choices) {
    throw new Error(`gateway error: ${JSON.stringify(j).slice(0, 300)}`);
  }

  return {
    content: j.choices?.[0]?.message?.content ?? "",
    usage: j.usage ?? {},
    finish_reason: j.choices?.[0]?.finish_reason ?? "unknown",
    latency_ms: Date.now() - t0,
  };
}
// Parse Kimi's markdown into Finding[]. Format expected (per buildPrompt):