diff --git a/auditor/checks/inference.ts b/auditor/checks/inference.ts index 66e8c85..6a59899 100644 --- a/auditor/checks/inference.ts +++ b/auditor/checks/inference.ts @@ -169,12 +169,16 @@ export async function runInferenceCheck( interface Votes { trues: number; falses: number; evidences: string[] } const votesByClaim = new Map<string, Votes>(); const unflaggedByRun: any[][] = []; - let totalLatencyMs = 0; + // The N=3 consensus calls run via Promise.all — wall-clock is + // bounded by the SLOWEST call, not the sum. Pre-2026-04-27 we + // summed and reported "Xms total" which double/triple-counted + // (Opus self-audit caught it). Use max for accurate wall-clock. + let maxLatencyMs = 0; let totalEnrichedChars = 0; let bugFingerprintsSeen = 0; let matrixKeptSeen = 0; for (const run of parsedRuns) { - totalLatencyMs += run.latency_ms ?? 0; + maxLatencyMs = Math.max(maxLatencyMs, run.latency_ms ?? 0); totalEnrichedChars += run.enriched_chars ?? 0; bugFingerprintsSeen = Math.max(bugFingerprintsSeen, run.bug_fingerprints ?? 0); matrixKeptSeen = Math.max(matrixKeptSeen, run.matrix_kept ?? 
0); @@ -199,7 +203,7 @@ export async function runInferenceCheck( findings.push({ check: "inference", severity: "info", - summary: `pr_audit mode runner completed (model=${MODEL}, consensus=${parsedRuns.length}/${N_CONSENSUS}, ${totalLatencyMs}ms total)${curationNote}`, + summary: `pr_audit mode runner completed (model=${MODEL}, consensus=${parsedRuns.length}/${N_CONSENSUS}, ${maxLatencyMs}ms wall-clock)${curationNote}`, evidence: [ `claims voted: ${votesByClaim.size}`, `parsed runs: ${parsedRuns.length} / ${N_CONSENSUS}`, diff --git a/auditor/checks/kimi_architect.ts b/auditor/checks/kimi_architect.ts index f31ead1..3d06ad6 100644 --- a/auditor/checks/kimi_architect.ts +++ b/auditor/checks/kimi_architect.ts @@ -28,7 +28,7 @@ import { readFile, writeFile, mkdir, appendFile, stat } from "node:fs/promises"; import { existsSync } from "node:fs"; -import { join, resolve } from "node:path"; +import { dirname, join, resolve } from "node:path"; import type { Finding, CheckKind } from "../types.ts"; const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100"; @@ -106,15 +106,29 @@ export async function runKimiArchitectCheck( ctx: KimiArchitectContext, ): Promise<Finding[]> { const cachePath = join(KIMI_VERDICTS_DIR, `${ctx.pr_number}-${ctx.head_sha.slice(0, 12)}.json`); + const outageSentinel = `${cachePath}.outage`; + const OUTAGE_TTL_MS = 10 * 60 * 1000; + + // Outage negative-cache — if upstream failed within OUTAGE_TTL_MS, + // skip this audit and return immediately. Prevents the daemon from + // hammering a downed Kimi/Anthropic upstream every 90s. 
+ if (existsSync(outageSentinel)) { + try { + const s = await stat(outageSentinel); + if (Date.now() - s.mtimeMs < OUTAGE_TTL_MS) { + const note = JSON.parse(await readFile(outageSentinel, "utf8")); + return [skipFinding(`upstream still down (cached ${Math.round((Date.now() - s.mtimeMs) / 1000)}s ago): ${String(note.reason).slice(0, 160)}`)]; + } + } catch { /* malformed sentinel — fall through to fresh call */ } + } // Cost cap — return cached findings if a verdict for this exact head // SHA was generated within the TTL. const cached = await loadCachedVerdict(cachePath); if (cached) { - const fs2: Finding[] = cached.findings.length > 0 + return cached.findings.length > 0 ? cached.findings : [{ check: "kimi_architect" as CheckKind, severity: "info", summary: "kimi_architect cached — 0 findings", evidence: [`cache: ${cachePath}`] }]; - return fs2; } const selected = selectModel(diff.length); @@ -122,6 +136,13 @@ export async function runKimiArchitectCheck( try { response = await callKimi(buildPrompt(diff, priorFindings, ctx), selected.provider, selected.model); } catch (e) { + // Negative-cache for 10 min on outage (caught 2026-04-27 by Opus + // self-audit): without this, every audit cycle within the 24h + // TTL re-calls upstream while it's still down. Use a sentinel + // file with mtime check rather than persisting a verdict so the + // happy-path cache reader doesn't have to special-case it. 
+ const sentinel = `${cachePath}.outage`; + try { await writeFile(sentinel, JSON.stringify({ at: new Date().toISOString(), reason: (e as Error).message.slice(0, 200) })); } catch {} return [skipFinding(`kimi call failed (${selected.model}): ${(e as Error).message.slice(0, 200)}`)]; } @@ -145,17 +166,25 @@ export async function runKimiArchitectCheck( grounding, }; - await persistVerdict(cachePath, verdict); + // Cache-poisoning guard (caught 2026-04-27 by Opus self-audit): + // when parseFindings returns 0 findings (Kimi rambled, prompt too + // big, or the markdown shape changed and our regex missed every + // block), persisting the empty verdict short-circuits all future + // audits in the 24h TTL window with a useless cached "0 findings" + // result. Better to leave no cache and re-call upstream next time. + // Always append metrics — observability shouldn't depend on whether + // findings parsed. await appendMetrics(verdict); - - return findings.length > 0 - ? findings - : [{ - check: "kimi_architect" as CheckKind, - severity: "info", - summary: `kimi_architect produced 0 ranked findings (${response.finish_reason}, ${verdict.usage.completion_tokens} tokens)`, - evidence: [`raw response: ${cachePath}`], - }]; + if (findings.length > 0) { + await persistVerdict(cachePath, verdict); + return findings; + } + return [{ + check: "kimi_architect" as CheckKind, + severity: "info", + summary: `kimi_architect produced 0 ranked findings (${response.finish_reason}, ${verdict.usage.completion_tokens} tokens) — not cached`, + evidence: [`raw saved (no cache): see kimi_audits.jsonl ${verdict.cached_at}`], + }]; } async function loadCachedVerdict(path: string): Promise<KimiVerdictFile | null> { @@ -325,7 +354,11 @@ async function persistVerdict(path: string, v: KimiVerdictFile): Promise<void> { } async function appendMetrics(v: KimiVerdictFile): Promise<void> { - await mkdir(join(KIMI_AUDITS_JSONL, ".."), { recursive: true }); + // dirname() instead of join(path, "..") — caught 2026-04-27 by both + // Haiku and 
Opus self-audits. The "/.." idiom resolves correctly + // via Node path normalization but is non-idiomatic + breaks if the + // path ever has trailing dots. + await mkdir(dirname(KIMI_AUDITS_JSONL), { recursive: true }); await appendFile(KIMI_AUDITS_JSONL, JSON.stringify({ pr_number: v.pr_number, head_sha: v.head_sha, diff --git a/crates/aibridge/src/continuation.rs b/crates/aibridge/src/continuation.rs index a0189da..7ad2859 100644 --- a/crates/aibridge/src/continuation.rs +++ b/crates/aibridge/src/continuation.rs @@ -195,8 +195,11 @@ pub async fn generate_continuable( let req = make_request(opts, prompt.to_string(), current_max); let resp = generator.generate_text(req).await?; calls += 1; - prompt_tokens = prompt_tokens.saturating_add(resp.tokens_evaluated.unwrap_or(0) as u32); - completion_tokens = completion_tokens.saturating_add(resp.tokens_generated.unwrap_or(0) as u32); + // u32::try_from saturates at u32::MAX instead of silently + // truncating bits when tokens_evaluated/_generated comes back + // as a u64 > 4 billion. Caught 2026-04-27 by Opus self-audit. 
+ prompt_tokens = prompt_tokens.saturating_add(u32::try_from(resp.tokens_evaluated.unwrap_or(0)).unwrap_or(u32::MAX)); + completion_tokens = completion_tokens.saturating_add(u32::try_from(resp.tokens_generated.unwrap_or(0)).unwrap_or(u32::MAX)); if !resp.text.trim().is_empty() { combined = resp.text; break; @@ -227,8 +230,8 @@ pub async fn generate_continuable( let req = make_request(opts, cont_prompt, current_max.min(opts.budget_cap)); let resp = generator.generate_text(req).await?; calls += 1; - prompt_tokens = prompt_tokens.saturating_add(resp.tokens_evaluated.unwrap_or(0) as u32); - completion_tokens = completion_tokens.saturating_add(resp.tokens_generated.unwrap_or(0) as u32); + prompt_tokens = prompt_tokens.saturating_add(u32::try_from(resp.tokens_evaluated.unwrap_or(0)).unwrap_or(u32::MAX)); + completion_tokens = completion_tokens.saturating_add(u32::try_from(resp.tokens_generated.unwrap_or(0)).unwrap_or(u32::MAX)); combined.push_str(&resp.text); continuations += 1; }