// lakehouse/auditor/checks/kimi_architect.ts

// Kimi-architect check — second-pass senior architectural review using
// kimi-for-coding (Kimi K2.6) via /v1/chat provider=kimi.
//
// Runs AFTER the deepseek inference check (N=3 consensus) and the
// static/kb_query checks. Reads their findings as context and asks Kimi
// "what did everyone else miss?" — complementing the cheap-consensus
// voting with a sparse senior pass that catches load-bearing issues
// (compile errors, false telemetry, schema bypasses, etc.) which the
// voting structure can't see.
//
// Why Kimi here and not in the inner inference loop:
// - Cost: ~3min wall-clock per call vs ~30s for deepseek consensus.
// - TOS: api.kimi.com is User-Agent-gated (see crates/gateway/src/v1/
//   kimi.rs); cost-bounded calls only.
// - Value: experiment 2026-04-27 showed 7/7 grounding rate with full
//   files vs ~50% on truncated input. Best as a sparse complement, not
//   a replacement.
//
// Failure-isolated: any Kimi error returns a single info-level Finding
// "kimi_architect skipped — <reason>" so the existing audit pipeline
// is never blocked by a Kimi outage / TOS revocation / 429.
//
// Cost cap: if a kimi_verdicts/<pr>-<sha>.json file exists less than 24h
// old, return cached findings without calling upstream. New commits
// produce new SHAs so this is per-head, not per-day.
//
// Off by default: caller checks LH_AUDITOR_KIMI=1 before invoking.

import { readFile, writeFile, mkdir, appendFile, stat } from "node:fs/promises";
import { existsSync, readFileSync } from "node:fs";
import { join, resolve } from "node:path";
import type { Finding, CheckKind } from "../types.ts";

/**
 * Read a positive-integer setting from an environment-variable value.
 *
 * Returns `fallback` when the variable is unset, blank, non-numeric,
 * fractional, or not greater than zero, so a typo in the environment can
 * never turn into `0`, `NaN` (which JSON-serialises as `null`) or a
 * negative limit in the upstream request.
 *
 * @param raw - Raw value as read from `process.env` (may be undefined).
 * @param fallback - Value to use when `raw` is missing or invalid.
 * @returns The parsed positive integer, or `fallback`.
 */
function positiveIntFromEnv(raw: string | undefined, fallback: number): number {
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Base URL of the gateway that exposes POST /v1/chat.
const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
// One JSON verdict per (PR, head SHA); doubles as the response cache.
const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts";
// Append-only JSONL file with one metrics row per upstream call.
const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl";
// Repository checkout that cited file paths are resolved against.
const REPO_ROOT = "/home/profit/lakehouse";
const CALL_TIMEOUT_MS = 360_000; // 6min — kimi reasoning + audit prompt
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
// Diffs longer than this many characters are truncated before prompting.
const MAX_DIFF_CHARS = 180_000;
// Upper bound on prior (non-info) findings quoted in the prompt.
const MAX_PRIOR_FINDINGS = 50;
const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-for-coding";
// Completion-token budget; invalid env values fall back to 12 000.
const MAX_TOKENS = positiveIntFromEnv(process.env.LH_AUDITOR_KIMI_MAX_TOKENS, 12_000);

/**
 * Identifies the PR head under review. Both fields feed the cache filename
 * and the prompt header.
 */
export interface KimiArchitectContext {
  /** Pull-request number. */
  pr_number: number;
  /** Head commit SHA; only its first 12 characters are used in the cache filename and prompt. */
  head_sha: string;
}

/**
 * On-disk record of one Kimi review. Written after every successful upstream
 * call and read back as the cache entry for the same PR head.
 */
interface KimiVerdictFile {
  /** Copied from the check context. */
  pr_number: number;
  /** Copied from the check context (full SHA as supplied by the caller). */
  head_sha: string;
  /** ISO-8601 timestamp taken when the verdict object was built. */
  cached_at: string;
  /** Model identifier sent in the request. */
  model: string;
  /** Wall-clock duration of the gateway call, in milliseconds. */
  latency_ms: number;
  /** Finish reason reported for the first choice, or "unknown" when absent. */
  finish_reason: string;
  /** Token counts from the response; each defaults to 0 when the response omits it. */
  usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  /** Unparsed message content returned by the model. */
  raw_content: string;
  /** Findings parsed from `raw_content`, including any grounding notes appended to their evidence. */
  findings: Finding[];
  /** Citation check summary: findings counted, citations verified, and verified / total. */
  grounding: { total: number; verified: number; rate: number };
}

/**
 * Run the Kimi second-pass architectural review for one PR head.
 *
 * Flow: return the cached verdict when a fresh one exists for this head;
 * otherwise build the prompt, call Kimi through the gateway, parse the
 * markdown reply into findings, annotate each citation with a grounding
 * note, then persist the verdict and append a metrics row.
 *
 * Failure isolation: a failed Kimi call yields a single info-level
 * "skipped" finding. Failures while writing the verdict file or the metrics
 * row are logged and otherwise ignored, so findings from a completed
 * upstream call are never discarded because of a disk error.
 *
 * @param diff - Unified diff of the PR (truncated inside the prompt if too long).
 * @param priorFindings - Findings from earlier checks, quoted so Kimi does not repeat them.
 * @param ctx - PR number and head SHA identifying the review target.
 * @returns The parsed findings, or a single info-level placeholder when there are none.
 */
export async function runKimiArchitectCheck(
  diff: string,
  priorFindings: Finding[],
  ctx: KimiArchitectContext,
): Promise<Finding[]> {
  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));
  const cachePath = join(KIMI_VERDICTS_DIR, `${ctx.pr_number}-${ctx.head_sha.slice(0, 12)}.json`);

  // Cost cap — return cached findings if a verdict for this exact head
  // SHA was generated within the TTL. A cache file whose `findings` is not
  // an array is treated as a miss instead of crashing on `.length`.
  const cached = await loadCachedVerdict(cachePath);
  if (cached && Array.isArray(cached.findings)) {
    return cached.findings.length > 0
      ? cached.findings
      : [{ check: "kimi_architect" as CheckKind, severity: "info", summary: "kimi_architect cached — 0 findings", evidence: [`cache: ${cachePath}`] }];
  }

  let response: { content: string; usage: any; finish_reason: string; latency_ms: number };
  try {
    response = await callKimi(buildPrompt(diff, priorFindings, ctx));
  } catch (e) {
    // `e` may be any thrown value, not only an Error.
    return [skipFinding(`kimi call failed: ${describe(e).slice(0, 200)}`)];
  }

  const findings = parseFindings(response.content);
  const grounding = await computeGrounding(findings);

  const verdict: KimiVerdictFile = {
    pr_number: ctx.pr_number,
    head_sha: ctx.head_sha,
    cached_at: new Date().toISOString(),
    model: KIMI_MODEL,
    latency_ms: response.latency_ms,
    finish_reason: response.finish_reason,
    usage: {
      prompt_tokens: response.usage?.prompt_tokens ?? 0,
      completion_tokens: response.usage?.completion_tokens ?? 0,
      total_tokens: response.usage?.total_tokens ?? 0,
    },
    raw_content: response.content,
    findings,
    grounding,
  };

  // Bookkeeping is best-effort and each step is independent: a failed
  // cache write must not prevent the metrics row, and neither may reject
  // the check.
  try {
    await persistVerdict(cachePath, verdict);
  } catch (e) {
    console.warn(`kimi_architect: could not write verdict ${cachePath}: ${describe(e)}`);
  }
  try {
    await appendMetrics(verdict);
  } catch (e) {
    console.warn(`kimi_architect: could not append metrics: ${describe(e)}`);
  }

  if (findings.length > 0) return findings;
  return [{
    check: "kimi_architect" as CheckKind,
    severity: "info",
    summary: `kimi_architect produced 0 ranked findings (${response.finish_reason}, ${verdict.usage.completion_tokens} tokens)`,
    evidence: [`raw response: ${cachePath}`],
  }];
}

/**
 * Load a cached verdict if one exists and is still fresh.
 *
 * Freshness is judged by the file's modification time against
 * `CACHE_TTL_MS`. Any problem — missing file, unreadable file, invalid
 * JSON, or JSON that is not an object with a `findings` array — is treated
 * as a cache miss rather than an error.
 *
 * @param path - Absolute path of the verdict JSON file.
 * @returns The parsed verdict, or `null` on a miss.
 */
async function loadCachedVerdict(path: string): Promise<KimiVerdictFile | null> {
  try {
    // stat() rejects for a missing file, so no separate existence probe is needed.
    const info = await stat(path);
    if (Date.now() - info.mtimeMs > CACHE_TTL_MS) return null;

    const parsed: unknown = JSON.parse(await readFile(path, "utf8"));
    if (typeof parsed !== "object" || parsed === null) return null;
    // Callers read `findings.length` straight away; reject files without it.
    if (!Array.isArray((parsed as { findings?: unknown }).findings)) return null;
    return parsed as KimiVerdictFile;
  } catch {
    return null;
  }
}

/**
 * Assemble the single user message sent to Kimi.
 *
 * The prompt carries the reviewer brief and grounding rules, a bullet list
 * of prior non-info findings (capped at `MAX_PRIOR_FINDINGS`, each with at
 * most 160 characters of its first evidence entry), the required output
 * format, and the diff — cut to `MAX_DIFF_CHARS` with a truncation notice
 * when it is longer.
 *
 * @param diff - Unified diff of the PR.
 * @param priorFindings - Findings from earlier checks.
 * @param ctx - PR number and head SHA shown in the prompt header.
 * @returns The complete prompt text.
 */
function buildPrompt(diff: string, priorFindings: Finding[], ctx: KimiArchitectContext): string {
  let diffSection = diff;
  if (diff.length > MAX_DIFF_CHARS) {
    diffSection = diff.slice(0, MAX_DIFF_CHARS) + `\n\n... [truncated; original diff was ${diff.length} chars]`;
  }

  // Info-level findings are skipped; the cap counts only the ones kept.
  const priorLines: string[] = [];
  for (const prior of priorFindings) {
    if (prior.severity === "info") continue;
    if (priorLines.length >= MAX_PRIOR_FINDINGS) break;
    const firstEvidence = prior.evidence?.[0];
    const evidenceSuffix = firstEvidence ? ` — ${firstEvidence.slice(0, 160)}` : "";
    priorLines.push(`- [${prior.check}/${prior.severity}] ${prior.summary}${evidenceSuffix}`);
  }
  const priorSection = priorLines.join("\n") || "(none)";
  const shortSha = ctx.head_sha.slice(0, 12);

  return `You are a senior software architect doing a second-pass review on PR #${ctx.pr_number} (head ${shortSha}). The team's automated auditor (deepseek-v3.1:671b, N=3 consensus) already produced findings. Your job is NOT to repeat what they found — your job is to catch what their voting structure CAN'T see: compile errors, type-system bypasses, false telemetry, silent determinism leaks, schema-bypass anti-patterns, load-bearing assumptions that look fine line-by-line.

GROUNDING RULES (non-negotiable):
- Cite file:line for EVERY finding. Lines you cite must actually contain what you claim. Confabulating a finding wastes more time than missing one.
- If the diff is truncated and you can't verify a claim, say "diff-truncated, can't verify" — DO NOT guess.
- Distinguish architectural concerns (no specific line) from concrete bugs (specific line). Don't dress one as the other.

PRIOR FINDINGS FROM DEEPSEEK CONSENSUS (do not repeat these):
${priorSection}

OUTPUT FORMAT (markdown):
- ## Verdict (one sentence)
- ## Findings (5-10 items, each formatted EXACTLY as below)

For each finding use this exact shape so a parser can lift them:

### F1: <one-line summary>
- **Severity:** block | warn | info
- **File:** path/to/file.ext:LINE
- **Rationale:** one or two sentences

THE DIFF:

${diffSection}
`;
}

/**
 * Send one prompt to Kimi through the gateway's `/v1/chat` endpoint.
 *
 * The request is aborted after `CALL_TIMEOUT_MS`; the timer is always
 * cleared, whether the call succeeds or fails.
 *
 * @param prompt - Full user message to send.
 * @returns The first choice's text (empty string when missing or not a
 *   string), the raw usage object (empty object when missing), the finish
 *   reason ("unknown" when missing), and the wall-clock latency.
 * @throws Error on a non-2xx status (message carries the status and the
 *   first 300 characters of the body), on a timeout (message states the
 *   limit), or on any network / JSON-decoding failure.
 */
async function callKimi(prompt: string): Promise<{ content: string; usage: any; finish_reason: string; latency_ms: number }> {
  const t0 = Date.now();
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), CALL_TIMEOUT_MS);
  try {
    const r = await fetch(`${GATEWAY}/v1/chat`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        provider: "kimi",
        model: KIMI_MODEL,
        messages: [{ role: "user", content: prompt }],
        max_tokens: MAX_TOKENS,
        temperature: 0.2,
      }),
      signal: ctrl.signal,
    });
    if (!r.ok) {
      const body = await r.text();
      throw new Error(`/v1/chat ${r.status}: ${body.slice(0, 300)}`);
    }
    const j: any = await r.json();
    const choice = j?.choices?.[0];
    const content: unknown = choice?.message?.content;
    const finishReason: unknown = choice?.finish_reason;
    return {
      // The declared type promises strings; enforce that at runtime so the
      // markdown parser downstream never receives an array or object.
      content: typeof content === "string" ? content : "",
      usage: j?.usage ?? {},
      finish_reason: typeof finishReason === "string" ? finishReason : "unknown",
      latency_ms: Date.now() - t0,
    };
  } catch (e) {
    // Only the timeout timer aborts this controller, so an aborted signal
    // means the deadline passed. Report that instead of the generic
    // "operation was aborted" text.
    if (ctrl.signal.aborted) {
      throw new Error(`/v1/chat timed out after ${CALL_TIMEOUT_MS}ms`);
    }
    throw e;
  } finally {
    clearTimeout(timer);
  }
}

// Parse Kimi's markdown into Finding[]. Format expected (per buildPrompt):
//   ### F<N>: <summary>
//   - **Severity:** block | warn | info
//   - **File:** path:line
//   - **Rationale:** ...
//
// The parser tolerates common formatting drift in model output:
//   - the colon may sit inside or outside the bold marker
//     (`**File:**` or `**File**:`);
//   - the citation may be wrapped in backticks/quotes/brackets or carry
//     trailing punctuation, which is stripped so it stays a usable path:line;
//   - the rationale ends at the next bold field (bulleted or not), the next
//     markdown heading, or the end of the block.
//
// Text before the first `### F<N>:` heading is ignored. Blocks without a
// summary line are skipped. A missing or unrecognised severity becomes
// "info"; a missing citation becomes "unknown". Summary is capped at 240
// characters and rationale at 360.
function parseFindings(content: string): Finding[] {
  const findings: Finding[] = [];
  const blocks = content.split(/^###\s+F\d+:\s*/m).slice(1);
  for (const block of blocks) {
    const summary = (block.split("\n")[0] ?? "").trim();
    if (!summary) continue;

    const sev = /\*\*Severity:?\*\*:?\s*`?(block|warn|info)/i.exec(block)?.[1]?.toLowerCase();
    const severity: Finding["severity"] = sev === "block" ? "block" : sev === "warn" ? "warn" : "info";

    // Stay on the field's own line so an empty File field does not capture
    // the bullet dash of the following line.
    const rawCite = /\*\*File:?\*\*:?[ \t]*(\S+)/i.exec(block)?.[1] ?? "";
    const fileLine = rawCite
      .replace(/^[`'"(\[<]+/, "")
      .replace(/[`'")\]>.,;:]+$/, "") || "unknown";

    const rationale = /\*\*Rationale:?\*\*:?\s*([\s\S]+?)(?=\n\s*(?:[-*]\s+)?\*\*|\n\s*#{1,6}\s|$)/i
      .exec(block)?.[1]?.trim() ?? "";

    findings.push({
      check: "kimi_architect" as CheckKind,
      severity,
      summary: summary.slice(0, 240),
      evidence: [fileLine, rationale.slice(0, 360)].filter(Boolean),
    });
  }
  return findings;
}

// For each finding's cited file:line (first evidence entry), check that the
// file exists under REPO_ROOT and has at least that many lines. A grounding
// note is appended to the finding's evidence array so the reader can see
// which citations were checked and how. Note that "verified" only means the
// line number exists — the line's content is not compared with the claim.
//
// Citations come from model output, so paths are confined to REPO_ROOT:
// absolute paths outside the repo and `../` escapes are marked
// "[grounding: path outside repo]" and never read.
//
// Findings without a parsable path:line citation get no note and count
// against the rate. Returns total findings, verified citations, and
// verified / total (0 when there are no findings).
async function computeGrounding(findings: Finding[]): Promise<{ total: number; verified: number; rate: number }> {
  const root = resolve(REPO_ROOT);
  let verified = 0;
  for (const f of findings) {
    const cite = f.evidence[0] ?? "";
    const m = /^(\S+?):(\d+)/.exec(cite);
    if (!m) continue;
    const [, relpath, lineStr] = m;
    const line = Number(lineStr);
    if (!line || !relpath) continue;

    // resolve() also handles absolute citations; either way the result must
    // be strictly below the repo root.
    const abs = resolve(root, relpath);
    const insideRepo = abs.startsWith(root) && /^[\\/]/.test(abs.slice(root.length));
    if (!insideRepo) {
      f.evidence.push("[grounding: path outside repo]");
      continue;
    }
    if (!existsSync(abs)) {
      f.evidence.push("[grounding: file not found]");
      continue;
    }
    try {
      const text = await readFile(abs, "utf8");
      // A trailing newline terminates the last line; it does not start a
      // new one, so it must not add to the line count.
      const pieces = text.split("\n").length;
      const lineCount = text.length === 0 ? 0 : text.endsWith("\n") ? pieces - 1 : pieces;
      if (line > lineCount) {
        f.evidence.push(`[grounding: line ${line} > EOF (${lineCount})]`);
        continue;
      }
      f.evidence.push(`[grounding: verified at ${relpath}:${line}]`);
      verified++;
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      f.evidence.push(`[grounding: read failed: ${msg.slice(0, 80)}]`);
    }
  }
  const total = findings.length;
  return { total, verified, rate: total === 0 ? 0 : verified / total };
}

/**
 * Write a verdict to disk as pretty-printed JSON (2-space indent),
 * replacing any existing file at `path`.
 *
 * The directory that will contain `path` is created first (recursively),
 * so the write succeeds for whatever location the caller chose rather than
 * only for one fixed directory.
 *
 * @param path - Destination file.
 * @param v - Verdict to serialise.
 * @throws Any filesystem error from creating the directory or writing the file.
 */
async function persistVerdict(path: string, v: KimiVerdictFile): Promise<void> {
  await mkdir(join(path, ".."), { recursive: true });
  await writeFile(path, JSON.stringify(v, null, 2));
}

/**
 * Append one summary row for a verdict to the metrics JSONL file, creating
 * the file's parent directory if needed.
 *
 * The row holds identity (PR, SHA, timestamp, model), call cost (latency,
 * finish reason, prompt/completion tokens), finding counts (total, block,
 * warn) and grounding results (verified count, rate rounded to 3 decimals).
 *
 * @param v - Verdict to summarise.
 */
async function appendMetrics(v: KimiVerdictFile): Promise<void> {
  const metricsDir = join(KIMI_AUDITS_JSONL, "..");
  await mkdir(metricsDir, { recursive: true });

  // Tally block/warn findings in one pass.
  let blockCount = 0;
  let warnCount = 0;
  for (const { severity } of v.findings) {
    if (severity === "block") blockCount += 1;
    else if (severity === "warn") warnCount += 1;
  }

  const { usage, grounding } = v;
  const row = {
    pr_number: v.pr_number,
    head_sha: v.head_sha,
    audited_at: v.cached_at,
    model: v.model,
    latency_ms: v.latency_ms,
    finish_reason: v.finish_reason,
    prompt_tokens: usage.prompt_tokens,
    completion_tokens: usage.completion_tokens,
    findings_total: v.findings.length,
    findings_block: blockCount,
    findings_warn: warnCount,
    grounding_verified: grounding.verified,
    grounding_rate: Number(grounding.rate.toFixed(3)),
  };
  await appendFile(KIMI_AUDITS_JSONL, `${JSON.stringify(row)}\n`);
}

/**
 * Build the info-level placeholder finding reported when the Kimi review
 * could not run.
 *
 * @param why - Short reason; appears in the summary and as the only evidence entry.
 * @returns A single `kimi_architect` finding with severity "info".
 */
function skipFinding(why: string): Finding {
  const placeholder: Finding = {
    check: "kimi_architect" as CheckKind,
    severity: "info",
    summary: "kimi_architect skipped — " + why,
    evidence: [why],
  };
  return placeholder;
}