lakehouse/auditor/audit.ts
root 8d02c7f441 auditor: integrate Kimi second-pass review (off by default, LH_AUDITOR_KIMI=1)
Adds kimi_architect as a fifth check kind in the auditor. Runs
sequentially after static/dynamic/inference/kb_query, consumes their
findings as context, and asks Kimi For Coding "what did everyone
miss?" — targeting load-bearing issues that deepseek N=3 voting can't
see (compile errors, false telemetry, schema bypasses, determinism
leaks). 7/7 grounded on the distillation v1.0.0 audit experiment
2026-04-27.

Off by default. Enable on the lakehouse-auditor service:
  systemctl edit lakehouse-auditor.service
  Environment=LH_AUDITOR_KIMI=1

Tunable env (all optional):
  LH_AUDITOR_KIMI_MODEL       default kimi-for-coding
  LH_AUDITOR_KIMI_MAX_TOKENS  default 12000
  LH_GATEWAY_URL              default http://localhost:3100

Guardrails:
- Failure-isolated. Any Kimi error / 429 / TOS revocation returns a
  single info-level skip-finding so the existing pipeline never blocks
  on a Kimi outage.
- Cost-bounded. Cached verdicts at data/_auditor/kimi_verdicts/<pr>-
  <sha>.json with 24h TTL — re-audits within the window return cached
  findings instead of re-calling upstream. New commits produce new
  SHAs so caching is per-head, not per-day.
- 6min upstream timeout (vs 2min for openrouter inference) — Kimi is
  a reasoning model and the audit prompt is large.
- Grounding verification baked in. Every finding's cited file:line is
  greppped against the actual file before the verdict is persisted.
  Per-finding evidence carries [grounding: verified at FILE:LINE] or
  [grounding: line N > EOF] / [grounding: file not found]. Confab-
  ulation rate goes into data/_kb/kimi_audits.jsonl as grounding_rate
  for "is this still valuable" tracking.

Persisted artifacts:
  data/_auditor/kimi_verdicts/<pr>-<sha>.json   full verdict + raw
                                                Kimi response + grounding
  data/_kb/kimi_audits.jsonl                    one row per call:
                                                latency, tokens, findings,
                                                grounding rate

Verdict-rendering: kimi_architect now appears in the per-check
sections of the human-readable comment posted to PRs (auditor/audit.ts
checkOrder), after kb_query.

Verification:
  bun build auditor/checks/kimi_architect.ts   compiles
  bun build auditor/audit.ts                   compiles
  parser sanity (3-finding fixture)            3/3 lifted correctly

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 05:39:51 -05:00

247 lines
10 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Orchestrator — runs all four checks on a PR, assembles a verdict,
// posts to Gitea. This is task #8's integration layer; the poller
// (task #9) calls this once per PR on every fresh head SHA.
//
// Hard-block mechanism: commit status posted with state="failure"
// and context="lakehouse/auditor". If `main` branch protection
// requires that context to pass, merge is physically impossible
// until the auditor re-audits a fixed commit and flips the status
// to "success".
//
// Human-readable reasoning: posted as a PR issue comment (not a
// review — reviews have self-review restrictions on Gitea and the
// auditor currently uses the same PAT as the PR author).
import { readFile, writeFile, mkdir, appendFile } from "node:fs/promises";
import { createHash } from "node:crypto";
import { join } from "node:path";
import type { PrSnapshot, Verdict, Finding } from "./types.ts";
import { getPrDiff, postCommitStatus, postIssueComment } from "./gitea.ts";
import { parseClaims } from "./claim_parser.ts";
import { assembleVerdict } from "./policy.ts";
import { runStaticCheck } from "./checks/static.ts";
import { runDynamicCheck } from "./checks/dynamic.ts";
import { runInferenceCheck } from "./checks/inference.ts";
import { runKbCheck } from "./checks/kb_query.ts";
import { runKimiArchitectCheck } from "./checks/kimi_architect.ts";
const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts";
// Playbook for audit findings — one row per block/warn finding from a
// verdict. kb_query tails this next audit and escalates recurrences.
// Structured as JSONL so it's cheap to append and cheap to tail.
const AUDIT_LESSONS_JSONL = "/home/profit/lakehouse/data/_kb/audit_lessons.jsonl";
export interface AuditOptions {
// Skip the cloud inference call (fast path for iteration). Default false.
skip_inference?: boolean;
// Skip the dynamic check (avoid running the hybrid fixture every PR,
// since it hits live services and mutates playbook state). Default false
// on `main`-branch-target PRs, true when auditing feature branches
// where the fixture would pollute state. Caller decides.
skip_dynamic?: boolean;
// Skip Gitea posting — useful for dry-runs / local testing.
// Default false.
dry_run?: boolean;
}
export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<Verdict> {
const t0 = Date.now();
const diff = await getPrDiff(pr.number);
const { claims } = parseClaims(pr);
// Run checks in parallel where they don't share mutable state.
// Static + kb_query + inference are all read-only. Dynamic mutates
// playbook state (nonce-scoped per run, but still live) so if
// skip_dynamic is false we still run it in parallel — the mutation
// is namespaced.
const [staticFindings, dynamicFindings, inferenceFindings, kbFindings] = await Promise.all([
runStaticCheck(diff),
opts.skip_dynamic ? Promise.resolve(stubFinding("dynamic", "skipped by options")) : runDynamicCheck(),
opts.skip_inference ? Promise.resolve(stubFinding("inference", "skipped by options")) : runInferenceCheck(claims, diff, { pr_number: pr.number, head_sha: pr.head_sha }),
runKbCheck(claims, pr.files.map(f => f.path)),
]);
const allFindings: Finding[] = [
...staticFindings,
...dynamicFindings,
...inferenceFindings,
...kbFindings,
];
// Kimi-architect second-pass review. Off by default; enabled with
// LH_AUDITOR_KIMI=1. Sequential (not in the parallel block above)
// because it consumes the prior findings as context — Kimi sees what
// deepseek already flagged and is asked "what did everyone miss?"
// Failure-isolated by design: any error returns a single info-level
// skip finding so the existing audit pipeline never blocks on Kimi.
if (process.env.LH_AUDITOR_KIMI === "1") {
try {
const kimiFindings = await runKimiArchitectCheck(diff, allFindings, {
pr_number: pr.number,
head_sha: pr.head_sha,
});
allFindings.push(...kimiFindings);
} catch (e) {
allFindings.push({
check: "kimi_architect",
severity: "info",
summary: `kimi_architect outer error — ${(e as Error).message.slice(0, 160)}`,
evidence: [(e as Error).stack?.slice(0, 360) ?? ""],
});
}
}
const duration_ms = Date.now() - t0;
const metrics = {
audit_duration_ms: duration_ms,
findings_total: allFindings.length,
findings_block: allFindings.filter(f => f.severity === "block").length,
findings_warn: allFindings.filter(f => f.severity === "warn").length,
findings_info: allFindings.filter(f => f.severity === "info").length,
claims_strong: claims.filter(c => c.strength === "strong").length,
claims_moderate: claims.filter(c => c.strength === "moderate").length,
claims_weak: claims.filter(c => c.strength === "weak").length,
claims_empirical: claims.filter(c => c.strength === "empirical").length,
claims_total: claims.length,
diff_bytes: diff.length,
};
const verdict = assembleVerdict(allFindings, metrics, pr.number, pr.head_sha);
await persistVerdict(verdict);
// Feedback loop — every block/warn finding becomes a row in
// audit_lessons.jsonl, dedup-keyed by (check, normalized-summary).
// The next audit's kb_query reads these and escalates recurring
// findings so we don't lose the "this pattern has been flagged
// before" signal across runs. Fire-and-forget; failure here must
// not break the audit.
appendAuditLessons(verdict).catch(e =>
console.error(`[audit] audit_lessons append failed: ${(e as Error).message}`));
if (!opts.dry_run) {
await postToGitea(verdict);
}
return verdict;
}
// Normalizes a finding summary for dedup: strips path-specific tails
// ("in path/to/file.ts" → "in <file>"), line numbers, and long
// commit-hash snippets. The goal is: the SAME class of finding on
// DIFFERENT files should share a signature, so we can measure
// "this pattern keeps showing up."
function normalizedSignature(f: Finding): string {
const summary = String(f.summary)
.replace(/\bin\s+\S+\.(ts|rs|js|py|md)\b/gi, "in <file>")
.replace(/:\+?\d+\b/g, ":<line>")
.replace(/[0-9a-f]{8,}/gi, "<hash>")
.replace(/\s+/g, " ")
.trim()
.slice(0, 240);
const src = `${f.check}::${f.severity}::${summary}`;
return createHash("sha256").update(src).digest("hex").slice(0, 16);
}
async function appendAuditLessons(v: Verdict): Promise<void> {
const actionable = v.findings.filter(f => f.severity === "block" || f.severity === "warn");
if (actionable.length === 0) return;
await mkdir(join(AUDIT_LESSONS_JSONL, ".."), { recursive: true });
const rows: string[] = [];
for (const f of actionable) {
rows.push(JSON.stringify({
signature: normalizedSignature(f),
check: f.check,
severity: f.severity,
summary: f.summary,
pr_number: v.pr_number,
head_sha: v.head_sha,
audited_at: v.audited_at,
}));
}
await appendFile(AUDIT_LESSONS_JSONL, rows.join("\n") + "\n");
}
async function persistVerdict(v: Verdict): Promise<void> {
await mkdir(VERDICTS_DIR, { recursive: true });
const filename = `${v.pr_number}-${v.head_sha.slice(0, 12)}.json`;
await writeFile(join(VERDICTS_DIR, filename), JSON.stringify(v, null, 2));
}
export async function postToGitea(v: Verdict): Promise<void> {
// 1. Commit status — the hard block signal (if branch protection
// is configured to require lakehouse/auditor on main).
const state = v.overall === "approve" ? "success" : "failure";
await postCommitStatus({
sha: v.head_sha,
state,
context: "lakehouse/auditor",
description: v.one_liner,
target_url: "", // no URL yet; could point to a verdicts dashboard
});
// 2. Issue comment — the reasoning. Gated so we don't spam the PR
// with identical comments on re-audits of the same SHA. Caller
// (poller) ensures we only re-audit fresh SHAs, but a dedup
// marker inside the body keeps it idempotent if re-run.
const body = formatReviewBody(v);
await postIssueComment({ pr_number: v.pr_number, body });
}
function formatReviewBody(v: Verdict): string {
const byCheck: Record<string, Finding[]> = {};
for (const f of v.findings) {
(byCheck[f.check] ||= []).push(f);
}
const verdictEmoji =
v.overall === "approve" ? "✅" :
v.overall === "request_changes" ? "⚠️" :
"🛑";
const lines: string[] = [];
lines.push(`## Auditor verdict: ${verdictEmoji} \`${v.overall}\``);
lines.push("");
lines.push(`**One-liner:** ${v.one_liner}`);
lines.push(`**Head SHA:** \`${v.head_sha.slice(0, 12)}\``);
lines.push(`**Audited at:** ${v.audited_at}`);
lines.push("");
// Per-check sections, only if the check produced findings.
const checkOrder = ["static", "dynamic", "inference", "kb_query", "kimi_architect"] as const;
for (const check of checkOrder) {
const fs = byCheck[check] ?? [];
if (fs.length === 0) continue;
const bySev = {
block: fs.filter(f => f.severity === "block").length,
warn: fs.filter(f => f.severity === "warn").length,
info: fs.filter(f => f.severity === "info").length,
};
lines.push(`<details><summary><b>${check}</b> — ${fs.length} findings (${bySev.block} block, ${bySev.warn} warn, ${bySev.info} info)</summary>`);
lines.push("");
for (const f of fs) {
const mark = f.severity === "block" ? "🛑" : f.severity === "warn" ? "⚠️" : "";
lines.push(`${mark} **${f.severity}** — ${f.summary}`);
for (const e of f.evidence.slice(0, 3)) {
lines.push(` - \`${e.slice(0, 180).replace(/\n/g, " ")}\``);
}
}
lines.push("");
lines.push("</details>");
lines.push("");
}
lines.push("### Metrics");
lines.push("```json");
lines.push(JSON.stringify(v.metrics, null, 2));
lines.push("```");
lines.push("");
lines.push(`<sub>Lakehouse auditor · SHA ${v.head_sha.slice(0, 8)} · re-audit on new commit flips the status automatically.</sub>`);
return lines.join("\n");
}
function stubFinding(check: "dynamic" | "inference" | "kimi_architect", why: string): Finding[] {
return [{ check, severity: "info", summary: `${check} check skipped — ${why}`, evidence: [why] }];
}