lakehouse/auditor/index.ts
root 47776b07cd auditor: 2 fixes from kimi_architect on ebd9ab7 audit
The auditor's own audit on commit ebd9ab7 produced 10 kimi_architect
findings; 2 are real correctness issues that this commit lands. The
other 8 are documented in the commit body as triaged-skip with
rationale (false flags, defensible by current intent, or edge cases).

LANDED:

1. auditor/index.ts — atomic state mutation on audit count.
   `state.audit_count_per_pr[prKey] += 1` was held in memory until
   the cycle's saveState at the end. If the daemon was killed mid-
   cycle (SIGTERM, OOM, panic), the count was lost on restart while
   the on-disk last_audited still showed the SHA as audited — the cap
   silently leaked one audit per crash. Fix: persist state immediately
   after each successful audit so the increment survives a crash.
   saveState is idempotent + cheap (single JSON write); per-audit
   cost negligible.

2. auditor/checks/inference.ts — Number-coerce mode runner telemetry.
   `body?.latency_ms ?? 0` collapses null/undefined but passes through
   non-numeric values (string, NaN, etc.) which would poison downstream
   arithmetic in maxLatencyMs computation. Added a `num(v)` helper
   that does `Number(v)` with `isFinite` fallback to 0. Applied to
   latency_ms, enriched_prompt_chars, bug_fingerprints_count,
   matrix_chunks_kept.
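
   The helper itself lives in inference.ts, not in the file below; a
   minimal sketch of the described behavior (name and semantics taken
   from this commit message, the exact signature assumed):

   ```typescript
   // Sketch of the num() coercion helper described above — signature
   // assumed, not copied from inference.ts. Coerces any telemetry value
   // to a finite number; null, undefined, non-numeric strings, NaN, and
   // ±Infinity all fall back to 0, so downstream arithmetic like the
   // maxLatencyMs computation never sees a poisoned value.
   function num(v: unknown): number {
     const n = Number(v);
     return Number.isFinite(n) ? n : 0;
   }
   ```

   Unlike `body?.latency_ms ?? 0`, this also catches values that are
   present but non-numeric (e.g. a latency reported as a string).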

SKIPPED with rationale:

- WARN kimi_architect.ts:211 "metrics appended even on empty verdict":
  this is intentional — observability shouldn't depend on whether
  parseFindings succeeded. Comment in the file explicitly notes this.
- WARN static.ts:270 "escaped-backslash-before-backtick edge case":
  real but extremely narrow (Rust raw strings with `\\\\\``). No
  observed false positives in production audits; defer.
- INFO kimi_architect.ts:333 "sync existsSync in async fn": existsSync
  does block the event loop, but a single stat is cheap; not a real
  perf hit at audit scale (10s of findings per call).
- INFO kimi_architect.ts:105 "audit_index modulo wraparound at 50+
  audits": cap=3 means we never reach high counts on any PR.
- INFO inference.ts:366 "prompt injection delimiter risk": OUTPUT
  FORMAT delimiter is in our prompt template, not user input; user
  data goes inside content sections that don't contain the delimiter.
- WARN Cargo.lock:8739 "truth+validator no Cargo.toml in diff":
  false flag — Cargo.toml IS in workspace members (lines 17-18 of
  the workspace manifest).
- WARN config/modes.toml:1 "no schema validation": defensible — the
  load path validates structure (deserialize_string_or_vec at
  mode.rs:175) and falls back to safe default on parse error.
- INFO evidence_record.ts:124 "metadata accepts any keys": values are
  constrained to `string | number | boolean`; key-name validation
  not warranted for a domain-metadata field.

The 13 BLOCK-severity inference findings on this audit are all
"claim not backed" against historical commit messages from earlier
in the branch (8aa7ee9, bc698eb, 5bdd159, etc.). Those are
aspirational prose ("Verified end-to-end") that the deepseek
consensus can't verify from a static diff — known limitation, not
actionable as code fixes.

Verification:
  bun build auditor/index.ts                     compiles
  bun build auditor/checks/inference.ts          compiles
  systemctl restart lakehouse-auditor            active

Cap remains active on PR #11 (3/3) — daemon will not audit this
fix-commit. Reset state.audit_count_per_pr.11 to verify the fixes
land clean on a fresh audit when ready.
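
The reset can be done by hand in state.json, or scripted; a hypothetical
one-off helper (the path and key names match the state layout in the file
below, everything else is illustrative):

```typescript
import { readFileSync, writeFileSync } from "node:fs";

// Hypothetical reset helper — deletes the per-PR cap counter so the
// daemon resumes auditing that PR on its next cycle (no restart needed;
// the poller re-reads state.json every cycle).
function resetAuditCap(statePath: string, prNumber: number): void {
  const s = JSON.parse(readFileSync(statePath, "utf8"));
  if (s.audit_count_per_pr) {
    delete s.audit_count_per_pr[String(prNumber)];
  }
  writeFileSync(statePath, JSON.stringify(s, null, 2));
}

// e.g. resetAuditCap("/home/profit/lakehouse/data/_auditor/state.json", 11);
```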

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 07:45:40 -05:00


// Auditor poller — the top-level entry. Polls Gitea for open PRs on
// a fixed interval, dedupes by head SHA, runs audit + posts verdict
// for each new (pr, sha) pair.
//
// Run manually:
// bun run auditor/index.ts
//
// Stop:
// touch auditor.paused (skips next cycle)
// pkill -f auditor/index.ts (kills in-flight)
//
// State:
// data/_auditor/state.json — last-audited SHA per PR
// data/_auditor/verdicts/{id}.json — per-run verdict records
//
// This entry runs forever. A systemd unit would wrap it once the
// workflow is trusted (same pattern as mcp-server, observer).
import { readFile, writeFile, mkdir, access } from "node:fs/promises";
import { listOpenPrs } from "./gitea.ts";
import { auditPr } from "./audit.ts";
const POLL_INTERVAL_MS = 90_000; // 90s — enough budget for audit runs to complete
const PAUSE_FILE = "/home/profit/lakehouse/auditor.paused";
const STATE_FILE = "/home/profit/lakehouse/data/_auditor/state.json";
// Per-PR audit cap. Prevents the daemon from running away on a PR
// when each push surfaces new findings — operator wants to review
// in batch, not have the daemon burn budget while they're away.
// Default 3 audits per PR. Override via LH_AUDITOR_MAX_AUDITS_PER_PR.
// Set to 0 to disable the cap.
//
// Reset (after manual review): edit data/_auditor/state.json and
// set audit_count_per_pr.<N> = 0 (or delete the key). Daemon picks
// up the change on the next cycle without restart.
const envCap = process.env.LH_AUDITOR_MAX_AUDITS_PER_PR;
const MAX_AUDITS_PER_PR =
  envCap != null && envCap !== "" && Number.isFinite(Number(envCap))
    ? Number(envCap)
    : 3; // note: `Number(envCap) || 3` would turn an explicit "0" back into 3
interface State {
  // Map: PR number → last-audited head SHA. Lets us dedupe audits
  // across restarts (poller can crash/restart without re-auditing
  // all open PRs from scratch).
  last_audited: Record<string, string>;
  // Map: PR number → number of audits run on that PR since last reset.
  // Daemon halts auditing a PR once this hits MAX_AUDITS_PER_PR.
  // Operator clears the entry to resume.
  audit_count_per_pr: Record<string, number>;
  started_at: string;
  cycles_total: number;
  cycles_skipped_paused: number;
  cycles_skipped_capped: number;
  audits_run: number;
  last_cycle_at?: string;
}
async function fileExists(path: string): Promise<boolean> {
  try { await access(path); return true; } catch { return false; }
}
async function loadState(): Promise<State> {
  try {
    const raw = await readFile(STATE_FILE, "utf8");
    const s = JSON.parse(raw);
    return {
      last_audited: s.last_audited ?? {},
      audit_count_per_pr: s.audit_count_per_pr ?? {},
      started_at: s.started_at ?? new Date().toISOString(),
      cycles_total: s.cycles_total ?? 0,
      cycles_skipped_paused: s.cycles_skipped_paused ?? 0,
      cycles_skipped_capped: s.cycles_skipped_capped ?? 0,
      audits_run: s.audits_run ?? 0,
      last_cycle_at: s.last_cycle_at,
    };
  } catch {
    return {
      last_audited: {},
      audit_count_per_pr: {},
      started_at: new Date().toISOString(),
      cycles_total: 0,
      cycles_skipped_paused: 0,
      cycles_skipped_capped: 0,
      audits_run: 0,
    };
  }
}
async function saveState(s: State): Promise<void> {
  await mkdir("/home/profit/lakehouse/data/_auditor", { recursive: true });
  await writeFile(STATE_FILE, JSON.stringify(s, null, 2));
}
async function runCycle(state: State): Promise<State> {
  state.cycles_total += 1;
  state.last_cycle_at = new Date().toISOString();
  if (await fileExists(PAUSE_FILE)) {
    state.cycles_skipped_paused += 1;
    console.log(`[auditor] cycle ${state.cycles_total}: paused (${PAUSE_FILE} exists)`);
    return state;
  }
  let prs;
  try {
    prs = await listOpenPrs();
  } catch (e) {
    console.error(`[auditor] listOpenPrs failed: ${(e as Error).message}`);
    return state;
  }
  console.log(`[auditor] cycle ${state.cycles_total}: ${prs.length} open PR(s)`);
  for (const pr of prs) {
    const prKey = String(pr.number);
    const last = state.last_audited[prKey];
    if (last === pr.head_sha) {
      console.log(`[auditor] skip PR #${pr.number} (SHA ${pr.head_sha.slice(0, 8)} already audited)`);
      continue;
    }
    // Per-PR audit cap — once a PR has been audited MAX_AUDITS_PER_PR
    // times, halt further audits until the operator manually clears
    // audit_count_per_pr[<N>] in state.json. Prevents runaway burn
    // when each fix surfaces new findings.
    const auditedSoFar = state.audit_count_per_pr[prKey] ?? 0;
    if (MAX_AUDITS_PER_PR > 0 && auditedSoFar >= MAX_AUDITS_PER_PR) {
      console.log(`[auditor] skip PR #${pr.number} (capped at ${auditedSoFar}/${MAX_AUDITS_PER_PR} audits — clear state.json audit_count_per_pr.${prKey} to resume)`);
      state.cycles_skipped_capped += 1;
      continue;
    }
    console.log(`[auditor] audit PR #${pr.number} (${pr.head_sha.slice(0, 8)}) — ${pr.title.slice(0, 60)} [${auditedSoFar + 1}/${MAX_AUDITS_PER_PR}]`);
    try {
      // Skip dynamic by default: it mutates live playbook state, and
      // re-running it on every PR update would pollute that state
      // quickly. Operator can run dynamic via
      // `bun run auditor/fixtures/cli.ts` manually OR set
      // LH_AUDITOR_RUN_DYNAMIC=1 to opt in.
      const run_dynamic = process.env.LH_AUDITOR_RUN_DYNAMIC === "1";
      const verdict = await auditPr(pr, {
        skip_dynamic: !run_dynamic,
        skip_inference: process.env.LH_AUDITOR_SKIP_INFERENCE === "1",
      });
      console.log(`[auditor] verdict=${verdict.overall} findings=${verdict.metrics.findings_total} (block=${verdict.metrics.findings_block} warn=${verdict.metrics.findings_warn})`);
      state.last_audited[prKey] = pr.head_sha;
      state.audit_count_per_pr[prKey] = auditedSoFar + 1;
      state.audits_run += 1;
      // Guard on MAX_AUDITS_PER_PR > 0 so a disabled cap (0) doesn't
      // log "reached cap" on every audit.
      if (MAX_AUDITS_PER_PR > 0 && state.audit_count_per_pr[prKey] >= MAX_AUDITS_PER_PR) {
        console.log(`[auditor] PR #${pr.number} reached cap (${MAX_AUDITS_PER_PR} audits) — daemon will skip further audits until reset`);
      }
      // Persist state immediately after each successful audit so the
      // increment survives a crash. Pre-2026-04-27 the cycle saved
      // once at the end (main.ts:140), which lost the count if the
      // daemon was killed mid-cycle. Fix lifted from kimi_architect's
      // own audit on this very file. saveState is idempotent + cheap
      // (one JSON write), so per-audit cost is negligible.
      try {
        await saveState(state);
      } catch (e) {
        console.error(`[auditor] saveState mid-cycle failed: ${(e as Error).message} — count held in memory`);
      }
    } catch (e) {
      console.error(`[auditor] audit failed: ${(e as Error).message}`);
    }
  }
  return state;
}
async function main(): Promise<void> {
  console.log(`[auditor] starting poller — interval ${POLL_INTERVAL_MS / 1000}s`);
  console.log(`[auditor] pause file: ${PAUSE_FILE}`);
  console.log(`[auditor] state file: ${STATE_FILE}`);
  let state = await loadState();
  console.log(`[auditor] loaded state: ${Object.keys(state.last_audited).length} PRs previously audited, ${state.cycles_total} cycles so far`);
  // Single-shot mode for CLI testing: `bun run auditor/index.ts --once`
  const once = process.argv.includes("--once");
  if (once) {
    state = await runCycle(state);
    await saveState(state);
    console.log(`[auditor] single-shot complete. total audits: ${state.audits_run}`);
    return;
  }
  // Loop.
  while (true) {
    state = await runCycle(state);
    await saveState(state);
    await new Promise(res => setTimeout(res, POLL_INTERVAL_MS));
  }
}
main().catch(e => {
  console.error("[auditor] fatal:", e);
  process.exit(1);
});