Closes the cycle J asked for: curated cloud output lands structured knowledge in the KB, so future audits have architectural context, not just a log of per-finding signatures. Three pieces (hedged sketches of each follow below):

1. Inference curation (tree-split) — when the diff is > 30KB, shard it at 4.5KB, summarize each shard via cloud (temp=0, think=false on the small shards; think=true on the main call), and merge the summaries into a scratchpad. The cloud verification then runs against the scratchpad, not a truncated raw diff. This eliminates the 40KB MAX_DIFF_CHARS truncation path for large PRs (PR #8 is 102KB — we were losing 62KB). Anti-false-positive guard in the prompt: the cloud is told that absence from the scratchpad is NOT absence from the diff, so it doesn't flag curated-out symbols as missing. The unflagged_gaps section is dropped entirely when curated (the scratchpad can't ground them).

2. fact_extractor — a TS client for llm_team_ui's extract-facts mode at localhost:5000/api/run. It sends the curated scratchpad through the qwen2.5 extractor + gemma2 verifier, parses the SSE stream, and returns structured {facts, entities, relationships, verification, llm_team_run_id}. Best-effort: if llm_team is down, extraction fails silently and the audit still completes. It is awaited so CLI tools (audit_one.ts) don't exit before extraction lands — the systemd poller has 90s of headroom, so the extra ~15s doesn't matter.

3. audit_facts.jsonl + checkAuditFacts() — one row per curated audit with the extraction result. kb_query tails the jsonl, explodes the entity rows, aggregates by entity name with distinct-PR counting, and surfaces entities recurring in 2+ PRs as info findings. It filters out short names (<3 chars, extractor truncation artifacts) and generic types (string/number/etc.) so the signal isn't drowned.

Verified end-to-end on PR #8: 102KB diff → 23 shards → 1KB scratchpad → qwen2.5 extracted 4 facts + 6 entities + 6 relationships (real code-level knowledge: the AggregateOptions<T> type, the aggregate<T> async function with its real signature, typed relationships). llm_team_run_id cross-references llm_team's own team_runs table. Also: audit.ts passes (pr_number, head_sha) as InferenceContext so extracted facts are scope-tagged for the KB index.
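A minimal sketch of the tree-split curation path (piece 1), assuming a caller-supplied `summarizeShard` wrapper around the per-shard cloud call; the helper name and signature are hypothetical, and only the 30KB / 4.5KB thresholds come from the description above:

```ts
// Hypothetical sketch, not the actual inference.ts implementation.
const CURATE_THRESHOLD = 30 * 1024; // only curate when the diff exceeds ~30KB
const SHARD_SIZE = 4608;            // ~4.5KB per shard

// summarizeShard is an assumed wrapper around the per-shard cloud call
// (temp=0, think=false). The merged output is the scratchpad the main
// think=true verification call reads instead of a truncated raw diff.
export async function curateDiff(
  diff: string,
  summarizeShard: (shard: string) => Promise<string>,
): Promise<string | null> {
  if (diff.length <= CURATE_THRESHOLD) return null; // small diffs skip curation

  const shards: string[] = [];
  for (let i = 0; i < diff.length; i += SHARD_SIZE) {
    shards.push(diff.slice(i, i + SHARD_SIZE));
  }

  const summaries = await Promise.all(shards.map(s => summarizeShard(s)));
  return summaries.join("\n\n"); // scratchpad for the main verification call
}
```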
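A hedged sketch of the fact_extractor client (piece 2). The endpoint and the extract-facts mode come from the description; the request body fields, the SSE event shapes, and the assumption that the final `data:` event carries the structured payload are guesses about llm_team_ui's API, not its confirmed contract:

```ts
// Hypothetical sketch: request/response shapes are assumed, not confirmed.
interface ExtractionResult {
  facts: unknown[];
  entities: Array<{ name: string }>; // assumed shape; real rows may carry more fields
  relationships: unknown[];
  verification: unknown;
  llm_team_run_id: string | null;
}

export async function extractFacts(scratchpad: string): Promise<ExtractionResult | null> {
  try {
    const res = await fetch("http://localhost:5000/api/run", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ mode: "extract-facts", input: scratchpad }), // assumed body
    });
    if (!res.ok || !res.body) return null;

    // Drain the SSE stream; assume the last "data:" event holds the result JSON.
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let text = "";
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      text += decoder.decode(value, { stream: true });
    }
    const dataLines = text.split("\n").filter(l => l.startsWith("data:"));
    const last = dataLines.at(-1);
    return last ? (JSON.parse(last.slice("data:".length).trim()) as ExtractionResult) : null;
  } catch {
    // Best-effort: if llm_team is down, return null and let the audit complete.
    return null;
  }
}
```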
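And a sketch of the recurrence aggregation inside checkAuditFacts() (piece 3). The row shape, the GENERIC_TYPES list, and the summary wording are illustrative; only the <3-char filter, the generic-type filter, and the 2+ distinct-PR threshold come from the description:

```ts
// Hypothetical sketch of the kb_query side: simplified row and finding shapes.
import { readFile } from "node:fs/promises";

interface AuditFactsRow {
  pr_number: number;
  extraction: { entities: Array<{ name: string }> };
}
interface InfoFinding { check: "kb_query"; severity: "info"; summary: string; evidence: string[] }

const GENERIC_TYPES = new Set(["string", "number", "boolean", "object", "array", "any", "void"]);

export async function checkAuditFacts(jsonlPath: string): Promise<InfoFinding[]> {
  let raw: string;
  try { raw = await readFile(jsonlPath, "utf8"); } catch { return []; } // no facts yet

  const rows = raw.split("\n").filter(Boolean).map(l => JSON.parse(l) as AuditFactsRow);

  // Explode entity rows and count distinct PRs per entity name.
  const prsByEntity = new Map<string, Set<number>>();
  for (const row of rows) {
    for (const e of row.extraction.entities) {
      const name = e.name.trim();
      // Drop truncation artifacts (<3 chars) and generic types so signal isn't drowned.
      if (name.length < 3 || GENERIC_TYPES.has(name.toLowerCase())) continue;
      let prs = prsByEntity.get(name);
      if (!prs) { prs = new Set(); prsByEntity.set(name, prs); }
      prs.add(row.pr_number);
    }
  }

  // Entities recurring in 2+ distinct PRs surface as info findings.
  return [...prsByEntity.entries()]
    .filter(([, prs]) => prs.size >= 2)
    .map(([name, prs]) => ({
      check: "kb_query" as const,
      severity: "info" as const,
      summary: `entity "${name}" recurs in ${prs.size} PRs`,
      evidence: [...prs].map(n => `PR #${n}`),
    }));
}
```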
223 lines
9.0 KiB
TypeScript
// Orchestrator — runs all four checks on a PR, assembles a verdict,
// posts to Gitea. This is task #8's integration layer; the poller
// (task #9) calls this once per PR on every fresh head SHA.
//
// Hard-block mechanism: commit status posted with state="failure"
// and context="lakehouse/auditor". If `main` branch protection
// requires that context to pass, merge is physically impossible
// until the auditor re-audits a fixed commit and flips the status
// to "success".
//
// Human-readable reasoning: posted as a PR issue comment (not a
// review — reviews have self-review restrictions on Gitea and the
// auditor currently uses the same PAT as the PR author).

import { readFile, writeFile, mkdir, appendFile } from "node:fs/promises";
import { createHash } from "node:crypto";
import { join } from "node:path";
import type { PrSnapshot, Verdict, Finding } from "./types.ts";
import { getPrDiff, postCommitStatus, postIssueComment } from "./gitea.ts";
import { parseClaims } from "./claim_parser.ts";
import { assembleVerdict } from "./policy.ts";
import { runStaticCheck } from "./checks/static.ts";
import { runDynamicCheck } from "./checks/dynamic.ts";
import { runInferenceCheck } from "./checks/inference.ts";
import { runKbCheck } from "./checks/kb_query.ts";

const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts";
// Playbook for audit findings — one row per block/warn finding from a
// verdict. kb_query tails this next audit and escalates recurrences.
// Structured as JSONL so it's cheap to append and cheap to tail.
const AUDIT_LESSONS_JSONL = "/home/profit/lakehouse/data/_kb/audit_lessons.jsonl";

export interface AuditOptions {
  // Skip the cloud inference call (fast path for iteration). Default false.
  skip_inference?: boolean;
  // Skip the dynamic check (avoid running the hybrid fixture every PR,
  // since it hits live services and mutates playbook state). Default false
  // on `main`-branch-target PRs, true when auditing feature branches
  // where the fixture would pollute state. Caller decides.
  skip_dynamic?: boolean;
  // Skip Gitea posting — useful for dry-runs / local testing.
  // Default false.
  dry_run?: boolean;
}

export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<Verdict> {
  const t0 = Date.now();
  const diff = await getPrDiff(pr.number);
  const { claims } = parseClaims(pr);

  // Run checks in parallel where they don't share mutable state.
  // Static + kb_query + inference are all read-only. Dynamic mutates
  // playbook state (nonce-scoped per run, but still live) so if
  // skip_dynamic is false we still run it in parallel — the mutation
  // is namespaced.
  const [staticFindings, dynamicFindings, inferenceFindings, kbFindings] = await Promise.all([
    runStaticCheck(diff),
    opts.skip_dynamic ? Promise.resolve(stubFinding("dynamic", "skipped by options")) : runDynamicCheck(),
    opts.skip_inference ? Promise.resolve(stubFinding("inference", "skipped by options")) : runInferenceCheck(claims, diff, { pr_number: pr.number, head_sha: pr.head_sha }),
    runKbCheck(claims, pr.files.map(f => f.path)),
  ]);

  const allFindings: Finding[] = [
    ...staticFindings,
    ...dynamicFindings,
    ...inferenceFindings,
    ...kbFindings,
  ];

  const duration_ms = Date.now() - t0;
  const metrics = {
    audit_duration_ms: duration_ms,
    findings_total: allFindings.length,
    findings_block: allFindings.filter(f => f.severity === "block").length,
    findings_warn: allFindings.filter(f => f.severity === "warn").length,
    findings_info: allFindings.filter(f => f.severity === "info").length,
    claims_strong: claims.filter(c => c.strength === "strong").length,
    claims_moderate: claims.filter(c => c.strength === "moderate").length,
    claims_weak: claims.filter(c => c.strength === "weak").length,
    claims_empirical: claims.filter(c => c.strength === "empirical").length,
    claims_total: claims.length,
    diff_bytes: diff.length,
  };

  const verdict = assembleVerdict(allFindings, metrics, pr.number, pr.head_sha);

  await persistVerdict(verdict);

  // Feedback loop — every block/warn finding becomes a row in
  // audit_lessons.jsonl, dedup-keyed by (check, normalized-summary).
  // The next audit's kb_query reads these and escalates recurring
  // findings so we don't lose the "this pattern has been flagged
  // before" signal across runs. Fire-and-forget; failure here must
  // not break the audit.
  appendAuditLessons(verdict).catch(e =>
    console.error(`[audit] audit_lessons append failed: ${(e as Error).message}`));

  if (!opts.dry_run) {
    await postToGitea(verdict);
  }

  return verdict;
}

// Normalizes a finding summary for dedup: strips path-specific tails
// ("in path/to/file.ts" → "in <file>"), line numbers, and long
// commit-hash snippets. The goal is: the SAME class of finding on
// DIFFERENT files should share a signature, so we can measure
// "this pattern keeps showing up."
function normalizedSignature(f: Finding): string {
  const summary = String(f.summary)
    .replace(/\bin\s+\S+\.(ts|rs|js|py|md)\b/gi, "in <file>")
    .replace(/:\+?\d+\b/g, ":<line>")
    .replace(/[0-9a-f]{8,}/gi, "<hash>")
    .replace(/\s+/g, " ")
    .trim()
    .slice(0, 240);
  const src = `${f.check}::${f.severity}::${summary}`;
  return createHash("sha256").update(src).digest("hex").slice(0, 16);
}

async function appendAuditLessons(v: Verdict): Promise<void> {
  const actionable = v.findings.filter(f => f.severity === "block" || f.severity === "warn");
  if (actionable.length === 0) return;
  await mkdir(join(AUDIT_LESSONS_JSONL, ".."), { recursive: true });
  const rows: string[] = [];
  for (const f of actionable) {
    rows.push(JSON.stringify({
      signature: normalizedSignature(f),
      check: f.check,
      severity: f.severity,
      summary: f.summary,
      pr_number: v.pr_number,
      head_sha: v.head_sha,
      audited_at: v.audited_at,
    }));
  }
  await appendFile(AUDIT_LESSONS_JSONL, rows.join("\n") + "\n");
}

async function persistVerdict(v: Verdict): Promise<void> {
  await mkdir(VERDICTS_DIR, { recursive: true });
  const filename = `${v.pr_number}-${v.head_sha.slice(0, 12)}.json`;
  await writeFile(join(VERDICTS_DIR, filename), JSON.stringify(v, null, 2));
}

export async function postToGitea(v: Verdict): Promise<void> {
  // 1. Commit status — the hard block signal (if branch protection
  //    is configured to require lakehouse/auditor on main).
  const state = v.overall === "approve" ? "success" : "failure";
  await postCommitStatus({
    sha: v.head_sha,
    state,
    context: "lakehouse/auditor",
    description: v.one_liner,
    target_url: "", // no URL yet; could point to a verdicts dashboard
  });

  // 2. Issue comment — the reasoning. Gated so we don't spam the PR
  //    with identical comments on re-audits of the same SHA. Caller
  //    (poller) ensures we only re-audit fresh SHAs, but a dedup
  //    marker inside the body keeps it idempotent if re-run.
  const body = formatReviewBody(v);
  await postIssueComment({ pr_number: v.pr_number, body });
}

function formatReviewBody(v: Verdict): string {
  const byCheck: Record<string, Finding[]> = {};
  for (const f of v.findings) {
    (byCheck[f.check] ||= []).push(f);
  }

  const verdictEmoji =
    v.overall === "approve" ? "✅" :
    v.overall === "request_changes" ? "⚠️" :
    "🛑";

  const lines: string[] = [];
  lines.push(`## Auditor verdict: ${verdictEmoji} \`${v.overall}\``);
  lines.push("");
  lines.push(`**One-liner:** ${v.one_liner}`);
  lines.push(`**Head SHA:** \`${v.head_sha.slice(0, 12)}\``);
  lines.push(`**Audited at:** ${v.audited_at}`);
  lines.push("");

  // Per-check sections, only if the check produced findings.
  const checkOrder = ["static", "dynamic", "inference", "kb_query"] as const;
  for (const check of checkOrder) {
    const fs = byCheck[check] ?? [];
    if (fs.length === 0) continue;
    const bySev = {
      block: fs.filter(f => f.severity === "block").length,
      warn: fs.filter(f => f.severity === "warn").length,
      info: fs.filter(f => f.severity === "info").length,
    };
    lines.push(`<details><summary><b>${check}</b> — ${fs.length} findings (${bySev.block} block, ${bySev.warn} warn, ${bySev.info} info)</summary>`);
    lines.push("");
    for (const f of fs) {
      const mark = f.severity === "block" ? "🛑" : f.severity === "warn" ? "⚠️" : "ℹ️";
      lines.push(`${mark} **${f.severity}** — ${f.summary}`);
      for (const e of f.evidence.slice(0, 3)) {
        lines.push(`  - \`${e.slice(0, 180).replace(/\n/g, " ")}\``);
      }
    }
    lines.push("");
    lines.push("</details>");
    lines.push("");
  }

  lines.push("### Metrics");
  lines.push("```json");
  lines.push(JSON.stringify(v.metrics, null, 2));
  lines.push("```");
  lines.push("");
  lines.push(`<sub>Lakehouse auditor · SHA ${v.head_sha.slice(0, 8)} · re-audit on new commit flips the status automatically.</sub>`);

  return lines.join("\n");
}

function stubFinding(check: "dynamic" | "inference", why: string): Finding[] {
  return [{ check, severity: "info", summary: `${check} check skipped — ${why}`, evidence: [why] }];
}