Compare commits
No commits in common. "dc01ba0a3bb080f587e00029064710fb344ef8be" and "a7aba31935ad18d38bb40e2511f04e30991cd953" have entirely different histories.
dc01ba0a3b
...
a7aba31935
@ -52,7 +52,7 @@ export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<
|
|||||||
runStaticCheck(diff),
|
runStaticCheck(diff),
|
||||||
opts.skip_dynamic ? Promise.resolve(stubFinding("dynamic", "skipped by options")) : runDynamicCheck(),
|
opts.skip_dynamic ? Promise.resolve(stubFinding("dynamic", "skipped by options")) : runDynamicCheck(),
|
||||||
opts.skip_inference ? Promise.resolve(stubFinding("inference", "skipped by options")) : runInferenceCheck(claims, diff),
|
opts.skip_inference ? Promise.resolve(stubFinding("inference", "skipped by options")) : runInferenceCheck(claims, diff),
|
||||||
runKbCheck(claims, pr.files.map(f => f.path)),
|
runKbCheck(claims),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const allFindings: Finding[] = [
|
const allFindings: Finding[] = [
|
||||||
|
|||||||
@ -1,68 +0,0 @@
|
|||||||
// One-shot dry-run audit of a single PR. Useful for verifying check
|
|
||||||
// behavior (kb_query scrum surfacing, inference prompts, etc.) without
|
|
||||||
// posting to Gitea. Does NOT touch state.json and does NOT post
|
|
||||||
// commit status or PR comments.
|
|
||||||
//
|
|
||||||
// Run: bun run auditor/audit_one.ts <pr-number>
|
|
||||||
|
|
||||||
import { getPrSnapshot } from "./gitea.ts";
|
|
||||||
import { auditPr } from "./audit.ts";
|
|
||||||
|
|
||||||
async function main() {
|
|
||||||
const prNumRaw = process.argv[2];
|
|
||||||
if (!prNumRaw) {
|
|
||||||
console.error("usage: bun run auditor/audit_one.ts <pr-number>");
|
|
||||||
process.exit(2);
|
|
||||||
}
|
|
||||||
const prNum = Number(prNumRaw);
|
|
||||||
if (!Number.isFinite(prNum)) {
|
|
||||||
console.error(`invalid PR number: ${prNumRaw}`);
|
|
||||||
process.exit(2);
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[audit_one] fetching PR #${prNum}...`);
|
|
||||||
const pr = await getPrSnapshot(prNum);
|
|
||||||
console.log(`[audit_one] PR #${pr.number}: "${pr.title}" (head=${pr.head_sha.slice(0, 12)})`);
|
|
||||||
console.log(`[audit_one] files in diff: ${pr.files.length}`);
|
|
||||||
for (const f of pr.files) console.log(` - ${f.path} (+${f.additions}/-${f.deletions})`);
|
|
||||||
console.log("");
|
|
||||||
|
|
||||||
const verdict = await auditPr(pr, {
|
|
||||||
dry_run: true, // no Gitea posting
|
|
||||||
skip_dynamic: true, // don't run fixture
|
|
||||||
skip_inference: process.env.LH_AUDITOR_SKIP_INFERENCE === "1",
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log("\n═══ VERDICT ═══");
|
|
||||||
console.log(`overall: ${verdict.overall}`);
|
|
||||||
console.log(`one-liner: ${verdict.one_liner}`);
|
|
||||||
console.log(`findings: total=${verdict.metrics.findings_total} block=${verdict.metrics.findings_block} warn=${verdict.metrics.findings_warn} info=${verdict.metrics.findings_info}`);
|
|
||||||
console.log("");
|
|
||||||
|
|
||||||
// Print findings, highlighting kb_query scrum surfacing
|
|
||||||
const byCheck: Record<string, typeof verdict.findings> = {};
|
|
||||||
for (const f of verdict.findings) (byCheck[f.check] ||= []).push(f);
|
|
||||||
|
|
||||||
for (const [check, findings] of Object.entries(byCheck)) {
|
|
||||||
console.log(`── ${check} (${findings.length}) ──`);
|
|
||||||
for (const f of findings) {
|
|
||||||
const tag = f.severity === "block" ? "🛑" : f.severity === "warn" ? "⚠️ " : "ℹ️ ";
|
|
||||||
console.log(` ${tag} [${f.severity}] ${f.summary}`);
|
|
||||||
if (f.summary.includes("scrum-master")) {
|
|
||||||
for (const e of f.evidence) {
|
|
||||||
console.log(` → ${e.slice(0, 200)}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const scrumFindings = verdict.findings.filter(f => f.summary.includes("scrum-master"));
|
|
||||||
console.log("");
|
|
||||||
console.log(`═══ SCRUM WIRE CHECK: ${scrumFindings.length} scrum-master findings surfaced by kb_query ═══`);
|
|
||||||
if (scrumFindings.length === 0) {
|
|
||||||
console.log(" (none — either no matching scrum_reviews.jsonl rows, or files didn't match PR diff)");
|
|
||||||
}
|
|
||||||
process.exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
main().catch(e => { console.error("[audit_one] fatal:", e); process.exit(1); });
|
|
||||||
@ -8,7 +8,6 @@
|
|||||||
// What this check reads (all file-backed, append-only or periodic):
|
// What this check reads (all file-backed, append-only or periodic):
|
||||||
// data/_kb/outcomes.jsonl — per-scenario outcomes (kb.ts)
|
// data/_kb/outcomes.jsonl — per-scenario outcomes (kb.ts)
|
||||||
// data/_kb/error_corrections.jsonl — fail→succeed deltas on same sig
|
// data/_kb/error_corrections.jsonl — fail→succeed deltas on same sig
|
||||||
// data/_kb/scrum_reviews.jsonl — scrum-master accepted reviews
|
|
||||||
// data/_observer/ops.jsonl — observer ring → disk stream
|
// data/_observer/ops.jsonl — observer ring → disk stream
|
||||||
// data/_bot/cycles/*.json — bot cycle results
|
// data/_bot/cycles/*.json — bot cycle results
|
||||||
//
|
//
|
||||||
@ -22,11 +21,10 @@ import type { Claim, Finding } from "../types.ts";
|
|||||||
const KB_DIR = "/home/profit/lakehouse/data/_kb";
|
const KB_DIR = "/home/profit/lakehouse/data/_kb";
|
||||||
const OBSERVER_OPS = "/home/profit/lakehouse/data/_observer/ops.jsonl";
|
const OBSERVER_OPS = "/home/profit/lakehouse/data/_observer/ops.jsonl";
|
||||||
const BOT_CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles";
|
const BOT_CYCLES_DIR = "/home/profit/lakehouse/data/_bot/cycles";
|
||||||
const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl";
|
|
||||||
const TAIL_LINES = 500;
|
const TAIL_LINES = 500;
|
||||||
const MAX_BOT_CYCLE_FILES = 30;
|
const MAX_BOT_CYCLE_FILES = 30;
|
||||||
|
|
||||||
export async function runKbCheck(claims: Claim[], prFiles: string[] = []): Promise<Finding[]> {
|
export async function runKbCheck(claims: Claim[]): Promise<Finding[]> {
|
||||||
const findings: Finding[] = [];
|
const findings: Finding[] = [];
|
||||||
|
|
||||||
// 1. Recent scenario outcomes: are strong-claim-style phrases showing
|
// 1. Recent scenario outcomes: are strong-claim-style phrases showing
|
||||||
@ -50,15 +48,6 @@ export async function runKbCheck(claims: Claim[], prFiles: string[] = []): Promi
|
|||||||
const obsFindings = await checkObserverStream();
|
const obsFindings = await checkObserverStream();
|
||||||
findings.push(...obsFindings);
|
findings.push(...obsFindings);
|
||||||
|
|
||||||
// 5. Scrum-master reviews — surface prior accepted reviews for any
|
|
||||||
// file in this PR's diff. Cohesion plan Phase C wire: the
|
|
||||||
// auditor gets to "borrow" the scrum-master's deeper per-file
|
|
||||||
// analysis instead of re-doing that work.
|
|
||||||
if (prFiles.length > 0) {
|
|
||||||
const scrumFindings = await checkScrumReviews(prFiles);
|
|
||||||
findings.push(...scrumFindings);
|
|
||||||
}
|
|
||||||
|
|
||||||
return findings;
|
return findings;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -192,54 +181,3 @@ function observerBySource(ops: any[]): string {
|
|||||||
}
|
}
|
||||||
return Object.entries(c).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}=${v}`).join(", ") || "empty";
|
return Object.entries(c).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k}=${v}`).join(", ") || "empty";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scrum-master reviews — the scrum pipeline writes one row per
|
|
||||||
// accepted per-file review. We match reviews whose `file` matches
|
|
||||||
// any path in the PR's diff, then surface the *preview* + which
|
|
||||||
// model the escalation ladder had to reach. If the scrum-master
|
|
||||||
// needed the 123B specialist or larger to resolve a file, that's
|
|
||||||
// a meaningful signal about the code's complexity — and it's
|
|
||||||
// surfaced to the PR without the auditor having to re-run the
|
|
||||||
// escalation ladder itself.
|
|
||||||
async function checkScrumReviews(prFiles: string[]): Promise<Finding[]> {
|
|
||||||
const rows = await tailJsonl<any>(SCRUM_REVIEWS_JSONL, TAIL_LINES);
|
|
||||||
if (rows.length === 0) return [];
|
|
||||||
|
|
||||||
// Match by exact file OR filename suffix — PR files arrive as
|
|
||||||
// `auditor/audit.ts`-style relative paths; scrum stores the same.
|
|
||||||
const norm = (p: string) => p.replace(/^\/+/, "").replace(/^home\/profit\/lakehouse\//, "");
|
|
||||||
const prSet = new Set(prFiles.map(norm));
|
|
||||||
|
|
||||||
// Keep only the most recent review per file (last-wins).
|
|
||||||
const latestByFile = new Map<string, any>();
|
|
||||||
for (const r of rows) {
|
|
||||||
const f = norm(String(r.file ?? ""));
|
|
||||||
if (!f) continue;
|
|
||||||
if (!prSet.has(f)) continue;
|
|
||||||
latestByFile.set(f, r);
|
|
||||||
}
|
|
||||||
if (latestByFile.size === 0) return [];
|
|
||||||
|
|
||||||
const findings: Finding[] = [];
|
|
||||||
for (const [file, r] of latestByFile) {
|
|
||||||
const model = String(r.accepted_model ?? "?");
|
|
||||||
const attempt = r.accepted_on_attempt ?? "?";
|
|
||||||
const treeSplit = !!r.tree_split_fired;
|
|
||||||
// Heuristic: if the scrum-master had to escalate past attempt 3,
|
|
||||||
// or had to tree-split, that's context the PR reviewer should see.
|
|
||||||
// Severity: info for low-escalation, warn if escalated far up
|
|
||||||
// the ladder (cloud specialist required).
|
|
||||||
const heavyEscalation = Number(attempt) >= 4;
|
|
||||||
const sev: "warn" | "info" = heavyEscalation ? "warn" : "info";
|
|
||||||
findings.push({
|
|
||||||
check: "kb_query",
|
|
||||||
severity: sev,
|
|
||||||
summary: `scrum-master review for \`${file}\` — accepted on attempt ${attempt} by \`${model}\`${treeSplit ? " (tree-split)" : ""}`,
|
|
||||||
evidence: [
|
|
||||||
`reviewed_at: ${r.reviewed_at ?? "?"}`,
|
|
||||||
`preview: ${String(r.suggestions_preview ?? "").slice(0, 300).replace(/\n/g, " ")}`,
|
|
||||||
],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return findings;
|
|
||||||
}
|
|
||||||
|
|||||||
@ -27,14 +27,6 @@ const CHUNK_SIZE = 800;
|
|||||||
const CHUNK_OVERLAP = 120;
|
const CHUNK_OVERLAP = 120;
|
||||||
const TOP_K_CONTEXT = 5;
|
const TOP_K_CONTEXT = 5;
|
||||||
const MAX_ATTEMPTS = 6;
|
const MAX_ATTEMPTS = 6;
|
||||||
// Files larger than this get tree-split instead of truncated. Fixes the
|
|
||||||
// 6KB false-positive class (model claiming a field is "missing" when
|
|
||||||
// it exists past the context cutoff).
|
|
||||||
const FILE_TREE_SPLIT_THRESHOLD = 6000;
|
|
||||||
const FILE_SHARD_SIZE = 3500;
|
|
||||||
// Appended jsonl so auditor's kb_query can surface scrum findings for
|
|
||||||
// files touched by a PR under review. Part of cohesion plan Phase C.
|
|
||||||
const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl";
|
|
||||||
const OUT_DIR = `/home/profit/lakehouse/tests/real-world/runs/scrum_${Date.now().toString(36)}`;
|
const OUT_DIR = `/home/profit/lakehouse/tests/real-world/runs/scrum_${Date.now().toString(36)}`;
|
||||||
|
|
||||||
const PRD_PATH = "/home/profit/lakehouse/docs/PRD.md";
|
const PRD_PATH = "/home/profit/lakehouse/docs/PRD.md";
|
||||||
@ -69,8 +61,6 @@ type Chunk = { id: string; text: string; embedding: number[]; origin: string; of
|
|||||||
interface FileReview {
|
interface FileReview {
|
||||||
file: string;
|
file: string;
|
||||||
file_bytes: number;
|
file_bytes: number;
|
||||||
tree_split_fired: boolean;
|
|
||||||
shards_summarized: number;
|
|
||||||
top_prd_chunks: Array<{ origin: string; offset: number; score: number }>;
|
top_prd_chunks: Array<{ origin: string; offset: number; score: number }>;
|
||||||
top_proposal_chunks: Array<{ origin: string; offset: number; score: number }>;
|
top_proposal_chunks: Array<{ origin: string; offset: number; score: number }>;
|
||||||
attempts_made: number;
|
attempts_made: number;
|
||||||
@ -163,47 +153,6 @@ function retrieveTopK(query_emb: number[], pool: Chunk[], k: number): Chunk[] {
|
|||||||
.map(x => ({ ...x.c, _score: x.score } as any));
|
.map(x => ({ ...x.c, _score: x.score } as any));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tree-split a large file: shard it, summarize each shard against
|
|
||||||
// the review question, merge into a scratchpad. Uses cloud because
|
|
||||||
// the summarization step needs quality > speed. Returns the
|
|
||||||
// scratchpad (full-file distillation) and the cloud-call count.
|
|
||||||
async function treeSplitFile(
|
|
||||||
filePath: string,
|
|
||||||
content: string,
|
|
||||||
): Promise<{ scratchpad: string; shards: number; cloud_calls: number }> {
|
|
||||||
const shards: Array<{ from: number; to: number; text: string }> = [];
|
|
||||||
for (let i = 0; i < content.length; i += FILE_SHARD_SIZE) {
|
|
||||||
const end = Math.min(i + FILE_SHARD_SIZE, content.length);
|
|
||||||
shards.push({ from: i, to: end, text: content.slice(i, end) });
|
|
||||||
}
|
|
||||||
let scratchpad = "";
|
|
||||||
let cloud_calls = 0;
|
|
||||||
log(` tree-split: ${content.length} chars → ${shards.length} shards of ${FILE_SHARD_SIZE}`);
|
|
||||||
for (const [si, shard] of shards.entries()) {
|
|
||||||
const prompt = `You are summarizing ONE SHARD of a source file as part of a multi-shard review. File: ${filePath}. Shard ${si + 1}/${shards.length} (bytes ${shard.from}..${shard.to}).
|
|
||||||
|
|
||||||
─────── shard source ───────
|
|
||||||
${shard.text}
|
|
||||||
─────── end shard ───────
|
|
||||||
|
|
||||||
Scratchpad of prior shards (if empty, this is shard 1):
|
|
||||||
${scratchpad || "(empty)"}
|
|
||||||
|
|
||||||
Extract ONLY facts useful for reviewing this file against its PRD: function names + purposes, struct fields + types, invariants, edge cases, TODO markers, error-handling style. Under 150 words. No prose outside the extracted facts.`;
|
|
||||||
const r = await chat({
|
|
||||||
provider: "ollama_cloud",
|
|
||||||
model: "gpt-oss:120b",
|
|
||||||
prompt,
|
|
||||||
max_tokens: 400,
|
|
||||||
});
|
|
||||||
cloud_calls += 1;
|
|
||||||
if (r.content) {
|
|
||||||
scratchpad += `\n--- shard ${si + 1} (bytes ${shard.from}..${shard.to}) ---\n${r.content.trim()}`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return { scratchpad, shards: shards.length, cloud_calls };
|
|
||||||
}
|
|
||||||
|
|
||||||
async function reviewFile(
|
async function reviewFile(
|
||||||
filePath: string,
|
filePath: string,
|
||||||
prd_chunks: Chunk[],
|
prd_chunks: Chunk[],
|
||||||
@ -231,39 +180,11 @@ async function reviewFile(
|
|||||||
...topPlan.map(c => `[PLAN @${c.offset}]\n${c.text.slice(0, 600)}`),
|
...topPlan.map(c => `[PLAN @${c.offset}]\n${c.text.slice(0, 600)}`),
|
||||||
].join("\n\n");
|
].join("\n\n");
|
||||||
|
|
||||||
// Files bigger than FILE_TREE_SPLIT_THRESHOLD get tree-split.
|
|
||||||
// Summarize each shard to a scratchpad, then review against the
|
|
||||||
// scratchpad instead of the truncated first chunk. Prevents the
|
|
||||||
// false-positive pattern where the model claims a field is
|
|
||||||
// "missing" because it's past the context cutoff.
|
|
||||||
let sourceForPrompt: string;
|
|
||||||
let treeSplitFired = false;
|
|
||||||
let shardsSummarized = 0;
|
|
||||||
let extraCloudCalls = 0;
|
|
||||||
if (content.length > FILE_TREE_SPLIT_THRESHOLD) {
|
|
||||||
treeSplitFired = true;
|
|
||||||
const ts = await treeSplitFile(rel, content);
|
|
||||||
shardsSummarized = ts.shards;
|
|
||||||
extraCloudCalls = ts.cloud_calls;
|
|
||||||
sourceForPrompt = `[FULL-FILE SCRATCHPAD — distilled from ${ts.shards} shards via tree-split]\n${ts.scratchpad}`;
|
|
||||||
} else {
|
|
||||||
sourceForPrompt = content;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prompt — when tree-split fired, include an explicit instruction
|
|
||||||
// not to claim a field/function is "missing" because the scratchpad
|
|
||||||
// is a distillation not the full file. Attacks the rubric-tuning
|
|
||||||
// concern J called out.
|
|
||||||
const truncationWarning = treeSplitFired
|
|
||||||
? `\nIMPORTANT: the "source" below is a multi-shard distillation (tree-split across ${shardsSummarized} shards), NOT the full raw file. DO NOT claim any field, function, or feature is "missing" based on its absence from this distillation — the distillation may have elided it. Only call out gaps that appear DIRECTLY contradicted by the PRD excerpts.\n`
|
|
||||||
: "";
|
|
||||||
|
|
||||||
const baseTask = `You are reviewing one source file against the Lakehouse PRD and an active cohesion-integration plan.
|
const baseTask = `You are reviewing one source file against the Lakehouse PRD and an active cohesion-integration plan.
|
||||||
|
|
||||||
FILE: ${rel} (${content.length} bytes${treeSplitFired ? `, tree-split into ${shardsSummarized} shards` : ""})
|
FILE: ${rel}
|
||||||
${truncationWarning}
|
|
||||||
─────── source ───────
|
─────── source ───────
|
||||||
${sourceForPrompt}
|
${content.slice(0, 6000)}${content.length > 6000 ? "\n[... truncated after 6KB ...]" : ""}
|
||||||
─────── end source ───────
|
─────── end source ───────
|
||||||
|
|
||||||
${contextBlock}
|
${contextBlock}
|
||||||
@ -313,11 +234,9 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const review: FileReview = {
|
return {
|
||||||
file: rel,
|
file: rel,
|
||||||
file_bytes: content.length,
|
file_bytes: content.length,
|
||||||
tree_split_fired: treeSplitFired,
|
|
||||||
shards_summarized: shardsSummarized,
|
|
||||||
top_prd_chunks: topPrd.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
|
top_prd_chunks: topPrd.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
|
||||||
top_proposal_chunks: topPlan.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
|
top_proposal_chunks: topPlan.map(c => ({ origin: c.origin, offset: c.offset, score: (c as any)._score })),
|
||||||
attempts_made: history.length,
|
attempts_made: history.length,
|
||||||
@ -327,31 +246,6 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
|
|||||||
suggestions: accepted ?? "[no acceptable answer after escalation ladder exhausted]",
|
suggestions: accepted ?? "[no acceptable answer after escalation ladder exhausted]",
|
||||||
duration_ms: Date.now() - t0,
|
duration_ms: Date.now() - t0,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Append to the shared scrum-reviews jsonl so the auditor's
|
|
||||||
// kb_query check can surface relevant reviews for files in a
|
|
||||||
// PR diff. Cohesion plan Phase C wire.
|
|
||||||
if (accepted) {
|
|
||||||
const { appendFile, mkdir } = await import("node:fs/promises");
|
|
||||||
const { dirname } = await import("node:path");
|
|
||||||
await mkdir(dirname(SCRUM_REVIEWS_JSONL), { recursive: true });
|
|
||||||
const row = {
|
|
||||||
file: rel,
|
|
||||||
reviewed_at: new Date().toISOString(),
|
|
||||||
accepted_model: acceptedModel,
|
|
||||||
accepted_on_attempt: acceptedOn,
|
|
||||||
attempts_made: history.length,
|
|
||||||
tree_split_fired: treeSplitFired,
|
|
||||||
suggestions_preview: accepted.slice(0, 2000),
|
|
||||||
};
|
|
||||||
try {
|
|
||||||
await appendFile(SCRUM_REVIEWS_JSONL, JSON.stringify(row) + "\n");
|
|
||||||
} catch (e) {
|
|
||||||
console.error(`[scrum] failed to append scrum_reviews.jsonl: ${(e as Error).message}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return review;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function loadAndChunk(path: string, origin_tag: string): Promise<Chunk[]> {
|
async function loadAndChunk(path: string, origin_tag: string): Promise<Chunk[]> {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user