diff --git a/auditor/checks/inference.ts b/auditor/checks/inference.ts
index 73b597f..4a83745 100644
--- a/auditor/checks/inference.ts
+++ b/auditor/checks/inference.ts
@@ -481,6 +481,10 @@ async function extractAndPersistFacts(scratchpad: string, ctx: InferenceContext)
     entities: ex.entities,
     relationships: ex.relationships,
     verification_preview: ex.verification.slice(0, 400),
+    verifier_verdicts: ex.verifier_verdicts,
+    facts_dropped_by_verifier: ex.facts_dropped_by_verifier ?? 0,
+    schema_version: 2,
+    source: "audit_inference",
   };
   await mkdir("/home/profit/lakehouse/data/_kb", { recursive: true });
   await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(row) + "\n");
diff --git a/auditor/fact_extractor.ts b/auditor/fact_extractor.ts
index eb26710..19dd9b0 100644
--- a/auditor/fact_extractor.ts
+++ b/auditor/fact_extractor.ts
@@ -48,6 +48,12 @@ export interface ExtractedFacts {
   // persists to its own team_runs; this is for our own cross-ref).
   llm_team_run_id?: number;
   extracted_at: string;
+  // Per-fact verdicts from the verifier pass (CORRECT/INCORRECT/
+  // UNVERIFIABLE/UNCHECKED). Aligned 1:1 with the *raw* fact list
+  // pre-drop so operators can see which verdicts mapped to dropped
+  // facts if needed.
+  verifier_verdicts?: Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED">;
+  facts_dropped_by_verifier?: number;
   error?: string;
 }
 
@@ -142,9 +148,25 @@ export async function extractFacts(source: string): Promise<ExtractedFacts> {
     return { ...base, error: "extractor returned no parseable JSON", verification: verifierText };
   }
 
+  const rawFacts: string[] = Array.isArray(parsed.facts)
+    ? parsed.facts.slice(0, 50).map(String)
+    : [];
+
+  // Parse the verifier's free-form prose into per-fact verdicts, then
+  // drop any fact the verifier explicitly marked INCORRECT. Leave
+  // UNVERIFIABLE in place: many of our extractions are domain-specific
+  // (Lakehouse internals) and the verifier has no prior-knowledge
+  // anchor, so UNVERIFIABLE is the expected verdict for new signal,
+  // not a quality fail. This is verifier-gated persistence: drop only
+  // what's affirmatively wrong, not what's novel.
+  const verdicts = parseVerifierVerdicts(verifierText, rawFacts.length);
+  const incorrectIdx = new Set<number>();
+  verdicts.forEach((v, i) => { if (v === "INCORRECT") incorrectIdx.add(i); });
+  const kept = rawFacts.filter((_, i) => !incorrectIdx.has(i));
+
   return {
     ...base,
-    facts: Array.isArray(parsed.facts) ? parsed.facts.slice(0, 50).map(String) : [],
+    facts: kept,
     entities: Array.isArray(parsed.entities)
       ? parsed.entities.slice(0, 30).map((e: any) => ({
           name: String(e?.name ?? ""),
@@ -160,10 +182,36 @@ export async function extractFacts(source: string): Promise<ExtractedFacts> {
         })).filter(r => r.from.length > 0 && r.to.length > 0)
       : [],
     verification: verifierText.slice(0, 1500),
+    facts_dropped_by_verifier: incorrectIdx.size,
+    verifier_verdicts: verdicts,
     llm_team_run_id: runId,
   };
 }
 
+// Parse verifier's free-form output into a per-fact verdict array.
+// The verifier output typically looks like:
+//   **1.** The claim...
+//       * **Verdict:** CORRECT
+//   **2.** ...
+//       **Verdict:** UNVERIFIABLE
+// Using matchAll to iterate — returns a verdict array of length
+// numFacts; unmatched positions stay UNCHECKED.
+function parseVerifierVerdicts(
+  verifierText: string,
+  numFacts: number,
+): Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED"> {
+  const out: Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED"> =
+    Array(numFacts).fill("UNCHECKED");
+  const re = /(?:\*\*|#+\s*)?(\d+)[.):]\s[\s\S]*?\bVerdict\s*:\s*\*?\*?\s*(CORRECT|INCORRECT|UNVERIFIABLE)/gi;
+  for (const m of verifierText.matchAll(re)) {
+    const idx = Number(m[1]) - 1;
+    if (idx >= 0 && idx < numFacts) {
+      out[idx] = m[2].toUpperCase() as "CORRECT" | "INCORRECT" | "UNVERIFIABLE";
+    }
+  }
+  return out;
+}
+
 // Lift the first balanced JSON object out of (possibly fenced) text.
 // Same discipline as inference.ts::extractJson.
 function extractFirstJsonObject(text: string): any | null {
diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts
index 9323da7..369b6f0 100644
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@@ -343,12 +343,50 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
      attempts_made: history.length,
      tree_split_fired: treeSplitFired,
      suggestions_preview: accepted.slice(0, 2000),
+     schema_version: 2,
+     scrum_master_reviewed: true,
    };
    try {
      await appendFile(SCRUM_REVIEWS_JSONL, JSON.stringify(row) + "\n");
    } catch (e) {
      console.error(`[scrum] failed to append scrum_reviews.jsonl: ${(e as Error).message}`);
    }
+
+   // Route the accepted review through llm_team's fact extractor so
+   // its entities + relationships land in audit_facts.jsonl alongside
+   // inference-side extractions. Same index, two sources. Tagged
+   // source:"scrum_review" + scrum_master_reviewed:true so downstream
+   // queries can filter by provenance. Reviews shorter than 120
+   // chars are skipped — they're usually one-liners ("LGTM") with
+   // no extractable knowledge.
+   if (accepted.length >= 120 && process.env.LH_SCRUM_SKIP_EXTRACT !== "1") {
+     try {
+       const { extractFacts } = await import("../../auditor/fact_extractor.ts");
+       const ex = await extractFacts(accepted);
+       if (!ex.error || ex.entities.length + ex.facts.length > 0) {
+         const factRow = {
+           pr_number: 0, // scrum runs outside a PR scope
+           file: rel,
+           head_sha: "", // no SHA scope; scope is the file+timestamp
+           extracted_at: ex.extracted_at,
+           extractor: ex.extractor_model,
+           verifier: ex.verifier_model,
+           llm_team_run_id: ex.llm_team_run_id ?? null,
+           facts: ex.facts,
+           entities: ex.entities,
+           relationships: ex.relationships,
+           verification_preview: ex.verification.slice(0, 400),
+           schema_version: 2,
+           source: "scrum_review",
+           scrum_master_reviewed: true,
+         };
+         const AUDIT_FACTS_JSONL = "/home/profit/lakehouse/data/_kb/audit_facts.jsonl";
+         await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(factRow) + "\n");
+       }
+     } catch (e) {
+       console.error(`[scrum] fact extraction failed for ${rel}: ${(e as Error).message}`);
+     }
+   }
   }
 
   return review;