scrum_master fact extraction + verifier gate + schema_version bump
Three bundled changes that round out the KB enrichment pipeline (PR #9 commits B/C/D compressed into one — they all touch the same persist surfaces so splitting them would just add noise):

B. scrum_master reviews now route accepted review bodies through fact_extractor (same llm_team extract pipeline as inference) and append to data/_kb/audit_facts.jsonl tagged source:"scrum_review". One KB, two producers — downstream consumers can filter by source when they care about provenance. Skips reviews <120 chars (one-liners / LGTM-type comments with no extractable knowledge).

C. Verifier-gated fact persistence. fact_extractor now parses the verifier's free-form prose into per-fact verdicts (CORRECT / INCORRECT / UNVERIFIABLE / UNCHECKED). Facts marked INCORRECT are dropped on write; CORRECT + UNVERIFIABLE + UNCHECKED are kept (dropping UNVERIFIABLE would lose ~90% of real signal — the verifier's prior-knowledge base doesn't know Lakehouse internals, so domain-specific facts read as UNVERIFIABLE by default). verifier_verdicts array is persisted alongside facts so downstream queries can surface high-confidence facts (CORRECT) separately from provisional ones (UNVERIFIABLE).

schema_version:2 added to both scrum_reviews.jsonl and audit_facts.jsonl writes. Old (v1) rows remain readable; new rows get the field so the forward-compat reader in kb_query can differentiate.

D. scrum_master_reviewed:true flag added to scrum_reviews.jsonl rows on accept. Future kb_query surfacing can filter by this (e.g., "show me PRs where a scrum review exists vs only inference" as governance signal). Also carried into audit_facts.jsonl when the scrum_review source path writes there.
This commit is contained in:
parent
2afad0f83f
commit
181c35b829
@ -481,6 +481,10 @@ async function extractAndPersistFacts(scratchpad: string, ctx: InferenceContext)
|
||||
entities: ex.entities,
|
||||
relationships: ex.relationships,
|
||||
verification_preview: ex.verification.slice(0, 400),
|
||||
verifier_verdicts: ex.verifier_verdicts,
|
||||
facts_dropped_by_verifier: ex.facts_dropped_by_verifier ?? 0,
|
||||
schema_version: 2,
|
||||
source: "audit_inference",
|
||||
};
|
||||
await mkdir("/home/profit/lakehouse/data/_kb", { recursive: true });
|
||||
await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(row) + "\n");
|
||||
|
||||
@ -48,6 +48,12 @@ export interface ExtractedFacts {
|
||||
// persists to its own team_runs; this is for our own cross-ref).
|
||||
llm_team_run_id?: number;
|
||||
extracted_at: string;
|
||||
// Per-fact verdicts from the verifier pass (CORRECT/INCORRECT/
|
||||
// UNVERIFIABLE/UNCHECKED). Aligned 1:1 with the *raw* fact list
|
||||
// pre-drop so operators can see which verdicts mapped to dropped
|
||||
// facts if needed.
|
||||
verifier_verdicts?: Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED">;
|
||||
facts_dropped_by_verifier?: number;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
@ -142,9 +148,25 @@ export async function extractFacts(source: string): Promise<ExtractedFacts> {
|
||||
return { ...base, error: "extractor returned no parseable JSON", verification: verifierText };
|
||||
}
|
||||
|
||||
const rawFacts: string[] = Array.isArray(parsed.facts)
|
||||
? parsed.facts.slice(0, 50).map(String)
|
||||
: [];
|
||||
|
||||
// Parse the verifier's free-form prose into per-fact verdicts, then
|
||||
// drop any fact the verifier explicitly marked INCORRECT. Leave
|
||||
// UNVERIFIABLE in place: many of our extractions are domain-specific
|
||||
// (Lakehouse internals) and the verifier has no prior-knowledge
|
||||
// anchor, so UNVERIFIABLE is the expected verdict for new signal,
|
||||
// not a quality fail. This is verifier-gated persistence: drop only
|
||||
// what's affirmatively wrong, not what's novel.
|
||||
const verdicts = parseVerifierVerdicts(verifierText, rawFacts.length);
|
||||
const incorrectIdx = new Set<number>();
|
||||
verdicts.forEach((v, i) => { if (v === "INCORRECT") incorrectIdx.add(i); });
|
||||
const kept = rawFacts.filter((_, i) => !incorrectIdx.has(i));
|
||||
|
||||
return {
|
||||
...base,
|
||||
facts: Array.isArray(parsed.facts) ? parsed.facts.slice(0, 50).map(String) : [],
|
||||
facts: kept,
|
||||
entities: Array.isArray(parsed.entities)
|
||||
? parsed.entities.slice(0, 30).map((e: any) => ({
|
||||
name: String(e?.name ?? ""),
|
||||
@ -160,10 +182,36 @@ export async function extractFacts(source: string): Promise<ExtractedFacts> {
|
||||
})).filter(r => r.from.length > 0 && r.to.length > 0)
|
||||
: [],
|
||||
verification: verifierText.slice(0, 1500),
|
||||
facts_dropped_by_verifier: incorrectIdx.size,
|
||||
verifier_verdicts: verdicts,
|
||||
llm_team_run_id: runId,
|
||||
};
|
||||
}
|
||||
|
||||
// Parse the verifier's free-form output into a per-fact verdict array.
// The verifier output typically looks like:
//   **1.** The claim...
//      * **Verdict:** CORRECT
//   **2.** ...
//      **Verdict:** UNVERIFIABLE
//
// Recognised item markers: a number followed by `.`, `)` or `:`,
// optionally opened by `**` or markdown heading hashes, and optionally
// closed by emphasis asterisks before the separating whitespace
// (so both `1. ` and `**1.** ` are accepted).
//
// An item's verdict is searched only up to the next item marker that
// begins a line. An item with no verdict therefore stays UNCHECKED
// instead of absorbing the verdict that belongs to the following item.
//
// Uses matchAll to iterate. Returns an array of length numFacts;
// positions without a matched verdict, and item numbers outside
// 1..numFacts, stay UNCHECKED. If the same item number appears more
// than once, the last matched verdict wins.
function parseVerifierVerdicts(
  verifierText: string,
  numFacts: number,
): Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED"> {
  type Verdict = "CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED";
  const out: Verdict[] = new Array<Verdict>(numFacts).fill("UNCHECKED");

  // Item marker; capture group 1 is the 1-based fact number.
  const itemHeader = /(?:\*\*|#+\s*)?(\d+)[.):]\*{0,2}\s/;
  // A new item marker at the start of a line ends the current item body.
  const nextItemAtLineStart = /\n[ \t]*(?:\*\*|#+[ \t]*)?\d+[.):]\*{0,2}\s/;
  // Verdict label; capture group 2 is the verdict keyword.
  const verdictLabel = /\bVerdict\s*:\s*\*?\*?\s*(CORRECT|INCORRECT|UNVERIFIABLE)/;

  const re = new RegExp(
    itemHeader.source +
      // Lazily consume the item body, never stepping over the next item.
      `(?:(?!${nextItemAtLineStart.source})[\\s\\S])*?` +
      verdictLabel.source,
    "gi",
  );

  for (const m of verifierText.matchAll(re)) {
    const idx = Number(m[1]) - 1;
    if (idx < 0 || idx >= numFacts) continue;
    // The match is case-insensitive, so normalise before storing.
    const label = (m[2] ?? "").toUpperCase();
    if (label === "CORRECT" || label === "INCORRECT" || label === "UNVERIFIABLE") {
      out[idx] = label;
    }
  }
  return out;
}
|
||||
|
||||
// Lift the first balanced JSON object out of (possibly fenced) text.
|
||||
// Same discipline as inference.ts::extractJson.
|
||||
function extractFirstJsonObject(text: string): any | null {
|
||||
|
||||
@ -343,12 +343,50 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
|
||||
attempts_made: history.length,
|
||||
tree_split_fired: treeSplitFired,
|
||||
suggestions_preview: accepted.slice(0, 2000),
|
||||
schema_version: 2,
|
||||
scrum_master_reviewed: true,
|
||||
};
|
||||
try {
|
||||
await appendFile(SCRUM_REVIEWS_JSONL, JSON.stringify(row) + "\n");
|
||||
} catch (e) {
|
||||
console.error(`[scrum] failed to append scrum_reviews.jsonl: ${(e as Error).message}`);
|
||||
}
|
||||
|
||||
// Route the accepted review through llm_team's fact extractor so
|
||||
// its entities + relationships land in audit_facts.jsonl alongside
|
||||
// inference-side extractions. Same index, two sources. Tagged
|
||||
// source:"scrum_review" + scrum_master_reviewed:true so downstream
|
||||
// queries can filter by provenance. Reviews shorter than 120
|
||||
// chars are skipped — they're usually one-liners ("LGTM") with
|
||||
// no extractable knowledge.
|
||||
if (accepted.length >= 120 && process.env.LH_SCRUM_SKIP_EXTRACT !== "1") {
|
||||
try {
|
||||
const { extractFacts } = await import("../../auditor/fact_extractor.ts");
|
||||
const ex = await extractFacts(accepted);
|
||||
if (!ex.error || ex.entities.length + ex.facts.length > 0) {
|
||||
const factRow = {
|
||||
pr_number: 0, // scrum runs outside a PR scope
|
||||
file: rel,
|
||||
head_sha: "", // no SHA scope; scope is the file+timestamp
|
||||
extracted_at: ex.extracted_at,
|
||||
extractor: ex.extractor_model,
|
||||
verifier: ex.verifier_model,
|
||||
llm_team_run_id: ex.llm_team_run_id ?? null,
|
||||
facts: ex.facts,
|
||||
entities: ex.entities,
|
||||
relationships: ex.relationships,
|
||||
verification_preview: ex.verification.slice(0, 400),
|
||||
schema_version: 2,
|
||||
source: "scrum_review",
|
||||
scrum_master_reviewed: true,
|
||||
};
|
||||
const AUDIT_FACTS_JSONL = "/home/profit/lakehouse/data/_kb/audit_facts.jsonl";
|
||||
await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(factRow) + "\n");
|
||||
}
|
||||
} catch (e) {
|
||||
console.error(`[scrum] fact extraction failed for ${rel}: ${(e as Error).message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return review;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user