scrum_master fact extraction + verifier gate + schema_version bump
Three bundled changes that round out the KB enrichment pipeline (PR #9 commits B/C/D compressed into one; they all touch the same persist surfaces, so splitting them would just add noise):

B. scrum_master reviews now route accepted review bodies through fact_extractor (the same llm_team extract pipeline inference uses) and append to data/_kb/audit_facts.jsonl tagged source:"scrum_review". One KB, two producers: downstream consumers can filter by source when they care about provenance. Reviews shorter than 120 chars are skipped (one-liners / LGTM-type comments with no extractable knowledge).

C. Verifier-gated fact persistence. fact_extractor now parses the verifier's free-form prose into per-fact verdicts (CORRECT / INCORRECT / UNVERIFIABLE / UNCHECKED). Facts marked INCORRECT are dropped on write; CORRECT, UNVERIFIABLE, and UNCHECKED facts are kept. Dropping UNVERIFIABLE would lose ~90% of the real signal: the verifier's prior-knowledge base doesn't know Lakehouse internals, so domain-specific facts read as UNVERIFIABLE by default. The verifier_verdicts array is persisted alongside the facts so downstream queries can surface high-confidence facts (CORRECT) separately from provisional ones (UNVERIFIABLE). schema_version: 2 is added to both scrum_reviews.jsonl and audit_facts.jsonl writes; old (v1) rows remain readable, and new rows carry the field so the forward-compat reader in kb_query can tell them apart.

D. A scrum_master_reviewed: true flag is added to scrum_reviews.jsonl rows on accept. Future kb_query surfacing can filter on it (e.g., "show me PRs where a scrum review exists vs. only inference" as a governance signal). It is also carried into audit_facts.jsonl when the scrum_review source path writes there.
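As a sketch of what this buys downstream, here is a minimal consumer of data/_kb/audit_facts.jsonl that separates high-confidence facts from provisional ones and tolerates v1 rows. It is illustrative only: the readFactRows and byConfidence helpers and the FactRow shape are assumptions built from the fields this commit writes, not kb_query's actual reader.

```ts
// Hedged sketch of a downstream audit_facts.jsonl consumer (not kb_query).
import { readFile } from "node:fs/promises";

type Verdict = "CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED";

interface FactRow {
  schema_version?: number;                      // absent on old v1 rows
  source?: "audit_inference" | "scrum_review";  // provenance tag (v2 only)
  facts: string[];                              // kept (post-drop) facts
  verifier_verdicts?: Verdict[];                // aligned with the raw, pre-drop list
}

async function readFactRows(path: string): Promise<FactRow[]> {
  const text = await readFile(path, "utf8");
  return text
    .split("\n")
    .filter((line) => line.trim().length > 0)
    .map((line) => JSON.parse(line) as FactRow);
}

// Split one row's facts into confirmed (CORRECT) vs provisional
// (UNVERIFIABLE / UNCHECKED). Because INCORRECT facts were dropped
// before persisting, the kept facts line up with the verdict array
// once INCORRECT entries are filtered out of it.
function byConfidence(row: FactRow) {
  const keptVerdicts = (row.verifier_verdicts ?? []).filter((v) => v !== "INCORRECT");
  const confirmed: string[] = [];
  const provisional: string[] = [];
  row.facts.forEach((fact, i) => {
    (keptVerdicts[i] === "CORRECT" ? confirmed : provisional).push(fact);
  });
  return { confirmed, provisional };
}
```

v1 rows have no verifier_verdicts, so everything in them stays provisional, which is the conservative default.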
parent 2afad0f83f · commit 181c35b829
@@ -481,6 +481,10 @@ async function extractAndPersistFacts(scratchpad: string, ctx: InferenceContext)
     entities: ex.entities,
     relationships: ex.relationships,
     verification_preview: ex.verification.slice(0, 400),
+    verifier_verdicts: ex.verifier_verdicts,
+    facts_dropped_by_verifier: ex.facts_dropped_by_verifier ?? 0,
+    schema_version: 2,
+    source: "audit_inference",
   };
   await mkdir("/home/profit/lakehouse/data/_kb", { recursive: true });
   await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(row) + "\n");

@@ -48,6 +48,12 @@ export interface ExtractedFacts {
   // persists to its own team_runs; this is for our own cross-ref).
   llm_team_run_id?: number;
   extracted_at: string;
+  // Per-fact verdicts from the verifier pass (CORRECT/INCORRECT/
+  // UNVERIFIABLE/UNCHECKED). Aligned 1:1 with the *raw* fact list
+  // pre-drop so operators can see which verdicts mapped to dropped
+  // facts if needed.
+  verifier_verdicts?: Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED">;
+  facts_dropped_by_verifier?: number;
   error?: string;
 }

@@ -142,9 +148,25 @@ export async function extractFacts(source: string): Promise<ExtractedFacts> {
     return { ...base, error: "extractor returned no parseable JSON", verification: verifierText };
   }

+  const rawFacts: string[] = Array.isArray(parsed.facts)
+    ? parsed.facts.slice(0, 50).map(String)
+    : [];
+
+  // Parse the verifier's free-form prose into per-fact verdicts, then
+  // drop any fact the verifier explicitly marked INCORRECT. Leave
+  // UNVERIFIABLE in place: many of our extractions are domain-specific
+  // (Lakehouse internals) and the verifier has no prior-knowledge
+  // anchor, so UNVERIFIABLE is the expected verdict for new signal,
+  // not a quality fail. This is verifier-gated persistence: drop only
+  // what's affirmatively wrong, not what's novel.
+  const verdicts = parseVerifierVerdicts(verifierText, rawFacts.length);
+  const incorrectIdx = new Set<number>();
+  verdicts.forEach((v, i) => { if (v === "INCORRECT") incorrectIdx.add(i); });
+  const kept = rawFacts.filter((_, i) => !incorrectIdx.has(i));
+
   return {
     ...base,
-    facts: Array.isArray(parsed.facts) ? parsed.facts.slice(0, 50).map(String) : [],
+    facts: kept,
     entities: Array.isArray(parsed.entities)
       ? parsed.entities.slice(0, 30).map((e: any) => ({
           name: String(e?.name ?? ""),

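To make the gate concrete, a toy walk-through with made-up facts and verdicts (illustrative values only, not real extractor or verifier output):

```ts
// Illustrative values only.
const rawFacts = [
  "audit_facts.jsonl lives under data/_kb",     // verifier: CORRECT
  "schema_version:2 deletes existing v1 rows",  // verifier: INCORRECT
  "scrum reviews are a second KB producer",     // verifier: UNVERIFIABLE
];
const verdicts = ["CORRECT", "INCORRECT", "UNVERIFIABLE"] as const;

// Only the INCORRECT fact is dropped; the UNVERIFIABLE one survives
// because novelty is not treated as a quality failure.
const kept = rawFacts.filter((_, i) => verdicts[i] !== "INCORRECT");
// kept.length === 2, facts_dropped_by_verifier === 1
```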
@@ -160,10 +182,36 @@ export async function extractFacts(source: string): Promise<ExtractedFacts> {
         })).filter(r => r.from.length > 0 && r.to.length > 0)
       : [],
     verification: verifierText.slice(0, 1500),
+    facts_dropped_by_verifier: incorrectIdx.size,
+    verifier_verdicts: verdicts,
     llm_team_run_id: runId,
   };
 }

+// Parse verifier's free-form output into a per-fact verdict array.
+// The verifier output typically looks like:
+//   **1.** The claim...
+//   * **Verdict:** CORRECT
+//   **2.** ...
+//     **Verdict:** UNVERIFIABLE
+// Using matchAll to iterate — returns a verdict array of length
+// numFacts; unmatched positions stay UNCHECKED.
+function parseVerifierVerdicts(
+  verifierText: string,
+  numFacts: number,
+): Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED"> {
+  const out: Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED"> =
+    Array(numFacts).fill("UNCHECKED");
+  const re = /(?:\*\*|#+\s*)?(\d+)[.):]\s[\s\S]*?\bVerdict\s*:\s*\*?\*?\s*(CORRECT|INCORRECT|UNVERIFIABLE)/gi;
+  for (const m of verifierText.matchAll(re)) {
+    const idx = Number(m[1]) - 1;
+    if (idx >= 0 && idx < numFacts) {
+      out[idx] = m[2].toUpperCase() as "CORRECT" | "INCORRECT" | "UNVERIFIABLE";
+    }
+  }
+  return out;
+}
+
 // Lift the first balanced JSON object out of (possibly fenced) text.
 // Same discipline as inference.ts::extractJson.
 function extractFirstJsonObject(text: string): any | null {

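A quick usage sketch of parseVerifierVerdicts as added above, run against a simplified sample of verifier prose (the sample and the expected result are illustrative, not captured model output):

```ts
// Simplified verifier prose: numbered claims, each followed by a verdict line.
const sample = [
  "1. audit_facts.jsonl lives under data/_kb.",
  "   **Verdict:** CORRECT",
  "2. schema_version 2 rewrites existing v1 rows.",
  "   **Verdict:** INCORRECT",
  "3. Scrum reviews act as a second KB producer.",
  "   **Verdict:** UNVERIFIABLE",
].join("\n");

// Asking for 4 verdicts when only 3 are present leaves the last slot
// UNCHECKED rather than guessing.
const verdicts = parseVerifierVerdicts(sample, 4);
// ["CORRECT", "INCORRECT", "UNVERIFIABLE", "UNCHECKED"]
```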
@@ -343,12 +343,50 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
       attempts_made: history.length,
       tree_split_fired: treeSplitFired,
       suggestions_preview: accepted.slice(0, 2000),
+      schema_version: 2,
+      scrum_master_reviewed: true,
     };
     try {
       await appendFile(SCRUM_REVIEWS_JSONL, JSON.stringify(row) + "\n");
     } catch (e) {
       console.error(`[scrum] failed to append scrum_reviews.jsonl: ${(e as Error).message}`);
     }

+    // Route the accepted review through llm_team's fact extractor so
+    // its entities + relationships land in audit_facts.jsonl alongside
+    // inference-side extractions. Same index, two sources. Tagged
+    // source:"scrum_review" + scrum_master_reviewed:true so downstream
+    // queries can filter by provenance. Reviews shorter than 120
+    // chars are skipped — they're usually one-liners ("LGTM") with
+    // no extractable knowledge.
+    if (accepted.length >= 120 && process.env.LH_SCRUM_SKIP_EXTRACT !== "1") {
+      try {
+        const { extractFacts } = await import("../../auditor/fact_extractor.ts");
+        const ex = await extractFacts(accepted);
+        if (!ex.error || ex.entities.length + ex.facts.length > 0) {
+          const factRow = {
+            pr_number: 0, // scrum runs outside a PR scope
+            file: rel,
+            head_sha: "", // no SHA scope; scope is the file+timestamp
+            extracted_at: ex.extracted_at,
+            extractor: ex.extractor_model,
+            verifier: ex.verifier_model,
+            llm_team_run_id: ex.llm_team_run_id ?? null,
+            facts: ex.facts,
+            entities: ex.entities,
+            relationships: ex.relationships,
+            verification_preview: ex.verification.slice(0, 400),
+            schema_version: 2,
+            source: "scrum_review",
+            scrum_master_reviewed: true,
+          };
+          const AUDIT_FACTS_JSONL = "/home/profit/lakehouse/data/_kb/audit_facts.jsonl";
+          await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(factRow) + "\n");
+        }
+      } catch (e) {
+        console.error(`[scrum] fact extraction failed for ${rel}: ${(e as Error).message}`);
+      }
+    }
   }

   return review;
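The provenance fields are what make the governance query from the commit message expressible. A rough sketch follows; the coverageByFile helper and its output shape are hypothetical, and it assumes every audit_facts.jsonl row carries a file field the way the scrum path above writes one. Only source, scrum_master_reviewed, and the audit_facts.jsonl path come from this change.

```ts
// Hedged sketch: per-file KB coverage split by producer (not kb_query's real surfacing).
import { readFile } from "node:fs/promises";

const AUDIT_FACTS_JSONL = "/home/profit/lakehouse/data/_kb/audit_facts.jsonl";

interface KbRow {
  file: string;
  source?: "audit_inference" | "scrum_review";  // absent on v1 rows
  scrum_master_reviewed?: boolean;
}

async function coverageByFile(): Promise<Map<string, { inference: number; scrum: number }>> {
  const text = await readFile(AUDIT_FACTS_JSONL, "utf8");
  const byFile = new Map<string, { inference: number; scrum: number }>();
  for (const line of text.split("\n")) {
    if (!line.trim()) continue;
    const row = JSON.parse(line) as KbRow;
    const entry = byFile.get(row.file) ?? { inference: 0, scrum: 0 };
    // v1 rows have neither field and count as inference-side here.
    if (row.source === "scrum_review" || row.scrum_master_reviewed) entry.scrum++;
    else entry.inference++;
    byFile.set(row.file, entry);
  }
  return byFile;
}

// Files whose entry.scrum stays 0 have inference-only coverage, which is
// the signal the scrum_master_reviewed flag is meant to expose.
```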