scrum_master fact extraction + verifier gate + schema_version bump
Three bundled changes that round out the KB enrichment pipeline (PR #9 commits B/C/D compressed into one; they all touch the same persist surfaces, so splitting them would just add noise):

B. scrum_master reviews now route accepted review bodies through fact_extractor (the same llm_team extract pipeline inference uses) and append to data/_kb/audit_facts.jsonl tagged source:"scrum_review". One KB, two producers: downstream consumers can filter by source when they care about provenance. Reviews shorter than 120 chars are skipped (one-liners / LGTM-type comments with no extractable knowledge).

C. Verifier-gated fact persistence. fact_extractor now parses the verifier's free-form prose into per-fact verdicts (CORRECT / INCORRECT / UNVERIFIABLE / UNCHECKED). Facts marked INCORRECT are dropped on write; CORRECT, UNVERIFIABLE, and UNCHECKED facts are kept. Dropping UNVERIFIABLE would lose ~90% of the real signal: the verifier's prior-knowledge base doesn't know Lakehouse internals, so domain-specific facts read as UNVERIFIABLE by default. The verifier_verdicts array is persisted alongside the facts so downstream queries can surface high-confidence facts (CORRECT) separately from provisional ones (UNVERIFIABLE). schema_version: 2 is added to both scrum_reviews.jsonl and audit_facts.jsonl writes; old (v1) rows remain readable, and new rows carry the field so the forward-compat reader in kb_query can tell them apart.

D. A scrum_master_reviewed: true flag is added to scrum_reviews.jsonl rows on accept. Future kb_query surfacing can filter on it (e.g., "show me PRs where a scrum review exists vs. only inference" as a governance signal). It is also carried into audit_facts.jsonl when the scrum_review source path writes there.
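As a sketch of what this buys downstream, here is a minimal consumer of data/_kb/audit_facts.jsonl that separates high-confidence facts from provisional ones and tolerates v1 rows. It is illustrative only: the readFactRows and byConfidence helpers and the FactRow shape are assumptions built from the fields this commit writes, not kb_query's actual reader.

```ts
// Hedged sketch of a downstream audit_facts.jsonl consumer (not kb_query).
import { readFile } from "node:fs/promises";

type Verdict = "CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED";

interface FactRow {
  schema_version?: number;                      // absent on old v1 rows
  source?: "audit_inference" | "scrum_review";  // provenance tag (v2 only)
  facts: string[];                              // kept (post-drop) facts
  verifier_verdicts?: Verdict[];                // aligned with the raw, pre-drop list
}

async function readFactRows(path: string): Promise<FactRow[]> {
  const text = await readFile(path, "utf8");
  return text
    .split("\n")
    .filter((line) => line.trim().length > 0)
    .map((line) => JSON.parse(line) as FactRow);
}

// Split one row's facts into confirmed (CORRECT) vs provisional
// (UNVERIFIABLE / UNCHECKED). Because INCORRECT facts were dropped
// before persisting, the kept facts line up with the verdict array
// once INCORRECT entries are filtered out of it.
function byConfidence(row: FactRow) {
  const keptVerdicts = (row.verifier_verdicts ?? []).filter((v) => v !== "INCORRECT");
  const confirmed: string[] = [];
  const provisional: string[] = [];
  row.facts.forEach((fact, i) => {
    (keptVerdicts[i] === "CORRECT" ? confirmed : provisional).push(fact);
  });
  return { confirmed, provisional };
}
```

v1 rows have no verifier_verdicts, so everything in them stays provisional, which is the conservative default.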
parent 2afad0f83f · commit 181c35b829
@@ -481,6 +481,10 @@ async function extractAndPersistFacts(scratchpad: string, ctx: InferenceContext)
     entities: ex.entities,
     relationships: ex.relationships,
     verification_preview: ex.verification.slice(0, 400),
+    verifier_verdicts: ex.verifier_verdicts,
+    facts_dropped_by_verifier: ex.facts_dropped_by_verifier ?? 0,
+    schema_version: 2,
+    source: "audit_inference",
   };
   await mkdir("/home/profit/lakehouse/data/_kb", { recursive: true });
   await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(row) + "\n");

@@ -48,6 +48,12 @@ export interface ExtractedFacts {
   // persists to its own team_runs; this is for our own cross-ref).
   llm_team_run_id?: number;
   extracted_at: string;
+  // Per-fact verdicts from the verifier pass (CORRECT/INCORRECT/
+  // UNVERIFIABLE/UNCHECKED). Aligned 1:1 with the *raw* fact list
+  // pre-drop so operators can see which verdicts mapped to dropped
+  // facts if needed.
+  verifier_verdicts?: Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED">;
+  facts_dropped_by_verifier?: number;
   error?: string;
 }

@@ -142,9 +148,25 @@ export async function extractFacts(source: string): Promise<ExtractedFacts> {
     return { ...base, error: "extractor returned no parseable JSON", verification: verifierText };
   }

+  const rawFacts: string[] = Array.isArray(parsed.facts)
+    ? parsed.facts.slice(0, 50).map(String)
+    : [];
+
+  // Parse the verifier's free-form prose into per-fact verdicts, then
+  // drop any fact the verifier explicitly marked INCORRECT. Leave
+  // UNVERIFIABLE in place: many of our extractions are domain-specific
+  // (Lakehouse internals) and the verifier has no prior-knowledge
+  // anchor, so UNVERIFIABLE is the expected verdict for new signal,
+  // not a quality fail. This is verifier-gated persistence: drop only
+  // what's affirmatively wrong, not what's novel.
+  const verdicts = parseVerifierVerdicts(verifierText, rawFacts.length);
+  const incorrectIdx = new Set<number>();
+  verdicts.forEach((v, i) => { if (v === "INCORRECT") incorrectIdx.add(i); });
+  const kept = rawFacts.filter((_, i) => !incorrectIdx.has(i));
+
   return {
     ...base,
-    facts: Array.isArray(parsed.facts) ? parsed.facts.slice(0, 50).map(String) : [],
+    facts: kept,
     entities: Array.isArray(parsed.entities)
       ? parsed.entities.slice(0, 30).map((e: any) => ({
           name: String(e?.name ?? ""),

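To make the gate concrete, a toy walk-through with made-up facts and verdicts (illustrative values only, not real extractor or verifier output):

```ts
// Illustrative values only.
const rawFacts = [
  "audit_facts.jsonl lives under data/_kb",     // verifier: CORRECT
  "schema_version:2 deletes existing v1 rows",  // verifier: INCORRECT
  "scrum reviews are a second KB producer",     // verifier: UNVERIFIABLE
];
const verdicts = ["CORRECT", "INCORRECT", "UNVERIFIABLE"] as const;

// Only the INCORRECT fact is dropped; the UNVERIFIABLE one survives
// because novelty is not treated as a quality failure.
const kept = rawFacts.filter((_, i) => verdicts[i] !== "INCORRECT");
// kept.length === 2, facts_dropped_by_verifier === 1
```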
@@ -160,10 +182,36 @@ export async function extractFacts(source: string): Promise<ExtractedFacts> {
         })).filter(r => r.from.length > 0 && r.to.length > 0)
       : [],
     verification: verifierText.slice(0, 1500),
+    facts_dropped_by_verifier: incorrectIdx.size,
+    verifier_verdicts: verdicts,
     llm_team_run_id: runId,
   };
 }

+// Parse verifier's free-form output into a per-fact verdict array.
+// The verifier output typically looks like:
+//   **1.** The claim...
+//   * **Verdict:** CORRECT
+//   **2.** ...
+//     **Verdict:** UNVERIFIABLE
+// Using matchAll to iterate — returns a verdict array of length
+// numFacts; unmatched positions stay UNCHECKED.
+function parseVerifierVerdicts(
+  verifierText: string,
+  numFacts: number,
+): Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED"> {
+  const out: Array<"CORRECT" | "INCORRECT" | "UNVERIFIABLE" | "UNCHECKED"> =
+    Array(numFacts).fill("UNCHECKED");
+  const re = /(?:\*\*|#+\s*)?(\d+)[.):]\s[\s\S]*?\bVerdict\s*:\s*\*?\*?\s*(CORRECT|INCORRECT|UNVERIFIABLE)/gi;
+  for (const m of verifierText.matchAll(re)) {
+    const idx = Number(m[1]) - 1;
+    if (idx >= 0 && idx < numFacts) {
+      out[idx] = m[2].toUpperCase() as "CORRECT" | "INCORRECT" | "UNVERIFIABLE";
+    }
+  }
+  return out;
+}
+
 // Lift the first balanced JSON object out of (possibly fenced) text.
 // Same discipline as inference.ts::extractJson.
 function extractFirstJsonObject(text: string): any | null {

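A quick usage sketch of parseVerifierVerdicts as added above, run against a simplified sample of verifier prose (the sample and the expected result are illustrative, not captured model output):

```ts
// Simplified verifier prose: numbered claims, each followed by a verdict line.
const sample = [
  "1. audit_facts.jsonl lives under data/_kb.",
  "   **Verdict:** CORRECT",
  "2. schema_version 2 rewrites existing v1 rows.",
  "   **Verdict:** INCORRECT",
  "3. Scrum reviews act as a second KB producer.",
  "   **Verdict:** UNVERIFIABLE",
].join("\n");

// Asking for 4 verdicts when only 3 are present leaves the last slot
// UNCHECKED rather than guessing.
const verdicts = parseVerifierVerdicts(sample, 4);
// ["CORRECT", "INCORRECT", "UNVERIFIABLE", "UNCHECKED"]
```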
@@ -343,12 +343,50 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
       attempts_made: history.length,
       tree_split_fired: treeSplitFired,
       suggestions_preview: accepted.slice(0, 2000),
+      schema_version: 2,
+      scrum_master_reviewed: true,
     };
     try {
       await appendFile(SCRUM_REVIEWS_JSONL, JSON.stringify(row) + "\n");
     } catch (e) {
       console.error(`[scrum] failed to append scrum_reviews.jsonl: ${(e as Error).message}`);
     }

+    // Route the accepted review through llm_team's fact extractor so
+    // its entities + relationships land in audit_facts.jsonl alongside
+    // inference-side extractions. Same index, two sources. Tagged
+    // source:"scrum_review" + scrum_master_reviewed:true so downstream
+    // queries can filter by provenance. Reviews shorter than 120
+    // chars are skipped — they're usually one-liners ("LGTM") with
+    // no extractable knowledge.
+    if (accepted.length >= 120 && process.env.LH_SCRUM_SKIP_EXTRACT !== "1") {
+      try {
+        const { extractFacts } = await import("../../auditor/fact_extractor.ts");
+        const ex = await extractFacts(accepted);
+        if (!ex.error || ex.entities.length + ex.facts.length > 0) {
+          const factRow = {
+            pr_number: 0, // scrum runs outside a PR scope
+            file: rel,
+            head_sha: "", // no SHA scope; scope is the file+timestamp
+            extracted_at: ex.extracted_at,
+            extractor: ex.extractor_model,
+            verifier: ex.verifier_model,
+            llm_team_run_id: ex.llm_team_run_id ?? null,
+            facts: ex.facts,
+            entities: ex.entities,
+            relationships: ex.relationships,
+            verification_preview: ex.verification.slice(0, 400),
+            schema_version: 2,
+            source: "scrum_review",
+            scrum_master_reviewed: true,
+          };
+          const AUDIT_FACTS_JSONL = "/home/profit/lakehouse/data/_kb/audit_facts.jsonl";
+          await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(factRow) + "\n");
+        }
+      } catch (e) {
+        console.error(`[scrum] fact extraction failed for ${rel}: ${(e as Error).message}`);
+      }
+    }
   }

   return review;
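The provenance fields are what make the governance query from the commit message expressible. A rough sketch follows; the coverageByFile helper and its output shape are hypothetical, and it assumes every audit_facts.jsonl row carries a file field the way the scrum path above writes one. Only source, scrum_master_reviewed, and the audit_facts.jsonl path come from this change.

```ts
// Hedged sketch: per-file KB coverage split by producer (not kb_query's real surfacing).
import { readFile } from "node:fs/promises";

const AUDIT_FACTS_JSONL = "/home/profit/lakehouse/data/_kb/audit_facts.jsonl";

interface KbRow {
  file: string;
  source?: "audit_inference" | "scrum_review";  // absent on v1 rows
  scrum_master_reviewed?: boolean;
}

async function coverageByFile(): Promise<Map<string, { inference: number; scrum: number }>> {
  const text = await readFile(AUDIT_FACTS_JSONL, "utf8");
  const byFile = new Map<string, { inference: number; scrum: number }>();
  for (const line of text.split("\n")) {
    if (!line.trim()) continue;
    const row = JSON.parse(line) as KbRow;
    const entry = byFile.get(row.file) ?? { inference: 0, scrum: 0 };
    // v1 rows have neither field and count as inference-side here.
    if (row.source === "scrum_review" || row.scrum_master_reviewed) entry.scrum++;
    else entry.inference++;
    byFile.set(row.file, entry);
  }
  return byFile;
}

// Files whose entry.scrum stays 0 have inference-only coverage, which is
// the signal the scrum_master_reviewed flag is meant to expose.
```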