// Real-data validation test — proves the EvidenceRecord schema fits
// what we ALREADY produce, with the minimum transformation each source
// stream requires. Doubles as the stale-extraction probe: if
// distilled_facts.jsonl rows can't materialize, we know that stream
// has rotted and Phase 2 sources from elsewhere.
//
// Strategy:
//   1. Read first N rows from each source jsonl (skip if missing)
//   2. Apply minimal transformer: add schema_version + provenance,
//      synthesize run_id/task_id when source doesn't carry them
//   3. Validate each materialized record
//   4. Tally pass/fail per source + collect failure reasons
//
// This file is allowed to skip when source files don't exist (fresh
// clone), so it acts as both a CI guard and a real-environment probe.

import { test, expect } from "bun:test";
import { existsSync, readFileSync } from "node:fs";
import { resolve } from "node:path";
import {
  validateEvidenceRecord,
  EVIDENCE_SCHEMA_VERSION,
  EvidenceRecord,
  ModelRole,
} from "./evidence_record";

const ROOT = "/home/profit/lakehouse";
const SAMPLE_PER_SOURCE = 10;

/**
 * One source stream to probe: the jsonl path and the transformer that
 * lifts a parsed row toward an EvidenceRecord. A transformer may return
 * `null` to decline a row; such rows are neither passed nor failed.
 */
interface SourceProbe {
  source_file: string;
  transform: (row: any, lineNo: number) => Partial<EvidenceRecord> | null;
}

// Canonical 64-char synthetic sha256 for tests where the source row
// lacks one. Pretends the materializer would compute it via
// canonicalSha256(orderedKeys(row)) at Phase 2 time. We use a fixed
// value here to keep the test deterministic; real materialization
// re-hashes per row.
const PLACEHOLDER_SHA = "0000000000000000000000000000000000000000000000000000000000000000";
const RECORDED = "2026-04-26T22:30:00.000Z";

// Reason recorded for sampled rows that are not parseable JSON.
const INVALID_JSON_REASON = "row is not valid JSON";

/**
 * Builds the provenance block for a materialized row.
 *
 * @param source_file Absolute path of the source jsonl; the `${ROOT}/` prefix is stripped.
 * @param lineNo      Index of the row within the sampled (blank-line-filtered) rows.
 * @param sigHashRaw  Optional hash carried by the row. Used only when it is a
 *                    lowercase-hex string; anything else falls back to PLACEHOLDER_SHA.
 * @returns Provenance with a 64-char `sig_hash` and the fixed RECORDED timestamp.
 */
function provFor(source_file: string, lineNo: number, sigHashRaw?: string): EvidenceRecord["provenance"] {
  // Pad shorter hashes (distilled_* uses 16-char) to 64 — mimics
  // canonical recompute. The typeof guard matters because callers pass
  // fields of untyped rows: a numeric value would satisfy the regex via
  // coercion and then throw on .padEnd.
  const sig =
    typeof sigHashRaw === "string" && /^[0-9a-f]+$/.test(sigHashRaw)
      ? sigHashRaw.padEnd(64, "0").slice(0, 64)
      : PLACEHOLDER_SHA;
  return {
    source_file: source_file.replace(`${ROOT}/`, ""),
    line_offset: lineNo,
    sig_hash: sig,
    recorded_at: RECORDED,
  };
}

// One entry per source stream. Each transformer maps the row's own
// field names onto EvidenceRecord fields and synthesizes run_id/task_id
// where the row does not carry them.
const PROBES: SourceProbe[] = [
  {
    source_file: `${ROOT}/data/_kb/distilled_facts.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: String(row.run_id ?? `distilled_facts:${lineNo}`),
      task_id: String(row.source_label ?? `distilled_facts:${lineNo}`),
      timestamp: row.created_at,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/distilled_facts.jsonl`, lineNo, row.sig_hash),
      model_name: row.extractor,
      model_role: "extractor" as ModelRole,
      model_provider: "ollama",
      text: row.text,
    }),
  },
  {
    source_file: `${ROOT}/data/_kb/distilled_procedures.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: String(row.run_id ?? `distilled_procedures:${lineNo}`),
      task_id: String(row.source_label ?? `distilled_procedures:${lineNo}`),
      timestamp: row.created_at,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/distilled_procedures.jsonl`, lineNo, row.sig_hash),
      model_name: row.extractor,
      model_role: "extractor" as ModelRole,
      model_provider: "ollama",
      text: row.text,
    }),
  },
  {
    source_file: `${ROOT}/data/_kb/contract_analyses.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `contract_analysis:${row.permit_id}:${new Date(row.ts).getTime()}`,
      task_id: `permit:${row.permit_id}`,
      timestamp: row.ts,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/contract_analyses.jsonl`, lineNo),
      model_role: "executor" as ModelRole,
      retrieved_context: {
        matrix_corpora: Object.keys(row.matrix_corpora ?? {}),
        matrix_hits: row.matrix_hits,
      },
      // Accepts either a single note or an array of notes.
      observer_notes: row.observer_notes ? [row.observer_notes].flat() : undefined,
      observer_verdict: row.observer_verdict,
      observer_confidence: row.observer_conf,
      success_markers: row.ok ? ["matrix_hits_above_threshold"] : undefined,
      failure_markers: !row.ok || row.observer_verdict === "reject" ? ["observer_rejected"] : undefined,
      // NOTE(review): the divisor suggests row.cost is in millionths of a dollar — confirm.
      cost_usd: typeof row.cost === "number" ? row.cost / 1_000_000 : undefined,
      latency_ms: row.duration_ms,
      text: row.analysis,
    }),
  },
  {
    source_file: `${ROOT}/data/_kb/mode_experiments.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `mode_exec:${new Date(row.ts).getTime()}:${row.file_path ?? "?"}`,
      task_id: row.task_class,
      timestamp: row.ts,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/mode_experiments.jsonl`, lineNo),
      model_name: row.model,
      model_role: "executor" as ModelRole,
      model_provider: row.model?.includes("/") ? "openrouter" : "ollama_cloud",
      retrieved_context: {
        matrix_corpora: row.sources?.matrix_corpus,
        matrix_chunks_kept: row.sources?.matrix_chunks_kept,
        matrix_chunks_dropped: row.sources?.matrix_chunks_dropped,
        pathway_fingerprints_seen: row.sources?.bug_fingerprints_count,
      },
      latency_ms: row.latency_ms,
      text: row.response,
      source_files: row.file_path ? [row.file_path] : undefined,
    }),
  },
  {
    source_file: `${ROOT}/data/_kb/scrum_reviews.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `scrum:${new Date(row.reviewed_at).getTime()}:${row.file}`,
      task_id: `scrum_review:${row.file}`,
      timestamp: row.reviewed_at,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/scrum_reviews.jsonl`, lineNo),
      model_name: row.accepted_model,
      model_role: "executor" as ModelRole,
      source_files: [row.file],
      success_markers: row.accepted_on_attempt ? [`accepted_on_attempt_${row.accepted_on_attempt}`] : undefined,
      text: row.suggestions_preview,
    }),
  },
  {
    source_file: `${ROOT}/data/_kb/observer_escalations.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `obs_esc:${new Date(row.ts).getTime()}:${row.sig_hash}`,
      task_id: `observer_escalation:${row.cluster_endpoint ?? "?"}`,
      timestamp: row.ts,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/observer_escalations.jsonl`, lineNo, row.sig_hash),
      model_role: "reviewer" as ModelRole,
      prompt_tokens: row.prompt_tokens,
      completion_tokens: row.completion_tokens,
      text: row.analysis,
    }),
  },
  {
    source_file: `${ROOT}/data/_kb/audit_facts.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `audit_facts:${row.head_sha}:${lineNo}`,
      task_id: `pr:${row.pr_number}`,
      timestamp: row.extracted_at,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/audit_facts.jsonl`, lineNo),
      model_name: row.extractor,
      model_role: "extractor" as ModelRole,
      // facts/entities/relationships go into text as a JSON dump for now;
      // structured handling lives in Phase 2 where we map to specific
      // EvidenceRecord substructures.
      text: JSON.stringify({
        facts: row.facts?.length ?? 0,
        entities: row.entities?.length ?? 0,
        relationships: row.relationships?.length ?? 0,
      }),
    }),
  },
];

/** Per-source tally collected by the probe tests and consumed by the report test. */
interface ProbeResult {
  source_file: string;
  rows_attempted: number;
  rows_present: boolean;
  passed: number;
  failed: number;
  failure_reasons: string[]; // unique error strings, top 5
}

// Filled in as each per-source test runs; the report test reads it afterwards.
const RESULTS: ProbeResult[] = [];

for (const probe of PROBES) {
  const sourceLabel = probe.source_file.replace(`${ROOT}/`, "");
  test(`real-data: ${sourceLabel}`, () => {
    const result: ProbeResult = {
      source_file: sourceLabel,
      rows_attempted: 0,
      rows_present: false,
      passed: 0,
      failed: 0,
      failure_reasons: [],
    };

    if (!existsSync(probe.source_file)) {
      RESULTS.push(result);
      // Skip silently — fresh clones won't have these files
      return;
    }
    result.rows_present = true;

    const lines = readFileSync(probe.source_file, "utf8")
      .split("\n")
      .filter(Boolean)
      .slice(0, SAMPLE_PER_SOURCE);
    const reasons = new Set<string>();

    for (let i = 0; i < lines.length; i++) {
      result.rows_attempted++;

      let row: unknown;
      try {
        row = JSON.parse(lines[i]);
      } catch {
        // A corrupt row is a materialization failure, not something to
        // drop: count it so passed + failed accounts for it and the
        // report explains the lowered pass rate.
        result.failed++;
        reasons.add(INVALID_JSON_REASON);
        continue;
      }

      const transformed = probe.transform(row, i);
      // A transformer returning null declines the row; it stays in
      // rows_attempted but is neither passed nor failed.
      if (!transformed) continue;

      const v = validateEvidenceRecord(transformed);
      if (v.valid) {
        result.passed++;
      } else {
        result.failed++;
        for (const e of v.errors) reasons.add(e);
      }
    }

    result.failure_reasons = Array.from(reasons).slice(0, 5);
    RESULTS.push(result);

    // Test passes as long as we attempted something and got a result.
    // Per-source pass/fail counts are reported in the markdown writeup.
    expect(result.rows_attempted).toBeGreaterThanOrEqual(0);
  });
}

test("real-data: emit markdown report", async () => {
  const md: string[] = [];
  md.push("# Real-data validation report");
  md.push("");
  md.push("Schema = EvidenceRecord v" + EVIDENCE_SCHEMA_VERSION + ". Sample = first " + SAMPLE_PER_SOURCE + " rows per source.");
  md.push("");
  md.push("| Source | Present | Rows | Pass | Fail | Pass% |");
  md.push("|---|---|---|---|---|---|");
  for (const r of RESULTS) {
    const pct = r.rows_attempted > 0 ? Math.round(100 * r.passed / r.rows_attempted) + "%" : "—";
    md.push(`| ${r.source_file} | ${r.rows_present ? "✓" : "—"} | ${r.rows_attempted} | ${r.passed} | ${r.failed} | ${pct} |`);
  }
  md.push("");

  let hasFailures = false;
  for (const r of RESULTS) {
    if (r.failed > 0) {
      hasFailures = true;
      md.push(`## Failures in ${r.source_file}`);
      for (const reason of r.failure_reasons) md.push(`- \`${reason}\``);
      md.push("");
    }
  }
  if (!hasFailures) {
    md.push("**No failures across all probed sources.** Every materialized record validates against EvidenceRecord v1.");
    md.push("");
  }

  // Stale extraction probe: explicit pass/fail
  const distilledFacts = RESULTS.find(r => r.source_file.endsWith("distilled_facts.jsonl"));
  const distilledProc = RESULTS.find(r => r.source_file.endsWith("distilled_procedures.jsonl"));
  md.push("## Stale-extraction probe");
  md.push("");
  if (!distilledFacts) {
    // No result was recorded (e.g. the per-source test did not run), so
    // nothing can be said about the file itself.
    md.push(`- **distilled_facts.jsonl:** not probed in this run — no result recorded.`);
  } else if (!distilledFacts.rows_present) {
    md.push(`- **distilled_facts.jsonl:** missing — stale or never produced. Phase 2 sources from live streams instead.`);
  } else if (distilledFacts.passed > 0) {
    md.push(`- **distilled_facts.jsonl:** ${distilledFacts.passed}/${distilledFacts.rows_attempted} materialize cleanly. Stream is alive at the schema level.`);
  } else if (distilledFacts.rows_attempted === 0) {
    // File exists but had no non-blank rows: not a materialization failure.
    md.push(`- **distilled_facts.jsonl:** present but empty — no rows to materialize.`);
  } else {
    md.push(`- **distilled_facts.jsonl:** present but materialization failures; treat as suspect, prefer mode_experiments + scrum_reviews.`);
  }
  if (distilledProc && distilledProc.rows_present && distilledProc.passed > 0) {
    md.push(`- **distilled_procedures.jsonl:** ${distilledProc.passed}/${distilledProc.rows_attempted} materialize cleanly.`);
  }
  md.push("");

  // Write the markdown to a stable path and stdout. The write is awaited
  // so the test does not finish before the report is on disk and so a
  // write error fails the test instead of being dropped.
  const out = md.join("\n");
  await Bun.write(`${ROOT}/data/_kb/realdata_validation_report.md`, out);
  console.log("\n" + out);
});