// Phase 2 acceptance tests — pin the materializer's invariants:
//   1. Valid rows materialize; invalid rows go to skips with errors
//   2. Idempotency: re-running on same source yields zero new writes
//   3. Stability: identical input → byte-identical output (canonical hash)
//   4. Schema gating: rows that fail validateEvidenceRecord NEVER reach
//      data/evidence/*.jsonl, only skips
//   5. Receipt: substantive (git_sha + sha256 + record_counts +
//      validation_pass), conforms to Receipt schema
//   6. JSON-parse failures handled gracefully
//
// All tests run against a temp repo root with synthetic source jsonls
// and a custom TRANSFORMS list pointing at them. No live JSONLs touched.
//
// Run: bun test tests/distillation/build_evidence_index.test.ts

import { test, expect, beforeEach, afterEach } from "bun:test";
import { mkdirSync, writeFileSync, rmSync, existsSync, readFileSync, readdirSync, statSync } from "node:fs";
import { tmpdir } from "node:os";
import { resolve } from "node:path";

import { materializeAll, type MaterializeOptions } from "../../scripts/distillation/build_evidence_index";
import type { TransformDef } from "../../scripts/distillation/transforms";
import { EVIDENCE_SCHEMA_VERSION, type ModelRole } from "../../auditor/schemas/distillation/evidence_record";
import { validateReceipt } from "../../auditor/schemas/distillation/receipt";

// Scratch repo root, recreated before and removed after every test.
// Derived from the platform temp dir rather than a hard-coded /tmp.
const TMP_ROOT = resolve(tmpdir(), "distillation_test_phase2");

// Fixed recorded_at passed to every run so provenance is reproducible.
const RECORDED = "2026-04-26T22:30:00.000Z";

// Day partition the tests expect evidence files under; it matches the
// date of the fixture rows' `ts` values (and of RECORDED).
const DAY_PARTITION = "2026/04/26";

// Minimal transforms — produce a valid EvidenceRecord from the
// synthetic source rows below.
const TEST_TRANSFORMS: TransformDef[] = [
  {
    source_file_relpath: "data/_kb/synthetic_a.jsonl",
    transform: ({ row, line_offset, source_file_relpath, recorded_at, sig_hash }) => {
      // Test rows that intentionally fail validation set bad: true.
      // Transform still returns a Partial — validator catches it.
      if (row.bad) {
        return {
          // missing run_id (required) → forces validateEvidenceRecord to reject
          task_id: row.task_id,
          timestamp: row.ts,
          schema_version: EVIDENCE_SCHEMA_VERSION,
          provenance: { source_file: source_file_relpath, line_offset, sig_hash, recorded_at },
          // Deliberately untyped: this record is invalid on purpose.
        } as any;
      }
      return {
        run_id: row.run_id,
        task_id: row.task_id,
        timestamp: row.ts,
        schema_version: EVIDENCE_SCHEMA_VERSION,
        provenance: { source_file: source_file_relpath, line_offset, sig_hash, recorded_at },
        text: row.text,
        model_role: "executor" as ModelRole,
      };
    },
  },
  {
    source_file_relpath: "data/_kb/synthetic_b.jsonl",
    transform: ({ row, line_offset, source_file_relpath, recorded_at, sig_hash }) => ({
      run_id: row.run_id,
      task_id: row.task_id,
      timestamp: row.ts,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: { source_file: source_file_relpath, line_offset, sig_hash, recorded_at },
      text: row.text,
      model_role: "extractor" as ModelRole,
    }),
  },
];

/** Result type of one materializer run, derived from the function under test. */
type MaterializeResult = Awaited<ReturnType<typeof materializeAll>>;

/**
 * Runs the materializer against the scratch root with the synthetic
 * transforms and the fixed RECORDED timestamp.
 *
 * @param overrides - options layered on top of the defaults (e.g. `dry_run`).
 */
function runMaterializer(overrides: Partial<MaterializeOptions> = {}): ReturnType<typeof materializeAll> {
  return materializeAll({
    root: TMP_ROOT,
    transforms: TEST_TRANSFORMS,
    recorded_at: RECORDED,
    ...overrides,
  });
}

/**
 * Returns the per-source report whose relpath ends with `fileName`.
 *
 * @throws Error when the run produced no report for that source, so the
 *         test fails with a readable message instead of a TypeError.
 */
function sourceReport(result: MaterializeResult, fileName: string): MaterializeResult["sources"][number] {
  const report = result.sources.find(s => s.source_file_relpath.endsWith(fileName));
  if (!report) throw new Error(`no source report found for ${fileName}`);
  return report;
}

/** Absolute path of the evidence file expected for a synthetic source. */
function evidencePath(fileName: string): string {
  return resolve(TMP_ROOT, "data/evidence", DAY_PARTITION, fileName);
}

/** Reads a JSONL file and parses every non-empty line. */
function readJsonl(path: string) {
  return readFileSync(path, "utf8")
    .trim()
    .split("\n")
    .filter(Boolean)
    .map(line => JSON.parse(line));
}

/**
 * Writes `rows` as JSONL to data/_kb/<fileName> under the scratch root.
 *
 * @param trailer - raw text appended after the final newline; used to
 *                  inject a malformed line.
 */
function writeSource(fileName: string, rows: readonly object[], trailer = ""): void {
  const body = rows.map(r => JSON.stringify(r)).join("\n") + "\n" + trailer;
  writeFileSync(resolve(TMP_ROOT, "data/_kb", fileName), body);
}

/** Recreates the scratch root and writes both synthetic source files. */
function setupRoot(): void {
  // force: true makes rmSync a no-op when the directory does not exist.
  rmSync(TMP_ROOT, { recursive: true, force: true });
  mkdirSync(resolve(TMP_ROOT, "data/_kb"), { recursive: true });

  // Source A: 3 valid + 1 invalid + 1 malformed JSON
  writeSource(
    "synthetic_a.jsonl",
    [
      { run_id: "a1", task_id: "task1", ts: "2026-04-26T20:00:00.000Z", text: "first" },
      { run_id: "a2", task_id: "task2", ts: "2026-04-26T20:01:00.000Z", text: "second" },
      { run_id: "a3", task_id: "task3", ts: "2026-04-26T20:02:00.000Z", text: "third" },
      { bad: true, task_id: "fail-row", ts: "2026-04-26T20:03:00.000Z" },
    ],
    "{not valid json\n",
  );

  // Source B: 2 valid rows
  writeSource("synthetic_b.jsonl", [
    { run_id: "b1", task_id: "btask1", ts: "2026-04-26T20:10:00.000Z", text: "alpha" },
    { run_id: "b2", task_id: "btask2", ts: "2026-04-26T20:11:00.000Z", text: "beta" },
  ]);
}

beforeEach(setupRoot);

afterEach(() => {
  rmSync(TMP_ROOT, { recursive: true, force: true });
});

// ─── Acceptance Test 1: valid rows materialize, invalid go to skips ──
test("materializer: 3 valid rows from source A reach evidence/, 1 invalid + 1 malformed go to skips", async () => {
  const r = await runMaterializer();

  // Source A: 5 read, 3 written, 2 skipped (1 missing run_id, 1 malformed JSON)
  const a = sourceReport(r, "synthetic_a.jsonl");
  expect(a.rows_read).toBe(5);
  expect(a.rows_written).toBe(3);
  expect(a.rows_skipped).toBe(2);

  // Source B: 2 read, 2 written
  const b = sourceReport(r, "synthetic_b.jsonl");
  expect(b.rows_read).toBe(2);
  expect(b.rows_written).toBe(2);

  // Skips file exists and contains both rejection reasons
  const skipsContent = readFileSync(r.skips_path, "utf8");
  expect(skipsContent).toContain("run_id"); // missing required field
  expect(skipsContent).toContain("JSON.parse"); // malformed JSON

  // Evidence files exist at the expected day partition
  const aOut = evidencePath("synthetic_a.jsonl");
  const bOut = evidencePath("synthetic_b.jsonl");
  expect(existsSync(aOut)).toBe(true);
  expect(existsSync(bOut)).toBe(true);

  // Output rows count matches written
  const aRows = readJsonl(aOut);
  expect(aRows.length).toBe(3);
  for (const row of aRows) {
    expect(row.schema_version).toBe(EVIDENCE_SCHEMA_VERSION);
    expect(row.provenance.source_file).toBe("data/_kb/synthetic_a.jsonl");
    expect(typeof row.provenance.sig_hash).toBe("string");
    expect(row.provenance.sig_hash.length).toBe(64);
  }
});

// ─── Acceptance Test 2: idempotency ──────────────────────────────────
test("materializer: re-running on same source produces 0 new writes (idempotent)", async () => {
  await runMaterializer();
  const r2 = await runMaterializer();

  // Second run reads the same rows but dedups all of them — zero new writes
  const a2 = sourceReport(r2, "synthetic_a.jsonl");
  expect(a2.rows_written).toBe(0);
  expect(a2.rows_deduped).toBe(3);

  // The invariant covers the whole run, not only source A.
  expect(r2.totals.rows_written).toBe(0);
});

// ─── Acceptance Test 3: stable sig_hash → byte-identical output ──────
test("materializer: identical input produces byte-identical output across runs", async () => {
  await runMaterializer();
  const aPath = evidencePath("synthetic_a.jsonl");
  const aBeforeBytes = readFileSync(aPath);

  // Wipe the output file and re-run with the same inputs
  rmSync(aPath);
  await runMaterializer();
  const aAfterBytes = readFileSync(aPath);

  expect(aBeforeBytes.equals(aAfterBytes)).toBe(true);
});

// ─── Acceptance Test 4: schema gating ────────────────────────────────
test("materializer: rows failing validateEvidenceRecord NEVER reach evidence/, only skips", async () => {
  await runMaterializer();
  const aRows = readJsonl(evidencePath("synthetic_a.jsonl"));

  // Every output row has a non-empty run_id (the invalid row had no
  // run_id, so it MUST be absent from output).
  for (const row of aRows) {
    expect(typeof row.run_id).toBe("string");
    expect(row.run_id.length).toBeGreaterThan(0);
  }

  // Specifically: no row carries the failing fixture's task_id "fail-row"
  expect(aRows.find(row => row.task_id === "fail-row")).toBeUndefined();
});

// ─── Acceptance Test 5: receipt is substantive + schema-conforming ───
test("materializer: receipt has git_sha + sha256(input) + sha256(output) + record_counts and validates", async () => {
  const r = await runMaterializer();

  // Self-validation against the Receipt schema
  const v = validateReceipt(r.receipt);
  expect(v.valid).toBe(true);

  // git_sha is 40 hex chars (real or 0...0 fallback)
  expect(r.receipt.git_sha).toMatch(/^[0-9a-f]{40}$/);

  // Each input file has a real sha256 + bytes
  expect(r.receipt.input_files.length).toBe(2);
  for (const f of r.receipt.input_files) {
    expect(f.sha256).toMatch(/^[0-9a-f]{64}$/);
    expect(typeof f.bytes).toBe("number");
    expect(f.bytes).toBeGreaterThan(0);
  }

  // Each output file too
  expect(r.receipt.output_files.length).toBe(2);
  for (const f of r.receipt.output_files) {
    expect(f.sha256).toMatch(/^[0-9a-f]{64}$/);
  }

  // Counts add up
  expect(r.receipt.record_counts.in).toBe(7); // 5 from A + 2 from B
  expect(r.receipt.record_counts.out).toBe(5); // 3 + 2
  expect(r.receipt.record_counts.skipped).toBe(2); // both from A

  // validation_pass MUST be a boolean — never inferred
  expect(typeof r.receipt.validation_pass).toBe("boolean");
  // With skips > 0, validation_pass should be false
  expect(r.receipt.validation_pass).toBe(false);

  // Receipt persisted
  expect(existsSync(r.receipt_path)).toBe(true);
});

// ─── Acceptance Test 6: clean run sets validation_pass=true ──────────
test("materializer: with all-valid sources, validation_pass=true and skips=0", async () => {
  // Strip the bad row + malformed JSON from source A
  writeSource("synthetic_a.jsonl", [
    { run_id: "c1", task_id: "ct1", ts: "2026-04-26T22:00:00.000Z", text: "clean" },
    { run_id: "c2", task_id: "ct2", ts: "2026-04-26T22:01:00.000Z", text: "clean2" },
  ]);

  const r = await runMaterializer();
  expect(r.receipt.record_counts.skipped).toBe(0);
  expect(r.receipt.validation_pass).toBe(true);
});

// ─── Acceptance Test 7: dry-run does not write ───────────────────────
test("materializer: --dry-run reports counts but writes no evidence files", async () => {
  const r = await runMaterializer({ dry_run: true });

  // Counts populated
  expect(r.totals.rows_read).toBe(7);
  expect(r.totals.rows_written).toBe(5);

  // No evidence files written
  const evidenceDir = resolve(TMP_ROOT, "data/evidence");
  expect(existsSync(evidenceDir)).toBe(false);

  // No skips file written
  const skipsPath = resolve(TMP_ROOT, "data/_kb/distillation_skips.jsonl");
  expect(existsSync(skipsPath)).toBe(false);
});

// ─── Acceptance Test 8: missing source file does not crash ───────────
test("materializer: missing source file is tallied as rows_present=false, no error", async () => {
  rmSync(resolve(TMP_ROOT, "data/_kb/synthetic_b.jsonl"));
  const r = await runMaterializer();

  const b = sourceReport(r, "synthetic_b.jsonl");
  expect(b.rows_present).toBe(false);
  expect(b.rows_read).toBe(0);

  // Source A still processes normally
  const a = sourceReport(r, "synthetic_a.jsonl");
  expect(a.rows_present).toBe(true);
  expect(a.rows_written).toBe(3);
});

// ─── Acceptance Test 9: provenance preserved on every row ────────────
test("materializer: every output row has provenance traceable to a source row", async () => {
  const r = await runMaterializer();

  for (const s of r.sources) {
    for (const out_path of s.output_files) {
      for (const row of readJsonl(out_path)) {
        expect(row.provenance).toBeTruthy();
        expect(row.provenance.source_file).toBe(s.source_file_relpath);
        expect(typeof row.provenance.line_offset).toBe("number");
        expect(row.provenance.sig_hash).toMatch(/^[0-9a-f]{64}$/);
        expect(row.provenance.recorded_at).toBe(RECORDED);
      }
    }
  }
});