// Phase 4 contamination-firewall tests. The SFT leak-prevention block
// is the most important set: it MUST be impossible for rejected or
// needs_human_review records to reach exports/sft/instruction_response.jsonl
// regardless of how the input data is crafted.
//
// Strategy: synthesize evidence + scored-runs in a temp root, run each
// exporter, assert outputs and quarantine.

import { test, expect, beforeEach, afterEach } from "bun:test";
import { mkdirSync, writeFileSync, rmSync, existsSync, readFileSync } from "node:fs";
import { resolve } from "node:path";

import { exportRag } from "../../scripts/distillation/export_rag";
import { exportSft } from "../../scripts/distillation/export_sft";
import { exportPreference } from "../../scripts/distillation/export_preference";
import { validateRagSample } from "../../auditor/schemas/distillation/rag_sample";
import { validateSftSample } from "../../auditor/schemas/distillation/sft_sample";
import { validatePreferenceSample } from "../../auditor/schemas/distillation/preference_sample";
import { EVIDENCE_SCHEMA_VERSION, type EvidenceRecord, type ModelRole } from "../../auditor/schemas/distillation/evidence_record";
import { SCORED_RUN_SCHEMA_VERSION, type ScoredRun, type ScoreCategory } from "../../auditor/schemas/distillation/scored_run";

const TMP = "/tmp/distillation_test_phase4";
const NOW = "2026-04-26T22:30:00.000Z";
const SHA = "0".repeat(64);
const PARTITION = "2026/04/27";

// Path (relative to the temp root) of the SFT output file that the firewall protects.
const SFT_OUTPUT = "exports/sft/instruction_response.jsonl";

// ─── Fixture helpers ────────────────────────────────────────────────

/** Removes the whole temp root; `force` makes this a no-op when it does not exist. */
function removeRoot(): void {
  rmSync(TMP, { recursive: true, force: true });
}

/** Removes only the `exports/` tree so an exporter can be re-run against the same inputs. */
function resetExports(): void {
  rmSync(resolve(TMP, "exports"), { recursive: true, force: true });
}

/** Recreates an empty temp root containing the evidence and scored-runs partition directories. */
function setupRoot(): void {
  removeRoot();
  mkdirSync(resolve(TMP, `data/evidence/${PARTITION}`), { recursive: true });
  mkdirSync(resolve(TMP, `data/scored-runs/${PARTITION}`), { recursive: true });
}

/** Root-relative path of the scored-runs JSONL file for `stem` in the test partition. */
function scoredRelpath(stem: string): string {
  return `data/scored-runs/${PARTITION}/${stem}.jsonl`;
}

/** Writes `evs` as newline-terminated JSONL to the evidence partition file named `stem`. */
function writeEvidence(stem: string, evs: EvidenceRecord[]): void {
  const path = resolve(TMP, `data/evidence/${PARTITION}/${stem}.jsonl`);
  writeFileSync(path, evs.map(e => JSON.stringify(e)).join("\n") + "\n");
}

/** Writes `scored` as newline-terminated JSONL to the scored-runs partition file named `stem`. */
function writeScored(stem: string, scored: ScoredRun[]): void {
  const path = resolve(TMP, scoredRelpath(stem));
  writeFileSync(path, scored.map(s => JSON.stringify(s)).join("\n") + "\n");
}

/**
 * Reads a JSONL file under the temp root and parses every line.
 * Rows stay as `JSON.parse`'s untyped result; tests pass them to schema validators.
 * Throws if the file is missing or empty (an empty line is not valid JSON).
 */
function readJsonl(relpath: string) {
  return readFileSync(resolve(TMP, relpath), "utf8")
    .trim()
    .split("\n")
    .map(line => JSON.parse(line));
}

/**
 * Asserts that `text` does not appear anywhere in the SFT output file.
 * A missing file trivially satisfies the check (nothing was exported at all).
 */
function expectAbsentFromSftOutput(text: string): void {
  const path = resolve(TMP, SFT_OUTPUT);
  if (!existsSync(path)) return;
  expect(readFileSync(path, "utf8")).not.toContain(text);
}

/**
 * Builds an evidence record with fixed timestamp / hash values.
 * `role` defaults to "executor" and `text` to "default response text";
 * provenance points at `data/_kb/<source_stem>.jsonl`.
 */
function makeEv(opts: {
  run_id: string;
  task_id: string;
  source_stem: string;
  text?: string;
  role?: ModelRole;
  source_files?: string[];
}): EvidenceRecord {
  return {
    run_id: opts.run_id,
    task_id: opts.task_id,
    timestamp: NOW,
    schema_version: EVIDENCE_SCHEMA_VERSION,
    provenance: {
      source_file: `data/_kb/${opts.source_stem}.jsonl`,
      line_offset: 0,
      sig_hash: SHA,
      recorded_at: NOW,
    },
    model_role: opts.role ?? "executor",
    text: opts.text ?? "default response text",
    source_files: opts.source_files,
  };
}

/**
 * Builds a scored-run record that references an evidence record by run_id / task_id.
 * `out_relpath` is stored as the record's provenance source file.
 */
function makeScored(opts: {
  run_id: string;
  task_id: string;
  category: ScoreCategory;
  reasons: string[];
  out_relpath: string;
}): ScoredRun {
  return {
    schema_version: SCORED_RUN_SCHEMA_VERSION,
    evidence_run_id: opts.run_id,
    evidence_task_id: opts.task_id,
    category: opts.category,
    reasons: opts.reasons,
    scored_at: NOW,
    scorer_version: "v1.0.0",
    sub_scores: {},
    provenance: {
      source_file: opts.out_relpath,
      line_offset: 0,
      sig_hash: SHA,
      recorded_at: NOW,
    },
  };
}

beforeEach(setupRoot);
afterEach(removeRoot);

// ─── RAG export ─────────────────────────────────────────────────────

test("RAG: accepted + partial flow through; rejected quarantined", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "ra1", task_id: "t1", source_stem: "scrum_reviews", text: "good review" }),
    makeEv({ run_id: "ra2", task_id: "t2", source_stem: "scrum_reviews", text: "ok review" }),
    makeEv({ run_id: "ra3", task_id: "t3", source_stem: "scrum_reviews", text: "bad review" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "ra1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
    makeScored({ run_id: "ra2", task_id: "t2", category: "partially_accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
    makeScored({ run_id: "ra3", task_id: "t3", category: "rejected", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  // Previously this test only wrote fixtures and asserted nothing, so it could
  // never fail. Run the exporter and pin the counts the title describes:
  // accepted + partially_accepted exported, rejected quarantined.
  const r = await exportRag({ root: TMP, recorded_at: NOW });
  expect(r.records_exported).toBe(2);
  expect(r.records_quarantined).toBe(1);
});

test("RAG: needs_human_review excluded by default, included with flag", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "rh1", task_id: "t1", source_stem: "scrum_reviews", text: "default skip" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "rh1", task_id: "t1", category: "needs_human_review", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r1 = await exportRag({ root: TMP, recorded_at: NOW, include_review: false });
  expect(r1.records_exported).toBe(0);
  expect(r1.records_quarantined).toBe(1);

  // Reset for include_review run.
  resetExports();

  const r2 = await exportRag({ root: TMP, recorded_at: NOW, include_review: true });
  expect(r2.records_exported).toBe(1);
  expect(r2.records_quarantined).toBe(0);
});

test("RAG: every output row validates against RagSample schema", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "rv1", task_id: "t1", source_stem: "scrum_reviews", text: "review content" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "rv1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  await exportRag({ root: TMP, recorded_at: NOW });

  const relpath = "exports/rag/playbooks.jsonl";
  expect(existsSync(resolve(TMP, relpath))).toBe(true);
  for (const row of readJsonl(relpath)) {
    const v = validateRagSample(row);
    expect(v.valid).toBe(true);
  }
});

test("RAG: empty content quarantined", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "re1", task_id: "t1", source_stem: "scrum_reviews", text: "" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "re1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r = await exportRag({ root: TMP, recorded_at: NOW });
  expect(r.records_exported).toBe(0);
  expect(r.records_quarantined).toBe(1);

  const qPath = resolve(TMP, "exports/quarantine/rag.jsonl");
  expect(existsSync(qPath)).toBe(true);
  expect(readFileSync(qPath, "utf8")).toContain("empty_content");
});

// ─── SFT export — THE CONTAMINATION FIREWALL ────────────────────────

test("SFT: rejected NEVER ships (spec non-negotiable)", async () => {
  const leakedText = "rejected output that should NOT train";
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "sf1", task_id: "t1", source_stem: "scrum_reviews", text: leakedText }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "sf1", task_id: "t1", category: "rejected", reasons: ["bad"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r = await exportSft({ root: TMP, recorded_at: NOW });
  expect(r.records_exported).toBe(0);
  expect(r.records_quarantined).toBe(1);

  const qPath = resolve(TMP, "exports/quarantine/sft.jsonl");
  expect(readFileSync(qPath, "utf8")).toContain("unsafe_sft_category");

  // Check the output file itself, not just the counters the exporter reports.
  expectAbsentFromSftOutput(leakedText);
});

test("SFT: needs_human_review NEVER ships (spec non-negotiable)", async () => {
  const leakedText = "hum text";
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "sh1", task_id: "t1", source_stem: "scrum_reviews", text: leakedText }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "sh1", task_id: "t1", category: "needs_human_review", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r = await exportSft({ root: TMP, recorded_at: NOW });
  expect(r.records_exported).toBe(0);
  expect(r.records_quarantined).toBe(1);

  // Check the output file itself, not just the counters the exporter reports.
  expectAbsentFromSftOutput(leakedText);
});

test("SFT: partially_accepted excluded by default; included with --include-partial", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "sp1", task_id: "t1", source_stem: "scrum_reviews", text: "partial output" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "sp1", task_id: "t1", category: "partially_accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r1 = await exportSft({ root: TMP, recorded_at: NOW });
  expect(r1.records_exported).toBe(0);
  expect(r1.records_quarantined).toBe(1);

  // Reset for include_partial.
  resetExports();

  const r2 = await exportSft({ root: TMP, recorded_at: NOW, include_partial: true });
  expect(r2.records_exported).toBe(1);
});

test("SFT: extraction-class records (no instruction→response shape) quarantined", async () => {
  writeEvidence("distilled_facts", [
    makeEv({ run_id: "sx1", task_id: "t1", source_stem: "distilled_facts", text: "extracted fact", role: "extractor" }),
  ]);
  writeScored("distilled_facts", [
    // Force category=accepted to prove it's the role-shape gate that catches it, not the category gate.
    makeScored({ run_id: "sx1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("distilled_facts") }),
  ]);

  const r = await exportSft({ root: TMP, recorded_at: NOW });
  expect(r.records_exported).toBe(0);
  expect(r.records_quarantined).toBe(1);
});

test("SFT: every output row validates against SftSample (provenance + non-empty + quality_score)", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "sv1", task_id: "t1", source_stem: "scrum_reviews", text: "real instruction response text" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "sv1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  await exportSft({ root: TMP, recorded_at: NOW });

  const rows = readJsonl(SFT_OUTPUT);
  expect(rows.length).toBe(1);
  for (const row of rows) {
    const v = validateSftSample(row);
    expect(v.valid).toBe(true);
    expect(row.quality_score).toBe("accepted"); // never partial here
    expect(row.provenance.sig_hash).toMatch(/^[0-9a-f]{64}$/);
  }
});

test("SFT: idempotent — second run produces 0 new exports", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "si1", task_id: "t1", source_stem: "scrum_reviews", text: "idem" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "si1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  await exportSft({ root: TMP, recorded_at: NOW });
  const r2 = await exportSft({ root: TMP, recorded_at: NOW });
  expect(r2.records_exported).toBe(0);
});

// ─── Preference export — pairing logic ──────────────────────────────

test("Preference: same task_id, accepted vs rejected → exports a pair", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "pa1", task_id: "task-X", source_stem: "scrum_reviews", text: "good chosen output" }),
    makeEv({ run_id: "pr1", task_id: "task-X", source_stem: "scrum_reviews", text: "bad rejected output" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "pa1", task_id: "task-X", category: "accepted", reasons: ["good"], out_relpath: scoredRelpath("scrum_reviews") }),
    makeScored({ run_id: "pr1", task_id: "task-X", category: "rejected", reasons: ["bad"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r = await exportPreference({ root: TMP, recorded_at: NOW });
  expect(r.pairs_exported).toBe(1);

  const rows = readJsonl("exports/preference/chosen_rejected.jsonl");
  expect(rows.length).toBe(1);
  expect(rows[0].chosen).toContain("good");
  expect(rows[0].rejected).toContain("bad");
  expect(rows[0].chosen_run_id).not.toBe(rows[0].rejected_run_id);

  const v = validatePreferenceSample(rows[0]);
  expect(v.valid).toBe(true);
});

test("Preference: different task_ids never pair (no fabrication)", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "pd1", task_id: "task-A", source_stem: "scrum_reviews", text: "A good" }),
    makeEv({ run_id: "pd2", task_id: "task-B", source_stem: "scrum_reviews", text: "B bad" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "pd1", task_id: "task-A", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
    makeScored({ run_id: "pd2", task_id: "task-B", category: "rejected", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r = await exportPreference({ root: TMP, recorded_at: NOW });
  expect(r.pairs_exported).toBe(0); // no shared task_id
  expect(r.insufficient_pair_task_ids).toBe(2);
});

test("Preference: identical text in chosen and rejected quarantined", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "pi1", task_id: "task-X", source_stem: "scrum_reviews", text: "identical text" }),
    makeEv({ run_id: "pi2", task_id: "task-X", source_stem: "scrum_reviews", text: "identical text" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "pi1", task_id: "task-X", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
    makeScored({ run_id: "pi2", task_id: "task-X", category: "rejected", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r = await exportPreference({ root: TMP, recorded_at: NOW });
  expect(r.pairs_exported).toBe(0);
  expect(r.records_quarantined).toBeGreaterThan(0);

  const qPath = resolve(TMP, "exports/quarantine/preference.jsonl");
  expect(readFileSync(qPath, "utf8")).toContain("identical");
});

test("Preference: accepted vs partially_accepted is a softer fallback pair", async () => {
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "ps1", task_id: "task-X", source_stem: "scrum_reviews", text: "best output" }),
    makeEv({ run_id: "ps2", task_id: "task-X", source_stem: "scrum_reviews", text: "ok output" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "ps1", task_id: "task-X", category: "accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
    makeScored({ run_id: "ps2", task_id: "task-X", category: "partially_accepted", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  const r = await exportPreference({ root: TMP, recorded_at: NOW });
  expect(r.pairs_exported).toBe(1);
});

// ─── Quarantine populated when expected ─────────────────────────────

// NOTE(review): the title below reads "exports/quarantine/.jsonl" — presumably a
// per-exporter placeholder was lost before ".jsonl"; confirm before renaming,
// since the title may be used as a test-name filter.
test("Quarantine: every export creates exports/quarantine/.jsonl when needed", async () => {
  // SFT with a forbidden category should populate quarantine
  writeEvidence("scrum_reviews", [
    makeEv({ run_id: "q1", task_id: "t1", source_stem: "scrum_reviews", text: "x" }),
  ]);
  writeScored("scrum_reviews", [
    makeScored({ run_id: "q1", task_id: "t1", category: "rejected", reasons: ["x"], out_relpath: scoredRelpath("scrum_reviews") }),
  ]);

  await exportSft({ root: TMP, recorded_at: NOW });
  expect(existsSync(resolve(TMP, "exports/quarantine/sft.jsonl"))).toBe(true);
});