Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Build the contamination firewall: RAG, SFT, and Preference exporters
that turn scored evidence into clean training datasets without
leaking rejected, unvalidated, hallucinated, or provenance-free
records.
Files (8 new + 4 schema updates):
scripts/distillation/quarantine.ts shared QuarantineWriter, 11-reason taxonomy
scripts/distillation/export_rag.ts RAG exporter (--include-review opt-in)
scripts/distillation/export_sft.ts SFT exporter (--include-partial opt-in, SFT_NEVER constant)
scripts/distillation/export_preference.ts preference exporter, same task_id pairing
scripts/distillation/distill.ts CLI dispatcher (build-evidence/score/export-*)
tests/distillation/exports.test.ts 15 contamination-firewall tests
reports/distillation/phase4-export-report.md acceptance report
Schema field-name alignment with now.md:
rag_sample.ts +source_category, exported_at→created_at
sft_sample.ts +id, exported_at→created_at, partially_accepted allowed at the schema level (the CLI gates it)
preference_sample.ts +id, source_run_ids→chosen_run_id+rejected_run_id, +created_at
Test metrics: 117 distillation tests pass · 0 fail · 315 expects · 327ms
Real-data export run (1052 scored input rows):
RAG: 446 exported (351 acc + 95 partial), 606 quarantined
SFT: 351 exported (all 'accepted'), 701 quarantined
Preference: 83 pairs exported, 16 quarantined
CONTAMINATION FIREWALL — verified held on real data:
- SFT output: 351/351 quality_score='accepted' (ZERO leaked)
- RAG output: 351 acc + 95 partial (ZERO rejected leaked)
- Preference: 0 self-pairs (chosen_run_id != rejected_run_id)
- 536 rejected+needs_human_review records caught at unsafe_sft_category
gate, exact match to scored-runs forbidden-category total
Defense in depth (the firewall is two layers, not one):
1. Schema layer (Phase 1): SftSample.quality_score enum forbids
rejected/needs_human at write time
2. Exporter layer: SFT_NEVER constant in export_sft.ts checks
category before synthesis. Even if synthesis produced a row
with quality_score=rejected, validateSftSample would reject it.
Quarantine reasons (11): missing_provenance, missing_source_run_id,
empty_content, schema_violation, unsafe_sft_category,
unsafe_rag_category, invalid_preference_pairing,
hallucinated_file_path, duplicate_id, self_pairing,
category_disallowed.
Bug surfaced + fixed during testing: module-level evidenceCache
shared state across test runs (tests wipe TMP, cache holds stale
empty Map). Moved cache to per-call scope. Same pattern bit Phase 2
materializer would have hit if its tests had multiple runs sharing
state — preventive fix.
Pairing logic v1: same task_id with category gap. accepted×rejected
preferred, accepted×partially_accepted as fallback. MAX_PAIRS_PER_TASK=5
cap prevents one hot task from dominating. Future: cross-source
pairing (scrum_reviews chosen vs observer_reviews rejected on same
file) to grow dataset beyond 83.
CLI: ./scripts/distill.ts {build-evidence|score|export-rag|export-sft|export-preference|export-all|health}
Flags: --dry-run, --include-partial (SFT only), --include-review (RAG only)
Carry-overs to Phase 5 (Receipts Harness):
- Each exporter currently writes results but no per-stage receipt.json.
Phase 5 wraps build_evidence_index + score_runs + export_* in a
withReceipt() helper that captures git_sha + sha256 of inputs/outputs
+ record_counts + validation_pass.
- reports/distillation/latest.md aggregating most-recent run of each stage.
Carry-overs to Phase 3 v2:
- mode_experiments scoring (168 needs_human_review): derive markers from
validation_results.grounded_fraction
- extraction-class JOIN: distilled_*/audit_facts/observer_escalations
→ JOIN to verdict-bearing parent by task_id
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
301 lines · 16 KiB · TypeScript
// Phase 4 contamination-firewall tests. The SFT leak-prevention block
// is the most important set: it MUST be impossible for rejected or
// needs_human_review records to reach exports/sft/instruction_response.jsonl
// regardless of how the input data is crafted.
//
// Strategy: synthesize evidence + scored-runs in a temp root, run each
// exporter, assert outputs and quarantine.
import { test, expect, beforeEach, afterEach } from "bun:test";
|
|
import { mkdirSync, writeFileSync, rmSync, existsSync, readFileSync } from "node:fs";
|
|
import { resolve } from "node:path";
|
|
|
|
import { exportRag } from "../../scripts/distillation/export_rag";
|
|
import { exportSft } from "../../scripts/distillation/export_sft";
|
|
import { exportPreference } from "../../scripts/distillation/export_preference";
|
|
import { validateRagSample } from "../../auditor/schemas/distillation/rag_sample";
|
|
import { validateSftSample } from "../../auditor/schemas/distillation/sft_sample";
|
|
import { validatePreferenceSample } from "../../auditor/schemas/distillation/preference_sample";
|
|
import { EVIDENCE_SCHEMA_VERSION, type EvidenceRecord, type ModelRole } from "../../auditor/schemas/distillation/evidence_record";
|
|
import { SCORED_RUN_SCHEMA_VERSION, type ScoredRun, type ScoreCategory } from "../../auditor/schemas/distillation/scored_run";
|
|
|
|
// Isolated filesystem root for this suite; recreated before and removed after every test.
const TMP = "/tmp/distillation_test_phase4";
// Fixed timestamp passed as recorded_at so fixture records are deterministic.
const NOW = "2026-04-26T22:30:00.000Z";
// Placeholder 64-hex-char sha256 used for every provenance sig_hash.
const SHA = "0".repeat(64);
// Date partition path segment shared by the evidence and scored-runs layouts.
const PARTITION = "2026/04/27";
|
|
|
|
function setupRoot() {
|
|
if (existsSync(TMP)) rmSync(TMP, { recursive: true, force: true });
|
|
mkdirSync(resolve(TMP, `data/evidence/${PARTITION}`), { recursive: true });
|
|
mkdirSync(resolve(TMP, `data/scored-runs/${PARTITION}`), { recursive: true });
|
|
}
|
|
|
|
function writeEvidence(stem: string, evs: EvidenceRecord[]) {
|
|
const path = resolve(TMP, `data/evidence/${PARTITION}/${stem}.jsonl`);
|
|
writeFileSync(path, evs.map(e => JSON.stringify(e)).join("\n") + "\n");
|
|
}
|
|
|
|
function writeScored(stem: string, scored: ScoredRun[]) {
|
|
const path = resolve(TMP, `data/scored-runs/${PARTITION}/${stem}.jsonl`);
|
|
writeFileSync(path, scored.map(s => JSON.stringify(s)).join("\n") + "\n");
|
|
}
|
|
|
|
function makeEv(opts: { run_id: string; task_id: string; source_stem: string; text?: string; role?: ModelRole; source_files?: string[] }): EvidenceRecord {
|
|
return {
|
|
run_id: opts.run_id,
|
|
task_id: opts.task_id,
|
|
timestamp: NOW,
|
|
schema_version: EVIDENCE_SCHEMA_VERSION,
|
|
provenance: {
|
|
source_file: `data/_kb/${opts.source_stem}.jsonl`,
|
|
line_offset: 0,
|
|
sig_hash: SHA,
|
|
recorded_at: NOW,
|
|
},
|
|
model_role: opts.role ?? "executor",
|
|
text: opts.text ?? "default response text",
|
|
source_files: opts.source_files,
|
|
};
|
|
}
|
|
|
|
function makeScored(opts: { run_id: string; task_id: string; category: ScoreCategory; reasons: string[]; out_relpath: string }): ScoredRun {
|
|
return {
|
|
schema_version: SCORED_RUN_SCHEMA_VERSION,
|
|
evidence_run_id: opts.run_id,
|
|
evidence_task_id: opts.task_id,
|
|
category: opts.category,
|
|
reasons: opts.reasons,
|
|
scored_at: NOW,
|
|
scorer_version: "v1.0.0",
|
|
sub_scores: {},
|
|
provenance: {
|
|
source_file: opts.out_relpath,
|
|
line_offset: 0,
|
|
sig_hash: SHA,
|
|
recorded_at: NOW,
|
|
},
|
|
};
|
|
}
|
|
|
|
beforeEach(setupRoot);
|
|
afterEach(() => { if (existsSync(TMP)) rmSync(TMP, { recursive: true, force: true }); });
|
|
|
|
// ─── RAG export ─────────────────────────────────────────────────────
|
|
|
|
test("RAG: accepted + partial flow through; rejected quarantined", () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "ra1", task_id: "t1", source_stem: "scrum_reviews", text: "good review" }),
|
|
makeEv({ run_id: "ra2", task_id: "t2", source_stem: "scrum_reviews", text: "ok review" }),
|
|
makeEv({ run_id: "ra3", task_id: "t3", source_stem: "scrum_reviews", text: "bad review" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "ra1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
makeScored({ run_id: "ra2", task_id: "t2", category: "partially_accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
makeScored({ run_id: "ra3", task_id: "t3", category: "rejected", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
});
|
|
|
|
test("RAG: needs_human_review excluded by default, included with flag", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "rh1", task_id: "t1", source_stem: "scrum_reviews", text: "default skip" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "rh1", task_id: "t1", category: "needs_human_review", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
|
|
const r1 = await exportRag({ root: TMP, recorded_at: NOW, include_review: false });
|
|
expect(r1.records_exported).toBe(0);
|
|
expect(r1.records_quarantined).toBe(1);
|
|
|
|
// Reset for include_review run.
|
|
if (existsSync(resolve(TMP, "exports"))) rmSync(resolve(TMP, "exports"), { recursive: true });
|
|
const r2 = await exportRag({ root: TMP, recorded_at: NOW, include_review: true });
|
|
expect(r2.records_exported).toBe(1);
|
|
expect(r2.records_quarantined).toBe(0);
|
|
});
|
|
|
|
test("RAG: every output row validates against RagSample schema", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "rv1", task_id: "t1", source_stem: "scrum_reviews", text: "review content" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "rv1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
await exportRag({ root: TMP, recorded_at: NOW });
|
|
const path = resolve(TMP, "exports/rag/playbooks.jsonl");
|
|
expect(existsSync(path)).toBe(true);
|
|
const rows = readFileSync(path, "utf8").trim().split("\n").map(l => JSON.parse(l));
|
|
for (const row of rows) {
|
|
const v = validateRagSample(row);
|
|
expect(v.valid).toBe(true);
|
|
}
|
|
});
|
|
|
|
test("RAG: empty content quarantined", async () => {
|
|
writeEvidence("scrum_reviews", [makeEv({ run_id: "re1", task_id: "t1", source_stem: "scrum_reviews", text: "" })]);
|
|
writeScored("scrum_reviews", [makeScored({ run_id: "re1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` })]);
|
|
const r = await exportRag({ root: TMP, recorded_at: NOW });
|
|
expect(r.records_exported).toBe(0);
|
|
expect(r.records_quarantined).toBe(1);
|
|
const qPath = resolve(TMP, "exports/quarantine/rag.jsonl");
|
|
expect(existsSync(qPath)).toBe(true);
|
|
expect(readFileSync(qPath, "utf8")).toContain("empty_content");
|
|
});
|
|
|
|
// ─── SFT export — THE CONTAMINATION FIREWALL ────────────────────────
|
|
|
|
test("SFT: rejected NEVER ships (spec non-negotiable)", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "sf1", task_id: "t1", source_stem: "scrum_reviews", text: "rejected output that should NOT train" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "sf1", task_id: "t1", category: "rejected", reasons: ["bad"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
const r = await exportSft({ root: TMP, recorded_at: NOW });
|
|
expect(r.records_exported).toBe(0);
|
|
expect(r.records_quarantined).toBe(1);
|
|
const qPath = resolve(TMP, "exports/quarantine/sft.jsonl");
|
|
expect(readFileSync(qPath, "utf8")).toContain("unsafe_sft_category");
|
|
});
|
|
|
|
test("SFT: needs_human_review NEVER ships (spec non-negotiable)", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "sh1", task_id: "t1", source_stem: "scrum_reviews", text: "hum text" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "sh1", task_id: "t1", category: "needs_human_review", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
const r = await exportSft({ root: TMP, recorded_at: NOW });
|
|
expect(r.records_exported).toBe(0);
|
|
expect(r.records_quarantined).toBe(1);
|
|
});
|
|
|
|
test("SFT: partially_accepted excluded by default; included with --include-partial", async () => {
|
|
writeEvidence("scrum_reviews", [makeEv({ run_id: "sp1", task_id: "t1", source_stem: "scrum_reviews", text: "partial output" })]);
|
|
writeScored("scrum_reviews", [makeScored({ run_id: "sp1", task_id: "t1", category: "partially_accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` })]);
|
|
|
|
const r1 = await exportSft({ root: TMP, recorded_at: NOW });
|
|
expect(r1.records_exported).toBe(0);
|
|
expect(r1.records_quarantined).toBe(1);
|
|
|
|
// Reset for include_partial.
|
|
if (existsSync(resolve(TMP, "exports"))) rmSync(resolve(TMP, "exports"), { recursive: true });
|
|
const r2 = await exportSft({ root: TMP, recorded_at: NOW, include_partial: true });
|
|
expect(r2.records_exported).toBe(1);
|
|
});
|
|
|
|
test("SFT: extraction-class records (no instruction→response shape) quarantined", async () => {
|
|
writeEvidence("distilled_facts", [
|
|
makeEv({ run_id: "sx1", task_id: "t1", source_stem: "distilled_facts", text: "extracted fact", role: "extractor" }),
|
|
]);
|
|
writeScored("distilled_facts", [
|
|
// Force category=accepted to prove it's the role-shape gate that catches it, not the category gate.
|
|
makeScored({ run_id: "sx1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/distilled_facts.jsonl` }),
|
|
]);
|
|
const r = await exportSft({ root: TMP, recorded_at: NOW });
|
|
expect(r.records_exported).toBe(0);
|
|
expect(r.records_quarantined).toBe(1);
|
|
});
|
|
|
|
test("SFT: every output row validates against SftSample (provenance + non-empty + quality_score)", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "sv1", task_id: "t1", source_stem: "scrum_reviews", text: "real instruction response text" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "sv1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
await exportSft({ root: TMP, recorded_at: NOW });
|
|
const rows = readFileSync(resolve(TMP, "exports/sft/instruction_response.jsonl"), "utf8").trim().split("\n").map(l => JSON.parse(l));
|
|
expect(rows.length).toBe(1);
|
|
for (const row of rows) {
|
|
const v = validateSftSample(row);
|
|
expect(v.valid).toBe(true);
|
|
expect(row.quality_score).toBe("accepted"); // never partial here
|
|
expect(row.provenance.sig_hash).toMatch(/^[0-9a-f]{64}$/);
|
|
}
|
|
});
|
|
|
|
test("SFT: idempotent — second run produces 0 new exports", async () => {
|
|
writeEvidence("scrum_reviews", [makeEv({ run_id: "si1", task_id: "t1", source_stem: "scrum_reviews", text: "idem" })]);
|
|
writeScored("scrum_reviews", [makeScored({ run_id: "si1", task_id: "t1", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` })]);
|
|
await exportSft({ root: TMP, recorded_at: NOW });
|
|
const r2 = await exportSft({ root: TMP, recorded_at: NOW });
|
|
expect(r2.records_exported).toBe(0);
|
|
});
|
|
|
|
// ─── Preference export — pairing logic ──────────────────────────────
|
|
|
|
test("Preference: same task_id, accepted vs rejected → exports a pair", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "pa1", task_id: "task-X", source_stem: "scrum_reviews", text: "good chosen output" }),
|
|
makeEv({ run_id: "pr1", task_id: "task-X", source_stem: "scrum_reviews", text: "bad rejected output" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "pa1", task_id: "task-X", category: "accepted", reasons: ["good"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
makeScored({ run_id: "pr1", task_id: "task-X", category: "rejected", reasons: ["bad"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
const r = await exportPreference({ root: TMP, recorded_at: NOW });
|
|
expect(r.pairs_exported).toBe(1);
|
|
const path = resolve(TMP, "exports/preference/chosen_rejected.jsonl");
|
|
const rows = readFileSync(path, "utf8").trim().split("\n").map(l => JSON.parse(l));
|
|
expect(rows.length).toBe(1);
|
|
expect(rows[0].chosen).toContain("good");
|
|
expect(rows[0].rejected).toContain("bad");
|
|
expect(rows[0].chosen_run_id).not.toBe(rows[0].rejected_run_id);
|
|
const v = validatePreferenceSample(rows[0]);
|
|
expect(v.valid).toBe(true);
|
|
});
|
|
|
|
test("Preference: different task_ids never pair (no fabrication)", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "pd1", task_id: "task-A", source_stem: "scrum_reviews", text: "A good" }),
|
|
makeEv({ run_id: "pd2", task_id: "task-B", source_stem: "scrum_reviews", text: "B bad" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "pd1", task_id: "task-A", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
makeScored({ run_id: "pd2", task_id: "task-B", category: "rejected", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
const r = await exportPreference({ root: TMP, recorded_at: NOW });
|
|
expect(r.pairs_exported).toBe(0); // no shared task_id
|
|
expect(r.insufficient_pair_task_ids).toBe(2);
|
|
});
|
|
|
|
test("Preference: identical text in chosen and rejected quarantined", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "pi1", task_id: "task-X", source_stem: "scrum_reviews", text: "identical text" }),
|
|
makeEv({ run_id: "pi2", task_id: "task-X", source_stem: "scrum_reviews", text: "identical text" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "pi1", task_id: "task-X", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
makeScored({ run_id: "pi2", task_id: "task-X", category: "rejected", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
const r = await exportPreference({ root: TMP, recorded_at: NOW });
|
|
expect(r.pairs_exported).toBe(0);
|
|
expect(r.records_quarantined).toBeGreaterThan(0);
|
|
const qPath = resolve(TMP, "exports/quarantine/preference.jsonl");
|
|
expect(readFileSync(qPath, "utf8")).toContain("identical");
|
|
});
|
|
|
|
test("Preference: accepted vs partially_accepted is a softer fallback pair", async () => {
|
|
writeEvidence("scrum_reviews", [
|
|
makeEv({ run_id: "ps1", task_id: "task-X", source_stem: "scrum_reviews", text: "best output" }),
|
|
makeEv({ run_id: "ps2", task_id: "task-X", source_stem: "scrum_reviews", text: "ok output" }),
|
|
]);
|
|
writeScored("scrum_reviews", [
|
|
makeScored({ run_id: "ps1", task_id: "task-X", category: "accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
makeScored({ run_id: "ps2", task_id: "task-X", category: "partially_accepted", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` }),
|
|
]);
|
|
const r = await exportPreference({ root: TMP, recorded_at: NOW });
|
|
expect(r.pairs_exported).toBe(1);
|
|
});
|
|
|
|
// ─── Quarantine populated when expected ─────────────────────────────
|
|
|
|
test("Quarantine: every export creates exports/quarantine/<exporter>.jsonl when needed", async () => {
|
|
// SFT with a forbidden category should populate quarantine
|
|
writeEvidence("scrum_reviews", [makeEv({ run_id: "q1", task_id: "t1", source_stem: "scrum_reviews", text: "x" })]);
|
|
writeScored("scrum_reviews", [makeScored({ run_id: "q1", task_id: "t1", category: "rejected", reasons: ["x"], out_relpath: `data/scored-runs/${PARTITION}/scrum_reviews.jsonl` })]);
|
|
await exportSft({ root: TMP, recorded_at: NOW });
|
|
expect(existsSync(resolve(TMP, "exports/quarantine/sft.jsonl"))).toBe(true);
|
|
});
|