package distillation

import (
	"os"
	"path/filepath"
	"testing"
)

// TestIsSftNever_Firewall pins the contamination firewall predicate:
// IsSftNever must report true for "rejected" and "needs_human_review"
// and false for every other category exercised here. Per
// project_distillation_substrate.md the firewall set is one of the
// substrate's load-bearing knobs, so changing it requires explicit
// sign-off.
func TestIsSftNever_Firewall(t *testing.T) {
	blocked := []ScoreCategory{CategoryRejected, CategoryNeedsHumanReview}

	// Build a lookup set while asserting each firewall member is blocked.
	expectBlocked := make(map[ScoreCategory]bool, len(blocked))
	for _, cat := range blocked {
		expectBlocked[cat] = true
		if IsSftNever(cat) {
			continue
		}
		t.Errorf("firewall must block %q", cat)
	}

	// Hand-maintained list of the category constants this test knows
	// about. Each one must be blocked exactly when it is a firewall
	// member, and not otherwise.
	known := []ScoreCategory{
		CategoryAccepted,
		CategoryPartiallyAccepted,
		CategoryRejected,
		CategoryNeedsHumanReview,
	}
	for _, cat := range known {
		want := expectBlocked[cat]
		got := IsSftNever(cat)
		if got == want {
			continue
		}
		t.Errorf("IsSftNever(%q) = %v, want %v", cat, got, want)
	}

	// A category the firewall has never heard of must pass through:
	// anyone extending the ScoreCategory enum is expected to add the
	// new value to the firewall explicitly if it should be gated.
	if future := ScoreCategory("custom_future_category"); IsSftNever(future) {
		t.Errorf("unknown category must not be blocked by firewall")
	}
}

// TestSftNever_PinsExpectedSet locks the firewall slice contents.
// If a future commit adds or removes categories from SftNever, this
// test fails — forcing the change through review.
func TestSftNever_PinsExpectedSet(t *testing.T) { want := map[ScoreCategory]bool{ CategoryRejected: true, CategoryNeedsHumanReview: true, } if len(SftNever) != len(want) { t.Fatalf("SftNever has %d entries, want %d (firewall set changed without review?)", len(SftNever), len(want)) } for _, c := range SftNever { if !want[c] { t.Errorf("SftNever contains %q, which is not in the expected firewall set", c) } } } // TestListScoredRunFiles_Empty: missing root → no files, no error. // Matches Rust behavior; operators running ExportSft on a fresh box // shouldn't see an error before any scored runs have landed. func TestListScoredRunFiles_Empty(t *testing.T) { tmp := t.TempDir() files, err := ListScoredRunFiles(tmp) if err != nil { t.Fatalf("ListScoredRunFiles: %v", err) } if len(files) != 0 { t.Errorf("empty root: expected 0 files, got %d", len(files)) } } // TestListScoredRunFiles_WalksYearMonthDay locks the directory walk // pattern: data/scored-runs/YYYY/MM/DD/*.jsonl. Subset of full // Rust-side test coverage but proves the walk visits the right // nesting. func TestListScoredRunFiles_WalksYearMonthDay(t *testing.T) { tmp := t.TempDir() // Create the expected nested structure. dirs := []string{ filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30"), filepath.Join(tmp, "data", "scored-runs", "2026", "05", "01"), } for _, d := range dirs { if err := os.MkdirAll(d, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } } // Drop a JSONL in each + a non-JSONL we should skip. for i, d := range dirs { jsonlPath := filepath.Join(d, "run.jsonl") if err := os.WriteFile(jsonlPath, []byte("{}\n"), 0o644); err != nil { t.Fatalf("write %s: %v", jsonlPath, err) } // Non-JSONL — must be skipped. 
other := filepath.Join(d, "skip.txt") if err := os.WriteFile(other, []byte("ignore me"), 0o644); err != nil { t.Fatalf("write %s: %v", other, err) } _ = i } files, err := ListScoredRunFiles(tmp) if err != nil { t.Fatalf("ListScoredRunFiles: %v", err) } if len(files) != 2 { t.Errorf("expected 2 .jsonl files, got %d (%v)", len(files), files) } // Sort order: 2026-04-30 before 2026-05-01. Critical for audit // baselines — the longitudinal signal depends on stable order. if len(files) >= 2 { if files[0] >= files[1] { t.Errorf("files not sorted ascending: %q vs %q", files[0], files[1]) } } // Non-JSONL must be skipped. for _, f := range files { if filepath.Ext(f) != ".jsonl" { t.Errorf("listing returned non-.jsonl: %q", f) } } } // TestExportSft_PartialPort_FirewallFires runs the partial-port // ExportSft on a fixture with one valid + one rejected ScoredRun // and asserts the firewall counts correctly. Locks the contamination // guarantee at the integration layer — even before the synthesis // half ports, the firewall protection is end-to-end testable. func TestExportSft_PartialPort_FirewallFires(t *testing.T) { tmp := t.TempDir() dir := filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30") if err := os.MkdirAll(dir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } // Two scored runs: one passes the firewall, one is blocked. 
jsonl := `{"category":"accepted","run_id":"r1","task_id":"t1"} {"category":"rejected","run_id":"r2","task_id":"t2"} {"category":"partially_accepted","run_id":"r3","task_id":"t3"} {"category":"needs_human_review","run_id":"r4","task_id":"t4"} ` if err := os.WriteFile(filepath.Join(dir, "run.jsonl"), []byte(jsonl), 0o644); err != nil { t.Fatalf("write: %v", err) } res, err := ExportSft(ExportSftOptions{ Root: tmp, RecordedAt: "2026-04-30T00:00:00Z", DryRun: true, }) if err != nil { t.Fatalf("ExportSft: %v", err) } if res.RecordsRead != 4 { t.Errorf("RecordsRead: got %d, want 4", res.RecordsRead) } if res.RecordsExported != 2 { t.Errorf("RecordsExported (firewall-passing): got %d, want 2", res.RecordsExported) } if res.RecordsQuarantined != 2 { t.Errorf("RecordsQuarantined (firewall-blocked): got %d, want 2", res.RecordsQuarantined) } }