// golangLAKEHOUSE/internal/distillation/sft_export_test.go

package distillation

import (
	"os"
	"path/filepath"
	"testing"
)

// TestIsSftNever_Firewall pins the IsSftNever predicate: it must
// report true for CategoryRejected and CategoryNeedsHumanReview and
// false for every other category exercised here. Per
// project_distillation_substrate.md the firewall set is one of the
// substrate's load-bearing knobs — changing it requires explicit
// sign-off.
func TestIsSftNever_Firewall(t *testing.T) {
	gated := []ScoreCategory{CategoryRejected, CategoryNeedsHumanReview}

	// First pass: every gated category must trip the predicate. The
	// same pass builds a lookup set for the exhaustive check below.
	gatedSet := make(map[ScoreCategory]bool, len(gated))
	for _, cat := range gated {
		gatedSet[cat] = true
		if IsSftNever(cat) {
			continue
		}
		t.Errorf("firewall must block %q", cat)
	}

	// Second pass: for each category listed here (a hand-maintained
	// list, not discovered by reflection) the predicate must agree
	// exactly with membership in the gated set.
	known := []ScoreCategory{
		CategoryAccepted,
		CategoryPartiallyAccepted,
		CategoryRejected,
		CategoryNeedsHumanReview,
	}
	for _, cat := range known {
		want := gatedSet[cat]
		got := IsSftNever(cat)
		if got != want {
			t.Errorf("IsSftNever(%q) = %v, want %v", cat, got, want)
		}
	}

	// A category the firewall has never heard of passes through.
	// That is the intended default: whoever extends ScoreCategory
	// must opt the new value into the firewall explicitly.
	unknown := ScoreCategory("custom_future_category")
	if IsSftNever(unknown) {
		t.Errorf("unknown category must not be blocked by firewall")
	}
}

// TestSftNever_PinsExpectedSet locks the contents of SftNever to
// exactly {CategoryRejected, CategoryNeedsHumanReview}. If a future
// commit adds, removes, or duplicates an entry, this test fails —
// forcing the change through review.
//
// A length check plus "every entry is expected" is not sufficient on
// its own: a slice such as [rejected, rejected] has the right length
// and only expected members, yet silently drops needs_human_review.
// The test therefore also rejects duplicates and verifies that every
// expected category is actually present.
func TestSftNever_PinsExpectedSet(t *testing.T) {
	// Ordered so that failure messages are reported deterministically.
	expected := []ScoreCategory{
		CategoryRejected,
		CategoryNeedsHumanReview,
	}
	want := make(map[ScoreCategory]bool, len(expected))
	for _, c := range expected {
		want[c] = true
	}

	if len(SftNever) != len(want) {
		t.Fatalf("SftNever has %d entries, want %d (firewall set changed without review?)",
			len(SftNever), len(want))
	}

	seen := make(map[ScoreCategory]bool, len(SftNever))
	for _, c := range SftNever {
		if !want[c] {
			t.Errorf("SftNever contains %q, which is not in the expected firewall set", c)
		}
		if seen[c] {
			t.Errorf("SftNever lists %q more than once", c)
		}
		seen[c] = true
	}

	// Catch the case the length check cannot: an expected category
	// displaced by a duplicate of another one.
	for _, c := range expected {
		if !seen[c] {
			t.Errorf("SftNever is missing %q from the expected firewall set", c)
		}
	}
}

// TestListScoredRunFiles_Empty points ListScoredRunFiles at a freshly
// created temp directory that contains nothing (so no scored-runs
// tree exists beneath it) and requires zero files and a nil error.
// This matches the Rust behavior: an operator running ExportSft on a
// fresh box should not hit an error before any scored runs have
// landed.
func TestListScoredRunFiles_Empty(t *testing.T) {
	root := t.TempDir()
	got, err := ListScoredRunFiles(root)
	switch {
	case err != nil:
		t.Fatalf("ListScoredRunFiles: %v", err)
	case len(got) != 0:
		t.Errorf("empty root: expected 0 files, got %d", len(got))
	}
}

// TestListScoredRunFiles_WalksYearMonthDay locks the directory walk
// pattern: data/scored-runs/YYYY/MM/DD/*.jsonl. It is a subset of the
// full Rust-side test coverage but proves the walk visits the right
// nesting.
//
// The fixture consists of two day directories, each holding one
// run.jsonl and one skip.txt. The test then asserts that:
//   - exactly two files are returned,
//   - the two returned paths are in strictly ascending order, and
//   - every returned path has the .jsonl extension.
func TestListScoredRunFiles_WalksYearMonthDay(t *testing.T) {
	tmp := t.TempDir()
	// Create the expected nested structure.
	dirs := []string{
		filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30"),
		filepath.Join(tmp, "data", "scored-runs", "2026", "05", "01"),
	}
	for _, d := range dirs {
		if err := os.MkdirAll(d, 0o755); err != nil {
			t.Fatalf("mkdir: %v", err)
		}
	}
	// Drop a JSONL in each + a non-JSONL we should skip.
	for _, d := range dirs {
		jsonlPath := filepath.Join(d, "run.jsonl")
		if err := os.WriteFile(jsonlPath, []byte("{}\n"), 0o644); err != nil {
			t.Fatalf("write %s: %v", jsonlPath, err)
		}
		// Non-JSONL — must be skipped.
		other := filepath.Join(d, "skip.txt")
		if err := os.WriteFile(other, []byte("ignore me"), 0o644); err != nil {
			t.Fatalf("write %s: %v", other, err)
		}
	}
	files, err := ListScoredRunFiles(tmp)
	if err != nil {
		t.Fatalf("ListScoredRunFiles: %v", err)
	}
	if len(files) != 2 {
		t.Errorf("expected 2 .jsonl files, got %d (%v)", len(files), files)
	}
	// Sort order: 2026-04-30 before 2026-05-01. Critical for audit
	// baselines — the longitudinal signal depends on stable order.
	// Guarded by the length so a wrong count reports cleanly above
	// instead of panicking on an out-of-range index here.
	if len(files) >= 2 {
		if files[0] >= files[1] {
			t.Errorf("files not sorted ascending: %q vs %q", files[0], files[1])
		}
	}
	// Non-JSONL must be skipped.
	for _, f := range files {
		if filepath.Ext(f) != ".jsonl" {
			t.Errorf("listing returned non-.jsonl: %q", f)
		}
	}
}

// TestExportSft_PartialPort_FirewallFires runs the partial-port
// ExportSft on a fixture with one valid + one rejected ScoredRun
// and asserts the firewall counts correctly. Locks the contamination
// guarantee at the integration layer — even before the synthesis
// half ports, the firewall protection is end-to-end testable.
func TestExportSft_PartialPort_FirewallFires(t *testing.T) {
	tmp := t.TempDir()
	dir := filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	// Two scored runs: one passes the firewall, one is blocked.
	jsonl := `{"category":"accepted","run_id":"r1","task_id":"t1"}
{"category":"rejected","run_id":"r2","task_id":"t2"}
{"category":"partially_accepted","run_id":"r3","task_id":"t3"}
{"category":"needs_human_review","run_id":"r4","task_id":"t4"}
`
	if err := os.WriteFile(filepath.Join(dir, "run.jsonl"), []byte(jsonl), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	res, err := ExportSft(ExportSftOptions{
		Root:       tmp,
		RecordedAt: "2026-04-30T00:00:00Z",
		DryRun:     true,
	})
	if err != nil {
		t.Fatalf("ExportSft: %v", err)
	}
	if res.RecordsRead != 4 {
		t.Errorf("RecordsRead: got %d, want 4", res.RecordsRead)
	}
	if res.RecordsExported != 2 {
		t.Errorf("RecordsExported (firewall-passing): got %d, want 2", res.RecordsExported)
	}
	if res.RecordsQuarantined != 2 {
		t.Errorf("RecordsQuarantined (firewall-blocked): got %d, want 2", res.RecordsQuarantined)
	}
}