// golangLAKEHOUSE/internal/distillation/sft_export_test.go

package distillation

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestIsSftNever_Firewall pins the IsSftNever predicate, i.e. the
// contamination firewall: it must report true for CategoryRejected
// and CategoryNeedsHumanReview and false for everything else this
// test feeds it. Per project_distillation_substrate.md the firewall
// set is one of the substrate's load-bearing knobs, so any change to
// it requires explicit sign-off.
func TestIsSftNever_Firewall(t *testing.T) {
	blocked := []ScoreCategory{
		CategoryRejected,
		CategoryNeedsHumanReview,
	}
	for _, cat := range blocked {
		if !IsSftNever(cat) {
			t.Errorf("firewall must block %q", cat)
		}
	}

	// expectBlocked reports whether cat is one of the categories the
	// firewall is required to stop.
	expectBlocked := func(cat ScoreCategory) bool {
		for _, b := range blocked {
			if b == cat {
				return true
			}
		}
		return false
	}

	// Sweep the category constants listed here: the predicate must
	// agree with the blocked list for each of them, so nothing
	// outside that list is gated.
	for _, cat := range []ScoreCategory{
		CategoryAccepted,
		CategoryPartiallyAccepted,
		CategoryRejected,
		CategoryNeedsHumanReview,
	} {
		want := expectBlocked(cat)
		if got := IsSftNever(cat); got != want {
			t.Errorf("IsSftNever(%q) = %v, want %v", cat, got, want)
		}
	}

	// A category the firewall has never heard of passes through.
	// That is the intended default: whoever extends the ScoreCategory
	// enum has to add the new value to the firewall explicitly if it
	// should be gated.
	unknown := ScoreCategory("custom_future_category")
	if IsSftNever(unknown) {
		t.Errorf("unknown category must not be blocked by firewall")
	}
}

// TestSftNever_PinsExpectedSet locks the contents of the SftNever
// firewall collection. If a future commit adds, removes, or
// duplicates a category in SftNever, this test fails — forcing the
// change through review.
//
// A length check plus a "every entry is expected" check is not enough
// on its own: a slice such as [rejected, rejected] has the right
// length and only expected members, yet silently drops
// needs_human_review. The test therefore also tracks which expected
// categories were actually seen.
func TestSftNever_PinsExpectedSet(t *testing.T) {
	// Kept as a slice so the "missing" diagnostics below are emitted
	// in a stable order.
	expected := []ScoreCategory{
		CategoryRejected,
		CategoryNeedsHumanReview,
	}
	want := make(map[ScoreCategory]bool, len(expected))
	for _, c := range expected {
		want[c] = true
	}

	if len(SftNever) != len(want) {
		t.Fatalf("SftNever has %d entries, want %d (firewall set changed without review?)",
			len(SftNever), len(want))
	}

	seen := make(map[ScoreCategory]bool, len(want))
	for _, c := range SftNever {
		if !want[c] {
			t.Errorf("SftNever contains %q, which is not in the expected firewall set", c)
			continue
		}
		if seen[c] {
			t.Errorf("SftNever lists %q more than once", c)
		}
		seen[c] = true
	}

	// With equal lengths, a missing category can only be hidden by a
	// duplicate or an unexpected entry; report it by name regardless.
	for _, c := range expected {
		if !seen[c] {
			t.Errorf("SftNever is missing %q from the expected firewall set", c)
		}
	}
}

// TestListScoredRunFiles_Empty checks that listing under a freshly
// created, empty root yields zero files and a nil error. The original
// note on this test says this matches the Rust behavior: operators
// running ExportSft on a fresh box shouldn't see an error before any
// scored runs have landed.
func TestListScoredRunFiles_Empty(t *testing.T) {
	got, err := ListScoredRunFiles(t.TempDir())
	if err != nil {
		t.Fatalf("ListScoredRunFiles: %v", err)
	}
	if n := len(got); n != 0 {
		t.Errorf("empty root: expected 0 files, got %d", n)
	}
}

// TestListScoredRunFiles_WalksYearMonthDay locks the directory walk
// pattern: data/scored-runs/YYYY/MM/DD/*.jsonl. It is a subset of the
// full Rust-side test coverage but proves the walk visits the right
// nesting, returns results in ascending order, and ignores files
// that do not carry the .jsonl extension.
func TestListScoredRunFiles_WalksYearMonthDay(t *testing.T) {
	tmp := t.TempDir()

	// Two day-directories in different months, so a correct walk has
	// to descend through both the MM and DD levels.
	dirs := []string{
		filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30"),
		filepath.Join(tmp, "data", "scored-runs", "2026", "05", "01"),
	}
	for _, d := range dirs {
		if err := os.MkdirAll(d, 0o755); err != nil {
			t.Fatalf("mkdir: %v", err)
		}
		// One JSONL the listing must return ...
		jsonlPath := filepath.Join(d, "run.jsonl")
		if err := os.WriteFile(jsonlPath, []byte("{}\n"), 0o644); err != nil {
			t.Fatalf("write %s: %v", jsonlPath, err)
		}
		// ... and one non-JSONL sibling it must skip.
		other := filepath.Join(d, "skip.txt")
		if err := os.WriteFile(other, []byte("ignore me"), 0o644); err != nil {
			t.Fatalf("write %s: %v", other, err)
		}
	}

	files, err := ListScoredRunFiles(tmp)
	if err != nil {
		t.Fatalf("ListScoredRunFiles: %v", err)
	}
	if len(files) != 2 {
		t.Errorf("expected 2 .jsonl files, got %d (%v)", len(files), files)
	}
	// Sort order: 2026-04-30 before 2026-05-01. Critical for audit
	// baselines — the longitudinal signal depends on stable order.
	// Guarded on length so a short result reports the count error
	// above instead of panicking on the index.
	if len(files) >= 2 && files[0] >= files[1] {
		t.Errorf("files not sorted ascending: %q vs %q", files[0], files[1])
	}
	// Non-JSONL must be skipped.
	for _, f := range files {
		if filepath.Ext(f) != ".jsonl" {
			t.Errorf("listing returned non-.jsonl: %q", f)
		}
	}
}

// TestSynthesizeSft_PerSourceClass locks the per-source-class
// instruction templates byte-for-byte against the Rust source.
// If a future commit changes a template, this test fails — the
// trained-model behavior shifts under our feet.
func TestSynthesizeSft_PerSourceClass(t *testing.T) {
	cases := []struct {
		name        string
		sourceFile  string
		taskID      string
		sourceFiles []string
		wantPrefix  string
	}{
		{
			"scrum_reviews",
			"data/_kb/scrum_reviews.jsonl",
			"any",
			[]string{"src/foo.rs"},
			"Review the file 'src/foo.rs' against",
		},
		{
			"mode_experiments",
			"data/_kb/mode_experiments.jsonl",
			"task_42",
			[]string{"src/bar.go"},
			"Run task_class='task_42' for file 'src/bar.go'.",
		},
		{
			"auto_apply",
			"data/_kb/auto_apply.jsonl",
			"any",
			[]string{"src/baz.ts"},
			"Auto-apply: emit a 6-line surgical patch for 'src/baz.ts'",
		},
		{
			"audits with phase: prefix stripped",
			"data/_kb/audits.jsonl",
			"phase:G2",
			nil,
			"Audit phase 'G2' and report findings",
		},
		{
			"observer_reviews",
			"data/_kb/observer_reviews.jsonl",
			"any",
			[]string{"f.rs"},
			"Observer-review the latest attempt on 'f.rs'.",
		},
		{
			"contract_analyses with permit: prefix",
			"data/_kb/contract_analyses.jsonl",
			"permit:ABC123",
			nil,
			"Analyze permit 'ABC123'. Recommend with risk markers.",
		},
		{
			"outcomes",
			"data/_kb/outcomes.jsonl",
			"any",
			nil,
			"Run scenario; report per-event outcome with citations.",
		},
		{
			"unknown source falls back to default",
			"data/_kb/something_new.jsonl",
			"any",
			nil,
			"Source 'something_new' run; produce the appropriate output",
		},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			scored := ScoredRun{
				EvidenceRunID: "rid",
				Category:      CategoryAccepted,
				Provenance: Provenance{
					SourceFile: c.sourceFile,
					SigHash:    "abc",
					RecordedAt: "2026-04-30T00:00:00Z",
				},
			}
			ev := EvidenceRecord{
				RunID:       "rid",
				TaskID:      c.taskID,
				ModelRole:   RoleExecutor,
				Text:        "model response text",
				SourceFiles: c.sourceFiles,
				Provenance:  Provenance{SourceFile: c.sourceFile, SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z"},
			}
			sample := SynthesizeSft(scored, ev, "2026-04-30T00:00:00Z", "test-id")
			if sample == nil {
				t.Fatalf("expected non-nil sample for %s", c.name)
			}
			if !strings.HasPrefix(sample.Instruction, c.wantPrefix) {
				t.Errorf("instruction prefix mismatch:\n  got:  %q\n  want: %q...", sample.Instruction, c.wantPrefix)
			}
		})
	}
}

// TestSynthesizeSft_RejectsExtraction: extraction-class records
// have no instruction→response shape (they're pure data extraction,
// not model-output-as-training-target). Synthesis must return nil.
func TestSynthesizeSft_RejectsExtraction(t *testing.T) {
	ev := EvidenceRecord{
		RunID:     "rid",
		ModelRole: RoleExtractor,
		Text:      "extracted data",
		Provenance: Provenance{SourceFile: "data/_kb/anything.jsonl", SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z"},
	}
	scored := ScoredRun{EvidenceRunID: "rid", Category: CategoryAccepted, Provenance: ev.Provenance}
	if sample := SynthesizeSft(scored, ev, "2026-04-30T00:00:00Z", "id"); sample != nil {
		t.Errorf("extraction record must produce nil sample, got %+v", sample)
	}
}

// TestSynthesizeSft_RejectsEmptyText: text is the response side of
// the SFT pair; empty text means nothing to learn.
func TestSynthesizeSft_RejectsEmptyText(t *testing.T) {
	ev := EvidenceRecord{
		RunID:     "rid",
		ModelRole: RoleExecutor,
		Text:      "   \n\t",
		Provenance: Provenance{SourceFile: "data/_kb/scrum_reviews.jsonl", SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z"},
	}
	scored := ScoredRun{EvidenceRunID: "rid", Category: CategoryAccepted, Provenance: ev.Provenance}
	if sample := SynthesizeSft(scored, ev, "2026-04-30T00:00:00Z", "id"); sample != nil {
		t.Errorf("empty-text record must produce nil sample, got %+v", sample)
	}
}

// TestSynthesizeSft_ContextAssembly verifies the terse " · "-joined
// context string carries matrix corpora + pathway fingerprints +
// model name in the documented order.
func TestSynthesizeSft_ContextAssembly(t *testing.T) {
	ev := EvidenceRecord{
		RunID:     "rid",
		ModelRole: RoleReviewer,
		Text:      "verdict",
		ModelName: "qwen3.5",
		RetrievedContext: &RetrievedContext{
			MatrixCorpora:           []string{"workers", "candidates"},
			PathwayFingerprintsSeen: 88,
		},
		Provenance: Provenance{SourceFile: "data/_kb/scrum_reviews.jsonl", SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z"},
	}
	scored := ScoredRun{EvidenceRunID: "rid", Category: CategoryAccepted, Provenance: ev.Provenance}
	sample := SynthesizeSft(scored, ev, "2026-04-30T00:00:00Z", "id")
	if sample == nil {
		t.Fatalf("expected non-nil sample")
	}
	want := "matrix=workers,candidates · pathway_fingerprints=88 · model=qwen3.5"
	if sample.Context != want {
		t.Errorf("context mismatch:\n  got:  %q\n  want: %q", sample.Context, want)
	}
}

// TestExportSft_FullPort_WritesJSONL covers the fully-ported path:
// scored runs + paired evidence both present, synthesis produces
// SftSamples, output JSONL is written. Locks the end-to-end
// contract that next-wave changes (synthesis tweaks, output layout)
// have to preserve.
func TestExportSft_FullPort_WritesJSONL(t *testing.T) {
	tmp := t.TempDir()
	scoredDir := filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30")
	evidenceDir := filepath.Join(tmp, "data", "evidence", "2026", "04", "30")
	for _, d := range []string{scoredDir, evidenceDir} {
		if err := os.MkdirAll(d, 0o755); err != nil {
			t.Fatalf("mkdir %s: %v", d, err)
		}
	}
	scoredJSONL := `{"category":"accepted","evidence_run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h1","recorded_at":"2026-04-30T00:00:00Z"}}
{"category":"rejected","evidence_run_id":"r2","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h2","recorded_at":"2026-04-30T00:00:00Z"}}
`
	evidenceJSONL := `{"run_id":"r1","model_role":"executor","text":"some review output","source_files":["src/foo.rs"],"provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h1","recorded_at":"2026-04-30T00:00:00Z"}}
{"run_id":"r2","model_role":"executor","text":"another output","source_files":["src/bar.rs"],"provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h2","recorded_at":"2026-04-30T00:00:00Z"}}
`
	if err := os.WriteFile(filepath.Join(scoredDir, "run.jsonl"), []byte(scoredJSONL), 0o644); err != nil {
		t.Fatalf("write scored: %v", err)
	}
	if err := os.WriteFile(filepath.Join(evidenceDir, "run.jsonl"), []byte(evidenceJSONL), 0o644); err != nil {
		t.Fatalf("write evidence: %v", err)
	}
	res, err := ExportSft(ExportSftOptions{
		Root:       tmp,
		RecordedAt: "2026-04-30T00:00:00Z",
	})
	if err != nil {
		t.Fatalf("ExportSft: %v", err)
	}
	if res.RecordsRead != 2 || res.RecordsExported != 1 || res.RecordsQuarantined != 1 {
		t.Errorf("counts: read=%d exported=%d quarantined=%d (want 2/1/1)",
			res.RecordsRead, res.RecordsExported, res.RecordsQuarantined)
	}
	out, err := os.ReadFile(res.OutputPath)
	if err != nil {
		t.Fatalf("read output: %v", err)
	}
	if !strings.Contains(string(out), "Review the file 'src/foo.rs'") {
		t.Errorf("output missing expected scrum_reviews instruction; got:\n%s", string(out))
	}
	if strings.Contains(string(out), "src/bar.rs") {
		t.Errorf("output contains rejected record's source_file — firewall leak")
	}
}

// TestExportSft_FirewallFiresBeforeEvidenceLoad locks the order-of-
// operations: even if evidence records are missing, the
// firewall counts records as quarantined so the contamination
// guarantee never depends on side data being present. Records
// that pass the firewall but lack evidence get the more honest
// "not instructable" label rather than being silently exported.
func TestExportSft_FirewallFiresBeforeEvidenceLoad(t *testing.T) {
	tmp := t.TempDir()
	dir := filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	jsonl := `{"category":"accepted","evidence_run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h1","recorded_at":"2026-04-30T00:00:00Z"}}
{"category":"rejected","evidence_run_id":"r2","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h2","recorded_at":"2026-04-30T00:00:00Z"}}
{"category":"partially_accepted","evidence_run_id":"r3","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h3","recorded_at":"2026-04-30T00:00:00Z"}}
{"category":"needs_human_review","evidence_run_id":"r4","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h4","recorded_at":"2026-04-30T00:00:00Z"}}
`
	if err := os.WriteFile(filepath.Join(dir, "run.jsonl"), []byte(jsonl), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	// No evidence directory created → records that pass the firewall
	// land in "not instructable" since synthesis can't proceed.
	res, err := ExportSft(ExportSftOptions{
		Root:       tmp,
		RecordedAt: "2026-04-30T00:00:00Z",
		DryRun:     true,
	})
	if err != nil {
		t.Fatalf("ExportSft: %v", err)
	}
	if res.RecordsRead != 4 {
		t.Errorf("RecordsRead: got %d, want 4", res.RecordsRead)
	}
	if res.RecordsQuarantined != 2 {
		t.Errorf("RecordsQuarantined (firewall-blocked): got %d, want 2", res.RecordsQuarantined)
	}
	if res.RecordsExported != 0 {
		t.Errorf("RecordsExported with no evidence: got %d, want 0", res.RecordsExported)
	}
	if !strings.Contains(res.QuarantineSummary, "not-instructable=2") {
		t.Errorf("expected quarantine summary to flag 2 not-instructable, got %q", res.QuarantineSummary)
	}
}