package distillation

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestIsSftNever_Firewall pins the contamination firewall predicate:
// IsSftNever must report true for "rejected" and "needs_human_review"
// and false for every other category exercised here. Per
// project_distillation_substrate.md the firewall set is one of the
// substrate's load-bearing knobs — touching it requires explicit
// sign-off.
func TestIsSftNever_Firewall(t *testing.T) {
	mustBlock := []ScoreCategory{CategoryRejected, CategoryNeedsHumanReview}

	// Build a lookup set while asserting each firewall member is blocked.
	blocked := make(map[ScoreCategory]bool, len(mustBlock))
	for _, category := range mustBlock {
		blocked[category] = true
		if !IsSftNever(category) {
			t.Errorf("firewall must block %q", category)
		}
	}

	// Hand-maintained list of the category constants this test covers.
	// Each one must be blocked exactly when it is a firewall member.
	allKnown := []ScoreCategory{
		CategoryAccepted,
		CategoryPartiallyAccepted,
		CategoryRejected,
		CategoryNeedsHumanReview,
	}
	for _, category := range allKnown {
		shouldBlock := blocked[category]
		got := IsSftNever(category)
		if got != shouldBlock {
			t.Errorf("IsSftNever(%q) = %v, want %v", category, got, shouldBlock)
		}
	}

	// An unknown category is NOT blocked — that is the intended default:
	// operators extending the ScoreCategory enum must add the new value
	// to the firewall explicitly if they want it gated.
	unknown := ScoreCategory("custom_future_category")
	if IsSftNever(unknown) {
		t.Errorf("unknown category must not be blocked by firewall")
	}
}

// TestSftNever_PinsExpectedSet locks the firewall slice contents.
// If a future commit adds or removes categories from SftNever, this
// test fails — forcing the change through review.
func TestSftNever_PinsExpectedSet(t *testing.T) { want := map[ScoreCategory]bool{ CategoryRejected: true, CategoryNeedsHumanReview: true, } if len(SftNever) != len(want) { t.Fatalf("SftNever has %d entries, want %d (firewall set changed without review?)", len(SftNever), len(want)) } for _, c := range SftNever { if !want[c] { t.Errorf("SftNever contains %q, which is not in the expected firewall set", c) } } } // TestListScoredRunFiles_Empty: missing root → no files, no error. // Matches Rust behavior; operators running ExportSft on a fresh box // shouldn't see an error before any scored runs have landed. func TestListScoredRunFiles_Empty(t *testing.T) { tmp := t.TempDir() files, err := ListScoredRunFiles(tmp) if err != nil { t.Fatalf("ListScoredRunFiles: %v", err) } if len(files) != 0 { t.Errorf("empty root: expected 0 files, got %d", len(files)) } } // TestListScoredRunFiles_WalksYearMonthDay locks the directory walk // pattern: data/scored-runs/YYYY/MM/DD/*.jsonl. Subset of full // Rust-side test coverage but proves the walk visits the right // nesting. func TestListScoredRunFiles_WalksYearMonthDay(t *testing.T) { tmp := t.TempDir() // Create the expected nested structure. dirs := []string{ filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30"), filepath.Join(tmp, "data", "scored-runs", "2026", "05", "01"), } for _, d := range dirs { if err := os.MkdirAll(d, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } } // Drop a JSONL in each + a non-JSONL we should skip. for i, d := range dirs { jsonlPath := filepath.Join(d, "run.jsonl") if err := os.WriteFile(jsonlPath, []byte("{}\n"), 0o644); err != nil { t.Fatalf("write %s: %v", jsonlPath, err) } // Non-JSONL — must be skipped. 
other := filepath.Join(d, "skip.txt") if err := os.WriteFile(other, []byte("ignore me"), 0o644); err != nil { t.Fatalf("write %s: %v", other, err) } _ = i } files, err := ListScoredRunFiles(tmp) if err != nil { t.Fatalf("ListScoredRunFiles: %v", err) } if len(files) != 2 { t.Errorf("expected 2 .jsonl files, got %d (%v)", len(files), files) } // Sort order: 2026-04-30 before 2026-05-01. Critical for audit // baselines — the longitudinal signal depends on stable order. if len(files) >= 2 { if files[0] >= files[1] { t.Errorf("files not sorted ascending: %q vs %q", files[0], files[1]) } } // Non-JSONL must be skipped. for _, f := range files { if filepath.Ext(f) != ".jsonl" { t.Errorf("listing returned non-.jsonl: %q", f) } } } // TestSynthesizeSft_PerSourceClass locks the per-source-class // instruction templates byte-for-byte against the Rust source. // If a future commit changes a template, this test fails — the // trained-model behavior shifts under our feet. func TestSynthesizeSft_PerSourceClass(t *testing.T) { cases := []struct { name string sourceFile string taskID string sourceFiles []string wantPrefix string }{ { "scrum_reviews", "data/_kb/scrum_reviews.jsonl", "any", []string{"src/foo.rs"}, "Review the file 'src/foo.rs' against", }, { "mode_experiments", "data/_kb/mode_experiments.jsonl", "task_42", []string{"src/bar.go"}, "Run task_class='task_42' for file 'src/bar.go'.", }, { "auto_apply", "data/_kb/auto_apply.jsonl", "any", []string{"src/baz.ts"}, "Auto-apply: emit a 6-line surgical patch for 'src/baz.ts'", }, { "audits with phase: prefix stripped", "data/_kb/audits.jsonl", "phase:G2", nil, "Audit phase 'G2' and report findings", }, { "observer_reviews", "data/_kb/observer_reviews.jsonl", "any", []string{"f.rs"}, "Observer-review the latest attempt on 'f.rs'.", }, { "contract_analyses with permit: prefix", "data/_kb/contract_analyses.jsonl", "permit:ABC123", nil, "Analyze permit 'ABC123'. 
Recommend with risk markers.", }, { "outcomes", "data/_kb/outcomes.jsonl", "any", nil, "Run scenario; report per-event outcome with citations.", }, { "unknown source falls back to default", "data/_kb/something_new.jsonl", "any", nil, "Source 'something_new' run; produce the appropriate output", }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { scored := ScoredRun{ EvidenceRunID: "rid", Category: CategoryAccepted, Provenance: Provenance{ SourceFile: c.sourceFile, SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z", }, } ev := EvidenceRecord{ RunID: "rid", TaskID: c.taskID, ModelRole: RoleExecutor, Text: "model response text", SourceFiles: c.sourceFiles, Provenance: Provenance{SourceFile: c.sourceFile, SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z"}, } sample := SynthesizeSft(scored, ev, "2026-04-30T00:00:00Z", "test-id") if sample == nil { t.Fatalf("expected non-nil sample for %s", c.name) } if !strings.HasPrefix(sample.Instruction, c.wantPrefix) { t.Errorf("instruction prefix mismatch:\n got: %q\n want: %q...", sample.Instruction, c.wantPrefix) } }) } } // TestSynthesizeSft_RejectsExtraction: extraction-class records // have no instruction→response shape (they're pure data extraction, // not model-output-as-training-target). Synthesis must return nil. func TestSynthesizeSft_RejectsExtraction(t *testing.T) { ev := EvidenceRecord{ RunID: "rid", ModelRole: RoleExtractor, Text: "extracted data", Provenance: Provenance{SourceFile: "data/_kb/anything.jsonl", SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z"}, } scored := ScoredRun{EvidenceRunID: "rid", Category: CategoryAccepted, Provenance: ev.Provenance} if sample := SynthesizeSft(scored, ev, "2026-04-30T00:00:00Z", "id"); sample != nil { t.Errorf("extraction record must produce nil sample, got %+v", sample) } } // TestSynthesizeSft_RejectsEmptyText: text is the response side of // the SFT pair; empty text means nothing to learn. 
func TestSynthesizeSft_RejectsEmptyText(t *testing.T) { ev := EvidenceRecord{ RunID: "rid", ModelRole: RoleExecutor, Text: " \n\t", Provenance: Provenance{SourceFile: "data/_kb/scrum_reviews.jsonl", SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z"}, } scored := ScoredRun{EvidenceRunID: "rid", Category: CategoryAccepted, Provenance: ev.Provenance} if sample := SynthesizeSft(scored, ev, "2026-04-30T00:00:00Z", "id"); sample != nil { t.Errorf("empty-text record must produce nil sample, got %+v", sample) } } // TestSynthesizeSft_ContextAssembly verifies the terse " · "-joined // context string carries matrix corpora + pathway fingerprints + // model name in the documented order. func TestSynthesizeSft_ContextAssembly(t *testing.T) { ev := EvidenceRecord{ RunID: "rid", ModelRole: RoleReviewer, Text: "verdict", ModelName: "qwen3.5", RetrievedContext: &RetrievedContext{ MatrixCorpora: []string{"workers", "candidates"}, PathwayFingerprintsSeen: 88, }, Provenance: Provenance{SourceFile: "data/_kb/scrum_reviews.jsonl", SigHash: "abc", RecordedAt: "2026-04-30T00:00:00Z"}, } scored := ScoredRun{EvidenceRunID: "rid", Category: CategoryAccepted, Provenance: ev.Provenance} sample := SynthesizeSft(scored, ev, "2026-04-30T00:00:00Z", "id") if sample == nil { t.Fatalf("expected non-nil sample") } want := "matrix=workers,candidates · pathway_fingerprints=88 · model=qwen3.5" if sample.Context != want { t.Errorf("context mismatch:\n got: %q\n want: %q", sample.Context, want) } } // TestExportSft_FullPort_WritesJSONL covers the fully-ported path: // scored runs + paired evidence both present, synthesis produces // SftSamples, output JSONL is written. Locks the end-to-end // contract that next-wave changes (synthesis tweaks, output layout) // have to preserve. 
func TestExportSft_FullPort_WritesJSONL(t *testing.T) { tmp := t.TempDir() scoredDir := filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30") evidenceDir := filepath.Join(tmp, "data", "evidence", "2026", "04", "30") for _, d := range []string{scoredDir, evidenceDir} { if err := os.MkdirAll(d, 0o755); err != nil { t.Fatalf("mkdir %s: %v", d, err) } } scoredJSONL := `{"category":"accepted","evidence_run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h1","recorded_at":"2026-04-30T00:00:00Z"}} {"category":"rejected","evidence_run_id":"r2","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h2","recorded_at":"2026-04-30T00:00:00Z"}} ` evidenceJSONL := `{"run_id":"r1","model_role":"executor","text":"some review output","source_files":["src/foo.rs"],"provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h1","recorded_at":"2026-04-30T00:00:00Z"}} {"run_id":"r2","model_role":"executor","text":"another output","source_files":["src/bar.rs"],"provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h2","recorded_at":"2026-04-30T00:00:00Z"}} ` if err := os.WriteFile(filepath.Join(scoredDir, "run.jsonl"), []byte(scoredJSONL), 0o644); err != nil { t.Fatalf("write scored: %v", err) } if err := os.WriteFile(filepath.Join(evidenceDir, "run.jsonl"), []byte(evidenceJSONL), 0o644); err != nil { t.Fatalf("write evidence: %v", err) } res, err := ExportSft(ExportSftOptions{ Root: tmp, RecordedAt: "2026-04-30T00:00:00Z", }) if err != nil { t.Fatalf("ExportSft: %v", err) } if res.RecordsRead != 2 || res.RecordsExported != 1 || res.RecordsQuarantined != 1 { t.Errorf("counts: read=%d exported=%d quarantined=%d (want 2/1/1)", res.RecordsRead, res.RecordsExported, res.RecordsQuarantined) } out, err := os.ReadFile(res.OutputPath) if err != nil { t.Fatalf("read output: %v", err) } if !strings.Contains(string(out), "Review the file 'src/foo.rs'") { t.Errorf("output missing expected scrum_reviews instruction; 
got:\n%s", string(out)) } if strings.Contains(string(out), "src/bar.rs") { t.Errorf("output contains rejected record's source_file — firewall leak") } } // TestExportSft_FirewallFiresBeforeEvidenceLoad locks the order-of- // operations: even if evidence records are missing, the // firewall counts records as quarantined so the contamination // guarantee never depends on side data being present. Records // that pass the firewall but lack evidence get the more honest // "not instructable" label rather than being silently exported. func TestExportSft_FirewallFiresBeforeEvidenceLoad(t *testing.T) { tmp := t.TempDir() dir := filepath.Join(tmp, "data", "scored-runs", "2026", "04", "30") if err := os.MkdirAll(dir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } jsonl := `{"category":"accepted","evidence_run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h1","recorded_at":"2026-04-30T00:00:00Z"}} {"category":"rejected","evidence_run_id":"r2","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h2","recorded_at":"2026-04-30T00:00:00Z"}} {"category":"partially_accepted","evidence_run_id":"r3","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h3","recorded_at":"2026-04-30T00:00:00Z"}} {"category":"needs_human_review","evidence_run_id":"r4","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"h4","recorded_at":"2026-04-30T00:00:00Z"}} ` if err := os.WriteFile(filepath.Join(dir, "run.jsonl"), []byte(jsonl), 0o644); err != nil { t.Fatalf("write: %v", err) } // No evidence directory created → records that pass the firewall // land in "not instructable" since synthesis can't proceed. 
res, err := ExportSft(ExportSftOptions{ Root: tmp, RecordedAt: "2026-04-30T00:00:00Z", DryRun: true, }) if err != nil { t.Fatalf("ExportSft: %v", err) } if res.RecordsRead != 4 { t.Errorf("RecordsRead: got %d, want 4", res.RecordsRead) } if res.RecordsQuarantined != 2 { t.Errorf("RecordsQuarantined (firewall-blocked): got %d, want 2", res.RecordsQuarantined) } if res.RecordsExported != 0 { t.Errorf("RecordsExported with no evidence: got %d, want 0", res.RecordsExported) } if !strings.Contains(res.QuarantineSummary, "not-instructable=2") { t.Errorf("expected quarantine summary to flag 2 not-instructable, got %q", res.QuarantineSummary) } }