package distillation import ( "os" "path/filepath" "strings" "testing" ) // TestRunAuditFull_EmptyRoot: missing data directories yield // failures on required checks but doesn't error out the run. // Operator running on a fresh box sees the report with the // expected "missing" actuals. func TestRunAuditFull_EmptyRoot(t *testing.T) { tmp := t.TempDir() report := RunAuditFull(AuditFullOptions{Root: tmp}) if len(report.Checks) == 0 { t.Fatalf("expected check rows even on empty root, got %d", len(report.Checks)) } // Phase 3's "scored-runs on disk" must fail (required); the // failure count rises by at least 1. if report.Failed < 1 { t.Errorf("expected ≥1 required failure on empty root, got %d", report.Failed) } } // TestPhase2_EvidenceTallyFromOnDisk seeds data/evidence/ and // asserts phase 2 reads + tallies the rows correctly. The // observer-mode port (no live materializer invocation) means the // check works against any-runtime-emitted evidence files. func TestPhase2_EvidenceTallyFromOnDisk(t *testing.T) { tmp := t.TempDir() dir := filepath.Join(tmp, "data", "evidence", "2026", "05", "01") if err := os.MkdirAll(dir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } // 3 records: 2 from scrum_reviews (a tier-1 source), 1 from // "other_source" (not in tier-1 list). Phase 2 should tally // 3 rows total + flag 1/4 tier-1 sources hit. jsonl := `{"run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a","recorded_at":"2026-05-01T00:00:00Z"}} {"run_id":"r2","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"b","recorded_at":"2026-05-01T00:00:00Z"}} {"run_id":"r3","provenance":{"source_file":"data/_kb/other_source.jsonl","sig_hash":"c","recorded_at":"2026-05-01T00:00:00Z"}} ` if err := os.WriteFile(filepath.Join(dir, "evidence.jsonl"), []byte(jsonl), 0o644); err != nil { t.Fatalf("write: %v", err) } report := RunAuditFull(AuditFullOptions{Root: tmp}) // GoTestModule empty disables phase 1 if report.Metrics["p2_evidence_rows"] != 3 { t.Errorf("p2_evidence_rows: got %d, want 3", report.Metrics["p2_evidence_rows"]) } if report.Metrics["p2_evidence_skips"] != 0 { t.Errorf("p2_evidence_skips: got %d, want 0", report.Metrics["p2_evidence_skips"]) } // Find the tier-1 hit count check. for _, c := range report.Checks { if c.Phase == 2 && c.Name == "tier-1 sources each materialize ≥1 row" { if !c.Passed { t.Errorf("expected tier-1 check to pass with 1/4 sources hit (≥1 = ok), got %+v", c) } if !strings.Contains(c.Actual, "1/4") || !strings.Contains(c.Actual, "scrum_reviews") { t.Errorf("tier-1 actual missing expected counts: %s", c.Actual) } } } } // TestPhase5_FullSummaryFlow seeds reports/distillation/{run_id}/ // with summary.json + 5 stage receipts and asserts phase 5 passes // all required checks. func TestPhase5_FullSummaryFlow(t *testing.T) { tmp := t.TempDir() runID := "test-run-id" runDir := filepath.Join(tmp, "reports", "distillation", runID) if err := os.MkdirAll(runDir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } // 5 stage receipts (parse-as-JSON only — full schema validation // is Rust-side). for _, s := range []string{"collect", "score", "export-rag", "export-sft", "export-preference"} { if err := os.WriteFile(filepath.Join(runDir, s+".json"), []byte(`{}`), 0o644); err != nil { t.Fatalf("write %s: %v", s, err) } } // summary.json with valid schema_version, 40-char git_commit, 64-char run_hash. summary := `{ "schema_version": 1, "run_id": "test-run-id", "git_commit": "0123456789abcdef0123456789abcdef01234567", "run_hash": "a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0", "stages": [{"stage":"collect"},{"stage":"score"},{"stage":"export-rag"},{"stage":"export-sft"},{"stage":"export-preference"}] }` if err := os.WriteFile(filepath.Join(runDir, "summary.json"), []byte(summary), 0o644); err != nil { t.Fatalf("write summary: %v", err) } report := RunAuditFull(AuditFullOptions{Root: tmp}) for _, c := range report.Checks { if c.Phase == 5 && c.Required && !c.Passed { t.Errorf("phase 5 required check failed: %s — actual=%q", c.Name, c.Actual) } } } // TestPhase5_ShortRunHashCaught: a run_hash that isn't 64-char hex // must fail the required check. func TestPhase5_ShortRunHashCaught(t *testing.T) { tmp := t.TempDir() runDir := filepath.Join(tmp, "reports", "distillation", "id") if err := os.MkdirAll(runDir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } for _, s := range []string{"collect", "score", "export-rag", "export-sft", "export-preference"} { _ = os.WriteFile(filepath.Join(runDir, s+".json"), []byte(`{}`), 0o644) } bad := `{"schema_version":1,"run_id":"id","git_commit":"0123456789abcdef0123456789abcdef01234567","run_hash":"too_short","stages":[]}` _ = os.WriteFile(filepath.Join(runDir, "summary.json"), []byte(bad), 0o644) report := RunAuditFull(AuditFullOptions{Root: tmp}) hashFailed := false for _, c := range report.Checks { if c.Phase == 5 && c.Name == "run_hash is sha256" && !c.Passed { hashFailed = true } } if !hashFailed { t.Errorf("expected run_hash sha256 check to fail on too_short") } } // TestPhase7_ReplayLogReadsFromDisk seeds a replay_runs.jsonl and // asserts phase 7 reports the correct row count. func TestPhase7_ReplayLogReadsFromDisk(t *testing.T) { tmp := t.TempDir() dir := filepath.Join(tmp, "data", "_kb") if err := os.MkdirAll(dir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } jsonl := `{"task":"a","passed":true} {"task":"b","passed":true} {"task":"c","passed":false} ` if err := os.WriteFile(filepath.Join(dir, "replay_runs.jsonl"), []byte(jsonl), 0o644); err != nil { t.Fatalf("write: %v", err) } report := RunAuditFull(AuditFullOptions{Root: tmp}) for _, c := range report.Checks { if c.Phase == 7 && c.Name == "replay_runs.jsonl exists" { if !c.Passed { t.Errorf("expected pass, got %+v", c) } if !strings.Contains(c.Actual, "3 rows") { t.Errorf("expected '3 rows' in actual, got %s", c.Actual) } } } } // TestPhase7_MalformedTailRowsCaught seeds a replay log with a // trailing malformed row and asserts the structural check fires. func TestPhase7_MalformedTailRowsCaught(t *testing.T) { tmp := t.TempDir() dir := filepath.Join(tmp, "data", "_kb") if err := os.MkdirAll(dir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } jsonl := `{"task":"a"} {"task":"b"} not valid json garbage ` _ = os.WriteFile(filepath.Join(dir, "replay_runs.jsonl"), []byte(jsonl), 0o644) report := RunAuditFull(AuditFullOptions{Root: tmp}) parseFailed := false for _, c := range report.Checks { if c.Phase == 7 && c.Name == "replay_runs.jsonl tail rows parse as JSON" && !c.Passed { parseFailed = true } } if !parseFailed { t.Errorf("expected tail-row parse check to fail on malformed line") } } // TestRunAuditFull_FullFixtureFlow seeds a complete data layout // and verifies all phases produce the expected metrics + a clean // PASS verdict. Locks the end-to-end orchestration. func TestRunAuditFull_FullFixtureFlow(t *testing.T) { tmp := t.TempDir() // scored-runs: one accepted record (passes phase 3 required check) scoredDir := filepath.Join(tmp, "data", "scored-runs", "2026", "05", "01") if err := os.MkdirAll(scoredDir, 0o755); err != nil { t.Fatalf("mkdir scored: %v", err) } scoredJSONL := `{"category":"accepted","evidence_run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0","recorded_at":"2026-05-01T00:00:00Z"}} {"category":"partially_accepted","evidence_run_id":"r2","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff1","recorded_at":"2026-05-01T00:00:00Z"}} {"category":"rejected","evidence_run_id":"r3","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff2","recorded_at":"2026-05-01T00:00:00Z"}} ` if err := os.WriteFile(filepath.Join(scoredDir, "run.jsonl"), []byte(scoredJSONL), 0o644); err != nil { t.Fatalf("write scored: %v", err) } // SFT export: only legal quality scores, valid sig_hash on every row. sftDir := filepath.Join(tmp, "exports", "sft") if err := os.MkdirAll(sftDir, 0o755); err != nil { t.Fatalf("mkdir sft: %v", err) } sftJSONL := `{"quality_score":"accepted","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}} {"quality_score":"partially_accepted","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff1"}} ` if err := os.WriteFile(filepath.Join(sftDir, "instruction_response.jsonl"), []byte(sftJSONL), 0o644); err != nil { t.Fatalf("write sft: %v", err) } // RAG: no rejected leaks ragDir := filepath.Join(tmp, "exports", "rag") if err := os.MkdirAll(ragDir, 0o755); err != nil { t.Fatalf("mkdir rag: %v", err) } ragJSONL := `{"success_score":"accepted","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}} ` if err := os.WriteFile(filepath.Join(ragDir, "playbooks.jsonl"), []byte(ragJSONL), 0o644); err != nil { t.Fatalf("write rag: %v", err) } // Preference: distinct chosen vs rejected, no self-pairs prefDir := filepath.Join(tmp, "exports", "preference") if err := os.MkdirAll(prefDir, 0o755); err != nil { t.Fatalf("mkdir pref: %v", err) } prefJSONL := `{"chosen_run_id":"a","rejected_run_id":"b","chosen":"good","rejected":"bad","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}} ` if err := os.WriteFile(filepath.Join(prefDir, "chosen_rejected.jsonl"), []byte(prefJSONL), 0o644); err != nil { t.Fatalf("write pref: %v", err) } // Phase 2: evidence directory with at least one row. evidenceDir := filepath.Join(tmp, "data", "evidence", "2026", "05", "01") if err := os.MkdirAll(evidenceDir, 0o755); err != nil { t.Fatalf("mkdir evidence: %v", err) } evidenceJSONL := `{"run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a","recorded_at":"2026-05-01T00:00:00Z"}} ` if err := os.WriteFile(filepath.Join(evidenceDir, "evidence.jsonl"), []byte(evidenceJSONL), 0o644); err != nil { t.Fatalf("write evidence: %v", err) } // Phase 5: reports/distillation/{run_id}/ with summary + 5 receipts. runDir := filepath.Join(tmp, "reports", "distillation", "test-run") if err := os.MkdirAll(runDir, 0o755); err != nil { t.Fatalf("mkdir runDir: %v", err) } for _, s := range []string{"collect", "score", "export-rag", "export-sft", "export-preference"} { _ = os.WriteFile(filepath.Join(runDir, s+".json"), []byte(`{}`), 0o644) } summaryJSON := `{"schema_version":1,"run_id":"test-run","git_commit":"0123456789abcdef0123456789abcdef01234567","run_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0","stages":[]}` _ = os.WriteFile(filepath.Join(runDir, "summary.json"), []byte(summaryJSON), 0o644) report := RunAuditFull(AuditFullOptions{Root: tmp}) if report.Failed != 0 { t.Errorf("clean fixture should have 0 required failures, got %d", report.Failed) for _, c := range report.Checks { if c.Required && !c.Passed { t.Logf(" failed: phase=%d name=%q actual=%q", c.Phase, c.Name, c.Actual) } } } // Metrics populated correctly if report.Metrics["p3_accepted"] != 1 { t.Errorf("p3_accepted: got %d, want 1", report.Metrics["p3_accepted"]) } if report.Metrics["p3_partial"] != 1 { t.Errorf("p3_partial: got %d, want 1", report.Metrics["p3_partial"]) } if report.Metrics["p3_rejected"] != 1 { t.Errorf("p3_rejected: got %d, want 1", report.Metrics["p3_rejected"]) } if report.Metrics["p4_sft_rows"] != 2 { t.Errorf("p4_sft_rows: got %d, want 2", report.Metrics["p4_sft_rows"]) } if report.Metrics["p4_rag_rows"] != 1 { t.Errorf("p4_rag_rows: got %d, want 1", report.Metrics["p4_rag_rows"]) } if report.Metrics["p4_pref_pairs"] != 1 { t.Errorf("p4_pref_pairs: got %d, want 1", report.Metrics["p4_pref_pairs"]) } } // TestPhase4_SftFirewallCatchesRejected: contamination must never // leak into SFT export. Test seeds a row with a forbidden // quality_score and asserts the firewall flags it. func TestPhase4_SftFirewallCatchesRejected(t *testing.T) { tmp := t.TempDir() sftDir := filepath.Join(tmp, "exports", "sft") if err := os.MkdirAll(sftDir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } bad := `{"quality_score":"rejected","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}} ` if err := os.WriteFile(filepath.Join(sftDir, "instruction_response.jsonl"), []byte(bad), 0o644); err != nil { t.Fatalf("write: %v", err) } report := RunAuditFull(AuditFullOptions{Root: tmp}) found := false for _, c := range report.Checks { if c.Phase == 4 && strings.Contains(c.Name, "SFT contamination firewall") { if c.Passed { t.Errorf("firewall should fail on rejected SFT row, but check passed") } if c.Actual != "1" { t.Errorf("firewall actual: got %q, want '1'", c.Actual) } found = true } } if !found { t.Errorf("firewall check not present in report") } } // TestPhase4_PreferenceSelfPairCaught: same chosen + rejected run_id // is structural noise and must be flagged. func TestPhase4_PreferenceSelfPairCaught(t *testing.T) { tmp := t.TempDir() prefDir := filepath.Join(tmp, "exports", "preference") if err := os.MkdirAll(prefDir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } bad := `{"chosen_run_id":"X","rejected_run_id":"X","chosen":"a","rejected":"b","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}} ` if err := os.WriteFile(filepath.Join(prefDir, "chosen_rejected.jsonl"), []byte(bad), 0o644); err != nil { t.Fatalf("write: %v", err) } report := RunAuditFull(AuditFullOptions{Root: tmp}) found := false for _, c := range report.Checks { if c.Phase == 4 && strings.Contains(c.Name, "self-pairs") { if c.Passed { t.Errorf("self-pair check should fail, but passed") } found = true } } if !found { t.Errorf("self-pair check not present in report") } } // TestPhase4_ProvenanceRequiresValidSha256: bad sig_hash must be // flagged. Locks the regex shape — only 64-char lowercase hex. func TestPhase4_ProvenanceRequiresValidSha256(t *testing.T) { tmp := t.TempDir() sftDir := filepath.Join(tmp, "exports", "sft") if err := os.MkdirAll(sftDir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } // Three rows: one valid, one wrong-length, one wrong-charset (uppercase). bad := `{"quality_score":"accepted","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}} {"quality_score":"accepted","provenance":{"sig_hash":"too_short"}} {"quality_score":"accepted","provenance":{"sig_hash":"A1B2C3D4E5F60718293A4B5C6D7E8F900112233445566778899AABBCCDDEEFF0"}} ` if err := os.WriteFile(filepath.Join(sftDir, "instruction_response.jsonl"), []byte(bad), 0o644); err != nil { t.Fatalf("write: %v", err) } report := RunAuditFull(AuditFullOptions{Root: tmp}) for _, c := range report.Checks { if c.Phase == 4 && strings.Contains(c.Name, "sig_hash") { if c.Actual != "2 missing" { t.Errorf("provenance check: got actual=%q, want '2 missing'", c.Actual) } if c.Passed { t.Errorf("provenance check should fail with 2 bad sig_hashes") } } } } // TestFormatAuditFullReport_RendersCheckTable: smoke-test the // Markdown formatter — operators should see the right verdict + // per-phase rows. func TestFormatAuditFullReport_RendersCheckTable(t *testing.T) { report := PhaseCheckReport{ GitHEAD: "deadbeef", Checks: []PhaseCheck{ {Phase: 0, Name: "test check", Expected: "x", Actual: "x", Passed: true, Required: true}, {Phase: 4, Name: "fail check", Expected: "0", Actual: "5", Passed: false, Required: true}, }, Metrics: map[string]int64{"p3_accepted": 42, "p4_sft_rows": 17}, Failed: 1, Skipped: 4, } out := FormatAuditFullReport(report) for _, want := range []string{"FAIL", "deadbeef", "test check", "fail check", "p3_accepted", "42", "deferred"} { if !strings.Contains(out, want) { t.Errorf("expected %q in formatted report:\n%s", want, out) } } }