package distillation

import (
	"errors"
	"strings"
	"testing"
)

// mkRecord returns an EvidenceRecord fixture with fixed run/task IDs,
// timestamps and signature hash, whose Provenance.SourceFile is sourceFile.
// Tests fill in the marker/verdict fields they care about on the returned
// value.
func mkRecord(sourceFile string) EvidenceRecord {
	return EvidenceRecord{
		RunID:         "run-1",
		TaskID:        "task-1",
		Timestamp:     "2026-04-29T12:00:00Z",
		SchemaVersion: EvidenceSchemaVersion,
		Provenance: Provenance{
			SourceFile: sourceFile,
			SigHash:    "deadbeef",
			RecordedAt: "2026-04-29T12:00:01Z",
		},
	}
}

// TestSourceClassFor checks the path → sourceClass mapping for each listed
// source file, including the fallback for a path that is not recognised.
func TestSourceClassFor(t *testing.T) {
	cases := []struct {
		path string
		want sourceClass
	}{
		{"data/_kb/scrum_reviews.jsonl", classVerdict},
		{"data/_kb/observer_reviews.jsonl", classVerdict},
		{"data/_kb/audits.jsonl", classVerdict},
		{"data/_kb/contract_analyses.jsonl", classVerdict},
		{"data/_kb/auto_apply.jsonl", classTelemetry},
		{"data/_kb/outcomes.jsonl", classTelemetry},
		{"data/_kb/mode_experiments.jsonl", classTelemetry},
		{"data/_kb/distilled_facts.jsonl", classExtraction},
		{"data/_kb/audit_facts.jsonl", classExtraction},
		{"data/_kb/observer_escalations.jsonl", classExtraction},
		{"data/_kb/wholly_unknown.jsonl", classExtraction}, // unknown → extraction (conservative)
	}
	for _, c := range cases {
		got := sourceClassFor(c.path)
		if got != c.want {
			t.Errorf("sourceClassFor(%q): want %q, got %q", c.path, c.want, got)
		}
	}
}

// TestScoreScrumReview scores scrum-review records with different
// accepted_on_attempt_N success markers and checks both the resulting
// category and that some reason contains the expected substring.
func TestScoreScrumReview(t *testing.T) {
	cases := []struct {
		name           string
		successMarkers []string
		wantCategory   ScoreCategory
		wantReasonSub  string
	}{
		{
			name:           "first attempt → accepted",
			successMarkers: []string{"accepted_on_attempt_1"},
			wantCategory:   CategoryAccepted,
			wantReasonSub:  "first attempt",
		},
		{
			name:           "second attempt → partial",
			successMarkers: []string{"accepted_on_attempt_2"},
			wantCategory:   CategoryPartiallyAccepted,
			wantReasonSub:  "after 2 attempts",
		},
		{
			name:           "fourth attempt → partial (high-cost)",
			successMarkers: []string{"accepted_on_attempt_4"},
			wantCategory:   CategoryPartiallyAccepted,
			wantReasonSub:  "high-cost",
		},
		{
			name:           "missing marker → needs_human_review",
			successMarkers: []string{},
			wantCategory:   CategoryNeedsHumanReview,
			wantReasonSub:  "missing accepted_on_attempt",
		},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/scrum_reviews.jsonl")
			rec.SuccessMarkers = c.successMarkers
			out := ScoreRecord(rec)
			if out.Category != c.wantCategory {
				t.Errorf("category: want %q, got %q (reasons=%v)", c.wantCategory, out.Category, out.Reasons)
			}
			if !reasonsContain(out.Reasons, c.wantReasonSub) {
				t.Errorf("reasons missing %q: %v", c.wantReasonSub, out.Reasons)
			}
		})
	}
}

// TestScoreObserverReview checks the category produced for each observer
// verdict, including the empty verdict and an unrecognised one.
func TestScoreObserverReview(t *testing.T) {
	cases := []struct {
		verdict ObserverVerdict
		want    ScoreCategory
	}{
		{VerdictAccept, CategoryAccepted},
		{VerdictReject, CategoryRejected},
		{VerdictCycle, CategoryPartiallyAccepted},
		{"", CategoryNeedsHumanReview},
		{"weird-verdict", CategoryNeedsHumanReview},
	}
	for _, c := range cases {
		rec := mkRecord("data/_kb/observer_reviews.jsonl")
		rec.ObserverVerdict = c.verdict
		out := ScoreRecord(rec)
		if out.Category != c.want {
			t.Errorf("verdict=%q: want %q, got %q", c.verdict, c.want, out.Category)
		}
	}
}

// TestScoreAudit_LegacyAndSeverityMarkers checks audit scoring for both the
// legacy markers (approved / blocked / request_changes) and the
// audit_severity_* markers, plus the no-marker case.
func TestScoreAudit_LegacyAndSeverityMarkers(t *testing.T) {
	cases := []struct {
		name string
		succ []string
		fail []string
		want ScoreCategory
	}{
		{"legacy approved", []string{"approved"}, nil, CategoryAccepted},
		{"legacy blocked", nil, []string{"blocked"}, CategoryRejected},
		{"legacy request_changes", nil, []string{"request_changes"}, CategoryPartiallyAccepted},
		{"severity_low → accepted", []string{"audit_severity_low"}, nil, CategoryAccepted},
		{"severity_info → accepted", []string{"audit_severity_info"}, nil, CategoryAccepted},
		{"severity_medium fail → partial", nil, []string{"audit_severity_medium"}, CategoryPartiallyAccepted},
		{"severity_high → rejected", nil, []string{"audit_severity_high"}, CategoryRejected},
		{"severity_critical → rejected", nil, []string{"audit_severity_critical"}, CategoryRejected},
		{"no markers", nil, nil, CategoryNeedsHumanReview},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/audits.jsonl")
			rec.SuccessMarkers = c.succ
			rec.FailureMarkers = c.fail
			out := ScoreRecord(rec)
			if out.Category != c.want {
				t.Errorf("want %q, got %q (reasons=%v)", c.want, out.Category, out.Reasons)
			}
		})
	}
}

// TestScoreAutoApply checks auto-apply scoring for the committed marker, two
// reverted_* failure markers, and a record with no markers at all.
func TestScoreAutoApply(t *testing.T) {
	cases := []struct {
		name string
		succ []string
		fail []string
		want ScoreCategory
	}{
		{"committed → accepted", []string{"committed"}, nil, CategoryAccepted},
		{"reverted_build_red → rejected", nil, []string{"reverted_build_red"}, CategoryRejected},
		{"reverted other → rejected", nil, []string{"reverted_warning_count_up"}, CategoryRejected},
		{"no signal → needs_human", nil, nil, CategoryNeedsHumanReview},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/auto_apply.jsonl")
			rec.SuccessMarkers = c.succ
			rec.FailureMarkers = c.fail
			out := ScoreRecord(rec)
			if out.Category != c.want {
				t.Errorf("want %q, got %q", c.want, out.Category)
			}
		})
	}
}

// TestScoreOutcomes checks three outcome records: one with the all_events_ok
// success marker, one with a gap_signals validation result of 2, and one
// with no signal.
func TestScoreOutcomes(t *testing.T) {
	rec := mkRecord("data/_kb/outcomes.jsonl")
	rec.SuccessMarkers = []string{"all_events_ok"}
	if out := ScoreRecord(rec); out.Category != CategoryAccepted {
		t.Errorf("all_events_ok: want accepted, got %q", out.Category)
	}

	rec2 := mkRecord("data/_kb/outcomes.jsonl")
	rec2.ValidationResults = map[string]any{"gap_signals": float64(2)}
	if out := ScoreRecord(rec2); out.Category != CategoryPartiallyAccepted {
		t.Errorf("gap_signals=2: want partial, got %q (reasons=%v)", out.Category, out.Reasons)
	}

	rec3 := mkRecord("data/_kb/outcomes.jsonl")
	if out := ScoreRecord(rec3); out.Category != CategoryNeedsHumanReview {
		t.Errorf("no signal: want needs_human, got %q", out.Category)
	}
}

// TestScoreModeExperiment walks one mode-experiment record through three
// states: empty text, non-empty text with a 130_000 ms latency, and
// non-empty text with a 5000 ms latency. The same record is mutated between
// steps, so the order of the steps matters.
func TestScoreModeExperiment(t *testing.T) {
	rec := mkRecord("data/_kb/mode_experiments.jsonl")
	rec.Text = ""
	if out := ScoreRecord(rec); out.Category != CategoryRejected {
		t.Errorf("empty text: want rejected, got %q", out.Category)
	}

	rec.Text = "real response"
	rec.LatencyMs = 130_000
	if out := ScoreRecord(rec); out.Category != CategoryPartiallyAccepted {
		t.Errorf("over latency cap: want partial, got %q", out.Category)
	}

	rec.LatencyMs = 5000
	if out := ScoreRecord(rec); out.Category != CategoryNeedsHumanReview {
		t.Errorf("normal: want needs_human (verdict not yet wired), got %q", out.Category)
	}
}

// TestScoreExtraction_Defaults checks that a bare record from each listed
// extraction source scores as needs_human_review.
func TestScoreExtraction_Defaults(t *testing.T) {
	for _, src := range []string{
		"data/_kb/distilled_facts.jsonl",
		"data/_kb/distilled_procedures.jsonl",
		"data/_kb/audit_facts.jsonl",
		"data/_kb/observer_escalations.jsonl",
	} {
		rec := mkRecord(src)
		out := ScoreRecord(rec)
		if out.Category != CategoryNeedsHumanReview {
			t.Errorf("%s: want needs_human_review, got %q", src, out.Category)
		}
	}
}

// ─── Contamination firewall — the safety-critical guarantee ───────

// TestValidateSftSample_RejectsContaminationCategories checks that an
// otherwise-valid sample with quality score "rejected" or
// "needs_human_review" fails validation with an error matching
// ErrSftContamination.
func TestValidateSftSample_RejectsContaminationCategories(t *testing.T) {
	for _, contaminated := range []SftQualityScore{
		SftQualityScore("rejected"),
		SftQualityScore("needs_human_review"),
	} {
		s := goodSftSample()
		s.QualityScore = contaminated
		err := ValidateSftSample(s)
		if err == nil {
			t.Errorf("contaminated quality_score=%q should fail validation", contaminated)
			continue
		}
		if !errors.Is(err, ErrSftContamination) {
			t.Errorf("contaminated %q: want errors.Is(err, ErrSftContamination), got %v", contaminated, err)
		}
	}
}

// TestValidateSftSample_AcceptsLegalCategories checks that the accepted and
// partially-accepted quality scores pass validation.
func TestValidateSftSample_AcceptsLegalCategories(t *testing.T) {
	for _, legal := range []SftQualityScore{SftQualityAccepted, SftQualityPartiallyAccepted} {
		s := goodSftSample()
		s.QualityScore = legal
		if err := ValidateSftSample(s); err != nil {
			t.Errorf("legal quality_score=%q failed: %v", legal, err)
		}
	}
}

// TestValidateSftSample_RejectsTypoCategory checks that an unknown quality
// score fails validation without being reported as contamination.
func TestValidateSftSample_RejectsTypoCategory(t *testing.T) {
	s := goodSftSample()
	s.QualityScore = "approved" // close to "accepted" but wrong
	err := ValidateSftSample(s)
	if err == nil {
		t.Fatal("typo category should fail validation")
	}
	// Typo is NOT contamination — should be a regular ValidationError,
	// not the firewall sentinel. This distinguishes "you typo'd" from
	// "you broke the spec."
	if errors.Is(err, ErrSftContamination) {
		t.Error("typo should not surface as ErrSftContamination")
	}
}

// TestValidateSftSample_RejectsEmptyPair checks that a whitespace-only
// instruction and an empty response each fail validation.
func TestValidateSftSample_RejectsEmptyPair(t *testing.T) {
	s := goodSftSample()
	s.Instruction = "   "
	if err := ValidateSftSample(s); err == nil {
		t.Error("whitespace-only instruction should fail")
	}

	s2 := goodSftSample()
	s2.Response = ""
	if err := ValidateSftSample(s2); err == nil {
		t.Error("empty response should fail")
	}
}

// TestValidateScoredRun_ReasonsRequired checks that a ScoredRun with nil
// Reasons fails validation and that the error text mentions "reasons".
func TestValidateScoredRun_ReasonsRequired(t *testing.T) {
	r := ScoredRun{
		SchemaVersion:  ScoredRunSchemaVersion,
		EvidenceRunID:  "x",
		EvidenceTaskID: "y",
		Category:       CategoryAccepted,
		Reasons:        nil, // empty — must fail
		ScoredAt:       "2026-04-29T12:00:00Z",
		ScorerVersion:  ScorerVersion,
		Provenance: Provenance{
			SourceFile: "data/_kb/scrum_reviews.jsonl",
			SigHash:    "abc",
			RecordedAt: "2026-04-29T12:00:00Z",
		},
	}
	err := ValidateScoredRun(r)
	if err == nil {
		t.Fatal("empty reasons should fail")
	}
	if !strings.Contains(err.Error(), "reasons") {
		t.Errorf("error should mention reasons: %v", err)
	}
}

// TestBuildScoredRun_DeterministicSigHash builds a scored run twice from the
// same record and arguments and checks the two sig hashes match, along with
// the category and the stamped scorer version.
func TestBuildScoredRun_DeterministicSigHash(t *testing.T) {
	rec := mkRecord("data/_kb/scrum_reviews.jsonl")
	rec.SuccessMarkers = []string{"accepted_on_attempt_1"}

	r1, err := BuildScoredRun(rec, "data/scored-runs/2026/04/29/x.jsonl", 0, "2026-04-29T12:00:00Z")
	if err != nil {
		t.Fatal(err)
	}
	r2, err := BuildScoredRun(rec, "data/scored-runs/2026/04/29/x.jsonl", 0, "2026-04-29T12:00:00Z")
	if err != nil {
		t.Fatal(err)
	}

	if r1.Provenance.SigHash != r2.Provenance.SigHash {
		t.Errorf("identical EvidenceRecord should produce identical sig_hash: %s vs %s",
			r1.Provenance.SigHash, r2.Provenance.SigHash)
	}
	if r1.Category != CategoryAccepted {
		t.Errorf("scored category: %q", r1.Category)
	}
	if r1.ScorerVersion != ScorerVersion {
		t.Errorf("scorer version stamped wrong: %q", r1.ScorerVersion)
	}
}

// TestScoreRecord_PureFunction_NoMutationOfInput scores the same record twice
// and checks that the input is untouched and that both results agree.
func TestScoreRecord_PureFunction_NoMutationOfInput(t *testing.T) {
	// Belt-and-braces: the contract says "NO mutable state, identical
	// input → identical output forever." Verify by scoring the same
	// record twice and ensuring the input hasn't been touched.
	const marker = "accepted_on_attempt_2"

	rec := mkRecord("data/_kb/scrum_reviews.jsonl")
	rec.SuccessMarkers = []string{marker}
	original := rec

	out1 := ScoreRecord(rec)
	out2 := ScoreRecord(rec)

	// original is a shallow copy, so its SuccessMarkers slice shares a
	// backing array with rec's. Comparing against the known literal (not
	// against original) is what catches an in-place element overwrite; a
	// length check alone would miss it.
	if rec.RunID != original.RunID ||
		len(rec.SuccessMarkers) != 1 ||
		rec.SuccessMarkers[0] != marker {
		t.Error("ScoreRecord mutated its input")
	}

	if out1.Category != out2.Category {
		t.Error("ScoreRecord is non-deterministic")
	}
	// The category can match while the explanation differs; the reasons must
	// be identical across calls as well.
	if strings.Join(out1.Reasons, "\n") != strings.Join(out2.Reasons, "\n") {
		t.Errorf("ScoreRecord reasons are non-deterministic: %v vs %v", out1.Reasons, out2.Reasons)
	}
}

// ─── Helpers ─────────────────────────────────────────────────────

// goodSftSample returns an SftSample fixture with every field populated and
// QualityScore set to SftQualityAccepted. Tests override the single field
// they want to break.
func goodSftSample() SftSample {
	return SftSample{
		SchemaVersion: SftSampleSchemaVersion,
		ID:            "sft-1",
		Instruction:   "summarize the diff",
		Context:       "diff body...",
		Response:      "the diff adds a function",
		SourceRunID:   "run-1",
		QualityScore:  SftQualityAccepted,
		CreatedAt:     "2026-04-29T12:00:00Z",
		Provenance: Provenance{
			SourceFile: "data/scored-runs/2026/04/29/x.jsonl",
			SigHash:    "deadbeef",
			RecordedAt: "2026-04-29T12:00:01Z",
		},
	}
}

// reasonsContain reports whether any element of reasons contains sub as a
// substring. It returns false for an empty or nil slice.
func reasonsContain(reasons []string, sub string) bool {
	for _, r := range reasons {
		if strings.Contains(r, sub) {
			return true
		}
	}
	return false
}