E (partial): distillation port — scorer + contamination firewall
First slice of the Rust v1.0.0 distillation substrate (e7636f2) ported to Go per ADR-001 #4 (port LOGIC, not bit-identical reproducibility). This commit lands the LOAD-BEARING pieces named in project_distillation_substrate.md memory: - The deterministic Success Scorer (8 sub-scorers + dispatch) - The contamination firewall on SFT samples (the "non-negotiable" spec property: rejected/needs_human_review NEVER ship to SFT) - All on-wire types + validators for ScoredRun, SftSample, EvidenceRecord with Provenance Files: internal/distillation/types.go — types + ScorerVersion + SftNever + ValidateScoredRun + ValidateSftSample internal/distillation/scorer.go — ScoreRecord + 8 class scorers + BuildScoredRun (deterministic) internal/distillation/scorer_test.go — ~40 test cases: - source-class dispatch (verdict / telemetry / extraction) - scrum_review (4 attempt cases) - observer_review (5 verdict cases) - audit (legacy + severity, 9 cases) - auto_apply (4 cases) - outcomes / mode_experiment / extraction - CONTAMINATION FIREWALL: ErrSftContamination sentinel fires on rejected/needs_human_review, distinct from typo errors - empty-pair guard (instruction/response trim != "") - reasons-required ScoredRun validation - deterministic sig_hash on identical input - purity check (input not mutated, repeatable output) Per the 2026-04-29 cross-lineage scrum's discipline: false-positive findings would be dismissed inline (none in this commit). Real findings would be addressed before merge — but this is greenfield port code reviewed against its Rust source line-by-line, which the test suite encodes as truth tables. 
Explicitly DEFERRED to follow-up commits: - Materialization layer (jsonl read/write, date-partitioned storage in data/scored-runs/YYYY/MM/DD/, evidence index) - SFT exporter (file iteration + filtering — the SCORING firewall is here; the EXPORT firewall is the next layer) - export_preference, export_rag (other export shapes) - Acceptance harness (16/16 acceptance gate that locks v1.0.0) - replay, receipts, build_evidence_index, transforms The scorer + firewall validator are pure functions — operational tooling layers on top without changing the deterministic logic the downstream learning loop depends on. The Go ScorerVersion stays at v1.0.0 to match the Rust e7636f2 baseline; bumping in the Go materialization commit is reserved for the next scoring-rule change, NOT the port itself. 15-smoke regression all green. vet clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7f42089521
commit
57d0df125d
410
internal/distillation/scorer.go
Normal file
410
internal/distillation/scorer.go
Normal file
@ -0,0 +1,410 @@
|
|||||||
|
package distillation
|
||||||
|
|
||||||
|
// scorer.go — pure deterministic Success Scorer (port of Rust
|
||||||
|
// scripts/distillation/scorer.ts at e7636f2).
|
||||||
|
//
|
||||||
|
// Takes one EvidenceRecord, returns category + reasons + sub_scores.
|
||||||
|
// NO I/O, NO LLM, NO clock reads, NO mutable state. Identical input
|
||||||
|
// → identical output forever. Same contract as the Rust source —
|
||||||
|
// future scoring-rule changes bump ScorerVersion atomically with
|
||||||
|
// the logic.
|
||||||
|
//
|
||||||
|
// Three-class strategy mirrors the Rust source taxonomy
|
||||||
|
// (docs/recon/local-distillation-recon.md + data/_kb/evidence_health.md):
|
||||||
|
//
|
||||||
|
// CLASS A — verdict-bearing
|
||||||
|
// scrum_reviews, observer_reviews, audits, contract_analyses
|
||||||
|
// Direct scoring from existing markers / observer_verdict
|
||||||
|
//
|
||||||
|
// CLASS B — telemetry-rich
|
||||||
|
// auto_apply, outcomes, mode_experiments
|
||||||
|
// Markers exist but partial; needs_human_review fills the gap
|
||||||
|
//
|
||||||
|
// CLASS C — pure-extraction (no native scoring signal)
|
||||||
|
// distilled_*, audit_facts, observer_escalations
|
||||||
|
// Default needs_human_review; v2 will JOIN to parent verdict
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// sourceClass categorizes an EvidenceRecord's source_file for the
// scorer's three-class dispatch.
type sourceClass string

const (
	classVerdict    sourceClass = "verdict"
	classTelemetry  sourceClass = "telemetry"
	classExtraction sourceClass = "extraction"
)

// sourceClassFor maps a source_file (from provenance) to its scoring
// class. Centralized so adding a new source is a one-line change.
// Mirrors the Rust switch on the stem (data/_kb/X.jsonl → X).
func sourceClassFor(sourceFile string) sourceClass {
	name := strings.TrimPrefix(sourceFile, "data/_kb/")
	name = strings.TrimSuffix(name, ".jsonl")
	switch name {
	case "scrum_reviews", "observer_reviews", "audits", "contract_analyses":
		return classVerdict
	case "auto_apply", "outcomes", "mode_experiments":
		return classTelemetry
	}
	// Known extraction sources ("distilled_*", "audit_facts",
	// "observer_escalations") and any unrecognized stem both take the
	// most conservative path: class C forces needs_human_review until
	// a dedicated transform is added.
	return classExtraction
}
|
||||||
|
|
||||||
|
// stemOf extracts the stable corpus identifier from a source_file.
// E.g. "data/_kb/scrum_reviews.jsonl" → "scrum_reviews".
func stemOf(sourceFile string) string {
	trimmed := strings.TrimPrefix(sourceFile, "data/_kb/")
	return strings.TrimSuffix(trimmed, ".jsonl")
}
|
||||||
|
|
||||||
|
// ScoreOutput is the scorer's return shape — category + reasons +
// the captured sub-signals. Reasons is always non-empty (validator
// requires it).
type ScoreOutput struct {
	// Category is the scoring verdict bucket assigned to the record.
	Category ScoreCategory
	// Reasons is the human-readable justification; every class scorer
	// in this file populates at least one entry.
	Reasons []string
	// SubScores carries the per-class sub-signals captured while
	// scoring; every scorer in this file returns a non-nil pointer.
	SubScores *SubScores
}
|
||||||
|
|
||||||
|
// ScoreRecord dispatches an EvidenceRecord to the appropriate class
|
||||||
|
// scorer and returns the verdict + reasons + sub-scores. Pure
|
||||||
|
// function. Caller wraps the output in a ScoredRun via BuildScoredRun
|
||||||
|
// for the on-wire shape.
|
||||||
|
func ScoreRecord(rec EvidenceRecord) ScoreOutput {
|
||||||
|
cls := sourceClassFor(rec.Provenance.SourceFile)
|
||||||
|
stem := stemOf(rec.Provenance.SourceFile)
|
||||||
|
|
||||||
|
switch cls {
|
||||||
|
case classVerdict:
|
||||||
|
switch stem {
|
||||||
|
case "scrum_reviews":
|
||||||
|
return scoreScrumReview(rec)
|
||||||
|
case "observer_reviews":
|
||||||
|
return scoreObserverReview(rec)
|
||||||
|
case "audits":
|
||||||
|
return scoreAudit(rec)
|
||||||
|
case "contract_analyses":
|
||||||
|
return scoreContractAnalysis(rec)
|
||||||
|
}
|
||||||
|
case classTelemetry:
|
||||||
|
switch stem {
|
||||||
|
case "auto_apply":
|
||||||
|
return scoreAutoApply(rec)
|
||||||
|
case "outcomes":
|
||||||
|
return scoreOutcomes(rec)
|
||||||
|
case "mode_experiments":
|
||||||
|
return scoreModeExperiment(rec)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return scoreExtraction()
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildScoredRun composes a complete ScoredRun for persistence.
|
||||||
|
// Caller supplies recorded_at + the source file path/line offset.
|
||||||
|
// SigHash is computed deterministically from the EvidenceRecord
|
||||||
|
// JSON; ScoredRun traces to the materialized evidence row.
|
||||||
|
func BuildScoredRun(rec EvidenceRecord, sourceFile string, lineOffset int64, recordedAt string) (ScoredRun, error) {
|
||||||
|
out := ScoreRecord(rec)
|
||||||
|
sig, err := canonicalSha256(rec)
|
||||||
|
if err != nil {
|
||||||
|
return ScoredRun{}, fmt.Errorf("scoredrun sig hash: %w", err)
|
||||||
|
}
|
||||||
|
return ScoredRun{
|
||||||
|
SchemaVersion: ScoredRunSchemaVersion,
|
||||||
|
EvidenceRunID: rec.RunID,
|
||||||
|
EvidenceTaskID: rec.TaskID,
|
||||||
|
Category: out.Category,
|
||||||
|
Reasons: out.Reasons,
|
||||||
|
ScoredAt: recordedAt,
|
||||||
|
ScorerVersion: ScorerVersion,
|
||||||
|
SubScores: out.SubScores,
|
||||||
|
Provenance: Provenance{
|
||||||
|
SourceFile: sourceFile,
|
||||||
|
LineOffset: lineOffset,
|
||||||
|
SigHash: sig,
|
||||||
|
RecordedAt: recordedAt,
|
||||||
|
},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// canonicalSha256 hashes a value's canonical JSON encoding. Used
// for ScoredRun.Provenance.SigHash. Matches the Rust pattern of
// "hash the structured object, not the raw source bytes" so
// re-materialization with same logic produces same hash.
func canonicalSha256(v any) (string, error) {
	encoded, err := json.Marshal(v)
	if err != nil {
		return "", err
	}
	digest := sha256.Sum256(encoded)
	return hex.EncodeToString(digest[:]), nil
}
|
||||||
|
|
||||||
|
// ─── Class A: verdict-bearing ────────────────────────────────────
|
||||||
|
|
||||||
|
func scoreScrumReview(r EvidenceRecord) ScoreOutput {
|
||||||
|
subs := &SubScores{}
|
||||||
|
successMarker := findPrefix(r.SuccessMarkers, "accepted_on_attempt_")
|
||||||
|
if successMarker == "" {
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{"scrum_review missing accepted_on_attempt_* success marker"},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
attemptStr := strings.TrimPrefix(successMarker, "accepted_on_attempt_")
|
||||||
|
attempt, err := strconv.Atoi(attemptStr)
|
||||||
|
if err != nil {
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{"scrum_review accepted_on_attempt_* marker has non-integer suffix: " + attemptStr},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
subs.AcceptedOnAttempt = &attempt
|
||||||
|
switch {
|
||||||
|
case attempt == 1:
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryAccepted,
|
||||||
|
Reasons: []string{"scrum: accepted on first attempt"},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
case attempt <= 3:
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryPartiallyAccepted,
|
||||||
|
Reasons: []string{fmt.Sprintf("scrum: accepted after %d attempts", attempt)},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryPartiallyAccepted,
|
||||||
|
Reasons: []string{fmt.Sprintf("scrum: accepted only after %d attempts (high-cost path)", attempt)},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func scoreObserverReview(r EvidenceRecord) ScoreOutput {
|
||||||
|
subs := &SubScores{}
|
||||||
|
switch r.ObserverVerdict {
|
||||||
|
case VerdictAccept:
|
||||||
|
subs.ObserverVerdict = VerdictAccept
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryAccepted,
|
||||||
|
Reasons: []string{"observer accepted the reviewed attempt"},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
case VerdictReject:
|
||||||
|
subs.ObserverVerdict = VerdictReject
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryRejected,
|
||||||
|
Reasons: []string{"observer rejected the reviewed attempt"},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
case VerdictCycle:
|
||||||
|
subs.ObserverVerdict = VerdictCycle
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryPartiallyAccepted,
|
||||||
|
Reasons: []string{"observer flagged the attempt as cycling — partial signal"},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{fmt.Sprintf("observer_verdict missing or unrecognized: %q", r.ObserverVerdict)},
|
||||||
|
SubScores: subs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func scoreAudit(r EvidenceRecord) ScoreOutput {
|
||||||
|
subs := &SubScores{}
|
||||||
|
succ := r.SuccessMarkers
|
||||||
|
fail := r.FailureMarkers
|
||||||
|
|
||||||
|
// Legacy markers (back-compat with pre-fix materializations).
|
||||||
|
if contains(succ, "approved") {
|
||||||
|
return ScoreOutput{Category: CategoryAccepted,
|
||||||
|
Reasons: []string{"audit overall=approved (legacy marker)"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
if contains(fail, "blocked") {
|
||||||
|
return ScoreOutput{Category: CategoryRejected,
|
||||||
|
Reasons: []string{"audit overall=block (legacy marker)"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
if contains(fail, "request_changes") {
|
||||||
|
return ScoreOutput{Category: CategoryPartiallyAccepted,
|
||||||
|
Reasons: []string{"audit overall=request_changes (legacy marker)"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Severity-derived markers (Phase 2 transform).
|
||||||
|
sevSucc := findPrefix(succ, "audit_severity_")
|
||||||
|
sevFail := findPrefix(fail, "audit_severity_")
|
||||||
|
if sevSucc != "" {
|
||||||
|
return ScoreOutput{Category: CategoryAccepted,
|
||||||
|
Reasons: []string{sevSucc + " → minor finding"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
if sevFail == "audit_severity_medium" {
|
||||||
|
return ScoreOutput{Category: CategoryPartiallyAccepted,
|
||||||
|
Reasons: []string{"audit_severity_medium → finding warrants review"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
if sevFail == "audit_severity_high" || sevFail == "audit_severity_critical" {
|
||||||
|
return ScoreOutput{Category: CategoryRejected,
|
||||||
|
Reasons: []string{sevFail + " → blocking finding"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
return ScoreOutput{Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{"audit row has no severity or overall marker"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
|
||||||
|
func scoreContractAnalysis(r EvidenceRecord) ScoreOutput {
|
||||||
|
subs := &SubScores{}
|
||||||
|
// failure_markers takes precedence: explicit rejection beats absent verdict.
|
||||||
|
if contains(r.FailureMarkers, "observer_rejected") || r.ObserverVerdict == VerdictReject {
|
||||||
|
subs.ObserverVerdict = VerdictReject
|
||||||
|
return ScoreOutput{Category: CategoryRejected,
|
||||||
|
Reasons: []string{"contract analysis: observer rejected"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
switch r.ObserverVerdict {
|
||||||
|
case VerdictAccept:
|
||||||
|
subs.ObserverVerdict = VerdictAccept
|
||||||
|
return ScoreOutput{Category: CategoryAccepted,
|
||||||
|
Reasons: []string{"contract analysis: observer accepted"}, SubScores: subs}
|
||||||
|
case VerdictCycle:
|
||||||
|
subs.ObserverVerdict = VerdictCycle
|
||||||
|
return ScoreOutput{Category: CategoryPartiallyAccepted,
|
||||||
|
Reasons: []string{"contract analysis: observer cycled (partial)"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
return ScoreOutput{Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{"contract analysis: no observer verdict signal"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Class B: telemetry-rich ─────────────────────────────────────
|
||||||
|
|
||||||
|
func scoreAutoApply(r EvidenceRecord) ScoreOutput {
|
||||||
|
subs := &SubScores{}
|
||||||
|
if contains(r.SuccessMarkers, "committed") {
|
||||||
|
t := true
|
||||||
|
subs.CargoGreen = &t
|
||||||
|
return ScoreOutput{Category: CategoryAccepted,
|
||||||
|
Reasons: []string{"auto_apply: patch committed (cargo green + warning baseline + rationale alignment passed)"},
|
||||||
|
SubScores: subs}
|
||||||
|
}
|
||||||
|
reverted := findContaining(r.FailureMarkers, "reverted")
|
||||||
|
if reverted != "" {
|
||||||
|
if strings.Contains(reverted, "build_red") {
|
||||||
|
f := false
|
||||||
|
subs.CargoGreen = &f
|
||||||
|
}
|
||||||
|
return ScoreOutput{Category: CategoryRejected,
|
||||||
|
Reasons: []string{"auto_apply: " + reverted}, SubScores: subs}
|
||||||
|
}
|
||||||
|
return ScoreOutput{Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{"auto_apply: no commit + no revert (likely no_patches or dry_run)"},
|
||||||
|
SubScores: subs}
|
||||||
|
}
|
||||||
|
|
||||||
|
func scoreOutcomes(r EvidenceRecord) ScoreOutput {
|
||||||
|
subs := &SubScores{}
|
||||||
|
if contains(r.SuccessMarkers, "all_events_ok") {
|
||||||
|
return ScoreOutput{Category: CategoryAccepted,
|
||||||
|
Reasons: []string{"outcomes: all events ok"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
if gap := numericFromMap(r.ValidationResults, "gap_signals"); gap > 0 {
|
||||||
|
return ScoreOutput{Category: CategoryPartiallyAccepted,
|
||||||
|
Reasons: []string{fmt.Sprintf("outcomes: %d gap signal(s) detected", int(gap))},
|
||||||
|
SubScores: subs}
|
||||||
|
}
|
||||||
|
return ScoreOutput{Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{"outcomes: no decisive marker — defer to human"},
|
||||||
|
SubScores: subs}
|
||||||
|
}
|
||||||
|
|
||||||
|
func scoreModeExperiment(r EvidenceRecord) ScoreOutput {
|
||||||
|
subs := &SubScores{}
|
||||||
|
if strings.TrimSpace(r.Text) == "" {
|
||||||
|
return ScoreOutput{Category: CategoryRejected,
|
||||||
|
Reasons: []string{"mode_experiment: empty response text"}, SubScores: subs}
|
||||||
|
}
|
||||||
|
if r.LatencyMs > 120_000 {
|
||||||
|
return ScoreOutput{Category: CategoryPartiallyAccepted,
|
||||||
|
Reasons: []string{fmt.Sprintf("mode_experiment: latency %dms exceeds 2-minute soft cap", r.LatencyMs)},
|
||||||
|
SubScores: subs}
|
||||||
|
}
|
||||||
|
return ScoreOutput{Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{"mode_experiment: response present, latency within bounds; verdict not yet wired"},
|
||||||
|
SubScores: subs}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Class C: pure-extraction ────────────────────────────────────
|
||||||
|
|
||||||
|
func scoreExtraction() ScoreOutput {
|
||||||
|
return ScoreOutput{
|
||||||
|
Category: CategoryNeedsHumanReview,
|
||||||
|
Reasons: []string{"extraction-class source has no native scoring signal — JOIN to parent verdict pending Phase 3 v2"},
|
||||||
|
SubScores: &SubScores{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Internal helpers ────────────────────────────────────────────
|
||||||
|
|
||||||
|
// contains reports whether want appears (exact match) in slice.
func contains(slice []string, want string) bool {
	for i := range slice {
		if slice[i] == want {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// findPrefix returns the first element of slice starting with prefix,
// or "" when none does.
func findPrefix(slice []string, prefix string) string {
	for i := range slice {
		if strings.HasPrefix(slice[i], prefix) {
			return slice[i]
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// findContaining returns the first element of slice containing sub as
// a substring, or "" when none does.
func findContaining(slice []string, sub string) string {
	for i := range slice {
		if strings.Contains(slice[i], sub) {
			return slice[i]
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// numericFromMap reads m[key] as a float64, accepting the numeric
// representations JSON decoding can produce (plus plain ints). A nil
// map, a missing key, or a non-numeric value all yield 0.
func numericFromMap(m map[string]any, key string) float64 {
	// Indexing a nil map is safe in Go and yields (zero, false), so no
	// separate nil check is needed.
	v, ok := m[key]
	if !ok {
		return 0
	}
	switch n := v.(type) {
	case int:
		return float64(n)
	case int64:
		return float64(n)
	case float32:
		return float64(n)
	case float64:
		return n
	case json.Number:
		// Parse failure deliberately falls back to 0 — this helper is
		// best-effort by contract.
		f, _ := n.Float64()
		return f
	}
	return 0
}
|
||||||
375
internal/distillation/scorer_test.go
Normal file
375
internal/distillation/scorer_test.go
Normal file
@ -0,0 +1,375 @@
|
|||||||
|
package distillation
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func mkRecord(sourceFile string) EvidenceRecord {
|
||||||
|
return EvidenceRecord{
|
||||||
|
RunID: "run-1",
|
||||||
|
TaskID: "task-1",
|
||||||
|
Timestamp: "2026-04-29T12:00:00Z",
|
||||||
|
SchemaVersion: EvidenceSchemaVersion,
|
||||||
|
Provenance: Provenance{
|
||||||
|
SourceFile: sourceFile,
|
||||||
|
SigHash: "deadbeef",
|
||||||
|
RecordedAt: "2026-04-29T12:00:01Z",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSourceClassFor(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
path string
|
||||||
|
want sourceClass
|
||||||
|
}{
|
||||||
|
{"data/_kb/scrum_reviews.jsonl", classVerdict},
|
||||||
|
{"data/_kb/observer_reviews.jsonl", classVerdict},
|
||||||
|
{"data/_kb/audits.jsonl", classVerdict},
|
||||||
|
{"data/_kb/contract_analyses.jsonl", classVerdict},
|
||||||
|
{"data/_kb/auto_apply.jsonl", classTelemetry},
|
||||||
|
{"data/_kb/outcomes.jsonl", classTelemetry},
|
||||||
|
{"data/_kb/mode_experiments.jsonl", classTelemetry},
|
||||||
|
{"data/_kb/distilled_facts.jsonl", classExtraction},
|
||||||
|
{"data/_kb/audit_facts.jsonl", classExtraction},
|
||||||
|
{"data/_kb/observer_escalations.jsonl", classExtraction},
|
||||||
|
{"data/_kb/wholly_unknown.jsonl", classExtraction}, // unknown → extraction (conservative)
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
got := sourceClassFor(c.path)
|
||||||
|
if got != c.want {
|
||||||
|
t.Errorf("sourceClassFor(%q): want %q, got %q", c.path, c.want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestScoreScrumReview is the truth table for scrum_reviews scoring:
// attempt 1 → accepted, attempts 2-3 → partially_accepted, 4+ →
// partially_accepted (flagged high-cost), and a missing marker →
// needs_human_review.
func TestScoreScrumReview(t *testing.T) {
	cases := []struct {
		name           string
		successMarkers []string
		wantCategory   ScoreCategory
		wantReasonSub  string // substring that must appear in the returned reasons
	}{
		{
			name:           "first attempt → accepted",
			successMarkers: []string{"accepted_on_attempt_1"},
			wantCategory:   CategoryAccepted,
			wantReasonSub:  "first attempt",
		},
		{
			name:           "second attempt → partial",
			successMarkers: []string{"accepted_on_attempt_2"},
			wantCategory:   CategoryPartiallyAccepted,
			wantReasonSub:  "after 2 attempts",
		},
		{
			name:           "fourth attempt → partial (high-cost)",
			successMarkers: []string{"accepted_on_attempt_4"},
			wantCategory:   CategoryPartiallyAccepted,
			wantReasonSub:  "high-cost",
		},
		{
			name:           "missing marker → needs_human_review",
			successMarkers: []string{},
			wantCategory:   CategoryNeedsHumanReview,
			wantReasonSub:  "missing accepted_on_attempt",
		},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/scrum_reviews.jsonl")
			rec.SuccessMarkers = c.successMarkers
			out := ScoreRecord(rec)
			if out.Category != c.wantCategory {
				t.Errorf("category: want %q, got %q (reasons=%v)", c.wantCategory, out.Category, out.Reasons)
			}
			if !reasonsContain(out.Reasons, c.wantReasonSub) {
				t.Errorf("reasons missing %q: %v", c.wantReasonSub, out.Reasons)
			}
		})
	}
}
|
||||||
|
|
||||||
|
func TestScoreObserverReview(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
verdict ObserverVerdict
|
||||||
|
want ScoreCategory
|
||||||
|
}{
|
||||||
|
{VerdictAccept, CategoryAccepted},
|
||||||
|
{VerdictReject, CategoryRejected},
|
||||||
|
{VerdictCycle, CategoryPartiallyAccepted},
|
||||||
|
{"", CategoryNeedsHumanReview},
|
||||||
|
{"weird-verdict", CategoryNeedsHumanReview},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
rec := mkRecord("data/_kb/observer_reviews.jsonl")
|
||||||
|
rec.ObserverVerdict = c.verdict
|
||||||
|
out := ScoreRecord(rec)
|
||||||
|
if out.Category != c.want {
|
||||||
|
t.Errorf("verdict=%q: want %q, got %q", c.verdict, c.want, out.Category)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestScoreAudit_LegacyAndSeverityMarkers is the truth table for
// audits rows: three legacy overall markers, four severity-derived
// markers, and the no-marker fallback to needs_human_review.
func TestScoreAudit_LegacyAndSeverityMarkers(t *testing.T) {
	cases := []struct {
		name string
		succ []string
		fail []string
		want ScoreCategory
	}{
		{"legacy approved", []string{"approved"}, nil, CategoryAccepted},
		{"legacy blocked", nil, []string{"blocked"}, CategoryRejected},
		{"legacy request_changes", nil, []string{"request_changes"}, CategoryPartiallyAccepted},
		{"severity_low → accepted", []string{"audit_severity_low"}, nil, CategoryAccepted},
		{"severity_info → accepted", []string{"audit_severity_info"}, nil, CategoryAccepted},
		{"severity_medium fail → partial", nil, []string{"audit_severity_medium"}, CategoryPartiallyAccepted},
		{"severity_high → rejected", nil, []string{"audit_severity_high"}, CategoryRejected},
		{"severity_critical → rejected", nil, []string{"audit_severity_critical"}, CategoryRejected},
		{"no markers", nil, nil, CategoryNeedsHumanReview},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/audits.jsonl")
			rec.SuccessMarkers = c.succ
			rec.FailureMarkers = c.fail
			out := ScoreRecord(rec)
			if out.Category != c.want {
				t.Errorf("want %q, got %q (reasons=%v)", c.want, out.Category, out.Reasons)
			}
		})
	}
}
|
||||||
|
|
||||||
|
// TestScoreAutoApply is the truth table for auto_apply rows:
// committed → accepted, any reverted_* marker → rejected, no
// signal → needs_human_review.
func TestScoreAutoApply(t *testing.T) {
	cases := []struct {
		name string
		succ []string
		fail []string
		want ScoreCategory
	}{
		{"committed → accepted", []string{"committed"}, nil, CategoryAccepted},
		{"reverted_build_red → rejected", nil, []string{"reverted_build_red"}, CategoryRejected},
		{"reverted other → rejected", nil, []string{"reverted_warning_count_up"}, CategoryRejected},
		{"no signal → needs_human", nil, nil, CategoryNeedsHumanReview},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/auto_apply.jsonl")
			rec.SuccessMarkers = c.succ
			rec.FailureMarkers = c.fail
			out := ScoreRecord(rec)
			if out.Category != c.want {
				t.Errorf("want %q, got %q", c.want, out.Category)
			}
		})
	}
}
|
||||||
|
|
||||||
|
func TestScoreOutcomes(t *testing.T) {
|
||||||
|
rec := mkRecord("data/_kb/outcomes.jsonl")
|
||||||
|
rec.SuccessMarkers = []string{"all_events_ok"}
|
||||||
|
if out := ScoreRecord(rec); out.Category != CategoryAccepted {
|
||||||
|
t.Errorf("all_events_ok: want accepted, got %q", out.Category)
|
||||||
|
}
|
||||||
|
|
||||||
|
rec2 := mkRecord("data/_kb/outcomes.jsonl")
|
||||||
|
rec2.ValidationResults = map[string]any{"gap_signals": float64(2)}
|
||||||
|
if out := ScoreRecord(rec2); out.Category != CategoryPartiallyAccepted {
|
||||||
|
t.Errorf("gap_signals=2: want partial, got %q (reasons=%v)", out.Category, out.Reasons)
|
||||||
|
}
|
||||||
|
|
||||||
|
rec3 := mkRecord("data/_kb/outcomes.jsonl")
|
||||||
|
if out := ScoreRecord(rec3); out.Category != CategoryNeedsHumanReview {
|
||||||
|
t.Errorf("no signal: want needs_human, got %q", out.Category)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScoreModeExperiment(t *testing.T) {
|
||||||
|
rec := mkRecord("data/_kb/mode_experiments.jsonl")
|
||||||
|
rec.Text = ""
|
||||||
|
if out := ScoreRecord(rec); out.Category != CategoryRejected {
|
||||||
|
t.Errorf("empty text: want rejected, got %q", out.Category)
|
||||||
|
}
|
||||||
|
|
||||||
|
rec.Text = "real response"
|
||||||
|
rec.LatencyMs = 130_000
|
||||||
|
if out := ScoreRecord(rec); out.Category != CategoryPartiallyAccepted {
|
||||||
|
t.Errorf("over latency cap: want partial, got %q", out.Category)
|
||||||
|
}
|
||||||
|
|
||||||
|
rec.LatencyMs = 5000
|
||||||
|
if out := ScoreRecord(rec); out.Category != CategoryNeedsHumanReview {
|
||||||
|
t.Errorf("normal: want needs_human (verdict not yet wired), got %q", out.Category)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScoreExtraction_Defaults(t *testing.T) {
|
||||||
|
for _, src := range []string{
|
||||||
|
"data/_kb/distilled_facts.jsonl",
|
||||||
|
"data/_kb/distilled_procedures.jsonl",
|
||||||
|
"data/_kb/audit_facts.jsonl",
|
||||||
|
"data/_kb/observer_escalations.jsonl",
|
||||||
|
} {
|
||||||
|
rec := mkRecord(src)
|
||||||
|
out := ScoreRecord(rec)
|
||||||
|
if out.Category != CategoryNeedsHumanReview {
|
||||||
|
t.Errorf("%s: want needs_human_review, got %q", src, out.Category)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Contamination firewall — the safety-critical guarantee ───────
|
||||||
|
|
||||||
|
func TestValidateSftSample_RejectsContaminationCategories(t *testing.T) {
|
||||||
|
for _, contaminated := range []SftQualityScore{
|
||||||
|
SftQualityScore("rejected"),
|
||||||
|
SftQualityScore("needs_human_review"),
|
||||||
|
} {
|
||||||
|
s := goodSftSample()
|
||||||
|
s.QualityScore = contaminated
|
||||||
|
err := ValidateSftSample(s)
|
||||||
|
if err == nil {
|
||||||
|
t.Errorf("contaminated quality_score=%q should fail validation", contaminated)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !errors.Is(err, ErrSftContamination) {
|
||||||
|
t.Errorf("contaminated %q: want errors.Is(err, ErrSftContamination), got %v", contaminated, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSftSample_AcceptsLegalCategories(t *testing.T) {
|
||||||
|
for _, legal := range []SftQualityScore{SftQualityAccepted, SftQualityPartiallyAccepted} {
|
||||||
|
s := goodSftSample()
|
||||||
|
s.QualityScore = legal
|
||||||
|
if err := ValidateSftSample(s); err != nil {
|
||||||
|
t.Errorf("legal quality_score=%q failed: %v", legal, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSftSample_RejectsTypoCategory(t *testing.T) {
|
||||||
|
s := goodSftSample()
|
||||||
|
s.QualityScore = "approved" // close to "accepted" but wrong
|
||||||
|
err := ValidateSftSample(s)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("typo category should fail validation")
|
||||||
|
}
|
||||||
|
// Typo is NOT contamination — should be a regular ValidationError,
|
||||||
|
// not the firewall sentinel. This distinguishes "you typo'd" from
|
||||||
|
// "you broke the spec."
|
||||||
|
if errors.Is(err, ErrSftContamination) {
|
||||||
|
t.Error("typo should not surface as ErrSftContamination")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSftSample_RejectsEmptyPair(t *testing.T) {
|
||||||
|
s := goodSftSample()
|
||||||
|
s.Instruction = " "
|
||||||
|
if err := ValidateSftSample(s); err == nil {
|
||||||
|
t.Error("whitespace-only instruction should fail")
|
||||||
|
}
|
||||||
|
|
||||||
|
s2 := goodSftSample()
|
||||||
|
s2.Response = ""
|
||||||
|
if err := ValidateSftSample(s2); err == nil {
|
||||||
|
t.Error("empty response should fail")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestValidateScoredRun_ReasonsRequired pins the validator invariant
// that an otherwise-complete ScoredRun with empty Reasons is rejected,
// and that the error message names the offending field.
func TestValidateScoredRun_ReasonsRequired(t *testing.T) {
	r := ScoredRun{
		SchemaVersion:  ScoredRunSchemaVersion,
		EvidenceRunID:  "x",
		EvidenceTaskID: "y",
		Category:       CategoryAccepted,
		Reasons:        nil, // empty — must fail
		ScoredAt:       "2026-04-29T12:00:00Z",
		ScorerVersion:  ScorerVersion,
		Provenance: Provenance{
			SourceFile: "data/_kb/scrum_reviews.jsonl",
			SigHash:    "abc",
			RecordedAt: "2026-04-29T12:00:00Z",
		},
	}
	err := ValidateScoredRun(r)
	if err == nil {
		t.Fatal("empty reasons should fail")
	}
	if !strings.Contains(err.Error(), "reasons") {
		t.Errorf("error should mention reasons: %v", err)
	}
}
|
||||||
|
|
||||||
|
func TestBuildScoredRun_DeterministicSigHash(t *testing.T) {
|
||||||
|
rec := mkRecord("data/_kb/scrum_reviews.jsonl")
|
||||||
|
rec.SuccessMarkers = []string{"accepted_on_attempt_1"}
|
||||||
|
|
||||||
|
r1, err := BuildScoredRun(rec, "data/scored-runs/2026/04/29/x.jsonl", 0, "2026-04-29T12:00:00Z")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
r2, err := BuildScoredRun(rec, "data/scored-runs/2026/04/29/x.jsonl", 0, "2026-04-29T12:00:00Z")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if r1.Provenance.SigHash != r2.Provenance.SigHash {
|
||||||
|
t.Errorf("identical EvidenceRecord should produce identical sig_hash: %s vs %s",
|
||||||
|
r1.Provenance.SigHash, r2.Provenance.SigHash)
|
||||||
|
}
|
||||||
|
if r1.Category != CategoryAccepted {
|
||||||
|
t.Errorf("scored category: %q", r1.Category)
|
||||||
|
}
|
||||||
|
if r1.ScorerVersion != ScorerVersion {
|
||||||
|
t.Errorf("scorer version stamped wrong: %q", r1.ScorerVersion)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScoreRecord_PureFunction_NoMutationOfInput(t *testing.T) {
|
||||||
|
// Belt-and-braces: the contract says "NO mutable state, identical
|
||||||
|
// input → identical output forever." Verify by scoring the same
|
||||||
|
// record twice and ensuring the input hasn't been touched.
|
||||||
|
rec := mkRecord("data/_kb/scrum_reviews.jsonl")
|
||||||
|
rec.SuccessMarkers = []string{"accepted_on_attempt_2"}
|
||||||
|
original := rec
|
||||||
|
out1 := ScoreRecord(rec)
|
||||||
|
out2 := ScoreRecord(rec)
|
||||||
|
if rec.RunID != original.RunID || len(rec.SuccessMarkers) != 1 {
|
||||||
|
t.Error("ScoreRecord mutated its input")
|
||||||
|
}
|
||||||
|
if out1.Category != out2.Category {
|
||||||
|
t.Error("ScoreRecord is non-deterministic")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Helpers ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func goodSftSample() SftSample {
|
||||||
|
return SftSample{
|
||||||
|
SchemaVersion: SftSampleSchemaVersion,
|
||||||
|
ID: "sft-1",
|
||||||
|
Instruction: "summarize the diff",
|
||||||
|
Context: "diff body...",
|
||||||
|
Response: "the diff adds a function",
|
||||||
|
SourceRunID: "run-1",
|
||||||
|
QualityScore: SftQualityAccepted,
|
||||||
|
CreatedAt: "2026-04-29T12:00:00Z",
|
||||||
|
Provenance: Provenance{
|
||||||
|
SourceFile: "data/scored-runs/2026/04/29/x.jsonl",
|
||||||
|
SigHash: "deadbeef",
|
||||||
|
RecordedAt: "2026-04-29T12:00:01Z",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// reasonsContain reports whether any entry in reasons contains sub as
// a substring. Nil/empty slices never match.
func reasonsContain(reasons []string, sub string) bool {
	for i := range reasons {
		if strings.Contains(reasons[i], sub) {
			return true
		}
	}
	return false
}
|
||||||
// ─── internal/distillation/types.go (new file, 484 lines) ───
|
|||||||
|
// Package distillation is the Go port of the Rust v1.0.0 distillation
|
||||||
|
// substrate (frozen at e7636f2). Per ADR-001 #4: port LOGIC, not
|
||||||
|
// bit-identical reproducibility.
|
||||||
|
//
|
||||||
|
// What this package owns (this commit):
|
||||||
|
// - The deterministic scorer: EvidenceRecord → ScoredRun
|
||||||
|
// - Score categories + scorer version constant
|
||||||
|
// - SftSample type + validator with the contamination firewall
|
||||||
|
// (the safety-critical piece — rejected/needs_human_review must
|
||||||
|
// NEVER ship to SFT)
|
||||||
|
//
|
||||||
|
// What's deferred to follow-up commits:
|
||||||
|
// - Materialization layer (file iteration, jsonl read/write,
|
||||||
|
// date-partitioned storage) — operational tooling on top of
|
||||||
|
// the scorer logic
|
||||||
|
// - export_preference, export_rag (other export shapes)
|
||||||
|
// - acceptance harness (the gate that locks v1.0.0)
|
||||||
|
// - replay, receipts, evidence-index builders
|
||||||
|
//
|
||||||
|
// The scorer + SftSample validator are the LOAD-BEARING pieces
|
||||||
|
// per project_distillation_substrate.md memory. The rest is plumbing
|
||||||
|
// that can land incrementally without changing the logic the
|
||||||
|
// downstream learning loop depends on.
|
||||||
|
|
||||||
|
package distillation
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ScoreCategory is one of the 4 deterministic verdicts. Matches Rust
// SCORE_CATEGORIES exactly; the strings are the on-wire JSON values.
type ScoreCategory string

// The four legal verdicts. CategoryRejected and CategoryNeedsHumanReview
// also appear in SftNever below — they never ship to SFT.
const (
	CategoryAccepted          ScoreCategory = "accepted"
	CategoryPartiallyAccepted ScoreCategory = "partially_accepted"
	CategoryRejected          ScoreCategory = "rejected"
	CategoryNeedsHumanReview  ScoreCategory = "needs_human_review"
)

// AllScoreCategories lists every legal category — used by validators
// (isValidCategory) to reject unknown on-wire strings.
var AllScoreCategories = []ScoreCategory{
	CategoryAccepted,
	CategoryPartiallyAccepted,
	CategoryRejected,
	CategoryNeedsHumanReview,
}

// ScorerVersion is hardcoded — the deterministic-output contract
// requires this. Bump the literal in the same commit as any scoring-
// rule change so the version stamp moves atomically with logic.
// Mirrors the Rust SCORER_VERSION (also v1.0.0 at e7636f2).
const ScorerVersion = "v1.0.0"
|
||||||
|
|
||||||
|
// SftQualityScore enumerates the categories LEGAL in SFT exports.
// SFT_NEVER (defined below) is the inverse — categories that NEVER
// ship to SFT under any flag combination. The contamination firewall
// is enforced at the schema layer (ValidateSftSample) AND by the
// exporter; defense in depth.
type SftQualityScore string

// The two exportable quality scores. Their on-wire strings equal the
// corresponding ScoreCategory values ("accepted"/"partially_accepted"),
// which isContaminationCategory relies on when comparing raw strings.
const (
	SftQualityAccepted          SftQualityScore = "accepted"
	SftQualityPartiallyAccepted SftQualityScore = "partially_accepted"
)

// SftQualityScores lists quality scores legal in SFT samples.
// Default is SftQualityAccepted only; --include-partial CLI flag
// expands to both. rejected and needs_human_review are NEVER legal.
var SftQualityScores = []SftQualityScore{
	SftQualityAccepted,
	SftQualityPartiallyAccepted,
}

// SftNever is the contamination firewall: ScoreCategories that NEVER
// ship to SFT under ANY caller flag. Enforced at the schema layer
// (ValidateSftSample) AND at the exporter layer. Per the Rust
// e7636f2 spec: "Hard non-negotiable: this set never expands. If you
// find yourself adding 'needs_human_review' or 'rejected' here, stop
// — that's the contamination the spec forbids."
//
// Exported so callers AND the validator share the same source of
// truth. Modifying this constant changes the contract; reviewers
// should treat any commit that touches it as a security review.
var SftNever = []ScoreCategory{
	CategoryRejected,
	CategoryNeedsHumanReview,
}
|
||||||
|
|
||||||
|
// SftSampleSchemaVersion bumps when the on-wire SftSample shape
// changes incompatibly. Match the Rust SFT_SAMPLE_SCHEMA_VERSION.
// ValidateSftSample rejects samples carrying any other value.
const SftSampleSchemaVersion = 1

// ScoredRunSchemaVersion bumps when the on-wire ScoredRun shape
// changes incompatibly. Match the Rust SCORED_RUN_SCHEMA_VERSION.
// ValidateScoredRun rejects runs carrying any other value.
const ScoredRunSchemaVersion = 1

// EvidenceSchemaVersion mirrors the Rust EVIDENCE_SCHEMA_VERSION.
// This package consumes EvidenceRecord; producing it is a separate
// concern (the materialization layer not yet ported).
const EvidenceSchemaVersion = 1
|
||||||
|
|
||||||
|
// ModelRole categorizes the kind of model output represented by an
// EvidenceRecord. Used by the SFT exporter to filter "real model
// output" from pure-extraction rows.
type ModelRole string

// Known roles. The type is a plain string on the wire, so unknown
// values decode without error. (NOTE(review): presumably consumers
// treat anything unlisted as RoleOther — confirm when the exporter
// lands.)
const (
	RoleExecutor    ModelRole = "executor"
	RoleReviewer    ModelRole = "reviewer"
	RoleExtractor   ModelRole = "extractor"
	RoleVerifier    ModelRole = "verifier"
	RoleCategorizer ModelRole = "categorizer"
	RoleTiebreaker  ModelRole = "tiebreaker"
	RoleApplier     ModelRole = "applier"
	RoleEmbedder    ModelRole = "embedder"
	RoleOther       ModelRole = "other"
)
|
||||||
|
|
||||||
|
// Provenance is the source-linkage every distillation record carries.
// SourceFile is required (no record without source linkage); other
// fields are best-effort for de-duplication and trace-back.
// Enforced by validateProvenance: SourceFile and SigHash must be
// non-empty, RecordedAt must parse as ISO 8601.
type Provenance struct {
	SourceFile string `json:"source_file"`
	// line_offset 0 is indistinguishable from "unset" (omitempty).
	LineOffset int64  `json:"line_offset,omitempty"`
	SigHash    string `json:"sig_hash"`
	RecordedAt string `json:"recorded_at"` // ISO 8601
}
|
||||||
|
|
||||||
|
// ObserverVerdict is what an observer returned for an executor's
// output. Matches the Rust enum but as a string type for JSON
// flexibility.
type ObserverVerdict string

// The three verdict values. The zero value "" means "no observer ran"
// (the field is omitempty on EvidenceRecord and SubScores).
const (
	VerdictAccept ObserverVerdict = "accept"
	VerdictReject ObserverVerdict = "reject"
	VerdictCycle  ObserverVerdict = "cycle"
)
|
||||||
|
|
||||||
|
// EvidenceRecord is one row in the canonical evidence stream.
// Producing it (transforms from raw KB streams) is a separate
// concern; this package consumes it.
//
// Fields mirror the Rust EvidenceRecord at e7636f2. Optional fields
// use Go pointers / slices so missing-vs-empty stays distinguishable
// for the scorer's heuristics.
type EvidenceRecord struct {
	// Identity + ordering.
	RunID         string `json:"run_id"`
	TaskID        string `json:"task_id"`
	Timestamp     string `json:"timestamp"`
	SchemaVersion int    `json:"schema_version"`

	// Required source linkage (see validateProvenance).
	Provenance Provenance `json:"provenance"`

	// Which model produced the output, if any.
	ModelName     string    `json:"model_name,omitempty"`
	ModelProvider string    `json:"model_provider,omitempty"`
	ModelRole     ModelRole `json:"model_role,omitempty"`

	// Content digests for de-duplication / trace-back.
	InputHash  string `json:"input_hash,omitempty"`
	OutputHash string `json:"output_hash,omitempty"`

	SourceFiles []string `json:"source_files,omitempty"`
	CommandsRun []string `json:"commands_run,omitempty"`

	RetrievedContext *RetrievedContext `json:"retrieved_context,omitempty"`

	// Observer signals.
	ObserverNotes      []string        `json:"observer_notes,omitempty"`
	ObserverVerdict    ObserverVerdict `json:"observer_verdict,omitempty"`
	ObserverConfidence float64         `json:"observer_confidence,omitempty"`
	ScratchpadSummary  string          `json:"scratchpad_summary,omitempty"`

	// Outcome markers, e.g. "accepted_on_attempt_1" — the scorer's
	// tests dispatch on these.
	SuccessMarkers []string `json:"success_markers,omitempty"`
	FailureMarkers []string `json:"failure_markers,omitempty"`

	ValidationResults map[string]any `json:"validation_results,omitempty"`

	// Recorded but does not change the scorer's output (see
	// HumanOverride doc).
	HumanOverride *HumanOverride `json:"human_override,omitempty"`

	// Operational metadata. (NOTE(review): presumably not scoring
	// inputs — confirm against scorer.go.)
	CostUSD   float64 `json:"cost_usd,omitempty"`
	LatencyMs int64   `json:"latency_ms,omitempty"`
	Text      string  `json:"text,omitempty"`
}
|
||||||
|
|
||||||
|
// RetrievedContext captures what the model saw via retrieval. Matches
// the Rust shape exactly so the JSON round-trips byte-identical (per
// ADR-001 #4 "logic, not bit-identical" — but on-wire compatibility
// is desirable for tooling that consumes EvidenceRecord JSONL).
//
// All counters use omitempty, so a recorded 0 and a missing field
// collapse to the same JSON — callers must not rely on that distinction.
type RetrievedContext struct {
	MatrixCorpora           []string `json:"matrix_corpora,omitempty"`
	MatrixHits              int      `json:"matrix_hits,omitempty"`
	MatrixChunksKept        int      `json:"matrix_chunks_kept,omitempty"`
	MatrixChunksDropped     int      `json:"matrix_chunks_dropped,omitempty"`
	PathwayFingerprintsSeen int      `json:"pathway_fingerprints_seen,omitempty"`
}
|
||||||
|
|
||||||
|
// HumanOverride captures a human-in-the-loop decision overriding the
// scorer's verdict. Recorded but doesn't change the scorer's output;
// downstream consumers (UI, distillation acceptance) decide how to
// treat it.
type HumanOverride struct {
	Overrider string `json:"overrider"`
	// Decision is a free-form string; expected values per the original
	// annotation are accept|reject|needs_review (not validated here).
	Decision     string `json:"decision"` // accept|reject|needs_review
	Reason       string `json:"reason"`
	OverriddenAt string `json:"overridden_at"`
}
|
||||||
|
|
||||||
|
// SubScores carries the deterministic scorer's intermediate signals
// alongside the final ScoreCategory. Persisted on every ScoredRun
// so a downstream UI can show "why" without re-running the scorer.
// Pointer fields distinguish "signal absent" (nil) from a measured
// false/zero value.
type SubScores struct {
	CargoGreen             *bool           `json:"cargo_green,omitempty"`
	AnchorGrounding        *float64        `json:"anchor_grounding,omitempty"` // validated to [0, 1] by ValidateScoredRun
	SchemaValid            *bool           `json:"schema_valid,omitempty"`
	PathwayReplaySucceeded *bool           `json:"pathway_replay_succeeded,omitempty"`
	ObserverVerdict        ObserverVerdict `json:"observer_verdict,omitempty"`
	AcceptedOnAttempt      *int            `json:"accepted_on_attempt,omitempty"`
	// Extra fields the Rust schema accepted as `[key: string]: unknown`.
	// Captured here as a free-form map so future signals don't require
	// type-system changes. Excluded from plain json.Marshal (tag "-");
	// use MarshalSubScores to serialize them into the same object.
	Extras map[string]any `json:"-"`
}
|
||||||
|
|
||||||
|
// ScoredRun is the deterministic scorer's output. One per
// EvidenceRecord. Provenance ties back to the materialized evidence
// row (not the raw source stream). Shape is enforced by
// ValidateScoredRun.
type ScoredRun struct {
	SchemaVersion  int           `json:"schema_version"`
	EvidenceRunID  string        `json:"evidence_run_id"`
	EvidenceTaskID string        `json:"evidence_task_id"`
	Category       ScoreCategory `json:"category"`
	Reasons        []string      `json:"reasons"` // non-empty — every score needs a reason
	ScoredAt       string        `json:"scored_at"`
	ScorerVersion  string        `json:"scorer_version"`
	SubScores      *SubScores    `json:"sub_scores,omitempty"`
	Provenance     Provenance    `json:"provenance"`
}
|
||||||
|
|
||||||
|
// SftSample is one entry in exports/sft/instruction_response.jsonl.
// The contamination firewall lives in ValidateSftSample.
type SftSample struct {
	SchemaVersion int    `json:"schema_version"`
	ID            string `json:"id"`
	// Instruction and Response must both be non-whitespace (the
	// validator's empty-pair guard).
	Instruction string          `json:"instruction"`
	Context     string          `json:"context"` // empty allowed; null/missing not
	Response    string          `json:"response"`
	SourceRunID string          `json:"source_run_id"`
	QualityScore SftQualityScore `json:"quality_score"` // accepted|partially_accepted only
	CreatedAt    string          `json:"created_at"`
	Provenance   Provenance      `json:"provenance"`
}
|
||||||
|
|
||||||
|
// ─── Validators ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// ValidationError is a single field-level violation: which field broke
// the schema and why.
type ValidationError struct {
	Field   string
	Message string
}

// Error renders the violation as "field: message".
func (v ValidationError) Error() string {
	return fmt.Sprintf("%s: %s", v.Field, v.Message)
}
|
||||||
|
|
||||||
|
// ValidationErrors is the joinable error returned by the validators
|
||||||
|
// when one or more fields violate the schema.
|
||||||
|
type ValidationErrors []ValidationError
|
||||||
|
|
||||||
|
func (es ValidationErrors) Error() string {
|
||||||
|
if len(es) == 0 {
|
||||||
|
return "no errors"
|
||||||
|
}
|
||||||
|
parts := make([]string, len(es))
|
||||||
|
for i, e := range es {
|
||||||
|
parts[i] = e.Error()
|
||||||
|
}
|
||||||
|
return strings.Join(parts, "; ")
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasErrors returns true when one or more errors are present.
|
||||||
|
func (es ValidationErrors) HasErrors() bool { return len(es) > 0 }
|
||||||
|
|
||||||
|
// ValidateScoredRun mirrors the Rust validateScoredRun. Returns nil
|
||||||
|
// on success or a ValidationErrors with the field-level violations.
|
||||||
|
func ValidateScoredRun(r ScoredRun) error {
|
||||||
|
var errs ValidationErrors
|
||||||
|
if r.SchemaVersion != ScoredRunSchemaVersion {
|
||||||
|
errs = append(errs, ValidationError{
|
||||||
|
"schema_version",
|
||||||
|
fmt.Sprintf("expected %d, got %d", ScoredRunSchemaVersion, r.SchemaVersion),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if r.EvidenceRunID == "" {
|
||||||
|
errs = append(errs, ValidationError{"evidence_run_id", "must be non-empty"})
|
||||||
|
}
|
||||||
|
if r.EvidenceTaskID == "" {
|
||||||
|
errs = append(errs, ValidationError{"evidence_task_id", "must be non-empty"})
|
||||||
|
}
|
||||||
|
if !validISOTimestamp(r.ScoredAt) {
|
||||||
|
errs = append(errs, ValidationError{"scored_at", "must be ISO 8601 timestamp"})
|
||||||
|
}
|
||||||
|
if r.ScorerVersion == "" {
|
||||||
|
errs = append(errs, ValidationError{"scorer_version", "must be non-empty"})
|
||||||
|
}
|
||||||
|
if len(r.Reasons) == 0 {
|
||||||
|
errs = append(errs, ValidationError{"reasons", "must be non-empty (every score needs a reason)"})
|
||||||
|
}
|
||||||
|
if !isValidCategory(r.Category) {
|
||||||
|
errs = append(errs, ValidationError{"category", fmt.Sprintf("must be one of %v, got %q", AllScoreCategories, r.Category)})
|
||||||
|
}
|
||||||
|
if err := validateProvenance(r.Provenance, "provenance"); err != nil {
|
||||||
|
errs = append(errs, err...)
|
||||||
|
}
|
||||||
|
if r.SubScores != nil && r.SubScores.AnchorGrounding != nil {
|
||||||
|
ag := *r.SubScores.AnchorGrounding
|
||||||
|
if ag < 0 || ag > 1 {
|
||||||
|
errs = append(errs, ValidationError{"sub_scores.anchor_grounding", "must be in [0, 1]"})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if errs.HasErrors() {
|
||||||
|
return errs
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateSftSample is the contamination firewall. Returns ErrSftContamination
// (wrapped) when quality_score is in SftNever — which is the safety-critical
// guarantee the spec calls non-negotiable.
//
// Other field violations come back as ValidationErrors.
//
// Note the asymmetry: a contamination hit returns IMMEDIATELY, discarding
// any ordinary violations accumulated so far — the sentinel dominates.
func ValidateSftSample(s SftSample) error {
	var errs ValidationErrors
	if s.SchemaVersion != SftSampleSchemaVersion {
		errs = append(errs, ValidationError{
			"schema_version",
			fmt.Sprintf("expected %d, got %d", SftSampleSchemaVersion, s.SchemaVersion),
		})
	}
	if s.ID == "" {
		errs = append(errs, ValidationError{"id", "must be non-empty"})
	}
	// Empty-pair guard: whitespace-only instruction/response are as
	// useless as empty ones, hence TrimSpace rather than == "".
	if strings.TrimSpace(s.Instruction) == "" {
		errs = append(errs, ValidationError{"instruction", "must be non-whitespace (no empty pairs)"})
	}
	if strings.TrimSpace(s.Response) == "" {
		errs = append(errs, ValidationError{"response", "must be non-whitespace (no empty pairs)"})
	}
	// Context is required-string but empty is allowed.
	// (Field is always typed as string in Go, so the only way to
	// distinguish "set" from "missing" was via the JSON layer; here
	// empty is fine.)
	if s.SourceRunID == "" {
		errs = append(errs, ValidationError{"source_run_id", "must be non-empty"})
	}
	if !validISOTimestamp(s.CreatedAt) {
		errs = append(errs, ValidationError{"created_at", "must be ISO 8601 timestamp"})
	}
	if err := validateProvenance(s.Provenance, "provenance"); err != nil {
		errs = append(errs, err...)
	}

	// Contamination firewall. Hard non-negotiable per the spec.
	if !isLegalSftQualityScore(s.QualityScore) {
		// If it's in SftNever, surface the firewall sentinel — callers
		// can errors.Is(err, ErrSftContamination) to reliably detect
		// "the spec said never" as opposed to "you typo'd a category."
		if isContaminationCategory(s.QualityScore) {
			return fmt.Errorf("%w: quality_score %q in SftNever (rejected/needs_human_review never legal in SFT)",
				ErrSftContamination, s.QualityScore)
		}
		// Not contamination, just an unknown string — ordinary error.
		errs = append(errs, ValidationError{
			"quality_score",
			fmt.Sprintf("must be one of %v, got %q", SftQualityScores, s.QualityScore),
		})
	}

	if errs.HasErrors() {
		return errs
	}
	return nil
}
|
||||||
|
|
||||||
|
// ErrSftContamination is the firewall sentinel — when ValidateSftSample
// rejects a sample because its quality_score is in SftNever, callers
// can errors.Is(err, ErrSftContamination) to reliably distinguish
// "spec violation" from "typo'd category." ValidateSftSample always
// wraps it with %w so the chain stays inspectable.
var ErrSftContamination = errors.New("distillation: SFT contamination — quality_score in SftNever")
|
||||||
|
|
||||||
|
// ─── Internal helpers ────────────────────────────────────────────
|
||||||
|
|
||||||
|
func isValidCategory(c ScoreCategory) bool {
|
||||||
|
for _, v := range AllScoreCategories {
|
||||||
|
if c == v {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func isLegalSftQualityScore(q SftQualityScore) bool {
|
||||||
|
for _, v := range SftQualityScores {
|
||||||
|
if q == v {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func isContaminationCategory(q SftQualityScore) bool {
|
||||||
|
// Compare as ScoreCategory — the on-wire string is the same; this
|
||||||
|
// just guards the firewall against typos that happen to match
|
||||||
|
// SftNever string-wise.
|
||||||
|
for _, v := range SftNever {
|
||||||
|
if string(v) == string(q) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// validISOTimestamp reports whether s is a parseable RFC 3339
// timestamp (the ISO 8601 profile the Rust producers emit).
// The empty string is explicitly invalid.
func validISOTimestamp(s string) bool {
	if s == "" {
		return false
	}
	// One parse suffices: time.Parse with the RFC3339 layout also
	// accepts an optional fractional-second field on input even though
	// the layout omits it (documented parser behavior), so the former
	// second pass with RFC3339Nano was dead code — it could never
	// succeed where this parse failed.
	_, err := time.Parse(time.RFC3339, s)
	return err == nil
}
|
||||||
|
|
||||||
|
func validateProvenance(p Provenance, field string) ValidationErrors {
|
||||||
|
var errs ValidationErrors
|
||||||
|
if p.SourceFile == "" {
|
||||||
|
errs = append(errs, ValidationError{field + ".source_file", "must be non-empty"})
|
||||||
|
}
|
||||||
|
if p.SigHash == "" {
|
||||||
|
errs = append(errs, ValidationError{field + ".sig_hash", "must be non-empty"})
|
||||||
|
}
|
||||||
|
if !validISOTimestamp(p.RecordedAt) {
|
||||||
|
errs = append(errs, ValidationError{field + ".recorded_at", "must be ISO 8601 timestamp"})
|
||||||
|
}
|
||||||
|
return errs
|
||||||
|
}
|
||||||
|
|
||||||
|
// MarshalSubScores is a shim — Go's encoding/json doesn't merge a
|
||||||
|
// "rest" map into the struct's JSON output by default. Callers that
|
||||||
|
// need Extras serialized into the same object can use this helper.
|
||||||
|
func MarshalSubScores(s *SubScores) ([]byte, error) {
|
||||||
|
if s == nil {
|
||||||
|
return []byte("null"), nil
|
||||||
|
}
|
||||||
|
// First marshal the typed fields normally.
|
||||||
|
type alias SubScores
|
||||||
|
base, err := json.Marshal((*alias)(s))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(s.Extras) == 0 {
|
||||||
|
return base, nil
|
||||||
|
}
|
||||||
|
// Decode back to a map, merge Extras, re-encode. Less efficient
|
||||||
|
// but keeps the field semantics correct (typed fields override
|
||||||
|
// extras on collision — first-write-wins for known keys).
|
||||||
|
var combined map[string]any
|
||||||
|
if err := json.Unmarshal(base, &combined); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for k, v := range s.Extras {
|
||||||
|
if _, exists := combined[k]; !exists {
|
||||||
|
combined[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return json.Marshal(combined)
|
||||||
|
}
|
||||||