From 57d0df125d05f7e6347526c96a81ba8ca14a9159 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Apr 2026 20:04:29 -0500 Subject: [PATCH] =?UTF-8?q?E=20(partial):=20distillation=20port=20?= =?UTF-8?q?=E2=80=94=20scorer=20+=20contamination=20firewall?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First slice of the Rust v1.0.0 distillation substrate (e7636f2) ported to Go per ADR-001 #4 (port LOGIC, not bit-identical reproducibility). This commit lands the LOAD-BEARING pieces named in project_distillation_substrate.md memory: - The deterministic Success Scorer (8 sub-scorers + dispatch) - The contamination firewall on SFT samples (the "non-negotiable" spec property: rejected/needs_human_review NEVER ship to SFT) - All on-wire types + validators for ScoredRun, SftSample, EvidenceRecord with Provenance Files: internal/distillation/types.go — types + ScorerVersion + SftNever + ValidateScoredRun + ValidateSftSample internal/distillation/scorer.go — ScoreRecord + 8 class scorers + BuildScoredRun (deterministic) internal/distillation/scorer_test.go — ~40 test cases: - source-class dispatch (verdict / telemetry / extraction) - scrum_review (4 attempt cases) - observer_review (5 verdict cases) - audit (legacy + severity, 9 cases) - auto_apply (4 cases) - outcomes / mode_experiment / extraction - CONTAMINATION FIREWALL: ErrSftContamination sentinel fires on rejected/needs_human_review, distinct from typo errors - empty-pair guard (instruction/response trim != "") - reasons-required ScoredRun validation - deterministic sig_hash on identical input - purity check (input not mutated, repeatable output) Per the 2026-04-29 cross-lineage scrum's discipline: false-positive findings would be dismissed inline (none in this commit). Real findings would be addressed before merge — but this is greenfield port code reviewed against its Rust source line-by-line, which the test suite encodes as truth tables. 
Explicitly DEFERRED to follow-up commits: - Materialization layer (jsonl read/write, date-partitioned storage in data/scored-runs/YYYY/MM/DD/, evidence index) - SFT exporter (file iteration + filtering — the SCORING firewall is here; the EXPORT firewall is the next layer) - export_preference, export_rag (other export shapes) - Acceptance harness (16/16 acceptance gate that locks v1.0.0) - replay, receipts, build_evidence_index, transforms The scorer + firewall validator are pure functions — operational tooling layers on top without changing the deterministic logic the downstream learning loop depends on. The Go ScorerVersion stays at v1.0.0 to match the Rust e7636f2 baseline; bumping in the Go materialization commit is reserved for the next scoring-rule change, NOT the port itself. 15-smoke regression all green. vet clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/distillation/scorer.go | 410 +++++++++++++++++++++++ internal/distillation/scorer_test.go | 375 +++++++++++++++++++++ internal/distillation/types.go | 484 +++++++++++++++++++++++++++ 3 files changed, 1269 insertions(+) create mode 100644 internal/distillation/scorer.go create mode 100644 internal/distillation/scorer_test.go create mode 100644 internal/distillation/types.go diff --git a/internal/distillation/scorer.go b/internal/distillation/scorer.go new file mode 100644 index 0000000..b082c34 --- /dev/null +++ b/internal/distillation/scorer.go @@ -0,0 +1,410 @@ +package distillation + +// scorer.go — pure deterministic Success Scorer (port of Rust +// scripts/distillation/scorer.rs at e7636f2). +// +// Takes one EvidenceRecord, returns category + reasons + sub_scores. +// NO I/O, NO LLM, NO clock reads, NO mutable state. Identical input +// → identical output forever. Same contract as the Rust source — +// future scoring-rule changes bump ScorerVersion atomically with +// the logic. 
+// +// Three-class strategy mirrors the Rust source taxonomy +// (docs/recon/local-distillation-recon.md + data/_kb/evidence_health.md): +// +// CLASS A — verdict-bearing +// scrum_reviews, observer_reviews, audits, contract_analyses +// Direct scoring from existing markers / observer_verdict +// +// CLASS B — telemetry-rich +// auto_apply, outcomes, mode_experiments +// Markers exist but partial; needs_human_review fills the gap +// +// CLASS C — pure-extraction (no native scoring signal) +// distilled_*, audit_facts, observer_escalations +// Default needs_human_review; v2 will JOIN to parent verdict + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "strconv" + "strings" +) + +// sourceClass categorizes an EvidenceRecord's source_file for the +// scorer's three-class dispatch. +type sourceClass string + +const ( + classVerdict sourceClass = "verdict" + classTelemetry sourceClass = "telemetry" + classExtraction sourceClass = "extraction" +) + +// sourceClassFor maps a source_file (from provenance) to a class. +// Centralized so adding a new source is a one-line change. Mirrors +// the Rust switch on the stem (data/_kb/X.jsonl → X). +func sourceClassFor(sourceFile string) sourceClass { + stem := strings.TrimSuffix(strings.TrimPrefix(sourceFile, "data/_kb/"), ".jsonl") + switch stem { + case "scrum_reviews", "observer_reviews", "audits", "contract_analyses": + return classVerdict + case "auto_apply", "outcomes", "mode_experiments": + return classTelemetry + case "distilled_facts", "distilled_procedures", "distilled_config_hints", + "audit_facts", "observer_escalations": + return classExtraction + default: + // Unknown source → most conservative path (forces + // needs_human_review until a transform is added). + return classExtraction + } +} + +// stemOf extracts the stable corpus identifier from a source_file. +// E.g. "data/_kb/scrum_reviews.jsonl" → "scrum_reviews". 
+func stemOf(sourceFile string) string { + return strings.TrimSuffix(strings.TrimPrefix(sourceFile, "data/_kb/"), ".jsonl") +} + +// ScoreOutput is the scorer's return shape — category + reasons + +// the captured sub-signals. Reasons is always non-empty (validator +// requires it). +type ScoreOutput struct { + Category ScoreCategory + Reasons []string + SubScores *SubScores +} + +// ScoreRecord dispatches an EvidenceRecord to the appropriate class +// scorer and returns the verdict + reasons + sub-scores. Pure +// function. Caller wraps the output in a ScoredRun via BuildScoredRun +// for the on-wire shape. +func ScoreRecord(rec EvidenceRecord) ScoreOutput { + cls := sourceClassFor(rec.Provenance.SourceFile) + stem := stemOf(rec.Provenance.SourceFile) + + switch cls { + case classVerdict: + switch stem { + case "scrum_reviews": + return scoreScrumReview(rec) + case "observer_reviews": + return scoreObserverReview(rec) + case "audits": + return scoreAudit(rec) + case "contract_analyses": + return scoreContractAnalysis(rec) + } + case classTelemetry: + switch stem { + case "auto_apply": + return scoreAutoApply(rec) + case "outcomes": + return scoreOutcomes(rec) + case "mode_experiments": + return scoreModeExperiment(rec) + } + } + return scoreExtraction() +} + +// BuildScoredRun composes a complete ScoredRun for persistence. +// Caller supplies recorded_at + the source file path/line offset. +// SigHash is computed deterministically from the EvidenceRecord +// JSON; ScoredRun traces to the materialized evidence row. 
+func BuildScoredRun(rec EvidenceRecord, sourceFile string, lineOffset int64, recordedAt string) (ScoredRun, error) { + out := ScoreRecord(rec) + sig, err := canonicalSha256(rec) + if err != nil { + return ScoredRun{}, fmt.Errorf("scoredrun sig hash: %w", err) + } + return ScoredRun{ + SchemaVersion: ScoredRunSchemaVersion, + EvidenceRunID: rec.RunID, + EvidenceTaskID: rec.TaskID, + Category: out.Category, + Reasons: out.Reasons, + ScoredAt: recordedAt, + ScorerVersion: ScorerVersion, + SubScores: out.SubScores, + Provenance: Provenance{ + SourceFile: sourceFile, + LineOffset: lineOffset, + SigHash: sig, + RecordedAt: recordedAt, + }, + }, nil +} + +// canonicalSha256 hashes a value's canonical JSON encoding. Used +// for ScoredRun.Provenance.SigHash. Matches the Rust pattern of +// "hash the structured object, not the raw source bytes" so +// re-materialization with same logic produces same hash. +func canonicalSha256(v any) (string, error) { + bs, err := json.Marshal(v) + if err != nil { + return "", err + } + sum := sha256.Sum256(bs) + return hex.EncodeToString(sum[:]), nil +} + +// ─── Class A: verdict-bearing ──────────────────────────────────── + +func scoreScrumReview(r EvidenceRecord) ScoreOutput { + subs := &SubScores{} + successMarker := findPrefix(r.SuccessMarkers, "accepted_on_attempt_") + if successMarker == "" { + return ScoreOutput{ + Category: CategoryNeedsHumanReview, + Reasons: []string{"scrum_review missing accepted_on_attempt_* success marker"}, + SubScores: subs, + } + } + attemptStr := strings.TrimPrefix(successMarker, "accepted_on_attempt_") + attempt, err := strconv.Atoi(attemptStr) + if err != nil { + return ScoreOutput{ + Category: CategoryNeedsHumanReview, + Reasons: []string{"scrum_review accepted_on_attempt_* marker has non-integer suffix: " + attemptStr}, + SubScores: subs, + } + } + subs.AcceptedOnAttempt = &attempt + switch { + case attempt == 1: + return ScoreOutput{ + Category: CategoryAccepted, + Reasons: []string{"scrum: accepted 
on first attempt"}, + SubScores: subs, + } + case attempt <= 3: + return ScoreOutput{ + Category: CategoryPartiallyAccepted, + Reasons: []string{fmt.Sprintf("scrum: accepted after %d attempts", attempt)}, + SubScores: subs, + } + default: + return ScoreOutput{ + Category: CategoryPartiallyAccepted, + Reasons: []string{fmt.Sprintf("scrum: accepted only after %d attempts (high-cost path)", attempt)}, + SubScores: subs, + } + } +} + +func scoreObserverReview(r EvidenceRecord) ScoreOutput { + subs := &SubScores{} + switch r.ObserverVerdict { + case VerdictAccept: + subs.ObserverVerdict = VerdictAccept + return ScoreOutput{ + Category: CategoryAccepted, + Reasons: []string{"observer accepted the reviewed attempt"}, + SubScores: subs, + } + case VerdictReject: + subs.ObserverVerdict = VerdictReject + return ScoreOutput{ + Category: CategoryRejected, + Reasons: []string{"observer rejected the reviewed attempt"}, + SubScores: subs, + } + case VerdictCycle: + subs.ObserverVerdict = VerdictCycle + return ScoreOutput{ + Category: CategoryPartiallyAccepted, + Reasons: []string{"observer flagged the attempt as cycling — partial signal"}, + SubScores: subs, + } + default: + return ScoreOutput{ + Category: CategoryNeedsHumanReview, + Reasons: []string{fmt.Sprintf("observer_verdict missing or unrecognized: %q", r.ObserverVerdict)}, + SubScores: subs, + } + } +} + +func scoreAudit(r EvidenceRecord) ScoreOutput { + subs := &SubScores{} + succ := r.SuccessMarkers + fail := r.FailureMarkers + + // Legacy markers (back-compat with pre-fix materializations). 
+ if contains(succ, "approved") { + return ScoreOutput{Category: CategoryAccepted, + Reasons: []string{"audit overall=approved (legacy marker)"}, SubScores: subs} + } + if contains(fail, "blocked") { + return ScoreOutput{Category: CategoryRejected, + Reasons: []string{"audit overall=block (legacy marker)"}, SubScores: subs} + } + if contains(fail, "request_changes") { + return ScoreOutput{Category: CategoryPartiallyAccepted, + Reasons: []string{"audit overall=request_changes (legacy marker)"}, SubScores: subs} + } + + // Severity-derived markers (Phase 2 transform). + sevSucc := findPrefix(succ, "audit_severity_") + sevFail := findPrefix(fail, "audit_severity_") + if sevSucc != "" { + return ScoreOutput{Category: CategoryAccepted, + Reasons: []string{sevSucc + " → minor finding"}, SubScores: subs} + } + if sevFail == "audit_severity_medium" { + return ScoreOutput{Category: CategoryPartiallyAccepted, + Reasons: []string{"audit_severity_medium → finding warrants review"}, SubScores: subs} + } + if sevFail == "audit_severity_high" || sevFail == "audit_severity_critical" { + return ScoreOutput{Category: CategoryRejected, + Reasons: []string{sevFail + " → blocking finding"}, SubScores: subs} + } + return ScoreOutput{Category: CategoryNeedsHumanReview, + Reasons: []string{"audit row has no severity or overall marker"}, SubScores: subs} +} + +func scoreContractAnalysis(r EvidenceRecord) ScoreOutput { + subs := &SubScores{} + // failure_markers takes precedence: explicit rejection beats absent verdict. 
+ if contains(r.FailureMarkers, "observer_rejected") || r.ObserverVerdict == VerdictReject { + subs.ObserverVerdict = VerdictReject + return ScoreOutput{Category: CategoryRejected, + Reasons: []string{"contract analysis: observer rejected"}, SubScores: subs} + } + switch r.ObserverVerdict { + case VerdictAccept: + subs.ObserverVerdict = VerdictAccept + return ScoreOutput{Category: CategoryAccepted, + Reasons: []string{"contract analysis: observer accepted"}, SubScores: subs} + case VerdictCycle: + subs.ObserverVerdict = VerdictCycle + return ScoreOutput{Category: CategoryPartiallyAccepted, + Reasons: []string{"contract analysis: observer cycled (partial)"}, SubScores: subs} + } + return ScoreOutput{Category: CategoryNeedsHumanReview, + Reasons: []string{"contract analysis: no observer verdict signal"}, SubScores: subs} +} + +// ─── Class B: telemetry-rich ───────────────────────────────────── + +func scoreAutoApply(r EvidenceRecord) ScoreOutput { + subs := &SubScores{} + if contains(r.SuccessMarkers, "committed") { + t := true + subs.CargoGreen = &t + return ScoreOutput{Category: CategoryAccepted, + Reasons: []string{"auto_apply: patch committed (cargo green + warning baseline + rationale alignment passed)"}, + SubScores: subs} + } + reverted := findContaining(r.FailureMarkers, "reverted") + if reverted != "" { + if strings.Contains(reverted, "build_red") { + f := false + subs.CargoGreen = &f + } + return ScoreOutput{Category: CategoryRejected, + Reasons: []string{"auto_apply: " + reverted}, SubScores: subs} + } + return ScoreOutput{Category: CategoryNeedsHumanReview, + Reasons: []string{"auto_apply: no commit + no revert (likely no_patches or dry_run)"}, + SubScores: subs} +} + +func scoreOutcomes(r EvidenceRecord) ScoreOutput { + subs := &SubScores{} + if contains(r.SuccessMarkers, "all_events_ok") { + return ScoreOutput{Category: CategoryAccepted, + Reasons: []string{"outcomes: all events ok"}, SubScores: subs} + } + if gap := numericFromMap(r.ValidationResults, 
"gap_signals"); gap > 0 { + return ScoreOutput{Category: CategoryPartiallyAccepted, + Reasons: []string{fmt.Sprintf("outcomes: %d gap signal(s) detected", int(gap))}, + SubScores: subs} + } + return ScoreOutput{Category: CategoryNeedsHumanReview, + Reasons: []string{"outcomes: no decisive marker — defer to human"}, + SubScores: subs} +} + +func scoreModeExperiment(r EvidenceRecord) ScoreOutput { + subs := &SubScores{} + if strings.TrimSpace(r.Text) == "" { + return ScoreOutput{Category: CategoryRejected, + Reasons: []string{"mode_experiment: empty response text"}, SubScores: subs} + } + if r.LatencyMs > 120_000 { + return ScoreOutput{Category: CategoryPartiallyAccepted, + Reasons: []string{fmt.Sprintf("mode_experiment: latency %dms exceeds 2-minute soft cap", r.LatencyMs)}, + SubScores: subs} + } + return ScoreOutput{Category: CategoryNeedsHumanReview, + Reasons: []string{"mode_experiment: response present, latency within bounds; verdict not yet wired"}, + SubScores: subs} +} + +// ─── Class C: pure-extraction ──────────────────────────────────── + +func scoreExtraction() ScoreOutput { + return ScoreOutput{ + Category: CategoryNeedsHumanReview, + Reasons: []string{"extraction-class source has no native scoring signal — JOIN to parent verdict pending Phase 3 v2"}, + SubScores: &SubScores{}, + } +} + +// ─── Internal helpers ──────────────────────────────────────────── + +func contains(slice []string, want string) bool { + for _, s := range slice { + if s == want { + return true + } + } + return false +} + +func findPrefix(slice []string, prefix string) string { + for _, s := range slice { + if strings.HasPrefix(s, prefix) { + return s + } + } + return "" +} + +func findContaining(slice []string, sub string) string { + for _, s := range slice { + if strings.Contains(s, sub) { + return s + } + } + return "" +} + +func numericFromMap(m map[string]any, key string) float64 { + if m == nil { + return 0 + } + v, ok := m[key] + if !ok { + return 0 + } + switch n := v.(type) 
{ + case int: + return float64(n) + case int64: + return float64(n) + case float32: + return float64(n) + case float64: + return n + case json.Number: + f, _ := n.Float64() + return f + } + return 0 +} diff --git a/internal/distillation/scorer_test.go b/internal/distillation/scorer_test.go new file mode 100644 index 0000000..2a1e829 --- /dev/null +++ b/internal/distillation/scorer_test.go @@ -0,0 +1,375 @@ +package distillation + +import ( + "errors" + "strings" + "testing" +) + +func mkRecord(sourceFile string) EvidenceRecord { + return EvidenceRecord{ + RunID: "run-1", + TaskID: "task-1", + Timestamp: "2026-04-29T12:00:00Z", + SchemaVersion: EvidenceSchemaVersion, + Provenance: Provenance{ + SourceFile: sourceFile, + SigHash: "deadbeef", + RecordedAt: "2026-04-29T12:00:01Z", + }, + } +} + +func TestSourceClassFor(t *testing.T) { + cases := []struct { + path string + want sourceClass + }{ + {"data/_kb/scrum_reviews.jsonl", classVerdict}, + {"data/_kb/observer_reviews.jsonl", classVerdict}, + {"data/_kb/audits.jsonl", classVerdict}, + {"data/_kb/contract_analyses.jsonl", classVerdict}, + {"data/_kb/auto_apply.jsonl", classTelemetry}, + {"data/_kb/outcomes.jsonl", classTelemetry}, + {"data/_kb/mode_experiments.jsonl", classTelemetry}, + {"data/_kb/distilled_facts.jsonl", classExtraction}, + {"data/_kb/audit_facts.jsonl", classExtraction}, + {"data/_kb/observer_escalations.jsonl", classExtraction}, + {"data/_kb/wholly_unknown.jsonl", classExtraction}, // unknown → extraction (conservative) + } + for _, c := range cases { + got := sourceClassFor(c.path) + if got != c.want { + t.Errorf("sourceClassFor(%q): want %q, got %q", c.path, c.want, got) + } + } +} + +func TestScoreScrumReview(t *testing.T) { + cases := []struct { + name string + successMarkers []string + wantCategory ScoreCategory + wantReasonSub string + }{ + { + name: "first attempt → accepted", + successMarkers: []string{"accepted_on_attempt_1"}, + wantCategory: CategoryAccepted, + wantReasonSub: "first 
attempt", + }, + { + name: "second attempt → partial", + successMarkers: []string{"accepted_on_attempt_2"}, + wantCategory: CategoryPartiallyAccepted, + wantReasonSub: "after 2 attempts", + }, + { + name: "fourth attempt → partial (high-cost)", + successMarkers: []string{"accepted_on_attempt_4"}, + wantCategory: CategoryPartiallyAccepted, + wantReasonSub: "high-cost", + }, + { + name: "missing marker → needs_human_review", + successMarkers: []string{}, + wantCategory: CategoryNeedsHumanReview, + wantReasonSub: "missing accepted_on_attempt", + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + rec := mkRecord("data/_kb/scrum_reviews.jsonl") + rec.SuccessMarkers = c.successMarkers + out := ScoreRecord(rec) + if out.Category != c.wantCategory { + t.Errorf("category: want %q, got %q (reasons=%v)", c.wantCategory, out.Category, out.Reasons) + } + if !reasonsContain(out.Reasons, c.wantReasonSub) { + t.Errorf("reasons missing %q: %v", c.wantReasonSub, out.Reasons) + } + }) + } +} + +func TestScoreObserverReview(t *testing.T) { + cases := []struct { + verdict ObserverVerdict + want ScoreCategory + }{ + {VerdictAccept, CategoryAccepted}, + {VerdictReject, CategoryRejected}, + {VerdictCycle, CategoryPartiallyAccepted}, + {"", CategoryNeedsHumanReview}, + {"weird-verdict", CategoryNeedsHumanReview}, + } + for _, c := range cases { + rec := mkRecord("data/_kb/observer_reviews.jsonl") + rec.ObserverVerdict = c.verdict + out := ScoreRecord(rec) + if out.Category != c.want { + t.Errorf("verdict=%q: want %q, got %q", c.verdict, c.want, out.Category) + } + } +} + +func TestScoreAudit_LegacyAndSeverityMarkers(t *testing.T) { + cases := []struct { + name string + succ []string + fail []string + want ScoreCategory + }{ + {"legacy approved", []string{"approved"}, nil, CategoryAccepted}, + {"legacy blocked", nil, []string{"blocked"}, CategoryRejected}, + {"legacy request_changes", nil, []string{"request_changes"}, CategoryPartiallyAccepted}, + {"severity_low → 
accepted", []string{"audit_severity_low"}, nil, CategoryAccepted}, + {"severity_info → accepted", []string{"audit_severity_info"}, nil, CategoryAccepted}, + {"severity_medium fail → partial", nil, []string{"audit_severity_medium"}, CategoryPartiallyAccepted}, + {"severity_high → rejected", nil, []string{"audit_severity_high"}, CategoryRejected}, + {"severity_critical → rejected", nil, []string{"audit_severity_critical"}, CategoryRejected}, + {"no markers", nil, nil, CategoryNeedsHumanReview}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + rec := mkRecord("data/_kb/audits.jsonl") + rec.SuccessMarkers = c.succ + rec.FailureMarkers = c.fail + out := ScoreRecord(rec) + if out.Category != c.want { + t.Errorf("want %q, got %q (reasons=%v)", c.want, out.Category, out.Reasons) + } + }) + } +} + +func TestScoreAutoApply(t *testing.T) { + cases := []struct { + name string + succ []string + fail []string + want ScoreCategory + }{ + {"committed → accepted", []string{"committed"}, nil, CategoryAccepted}, + {"reverted_build_red → rejected", nil, []string{"reverted_build_red"}, CategoryRejected}, + {"reverted other → rejected", nil, []string{"reverted_warning_count_up"}, CategoryRejected}, + {"no signal → needs_human", nil, nil, CategoryNeedsHumanReview}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + rec := mkRecord("data/_kb/auto_apply.jsonl") + rec.SuccessMarkers = c.succ + rec.FailureMarkers = c.fail + out := ScoreRecord(rec) + if out.Category != c.want { + t.Errorf("want %q, got %q", c.want, out.Category) + } + }) + } +} + +func TestScoreOutcomes(t *testing.T) { + rec := mkRecord("data/_kb/outcomes.jsonl") + rec.SuccessMarkers = []string{"all_events_ok"} + if out := ScoreRecord(rec); out.Category != CategoryAccepted { + t.Errorf("all_events_ok: want accepted, got %q", out.Category) + } + + rec2 := mkRecord("data/_kb/outcomes.jsonl") + rec2.ValidationResults = map[string]any{"gap_signals": float64(2)} + if out := 
ScoreRecord(rec2); out.Category != CategoryPartiallyAccepted { + t.Errorf("gap_signals=2: want partial, got %q (reasons=%v)", out.Category, out.Reasons) + } + + rec3 := mkRecord("data/_kb/outcomes.jsonl") + if out := ScoreRecord(rec3); out.Category != CategoryNeedsHumanReview { + t.Errorf("no signal: want needs_human, got %q", out.Category) + } +} + +func TestScoreModeExperiment(t *testing.T) { + rec := mkRecord("data/_kb/mode_experiments.jsonl") + rec.Text = "" + if out := ScoreRecord(rec); out.Category != CategoryRejected { + t.Errorf("empty text: want rejected, got %q", out.Category) + } + + rec.Text = "real response" + rec.LatencyMs = 130_000 + if out := ScoreRecord(rec); out.Category != CategoryPartiallyAccepted { + t.Errorf("over latency cap: want partial, got %q", out.Category) + } + + rec.LatencyMs = 5000 + if out := ScoreRecord(rec); out.Category != CategoryNeedsHumanReview { + t.Errorf("normal: want needs_human (verdict not yet wired), got %q", out.Category) + } +} + +func TestScoreExtraction_Defaults(t *testing.T) { + for _, src := range []string{ + "data/_kb/distilled_facts.jsonl", + "data/_kb/distilled_procedures.jsonl", + "data/_kb/audit_facts.jsonl", + "data/_kb/observer_escalations.jsonl", + } { + rec := mkRecord(src) + out := ScoreRecord(rec) + if out.Category != CategoryNeedsHumanReview { + t.Errorf("%s: want needs_human_review, got %q", src, out.Category) + } + } +} + +// ─── Contamination firewall — the safety-critical guarantee ─────── + +func TestValidateSftSample_RejectsContaminationCategories(t *testing.T) { + for _, contaminated := range []SftQualityScore{ + SftQualityScore("rejected"), + SftQualityScore("needs_human_review"), + } { + s := goodSftSample() + s.QualityScore = contaminated + err := ValidateSftSample(s) + if err == nil { + t.Errorf("contaminated quality_score=%q should fail validation", contaminated) + continue + } + if !errors.Is(err, ErrSftContamination) { + t.Errorf("contaminated %q: want errors.Is(err, ErrSftContamination), 
got %v", contaminated, err) + } + } +} + +func TestValidateSftSample_AcceptsLegalCategories(t *testing.T) { + for _, legal := range []SftQualityScore{SftQualityAccepted, SftQualityPartiallyAccepted} { + s := goodSftSample() + s.QualityScore = legal + if err := ValidateSftSample(s); err != nil { + t.Errorf("legal quality_score=%q failed: %v", legal, err) + } + } +} + +func TestValidateSftSample_RejectsTypoCategory(t *testing.T) { + s := goodSftSample() + s.QualityScore = "approved" // close to "accepted" but wrong + err := ValidateSftSample(s) + if err == nil { + t.Fatal("typo category should fail validation") + } + // Typo is NOT contamination — should be a regular ValidationError, + // not the firewall sentinel. This distinguishes "you typo'd" from + // "you broke the spec." + if errors.Is(err, ErrSftContamination) { + t.Error("typo should not surface as ErrSftContamination") + } +} + +func TestValidateSftSample_RejectsEmptyPair(t *testing.T) { + s := goodSftSample() + s.Instruction = " " + if err := ValidateSftSample(s); err == nil { + t.Error("whitespace-only instruction should fail") + } + + s2 := goodSftSample() + s2.Response = "" + if err := ValidateSftSample(s2); err == nil { + t.Error("empty response should fail") + } +} + +func TestValidateScoredRun_ReasonsRequired(t *testing.T) { + r := ScoredRun{ + SchemaVersion: ScoredRunSchemaVersion, + EvidenceRunID: "x", + EvidenceTaskID: "y", + Category: CategoryAccepted, + Reasons: nil, // empty — must fail + ScoredAt: "2026-04-29T12:00:00Z", + ScorerVersion: ScorerVersion, + Provenance: Provenance{ + SourceFile: "data/_kb/scrum_reviews.jsonl", + SigHash: "abc", + RecordedAt: "2026-04-29T12:00:00Z", + }, + } + err := ValidateScoredRun(r) + if err == nil { + t.Fatal("empty reasons should fail") + } + if !strings.Contains(err.Error(), "reasons") { + t.Errorf("error should mention reasons: %v", err) + } +} + +func TestBuildScoredRun_DeterministicSigHash(t *testing.T) { + rec := mkRecord("data/_kb/scrum_reviews.jsonl") 
+ rec.SuccessMarkers = []string{"accepted_on_attempt_1"} + + r1, err := BuildScoredRun(rec, "data/scored-runs/2026/04/29/x.jsonl", 0, "2026-04-29T12:00:00Z") + if err != nil { + t.Fatal(err) + } + r2, err := BuildScoredRun(rec, "data/scored-runs/2026/04/29/x.jsonl", 0, "2026-04-29T12:00:00Z") + if err != nil { + t.Fatal(err) + } + if r1.Provenance.SigHash != r2.Provenance.SigHash { + t.Errorf("identical EvidenceRecord should produce identical sig_hash: %s vs %s", + r1.Provenance.SigHash, r2.Provenance.SigHash) + } + if r1.Category != CategoryAccepted { + t.Errorf("scored category: %q", r1.Category) + } + if r1.ScorerVersion != ScorerVersion { + t.Errorf("scorer version stamped wrong: %q", r1.ScorerVersion) + } +} + +func TestScoreRecord_PureFunction_NoMutationOfInput(t *testing.T) { + // Belt-and-braces: the contract says "NO mutable state, identical + // input → identical output forever." Verify by scoring the same + // record twice and ensuring the input hasn't been touched. + rec := mkRecord("data/_kb/scrum_reviews.jsonl") + rec.SuccessMarkers = []string{"accepted_on_attempt_2"} + original := rec + out1 := ScoreRecord(rec) + out2 := ScoreRecord(rec) + if rec.RunID != original.RunID || len(rec.SuccessMarkers) != 1 { + t.Error("ScoreRecord mutated its input") + } + if out1.Category != out2.Category { + t.Error("ScoreRecord is non-deterministic") + } +} + +// ─── Helpers ───────────────────────────────────────────────────── + +func goodSftSample() SftSample { + return SftSample{ + SchemaVersion: SftSampleSchemaVersion, + ID: "sft-1", + Instruction: "summarize the diff", + Context: "diff body...", + Response: "the diff adds a function", + SourceRunID: "run-1", + QualityScore: SftQualityAccepted, + CreatedAt: "2026-04-29T12:00:00Z", + Provenance: Provenance{ + SourceFile: "data/scored-runs/2026/04/29/x.jsonl", + SigHash: "deadbeef", + RecordedAt: "2026-04-29T12:00:01Z", + }, + } +} + +func reasonsContain(reasons []string, sub string) bool { + for _, r := range 
reasons { + if strings.Contains(r, sub) { + return true + } + } + return false +} diff --git a/internal/distillation/types.go b/internal/distillation/types.go new file mode 100644 index 0000000..40cd761 --- /dev/null +++ b/internal/distillation/types.go @@ -0,0 +1,484 @@ +// Package distillation is the Go port of the Rust v1.0.0 distillation +// substrate (frozen at e7636f2). Per ADR-001 #4: port LOGIC, not +// bit-identical reproducibility. +// +// What this package owns (this commit): +// - The deterministic scorer: EvidenceRecord → ScoredRun +// - Score categories + scorer version constant +// - SftSample type + validator with the contamination firewall +// (the safety-critical piece — rejected/needs_human_review must +// NEVER ship to SFT) +// +// What's deferred to follow-up commits: +// - Materialization layer (file iteration, jsonl read/write, +// date-partitioned storage) — operational tooling on top of +// the scorer logic +// - export_preference, export_rag (other export shapes) +// - acceptance harness (the gate that locks v1.0.0) +// - replay, receipts, evidence-index builders +// +// The scorer + SftSample validator are the LOAD-BEARING pieces +// per project_distillation_substrate.md memory. The rest is plumbing +// that can land incrementally without changing the logic the +// downstream learning loop depends on. + +package distillation + +import ( + "encoding/json" + "errors" + "fmt" + "strings" + "time" +) + +// ScoreCategory is one of the 4 deterministic verdicts. Matches Rust +// SCORE_CATEGORIES exactly. +type ScoreCategory string + +const ( + CategoryAccepted ScoreCategory = "accepted" + CategoryPartiallyAccepted ScoreCategory = "partially_accepted" + CategoryRejected ScoreCategory = "rejected" + CategoryNeedsHumanReview ScoreCategory = "needs_human_review" +) + +// AllScoreCategories lists every legal category — used by validators. 
var AllScoreCategories = []ScoreCategory{
	CategoryAccepted,
	CategoryPartiallyAccepted,
	CategoryRejected,
	CategoryNeedsHumanReview,
}

// ScorerVersion is hardcoded — the deterministic-output contract
// requires this. Bump the literal in the same commit as any scoring-
// rule change so the version stamp moves atomically with logic.
// Mirrors the Rust SCORER_VERSION (also v1.0.0 at e7636f2).
const ScorerVersion = "v1.0.0"

// SftQualityScore enumerates the categories LEGAL in SFT exports.
// SftNever (defined below) is the inverse — categories that NEVER
// ship to SFT under any flag combination. The contamination firewall
// is enforced at the schema layer (ValidateSftSample) AND by the
// exporter; defense in depth.
type SftQualityScore string

const (
	SftQualityAccepted          SftQualityScore = "accepted"
	SftQualityPartiallyAccepted SftQualityScore = "partially_accepted"
)

// SftQualityScores lists the quality scores legal in SFT samples.
// Default export is SftQualityAccepted only; the --include-partial
// CLI flag expands to both. rejected and needs_human_review are
// NEVER legal, regardless of flags.
var SftQualityScores = []SftQualityScore{
	SftQualityAccepted,
	SftQualityPartiallyAccepted,
}

// SftNever is the contamination firewall: ScoreCategories that NEVER
// ship to SFT under ANY caller flag. Enforced at the schema layer
// (ValidateSftSample) AND at the exporter layer. Per the Rust
// e7636f2 spec: "Hard non-negotiable: this set never expands. If you
// find yourself adding 'needs_human_review' or 'rejected' here, stop
// — that's the contamination the spec forbids."
//
// Exported so callers AND the validator share the same source of
// truth. Modifying this variable changes the contract; reviewers
// should treat any commit that touches it as a security review.
var SftNever = []ScoreCategory{
	CategoryRejected,
	CategoryNeedsHumanReview,
}

// SftSampleSchemaVersion bumps when the on-wire SftSample shape
// changes incompatibly. Matches the Rust SFT_SAMPLE_SCHEMA_VERSION.
const SftSampleSchemaVersion = 1

// ScoredRunSchemaVersion bumps when the on-wire ScoredRun shape
// changes incompatibly. Matches the Rust SCORED_RUN_SCHEMA_VERSION.
const ScoredRunSchemaVersion = 1

// EvidenceSchemaVersion mirrors the Rust EVIDENCE_SCHEMA_VERSION.
// This package consumes EvidenceRecord; producing it is a separate
// concern (the materialization layer, not yet ported).
const EvidenceSchemaVersion = 1

// ModelRole categorizes the kind of model output represented by an
// EvidenceRecord. Used by the SFT exporter to filter "real model
// output" from pure-extraction rows.
type ModelRole string

const (
	RoleExecutor    ModelRole = "executor"
	RoleReviewer    ModelRole = "reviewer"
	RoleExtractor   ModelRole = "extractor"
	RoleVerifier    ModelRole = "verifier"
	RoleCategorizer ModelRole = "categorizer"
	RoleTiebreaker  ModelRole = "tiebreaker"
	RoleApplier     ModelRole = "applier"
	RoleEmbedder    ModelRole = "embedder"
	RoleOther       ModelRole = "other"
)

// Provenance is the source-linkage every distillation record carries.
// SourceFile and SigHash are required (no record without source
// linkage — see validateProvenance); LineOffset is best-effort for
// de-duplication and trace-back. RecordedAt must be a valid ISO 8601
// timestamp.
type Provenance struct {
	SourceFile string `json:"source_file"`
	LineOffset int64  `json:"line_offset,omitempty"`
	SigHash    string `json:"sig_hash"`
	RecordedAt string `json:"recorded_at"` // ISO 8601
}

// ObserverVerdict is what an observer returned for an executor's
// output. Matches the Rust enum but as a string type for JSON
// flexibility.
type ObserverVerdict string

const (
	VerdictAccept ObserverVerdict = "accept"
	VerdictReject ObserverVerdict = "reject"
	VerdictCycle  ObserverVerdict = "cycle"
)

// EvidenceRecord is one row in the canonical evidence stream.
// Producing it (transforms from raw KB streams) is a separate
// concern; this package consumes it.
//
// Fields mirror the Rust EvidenceRecord at e7636f2. Optional fields
// use Go pointers / slices so missing-vs-empty stays distinguishable
// for the scorer's heuristics.
type EvidenceRecord struct {
	RunID         string `json:"run_id"`
	TaskID        string `json:"task_id"`
	Timestamp     string `json:"timestamp"`
	SchemaVersion int    `json:"schema_version"`

	Provenance Provenance `json:"provenance"`

	ModelName     string    `json:"model_name,omitempty"`
	ModelProvider string    `json:"model_provider,omitempty"`
	ModelRole     ModelRole `json:"model_role,omitempty"`

	InputHash  string `json:"input_hash,omitempty"`
	OutputHash string `json:"output_hash,omitempty"`

	SourceFiles []string `json:"source_files,omitempty"`
	CommandsRun []string `json:"commands_run,omitempty"`

	RetrievedContext *RetrievedContext `json:"retrieved_context,omitempty"`

	ObserverNotes      []string        `json:"observer_notes,omitempty"`
	ObserverVerdict    ObserverVerdict `json:"observer_verdict,omitempty"`
	ObserverConfidence float64         `json:"observer_confidence,omitempty"`
	ScratchpadSummary  string          `json:"scratchpad_summary,omitempty"`

	SuccessMarkers []string `json:"success_markers,omitempty"`
	FailureMarkers []string `json:"failure_markers,omitempty"`

	ValidationResults map[string]any `json:"validation_results,omitempty"`

	HumanOverride *HumanOverride `json:"human_override,omitempty"`

	CostUSD   float64 `json:"cost_usd,omitempty"`
	LatencyMs int64   `json:"latency_ms,omitempty"`
	Text      string  `json:"text,omitempty"`
}

// RetrievedContext captures what the model saw via retrieval. Matches
// the Rust shape exactly so the JSON round-trips byte-identical (per
// ADR-001 #4 "logic, not bit-identical" — but on-wire compatibility
// is desirable for tooling that consumes EvidenceRecord JSONL).
type RetrievedContext struct {
	MatrixCorpora           []string `json:"matrix_corpora,omitempty"`
	MatrixHits              int      `json:"matrix_hits,omitempty"`
	MatrixChunksKept        int      `json:"matrix_chunks_kept,omitempty"`
	MatrixChunksDropped     int      `json:"matrix_chunks_dropped,omitempty"`
	PathwayFingerprintsSeen int      `json:"pathway_fingerprints_seen,omitempty"`
}

// HumanOverride captures a human-in-the-loop decision overriding the
// scorer's verdict. Recorded but doesn't change the scorer's output;
// downstream consumers (UI, distillation acceptance) decide how to
// treat it.
type HumanOverride struct {
	Overrider    string `json:"overrider"`
	Decision     string `json:"decision"` // accept|reject|needs_review
	Reason       string `json:"reason"`
	OverriddenAt string `json:"overridden_at"`
}

// SubScores carries the deterministic scorer's intermediate signals
// alongside the final ScoreCategory. Persisted on every ScoredRun
// so a downstream UI can show "why" without re-running the scorer.
//
// AnchorGrounding, when present, must be in [0, 1] (checked by
// ValidateScoredRun).
type SubScores struct {
	CargoGreen             *bool           `json:"cargo_green,omitempty"`
	AnchorGrounding        *float64        `json:"anchor_grounding,omitempty"`
	SchemaValid            *bool           `json:"schema_valid,omitempty"`
	PathwayReplaySucceeded *bool           `json:"pathway_replay_succeeded,omitempty"`
	ObserverVerdict        ObserverVerdict `json:"observer_verdict,omitempty"`
	AcceptedOnAttempt      *int            `json:"accepted_on_attempt,omitempty"`
	// Extras holds fields the Rust schema accepted as
	// `[key: string]: unknown`. Captured as a free-form map so future
	// signals don't require type-system changes. Excluded from plain
	// json.Marshal (tag "-"); serialize via MarshalSubScores.
	Extras map[string]any `json:"-"`
}

// ScoredRun is the deterministic scorer's output. One per
// EvidenceRecord. Provenance ties back to the materialized evidence
// row (not the raw source stream).
type ScoredRun struct {
	SchemaVersion  int           `json:"schema_version"`
	EvidenceRunID  string        `json:"evidence_run_id"`
	EvidenceTaskID string        `json:"evidence_task_id"`
	Category       ScoreCategory `json:"category"`
	Reasons        []string      `json:"reasons"` // non-empty (enforced by ValidateScoredRun)
	ScoredAt       string        `json:"scored_at"`
	ScorerVersion  string        `json:"scorer_version"`
	SubScores      *SubScores    `json:"sub_scores,omitempty"`
	Provenance     Provenance    `json:"provenance"`
}

// SftSample is one entry in exports/sft/instruction_response.jsonl.
// The contamination firewall lives in ValidateSftSample.
type SftSample struct {
	SchemaVersion int             `json:"schema_version"`
	ID            string          `json:"id"`
	Instruction   string          `json:"instruction"`
	Context       string          `json:"context"` // empty allowed; null/missing not
	Response      string          `json:"response"`
	SourceRunID   string          `json:"source_run_id"`
	QualityScore  SftQualityScore `json:"quality_score"`
	CreatedAt     string          `json:"created_at"`
	Provenance    Provenance      `json:"provenance"`
}

// ─── Validators ──────────────────────────────────────────────────

// ValidationError is a single field-level violation.
type ValidationError struct {
	Field   string
	Message string
}

// Error renders the violation as "field: message".
func (e ValidationError) Error() string {
	return fmt.Sprintf("%s: %s", e.Field, e.Message)
}

// ValidationErrors is the joinable error returned by the validators
// when one or more fields violate the schema.
type ValidationErrors []ValidationError

// Error joins every field-level violation with "; ". A zero-length
// (but non-nil) value renders as "no errors".
func (es ValidationErrors) Error() string {
	if len(es) == 0 {
		return "no errors"
	}
	parts := make([]string, len(es))
	for i, e := range es {
		parts[i] = e.Error()
	}
	return strings.Join(parts, "; ")
}

// HasErrors returns true when one or more errors are present.
func (es ValidationErrors) HasErrors() bool { return len(es) > 0 }

// ValidateScoredRun mirrors the Rust validateScoredRun. Returns nil
// on success or a ValidationErrors with the field-level violations.
+func ValidateScoredRun(r ScoredRun) error { + var errs ValidationErrors + if r.SchemaVersion != ScoredRunSchemaVersion { + errs = append(errs, ValidationError{ + "schema_version", + fmt.Sprintf("expected %d, got %d", ScoredRunSchemaVersion, r.SchemaVersion), + }) + } + if r.EvidenceRunID == "" { + errs = append(errs, ValidationError{"evidence_run_id", "must be non-empty"}) + } + if r.EvidenceTaskID == "" { + errs = append(errs, ValidationError{"evidence_task_id", "must be non-empty"}) + } + if !validISOTimestamp(r.ScoredAt) { + errs = append(errs, ValidationError{"scored_at", "must be ISO 8601 timestamp"}) + } + if r.ScorerVersion == "" { + errs = append(errs, ValidationError{"scorer_version", "must be non-empty"}) + } + if len(r.Reasons) == 0 { + errs = append(errs, ValidationError{"reasons", "must be non-empty (every score needs a reason)"}) + } + if !isValidCategory(r.Category) { + errs = append(errs, ValidationError{"category", fmt.Sprintf("must be one of %v, got %q", AllScoreCategories, r.Category)}) + } + if err := validateProvenance(r.Provenance, "provenance"); err != nil { + errs = append(errs, err...) + } + if r.SubScores != nil && r.SubScores.AnchorGrounding != nil { + ag := *r.SubScores.AnchorGrounding + if ag < 0 || ag > 1 { + errs = append(errs, ValidationError{"sub_scores.anchor_grounding", "must be in [0, 1]"}) + } + } + if errs.HasErrors() { + return errs + } + return nil +} + +// ValidateSftSample is the contamination firewall. Returns ErrSftContamination +// (wrapped) when quality_score is in SftNever — which is the safety-critical +// guarantee the spec calls non-negotiable. +// +// Other field violations come back as ValidationErrors. 
+func ValidateSftSample(s SftSample) error { + var errs ValidationErrors + if s.SchemaVersion != SftSampleSchemaVersion { + errs = append(errs, ValidationError{ + "schema_version", + fmt.Sprintf("expected %d, got %d", SftSampleSchemaVersion, s.SchemaVersion), + }) + } + if s.ID == "" { + errs = append(errs, ValidationError{"id", "must be non-empty"}) + } + if strings.TrimSpace(s.Instruction) == "" { + errs = append(errs, ValidationError{"instruction", "must be non-whitespace (no empty pairs)"}) + } + if strings.TrimSpace(s.Response) == "" { + errs = append(errs, ValidationError{"response", "must be non-whitespace (no empty pairs)"}) + } + // Context is required-string but empty is allowed. + // (Field is always typed as string in Go, so the only way to + // distinguish "set" from "missing" was via the JSON layer; here + // empty is fine.) + if s.SourceRunID == "" { + errs = append(errs, ValidationError{"source_run_id", "must be non-empty"}) + } + if !validISOTimestamp(s.CreatedAt) { + errs = append(errs, ValidationError{"created_at", "must be ISO 8601 timestamp"}) + } + if err := validateProvenance(s.Provenance, "provenance"); err != nil { + errs = append(errs, err...) + } + + // Contamination firewall. Hard non-negotiable per the spec. + if !isLegalSftQualityScore(s.QualityScore) { + // If it's in SftNever, surface the firewall sentinel — callers + // can errors.Is(err, ErrSftContamination) to reliably detect + // "the spec said never" as opposed to "you typo'd a category." 
+ if isContaminationCategory(s.QualityScore) { + return fmt.Errorf("%w: quality_score %q in SftNever (rejected/needs_human_review never legal in SFT)", + ErrSftContamination, s.QualityScore) + } + errs = append(errs, ValidationError{ + "quality_score", + fmt.Sprintf("must be one of %v, got %q", SftQualityScores, s.QualityScore), + }) + } + + if errs.HasErrors() { + return errs + } + return nil +} + +// ErrSftContamination is the firewall sentinel — when ValidateSftSample +// rejects a sample because its quality_score is in SftNever, callers +// can errors.Is(err, ErrSftContamination) to reliably distinguish +// "spec violation" from "typo'd category." +var ErrSftContamination = errors.New("distillation: SFT contamination — quality_score in SftNever") + +// ─── Internal helpers ──────────────────────────────────────────── + +func isValidCategory(c ScoreCategory) bool { + for _, v := range AllScoreCategories { + if c == v { + return true + } + } + return false +} + +func isLegalSftQualityScore(q SftQualityScore) bool { + for _, v := range SftQualityScores { + if q == v { + return true + } + } + return false +} + +func isContaminationCategory(q SftQualityScore) bool { + // Compare as ScoreCategory — the on-wire string is the same; this + // just guards the firewall against typos that happen to match + // SftNever string-wise. + for _, v := range SftNever { + if string(v) == string(q) { + return true + } + } + return false +} + +func validISOTimestamp(s string) bool { + if s == "" { + return false + } + // time.Parse with RFC3339 covers most ISO 8601. We accept both + // the basic and nano variants since the Rust producers vary. 
+ if _, err := time.Parse(time.RFC3339, s); err == nil { + return true + } + if _, err := time.Parse(time.RFC3339Nano, s); err == nil { + return true + } + return false +} + +func validateProvenance(p Provenance, field string) ValidationErrors { + var errs ValidationErrors + if p.SourceFile == "" { + errs = append(errs, ValidationError{field + ".source_file", "must be non-empty"}) + } + if p.SigHash == "" { + errs = append(errs, ValidationError{field + ".sig_hash", "must be non-empty"}) + } + if !validISOTimestamp(p.RecordedAt) { + errs = append(errs, ValidationError{field + ".recorded_at", "must be ISO 8601 timestamp"}) + } + return errs +} + +// MarshalSubScores is a shim — Go's encoding/json doesn't merge a +// "rest" map into the struct's JSON output by default. Callers that +// need Extras serialized into the same object can use this helper. +func MarshalSubScores(s *SubScores) ([]byte, error) { + if s == nil { + return []byte("null"), nil + } + // First marshal the typed fields normally. + type alias SubScores + base, err := json.Marshal((*alias)(s)) + if err != nil { + return nil, err + } + if len(s.Extras) == 0 { + return base, nil + } + // Decode back to a map, merge Extras, re-encode. Less efficient + // but keeps the field semantics correct (typed fields override + // extras on collision — first-write-wins for known keys). + var combined map[string]any + if err := json.Unmarshal(base, &combined); err != nil { + return nil, err + } + for k, v := range s.Extras { + if _, exists := combined[k]; !exists { + combined[k] = v + } + } + return json.Marshal(combined) +}