package distillation // scorer.go — pure deterministic Success Scorer (port of Rust // scripts/distillation/scorer.ts at e7636f2). // // Takes one EvidenceRecord, returns category + reasons + sub_scores. // NO I/O, NO LLM, NO clock reads, NO mutable state. Identical input // → identical output forever. Same contract as the Rust source — // future scoring-rule changes bump ScorerVersion atomically with // the logic. // // Three-class strategy mirrors the Rust source taxonomy // (docs/recon/local-distillation-recon.md + data/_kb/evidence_health.md): // // CLASS A — verdict-bearing // scrum_reviews, observer_reviews, audits, contract_analyses // Direct scoring from existing markers / observer_verdict // // CLASS B — telemetry-rich // auto_apply, outcomes, mode_experiments // Markers exist but partial; needs_human_review fills the gap // // CLASS C — pure-extraction (no native scoring signal) // distilled_*, audit_facts, observer_escalations // Default needs_human_review; v2 will JOIN to parent verdict import ( "crypto/sha256" "encoding/hex" "encoding/json" "fmt" "strconv" "strings" ) // sourceClass categorizes an EvidenceRecord's source_file for the // scorer's three-class dispatch. type sourceClass string const ( classVerdict sourceClass = "verdict" classTelemetry sourceClass = "telemetry" classExtraction sourceClass = "extraction" ) // sourceClassFor maps a source_file (from provenance) to a class. // Centralized so adding a new source is a one-line change. Mirrors // the Rust switch on the stem (data/_kb/X.jsonl → X). func sourceClassFor(sourceFile string) sourceClass { stem := strings.TrimSuffix(strings.TrimPrefix(sourceFile, "data/_kb/"), ".jsonl") switch stem { case "scrum_reviews", "observer_reviews", "audits", "contract_analyses": return classVerdict case "auto_apply", "outcomes", "mode_experiments": return classTelemetry case "distilled_facts", "distilled_procedures", "distilled_config_hints", "audit_facts", "observer_escalations": return classExtraction default: // Unknown source → most conservative path (forces // needs_human_review until a transform is added). return classExtraction } } // stemOf extracts the stable corpus identifier from a source_file. // E.g. "data/_kb/scrum_reviews.jsonl" → "scrum_reviews". func stemOf(sourceFile string) string { return strings.TrimSuffix(strings.TrimPrefix(sourceFile, "data/_kb/"), ".jsonl") } // ScoreOutput is the scorer's return shape — category + reasons + // the captured sub-signals. Reasons is always non-empty (validator // requires it). type ScoreOutput struct { Category ScoreCategory Reasons []string SubScores *SubScores } // ScoreRecord dispatches an EvidenceRecord to the appropriate class // scorer and returns the verdict + reasons + sub-scores. Pure // function. Caller wraps the output in a ScoredRun via BuildScoredRun // for the on-wire shape. func ScoreRecord(rec EvidenceRecord) ScoreOutput { cls := sourceClassFor(rec.Provenance.SourceFile) stem := stemOf(rec.Provenance.SourceFile) switch cls { case classVerdict: switch stem { case "scrum_reviews": return scoreScrumReview(rec) case "observer_reviews": return scoreObserverReview(rec) case "audits": return scoreAudit(rec) case "contract_analyses": return scoreContractAnalysis(rec) } case classTelemetry: switch stem { case "auto_apply": return scoreAutoApply(rec) case "outcomes": return scoreOutcomes(rec) case "mode_experiments": return scoreModeExperiment(rec) } } return scoreExtraction() } // BuildScoredRun composes a complete ScoredRun for persistence. // Caller supplies recorded_at + the source file path/line offset. // SigHash is computed deterministically from the EvidenceRecord // JSON; ScoredRun traces to the materialized evidence row. func BuildScoredRun(rec EvidenceRecord, sourceFile string, lineOffset int64, recordedAt string) (ScoredRun, error) { out := ScoreRecord(rec) sig, err := canonicalSha256(rec) if err != nil { return ScoredRun{}, fmt.Errorf("scoredrun sig hash: %w", err) } return ScoredRun{ SchemaVersion: ScoredRunSchemaVersion, EvidenceRunID: rec.RunID, EvidenceTaskID: rec.TaskID, Category: out.Category, Reasons: out.Reasons, ScoredAt: recordedAt, ScorerVersion: ScorerVersion, SubScores: out.SubScores, Provenance: Provenance{ SourceFile: sourceFile, LineOffset: lineOffset, SigHash: sig, RecordedAt: recordedAt, }, }, nil } // canonicalSha256 hashes a value's canonical JSON encoding. Used // for ScoredRun.Provenance.SigHash. Matches the Rust pattern of // "hash the structured object, not the raw source bytes" so // re-materialization with same logic produces same hash. func canonicalSha256(v any) (string, error) { bs, err := json.Marshal(v) if err != nil { return "", err } sum := sha256.Sum256(bs) return hex.EncodeToString(sum[:]), nil } // ─── Class A: verdict-bearing ──────────────────────────────────── func scoreScrumReview(r EvidenceRecord) ScoreOutput { subs := &SubScores{} successMarker := findPrefix(r.SuccessMarkers, "accepted_on_attempt_") if successMarker == "" { return ScoreOutput{ Category: CategoryNeedsHumanReview, Reasons: []string{"scrum_review missing accepted_on_attempt_* success marker"}, SubScores: subs, } } attemptStr := strings.TrimPrefix(successMarker, "accepted_on_attempt_") attempt, err := strconv.Atoi(attemptStr) if err != nil { return ScoreOutput{ Category: CategoryNeedsHumanReview, Reasons: []string{"scrum_review accepted_on_attempt_* marker has non-integer suffix: " + attemptStr}, SubScores: subs, } } subs.AcceptedOnAttempt = &attempt switch { case attempt == 1: return ScoreOutput{ Category: CategoryAccepted, Reasons: []string{"scrum: accepted on first attempt"}, SubScores: subs, } case attempt <= 3: return ScoreOutput{ Category: CategoryPartiallyAccepted, Reasons: []string{fmt.Sprintf("scrum: accepted after %d attempts", attempt)}, SubScores: subs, } default: return ScoreOutput{ Category: CategoryPartiallyAccepted, Reasons: []string{fmt.Sprintf("scrum: accepted only after %d attempts (high-cost path)", attempt)}, SubScores: subs, } } } func scoreObserverReview(r EvidenceRecord) ScoreOutput { subs := &SubScores{} switch r.ObserverVerdict { case VerdictAccept: subs.ObserverVerdict = VerdictAccept return ScoreOutput{ Category: CategoryAccepted, Reasons: []string{"observer accepted the reviewed attempt"}, SubScores: subs, } case VerdictReject: subs.ObserverVerdict = VerdictReject return ScoreOutput{ Category: CategoryRejected, Reasons: []string{"observer rejected the reviewed attempt"}, SubScores: subs, } case VerdictCycle: subs.ObserverVerdict = VerdictCycle return ScoreOutput{ Category: CategoryPartiallyAccepted, Reasons: []string{"observer flagged the attempt as cycling — partial signal"}, SubScores: subs, } default: return ScoreOutput{ Category: CategoryNeedsHumanReview, Reasons: []string{fmt.Sprintf("observer_verdict missing or unrecognized: %q", r.ObserverVerdict)}, SubScores: subs, } } } func scoreAudit(r EvidenceRecord) ScoreOutput { subs := &SubScores{} succ := r.SuccessMarkers fail := r.FailureMarkers // Legacy markers (back-compat with pre-fix materializations). if contains(succ, "approved") { return ScoreOutput{Category: CategoryAccepted, Reasons: []string{"audit overall=approved (legacy marker)"}, SubScores: subs} } if contains(fail, "blocked") { return ScoreOutput{Category: CategoryRejected, Reasons: []string{"audit overall=block (legacy marker)"}, SubScores: subs} } if contains(fail, "request_changes") { return ScoreOutput{Category: CategoryPartiallyAccepted, Reasons: []string{"audit overall=request_changes (legacy marker)"}, SubScores: subs} } // Severity-derived markers (Phase 2 transform). sevSucc := findPrefix(succ, "audit_severity_") sevFail := findPrefix(fail, "audit_severity_") if sevSucc != "" { return ScoreOutput{Category: CategoryAccepted, Reasons: []string{sevSucc + " → minor finding"}, SubScores: subs} } if sevFail == "audit_severity_medium" { return ScoreOutput{Category: CategoryPartiallyAccepted, Reasons: []string{"audit_severity_medium → finding warrants review"}, SubScores: subs} } if sevFail == "audit_severity_high" || sevFail == "audit_severity_critical" { return ScoreOutput{Category: CategoryRejected, Reasons: []string{sevFail + " → blocking finding"}, SubScores: subs} } return ScoreOutput{Category: CategoryNeedsHumanReview, Reasons: []string{"audit row has no severity or overall marker"}, SubScores: subs} } func scoreContractAnalysis(r EvidenceRecord) ScoreOutput { subs := &SubScores{} // failure_markers takes precedence: explicit rejection beats absent verdict. if contains(r.FailureMarkers, "observer_rejected") || r.ObserverVerdict == VerdictReject { subs.ObserverVerdict = VerdictReject return ScoreOutput{Category: CategoryRejected, Reasons: []string{"contract analysis: observer rejected"}, SubScores: subs} } switch r.ObserverVerdict { case VerdictAccept: subs.ObserverVerdict = VerdictAccept return ScoreOutput{Category: CategoryAccepted, Reasons: []string{"contract analysis: observer accepted"}, SubScores: subs} case VerdictCycle: subs.ObserverVerdict = VerdictCycle return ScoreOutput{Category: CategoryPartiallyAccepted, Reasons: []string{"contract analysis: observer cycled (partial)"}, SubScores: subs} } return ScoreOutput{Category: CategoryNeedsHumanReview, Reasons: []string{"contract analysis: no observer verdict signal"}, SubScores: subs} } // ─── Class B: telemetry-rich ───────────────────────────────────── func scoreAutoApply(r EvidenceRecord) ScoreOutput { subs := &SubScores{} if contains(r.SuccessMarkers, "committed") { t := true subs.CargoGreen = &t return ScoreOutput{Category: CategoryAccepted, Reasons: []string{"auto_apply: patch committed (cargo green + warning baseline + rationale alignment passed)"}, SubScores: subs} } reverted := findContaining(r.FailureMarkers, "reverted") if reverted != "" { if strings.Contains(reverted, "build_red") { f := false subs.CargoGreen = &f } return ScoreOutput{Category: CategoryRejected, Reasons: []string{"auto_apply: " + reverted}, SubScores: subs} } return ScoreOutput{Category: CategoryNeedsHumanReview, Reasons: []string{"auto_apply: no commit + no revert (likely no_patches or dry_run)"}, SubScores: subs} } func scoreOutcomes(r EvidenceRecord) ScoreOutput { subs := &SubScores{} if contains(r.SuccessMarkers, "all_events_ok") { return ScoreOutput{Category: CategoryAccepted, Reasons: []string{"outcomes: all events ok"}, SubScores: subs} } if gap := numericFromMap(r.ValidationResults, "gap_signals"); gap > 0 { return ScoreOutput{Category: CategoryPartiallyAccepted, Reasons: []string{fmt.Sprintf("outcomes: %d gap signal(s) detected", int(gap))}, SubScores: subs} } return ScoreOutput{Category: CategoryNeedsHumanReview, Reasons: []string{"outcomes: no decisive marker — defer to human"}, SubScores: subs} } func scoreModeExperiment(r EvidenceRecord) ScoreOutput { subs := &SubScores{} if strings.TrimSpace(r.Text) == "" { return ScoreOutput{Category: CategoryRejected, Reasons: []string{"mode_experiment: empty response text"}, SubScores: subs} } if r.LatencyMs > 120_000 { return ScoreOutput{Category: CategoryPartiallyAccepted, Reasons: []string{fmt.Sprintf("mode_experiment: latency %dms exceeds 2-minute soft cap", r.LatencyMs)}, SubScores: subs} } return ScoreOutput{Category: CategoryNeedsHumanReview, Reasons: []string{"mode_experiment: response present, latency within bounds; verdict not yet wired"}, SubScores: subs} } // ─── Class C: pure-extraction ──────────────────────────────────── func scoreExtraction() ScoreOutput { return ScoreOutput{ Category: CategoryNeedsHumanReview, Reasons: []string{"extraction-class source has no native scoring signal — JOIN to parent verdict pending Phase 3 v2"}, SubScores: &SubScores{}, } } // ─── Internal helpers ──────────────────────────────────────────── func contains(slice []string, want string) bool { for _, s := range slice { if s == want { return true } } return false } func findPrefix(slice []string, prefix string) string { for _, s := range slice { if strings.HasPrefix(s, prefix) { return s } } return "" } func findContaining(slice []string, sub string) string { for _, s := range slice { if strings.Contains(s, sub) { return s } } return "" } func numericFromMap(m map[string]any, key string) float64 { if m == nil { return 0 } v, ok := m[key] if !ok { return 0 } switch n := v.(type) { case int: return float64(n) case int64: return float64(n) case float32: return float64(n) case float64: return n case json.Number: f, _ := n.Float64() return f } return 0 }