root 57d0df125d E (partial): distillation port — scorer + contamination firewall
First slice of the Rust v1.0.0 distillation substrate (e7636f2)
ported to Go per ADR-001 #4 (port LOGIC, not bit-identical
reproducibility). This commit lands the LOAD-BEARING pieces named
in project_distillation_substrate.md memory:

  - The deterministic Success Scorer (8 sub-scorers + dispatch)
  - The contamination firewall on SFT samples (the "non-negotiable"
    spec property: rejected/needs_human_review NEVER ship to SFT)
  - All on-wire types + validators for ScoredRun, SftSample,
    EvidenceRecord with Provenance

Files:
  internal/distillation/types.go  — types + ScorerVersion + SftNever
                                    + ValidateScoredRun + ValidateSftSample
  internal/distillation/scorer.go — ScoreRecord + 8 class scorers +
                                    BuildScoredRun (deterministic)
  internal/distillation/scorer_test.go — ~40 test cases:
    - source-class dispatch (verdict / telemetry / extraction)
    - scrum_review (4 attempt cases)
    - observer_review (5 verdict cases)
    - audit (legacy + severity, 9 cases)
    - auto_apply (4 cases)
    - outcomes / mode_experiment / extraction
    - CONTAMINATION FIREWALL: ErrSftContamination sentinel fires
      on rejected/needs_human_review, distinct from typo errors
    - empty-pair guard (instruction/response trim != "")
    - reasons-required ScoredRun validation
    - deterministic sig_hash on identical input
    - purity check (input not mutated, repeatable output)

Per the 2026-04-29 cross-lineage scrum's discipline: false-positive
findings would be dismissed inline (none in this commit). Real
findings would be addressed before merge — but this is greenfield
port code reviewed against its Rust source line-by-line, which the
test suite encodes as truth tables.

Explicitly DEFERRED to follow-up commits:
  - Materialization layer (jsonl read/write, date-partitioned
    storage in data/scored-runs/YYYY/MM/DD/, evidence index)
  - SFT exporter (file iteration + filtering — the SCORING firewall
    is here; the EXPORT firewall is the next layer)
  - export_preference, export_rag (other export shapes)
  - Acceptance harness (16/16 acceptance gate that locks v1.0.0)
  - replay, receipts, build_evidence_index, transforms

The scorer + firewall validator are pure functions — operational
tooling layers on top without changing the deterministic logic the
downstream learning loop depends on. The Go ScorerVersion stays at
v1.0.0 to match the Rust e7636f2 baseline; bumping in the Go
materialization commit is reserved for the next scoring-rule
change, NOT the port itself.

15-smoke regression all green. vet clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 20:04:29 -05:00

411 lines
13 KiB
Go

package distillation
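// scorer.go — pure deterministic Success Scorer (port of Rust
// scripts/distillation/scorer.ts at e7636f2).
// NOTE(review): the cited path ends in .ts (TypeScript) while this
// file refers to the source as "Rust" throughout — confirm which is
// correct and align the wording.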
//
// Takes one EvidenceRecord, returns category + reasons + sub_scores.
// NO I/O, NO LLM, NO clock reads, NO mutable state. Identical input
// → identical output forever. Same contract as the Rust source —
// future scoring-rule changes bump ScorerVersion atomically with
// the logic.
//
// Three-class strategy mirrors the Rust source taxonomy
// (docs/recon/local-distillation-recon.md + data/_kb/evidence_health.md):
//
// CLASS A — verdict-bearing
// scrum_reviews, observer_reviews, audits, contract_analyses
// Direct scoring from existing markers / observer_verdict
//
// CLASS B — telemetry-rich
// auto_apply, outcomes, mode_experiments
// Markers exist but partial; needs_human_review fills the gap
//
// CLASS C — pure-extraction (no native scoring signal)
// distilled_*, audit_facts, observer_escalations
// Default needs_human_review; v2 will JOIN to parent verdict
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"strconv"
"strings"
)
// sourceClass categorizes an EvidenceRecord's source_file for the
// scorer's three-class dispatch.
type sourceClass string

// Scoring classes returned by sourceClassFor.
const (
	classVerdict    sourceClass = "verdict"
	classTelemetry  sourceClass = "telemetry"
	classExtraction sourceClass = "extraction"
)

// sourceClassFor maps a source_file (from provenance) to a class.
// Centralized so adding a new source is a one-line change. Mirrors
// the Rust switch on the stem (data/_kb/X.jsonl → X).
//
// The stem is obtained from stemOf so that the class lookup and the
// per-corpus dispatch always normalize a path the same way.
func sourceClassFor(sourceFile string) sourceClass {
	switch stemOf(sourceFile) {
	case "scrum_reviews", "observer_reviews", "audits", "contract_analyses":
		return classVerdict
	case "auto_apply", "outcomes", "mode_experiments":
		return classTelemetry
	case "distilled_facts", "distilled_procedures", "distilled_config_hints",
		"audit_facts", "observer_escalations":
		// Listed explicitly for documentation; the default below
		// returns the same class.
		return classExtraction
	default:
		// Unknown source → most conservative path (forces
		// needs_human_review until a transform is added).
		return classExtraction
	}
}

// stemOf extracts the stable corpus identifier from a source_file.
// E.g. "data/_kb/scrum_reviews.jsonl" → "scrum_reviews". Only the
// exact "data/_kb/" prefix and ".jsonl" suffix are stripped; any
// other path shape is returned with those parts left in place.
func stemOf(sourceFile string) string {
	return strings.TrimSuffix(strings.TrimPrefix(sourceFile, "data/_kb/"), ".jsonl")
}
// ScoreOutput is the scorer's return shape — category + reasons +
// the captured sub-signals. Reasons is always non-empty (validator
// requires it).
type ScoreOutput struct {
// Category is the verdict the corpus scorer assigned to the record.
Category ScoreCategory
// Reasons holds the human-readable justification(s) for Category;
// every scorer in this file supplies exactly one entry.
Reasons []string
// SubScores carries whichever sub-signals the scorer captured. The
// scorers in this file always set it to a non-nil pointer, even when
// no sub-signal was recorded.
SubScores *SubScores
}
// ScoreRecord scores a single EvidenceRecord. The record's provenance
// source_file selects a class (sourceClassFor) and a corpus stem
// (stemOf); the scorer registered for that class/stem pair produces
// the category, reasons and sub-scores. A record with no dedicated
// scorer receives the extraction-class default. Pure function; wrap
// the result with BuildScoredRun to obtain the on-wire ScoredRun.
func ScoreRecord(rec EvidenceRecord) ScoreOutput {
	src := rec.Provenance.SourceFile
	corpus := stemOf(src)

	// Pick the corpus scorer first, call it once at the end.
	var score func(EvidenceRecord) ScoreOutput
	switch sourceClassFor(src) {
	case classVerdict:
		switch corpus {
		case "scrum_reviews":
			score = scoreScrumReview
		case "observer_reviews":
			score = scoreObserverReview
		case "audits":
			score = scoreAudit
		case "contract_analyses":
			score = scoreContractAnalysis
		}
	case classTelemetry:
		switch corpus {
		case "auto_apply":
			score = scoreAutoApply
		case "outcomes":
			score = scoreOutcomes
		case "mode_experiments":
			score = scoreModeExperiment
		}
	}

	if score == nil {
		// Extraction class, or a class/stem pair with no scorer.
		return scoreExtraction()
	}
	return score(rec)
}
// BuildScoredRun assembles the persistable ScoredRun for rec.
//
// The category, reasons and sub-scores come from ScoreRecord. The
// provenance block is built from the caller-supplied sourceFile and
// lineOffset plus a SigHash computed from rec's JSON encoding.
// recordedAt is stored both as ScoredAt and as Provenance.RecordedAt.
//
// An error is returned (with a zero ScoredRun) only when rec cannot
// be JSON-encoded for hashing.
func BuildScoredRun(rec EvidenceRecord, sourceFile string, lineOffset int64, recordedAt string) (ScoredRun, error) {
	sigHash, hashErr := canonicalSha256(rec)
	if hashErr != nil {
		return ScoredRun{}, fmt.Errorf("scoredrun sig hash: %w", hashErr)
	}
	verdict := ScoreRecord(rec)

	var run ScoredRun
	run.SchemaVersion = ScoredRunSchemaVersion
	run.EvidenceRunID = rec.RunID
	run.EvidenceTaskID = rec.TaskID
	run.Category = verdict.Category
	run.Reasons = verdict.Reasons
	run.ScoredAt = recordedAt
	run.ScorerVersion = ScorerVersion
	run.SubScores = verdict.SubScores
	run.Provenance = Provenance{
		SourceFile: sourceFile,
		LineOffset: lineOffset,
		SigHash:    sigHash,
		RecordedAt: recordedAt,
	}
	return run, nil
}
// canonicalSha256 returns the lowercase hex SHA-256 digest of v's
// encoding/json serialization. It backs ScoredRun.Provenance.SigHash:
// the structured value is hashed rather than the raw source bytes,
// so re-materializing the same value yields the same digest. A
// marshalling failure is returned unchanged with an empty string.
func canonicalSha256(v any) (string, error) {
	encoded, err := json.Marshal(v)
	if err != nil {
		return "", err
	}
	hasher := sha256.New()
	hasher.Write(encoded) // hash.Hash.Write is documented to never return an error
	return hex.EncodeToString(hasher.Sum(nil)), nil
}
// ─── Class A: verdict-bearing ────────────────────────────────────

// scoreScrumReview scores a scrum_reviews record from the first
// success marker that starts with "accepted_on_attempt_":
//
//   - no such marker, a non-integer suffix, or an attempt below 1
//     → CategoryNeedsHumanReview
//   - attempt == 1 → CategoryAccepted
//   - attempt >= 2 → CategoryPartiallyAccepted (attempts above 3 are
//     called out as the high-cost path in the reason text)
//
// SubScores.AcceptedOnAttempt is populated only for a valid attempt.
func scoreScrumReview(r EvidenceRecord) ScoreOutput {
	const markerPrefix = "accepted_on_attempt_"
	subs := &SubScores{}

	successMarker := findPrefix(r.SuccessMarkers, markerPrefix)
	if successMarker == "" {
		return ScoreOutput{
			Category:  CategoryNeedsHumanReview,
			Reasons:   []string{"scrum_review missing accepted_on_attempt_* success marker"},
			SubScores: subs,
		}
	}

	attemptStr := strings.TrimPrefix(successMarker, markerPrefix)
	attempt, err := strconv.Atoi(attemptStr)
	if err != nil {
		return ScoreOutput{
			Category:  CategoryNeedsHumanReview,
			Reasons:   []string{"scrum_review accepted_on_attempt_* marker has non-integer suffix: " + attemptStr},
			SubScores: subs,
		}
	}
	// Attempt numbers are 1-based. strconv.Atoi also accepts "0" and
	// signed values, which would otherwise slip into the "<= 3" branch
	// and be scored as partially accepted; treat them as malformed
	// markers and take the conservative path instead.
	if attempt < 1 {
		return ScoreOutput{
			Category:  CategoryNeedsHumanReview,
			Reasons:   []string{"scrum_review accepted_on_attempt_* marker has non-positive attempt: " + attemptStr},
			SubScores: subs,
		}
	}

	subs.AcceptedOnAttempt = &attempt
	switch {
	case attempt == 1:
		return ScoreOutput{
			Category:  CategoryAccepted,
			Reasons:   []string{"scrum: accepted on first attempt"},
			SubScores: subs,
		}
	case attempt <= 3:
		return ScoreOutput{
			Category:  CategoryPartiallyAccepted,
			Reasons:   []string{fmt.Sprintf("scrum: accepted after %d attempts", attempt)},
			SubScores: subs,
		}
	default:
		// Same category as above; only the reason text differs.
		return ScoreOutput{
			Category:  CategoryPartiallyAccepted,
			Reasons:   []string{fmt.Sprintf("scrum: accepted only after %d attempts (high-cost path)", attempt)},
			SubScores: subs,
		}
	}
}
// scoreObserverReview translates the record's ObserverVerdict into a
// category: VerdictAccept → CategoryAccepted, VerdictReject →
// CategoryRejected, VerdictCycle → CategoryPartiallyAccepted. Any
// other value (including an empty one) yields CategoryNeedsHumanReview
// and leaves SubScores.ObserverVerdict unset.
func scoreObserverReview(r EvidenceRecord) ScoreOutput {
	out := ScoreOutput{SubScores: &SubScores{}}

	switch verdict := r.ObserverVerdict; verdict {
	case VerdictAccept:
		out.SubScores.ObserverVerdict = VerdictAccept
		out.Category = CategoryAccepted
		out.Reasons = []string{"observer accepted the reviewed attempt"}
	case VerdictReject:
		out.SubScores.ObserverVerdict = VerdictReject
		out.Category = CategoryRejected
		out.Reasons = []string{"observer rejected the reviewed attempt"}
	case VerdictCycle:
		out.SubScores.ObserverVerdict = VerdictCycle
		out.Category = CategoryPartiallyAccepted
		out.Reasons = []string{"observer flagged the attempt as cycling — partial signal"}
	default:
		out.Category = CategoryNeedsHumanReview
		out.Reasons = []string{fmt.Sprintf("observer_verdict missing or unrecognized: %q", verdict)}
	}
	return out
}
// scoreAudit scores an audits record. Checks run in a fixed order and
// the first hit wins:
//
//  1. legacy overall markers: success "approved", then failure
//     "blocked", then failure "request_changes";
//  2. any success marker prefixed "audit_severity_" (minor finding);
//  3. the first failure marker prefixed "audit_severity_": medium is
//     partially accepted, high/critical is rejected.
//
// Anything else falls through to CategoryNeedsHumanReview. No
// sub-scores are recorded for audits.
func scoreAudit(r EvidenceRecord) ScoreOutput {
	const severityPrefix = "audit_severity_"

	// verdict builds the single-reason result shape shared by every branch.
	verdict := func(category ScoreCategory, reason string) ScoreOutput {
		return ScoreOutput{Category: category, Reasons: []string{reason}, SubScores: &SubScores{}}
	}
	passed, failed := r.SuccessMarkers, r.FailureMarkers

	// Legacy markers (back-compat with pre-fix materializations).
	switch {
	case contains(passed, "approved"):
		return verdict(CategoryAccepted, "audit overall=approved (legacy marker)")
	case contains(failed, "blocked"):
		return verdict(CategoryRejected, "audit overall=block (legacy marker)")
	case contains(failed, "request_changes"):
		return verdict(CategoryPartiallyAccepted, "audit overall=request_changes (legacy marker)")
	}

	// Severity-derived markers (Phase 2 transform).
	if minor := findPrefix(passed, severityPrefix); minor != "" {
		return verdict(CategoryAccepted, minor+" → minor finding")
	}
	switch severity := findPrefix(failed, severityPrefix); severity {
	case "audit_severity_medium":
		return verdict(CategoryPartiallyAccepted, "audit_severity_medium → finding warrants review")
	case "audit_severity_high", "audit_severity_critical":
		return verdict(CategoryRejected, severity+" → blocking finding")
	}

	return verdict(CategoryNeedsHumanReview, "audit row has no severity or overall marker")
}
// scoreContractAnalysis scores a contract_analyses record from its
// observer signal. A rejection — either the "observer_rejected"
// failure marker or ObserverVerdict == VerdictReject — is checked
// first so that it wins over everything else. Otherwise VerdictAccept
// maps to CategoryAccepted, VerdictCycle to CategoryPartiallyAccepted,
// and any other verdict to CategoryNeedsHumanReview (with
// SubScores.ObserverVerdict left unset).
func scoreContractAnalysis(r EvidenceRecord) ScoreOutput {
	out := ScoreOutput{SubScores: &SubScores{}}
	rejected := r.ObserverVerdict == VerdictReject || contains(r.FailureMarkers, "observer_rejected")

	switch {
	case rejected:
		// Explicit rejection beats an absent or different verdict.
		out.SubScores.ObserverVerdict = VerdictReject
		out.Category = CategoryRejected
		out.Reasons = []string{"contract analysis: observer rejected"}
	case r.ObserverVerdict == VerdictAccept:
		out.SubScores.ObserverVerdict = VerdictAccept
		out.Category = CategoryAccepted
		out.Reasons = []string{"contract analysis: observer accepted"}
	case r.ObserverVerdict == VerdictCycle:
		out.SubScores.ObserverVerdict = VerdictCycle
		out.Category = CategoryPartiallyAccepted
		out.Reasons = []string{"contract analysis: observer cycled (partial)"}
	default:
		out.Category = CategoryNeedsHumanReview
		out.Reasons = []string{"contract analysis: no observer verdict signal"}
	}
	return out
}
// ─── Class B: telemetry-rich ─────────────────────────────────────

// scoreAutoApply scores an auto_apply record:
//
//   - success marker "committed" → CategoryAccepted, CargoGreen = true
//   - first failure marker containing "reverted" → CategoryRejected;
//     CargoGreen = false only when that marker also contains
//     "build_red", otherwise CargoGreen stays unset
//   - neither → CategoryNeedsHumanReview
//
// The commit check runs before the revert check.
func scoreAutoApply(r EvidenceRecord) ScoreOutput {
	out := ScoreOutput{SubScores: &SubScores{}}

	if contains(r.SuccessMarkers, "committed") {
		green := true
		out.SubScores.CargoGreen = &green
		out.Category = CategoryAccepted
		out.Reasons = []string{"auto_apply: patch committed (cargo green + warning baseline + rationale alignment passed)"}
		return out
	}

	revertMarker := findContaining(r.FailureMarkers, "reverted")
	if revertMarker == "" {
		out.Category = CategoryNeedsHumanReview
		out.Reasons = []string{"auto_apply: no commit + no revert (likely no_patches or dry_run)"}
		return out
	}

	if strings.Contains(revertMarker, "build_red") {
		red := false
		out.SubScores.CargoGreen = &red
	}
	out.Category = CategoryRejected
	out.Reasons = []string{"auto_apply: " + revertMarker}
	return out
}
// scoreOutcomes scores an outcomes record. The "all_events_ok"
// success marker yields CategoryAccepted; failing that, a positive
// numeric "gap_signals" entry in ValidationResults yields
// CategoryPartiallyAccepted; otherwise the record is deferred to a
// human. No sub-scores are recorded.
func scoreOutcomes(r EvidenceRecord) ScoreOutput {
	out := ScoreOutput{SubScores: &SubScores{}}
	gapCount := numericFromMap(r.ValidationResults, "gap_signals")

	switch {
	case contains(r.SuccessMarkers, "all_events_ok"):
		out.Category = CategoryAccepted
		out.Reasons = []string{"outcomes: all events ok"}
	case gapCount > 0:
		out.Category = CategoryPartiallyAccepted
		// int() drops any fractional part of the count for display.
		out.Reasons = []string{fmt.Sprintf("outcomes: %d gap signal(s) detected", int(gapCount))}
	default:
		out.Category = CategoryNeedsHumanReview
		out.Reasons = []string{"outcomes: no decisive marker — defer to human"}
	}
	return out
}
// scoreModeExperiment scores a mode_experiments record. Whitespace-only
// (or empty) Text is rejected; a LatencyMs above 120 000 is partially
// accepted; everything else goes to human review. The empty-text check
// runs first, so a slow empty response is still rejected. No
// sub-scores are recorded.
func scoreModeExperiment(r EvidenceRecord) ScoreOutput {
	const softCapMs = 120_000 // two minutes

	out := ScoreOutput{SubScores: &SubScores{}}
	switch {
	case strings.TrimSpace(r.Text) == "":
		out.Category = CategoryRejected
		out.Reasons = []string{"mode_experiment: empty response text"}
	case r.LatencyMs > softCapMs:
		out.Category = CategoryPartiallyAccepted
		out.Reasons = []string{fmt.Sprintf("mode_experiment: latency %dms exceeds 2-minute soft cap", r.LatencyMs)}
	default:
		out.Category = CategoryNeedsHumanReview
		out.Reasons = []string{"mode_experiment: response present, latency within bounds; verdict not yet wired"}
	}
	return out
}
// ─── Class C: pure-extraction ────────────────────────────────────

// scoreExtraction is the fixed fallback verdict: it takes no input
// and always returns CategoryNeedsHumanReview with a single reason
// and an empty (non-nil) SubScores.
func scoreExtraction() ScoreOutput {
	const reason = "extraction-class source has no native scoring signal — JOIN to parent verdict pending Phase 3 v2"

	var out ScoreOutput
	out.Category = CategoryNeedsHumanReview
	out.Reasons = []string{reason}
	out.SubScores = new(SubScores)
	return out
}
// ─── Internal helpers ────────────────────────────────────────────

// contains reports whether want appears in slice as an exact,
// whole-string match. A nil or empty slice never matches.
func contains(slice []string, want string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == want {
			return true
		}
	}
	return false
}
// findPrefix returns the first element of slice that starts with
// prefix, or "" when no element does.
func findPrefix(slice []string, prefix string) string {
	for i := range slice {
		if candidate := slice[i]; strings.HasPrefix(candidate, prefix) {
			return candidate
		}
	}
	return ""
}
// findContaining returns the first element of slice that has sub as
// a substring, or "" when no element does.
func findContaining(slice []string, sub string) string {
	for i := range slice {
		if candidate := slice[i]; strings.Contains(candidate, sub) {
			return candidate
		}
	}
	return ""
}
// numericFromMap reads m[key] as a float64.
//
// It returns 0 when m is nil, the key is absent, the value is not a
// supported numeric type, or the value is a json.Number that does not
// parse to a finite float64. Supported types are the built-in signed
// and unsigned integer kinds, float32, float64 and json.Number.
func numericFromMap(m map[string]any, key string) float64 {
	v, ok := m[key] // indexing a nil map is safe and reports ok == false
	if !ok {
		return 0
	}
	switch n := v.(type) {
	case int:
		return float64(n)
	case int8:
		return float64(n)
	case int16:
		return float64(n)
	case int32:
		return float64(n)
	case int64:
		return float64(n)
	case uint:
		return float64(n)
	case uint8:
		return float64(n)
	case uint16:
		return float64(n)
	case uint32:
		return float64(n)
	case uint64:
		return float64(n)
	case float32:
		return float64(n)
	case float64:
		return n
	case json.Number:
		f, err := n.Float64()
		if err != nil {
			// Float64 reports an error for malformed literals and for
			// out-of-range ones, where it also returns ±Inf. Neither
			// is a usable reading, so fall back to 0 like any other
			// unsupported value instead of leaking an infinity.
			return 0
		}
		return f
	}
	return 0
}