// Package drift quantifies when historical decisions stop matching
// current reality. Per the PRD's 5-loop substrate, this is loop 5
// (drift) — distinct from the rating+distillation loop because
// drift is about MEASUREMENT, not learning. The learning loop says
// "this match worked, remember it"; the drift loop says "the
// playbook entry from 4 months ago — does it still match what the
// substrate would surface today?"
//
// First-shipped drift shape: SCORER drift. When the deterministic
// scorer's logic changes (ScorerVersion bumped), historical
// ScoredRuns may no longer match what the current scorer would
// produce on the same EvidenceRecord. ComputeScorerDrift re-runs
// the current scorer over a slice of (EvidenceRecord, persisted
// category) pairs and reports mismatches.
//
// Why this matters: the rating+distillation loop only learns
// forward. Without a drift quantifier, a scorer-rule change
// silently invalidates the historical training data feeding the
// loop. With drift quantification, a rule change surfaces a
// concrete number ("847 of 4701 historical scoredruns now
// disagree") that triggers a re-score-and-retrain cycle rather
// than letting the substrate quietly rot.
//
// Future drift shapes (not in this commit):
//   - PLAYBOOK drift: for each playbook entry, re-run its query
//     through current matrix-search; if the recorded answer is no
//     longer in top-K, the world has moved.
//   - EMBEDDING drift: KS-test on the distribution of embedding
//     vectors at T1 vs T2; large shifts = the corpus has changed
//     materially.
//   - AUDIT BASELINE drift: track how PR audit verdicts shift over
//     scorer/auditor versions; matches the Rust audit_baselines.jsonl
//     longitudinal signal.
package drift

import (
	"sort"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
)

// ScorerDriftEntry is one mismatch — a historical (record, category)
// pair where the current scorer disagrees with the persisted
// verdict. CurrentReasons captures the current scorer's explanation
// so operators can see WHY the verdict changed.
type ScorerDriftEntry struct {
	EvidenceRunID     string                     `json:"evidence_run_id"`
	EvidenceTaskID    string                     `json:"evidence_task_id"`
	PersistedCategory distillation.ScoreCategory `json:"persisted_category"`
	CurrentCategory   distillation.ScoreCategory `json:"current_category"`
	CurrentReasons    []string                   `json:"current_reasons"`
	SourceFile        string                     `json:"source_file"`
}

// CategoryShift is one cell in the drift matrix — "Count persisted
// records that NOW classify as To." e.g. "12 records that were
// 'rejected' yesterday are 'partially_accepted' today."
type CategoryShift struct {
	From  distillation.ScoreCategory `json:"from"`
	To    distillation.ScoreCategory `json:"to"`
	Count int                        `json:"count"`
}

// ScorerDriftReport is the summary returned by ComputeScorerDrift.
// The shape is intentionally machine-readable so a downstream
// dashboard / alerting layer can threshold on Drifted / TotalChecked
// without parsing the entries list.
type ScorerDriftReport struct {
	ScorerVersion string  `json:"scorer_version"` // current scorer's version
	TotalChecked  int     `json:"total_checked"`
	Matched       int     `json:"matched"`    // current == persisted
	Drifted       int     `json:"drifted"`    // current != persisted
	DriftRate     float64 `json:"drift_rate"` // Drifted / TotalChecked

	ShiftMatrix []CategoryShift    `json:"shift_matrix,omitempty"`
	Entries     []ScorerDriftEntry `json:"entries,omitempty"` // mismatches only
}

// ScorerDriftInput is one (record, persisted_category) pair to check.
// Caller is responsible for materializing these from disk; this
// package is pure compute.
type ScorerDriftInput struct {
	Record            distillation.EvidenceRecord
	PersistedCategory distillation.ScoreCategory
}
The caller // supplies the inputs (typically by reading a directory of // scored-runs JSONL alongside the corresponding evidence JSONL). // // IncludeEntries controls whether the per-mismatch detail list is // populated. For large corpora (e.g. 4,701 fill events) the // summary numbers may be all the caller needs; setting this to // false avoids allocating the entries slice. func ComputeScorerDrift(inputs []ScorerDriftInput, includeEntries bool) ScorerDriftReport { report := ScorerDriftReport{ ScorerVersion: distillation.ScorerVersion, TotalChecked: len(inputs), } shiftCounts := make(map[[2]distillation.ScoreCategory]int) for _, in := range inputs { out := distillation.ScoreRecord(in.Record) if out.Category == in.PersistedCategory { report.Matched++ continue } report.Drifted++ shiftCounts[[2]distillation.ScoreCategory{in.PersistedCategory, out.Category}]++ if includeEntries { report.Entries = append(report.Entries, ScorerDriftEntry{ EvidenceRunID: in.Record.RunID, EvidenceTaskID: in.Record.TaskID, PersistedCategory: in.PersistedCategory, CurrentCategory: out.Category, CurrentReasons: out.Reasons, SourceFile: in.Record.Provenance.SourceFile, }) } } if report.TotalChecked > 0 { report.DriftRate = float64(report.Drifted) / float64(report.TotalChecked) } if len(shiftCounts) > 0 { report.ShiftMatrix = make([]CategoryShift, 0, len(shiftCounts)) for k, v := range shiftCounts { report.ShiftMatrix = append(report.ShiftMatrix, CategoryShift{ From: k[0], To: k[1], Count: v, }) } // Sort: largest shifts first, then alphabetical-ish for ties. // Stable ordering matters for downstream display and JSON // determinism in tests. sort.Slice(report.ShiftMatrix, func(i, j int) bool { a, b := report.ShiftMatrix[i], report.ShiftMatrix[j] if a.Count != b.Count { return a.Count > b.Count } if a.From != b.From { return string(a.From) < string(b.From) } return string(a.To) < string(b.To) }) } return report }