package distillation // Audit-baseline lineage — the longitudinal signal that distinguishes // "metrics shifted because the world changed" from "metrics shifted // because we broke something." Mirrors the Rust audit_full.ts // LoadBaseline/AppendBaseline/buildDriftTable shape so a Go-side // audit run can be compared against Rust-side baselines and // vice-versa during the migration. // // Storage: data/_kb/audit_baselines.jsonl, one AuditBaseline per // line, append-only. The LAST line is the most recent. New runs // read the prior baseline, compute drift vs current metrics, then // append a fresh entry. // // Why generic on metric name (vs Rust's pinned p2_evidence_rows // etc.): the Rust phase numbering (p0..p7) doesn't translate to Go // directly. Operators with mixed Rust+Go pipelines should use the // SAME metric names on both sides so the drift table compares // like-for-like. Helper constants below pin the Rust-compat names // for callers running both runtimes. import ( "bufio" "bytes" "encoding/json" "errors" "fmt" "math" "os" "path/filepath" "strings" ) // AuditBaseline is one entry in the audit_baselines.jsonl // longitudinal log. Schema-stable; new metrics land as new keys // in the Metrics map (additive — readers tolerate unknown keys). type AuditBaseline struct { RecordedAt string `json:"recorded_at"` // ISO 8601 UTC GitCommit string `json:"git_commit,omitempty"` // sha of the run's HEAD Metrics map[string]int64 `json:"metrics"` } // AuditBaselineRustCompat lists the metric names the Rust pipeline // emits at audit_full.ts. Go-side callers running an equivalent // audit should use these names so drift compares across runtimes. // Adding new names here requires the Rust side to mint them too. var AuditBaselineRustCompat = []string{ "p2_evidence_rows", "p2_evidence_skips", "p3_accepted", "p3_partial", "p3_rejected", "p3_human", "p4_rag_rows", "p4_sft_rows", "p4_pref_pairs", "p4_total_quarantined", } // DefaultBaselinePath returns the canonical audit baselines path // rooted at the lakehouse data dir. Match Rust's BASELINE_PATH_FOR. func DefaultBaselinePath(root string) string { return filepath.Join(root, "data", "_kb", "audit_baselines.jsonl") } // LoadLastBaseline reads audit_baselines.jsonl and returns the // most recent entry — i.e. the LAST non-empty JSON line. Missing // file or empty file returns (nil, nil), not an error: a fresh // pipeline has no baseline yet, and the caller should treat that // as "first run" via BuildAuditDriftTable's nil-prior handling. // // Malformed last line returns an error (rather than silently // skipping to the previous line) so operators don't lose drift // signal under partial-write corruption. func LoadLastBaseline(path string) (*AuditBaseline, error) { data, err := os.ReadFile(path) if os.IsNotExist(err) { return nil, nil } if err != nil { return nil, fmt.Errorf("read baselines: %w", err) } lines := strings.Split(string(data), "\n") // Walk back to the last non-empty line. for i := len(lines) - 1; i >= 0; i-- { s := strings.TrimSpace(lines[i]) if s == "" { continue } var b AuditBaseline if err := json.Unmarshal([]byte(s), &b); err != nil { return nil, fmt.Errorf("decode last baseline (line %d): %w", i+1, err) } return &b, nil } return nil, nil } // AppendBaseline appends one AuditBaseline as a JSON line to // audit_baselines.jsonl. Creates the parent directory if missing. // Atomic write at the line level: a partial write on disk-full or // crash leaves the file with at most one truncated trailing line, // which LoadLastBaseline will surface as a decode error. func AppendBaseline(path string, b AuditBaseline) error { if b.RecordedAt == "" { return errors.New("audit_baseline: RecordedAt required") } if b.Metrics == nil { return errors.New("audit_baseline: Metrics required (use empty map for zero-metric run)") } if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { return fmt.Errorf("mkdir baseline dir: %w", err) } line, err := json.Marshal(b) if err != nil { return fmt.Errorf("encode baseline: %w", err) } f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) if err != nil { return fmt.Errorf("open baselines: %w", err) } defer f.Close() w := bufio.NewWriter(f) if _, err := w.Write(line); err != nil { return fmt.Errorf("write baseline: %w", err) } if err := w.WriteByte('\n'); err != nil { return fmt.Errorf("write newline: %w", err) } return w.Flush() } // AuditDriftFlag categorizes a single metric's drift verdict. // Mirrors the Rust DriftRow.flag values exactly. type AuditDriftFlag string const ( AuditDriftFlagFirstRun AuditDriftFlag = "first_run" // no prior baseline → can't compute change AuditDriftFlagOK AuditDriftFlag = "ok" // |Δ%| ≤ threshold AuditDriftFlagWarn AuditDriftFlag = "warn" // |Δ%| > threshold ) // DefaultDriftWarnThreshold is 20% — matches Rust's hard-coded // `Math.abs(pct) > 0.20`. Operators tuning sensitivity per metric // can pass a different value to BuildAuditDriftTable. const DefaultDriftWarnThreshold = 0.20 // AuditDriftRow is one metric's drift verdict. PctChange is nil // when prior baseline was zero (division-by-zero) OR when this is // the first run. Encoded as *float64 so JSON omits the field // rather than emitting 0.0 for "unknowable" cases. type AuditDriftRow struct { Metric string `json:"metric"` Baseline *int64 `json:"baseline"` Current int64 `json:"current"` PctChange *float64 `json:"pct_change"` Flag AuditDriftFlag `json:"flag"` } // BuildAuditDriftTable computes per-metric drift between a prior // baseline (nil = first run) and the current metric snapshot. The // result is sorted by metric name for stable display. // // Threshold is the absolute percent-change above which a metric is // flagged "warn". Pass DefaultDriftWarnThreshold (0.20 = 20%) to // match Rust audit_full.ts. Use a per-metric threshold map by // calling BuildAuditDriftTable once per metric subset. func BuildAuditDriftTable(prior *AuditBaseline, current map[string]int64, threshold float64) []AuditDriftRow { if threshold <= 0 { threshold = DefaultDriftWarnThreshold } // Union of metric names so a metric that disappeared from // current still surfaces as "current=0, drifted -100%". names := make(map[string]struct{}, len(current)) for k := range current { names[k] = struct{}{} } if prior != nil { for k := range prior.Metrics { names[k] = struct{}{} } } rows := make([]AuditDriftRow, 0, len(names)) for name := range names { row := AuditDriftRow{Metric: name, Current: current[name]} if prior == nil { row.Flag = AuditDriftFlagFirstRun rows = append(rows, row) continue } priorVal, hadPrior := prior.Metrics[name] if !hadPrior { // New metric in current — treat as first-run for THIS metric. row.Flag = AuditDriftFlagFirstRun rows = append(rows, row) continue } row.Baseline = &priorVal if priorVal == 0 { // Division-by-zero: leave PctChange nil. If current is // also 0 → ok (no change). Otherwise → warn (the metric // went from zero to non-zero, which is always notable). if current[name] == 0 { row.Flag = AuditDriftFlagOK } else { row.Flag = AuditDriftFlagWarn } rows = append(rows, row) continue } pct := float64(current[name]-priorVal) / float64(priorVal) row.PctChange = &pct if math.Abs(pct) > threshold { row.Flag = AuditDriftFlagWarn } else { row.Flag = AuditDriftFlagOK } rows = append(rows, row) } // Sort for stable display + deterministic JSON output. Bubble- // sort by name; size is at most a few dozen metrics, so the // O(n²) cost is irrelevant. for i := 0; i < len(rows); i++ { for j := i + 1; j < len(rows); j++ { if rows[i].Metric > rows[j].Metric { rows[i], rows[j] = rows[j], rows[i] } } } return rows } // FormatAuditDriftTable renders a drift table as a fixed-width // text grid — useful for stdout dumps in audit-full runs. Matches // the Rust output shape so an operator can grep across runtimes // without re-learning the layout. func FormatAuditDriftTable(rows []AuditDriftRow) string { if len(rows) == 0 { return "(no metrics)\n" } var buf bytes.Buffer fmt.Fprintf(&buf, "%-26s %12s %12s %10s %s\n", "metric", "baseline", "current", "Δ%", "flag") for _, r := range rows { baseline := "-" if r.Baseline != nil { baseline = fmt.Sprintf("%d", *r.Baseline) } pct := "-" if r.PctChange != nil { pct = fmt.Sprintf("%+.1f%%", *r.PctChange*100) } fmt.Fprintf(&buf, "%-26s %12s %12d %10s %s\n", r.Metric, baseline, r.Current, pct, r.Flag) } return buf.String() }