golangLAKEHOUSE/internal/distillation/audit_baseline.go

package distillation

// Audit-baseline lineage — the longitudinal signal that distinguishes
// "metrics shifted because the world changed" from "metrics shifted
// because we broke something." Mirrors the Rust audit_full.ts
// LoadBaseline/AppendBaseline/buildDriftTable shape so a Go-side
// audit run can be compared against Rust-side baselines and
// vice-versa during the migration.
//
// Storage: data/_kb/audit_baselines.jsonl, one AuditBaseline per
// line, append-only. The LAST line is the most recent. New runs
// read the prior baseline, compute drift vs current metrics, then
// append a fresh entry.
//
// Why generic on metric name (vs Rust's pinned p2_evidence_rows
// etc.): the Rust phase numbering (p0..p7) doesn't translate to Go
// directly. Operators with mixed Rust+Go pipelines should use the
// SAME metric names on both sides so the drift table compares
// like-for-like. Helper constants below pin the Rust-compat names
// for callers running both runtimes.

import (
	"bufio"
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"math"
	"os"
	"path/filepath"
	"strings"
)

// AuditBaseline is one entry in the audit_baselines.jsonl
// longitudinal log. Schema-stable; new metrics land as new keys
// in the Metrics map (additive — readers tolerate unknown keys).
type AuditBaseline struct {
	RecordedAt string           `json:"recorded_at"`         // ISO 8601 UTC
	GitCommit  string           `json:"git_commit,omitempty"` // sha of the run's HEAD
	Metrics    map[string]int64 `json:"metrics"`
}

// AuditBaselineRustCompat lists the metric names the Rust pipeline
// emits at audit_full.ts. Go-side callers running an equivalent
// audit should use these names so drift compares across runtimes.
// Adding new names here requires the Rust side to mint them too.
var AuditBaselineRustCompat = []string{
	"p2_evidence_rows",
	"p2_evidence_skips",
	"p3_accepted",
	"p3_partial",
	"p3_rejected",
	"p3_human",
	"p4_rag_rows",
	"p4_sft_rows",
	"p4_pref_pairs",
	"p4_total_quarantined",
}

// DefaultBaselinePath returns the canonical audit baselines path
// rooted at the lakehouse data dir. Match Rust's BASELINE_PATH_FOR.
func DefaultBaselinePath(root string) string {
	return filepath.Join(root, "data", "_kb", "audit_baselines.jsonl")
}

// LoadLastBaseline reads audit_baselines.jsonl and returns the
// most recent entry — i.e. the LAST non-empty JSON line. Missing
// file or empty file returns (nil, nil), not an error: a fresh
// pipeline has no baseline yet, and the caller should treat that
// as "first run" via BuildAuditDriftTable's nil-prior handling.
//
// Malformed last line returns an error (rather than silently
// skipping to the previous line) so operators don't lose drift
// signal under partial-write corruption.
func LoadLastBaseline(path string) (*AuditBaseline, error) {
	data, err := os.ReadFile(path)
	if os.IsNotExist(err) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("read baselines: %w", err)
	}
	lines := strings.Split(string(data), "\n")
	// Walk back to the last non-empty line.
	for i := len(lines) - 1; i >= 0; i-- {
		s := strings.TrimSpace(lines[i])
		if s == "" {
			continue
		}
		var b AuditBaseline
		if err := json.Unmarshal([]byte(s), &b); err != nil {
			return nil, fmt.Errorf("decode last baseline (line %d): %w", i+1, err)
		}
		return &b, nil
	}
	return nil, nil
}

// AppendBaseline appends one AuditBaseline as a JSON line to
// audit_baselines.jsonl. Creates the parent directory if missing.
// Atomic write at the line level: a partial write on disk-full or
// crash leaves the file with at most one truncated trailing line,
// which LoadLastBaseline will surface as a decode error.
func AppendBaseline(path string, b AuditBaseline) error {
	if b.RecordedAt == "" {
		return errors.New("audit_baseline: RecordedAt required")
	}
	if b.Metrics == nil {
		return errors.New("audit_baseline: Metrics required (use empty map for zero-metric run)")
	}
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		return fmt.Errorf("mkdir baseline dir: %w", err)
	}
	line, err := json.Marshal(b)
	if err != nil {
		return fmt.Errorf("encode baseline: %w", err)
	}
	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return fmt.Errorf("open baselines: %w", err)
	}
	defer f.Close()
	w := bufio.NewWriter(f)
	if _, err := w.Write(line); err != nil {
		return fmt.Errorf("write baseline: %w", err)
	}
	if err := w.WriteByte('\n'); err != nil {
		return fmt.Errorf("write newline: %w", err)
	}
	return w.Flush()
}

// AuditDriftFlag categorizes a single metric's drift verdict.
// Mirrors the Rust DriftRow.flag values exactly.
type AuditDriftFlag string

const (
	AuditDriftFlagFirstRun AuditDriftFlag = "first_run" // no prior baseline → can't compute change
	AuditDriftFlagOK       AuditDriftFlag = "ok"        // |Δ%| ≤ threshold
	AuditDriftFlagWarn     AuditDriftFlag = "warn"      // |Δ%| > threshold
)

// DefaultDriftWarnThreshold is 20% — matches Rust's hard-coded
// `Math.abs(pct) > 0.20`. Operators tuning sensitivity per metric
// can pass a different value to BuildAuditDriftTable.
const DefaultDriftWarnThreshold = 0.20

// AuditDriftRow is one metric's drift verdict. PctChange is nil
// when prior baseline was zero (division-by-zero) OR when this is
// the first run. Encoded as *float64 so JSON omits the field
// rather than emitting 0.0 for "unknowable" cases.
type AuditDriftRow struct {
	Metric    string         `json:"metric"`
	Baseline  *int64         `json:"baseline"`
	Current   int64          `json:"current"`
	PctChange *float64       `json:"pct_change"`
	Flag      AuditDriftFlag `json:"flag"`
}

// BuildAuditDriftTable computes per-metric drift between a prior
// baseline (nil = first run) and the current metric snapshot. The
// result is sorted by metric name for stable display.
//
// Threshold is the absolute percent-change above which a metric is
// flagged "warn". Pass DefaultDriftWarnThreshold (0.20 = 20%) to
// match Rust audit_full.ts. Use a per-metric threshold map by
// calling BuildAuditDriftTable once per metric subset.
func BuildAuditDriftTable(prior *AuditBaseline, current map[string]int64, threshold float64) []AuditDriftRow {
	if threshold <= 0 {
		threshold = DefaultDriftWarnThreshold
	}
	// Union of metric names so a metric that disappeared from
	// current still surfaces as "current=0, drifted -100%".
	names := make(map[string]struct{}, len(current))
	for k := range current {
		names[k] = struct{}{}
	}
	if prior != nil {
		for k := range prior.Metrics {
			names[k] = struct{}{}
		}
	}
	rows := make([]AuditDriftRow, 0, len(names))
	for name := range names {
		row := AuditDriftRow{Metric: name, Current: current[name]}
		if prior == nil {
			row.Flag = AuditDriftFlagFirstRun
			rows = append(rows, row)
			continue
		}
		priorVal, hadPrior := prior.Metrics[name]
		if !hadPrior {
			// New metric in current — treat as first-run for THIS metric.
			row.Flag = AuditDriftFlagFirstRun
			rows = append(rows, row)
			continue
		}
		row.Baseline = &priorVal
		if priorVal == 0 {
			// Division-by-zero: leave PctChange nil. If current is
			// also 0 → ok (no change). Otherwise → warn (the metric
			// went from zero to non-zero, which is always notable).
			if current[name] == 0 {
				row.Flag = AuditDriftFlagOK
			} else {
				row.Flag = AuditDriftFlagWarn
			}
			rows = append(rows, row)
			continue
		}
		pct := float64(current[name]-priorVal) / float64(priorVal)
		row.PctChange = &pct
		if math.Abs(pct) > threshold {
			row.Flag = AuditDriftFlagWarn
		} else {
			row.Flag = AuditDriftFlagOK
		}
		rows = append(rows, row)
	}
	// Sort for stable display + deterministic JSON output. Bubble-
	// sort by name; size is at most a few dozen metrics, so the
	// O(n²) cost is irrelevant.
	for i := 0; i < len(rows); i++ {
		for j := i + 1; j < len(rows); j++ {
			if rows[i].Metric > rows[j].Metric {
				rows[i], rows[j] = rows[j], rows[i]
			}
		}
	}
	return rows
}

// FormatAuditDriftTable renders a drift table as a fixed-width
// text grid — useful for stdout dumps in audit-full runs. Matches
// the Rust output shape so an operator can grep across runtimes
// without re-learning the layout.
func FormatAuditDriftTable(rows []AuditDriftRow) string {
	if len(rows) == 0 {
		return "(no metrics)\n"
	}
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "%-26s %12s %12s %10s %s\n", "metric", "baseline", "current", "Δ%", "flag")
	for _, r := range rows {
		baseline := "-"
		if r.Baseline != nil {
			baseline = fmt.Sprintf("%d", *r.Baseline)
		}
		pct := "-"
		if r.PctChange != nil {
			pct = fmt.Sprintf("%+.1f%%", *r.PctChange*100)
		}
		fmt.Fprintf(&buf, "%-26s %12s %12d %10s %s\n",
			r.Metric, baseline, r.Current, pct, r.Flag)
	}
	return buf.String()
}