root ee2a40c505 audit-FULL: port phases 1/2/5/7 — only acceptance.ts (TS-only) remains skipped
Closes 4 of the 5 phases the initial audit-FULL port left as
deferred. The pattern: most "deferred" phases didn't actually need
the un-ported Rust pieces — they were observer-mode by design and
just needed to read existing on-disk artifacts.

Phase 1 (schema validators) → ported via exec.Command:
  Invokes `go test ./internal/distillation/...` — the Go equivalent
  of Rust's `bun test auditor/schemas/distillation/`. New
  GoTestModule field on AuditFullOptions controls the package
  pattern; empty disables the invocation (test mode, prevents
  recursion when audit-full is invoked from inside `go test`).

Phase 2 (evidence materialization) → ported as observer:
  Reads data/evidence/ directly and tallies rows + tier-1 source
  hits. Doesn't re-run the materializer (which is Rust-side TS).
  Emits p2_evidence_rows + p2_evidence_skips metrics matching
  Rust shape — drop-in audit_baselines.jsonl entries possible.

Phase 5 (run summary) → ported as observer:
  Reads reports/distillation/{run_id}/summary.json + 5 stage
  receipts. Validates schema_version=1, run_hash sha256, git_commit
  40-char hex, all stage receipts decode as JSON. Full schema
  validation (StageReceipt schema) is intentionally NOT ported —
  it would require porting the TS schemas/distillation/ validators
  in full; basic shape checks catch the load-bearing invariants.

Phase 7 (replay log) → ported as observer:
  Reads data/_kb/replay_runs.jsonl, validates last 50 rows parse
  as JSON. Skips the live-replay invocation that Rust's phase 7
  also does — porting Rust replay.ts is substantial and not in
  scope. The "log shape sanity" check is what audit-full actually
  needs; the live invocation is a separate concern.

Phase 6 (acceptance gate) — STILL SKIPPED:
  Rust acceptance.ts is a TS-only fixture harness with bun-specific
  deps. Porting the fixtures (tests/fixtures/distillation/acceptance/)
  + the 22-invariant runner to Go is an ADR-worthy undertaking.
  Documented in the header comment.

Live-data probe (against /home/profit/lakehouse):
  Skips count: 4 → 1 (only phase 6).
  Required checks: 6/6 → 12/12 PASS.
  New metric: p2_evidence_rows=1055, BYTE-EQUAL to the Rust
  pipeline's collect.records_out from the latest summary.json.
  Cross-runtime parity now extends across phases 0/1/2/3/4/5/7.

6 new tests:
- TestPhase2_EvidenceTallyFromOnDisk: row + tier-1-hit tallying
- TestPhase5_FullSummaryFlow: complete run-summary fixture passes
- TestPhase5_ShortRunHashCaught: bad run_hash fails required check
- TestPhase7_ReplayLogReadsFromDisk: row-count reporting
- TestPhase7_MalformedTailRowsCaught: structural parse failure
- TestRunAuditFull_FullFixtureFlow updated to seed evidence/ +
  reports/distillation/ for the phases now wired.

Cleanup: removed local sortStrings helper (replaced with sort.Strings
now that `sort` is imported for phase 5's mtime-sort).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 02:35:13 -05:00

395 lines
16 KiB
Go

package distillation
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestRunAuditFull_EmptyRoot: missing data directories yield
// failures on required checks but don't error out the run. An
// operator running on a fresh box still sees the report with the
// expected "missing" actuals.
func TestRunAuditFull_EmptyRoot(t *testing.T) {
	report := RunAuditFull(AuditFullOptions{Root: t.TempDir()})
	if n := len(report.Checks); n == 0 {
		t.Fatalf("expected check rows even on empty root, got %d", n)
	}
	// Phase 3's "scored-runs on disk" is required and must fail,
	// so the failure count rises by at least 1.
	if report.Failed == 0 {
		t.Errorf("expected ≥1 required failure on empty root, got %d", report.Failed)
	}
}
// TestPhase2_EvidenceTallyFromOnDisk seeds data/evidence/ and
// asserts phase 2 reads + tallies the rows correctly. The
// observer-mode port (no live materializer invocation) means the
// check works against any-runtime-emitted evidence files.
//
// Fix: the tier-1 loop previously passed vacuously when the check
// row was absent from the report; a found guard now catches that
// (matching the Phase-4 tests' convention).
func TestPhase2_EvidenceTallyFromOnDisk(t *testing.T) {
	tmp := t.TempDir()
	dir := filepath.Join(tmp, "data", "evidence", "2026", "05", "01")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	// 3 records: 2 from scrum_reviews (a tier-1 source), 1 from
	// "other_source" (not in tier-1 list). Phase 2 should tally
	// 3 rows total + flag 1/4 tier-1 sources hit.
	jsonl := `{"run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a","recorded_at":"2026-05-01T00:00:00Z"}}
{"run_id":"r2","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"b","recorded_at":"2026-05-01T00:00:00Z"}}
{"run_id":"r3","provenance":{"source_file":"data/_kb/other_source.jsonl","sig_hash":"c","recorded_at":"2026-05-01T00:00:00Z"}}
`
	if err := os.WriteFile(filepath.Join(dir, "evidence.jsonl"), []byte(jsonl), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	report := RunAuditFull(AuditFullOptions{Root: tmp}) // GoTestModule empty disables phase 1
	if got := report.Metrics["p2_evidence_rows"]; got != 3 {
		t.Errorf("p2_evidence_rows: got %d, want 3", got)
	}
	if got := report.Metrics["p2_evidence_skips"]; got != 0 {
		t.Errorf("p2_evidence_skips: got %d, want 0", got)
	}
	// Find the tier-1 hit count check; fail loudly if it is missing.
	found := false
	for _, c := range report.Checks {
		if c.Phase != 2 || c.Name != "tier-1 sources each materialize ≥1 row" {
			continue
		}
		found = true
		if !c.Passed {
			t.Errorf("expected tier-1 check to pass with 1/4 sources hit (≥1 = ok), got %+v", c)
		}
		if !strings.Contains(c.Actual, "1/4") || !strings.Contains(c.Actual, "scrum_reviews") {
			t.Errorf("tier-1 actual missing expected counts: %s", c.Actual)
		}
	}
	if !found {
		t.Errorf("tier-1 check not present in report")
	}
}
// TestPhase5_FullSummaryFlow seeds reports/distillation/{run_id}/
// with summary.json + 5 stage receipts and asserts phase 5 passes
// all required checks.
//
// Fix: previously the assertion loop passed vacuously if phase 5
// emitted no checks at all; the test now requires at least one
// phase-5 check row to be present.
func TestPhase5_FullSummaryFlow(t *testing.T) {
	tmp := t.TempDir()
	runID := "test-run-id"
	runDir := filepath.Join(tmp, "reports", "distillation", runID)
	if err := os.MkdirAll(runDir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	// 5 stage receipts (parse-as-JSON only — full schema validation
	// is Rust-side).
	for _, s := range []string{"collect", "score", "export-rag", "export-sft", "export-preference"} {
		if err := os.WriteFile(filepath.Join(runDir, s+".json"), []byte(`{}`), 0o644); err != nil {
			t.Fatalf("write %s: %v", s, err)
		}
	}
	// summary.json with valid schema_version, 40-char git_commit, 64-char run_hash.
	summary := `{
"schema_version": 1,
"run_id": "test-run-id",
"git_commit": "0123456789abcdef0123456789abcdef01234567",
"run_hash": "a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0",
"stages": [{"stage":"collect"},{"stage":"score"},{"stage":"export-rag"},{"stage":"export-sft"},{"stage":"export-preference"}]
}`
	if err := os.WriteFile(filepath.Join(runDir, "summary.json"), []byte(summary), 0o644); err != nil {
		t.Fatalf("write summary: %v", err)
	}
	report := RunAuditFull(AuditFullOptions{Root: tmp})
	phase5Seen := 0
	for _, c := range report.Checks {
		if c.Phase != 5 {
			continue
		}
		phase5Seen++
		if c.Required && !c.Passed {
			t.Errorf("phase 5 required check failed: %s — actual=%q", c.Name, c.Actual)
		}
	}
	if phase5Seen == 0 {
		t.Errorf("no phase 5 checks present in report")
	}
}
// TestPhase5_ShortRunHashCaught: a run_hash that isn't 64-char hex
// must fail the required check.
//
// Fix: the receipt + summary writes previously discarded their
// errors with `_ =`; a silently-failed write would make the test
// assert against the wrong fixture state.
func TestPhase5_ShortRunHashCaught(t *testing.T) {
	tmp := t.TempDir()
	runDir := filepath.Join(tmp, "reports", "distillation", "id")
	if err := os.MkdirAll(runDir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	for _, s := range []string{"collect", "score", "export-rag", "export-sft", "export-preference"} {
		if err := os.WriteFile(filepath.Join(runDir, s+".json"), []byte(`{}`), 0o644); err != nil {
			t.Fatalf("write %s: %v", s, err)
		}
	}
	bad := `{"schema_version":1,"run_id":"id","git_commit":"0123456789abcdef0123456789abcdef01234567","run_hash":"too_short","stages":[]}`
	if err := os.WriteFile(filepath.Join(runDir, "summary.json"), []byte(bad), 0o644); err != nil {
		t.Fatalf("write summary: %v", err)
	}
	report := RunAuditFull(AuditFullOptions{Root: tmp})
	hashFailed := false
	for _, c := range report.Checks {
		if c.Phase == 5 && c.Name == "run_hash is sha256" && !c.Passed {
			hashFailed = true
		}
	}
	if !hashFailed {
		t.Errorf("expected run_hash sha256 check to fail on too_short")
	}
}
// TestPhase7_ReplayLogReadsFromDisk seeds a replay_runs.jsonl and
// asserts phase 7 reports the correct row count.
//
// Fix: the loop previously passed vacuously when the named check
// row was absent from the report; a found guard now catches that
// (matching the Phase-4 tests' convention).
func TestPhase7_ReplayLogReadsFromDisk(t *testing.T) {
	tmp := t.TempDir()
	dir := filepath.Join(tmp, "data", "_kb")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	jsonl := `{"task":"a","passed":true}
{"task":"b","passed":true}
{"task":"c","passed":false}
`
	if err := os.WriteFile(filepath.Join(dir, "replay_runs.jsonl"), []byte(jsonl), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	report := RunAuditFull(AuditFullOptions{Root: tmp})
	found := false
	for _, c := range report.Checks {
		if c.Phase != 7 || c.Name != "replay_runs.jsonl exists" {
			continue
		}
		found = true
		if !c.Passed {
			t.Errorf("expected pass, got %+v", c)
		}
		if !strings.Contains(c.Actual, "3 rows") {
			t.Errorf("expected '3 rows' in actual, got %s", c.Actual)
		}
	}
	if !found {
		t.Errorf("replay-log existence check not present in report")
	}
}
// TestPhase7_MalformedTailRowsCaught seeds a replay log with a
// trailing malformed row and asserts the structural check fires.
//
// Fix: the fixture write previously discarded its error with
// `_ =`; a silently-failed write would make the test pass for
// the wrong reason (missing file, not malformed tail).
func TestPhase7_MalformedTailRowsCaught(t *testing.T) {
	tmp := t.TempDir()
	dir := filepath.Join(tmp, "data", "_kb")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	jsonl := `{"task":"a"}
{"task":"b"}
not valid json garbage
`
	if err := os.WriteFile(filepath.Join(dir, "replay_runs.jsonl"), []byte(jsonl), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	report := RunAuditFull(AuditFullOptions{Root: tmp})
	parseFailed := false
	for _, c := range report.Checks {
		if c.Phase == 7 && c.Name == "replay_runs.jsonl tail rows parse as JSON" && !c.Passed {
			parseFailed = true
		}
	}
	if !parseFailed {
		t.Errorf("expected tail-row parse check to fail on malformed line")
	}
}
// TestRunAuditFull_FullFixtureFlow seeds a complete data layout
// and verifies all phases produce the expected metrics + a clean
// PASS verdict. Locks the end-to-end orchestration.
//
// Fixes: the phase-5 receipt and summary writes previously
// discarded their errors with `_ =` (a silently-failed write would
// skew the verdict); the repeated mkdir+write sequence is factored
// into a seed helper; the metric assertions are table-driven.
func TestRunAuditFull_FullFixtureFlow(t *testing.T) {
	tmp := t.TempDir()
	// seed writes content to tmp/<elem...>, creating parent
	// directories, and fails the test on any filesystem error.
	seed := func(content string, elem ...string) {
		t.Helper()
		path := filepath.Join(append([]string{tmp}, elem...)...)
		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
			t.Fatalf("mkdir %s: %v", filepath.Dir(path), err)
		}
		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
			t.Fatalf("write %s: %v", path, err)
		}
	}
	// scored-runs: one accepted record (passes phase 3 required check)
	seed(`{"category":"accepted","evidence_run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0","recorded_at":"2026-05-01T00:00:00Z"}}
{"category":"partially_accepted","evidence_run_id":"r2","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff1","recorded_at":"2026-05-01T00:00:00Z"}}
{"category":"rejected","evidence_run_id":"r3","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff2","recorded_at":"2026-05-01T00:00:00Z"}}
`, "data", "scored-runs", "2026", "05", "01", "run.jsonl")
	// SFT export: only legal quality scores, valid sig_hash on every row.
	seed(`{"quality_score":"accepted","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}}
{"quality_score":"partially_accepted","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff1"}}
`, "exports", "sft", "instruction_response.jsonl")
	// RAG: no rejected leaks
	seed(`{"success_score":"accepted","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}}
`, "exports", "rag", "playbooks.jsonl")
	// Preference: distinct chosen vs rejected, no self-pairs
	seed(`{"chosen_run_id":"a","rejected_run_id":"b","chosen":"good","rejected":"bad","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}}
`, "exports", "preference", "chosen_rejected.jsonl")
	// Phase 2: evidence directory with at least one row.
	seed(`{"run_id":"r1","provenance":{"source_file":"data/_kb/scrum_reviews.jsonl","sig_hash":"a","recorded_at":"2026-05-01T00:00:00Z"}}
`, "data", "evidence", "2026", "05", "01", "evidence.jsonl")
	// Phase 5: reports/distillation/{run_id}/ with summary + 5 receipts.
	for _, s := range []string{"collect", "score", "export-rag", "export-sft", "export-preference"} {
		seed(`{}`, "reports", "distillation", "test-run", s+".json")
	}
	seed(`{"schema_version":1,"run_id":"test-run","git_commit":"0123456789abcdef0123456789abcdef01234567","run_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0","stages":[]}`,
		"reports", "distillation", "test-run", "summary.json")

	report := RunAuditFull(AuditFullOptions{Root: tmp})
	if report.Failed != 0 {
		t.Errorf("clean fixture should have 0 required failures, got %d", report.Failed)
		for _, c := range report.Checks {
			if c.Required && !c.Passed {
				t.Logf(" failed: phase=%d name=%q actual=%q", c.Phase, c.Name, c.Actual)
			}
		}
	}
	// Metrics populated correctly.
	wantMetrics := map[string]int64{
		"p3_accepted":   1,
		"p3_partial":    1,
		"p3_rejected":   1,
		"p4_sft_rows":   2,
		"p4_rag_rows":   1,
		"p4_pref_pairs": 1,
	}
	for name, want := range wantMetrics {
		if got := report.Metrics[name]; got != want {
			t.Errorf("%s: got %d, want %d", name, got, want)
		}
	}
}
// TestPhase4_SftFirewallCatchesRejected: contamination must never
// leak into SFT export. Seeds a row carrying a forbidden
// quality_score and asserts the firewall flags it.
func TestPhase4_SftFirewallCatchesRejected(t *testing.T) {
	tmp := t.TempDir()
	sftDir := filepath.Join(tmp, "exports", "sft")
	if err := os.MkdirAll(sftDir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	row := `{"quality_score":"rejected","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}}` + "\n"
	if err := os.WriteFile(filepath.Join(sftDir, "instruction_response.jsonl"), []byte(row), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	report := RunAuditFull(AuditFullOptions{Root: tmp})
	var seen bool
	for _, c := range report.Checks {
		if c.Phase != 4 || !strings.Contains(c.Name, "SFT contamination firewall") {
			continue
		}
		seen = true
		if c.Passed {
			t.Errorf("firewall should fail on rejected SFT row, but check passed")
		}
		if c.Actual != "1" {
			t.Errorf("firewall actual: got %q, want '1'", c.Actual)
		}
	}
	if !seen {
		t.Errorf("firewall check not present in report")
	}
}
// TestPhase4_PreferenceSelfPairCaught: identical chosen + rejected
// run_ids are structural noise and must be flagged.
func TestPhase4_PreferenceSelfPairCaught(t *testing.T) {
	tmp := t.TempDir()
	prefDir := filepath.Join(tmp, "exports", "preference")
	if err := os.MkdirAll(prefDir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	row := `{"chosen_run_id":"X","rejected_run_id":"X","chosen":"a","rejected":"b","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}}` + "\n"
	if err := os.WriteFile(filepath.Join(prefDir, "chosen_rejected.jsonl"), []byte(row), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	report := RunAuditFull(AuditFullOptions{Root: tmp})
	var seen bool
	for _, c := range report.Checks {
		if c.Phase != 4 || !strings.Contains(c.Name, "self-pairs") {
			continue
		}
		seen = true
		if c.Passed {
			t.Errorf("self-pair check should fail, but passed")
		}
	}
	if !seen {
		t.Errorf("self-pair check not present in report")
	}
}
// TestPhase4_ProvenanceRequiresValidSha256: bad sig_hash must be
// flagged. Locks the regex shape — only 64-char lowercase hex.
//
// Fix: the loop previously passed vacuously when the sig_hash
// check row was absent from the report; a found guard now catches
// that, matching the other Phase-4 tests.
func TestPhase4_ProvenanceRequiresValidSha256(t *testing.T) {
	tmp := t.TempDir()
	sftDir := filepath.Join(tmp, "exports", "sft")
	if err := os.MkdirAll(sftDir, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	// Three rows: one valid, one wrong-length, one wrong-charset (uppercase).
	bad := `{"quality_score":"accepted","provenance":{"sig_hash":"a1b2c3d4e5f60718293a4b5c6d7e8f900112233445566778899aabbccddeeff0"}}
{"quality_score":"accepted","provenance":{"sig_hash":"too_short"}}
{"quality_score":"accepted","provenance":{"sig_hash":"A1B2C3D4E5F60718293A4B5C6D7E8F900112233445566778899AABBCCDDEEFF0"}}
`
	if err := os.WriteFile(filepath.Join(sftDir, "instruction_response.jsonl"), []byte(bad), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	report := RunAuditFull(AuditFullOptions{Root: tmp})
	found := false
	for _, c := range report.Checks {
		if c.Phase != 4 || !strings.Contains(c.Name, "sig_hash") {
			continue
		}
		found = true
		if c.Actual != "2 missing" {
			t.Errorf("provenance check: got actual=%q, want '2 missing'", c.Actual)
		}
		if c.Passed {
			t.Errorf("provenance check should fail with 2 bad sig_hashes")
		}
	}
	if !found {
		t.Errorf("sig_hash provenance check not present in report")
	}
}
// TestFormatAuditFullReport_RendersCheckTable: smoke-test the
// Markdown formatter — operators should see the right verdict +
// per-phase rows.
func TestFormatAuditFullReport_RendersCheckTable(t *testing.T) {
report := PhaseCheckReport{
GitHEAD: "deadbeef",
Checks: []PhaseCheck{
{Phase: 0, Name: "test check", Expected: "x", Actual: "x", Passed: true, Required: true},
{Phase: 4, Name: "fail check", Expected: "0", Actual: "5", Passed: false, Required: true},
},
Metrics: map[string]int64{"p3_accepted": 42, "p4_sft_rows": 17},
Failed: 1,
Skipped: 4,
}
out := FormatAuditFullReport(report)
for _, want := range []string{"FAIL", "deadbeef", "test check", "fail check", "p3_accepted", "42", "deferred"} {
if !strings.Contains(out, want) {
t.Errorf("expected %q in formatted report:\n%s", want, out)
}
}
}