Closes 4 of the 5 phases the initial audit-FULL port left as
deferred. The pattern: most "deferred" phases didn't actually need
the un-ported Rust pieces — they were observer-mode by design and
just needed to read existing on-disk artifacts.
Phase 1 (schema validators) → ported via exec.Command:
Invokes `go test ./internal/distillation/...` — the Go equivalent
of Rust's `bun test auditor/schemas/distillation/`. New
GoTestModule field on AuditFullOptions controls the package
pattern; empty disables the invocation (test mode, prevents
recursion when audit-full is invoked from inside `go test`).
Phase 2 (evidence materialization) → ported as observer:
Reads data/evidence/ directly and tallies rows + tier-1 source
hits. Doesn't re-run the materializer (which is Rust-side TS).
Emits p2_evidence_rows + p2_evidence_skips metrics matching
Rust shape — drop-in audit_baselines.jsonl entries possible.
Phase 5 (run summary) → ported as observer:
Reads reports/distillation/{run_id}/summary.json + 5 stage
receipts. Validates schema_version=1, run_hash sha256, git_commit
40-char hex, all stage receipts decode as JSON. Full schema
validation (StageReceipt schema) is intentionally NOT ported —
it would require porting the TS schemas/distillation/ validators
in full; basic shape checks catch the load-bearing invariants.
Phase 7 (replay log) → ported as observer:
Reads data/_kb/replay_runs.jsonl, validates last 50 rows parse
as JSON. Skips the live-replay invocation that Rust's phase 7
also does — porting Rust replay.ts is substantial and not in
scope. The "log shape sanity" check is what audit-full actually
needs; the live invocation is a separate concern.
Phase 6 (acceptance gate) — STILL SKIPPED:
Rust acceptance.ts is a TS-only fixture harness with bun-specific
deps. Porting the fixtures (tests/fixtures/distillation/acceptance/)
+ the 22-invariant runner to Go is an ADR-worthy undertaking.
Documented in the header comment.
Live-data probe (against /home/profit/lakehouse):
Skips count: 4 → 1 (only phase 6).
Required checks: 6/6 → 12/12 PASS.
New metric: p2_evidence_rows=1055, BYTE-EQUAL to the Rust
pipeline's collect.records_out from the latest summary.json.
Cross-runtime parity now extends across phases 0/1/2/3/4/5/7.
6 new tests:
- TestPhase2_EvidenceTallyFromOnDisk: row + tier-1-hit tallying
- TestPhase5_FullSummaryFlow: complete run-summary fixture passes
- TestPhase5_ShortRunHashCaught: bad run_hash fails required check
- TestPhase7_ReplayLogReadsFromDisk: row-count reporting
- TestPhase7_MalformedTailRowsCaught: structural parse failure
- TestRunAuditFull_FullFixtureFlow updated to seed evidence/ +
reports/distillation/ for the phases now wired.
Cleanup: removed local sortStrings helper (replaced with sort.Strings
now that `sort` is imported for phase 5's mtime-sort).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
808 lines
28 KiB
Go
808 lines
28 KiB
Go
package distillation
|
|
|
|
// Audit-FULL pipeline — Go port of scripts/distillation/audit_full.ts
|
|
// (Rust legacy). Runs the metric-collection passes that produce
|
|
// audit_baselines.jsonl entries. Pure observability: never modifies
|
|
// pipeline data, only reads and tallies.
|
|
//
|
|
// Phase coverage in this port:
|
|
// - Phase 0 (file presence) ✓ ported
|
|
// - Phase 1 (schema validators) ✓ ported (invokes `go test`
|
|
// on internal/distillation)
|
|
// - Phase 2 (evidence materialization) ✓ ported as observer — reads
|
|
// existing data/evidence/
|
|
// and tallies rows. Doesn't
|
|
// re-run the materializer
|
|
// (which is Rust-side); the
|
|
// audit-FULL discipline is
|
|
// OBSERVATION, not re-execution.
|
|
// - Phase 3 (scored-runs distribution) ✓ ported
|
|
// - Phase 4 (contamination firewall) ✓ ported
|
|
// - Phase 5 (receipts validation) ✓ ported as observer — reads
|
|
// reports/distillation/{run_id}/
|
|
// summary.json + 5 stage
|
|
// receipts (any-runtime artifacts).
|
|
// - Phase 6 (acceptance gate) ✗ skipped — TS-only fixture
|
|
// harness at scripts/distillation/
|
|
// acceptance.ts with bun-
|
|
// specific deps. Porting the
|
|
// fixtures + invariant runner
|
|
// to Go is its own ADR-worth
|
|
// of work; out of scope.
|
|
// - Phase 7 (replay log shape) ✓ ported as observer — reads
|
|
// data/_kb/replay_runs.jsonl
|
|
// and checks shape, doesn't
|
|
// re-run replay (Rust-side
|
|
// replay.ts is the producer).
|
|
//
|
|
// Output: a structured PhaseCheckReport plus a Markdown summary.
|
|
// Operators run this from cmd/audit_full to validate a Go-side
|
|
// distillation pipeline run produced sane outputs.
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"io/fs"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"unicode/utf8"
)
|
|
|
|
// PhaseCheck is one observable check within a phase. Mirrors the
// Rust shape exactly — Markdown rendering uses the same column
// layout so cross-runtime diff'ing is meaningful.
type PhaseCheck struct {
	// Phase is the audit phase number this check belongs to (0–7).
	Phase int `json:"phase"`
	// Name is the human-readable check title shown in the report table.
	Name string `json:"name"`
	// Expected describes the invariant being asserted.
	Expected string `json:"expected"`
	// Actual is the observed value, pre-rendered as a short string.
	Actual string `json:"actual"`
	// Passed records whether Actual satisfied Expected.
	Passed bool `json:"passed"`
	Required bool `json:"required"` // false → informational only, doesn't fail audit
	// Notes carries operator-facing context (remediation hints etc.);
	// rendered as extra table rows by FormatAuditFullReport.
	Notes []string `json:"notes,omitempty"`
}
|
|
|
|
// PhaseCheckReport is the aggregate result of one audit-full run.
// Metrics is the AuditBaseline-shape metric snapshot that the
// caller can pass to AppendBaseline to grow the longitudinal log.
type PhaseCheckReport struct {
	// Checks lists every phase check in emission order (phase 0 first).
	Checks []PhaseCheck `json:"checks"`
	// Metrics maps metric name (e.g. "p2_evidence_rows") to its value.
	Metrics map[string]int64 `json:"metrics"`
	Failed int `json:"failed"` // count of REQUIRED checks that failed
	// NOTE: the Go field is named Skipped but serializes under the
	// key "deferred_phases" — keep the tag when renaming.
	Skipped int `json:"deferred_phases"` // phases not yet ported
	// GitHEAD is the caller-resolved commit hash, echoed into reports.
	GitHEAD string `json:"git_head,omitempty"`
}
|
|
|
|
// AuditFullOptions controls a single audit-full run. Root is the
// data dir (defaults to LH_DISTILL_ROOT or /home/profit/lakehouse
// to keep operators running both runtimes hitting the same paths).
type AuditFullOptions struct {
	// Root is the data directory every phase reads from.
	Root string
	GitHEAD string // optional — caller resolves and passes through
	// GoTestModule is the package pattern Phase 1 invokes via
	// `go test` (e.g. "./internal/distillation/..."). When empty,
	// the live `go test` invocation is skipped entirely — tests
	// pass "" to prevent recursing into an in-progress `go test`
	// run (see auditPhase1).
	GoTestModule string
}
|
|
|
|
// RunAuditFull orchestrates the ported phases (0, 3, 4) and
|
|
// returns the aggregated report. Each phase is independent; a
|
|
// phase that errors is recorded as a failed check rather than
|
|
// aborting the run, matching Rust's "always emit a report" stance.
|
|
func RunAuditFull(opts AuditFullOptions) PhaseCheckReport {
|
|
if opts.Root == "" {
|
|
if env := os.Getenv("LH_DISTILL_ROOT"); env != "" {
|
|
opts.Root = env
|
|
} else {
|
|
opts.Root = "/home/profit/lakehouse"
|
|
}
|
|
}
|
|
report := PhaseCheckReport{
|
|
Metrics: make(map[string]int64),
|
|
GitHEAD: opts.GitHEAD,
|
|
Skipped: 1, // only phase 6 (TS-only acceptance harness) deferred
|
|
}
|
|
auditPhase0(opts.Root, &report)
|
|
auditPhase1(opts.Root, &report, opts.GoTestModule)
|
|
auditPhase2(opts.Root, &report)
|
|
auditPhase3(opts.Root, &report)
|
|
auditPhase4(opts.Root, &report)
|
|
auditPhase5(opts.Root, &report)
|
|
// phase 6 intentionally skipped — see header comment
|
|
auditPhase7(opts.Root, &report)
|
|
for _, c := range report.Checks {
|
|
if c.Required && !c.Passed {
|
|
report.Failed++
|
|
}
|
|
}
|
|
return report
|
|
}
|
|
|
|
// ── Phase 0: file presence ─────────────────────────────────────────
|
|
|
|
func auditPhase0(root string, report *PhaseCheckReport) {
|
|
// The recon doc is Rust-specific (docs/recon/local-distillation-
|
|
// recon.md); a Go-side equivalent would live in the
|
|
// golangLAKEHOUSE repo. For audit-full's purposes, we treat its
|
|
// presence as informational rather than required when running
|
|
// against a non-Rust root.
|
|
reconPath := filepath.Join(root, "docs", "recon", "local-distillation-recon.md")
|
|
exists := fileExists(reconPath)
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 0, Name: "recon doc exists",
|
|
Expected: "docs/recon/local-distillation-recon.md present",
|
|
Actual: fmt.Sprintf("%v", exists),
|
|
Passed: exists, Required: false, // informational on Go-side runs
|
|
})
|
|
|
|
tier1 := []string{
|
|
"data/_kb/distilled_facts.jsonl",
|
|
"data/_kb/scrum_reviews.jsonl",
|
|
"data/_kb/audit_facts.jsonl",
|
|
"data/_kb/mode_experiments.jsonl",
|
|
}
|
|
missing := []string{}
|
|
for _, p := range tier1 {
|
|
if !fileExists(filepath.Join(root, p)) {
|
|
missing = append(missing, p)
|
|
}
|
|
}
|
|
notes := []string{}
|
|
if len(missing) > 0 {
|
|
notes = append(notes, "fresh-clone or post-rotation environment — Phase 2 will tally as rows_present=false; not a hard fail")
|
|
}
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 0, Name: "tier-1 source streams present",
|
|
Expected: "all 4 tier-1 jsonls on disk",
|
|
Actual: func() string {
|
|
if len(missing) == 0 {
|
|
return "all present"
|
|
}
|
|
return "missing: " + strings.Join(missing, ", ")
|
|
}(),
|
|
Passed: len(missing) == 0, Required: false,
|
|
Notes: notes,
|
|
})
|
|
}
|
|
|
|
// ── Phase 1: schema validators ─────────────────────────────────────
|
|
|
|
// auditPhase1 invokes `go test` on the distillation package — the Go
|
|
// equivalent of Rust's `bun test auditor/schemas/distillation/`. The
|
|
// audit-FULL semantic: "do the schema validators still pass on
|
|
// fixtures?" When module == "" (test mode) the phase records a
|
|
// skipped-with-rationale check rather than recursing into itself.
|
|
func auditPhase1(root string, report *PhaseCheckReport, module string) {
|
|
if module == "" {
|
|
// Test-disabled mode: record but don't invoke (would recurse
|
|
// when called from a `go test` already in progress).
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 1, Name: "schema validators (skipped — test invocation disabled)",
|
|
Expected: "go test ./internal/distillation/...",
|
|
Actual: "skipped",
|
|
Passed: true, Required: false,
|
|
Notes: []string{"caller passed empty GoTestModule — typically because we're already inside a test run"},
|
|
})
|
|
return
|
|
}
|
|
cmd := exec.Command("go", "test", "-count=1", module)
|
|
cmd.Dir = root // run from go module root if caller supplied it; otherwise cwd
|
|
out, err := cmd.CombinedOutput()
|
|
passed := err == nil
|
|
actual := "PASS"
|
|
if !passed {
|
|
actual = "FAIL — " + abbrevOutput(string(out), 200)
|
|
}
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 1, Name: "schema validators pass on fixtures",
|
|
Expected: "go test ./internal/distillation/... → exit 0",
|
|
Actual: actual,
|
|
Passed: passed, Required: true,
|
|
})
|
|
}
|
|
|
|
// abbrevOutput truncates noisy command-output to a stable preview.
// Long stack traces would blow out the report Markdown without this.
//
// Truncation backs up to a UTF-8 rune boundary so a multi-byte
// character is never split in half — a raw s[:max] could emit
// invalid UTF-8 into the Markdown report. A negative max is clamped
// to 0 rather than panicking on the slice expression.
func abbrevOutput(s string, max int) string {
	s = strings.TrimSpace(s)
	if max < 0 {
		max = 0
	}
	if len(s) <= max {
		return s
	}
	cut := max
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + "...(truncated)"
}
|
|
|
|
// ── Phase 2: evidence materialization (observer) ───────────────────
|
|
|
|
// auditPhase2 reads data/evidence/ and tallies rows + skipped
|
|
// markers. Mirrors the Rust phase 2's "materializer dry-run
|
|
// completes / tier-1 sources each materialize ≥1 row" checks but
|
|
// in OBSERVER mode — doesn't re-run the materializer (which is
|
|
// Rust-side); instead reads what the Rust side already produced.
|
|
//
|
|
// Records p2_evidence_rows + p2_evidence_skips metrics that match
|
|
// the Rust shape, so a Go-side audit-full producing baselines is
|
|
// drop-in-comparable to a Rust-side run.
|
|
func auditPhase2(root string, report *PhaseCheckReport) {
|
|
evidenceDir := filepath.Join(root, "data", "evidence")
|
|
if !fileExists(evidenceDir) {
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 2, Name: "evidence materialization output present",
|
|
Expected: "data/evidence/ populated",
|
|
Actual: "missing",
|
|
Passed: false, Required: true,
|
|
Notes: []string{"run materializer (Rust: ./scripts/distill collect; Go-side materializer not yet ported) before audit-full"},
|
|
})
|
|
return
|
|
}
|
|
rows := int64(0)
|
|
skips := int64(0)
|
|
bySource := map[string]int64{}
|
|
tier1Hits := map[string]bool{
|
|
"distilled_facts": false,
|
|
"scrum_reviews": false,
|
|
"audit_facts": false,
|
|
"mode_experiments": false,
|
|
}
|
|
|
|
walkErr := filepath.Walk(evidenceDir, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
if info.IsDir() || !strings.HasSuffix(path, ".jsonl") {
|
|
return nil
|
|
}
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
// Tally per-source via the ev.provenance.source_file field on
|
|
// each evidence row. Match Rust's "by_source" map shape.
|
|
for _, line := range strings.Split(string(data), "\n") {
|
|
line = strings.TrimSpace(line)
|
|
if line == "" {
|
|
continue
|
|
}
|
|
rows++
|
|
var rec struct {
|
|
Provenance struct {
|
|
SourceFile string `json:"source_file"`
|
|
} `json:"provenance"`
|
|
SuccessMarkers []string `json:"success_markers,omitempty"`
|
|
FailureMarkers []string `json:"failure_markers,omitempty"`
|
|
}
|
|
if err := json.Unmarshal([]byte(line), &rec); err != nil {
|
|
skips++
|
|
continue
|
|
}
|
|
stem := stemFromSourceFile(rec.Provenance.SourceFile)
|
|
bySource[stem]++
|
|
if _, ok := tier1Hits[stem]; ok {
|
|
tier1Hits[stem] = true
|
|
}
|
|
}
|
|
return nil
|
|
})
|
|
if walkErr != nil {
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 2, Name: "evidence walk",
|
|
Expected: "no error", Actual: walkErr.Error(),
|
|
Passed: false, Required: true,
|
|
})
|
|
return
|
|
}
|
|
|
|
report.Metrics["p2_evidence_rows"] = rows
|
|
report.Metrics["p2_evidence_skips"] = skips
|
|
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 2, Name: "evidence materialization output non-empty",
|
|
Expected: ">=1 row across all sources",
|
|
Actual: fmt.Sprintf("%d rows · %d skipped", rows, skips),
|
|
Passed: rows >= 1, Required: true,
|
|
})
|
|
|
|
tier1Found := []string{}
|
|
for src, hit := range tier1Hits {
|
|
if hit {
|
|
tier1Found = append(tier1Found, src)
|
|
}
|
|
}
|
|
sort.Strings(tier1Found)
|
|
notes := []string{}
|
|
if len(tier1Found) < 4 {
|
|
notes = append(notes, "fresh-environment OK; expect lower count when source streams are absent")
|
|
}
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 2, Name: "tier-1 sources each materialize ≥1 row",
|
|
Expected: "4/4: distilled_facts, scrum_reviews, audit_facts, mode_experiments",
|
|
Actual: fmt.Sprintf("%d/4 hit (%s)", len(tier1Found), strings.Join(tier1Found, ", ")),
|
|
Passed: len(tier1Found) >= 1, Required: false,
|
|
Notes: notes,
|
|
})
|
|
}
|
|
|
|
// ── Phase 3: scored-runs distribution ──────────────────────────────
|
|
|
|
func auditPhase3(root string, report *PhaseCheckReport) {
|
|
scoredDir := filepath.Join(root, "data", "scored-runs")
|
|
if !fileExists(scoredDir) {
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 3, Name: "scored-runs on disk",
|
|
Expected: "data/scored-runs/ populated",
|
|
Actual: "missing",
|
|
Passed: false, Required: true,
|
|
Notes: []string{"run scoring before audit-full (Go: scripts/distillation/score; Rust: ./scripts/distill score)"},
|
|
})
|
|
return
|
|
}
|
|
|
|
counts := map[string]int64{
|
|
"accepted": 0,
|
|
"partially_accepted": 0,
|
|
"rejected": 0,
|
|
"needs_human_review": 0,
|
|
}
|
|
files, err := ListScoredRunFiles(root)
|
|
if err != nil {
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 3, Name: "scored-runs walk",
|
|
Expected: "no error", Actual: err.Error(),
|
|
Passed: false, Required: true,
|
|
})
|
|
return
|
|
}
|
|
for _, f := range files {
|
|
runs, _, err := LoadScoredRunsFromFile(f)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
for _, r := range runs {
|
|
if _, ok := counts[string(r.Category)]; ok {
|
|
counts[string(r.Category)]++
|
|
}
|
|
}
|
|
}
|
|
total := counts["accepted"] + counts["partially_accepted"] + counts["rejected"] + counts["needs_human_review"]
|
|
|
|
report.Metrics["p3_accepted"] = counts["accepted"]
|
|
report.Metrics["p3_partial"] = counts["partially_accepted"]
|
|
report.Metrics["p3_rejected"] = counts["rejected"]
|
|
report.Metrics["p3_human"] = counts["needs_human_review"]
|
|
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 3, Name: "on-disk scored-runs distribution non-empty",
|
|
Expected: ">=1 accepted",
|
|
Actual: fmt.Sprintf("acc=%d part=%d rej=%d hum=%d", counts["accepted"], counts["partially_accepted"], counts["rejected"], counts["needs_human_review"]),
|
|
Passed: counts["accepted"] >= 1, Required: true,
|
|
})
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 3, Name: "scored-runs distribution sums positive",
|
|
Expected: ">0 total", Actual: fmt.Sprintf("%d total", total),
|
|
Passed: total > 0, Required: false,
|
|
})
|
|
}
|
|
|
|
// ── Phase 4: contamination firewall + provenance ───────────────────
|
|
|
|
// sigHashRe pre-compiled match for the canonical sig_hash shape:
// 64 lowercase hex characters (sha256 hex). Used per-row in the
// phase 4 provenance check and reused by phase 5's run_hash check.
// Compiled once at package scope so per-row matching is allocation-free.
var sigHashRe = regexp.MustCompile(`^[0-9a-f]{64}$`)
|
|
|
|
// auditPhase4 runs the contamination-firewall and provenance checks
// over the three export streams (SFT, RAG, preference) and records
// the quarantine-total metric. Read-only: exports are never modified.
// The emission order of checks is fixed — cross-runtime Markdown
// diffing relies on it.
func auditPhase4(root string, report *PhaseCheckReport) {
	sftPath := filepath.Join(root, "exports", "sft", "instruction_response.jsonl")
	ragPath := filepath.Join(root, "exports", "rag", "playbooks.jsonl")
	prefPath := filepath.Join(root, "exports", "preference", "chosen_rejected.jsonl")

	// Missing files read as zero rows (readJSONLLines returns nil on
	// absent paths), so the metrics below are recorded unconditionally.
	sftRows := readJSONLLines(sftPath)
	ragRows := readJSONLLines(ragPath)
	prefRows := readJSONLLines(prefPath)

	report.Metrics["p4_sft_rows"] = int64(len(sftRows))
	report.Metrics["p4_rag_rows"] = int64(len(ragRows))
	report.Metrics["p4_pref_pairs"] = int64(len(prefRows))

	// SFT contamination firewall: 0 forbidden quality_scores. The
	// only legal SFT quality scores are accepted + partially_accepted.
	sftForbidden := 0
	for _, line := range sftRows {
		var r struct {
			QualityScore string `json:"quality_score"`
		}
		if err := json.Unmarshal([]byte(line), &r); err != nil {
			continue // tolerate malformed (matches Rust)
		}
		if r.QualityScore != "accepted" && r.QualityScore != "partially_accepted" {
			sftForbidden++
		}
	}
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 4, Name: "SFT contamination firewall: 0 forbidden quality_scores",
		Expected: "0", Actual: fmt.Sprintf("%d", sftForbidden),
		Passed: sftForbidden == 0, Required: true,
		Notes: []string{"this is the spec non-negotiable — rejected/needs_human_review must NEVER appear in SFT"},
	})

	// RAG firewall: 0 rejected leaks
	ragRejected := 0
	for _, line := range ragRows {
		var r struct {
			SuccessScore string `json:"success_score"`
		}
		if err := json.Unmarshal([]byte(line), &r); err != nil {
			continue // malformed rows are tolerated here too
		}
		if r.SuccessScore == "rejected" {
			ragRejected++
		}
	}
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 4, Name: "RAG firewall: 0 rejected leaks",
		Expected: "0", Actual: fmt.Sprintf("%d", ragRejected),
		Passed: ragRejected == 0, Required: true,
	})

	// Preference: 0 self-pairs + 0 identical-text pairs.
	prefSelfPairs, prefIdenticalText := 0, 0
	for _, line := range prefRows {
		var r struct {
			ChosenRunID   string `json:"chosen_run_id"`
			RejectedRunID string `json:"rejected_run_id"`
			Chosen        string `json:"chosen"`
			Rejected      string `json:"rejected"`
		}
		if err := json.Unmarshal([]byte(line), &r); err != nil {
			continue
		}
		if r.ChosenRunID == r.RejectedRunID {
			prefSelfPairs++
		}
		if r.Chosen == r.Rejected {
			prefIdenticalText++
		}
	}
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 4, Name: "Preference: 0 self-pairs (chosen_run_id != rejected_run_id)",
		Expected: "0", Actual: fmt.Sprintf("%d", prefSelfPairs),
		Passed: prefSelfPairs == 0, Required: true,
	})
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 4, Name: "Preference: 0 identical-text pairs",
		Expected: "0", Actual: fmt.Sprintf("%d", prefIdenticalText),
		Passed: prefIdenticalText == 0, Required: true,
	})

	// Provenance check: every export row must carry a 64-char hex
	// sig_hash. Walks sft + rag + pref together since the contract
	// is uniform across all three.
	noProv := 0
	checkProv := func(line string) {
		var r struct {
			Provenance struct {
				SigHash string `json:"sig_hash"`
			} `json:"provenance"`
		}
		if err := json.Unmarshal([]byte(line), &r); err != nil {
			// Malformed rows are skipped, matching the firewall loops above.
			return
		}
		// Empty-string check is belt-and-braces: the regex alone
		// already rejects "".
		if r.Provenance.SigHash == "" || !sigHashRe.MatchString(r.Provenance.SigHash) {
			noProv++
		}
	}
	for _, line := range sftRows {
		checkProv(line)
	}
	for _, line := range ragRows {
		checkProv(line)
	}
	for _, line := range prefRows {
		checkProv(line)
	}
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 4, Name: "every export row carries valid sha256 provenance.sig_hash",
		Expected: "0 missing", Actual: fmt.Sprintf("%d missing", noProv),
		Passed: noProv == 0, Required: true,
	})

	// Quarantine totals (informational — feeds the p4_total_quarantined
	// metric used by the longitudinal drift signal).
	totalQuar := int64(0)
	for _, qp := range []string{
		"exports/quarantine/sft.jsonl",
		"exports/quarantine/rag.jsonl",
		"exports/quarantine/preference.jsonl",
	} {
		totalQuar += int64(len(readJSONLLines(filepath.Join(root, qp))))
	}
	report.Metrics["p4_total_quarantined"] = totalQuar
}
|
|
|
|
// ── Phase 5: receipts validation (observer) ────────────────────────
|
|
|
|
// runSummaryShape mirrors the Rust RunSummary just enough to
// validate the file's shape — schema_version, run_hash sha256,
// git_commit hex, and the 5 stage names. Full schema validation
// is intentionally NOT ported (it would require porting the
// schemas/distillation/ TS validators); we check the load-bearing
// invariants and call it good.
type runSummaryShape struct {
	// SchemaVersion must decode as 1 (required check in auditPhase5).
	SchemaVersion int `json:"schema_version"`
	// RunID names the run — presumably matches the directory name
	// under reports/distillation; not cross-checked here (TODO confirm).
	RunID string `json:"run_id"`
	// GitCommit is expected to be 40-char lowercase hex (informational check).
	GitCommit string `json:"git_commit"`
	// RunHash is expected to be a 64-char sha256 hex (required check).
	RunHash string `json:"run_hash"`
	// Stages decodes only the stage label from each entry; the rest
	// of each stage object is ignored by this observer.
	Stages []struct {
		Stage string `json:"stage"`
	} `json:"stages"`
}
|
|
|
|
// auditPhase5 validates the latest run's receipts under
// reports/distillation/: the newest run dir (by summary.json mtime)
// must hold all five stage receipts, each receipt must decode as
// JSON, and summary.json must carry schema_version 1, a 40-char hex
// git_commit (informational), and a sha256 run_hash. Observer mode:
// artifacts may have been produced by either runtime.
func auditPhase5(root string, report *PhaseCheckReport) {
	reportsDir := filepath.Join(root, "reports", "distillation")
	if !fileExists(reportsDir) {
		report.Checks = append(report.Checks, PhaseCheck{
			Phase: 5, Name: "receipts directory exists",
			Expected: "reports/distillation/", Actual: "MISSING",
			Passed: false, Required: true,
		})
		return
	}
	// Find the most recent run_id directory with a summary.json.
	// Mirrors the Rust mtime-sort behavior — ordering matters when
	// both Rust + Go runs land in the same directory.
	type cand struct {
		id    string
		mtime int64 // summary.json mtime, unix millis
	}
	var cands []cand
	entries, err := os.ReadDir(reportsDir)
	if err != nil {
		report.Checks = append(report.Checks, PhaseCheck{
			Phase: 5, Name: "scan reports/distillation",
			Expected: "no error", Actual: err.Error(),
			Passed: false, Required: true,
		})
		return
	}
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		sumPath := filepath.Join(reportsDir, e.Name(), "summary.json")
		st, err := os.Stat(sumPath)
		if err != nil {
			// No readable summary.json — this run dir is not a candidate.
			continue
		}
		cands = append(cands, cand{id: e.Name(), mtime: st.ModTime().UnixMilli()})
	}
	if len(cands) == 0 {
		report.Checks = append(report.Checks, PhaseCheck{
			Phase: 5, Name: "≥1 run with summary.json",
			Expected: "≥1", Actual: "0",
			Passed: false, Required: false,
			Notes: []string{"no Phase 5 run-all has executed yet — Rust: ./scripts/distill run-all"},
		})
		return
	}
	// Newest first; cands[0] is the run under audit.
	sort.Slice(cands, func(i, j int) bool { return cands[i].mtime > cands[j].mtime })
	latest := cands[0]
	runDir := filepath.Join(reportsDir, latest.id)

	// All 5 stage receipts present.
	expected := []string{"collect", "score", "export-rag", "export-sft", "export-preference"}
	missing := []string{}
	for _, s := range expected {
		if !fileExists(filepath.Join(runDir, s+".json")) {
			missing = append(missing, s)
		}
	}
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 5, Name: fmt.Sprintf("latest run (%s) has all 5 stage receipts", latest.id),
		Expected: strings.Join(expected, ","),
		Actual: func() string {
			if len(missing) == 0 {
				return "all present"
			}
			return "missing: " + strings.Join(missing, ",")
		}(),
		Passed: len(missing) == 0, Required: true,
	})

	// Each receipt parses as JSON. Full schema validation (StageReceipt
	// schema) is Rust-side only; we check basic decodability here.
	invalid := 0
	for _, s := range expected {
		path := filepath.Join(runDir, s+".json")
		data, err := os.ReadFile(path)
		if err != nil {
			// Absent receipts were already reported above — don't double-count.
			continue
		}
		var anyShape any
		if err := json.Unmarshal(data, &anyShape); err != nil {
			invalid++
		}
	}
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 5, Name: "every stage receipt parses as JSON",
		Expected: "0 invalid", Actual: fmt.Sprintf("%d invalid", invalid),
		Passed: invalid == 0, Required: true,
	})

	// RunSummary shape: schema_version=1, run_hash sha256, git_commit
	// 40-char hex.
	summaryPath := filepath.Join(runDir, "summary.json")
	data, err := os.ReadFile(summaryPath)
	if err != nil {
		report.Checks = append(report.Checks, PhaseCheck{
			Phase: 5, Name: "summary.json readable",
			Expected: "ok", Actual: err.Error(),
			Passed: false, Required: true,
		})
		return
	}
	var sum runSummaryShape
	if err := json.Unmarshal(data, &sum); err != nil {
		report.Checks = append(report.Checks, PhaseCheck{
			Phase: 5, Name: "summary.json decodable",
			Expected: "ok", Actual: err.Error(),
			Passed: false, Required: true,
		})
		return
	}
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 5, Name: "summary.schema_version == 1",
		Expected: "1", Actual: fmt.Sprintf("%d", sum.SchemaVersion),
		Passed: sum.SchemaVersion == 1, Required: true,
	})
	// git_commit is kept informational (Required: false) — NOTE(review):
	// presumably to tolerate non-standard git states; confirm.
	gitHEADRe := regexp.MustCompile(`^[0-9a-f]{40}$`)
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 5, Name: "summary.git_commit is 40-char hex",
		Expected: "/^[0-9a-f]{40}$/", Actual: shortHash(sum.GitCommit),
		Passed: gitHEADRe.MatchString(sum.GitCommit), Required: false,
	})
	report.Checks = append(report.Checks, PhaseCheck{
		Phase: 5, Name: "run_hash is sha256",
		Expected: "/^[0-9a-f]{64}$/", Actual: shortHash(sum.RunHash),
		Passed: sigHashRe.MatchString(sum.RunHash), Required: true,
	})
}
|
|
|
|
// shortHash renders a hash for display: anything longer than 16
// characters is abbreviated to its first 16 plus an ellipsis.
func shortHash(h string) string {
	const keep = 16
	if len(h) > keep {
		return h[:keep] + "..."
	}
	return h
}
|
|
|
|
// ── Phase 7: replay log shape (observer) ───────────────────────────
|
|
|
|
// auditPhase7 checks data/_kb/replay_runs.jsonl exists and contains
|
|
// well-shaped records. Mirrors Rust phase 7's "persisted log shape"
|
|
// check but skips the live-replay invocation (which would require
|
|
// porting Rust replay.ts, a substantial effort). The full Rust
|
|
// phase 7 also runs 3 dry-run replays — operators wanting that
|
|
// signal continue to invoke the Rust audit-full.
|
|
func auditPhase7(root string, report *PhaseCheckReport) {
|
|
logPath := filepath.Join(root, "data", "_kb", "replay_runs.jsonl")
|
|
lines := readJSONLLines(logPath)
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 7, Name: "replay_runs.jsonl exists",
|
|
Expected: "exists with ≥1 row",
|
|
Actual: func() string {
|
|
if !fileExists(logPath) {
|
|
return "missing"
|
|
}
|
|
return fmt.Sprintf("%d rows total", len(lines))
|
|
}(),
|
|
Passed: fileExists(logPath), Required: false,
|
|
})
|
|
if !fileExists(logPath) {
|
|
return
|
|
}
|
|
// Validate shape on a sample of rows — full validation across
|
|
// thousands of lines isn't worth the cost, and a structural
|
|
// problem will show up in any sample.
|
|
sample := lines
|
|
if len(sample) > 50 {
|
|
sample = sample[len(sample)-50:]
|
|
}
|
|
malformed := 0
|
|
for _, line := range sample {
|
|
var anyShape any
|
|
if err := json.Unmarshal([]byte(line), &anyShape); err != nil {
|
|
malformed++
|
|
}
|
|
}
|
|
report.Checks = append(report.Checks, PhaseCheck{
|
|
Phase: 7, Name: "replay_runs.jsonl tail rows parse as JSON",
|
|
Expected: "0 malformed in last 50", Actual: fmt.Sprintf("%d malformed", malformed),
|
|
Passed: malformed == 0, Required: true,
|
|
})
|
|
}
|
|
|
|
// ── helpers ────────────────────────────────────────────────────────
|
|
|
|
// fileExists reports whether p can be stat'd — true for both
// regular files and directories.
func fileExists(p string) bool {
	if _, err := os.Stat(p); err != nil {
		return false
	}
	return true
}
|
|
|
|
// readJSONLLines reads a JSONL file and returns non-empty lines.
// Returns nil on missing file (matches Rust's existsSync ? read : []);
// a readable file always yields a non-nil (possibly empty) slice.
func readJSONLLines(path string) []string {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil
	}
	out := make([]string, 0)
	for _, raw := range strings.Split(string(data), "\n") {
		if strings.TrimSpace(raw) == "" {
			continue // drop blank / whitespace-only lines
		}
		out = append(out, raw)
	}
	return out
}
|
|
|
|
// FormatAuditFullReport renders a Markdown report mirroring the
|
|
// Rust phase8-full-audit-report.md shape so operators reading
|
|
// across runtimes don't have to re-learn the layout.
|
|
func FormatAuditFullReport(report PhaseCheckReport) string {
|
|
var b strings.Builder
|
|
fmt.Fprintln(&b, "# Audit-FULL report (Go)")
|
|
fmt.Fprintln(&b)
|
|
if report.GitHEAD != "" {
|
|
fmt.Fprintf(&b, "**git HEAD:** `%s`\n\n", report.GitHEAD)
|
|
}
|
|
failed := report.Failed
|
|
total := 0
|
|
for _, c := range report.Checks {
|
|
if c.Required {
|
|
total++
|
|
}
|
|
}
|
|
verdict := "PASS"
|
|
if failed > 0 {
|
|
verdict = "FAIL"
|
|
}
|
|
fmt.Fprintf(&b, "**Verdict:** %s — %d/%d required checks passed; %d phase(s) deferred.\n\n",
|
|
verdict, total-failed, total, report.Skipped)
|
|
|
|
fmt.Fprintln(&b, "## Checks")
|
|
fmt.Fprintln(&b)
|
|
fmt.Fprintln(&b, "| phase | name | expected | actual | required | passed |")
|
|
fmt.Fprintln(&b, "|---|---|---|---|---|---|")
|
|
for _, c := range report.Checks {
|
|
req := "no"
|
|
if c.Required {
|
|
req = "**yes**"
|
|
}
|
|
passed := "✗"
|
|
if c.Passed {
|
|
passed = "✓"
|
|
}
|
|
fmt.Fprintf(&b, "| %d | %s | %s | %s | %s | %s |\n",
|
|
c.Phase, c.Name, c.Expected, c.Actual, req, passed)
|
|
for _, n := range c.Notes {
|
|
fmt.Fprintf(&b, "| | _note_ | %s | | | |\n", n)
|
|
}
|
|
}
|
|
|
|
if len(report.Metrics) > 0 {
|
|
fmt.Fprintln(&b)
|
|
fmt.Fprintln(&b, "## Metrics")
|
|
fmt.Fprintln(&b)
|
|
fmt.Fprintln(&b, "| metric | value |")
|
|
fmt.Fprintln(&b, "|---|---:|")
|
|
// Stable order for diffs.
|
|
names := make([]string, 0, len(report.Metrics))
|
|
for k := range report.Metrics {
|
|
names = append(names, k)
|
|
}
|
|
sort.Strings(names)
|
|
for _, k := range names {
|
|
fmt.Fprintf(&b, "| %s | %d |\n", k, report.Metrics[k])
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|