Ports the metric-collection passes from scripts/distillation/audit_full.ts. The substrate that PRODUCES audit_baselines.jsonl entries — the half OPEN #2 left as "deferred to next wave" after the read/write substrate landed in ca142b9. Phase coverage: Phase 0 (file presence) ported Phase 1 (schema validators) skipped (Go's `go test` covers it) Phase 2 (materializer dry-run) deferred (Go materializer not yet ported) Phase 3 (scored-runs distribution) ported Phase 4 (contamination firewall) ported Phase 5 (receipts validation) deferred (Go run-summary JSON not yet emitted) Phase 6 (replay sanity) deferred (Go replay tool not ported) Phase 7 (run summary lineage) deferred (same) Cross-runtime parity verified end-to-end: Go-side audit-full against /home/profit/lakehouse produced metrics IDENTICAL to the last Rust-emitted audit_baselines.jsonl entry. All 8 ported metrics match byte-for-byte: p3_accepted=386, p3_partial=132, p3_rejected=57, p3_human=480, p4_sft_rows=353, p4_rag_rows=448, p4_pref_pairs=83, p4_total_quarantined=1325 6/6 required checks pass on live data. Components: - internal/distillation/audit_full.go: PhaseCheck struct (mirrors Rust shape), PhaseCheckReport aggregation, RunAuditFull orchestrator, auditPhase0/3/4 implementations, FormatAuditFullReport Markdown writer. - cmd/audit_full/main.go: CLI binary with -root, -out, -json, -append-baseline flags. Operators run "./bin/audit_full -append-baseline" to grow the longitudinal log alongside the Rust pipeline (entries are interchangeable — same envelope shape). - 6 new tests: empty-root failure handling, full-fixture clean PASS (locks all 8 metrics + all 6 required checks), SFT firewall contamination detection, preference self-pair detection, sig_hash regex correctness (rejects wrong-length + uppercase), Markdown formatter smoke. Live-data probe captured at reports/cutover/audit_full_go_vs_rust.md (linked from reports/cutover/SUMMARY.md). 
Same shape as the audit_baselines round-trip evidence — both Go-side ports of the distillation surface are now validated against real Rust data, not just fixtures. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
106 lines
3.1 KiB
Go
106 lines
3.1 KiB
Go
// audit_full — Go-side audit-full runner. Calls into
// internal/distillation.RunAuditFull, dumps the report (Markdown by
// default, JSON with -json) to stdout (or a file), and optionally
// appends an AuditBaseline entry to data/_kb/audit_baselines.jsonl
// for the longitudinal log.
//
// Usage:
//
//	audit_full                                   # report only
//	audit_full -root /home/profit/lakehouse      # custom root
//	audit_full -append-baseline                  # also append to audit_baselines.jsonl
//	audit_full -out reports/distillation/run.md  # write report file
//	audit_full -json                             # emit the full PhaseCheckReport as JSON
//
// Designed to live alongside the Rust scripts/distillation/audit_full.ts
// — operators can run either runtime against the same root and the
// audit_baselines.jsonl entries are interchangeable.
|
|
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
|
|
)
|
|
|
|
func main() {
|
|
root := flag.String("root", "", "lakehouse data root (defaults to $LH_DISTILL_ROOT or /home/profit/lakehouse)")
|
|
out := flag.String("out", "", "write Markdown report to this path (default: stdout)")
|
|
appendBaseline := flag.Bool("append-baseline", false, "append an AuditBaseline entry to data/_kb/audit_baselines.jsonl after the run")
|
|
jsonOut := flag.Bool("json", false, "emit the full PhaseCheckReport as JSON instead of Markdown")
|
|
flag.Parse()
|
|
|
|
gitHEAD := resolveGitHEAD()
|
|
report := distillation.RunAuditFull(distillation.AuditFullOptions{
|
|
Root: *root,
|
|
GitHEAD: gitHEAD,
|
|
})
|
|
|
|
var body []byte
|
|
if *jsonOut {
|
|
body = mustJSON(report)
|
|
} else {
|
|
body = []byte(distillation.FormatAuditFullReport(report))
|
|
}
|
|
|
|
if *out == "" {
|
|
_, _ = os.Stdout.Write(body)
|
|
} else {
|
|
if err := os.WriteFile(*out, body, 0o644); err != nil {
|
|
log.Fatalf("write %s: %v", *out, err)
|
|
}
|
|
fmt.Fprintf(os.Stderr, "wrote %s (%d bytes)\n", *out, len(body))
|
|
}
|
|
|
|
if *appendBaseline {
|
|
// Resolve the same path the Rust pipeline uses so both
|
|
// runtimes share the audit_baselines.jsonl log.
|
|
resolvedRoot := *root
|
|
if resolvedRoot == "" {
|
|
if env := os.Getenv("LH_DISTILL_ROOT"); env != "" {
|
|
resolvedRoot = env
|
|
} else {
|
|
resolvedRoot = "/home/profit/lakehouse"
|
|
}
|
|
}
|
|
bp := distillation.DefaultBaselinePath(resolvedRoot)
|
|
err := distillation.AppendBaseline(bp, distillation.AuditBaseline{
|
|
RecordedAt: time.Now().UTC().Format(time.RFC3339),
|
|
GitCommit: gitHEAD,
|
|
Metrics: report.Metrics,
|
|
})
|
|
if err != nil {
|
|
log.Fatalf("append baseline: %v", err)
|
|
}
|
|
fmt.Fprintf(os.Stderr, "appended baseline to %s\n", bp)
|
|
}
|
|
|
|
if report.Failed > 0 {
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// resolveGitHEAD asks git for the commit SHA that HEAD points at and
// returns it with surrounding whitespace removed. Any failure of the
// `git rev-parse HEAD` invocation yields "" instead of an error, so
// the audit can still run when no commit can be resolved.
func resolveGitHEAD() string {
	raw, err := exec.Command("git", "rev-parse", "HEAD").Output()
	if err == nil {
		return strings.TrimSpace(string(raw))
	}
	return ""
}
|
|
|
|
// mustJSON serialises v as indented JSON and returns the bytes with a
// trailing newline appended. A marshalling failure is fatal: the
// error is logged via log.Fatalf, which terminates the process.
func mustJSON(v any) []byte {
	encoded, marshalErr := json.MarshalIndent(v, "", " ")
	if marshalErr != nil {
		log.Fatalf("json marshal: %v", marshalErr)
	}
	encoded = append(encoded, '\n')
	return encoded
}
|