root 262a77a52a subject-audit parity (Step 8) — Go reader + cross-runtime probe
Per /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8.

Go side reads SubjectManifest + verifies HMAC chain on per-subject
audit JSONL files using IDENTICAL canonical-JSON + HMAC-SHA256 algorithm
to crates/catalogd/src/subject_audit.rs. A Rust-written chain now
verifies under Go and vice versa.

Files:
  - internal/catalogd/subject.go
      SubjectManifest, SubjectAuditRow, AuditAccessor, AuditLogEntry
      LoadSubjectManifest, LoadKeyFile (32-byte minimum, matches Rust)
      ReadAuditLog, VerifyChain
      canonicalRowBytesFromRaw (production), canonicalRowBytesFromStruct (tests)
      computeRowHMAC, CanonicalAndHmac (parity helper)
  - internal/catalogd/subject_test.go (10 unit tests)
  - scripts/cutover/parity/subject_audit_helper/main.go
      CLI helper mirroring crates/catalogd/src/bin/parity_subject_audit.rs
  - scripts/cutover/parity/subject_audit_parity.sh
      Two-phase probe: known-answer + every real audit log

Two real bugs caught + fixed by the probe authoring loop:

1. omitempty on AuditAccessor.TraceID stripped the field when empty,
   producing different canonical bytes than Rust (which always writes
   the field). Removed omitempty. Rust + Go now produce identical
   bytes for rows with trace_id="" (the common production case).

2. time.RFC3339Nano strips trailing zeros from nanoseconds, producing
   "...46143921" where Rust's chrono AutoSi produces "...461439210".
   Hashing through the parsed-then-re-marshaled struct breaks the
   chain on any row whose nanos end in 0. Fixed by canonicalizing
   from the RAW LINE BYTES (preserves the original timestamp string
   byte-for-byte). Test TestVerifyChain_RawBytesPreserveTimePrecision
   regression-locks this with a hand-crafted nanos=461439210 row.

Live verification (6 / 6 byte-identical assertions):
  - Phase 1 known-answer: canonical bytes (266) + HMAC match
  - Phase 2 real logs: WORKER-1..5 audit JSONL all verify under both
    runtimes with identical (count, tip, verified, error) output

Report: reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 04:17:15 -05:00

169 lines
4.0 KiB
Go

// Cross-runtime parity helper — Go side.
//
// Specification: /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8.
//
// Counterpart of crates/catalogd/src/bin/parity_subject_audit.rs.
// Both helpers MUST produce byte-identical output for the same input.
//
// Modes:
//
//	--known-answer
//	    Print canonical-JSON + HMAC for a hardcoded fixture. Compared
//	    byte-for-byte against the Rust helper's output. If they
//	    differ, the canonical-JSON or HMAC algorithm has drifted.
//
//	--verify <audit_log_path> --key <key_path>
//	    Replay the HMAC chain on a real audit JSONL. Print one JSON
//	    object: {mode, count, tip, verified, error}.
package main
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"time"
cat "git.agentview.dev/profit/golangLAKEHOUSE/internal/catalogd"
)
// genesis is the chain-hash sentinel used by this helper: it is the
// PrevChainHash of the known-answer fixture row, the third argument passed
// to cat.CanonicalAndHmac, and the tip reported when chain verification
// fails.
const genesis = "GENESIS"
// deterministicKey returns a freshly allocated 32-byte key whose bytes are
// 0x00, 0x01, ..., 0x1f. It is the fixed key used for the known-answer
// fixture, so the resulting HMAC is reproducible across runs.
func deterministicKey() []byte {
	const size = 32
	key := make([]byte, 0, size)
	for b := byte(0); b < size; b++ {
		key = append(key, b)
	}
	return key
}
// knownAnswerOut is the JSON object printed by --known-answer mode.
//
// It is intentionally identical to KnownAnswerOut in the Rust helper so a
// stdout diff is a one-line semantic comparison. encoding/json emits struct
// fields in declaration order, so both the field order and the JSON tags
// below are part of that contract.
type knownAnswerOut struct {
// Mode is set to "known_answer" by runKnownAnswer.
Mode string `json:"mode"`
// Canonical is the canonical row bytes returned by cat.CanonicalAndHmac,
// converted to a string.
Canonical string `json:"canonical"`
// Hmac is the HMAC string returned by cat.CanonicalAndHmac.
Hmac string `json:"hmac"`
// CanonicalBytesLen is the byte length of the canonical row bytes.
CanonicalBytesLen int `json:"canonical_bytes_len"`
}
// verifyOut is the JSON object printed by --verify mode:
// {mode, count, tip, verified, error}. encoding/json emits the fields in
// declaration order, so the order below is the order on stdout.
type verifyOut struct {
// Mode is set to "verify" by runVerify.
Mode string `json:"mode"`
// Count is the count returned by cat.VerifyChain; runVerify resets it to 0
// when verification fails.
Count int `json:"count"`
// Tip is the tip returned by cat.VerifyChain; runVerify resets it to the
// genesis constant when verification fails.
Tip string `json:"tip"`
// Verified is true only when cat.VerifyChain returned a nil error.
Verified bool `json:"verified"`
// Error holds the verification error text. It is a pointer so that a nil
// value is marshaled as JSON null rather than as an empty string.
Error *string `json:"error"`
}
// runKnownAnswer implements --known-answer mode.
//
// It builds one hardcoded audit row, asks cat.CanonicalAndHmac for its
// canonical bytes and HMAC under the deterministic key, and prints the
// result as a single knownAnswerOut JSON object. Any failure from
// CanonicalAndHmac terminates the process through die.
func runKnownAnswer() {
	// Every value here is fixed on purpose: the Rust helper hashes the
	// same fixture, so the two stdout lines can be diffed directly.
	accessor := cat.AuditAccessor{
		Kind:    "gateway_lookup",
		Daemon:  "gateway",
		Purpose: "parity_test",
		TraceID: "trace-fixed",
	}
	fixture := &cat.SubjectAuditRow{
		Schema:         "subject_audit.v1",
		Ts:             time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC),
		CandidateID:    "WORKER-FIXED",
		Accessor:       accessor,
		FieldsAccessed: []string{"name"},
		Result:         "success",
		PrevChainHash:  genesis,
	}

	key := deterministicKey()
	canonicalBytes, mac, err := cat.CanonicalAndHmac(fixture, key, genesis)
	if err != nil {
		die("canonical/hmac: %v", err)
	}

	emit(knownAnswerOut{
		Mode:              "known_answer",
		Canonical:         string(canonicalBytes),
		Hmac:              mac,
		CanonicalBytesLen: len(canonicalBytes),
	})
}
// runVerify implements --verify mode.
//
// It loads the audit log at auditPath via cat.ReadAuditLog, reads the raw
// key bytes from keyPath, replays the chain with cat.VerifyChain, and
// prints one verifyOut JSON object. Failing to read either file terminates
// the process through die; a chain verification failure is NOT fatal and
// is reported inside the JSON instead.
func runVerify(auditPath, keyPath string) {
	// The log is read before the key, so a missing log is reported first.
	entries, err := cat.ReadAuditLog(auditPath)
	if err != nil {
		die("read audit log: %v", err)
	}
	key, err := os.ReadFile(keyPath)
	if err != nil {
		die("read key: %v", err)
	}

	count, tip, verr := cat.VerifyChain(entries, key)

	// Start from the failure shape (count 0, tip GENESIS, verified false),
	// which matches the Rust helper's error semantics, and only fill in
	// the real count and tip once the chain has verified.
	result := verifyOut{Mode: "verify", Tip: genesis}
	if verr == nil {
		result.Count = count
		result.Tip = tip
		result.Verified = true
	} else {
		msg := verr.Error()
		result.Error = &msg
	}
	emit(result)
}
// emit writes v to stdout as one line of compact JSON followed by a newline.
// A value that cannot be encoded terminates the process through die.
//
// HTML escaping is turned off deliberately. json.Marshal rewrites '<', '>'
// and '&' inside strings as \u003c, \u003e and \u0026, which would make this
// helper's stdout differ from the Rust helper's whenever an error message or
// canonical string contains one of those characters, breaking the
// byte-identical-output contract stated at the top of this file.
//
// NOTE(review): this assumes the Rust helper serializes with serde_json,
// which emits those characters literally; confirm against
// parity_subject_audit.rs. encoding/json still escapes U+2028 and U+2029
// unconditionally, so strings containing those may still differ.
func emit(v any) {
	// Encode into memory first so nothing reaches stdout if encoding fails.
	var line strings.Builder
	enc := json.NewEncoder(&line)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(v); err != nil {
		die("marshal output: %v", err)
	}
	// Encode already appended the trailing newline.
	fmt.Print(line.String())
}
// die reports a fatal error and terminates the process.
//
// The message is formatted printf-style from format and a, written to
// stderr with a trailing newline, and the process then exits with status 2.
// die never returns.
func die(format string, a ...any) {
	msg := fmt.Sprintf(format, a...)
	os.Stderr.WriteString(msg + "\n")
	os.Exit(2)
}
// main parses the command line and dispatches to one of the two modes.
//
// Recognized arguments:
//
//	--known-answer       run the known-answer fixture
//	--verify <path>      audit log to verify (requires --key)
//	--key <path>         key file used for verification
//	-h, --help           print usage to stderr and exit 0
//
// Anything else is a fatal error. --known-answer takes precedence when it
// is combined with --verify/--key.
func main() {
	var (
		wantKnownAnswer bool
		logPath         string
		secretPath      string
	)

	// Consume the argument list from the front; flags that take a value
	// pull the next element off as well, whatever it looks like.
	rest := os.Args[1:]
	for len(rest) > 0 {
		head := rest[0]
		rest = rest[1:]

		switch head {
		case "--known-answer":
			wantKnownAnswer = true
		case "--verify":
			if len(rest) == 0 {
				die("--verify needs a path")
			}
			logPath, rest = rest[0], rest[1:]
		case "--key":
			if len(rest) == 0 {
				die("--key needs a path")
			}
			secretPath, rest = rest[0], rest[1:]
		case "-h", "--help":
			fmt.Fprintln(os.Stderr, "subject_audit_helper --known-answer")
			fmt.Fprintln(os.Stderr, "subject_audit_helper --verify <audit_log> --key <key_file>")
			os.Exit(0)
		default:
			die("unknown arg: %s", head)
		}
	}

	switch {
	case wantKnownAnswer:
		runKnownAnswer()
	case logPath == "" || secretPath == "":
		die("need --known-answer OR (--verify <path> --key <path>)")
	case !strings.HasSuffix(filepath.Base(logPath), ".audit.jsonl"):
		// Sanity: file naming convention <candidate_id>.audit.jsonl.
		die("audit log path must end with .audit.jsonl")
	default:
		runVerify(logPath, secretPath)
	}
}