subject-audit parity (Step 8) — Go reader + cross-runtime probe

Per /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8. Go side reads SubjectManifest + verifies HMAC chain on per-subject audit JSONL files using IDENTICAL canonical-JSON + HMAC-SHA256 algorithm to crates/catalogd/src/subject_audit.rs. A Rust-written chain now verifies under Go and vice versa. Files: - internal/catalogd/subject.go SubjectManifest, SubjectAuditRow, AuditAccessor, AuditLogEntry LoadSubjectManifest, LoadKeyFile (32-byte minimum, matches Rust) ReadAuditLog, VerifyChain canonicalRowBytesFromRaw (production), canonicalRowBytesFromStruct (tests) computeRowHMAC, CanonicalAndHmac (parity helper) - internal/catalogd/subject_test.go (10 unit tests) - scripts/cutover/parity/subject_audit_helper/main.go CLI helper mirroring crates/catalogd/src/bin/parity_subject_audit.rs - scripts/cutover/parity/subject_audit_parity.sh Two-phase probe: known-answer + every real audit log Two real bugs caught + fixed by the probe authoring loop: 1. omitempty on AuditAccessor.TraceID stripped the field when empty, producing different canonical bytes than Rust (which always writes the field). Removed omitempty. Rust + Go now produce identical bytes for rows with trace_id="" (the common production case). 2. time.RFC3339Nano strips trailing zeros from nanoseconds, producing "...46143921" where Rust's chrono AutoSi produces "...461439210". Hashing through the parsed-then-re-marshaled struct breaks the chain on any row whose nanos end in 0. Fixed by canonicalizing from the RAW LINE BYTES (preserves the original timestamp string byte-for-byte). Test TestVerifyChain_RawBytesPreserveTimePrecision regression-locks this with a hand-crafted nanos=461439210 row. 
Live verification (6 / 6 byte-identical assertions): - Phase 1 known-answer: canonical bytes (266) + HMAC match - Phase 2 real logs: WORKER-1..5 audit JSONL all verify under both runtimes with identical (count, tip, verified, error) output Report: reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 04:17:15 -05:00 · 2026-05-03 04:17:15 -05:00 · 262a77a52a
commit 262a77a52a
parent 22c0b42e96
5 changed files with 1068 additions and 0 deletions
--- a/internal/catalogd/subject.go
+++ b/internal/catalogd/subject.go
@ -0,0 +1,329 @@
 // Subject manifests + per-subject audit-log chain verification.
 //
 // Specification: /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md.
 //
 // This file is the Go side of Step 8 (cross-runtime parity). The Rust
 // side is the writer (crates/catalogd/src/subject_audit.rs); Go is a
 // READER + VERIFIER only — both sides serialize / hash with identical
 // algorithms so a chain written by Rust verifies under Go.
 //
 // Algorithm (must match Rust crates/catalogd/src/subject_audit.rs exactly):
 //   row_hmac = HMAC-SHA256(key, prev_chain_hash_bytes || canonical_row_bytes)
 //
 // where:
 //   - prev_chain_hash_bytes is the ASCII bytes of the previous row's
 //     row_hmac field, OR the literal ASCII string "GENESIS" for the
 //     first row (no hex decode!)
 //   - canonical_row_bytes is the row JSON with the row_hmac field
 //     dropped, with all object keys sorted alphabetically at every
 //     nesting depth, with no insignificant whitespace
 //   - the final hash is rendered as 64 lowercase hex characters
 package catalogd
 import (
 	"crypto/hmac"
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
 	"os"
 	"sort"
 	"strings"
 	"time"
 )
 // GenesisHash is the literal ASCII value used as the previous-chain-hash
 // input for the first row of an audit chain. It is hashed as these seven
 // bytes (no hex decoding) and is also the chain tip VerifyChain reports
 // for an empty log.
 const GenesisHash = "GENESIS"
 // SubjectManifest is the Go view of crates/shared/src/types.rs::SubjectManifest.
 // The JSON tags must stay byte-identical to the Rust field names so that a
 // manifest written by the Rust side decodes here without loss.
 type SubjectManifest struct {
 	Schema            string              `json:"schema"`
 	CandidateID       string              `json:"candidate_id"`
 	CreatedAt         time.Time           `json:"created_at"`
 	UpdatedAt         time.Time           `json:"updated_at"`
 	Status            string              `json:"status"`
 	Vertical          string              `json:"vertical,omitempty"`
 	Consent           SubjectConsent      `json:"consent"`
 	Retention         SubjectRetention    `json:"retention"`
 	Datasets          []SubjectDatasetRef `json:"datasets"`
 	SafeViews         []string            `json:"safe_views"`
 	AuditLogPath      string              `json:"audit_log_path"`
 	AuditLogChainRoot string              `json:"audit_log_chain_root"`
 }
 // SubjectConsent groups the per-category consent records of a manifest.
 type SubjectConsent struct {
 	GeneralPii GeneralPiiConsent `json:"general_pii"`
 	Biometric  BiometricConsent  `json:"biometric"`
 }
 // GeneralPiiConsent is the "general_pii" consent record. The timestamp
 // pointers stay nil when the corresponding JSON key is absent or null.
 type GeneralPiiConsent struct {
 	Status      string     `json:"status"`
 	Version     string     `json:"version,omitempty"`
 	GivenAt     *time.Time `json:"given_at,omitempty"`
 	WithdrawnAt *time.Time `json:"withdrawn_at,omitempty"`
 }
 // BiometricConsent is the "biometric" consent record. RetentionUntil stays
 // nil when the JSON key is absent or null.
 type BiometricConsent struct {
 	Status         string     `json:"status"`
 	RetentionUntil *time.Time `json:"retention_until,omitempty"`
 }
 // SubjectRetention is the "retention" section of a manifest.
 type SubjectRetention struct {
 	GeneralPiiUntil time.Time `json:"general_pii_until"`
 	Policy          string    `json:"policy,omitempty"`
 }
 // SubjectDatasetRef is one element of the manifest's "datasets" array:
 // a dataset name plus the key column/value pair recorded for the subject.
 type SubjectDatasetRef struct {
 	Name      string `json:"name"`
 	KeyColumn string `json:"key_column"`
 	KeyValue  string `json:"key_value"`
 }
 // SubjectAuditRow is the Go view of crates/shared/src/types.rs::SubjectAuditRow,
 // i.e. one line of a per-subject audit JSONL file.
 //
 // The order of fields in the JSON is irrelevant for hashing (keys are sorted
 // when the row is canonicalized), but names and types must match the Rust
 // definition for rows to decode.
 type SubjectAuditRow struct {
 	Schema         string        `json:"schema"`
 	Ts             time.Time     `json:"ts"`
 	CandidateID    string        `json:"candidate_id"`
 	Accessor       AuditAccessor `json:"accessor"`
 	FieldsAccessed []string      `json:"fields_accessed"`
 	Result         string        `json:"result"`
 	PrevChainHash  string        `json:"prev_chain_hash"`
 	RowHmac        string        `json:"row_hmac"`
 }
 // AuditAccessor is the Go view of crates/shared/src/types.rs::AuditAccessor.
 //
 // WARNING: do not add omitempty to trace_id. The Rust writer always emits
 // the key, even when the value is "" (`#[serde(default)]` only influences
 // deserialization). Dropping it on the Go side changes the canonical bytes
 // and therefore the HMAC. This was found on 2026-05-03 by
 // subject_audit_parity.sh against a live WORKER-1 row with trace_id="".
 type AuditAccessor struct {
 	Kind    string `json:"kind"`
 	Daemon  string `json:"daemon"`
 	Purpose string `json:"purpose"`
 	TraceID string `json:"trace_id"`
 }
 // LoadSubjectManifest reads the file at path and decodes it as a
 // SubjectManifest. Read and parse failures are wrapped with the path so
 // callers can tell which manifest was at fault.
 func LoadSubjectManifest(path string) (*SubjectManifest, error) {
 	raw, readErr := os.ReadFile(path)
 	if readErr != nil {
 		return nil, fmt.Errorf("read manifest %s: %w", path, readErr)
 	}
 	manifest := new(SubjectManifest)
 	if parseErr := json.Unmarshal(raw, manifest); parseErr != nil {
 		return nil, fmt.Errorf("parse manifest %s: %w", path, parseErr)
 	}
 	return manifest, nil
 }
 // AuditLogEntry is one parsed audit row PLUS the raw line bytes from
 // the file. The raw bytes are load-bearing: we MUST canonicalize from
 // them (not from the re-marshaled struct) to avoid time-precision drift.
 //
 // Why: Rust's chrono RFC3339-AutoSi serializes nanoseconds with 9 digits
 // (e.g. "461439210"). Go's time.RFC3339Nano strips trailing zeros (e.g.
 // "46143921"). Round-tripping through time.Time changes the byte sequence
 // and breaks the HMAC. Read once, hash from the original bytes.
 // (Caught 2026-05-03 by subject_audit_parity.sh on a real WORKER-5 row
 // whose nanoseconds happened to end in 0.)
 type AuditLogEntry struct {
 	// Row is the decoded form of the line; VerifyChain reads
 	// prev_chain_hash and row_hmac from it.
 	Row SubjectAuditRow
 	// Raw is the whitespace-trimmed line exactly as it appeared in the
 	// file; VerifyChain canonicalizes and hashes from these bytes.
 	Raw []byte
 }
 // ReadAuditLog loads an audit JSONL file and returns one AuditLogEntry
 // per usable line, in file order.
 //
 // A missing file is not an error: it yields (nil, nil). Lines that are
 // blank after trimming whitespace are ignored, and so are lines that do
 // not decode as a SubjectAuditRow (this is deliberate and matches the
 // Rust read_rows_in_range behaviour). Each entry keeps the trimmed line
 // bytes alongside the decoded row.
 func ReadAuditLog(path string) ([]AuditLogEntry, error) {
 	content, err := os.ReadFile(path)
 	switch {
 	case err == nil:
 		// fall through to parsing
 	case os.IsNotExist(err):
 		return nil, nil
 	default:
 		return nil, fmt.Errorf("read audit log %s: %w", path, err)
 	}
 	var entries []AuditLogEntry
 	remaining := string(content)
 	for remaining != "" {
 		var line string
 		line, remaining, _ = strings.Cut(remaining, "\n")
 		candidate := strings.TrimSpace(line)
 		if candidate == "" {
 			continue
 		}
 		rawLine := []byte(candidate)
 		var row SubjectAuditRow
 		if json.Unmarshal(rawLine, &row) != nil {
 			// Undecodable line: skipped on purpose, see doc comment.
 			continue
 		}
 		entries = append(entries, AuditLogEntry{Row: row, Raw: rawLine})
 	}
 	return entries, nil
 }
 // canonicalRowBytesFromRaw is the canonicalizer used by VerifyChain. It
 // decodes one raw audit line (numbers kept as json.Number so their text
 // is not reformatted), removes the top-level row_hmac key if the line is
 // a JSON object, and re-emits the value through writeCanonical: sorted
 // keys at every depth, no insignificant whitespace.
 //
 // CRITICAL: production hashing must always start from the raw file
 // bytes via this function. Bytes obtained through
 // canonicalRowBytesFromStruct have gone through time.Time and can differ
 // from what the writer hashed for any row whose nanoseconds end in 0.
 func canonicalRowBytesFromRaw(rawLine []byte) ([]byte, error) {
 	decoder := json.NewDecoder(strings.NewReader(string(rawLine)))
 	decoder.UseNumber()
 	var parsed any
 	if decodeErr := decoder.Decode(&parsed); decodeErr != nil {
 		return nil, fmt.Errorf("decode raw line: %w", decodeErr)
 	}
 	// When the line is not an object, fields is a nil map and delete is
 	// a no-op.
 	fields, _ := parsed.(map[string]any)
 	delete(fields, "row_hmac")
 	out := new(strings.Builder)
 	if writeErr := writeCanonical(out, parsed); writeErr != nil {
 		return nil, writeErr
 	}
 	return []byte(out.String()), nil
 }
 // canonicalRowBytesFromStruct canonicalizes a row that was built in Go
 // and therefore has no original file bytes: it marshals the struct and
 // hands the result to canonicalRowBytesFromRaw.
 //
 // Intended for the known-answer vector and unit tests only. It is safe
 // only for synthetic rows whose ts has no trailing-zero nanoseconds;
 // real chains must be verified from the file bytes.
 func canonicalRowBytesFromStruct(row *SubjectAuditRow) ([]byte, error) {
 	marshaled, marshalErr := json.Marshal(row)
 	if marshalErr == nil {
 		return canonicalRowBytesFromRaw(marshaled)
 	}
 	return nil, fmt.Errorf("marshal row: %w", marshalErr)
 }
 // writeCanonical recursively writes v to buf as canonical JSON: object
 // keys sorted bytewise at every depth, no insignificant whitespace.
 // Arrays keep their element order (semantically significant per spec §3).
 //
 // Strings (both object keys and values) are escaped by
 // writeCanonicalString instead of json.Marshal. json.Marshal rewrites
 // '<', '>' and '&' as \u003c, \u003e and \u0026 and escapes U+2028/U+2029,
 // so any row containing those characters would canonicalize to different
 // bytes than a writer that emits them verbatim, and its HMAC would not
 // verify. All other values (json.Number, bool, nil, and any other Go
 // value passed by tests) still go through json.Marshal.
 //
 // NOTE(review): the escaping below follows serde_json's documented
 // compact output; confirm against the Rust canonicalizer if it ever
 // stops using serde_json's default formatter.
 //
 // Returns an error only if json.Marshal rejects a non-string scalar.
 func writeCanonical(buf *strings.Builder, v any) error {
 	switch t := v.(type) {
 	case map[string]any:
 		keys := make([]string, 0, len(t))
 		for k := range t {
 			keys = append(keys, k)
 		}
 		sort.Strings(keys)
 		buf.WriteByte('{')
 		for i, k := range keys {
 			if i > 0 {
 				buf.WriteByte(',')
 			}
 			writeCanonicalString(buf, k)
 			buf.WriteByte(':')
 			if err := writeCanonical(buf, t[k]); err != nil {
 				return err
 			}
 		}
 		buf.WriteByte('}')
 	case []any:
 		buf.WriteByte('[')
 		for i, elem := range t {
 			if i > 0 {
 				buf.WriteByte(',')
 			}
 			if err := writeCanonical(buf, elem); err != nil {
 				return err
 			}
 		}
 		buf.WriteByte(']')
 	case string:
 		writeCanonicalString(buf, t)
 	default:
 		// json.Number, bool, nil: json.Marshal emits the number text,
 		// true/false and null unchanged, with no escaping involved.
 		bs, err := json.Marshal(v)
 		if err != nil {
 			return fmt.Errorf("marshal scalar: %w", err)
 		}
 		buf.Write(bs)
 	}
 	return nil
 }
 // writeCanonicalString writes s as a JSON string literal using the
 // minimal escape set: \" \\ \b \f \n \r \t, and \u00xx (lowercase hex)
 // for the remaining control characters below 0x20. Every other rune,
 // including '<', '>', '&', U+2028 and U+2029, is written verbatim.
 // Invalid UTF-8 bytes are written as U+FFFD.
 func writeCanonicalString(buf *strings.Builder, s string) {
 	const hexDigits = "0123456789abcdef"
 	buf.WriteByte('"')
 	for _, r := range s {
 		switch r {
 		case '"':
 			buf.WriteString(`\"`)
 		case '\\':
 			buf.WriteString(`\\`)
 		case '\b':
 			buf.WriteString(`\b`)
 		case '\f':
 			buf.WriteString(`\f`)
 		case '\n':
 			buf.WriteString(`\n`)
 		case '\r':
 			buf.WriteString(`\r`)
 		case '\t':
 			buf.WriteString(`\t`)
 		default:
 			if r < 0x20 {
 				buf.WriteString(`\u00`)
 				buf.WriteByte(hexDigits[r>>4])
 				buf.WriteByte(hexDigits[r&0xf])
 				continue
 			}
 			buf.WriteRune(r)
 		}
 	}
 	buf.WriteByte('"')
 }
 // CanonicalAndHmac serves the parity probe's known-answer fixture. It
 // canonicalizes row via the struct path (struct → JSON → canonical) and
 // returns those bytes together with the row HMAC chained onto prevHash.
 //
 // Do not use it to verify real logs: VerifyChain canonicalizes from the
 // original file bytes, which avoids the time-precision drift that the
 // struct path is exposed to.
 func CanonicalAndHmac(row *SubjectAuditRow, key []byte, prevHash string) (canonical []byte, hmacHex string, err error) {
 	rowBytes, canonErr := canonicalRowBytesFromStruct(row)
 	if canonErr != nil {
 		return nil, "", canonErr
 	}
 	return rowBytes, computeRowHMAC(key, prevHash, rowBytes), nil
 }
 // computeRowHMAC returns HMAC-SHA256 over the ASCII bytes of prevHash
 // immediately followed by canonical, keyed with key, encoded as 64
 // lowercase hex characters.
 func computeRowHMAC(key []byte, prevHash string, canonical []byte) string {
 	hasher := hmac.New(sha256.New, key)
 	// hash.Hash.Write never returns an error, so the results are ignored.
 	hasher.Write([]byte(prevHash))
 	hasher.Write(canonical)
 	digest := hasher.Sum(make([]byte, 0, sha256.Size))
 	return hex.EncodeToString(digest)
 }
 // VerifyChain replays the HMAC chain over entries, in order, using key.
 //
 // Returns:
 //   - count: number of rows that verified before the first problem
 //     (len(entries) on success);
 //   - chainTip: the row_hmac of the last verified row, or GenesisHash if
 //     no row verified (including the empty/missing-log case, which
 //     returns (0, GenesisHash, nil));
 //   - err: nil on success, otherwise a description of the first break:
 //     a prev_chain_hash that does not match the running tip, a row that
 //     cannot be canonicalized, or a stored row_hmac that does not match
 //     the recomputed one. Row numbers in messages are 1-based.
 //
 // CRITICAL: each row is canonicalized from its RAW LINE BYTES, not from
 // the parsed struct. Round-tripping through encoding/json + time.Time
 // strips trailing zeros from RFC3339 nanos and yields different bytes
 // than the Rust writer hashed (chrono AutoSi emits 9 digits). Found
 // 2026-05-03 on a WORKER-5 row with nanos 461439210.
 //
 // The stored and recomputed MACs are compared with hmac.Equal so the
 // comparison time does not depend on where the first differing byte is.
 func VerifyChain(entries []AuditLogEntry, key []byte) (count int, chainTip string, err error) {
 	prev := GenesisHash
 	// Index-based loop: avoids copying each AuditLogEntry per iteration.
 	for i := range entries {
 		entry := &entries[i]
 		if entry.Row.PrevChainHash != prev {
 			return i, prev, fmt.Errorf("chain break at row %d: prev_chain_hash=%q expected=%q",
 				i+1, entry.Row.PrevChainHash, prev)
 		}
 		claimed := entry.Row.RowHmac
 		canonical, cerr := canonicalRowBytesFromRaw(entry.Raw)
 		if cerr != nil {
 			return i, prev, fmt.Errorf("canonicalize row %d: %w", i+1, cerr)
 		}
 		recomputed := computeRowHMAC(key, prev, canonical)
 		if !hmac.Equal([]byte(recomputed), []byte(claimed)) {
 			return i, prev, fmt.Errorf("hmac mismatch at row %d: stored=%s recomputed=%s",
 				i+1, claimed, recomputed)
 		}
 		prev = claimed
 	}
 	return len(entries), prev, nil
 }
 // LoadKeyFile returns the full contents of the signing key file at path.
 // Keys shorter than 32 bytes are rejected with an error; per the Rust
 // side this minimum was raised from 16 to 32 on 2026-05-03 (kimi BLOCK
 // fix) to align with HMAC-SHA256 best practice.
 func LoadKeyFile(path string) ([]byte, error) {
 	const minKeyLen = 32
 	key, readErr := os.ReadFile(path)
 	switch {
 	case readErr != nil:
 		return nil, fmt.Errorf("read key file %s: %w", path, readErr)
 	case len(key) < minKeyLen:
 		return nil, fmt.Errorf("signing key is %d bytes; recommend ≥32 bytes for HMAC-SHA256", len(key))
 	}
 	return key, nil
 }
--- a/internal/catalogd/subject_test.go
+++ b/internal/catalogd/subject_test.go
@ -0,0 +1,333 @@
 package catalogd
 import (
 	"crypto/hmac"
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
 	"strings"
 	"testing"
 	"time"
 )
 // deterministicKey returns the 32-byte fixture key 0x00, 0x01, …, 0x1f.
 // It is the same key the Rust tests build with (0u8..32).collect(), so a
 // chain written by the Rust tests verifies under Go.
 func deterministicKey() []byte {
 	key := make([]byte, 0, 32)
 	for b := byte(0); b < 32; b++ {
 		key = append(key, b)
 	}
 	return key
 }
 // mkRow builds a fixture SubjectAuditRow with a fixed accessor
 // (gateway_lookup / gateway / fill_validation, empty trace_id), result
 // "success" and an empty row_hmac for the caller to fill in.
 //
 // ts must be an RFC3339 timestamp. A malformed value panics rather than
 // silently producing the zero time, which would let a typo in a fixture
 // go unnoticed.
 func mkRow(candidateID string, fields []string, prevHash, ts string) SubjectAuditRow {
 	parsed, err := time.Parse(time.RFC3339Nano, ts)
 	if err != nil {
 		panic("mkRow: invalid ts " + ts + ": " + err.Error())
 	}
 	return SubjectAuditRow{
 		Schema:      "subject_audit.v1",
 		Ts:          parsed,
 		CandidateID: candidateID,
 		Accessor: AuditAccessor{
 			Kind:    "gateway_lookup",
 			Daemon:  "gateway",
 			Purpose: "fill_validation",
 			TraceID: "",
 		},
 		FieldsAccessed: fields,
 		Result:         "success",
 		PrevChainHash:  prevHash,
 		RowHmac:        "", // filled in by buildEntry
 	}
 }
 // TestCanonicalJSON_KeysSortedAlphabetically asserts the same property
 // the Rust unit test asserts
 // (subject_audit::tests::canonical_json_sorts_keys_alphabetically):
 // keys are emitted in sorted order at the top level and inside nested
 // objects.
 //
 // The whole output is compared instead of comparing strings.Index
 // positions: Index returns -1 for a missing key, which would make an
 // "a < m" style check pass even if a key were dropped.
 func TestCanonicalJSON_KeysSortedAlphabetically(t *testing.T) {
 	v := map[string]any{
 		"z": 1,
 		"a": 2,
 		"m": map[string]any{"y": 1, "b": 2},
 	}
 	var buf strings.Builder
 	if err := writeCanonical(&buf, v); err != nil {
 		t.Fatalf("canonical: %v", err)
 	}
 	const want = `{"a":2,"m":{"b":2,"y":1},"z":1}`
 	if got := buf.String(); got != want {
 		t.Fatalf("keys out of order or output malformed:\n got=%s\nwant=%s", got, want)
 	}
 }
 // TestCanonicalJSON_ArraysPreserveOrder checks that array elements are
 // emitted in input order and are NOT sorted. Mirrors the Rust test
 // subject_audit::tests::canonical_json_arrays_preserve_order.
 func TestCanonicalJSON_ArraysPreserveOrder(t *testing.T) {
 	input := map[string]any{"k": []any{"c", "a", "b"}}
 	out := new(strings.Builder)
 	if err := writeCanonical(out, input); err != nil {
 		t.Fatalf("canonical: %v", err)
 	}
 	if got := out.String(); !strings.Contains(got, `"c","a","b"`) {
 		t.Fatalf("array order altered: %s", got)
 	}
 }
 // buildEntry signs row and wraps it as an AuditLogEntry. The HMAC is
 // computed over the struct-derived canonical bytes chained onto prev;
 // the signed row is then marshaled and stored as the entry's raw bytes.
 // Panics on any canonicalization or marshal error. Test-only: production
 // entries carry the raw bytes read from disk.
 func buildEntry(row SubjectAuditRow, key []byte, prev string) AuditLogEntry {
 	must := func(b []byte, err error) []byte {
 		if err != nil {
 			panic(err)
 		}
 		return b
 	}
 	// Canonicalize while row_hmac is still empty, then store the MAC.
 	row.RowHmac = computeRowHMAC(key, prev, must(canonicalRowBytesFromStruct(&row)))
 	return AuditLogEntry{Row: row, Raw: must(json.Marshal(row))}
 }
 // TestVerifyChain_ReplaysAndReachesTip builds a three-row chain, each
 // row signed against the previous row's HMAC, and checks that VerifyChain
 // accepts it and reports the last row's HMAC as the tip. This is the
 // Go-only half of the parity contract; the cross-runtime half (Rust
 // writes, Go verifies) lives in
 // scripts/cutover/parity/subject_audit_parity.sh.
 func TestVerifyChain_ReplaysAndReachesTip(t *testing.T) {
 	key := deterministicKey()
 	steps := []struct{ field, ts string }{
 		{"name", "2026-05-03T12:00:00Z"},
 		{"phone", "2026-05-03T12:00:01Z"},
 		{"email", "2026-05-03T12:00:02Z"},
 	}
 	chain := make([]AuditLogEntry, 0, len(steps))
 	prev := GenesisHash
 	for _, step := range steps {
 		signed := buildEntry(mkRow("CAND-PARITY", []string{step.field}, prev, step.ts), key, prev)
 		chain = append(chain, signed)
 		prev = signed.Row.RowHmac
 	}
 	count, tip, err := VerifyChain(chain, key)
 	switch {
 	case err != nil:
 		t.Fatalf("verify failed: %v", err)
 	case count != 3:
 		t.Fatalf("expected 3 rows verified, got %d", count)
 	case tip != prev:
 		t.Fatalf("chain tip wrong: tip=%s expected=%s", tip, prev)
 	}
 }
 // TestVerifyChain_EmptyLogIsTriviallyValid covers the empty-log special
 // case shared with Rust: no rows means count 0, tip GENESIS, nil error.
 func TestVerifyChain_EmptyLogIsTriviallyValid(t *testing.T) {
 	count, tip, err := VerifyChain(nil, deterministicKey())
 	switch {
 	case err != nil:
 		t.Fatalf("empty log returned error: %v", err)
 	case count != 0:
 		t.Fatalf("expected 0 rows on empty log, got %d", count)
 	case tip != GenesisHash:
 		t.Fatalf("expected GENESIS tip on empty log, got %q", tip)
 	}
 }
 // TestVerifyChain_TamperDetected edits the `result` value inside the raw
 // line only. VerifyChain canonicalizes from the raw bytes, so the
 // recomputed HMAC must no longer match the stored one.
 func TestVerifyChain_TamperDetected(t *testing.T) {
 	key := deterministicKey()
 	entry := buildEntry(mkRow("CAND-T", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z"), key, GenesisHash)
 	// Swap "success" for "denied" in the raw bytes; the parsed row, whose
 	// row_hmac serves as the stored comparator, is left untouched.
 	tampered := strings.Replace(string(entry.Raw), `"success"`, `"denied"`, 1)
 	entry.Raw = []byte(tampered)
 	_, _, err := VerifyChain([]AuditLogEntry{entry}, key)
 	if err == nil {
 		t.Fatal("expected hmac mismatch after tamper, got nil")
 	}
 	if msg := err.Error(); !strings.Contains(msg, "hmac mismatch") {
 		t.Fatalf("expected hmac mismatch, got: %v", err)
 	}
 }
 // TestVerifyChain_BadKeyRejectsValidRows signs a row with the fixture
 // key and verifies it with a different key (32 bytes of 0xff). The
 // failure must be an HMAC mismatch specifically; accepting any error
 // would let an unrelated failure (e.g. a canonicalization error) pass
 // the test.
 func TestVerifyChain_BadKeyRejectsValidRows(t *testing.T) {
 	good := deterministicKey()
 	r1 := mkRow("CAND-BK", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z")
 	e1 := buildEntry(r1, good, GenesisHash)
 	bad := make([]byte, 32)
 	for i := range bad {
 		bad[i] = 0xff
 	}
 	_, _, err := VerifyChain([]AuditLogEntry{e1}, bad)
 	if err == nil {
 		t.Fatal("expected hmac mismatch with wrong key")
 	}
 	if !strings.Contains(err.Error(), "hmac mismatch") {
 		t.Fatalf("expected hmac mismatch, got: %v", err)
 	}
 }
 // TestComputeRowHMAC_StableAcrossRuns checks determinism: canonicalizing
 // the same row twice yields identical bytes, and hashing them with the
 // same key yields the same 64-character hex HMAC. Canonicalization
 // errors fail the test instead of being discarded, so a broken
 // canonicalizer cannot pass by comparing two empty results.
 func TestComputeRowHMAC_StableAcrossRuns(t *testing.T) {
 	key := deterministicKey()
 	r := mkRow("CAND-S", []string{"a", "b"}, GenesisHash, "2026-05-03T12:00:00Z")
 	c1, err := canonicalRowBytesFromStruct(&r)
 	if err != nil {
 		t.Fatalf("canonical 1: %v", err)
 	}
 	c2, err := canonicalRowBytesFromStruct(&r)
 	if err != nil {
 		t.Fatalf("canonical 2: %v", err)
 	}
 	if string(c1) != string(c2) {
 		t.Fatalf("canonical bytes unstable across runs:\n  c1=%s\n  c2=%s", c1, c2)
 	}
 	h1 := computeRowHMAC(key, GenesisHash, c1)
 	h2 := computeRowHMAC(key, GenesisHash, c2)
 	if h1 != h2 {
 		t.Fatalf("hmac unstable across runs: %s vs %s", h1, h2)
 	}
 	if len(h1) != 64 {
 		t.Fatalf("hmac wrong length %d", len(h1))
 	}
 	// Sanity: hex-decodable.
 	if _, err := hex.DecodeString(h1); err != nil {
 		t.Fatalf("hmac not hex: %v", err)
 	}
 }
 // TestKnownAnswerVector pins the known-answer fixture shared with the
 // Rust helper. scripts/cutover/parity/subject_audit_parity.sh runs both
 // helpers against this exact fixture and asserts byte-identical output;
 // the expected values below are the ones recorded by that probe in
 // reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md
 // (266 canonical bytes).
 //
 // Pinning them here means a change to the canonicalizer or to
 // encoding/json fails this unit test directly, without needing the
 // cross-runtime probe to be run.
 //
 // If you change the fixture, rebuild Rust's parity_subject_audit + Go's
 // helper and update both sides together.
 func TestKnownAnswerVector(t *testing.T) {
 	const (
 		wantCanonical = `{"accessor":{"daemon":"gateway","kind":"gateway_lookup","purpose":"parity_test","trace_id":"trace-fixed"},"candidate_id":"WORKER-FIXED","fields_accessed":["name"],"prev_chain_hash":"GENESIS","result":"success","schema":"subject_audit.v1","ts":"2026-05-03T12:00:00Z"}`
 		wantHMAC      = "f730fa038c847c27386b92eb1939ec64c62086c0a92617ac0bdf9f650c390b96"
 	)
 	key := deterministicKey()
 	r := SubjectAuditRow{
 		Schema:      "subject_audit.v1",
 		Ts:          time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC),
 		CandidateID: "WORKER-FIXED",
 		Accessor: AuditAccessor{
 			Kind:    "gateway_lookup",
 			Daemon:  "gateway",
 			Purpose: "parity_test",
 			TraceID: "trace-fixed",
 		},
 		FieldsAccessed: []string{"name"},
 		Result:         "success",
 		PrevChainHash:  GenesisHash,
 		RowHmac:        "",
 	}
 	canon, err := canonicalRowBytesFromStruct(&r)
 	if err != nil {
 		t.Fatalf("canonical: %v", err)
 	}
 	t.Logf("canonical bytes: %s", canon)
 	if string(canon) != wantCanonical {
 		t.Fatalf("canonical bytes differ from known answer:\n got=%s\nwant=%s", canon, wantCanonical)
 	}
 	hmacHex := computeRowHMAC(key, GenesisHash, canon)
 	t.Logf("hmac: %s", hmacHex)
 	if hmacHex != wantHMAC {
 		t.Fatalf("hmac differs from known answer: got=%s want=%s", hmacHex, wantHMAC)
 	}
 	// Sanity: round-trip through encoding/json + canonicalization is stable.
 	again, err := canonicalRowBytesFromStruct(&r)
 	if err != nil {
 		t.Fatalf("canonical 2: %v", err)
 	}
 	if string(canon) != string(again) {
 		t.Fatalf("canonical drift: %s vs %s", canon, again)
 	}
 	// Sanity: computeRowHMAC agrees with an HMAC built directly here.
 	mac := hmac.New(sha256.New, key)
 	mac.Write([]byte(GenesisHash))
 	mac.Write(canon)
 	expected := hex.EncodeToString(mac.Sum(nil))
 	if hmacHex != expected {
 		t.Fatalf("computeRowHMAC drift: %s vs %s", hmacHex, expected)
 	}
 }
 // TestVerifyChain_RawBytesPreserveTimePrecision guards against the
 // 2026-05-03 WORKER-5 finding. A timestamp whose nanoseconds end in 0 is
 // shortened by time.RFC3339Nano when re-marshaled, while Rust's chrono
 // AutoSi always writes 9 digits, so hashing the re-marshaled struct
 // breaks the chain. The test signs a hand-written raw line with such a
 // timestamp and requires VerifyChain, which must hash the raw bytes, to
 // accept it.
 func TestVerifyChain_RawBytesPreserveTimePrecision(t *testing.T) {
 	// Raw line as the Rust writer would emit it, minus row_hmac;
 	// nanoseconds=461439210 keeps its trailing zero.
 	const unsigned = `{"schema":"subject_audit.v1","ts":"2026-05-03T09:12:47.461439210Z","candidate_id":"WORKER-5","accessor":{"kind":"validator_lookup","daemon":"gateway","purpose":"validator_worker_lookup","trace_id":""},"fields_accessed":["exists"],"result":"not_found","prev_chain_hash":"GENESIS"}`
 	signingKey := deterministicKey()
 	canon, err := canonicalRowBytesFromRaw([]byte(unsigned))
 	if err != nil {
 		t.Fatalf("canonicalize raw: %v", err)
 	}
 	wantTip := computeRowHMAC(signingKey, GenesisHash, canon)
 	// Append row_hmac as the last key, the way the Rust writer lays the
 	// row out (declaration order, hmac last).
 	signed := strings.TrimSuffix(unsigned, "}") + `,"row_hmac":"` + wantTip + `"}`
 	entry := AuditLogEntry{Raw: []byte(signed)}
 	if err := json.Unmarshal(entry.Raw, &entry.Row); err != nil {
 		t.Fatalf("unmarshal: %v", err)
 	}
 	count, tip, err := VerifyChain([]AuditLogEntry{entry}, signingKey)
 	switch {
 	case err != nil:
 		t.Fatalf("verify failed (regression: time-precision drift): %v", err)
 	case count != 1:
 		t.Fatalf("expected 1 row verified, got %d", count)
 	case tip != wantTip:
 		t.Fatalf("tip mismatch: %s vs %s", tip, wantTip)
 	}
 }
 // TestSubjectManifest_RoundTripJSON decodes a fixture shaped like the
 // files crates/catalogd/src/registry.rs::put_subject writes to
 // data/_catalog/subjects/<id>.json and spot-checks the decoded fields.
 // A failure means the Go reader has drifted from the Rust writer
 // (a Step 8 contract violation).
 func TestSubjectManifest_RoundTripJSON(t *testing.T) {
 	src := `{
  "schema": "subject_manifest.v1",
  "candidate_id": "WORKER-1",
  "created_at": "2026-05-03T08:22:24.571647177Z",
  "updated_at": "2026-05-03T08:22:24.571647177Z",
  "status": "active",
  "vertical": "unknown",
  "consent": {
    "general_pii": {
      "status": "pending_backfill_review",
      "version": ""
    },
    "biometric": {
      "status": "never_collected"
    }
  },
  "retention": {
    "general_pii_until": "2030-05-02T08:22:24.571647177Z",
    "policy": "4_year_default"
  },
  "datasets": [
    {"name": "workers_500k", "key_column": "worker_id", "key_value": "1"}
  ],
  "safe_views": ["workers_safe"],
  "audit_log_path": "_catalog/subjects/WORKER-1.audit.jsonl",
  "audit_log_chain_root": ""
 }`
 	var m SubjectManifest
 	if err := json.Unmarshal([]byte(src), &m); err != nil {
 		t.Fatalf("unmarshal: %v", err)
 	}
 	// String fields are checked in order; the first mismatch aborts.
 	stringChecks := []struct {
 		format string
 		got    string
 		want   string
 	}{
 		{"candidate_id wrong: %s", m.CandidateID, "WORKER-1"},
 		{"status wrong: %s", m.Status, "active"},
 		{"general_pii.status wrong: %s", m.Consent.GeneralPii.Status, "pending_backfill_review"},
 		{"biometric.status wrong: %s", m.Consent.Biometric.Status, "never_collected"},
 		{"retention.policy wrong: %s", m.Retention.Policy, "4_year_default"},
 	}
 	for _, check := range stringChecks {
 		if check.got != check.want {
 			t.Fatalf(check.format, check.got)
 		}
 	}
 	if len(m.Datasets) != 1 || m.Datasets[0].Name != "workers_500k" {
 		t.Fatalf("datasets wrong: %+v", m.Datasets)
 	}
 }
--- a/reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md
+++ b/reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md
@ -0,0 +1,35 @@
 # subject_audit_parity
 **Generated:** 2026-05-03 09:16:14 UTC
 **Spec:** /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8
 **Rust helper:** `/home/profit/lakehouse/target/release/parity_subject_audit`
 **Go helper:** `./bin/subject_audit_helper`
 **Audit dir:** `/home/profit/lakehouse/data/_catalog/subjects`
 ## Phase 1 — Known-answer vector
 Hardcoded fixture row, identical inputs, byte-compare canonical-JSON + HMAC.
 **MATCH** ✓
 ```json
 {"mode":"known_answer","canonical":"{\"accessor\":{\"daemon\":\"gateway\",\"kind\":\"gateway_lookup\",\"purpose\":\"parity_test\",\"trace_id\":\"trace-fixed\"},\"candidate_id\":\"WORKER-FIXED\",\"fields_accessed\":[\"name\"],\"prev_chain_hash\":\"GENESIS\",\"result\":\"success\",\"schema\":\"subject_audit.v1\",\"ts\":\"2026-05-03T12:00:00Z\"}","hmac":"f730fa038c847c27386b92eb1939ec64c62086c0a92617ac0bdf9f650c390b96","canonical_bytes_len":266}
 ```
 ## Phase 2 — Real production audit logs
 Every `*.audit.jsonl` in `/home/profit/lakehouse/data/_catalog/subjects` verified by both runtimes.
 | Audit log | Rust verified | Go verified | Result |
 |---|---|---|---|
 | `WORKER-1.audit.jsonl` | 1 rows (true) | 1 rows (true) | **MATCH** ✓ |
 | `WORKER-2.audit.jsonl` | 1 rows (true) | 1 rows (true) | **MATCH** ✓ |
 | `WORKER-3.audit.jsonl` | 1 rows (true) | 1 rows (true) | **MATCH** ✓ |
 | `WORKER-4.audit.jsonl` | 1 rows (true) | 1 rows (true) | **MATCH** ✓ |
 | `WORKER-5.audit.jsonl` | 1 rows (true) | 1 rows (true) | **MATCH** ✓ |
 ## Summary
 **6 / 6** parity assertions passed.
 **Status: PARITY** — every Rust assertion matches Go byte-for-byte.
--- a/scripts/cutover/parity/subject_audit_helper/main.go
+++ b/scripts/cutover/parity/subject_audit_helper/main.go
@ -0,0 +1,168 @@
 // Cross-runtime parity helper — Go side.
 //
 // Specification: /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8.
 //
 // Counterpart of crates/catalogd/src/bin/parity_subject_audit.rs.
 // Both helpers MUST produce byte-identical output for the same input.
 //
 // Modes:
 //
 //   --known-answer
 //       Print canonical-JSON + HMAC for a hardcoded fixture. Compared
 //       byte-for-byte against the Rust helper's output. If they
 //       differ, the canonical-JSON or HMAC algorithm has drifted.
 //
 //   --verify <audit_log_path> --key <key_path>
 //       Replay the HMAC chain on a real audit JSONL. Print one JSON
 //       object: {mode, count, tip, verified, error}.
 package main
 import (
 	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 	"time"
 	cat "git.agentview.dev/profit/golangLAKEHOUSE/internal/catalogd"
 )
 const genesis = "GENESIS"
 // deterministicKey returns the 32-byte fixture key 0x00, 0x01, …, 0x1f
 // used for the known-answer vector.
 func deterministicKey() []byte {
 	key := make([]byte, 0, 32)
 	for b := byte(0); b < 32; b++ {
 		key = append(key, b)
 	}
 	return key
 }
 // knownAnswerOut is the stdout payload of --known-answer. Its shape is
 // kept identical to KnownAnswerOut in the Rust helper so the two outputs
 // can be compared with a plain diff.
 type knownAnswerOut struct {
 	Mode              string `json:"mode"`
 	Canonical         string `json:"canonical"`
 	Hmac              string `json:"hmac"`
 	CanonicalBytesLen int    `json:"canonical_bytes_len"`
 }
 // verifyOut is the stdout payload of --verify. Error is a pointer with
 // no omitempty, so a successful run prints "error":null.
 type verifyOut struct {
 	Mode     string  `json:"mode"`
 	Count    int     `json:"count"`
 	Tip      string  `json:"tip"`
 	Verified bool    `json:"verified"`
 	Error    *string `json:"error"`
 }
 // runKnownAnswer prints the canonical bytes and HMAC of the hardcoded
 // fixture row (fixture key, GENESIS as previous hash) as one JSON line.
 // Exits via die if canonicalization fails.
 func runKnownAnswer() {
 	fixture := &cat.SubjectAuditRow{
 		Schema:      "subject_audit.v1",
 		Ts:          time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC),
 		CandidateID: "WORKER-FIXED",
 		Accessor: cat.AuditAccessor{
 			Kind:    "gateway_lookup",
 			Daemon:  "gateway",
 			Purpose: "parity_test",
 			TraceID: "trace-fixed",
 		},
 		FieldsAccessed: []string{"name"},
 		Result:         "success",
 		PrevChainHash:  genesis,
 	}
 	canonicalBytes, mac, err := cat.CanonicalAndHmac(fixture, deterministicKey(), genesis)
 	if err != nil {
 		die("canonical/hmac: %v", err)
 	}
 	emit(knownAnswerOut{
 		Mode:              "known_answer",
 		Canonical:         string(canonicalBytes),
 		Hmac:              mac,
 		CanonicalBytesLen: len(canonicalBytes),
 	})
 }
 // runVerify replays the HMAC chain of the audit log at auditPath with
 // the signing key at keyPath and prints one verifyOut JSON line.
 //
 // The key is loaded through cat.LoadKeyFile so the helper applies the
 // same 32-byte minimum as the library instead of accepting any file
 // content. An unreadable log or an unreadable/too-short key exits via
 // die; a chain failure is reported in the JSON output, not as an exit
 // code.
 //
 // NOTE(review): assumes the Rust helper rejects short keys too — confirm,
 // since a difference would surface as a parity mismatch for such keys.
 func runVerify(auditPath, keyPath string) {
 	entries, err := cat.ReadAuditLog(auditPath)
 	if err != nil {
 		die("read audit log: %v", err)
 	}
 	key, err := cat.LoadKeyFile(keyPath)
 	if err != nil {
 		die("load key: %v", err)
 	}
 	count, tip, verr := cat.VerifyChain(entries, key)
 	out := verifyOut{
 		Mode:     "verify",
 		Count:    count,
 		Tip:      tip,
 		Verified: verr == nil,
 	}
 	if verr != nil {
 		s := verr.Error()
 		out.Error = &s
 		// Reset count + tip to match the Rust helper's error semantics.
 		out.Count = 0
 		out.Tip = genesis
 	}
 	emit(out)
 }
 // emit prints v to stdout as a single line of compact JSON. A marshal
 // failure terminates the process through die.
 func emit(v any) {
 	encoded, marshalErr := json.Marshal(v)
 	if marshalErr != nil {
 		die("marshal output: %v", marshalErr)
 	}
 	fmt.Printf("%s\n", encoded)
 }
 // die writes the formatted message plus a newline to stderr and exits
 // with status 2.
 func die(format string, a ...any) {
 	message := fmt.Sprintf(format, a...)
 	fmt.Fprintln(os.Stderr, message)
 	os.Exit(2)
 }
 // main parses the command line and dispatches to one of the two modes.
 //
 // Arguments are processed left to right: -h/--help prints usage and
 // exits 0 as soon as it is seen, and an unknown argument or a flag
 // missing its operand exits via die. After parsing, --known-answer takes
 // precedence; otherwise both --verify and --key are required and the
 // audit path must be named *.audit.jsonl.
 func main() {
 	var (
 		knownAnswer        bool
 		auditPath, keyPath string
 	)
 	argv := os.Args[1:]
 	for idx := 0; idx < len(argv); idx++ {
 		switch flag := argv[idx]; flag {
 		case "--known-answer":
 			knownAnswer = true
 		case "--verify", "--key":
 			// Both flags consume the following argument as a path.
 			if idx+1 >= len(argv) {
 				die("%s needs a path", flag)
 			}
 			idx++
 			if flag == "--verify" {
 				auditPath = argv[idx]
 			} else {
 				keyPath = argv[idx]
 			}
 		case "-h", "--help":
 			fmt.Fprintln(os.Stderr, "subject_audit_helper --known-answer")
 			fmt.Fprintln(os.Stderr, "subject_audit_helper --verify <audit_log> --key <key_file>")
 			os.Exit(0)
 		default:
 			die("unknown arg: %s", flag)
 		}
 	}
 	switch {
 	case knownAnswer:
 		runKnownAnswer()
 		return
 	case auditPath == "" || keyPath == "":
 		die("need --known-answer OR (--verify <path> --key <path>)")
 	}
 	// Sanity: file naming convention <candidate_id>.audit.jsonl.
 	if name := filepath.Base(auditPath); !strings.HasSuffix(name, ".audit.jsonl") {
 		die("audit log path must end with .audit.jsonl")
 	}
 	runVerify(auditPath, keyPath)
 }
--- a/scripts/cutover/parity/subject_audit_parity.sh
+++ b/scripts/cutover/parity/subject_audit_parity.sh
@ -0,0 +1,203 @@
 #!/usr/bin/env bash
 # subject_audit_parity — verify Rust and Go produce byte-identical
 # canonical JSON + HMAC-SHA256 chain hashes for subject audit logs.
 #
 # Why: the SubjectManifest + audit-log substrate (Rust crates/catalogd
 # subject_audit.rs) is consumed by the legal-tier endpoint /audit/subject/{id}
 # AND by the Go internal/catalogd reader. A canonical-JSON drift between
 # the two runtimes would mean a chain written by Rust does not verify
 # under Go (or worse, verifies as tampered) — silently breaking the
 # defensible-audit guarantee.
 #
 # The probe runs in two phases:
 #
 #   Phase 1 — Known-answer vector
 #     Both helpers serialize the same hardcoded SubjectAuditRow,
 #     emit canonical bytes + HMAC. Outputs MUST be byte-identical.
 #     This catches algorithm drift independent of any real data.
 #
 #   Phase 2 — Real production audit logs
 #     Walk every *.audit.jsonl in the live data dir. Run BOTH helpers
 #     against each one with the production signing key. Outputs MUST
 #     be byte-identical. This catches real-world drift (e.g. struct
 #     field tag mismatches that only fire on certain field values).
 #
 # Outputs: reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md
 #
 # Env overrides:
 #   RUST_REPO=/home/profit/lakehouse
 #   RUST_BIN=$RUST_REPO/target/release/parity_subject_audit
 #   GO_BIN=./bin/subject_audit_helper
 #   AUDIT_DIR=$RUST_REPO/data/_catalog/subjects
 #   KEY_PATH=/tmp/lakehouse_audit/subject_audit.key
 # Treat unset variables as errors and make a pipeline fail when any stage
 # fails. `-e` is not enabled: helper invocations further down tolerate
 # non-zero exits explicitly with `|| true`.
 set -uo pipefail
 # Work from the repository root (three directories above this script) so the
 # relative GO_BIN and OUT_DIR paths resolve. Without `-e` a failed cd would go
 # unnoticed and the report would be written under the wrong directory, so
 # bail out explicitly.
 cd "$(dirname "$0")/../../.." || {
   echo "[subject-audit-parity] FAIL: cannot cd to repo root from $0" >&2
   exit 1
 }
 # Helper binaries, audit directory and signing key honour env overrides.
 RUST_REPO="${RUST_REPO:-/home/profit/lakehouse}"
 RUST_BIN="${RUST_BIN:-$RUST_REPO/target/release/parity_subject_audit}"
 GO_BIN="${GO_BIN:-./bin/subject_audit_helper}"
 AUDIT_DIR="${AUDIT_DIR:-$RUST_REPO/data/_catalog/subjects}"
 KEY_PATH="${KEY_PATH:-/tmp/lakehouse_audit/subject_audit.key}"
 # Report location (relative to the repo root).
 OUT_DIR="reports/cutover/gauntlet_2026-05-02/parity"
 mkdir -p "$OUT_DIR" || {
   echo "[subject-audit-parity] FAIL: cannot create $OUT_DIR" >&2
   exit 1
 }
 OUT="$OUT_DIR/subject_audit_parity.md"
 # Append the conventional Go install location so `go build` can be found.
 export PATH="$PATH:/usr/local/go/bin"
 # ── Build / verify both sides ───────────────────────────────────────
 # Rust helper: build on demand; if it is still not executable afterwards the
 # probe is skipped (exit 0) rather than failed.
 if [[ ! -x "$RUST_BIN" ]]; then
   echo "[subject-audit-parity] building Rust helper..."
   (cd "$RUST_REPO" && cargo build -p catalogd --bin parity_subject_audit --release 2>&1 | tail -3)
   if [[ ! -x "$RUST_BIN" ]]; then
     echo "[subject-audit-parity] SKIP: $RUST_BIN missing"
     exit 0
   fi
 fi
 # Go helper: build on demand; a missing binary after the build is a hard
 # failure (exit 1).
 if [[ ! -x "$GO_BIN" ]]; then
   echo "[subject-audit-parity] building Go helper..."
   go build -o "$GO_BIN" ./scripts/cutover/parity/subject_audit_helper/
   if [[ ! -x "$GO_BIN" ]]; then
     echo "[subject-audit-parity] FAIL: $GO_BIN missing after build"
     exit 1
   fi
 fi
 # ── Report header ───────────────────────────────────────────────────
 # Truncate/create the report and write its preamble; later sections append.
 {
   printf '# subject_audit_parity\n\n'
   printf '**Generated:** %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
   printf '%s\n' "**Spec:** /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8"
   printf '**Rust helper:** `%s`\n' "$RUST_BIN"
   printf '**Go helper:** `%s`\n' "$GO_BIN"
   printf '**Audit dir:** `%s`\n' "$AUDIT_DIR"
   printf '\n'
 } > "$OUT"
 # Running tallies of parity assertions across both phases.
 PASS=0
 FAIL=0
 # ── Phase 1 — Known-answer vector ───────────────────────────────────
 # Both helpers are run with --known-answer; stdout and stderr are captured
 # together and the two captures are compared byte-for-byte. Helper exit codes
 # are deliberately ignored — only the captured output is compared.
 {
   printf '## Phase 1 — Known-answer vector\n\n'
   printf '%s\n\n' "Hardcoded fixture row, identical inputs, byte-compare canonical-JSON + HMAC."
 } >> "$OUT"
 RUST_KA="$(mktemp)"
 GO_KA="$(mktemp)"
 "$RUST_BIN" --known-answer > "$RUST_KA" 2>&1 || true
 "$GO_BIN" --known-answer > "$GO_KA" 2>&1 || true
 if cmp -s "$RUST_KA" "$GO_KA"; then
   PASS=$((PASS+1))
   {
     printf '**MATCH** ✓\n\n'
     printf '```json\n'
     cat "$RUST_KA"
     printf '```\n\n'
   } >> "$OUT"
 else
   FAIL=$((FAIL+1))
   {
     printf '**MISMATCH** ✗\n\n'
     printf '### Rust\n```json\n'
     cat "$RUST_KA"
     printf '```\n\n'
     printf '### Go\n```json\n'
     cat "$GO_KA"
     printf '```\n\n'
     printf '### Diff\n```diff\n'
     diff "$RUST_KA" "$GO_KA" || true
     printf '```\n\n'
   } >> "$OUT"
 fi
 # ── Phase 2 — Real production audit logs ────────────────────────────
 # For every *.audit.jsonl under AUDIT_DIR, run both helpers in --verify mode
 # with the same key, capture stdout+stderr, and require the captures to be
 # byte-identical. The phase is skipped (no assertions counted) when the key
 # is unreadable or no logs exist.
 {
   echo "## Phase 2 — Real production audit logs"
   echo
   echo "Every \`*.audit.jsonl\` in \`$AUDIT_DIR\` verified by both runtimes."
   echo
 } >> "$OUT"
 if [ ! -r "$KEY_PATH" ]; then
   {
     echo "**SKIP** — signing key not readable at \`$KEY_PATH\`."
     echo "Set \`KEY_PATH=...\` or seed /tmp/lakehouse_audit/subject_audit.key (see systemd unit)."
     echo
   } >> "$OUT"
 else
   # nullglob: an unmatched glob expands to nothing instead of itself.
   shopt -s nullglob
   LOGS=( "$AUDIT_DIR"/*.audit.jsonl )
   if [ "${#LOGS[@]}" -eq 0 ]; then
     {
       echo "**SKIP** — no \`*.audit.jsonl\` files under \`$AUDIT_DIR\`."
       echo "(Trigger one by hitting the gateway with /v1/validate on a candidate_id.)"
       echo
     } >> "$OUT"
   else
     {
       echo "| Audit log | Rust verified | Go verified | Result |"
       echo "|---|---|---|---|"
     } >> "$OUT"
     # Per-log diff sections are buffered here and appended after the loop.
     # Writing them inline would put a blank line + heading in the middle of
     # the markdown table, ending it so later rows no longer render as a table.
     DIFF_SECTIONS="$(mktemp)"
     for log in "${LOGS[@]}"; do
       label="$(basename "$log")"
       RUST_OUT="$(mktemp)"; GO_OUT="$(mktemp)"
       "$RUST_BIN" --verify "$log" --key "$KEY_PATH" > "$RUST_OUT" 2>&1 || true
       "$GO_BIN" --verify "$log" --key "$KEY_PATH" > "$GO_OUT" 2>&1 || true
       # Table cells are informational only; "?" marks output jq cannot parse.
       rust_count=$(jq -r '.count // 0' < "$RUST_OUT" 2>/dev/null || echo "?")
       go_count=$(jq -r '.count // 0' < "$GO_OUT" 2>/dev/null || echo "?")
       rust_ok=$(jq -r '.verified // false' < "$RUST_OUT" 2>/dev/null || echo "?")
       go_ok=$(jq -r '.verified // false' < "$GO_OUT" 2>/dev/null || echo "?")
       # The pass/fail decision is the byte-for-byte comparison of raw output.
       if diff -q "$RUST_OUT" "$GO_OUT" >/dev/null 2>&1; then
         PASS=$((PASS+1))
         echo "| \`$label\` | $rust_count rows ($rust_ok) | $go_count rows ($go_ok) | **MATCH** ✓ |" >> "$OUT"
       else
         FAIL=$((FAIL+1))
         echo "| \`$label\` | $rust_count rows ($rust_ok) | $go_count rows ($go_ok) | **MISMATCH** ✗ |" >> "$OUT"
         {
           echo
           echo "### Diff for \`$label\`"
           echo '```diff'
           diff "$RUST_OUT" "$GO_OUT" || true
           echo '```'
         } >> "$DIFF_SECTIONS"
       fi
       rm -f "$RUST_OUT" "$GO_OUT"
     done
     # Table is complete; now emit any buffered diff sections below it.
     cat "$DIFF_SECTIONS" >> "$OUT"
     rm -f "$DIFF_SECTIONS"
   fi
 fi
 # Phase 1 capture files are no longer needed.
 rm -f "$RUST_KA" "$GO_KA"
 # ── Summary ─────────────────────────────────────────────────────────
 # Append the totals and an overall verdict to the report, echo a one-line
 # result to the console, and exit non-zero when any assertion failed.
 TOTAL=$((PASS + FAIL))
 {
   printf '\n## Summary\n\n'
   printf '**%s / %s** parity assertions passed.\n\n' "$PASS" "$TOTAL"
   if (( FAIL > 0 )); then
     printf '%s\n' \
       "**Status: DIVERGED** — Rust and Go disagree on at least one canonical-JSON or HMAC computation." \
       "Investigate the diff above before declaring cross-runtime parity."
   else
     printf '%s\n' "**Status: PARITY** — every Rust assertion matches Go byte-for-byte."
   fi
 } >> "$OUT"
 echo "[subject-audit-parity] $PASS / $TOTAL pass — report: $OUT"
 # The script's exit status is the status of this final test.
 test "$FAIL" -eq 0