Per /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8.
Go side reads SubjectManifest + verifies HMAC chain on per-subject
audit JSONL files using IDENTICAL canonical-JSON + HMAC-SHA256 algorithm
to crates/catalogd/src/subject_audit.rs. A Rust-written chain now
verifies under Go and vice versa.
Files:
- internal/catalogd/subject.go
SubjectManifest, SubjectAuditRow, AuditAccessor, AuditLogEntry
LoadSubjectManifest, LoadKeyFile (32-byte minimum, matches Rust)
ReadAuditLog, VerifyChain
canonicalRowBytesFromRaw (production), canonicalRowBytesFromStruct (tests)
computeRowHMAC, CanonicalAndHmac (parity helper)
- internal/catalogd/subject_test.go (10 unit tests)
- scripts/cutover/parity/subject_audit_helper/main.go
CLI helper mirroring crates/catalogd/src/bin/parity_subject_audit.rs
- scripts/cutover/parity/subject_audit_parity.sh
Two-phase probe: known-answer + every real audit log
Two real bugs caught + fixed by the probe authoring loop:
1. omitempty on AuditAccessor.TraceID stripped the field when empty,
producing different canonical bytes than Rust (which always writes
the field). Removed omitempty. Rust + Go now produce identical
bytes for rows with trace_id="" (the common production case).
2. time.RFC3339Nano strips trailing zeros from nanoseconds, producing
"...46143921" where Rust's chrono AutoSi produces "...461439210".
Hashing through the parsed-then-re-marshaled struct breaks the
chain on any row whose nanos end in 0. Fixed by canonicalizing
from the RAW LINE BYTES (preserves the original timestamp string
byte-for-byte). Test TestVerifyChain_RawBytesPreserveTimePrecision
regression-locks this with a hand-crafted nanos=461439210 row.
Live verification (6 / 6 byte-identical assertions):
- Phase 1 known-answer: canonical bytes (266) + HMAC match
- Phase 2 real logs: WORKER-1..5 audit JSONL all verify under both
runtimes with identical (count, tip, verified, error) output
Report: reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
169 lines
4.0 KiB
Go
169 lines
4.0 KiB
Go
// Cross-runtime parity helper — Go side.
|
|
//
|
|
// Specification: /home/profit/lakehouse/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8.
|
|
//
|
|
// Counterpart of crates/catalogd/src/bin/parity_subject_audit.rs.
|
|
// Both helpers MUST produce byte-identical output for the same input.
|
|
//
|
|
// Modes:
|
|
//
|
|
// --known-answer
|
|
// Print canonical-JSON + HMAC for a hardcoded fixture. Compared
|
|
// byte-for-byte against the Rust helper's output. If they
|
|
// differ, the canonical-JSON or HMAC algorithm has drifted.
|
|
//
|
|
// --verify <audit_log_path> --key <key_path>
|
|
// Replay the HMAC chain on a real audit JSONL. Print one JSON
|
|
// object: {mode, count, tip, verified, error}.
|
|
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
cat "git.agentview.dev/profit/golangLAKEHOUSE/internal/catalogd"
|
|
)
|
|
|
|
// genesis is the chain-start sentinel used by this helper: it is the
// PrevChainHash of the known-answer fixture row, the previous-hash argument
// passed to CanonicalAndHmac, and the tip reported when verification fails.
const genesis = "GENESIS"
|
|
|
|
// deterministicKey returns a freshly allocated 32-byte key whose bytes are
// 0x00, 0x01, ..., 0x1f. It is the fixed HMAC key for the known-answer
// fixture, so the output is reproducible across runs.
func deterministicKey() []byte {
	var key [32]byte
	for idx := 0; idx < len(key); idx++ {
		key[idx] = byte(idx)
	}
	return key[:]
}
|
|
|
|
// knownAnswerOut is the JSON document printed in --known-answer mode.
//
// Its shape is kept intentionally identical to KnownAnswerOut in the Rust
// helper, so diffing the two helpers' stdout is a one-line semantic
// comparison. Field order and tags determine the emitted bytes; do not
// reorder or rename them.
type knownAnswerOut struct {
	// Mode identifies the helper mode; runKnownAnswer sets it to "known_answer".
	Mode string `json:"mode"`
	// Canonical is the canonical-JSON encoding of the fixture row, as a string.
	Canonical string `json:"canonical"`
	// Hmac is the hex HMAC returned by CanonicalAndHmac for the fixture row.
	Hmac string `json:"hmac"`
	// CanonicalBytesLen is the length in bytes of the canonical encoding.
	CanonicalBytesLen int `json:"canonical_bytes_len"`
}
|
|
|
|
// verifyOut is the JSON document printed in --verify mode:
// {mode, count, tip, verified, error}. Field order and tags determine the
// emitted bytes; do not reorder or rename them.
type verifyOut struct {
	// Mode identifies the helper mode; runVerify sets it to "verify".
	Mode string `json:"mode"`
	// Count is the count returned by VerifyChain, or 0 when verification failed.
	Count int `json:"count"`
	// Tip is the tip returned by VerifyChain, or "GENESIS" when verification failed.
	Tip string `json:"tip"`
	// Verified is true only when VerifyChain returned no error.
	Verified bool `json:"verified"`
	// Error holds the verification error text; a nil pointer marshals as JSON null.
	Error *string `json:"error"`
}
|
|
|
|
func runKnownAnswer() {
|
|
row := cat.SubjectAuditRow{
|
|
Schema: "subject_audit.v1",
|
|
Ts: time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC),
|
|
CandidateID: "WORKER-FIXED",
|
|
Accessor: cat.AuditAccessor{
|
|
Kind: "gateway_lookup",
|
|
Daemon: "gateway",
|
|
Purpose: "parity_test",
|
|
TraceID: "trace-fixed",
|
|
},
|
|
FieldsAccessed: []string{"name"},
|
|
Result: "success",
|
|
PrevChainHash: genesis,
|
|
}
|
|
canonical, hmacHex, err := cat.CanonicalAndHmac(&row, deterministicKey(), genesis)
|
|
if err != nil {
|
|
die("canonical/hmac: %v", err)
|
|
}
|
|
out := knownAnswerOut{
|
|
Mode: "known_answer",
|
|
Canonical: string(canonical),
|
|
Hmac: hmacHex,
|
|
CanonicalBytesLen: len(canonical),
|
|
}
|
|
emit(out)
|
|
}
|
|
|
|
func runVerify(auditPath, keyPath string) {
|
|
entries, err := cat.ReadAuditLog(auditPath)
|
|
if err != nil {
|
|
die("read audit log: %v", err)
|
|
}
|
|
key, err := os.ReadFile(keyPath)
|
|
if err != nil {
|
|
die("read key: %v", err)
|
|
}
|
|
count, tip, verr := cat.VerifyChain(entries, key)
|
|
out := verifyOut{
|
|
Mode: "verify",
|
|
Count: count,
|
|
Tip: tip,
|
|
Verified: verr == nil,
|
|
}
|
|
if verr != nil {
|
|
s := verr.Error()
|
|
out.Error = &s
|
|
// Reset count + tip to match the Rust helper's error semantics.
|
|
out.Count = 0
|
|
out.Tip = genesis
|
|
}
|
|
emit(out)
|
|
}
|
|
|
|
func emit(v any) {
|
|
bs, err := json.Marshal(v)
|
|
if err != nil {
|
|
die("marshal output: %v", err)
|
|
}
|
|
fmt.Println(string(bs))
|
|
}
|
|
|
|
// die prints a printf-style message plus a trailing newline to stderr and
// terminates the process with exit status 2. It never returns.
func die(format string, a ...any) {
	line := fmt.Sprintf(format+"\n", a...)
	// Best-effort write: the process is exiting either way.
	_, _ = os.Stderr.WriteString(line)
	os.Exit(2)
}
|
|
|
|
func main() {
|
|
args := os.Args[1:]
|
|
var (
|
|
knownAnswer bool
|
|
auditPath string
|
|
keyPath string
|
|
)
|
|
for i := 0; i < len(args); i++ {
|
|
switch args[i] {
|
|
case "--known-answer":
|
|
knownAnswer = true
|
|
case "--verify":
|
|
if i+1 >= len(args) {
|
|
die("--verify needs a path")
|
|
}
|
|
auditPath = args[i+1]
|
|
i++
|
|
case "--key":
|
|
if i+1 >= len(args) {
|
|
die("--key needs a path")
|
|
}
|
|
keyPath = args[i+1]
|
|
i++
|
|
case "-h", "--help":
|
|
fmt.Fprintln(os.Stderr, "subject_audit_helper --known-answer")
|
|
fmt.Fprintln(os.Stderr, "subject_audit_helper --verify <audit_log> --key <key_file>")
|
|
os.Exit(0)
|
|
default:
|
|
die("unknown arg: %s", args[i])
|
|
}
|
|
}
|
|
if knownAnswer {
|
|
runKnownAnswer()
|
|
return
|
|
}
|
|
if auditPath == "" || keyPath == "" {
|
|
die("need --known-answer OR (--verify <path> --key <path>)")
|
|
}
|
|
// Sanity: file naming convention <candidate_id>.audit.jsonl.
|
|
if !strings.HasSuffix(filepath.Base(auditPath), ".audit.jsonl") {
|
|
die("audit log path must end with .audit.jsonl")
|
|
}
|
|
runVerify(auditPath, keyPath)
|
|
}
|