package catalogd import ( "crypto/hmac" "crypto/sha256" "encoding/hex" "encoding/json" "strings" "testing" "time" ) // deterministicKey is the same fixture key the Rust tests use: // (0u8..32).collect() — so a Rust-written chain verifies under Go. func deterministicKey() []byte { k := make([]byte, 32) for i := range k { k[i] = byte(i) } return k } func mkRow(candidateID string, fields []string, prevHash, ts string) SubjectAuditRow { t, _ := time.Parse(time.RFC3339Nano, ts) return SubjectAuditRow{ Schema: "subject_audit.v1", Ts: t, CandidateID: candidateID, Accessor: AuditAccessor{ Kind: "gateway_lookup", Daemon: "gateway", Purpose: "fill_validation", TraceID: "", }, FieldsAccessed: fields, Result: "success", PrevChainHash: prevHash, RowHmac: "", // computed below } } // TestCanonicalJSON_KeysSortedAlphabetically asserts the same property // the Rust unit test asserts (subject_audit::tests::canonical_json_sorts_keys_alphabetically). func TestCanonicalJSON_KeysSortedAlphabetically(t *testing.T) { v := map[string]any{ "z": 1, "a": 2, "m": map[string]any{"y": 1, "b": 2}, } var buf strings.Builder if err := writeCanonical(&buf, v); err != nil { t.Fatalf("canonical: %v", err) } s := buf.String() a, m, z := strings.Index(s, "\"a\""), strings.Index(s, "\"m\""), strings.Index(s, "\"z\"") if !(a < m && m < z) { t.Fatalf("top-level keys out of order: %s", s) } b, y := strings.Index(s, "\"b\""), strings.Index(s, "\"y\"") if !(b < y) { t.Fatalf("nested keys out of order: %s", s) } } // TestCanonicalJSON_ArraysPreserveOrder asserts arrays are NOT sorted — // matches Rust subject_audit::tests::canonical_json_arrays_preserve_order. func TestCanonicalJSON_ArraysPreserveOrder(t *testing.T) { v := map[string]any{"k": []any{"c", "a", "b"}} var buf strings.Builder if err := writeCanonical(&buf, v); err != nil { t.Fatalf("canonical: %v", err) } if !strings.Contains(buf.String(), "\"c\",\"a\",\"b\"") { t.Fatalf("array order altered: %s", buf.String()) } } // buildEntry produces an AuditLogEntry by computing the HMAC against // the row's struct-derived canonical bytes, then storing the resulting // row JSON as the raw bytes. Test-only — production reads raw bytes // straight from disk so the time-precision drift doesn't apply. func buildEntry(row SubjectAuditRow, key []byte, prev string) AuditLogEntry { canon, err := canonicalRowBytesFromStruct(&row) if err != nil { panic(err) } row.RowHmac = computeRowHMAC(key, prev, canon) raw, err := json.Marshal(row) if err != nil { panic(err) } return AuditLogEntry{Row: row, Raw: raw} } // TestVerifyChain_ReplaysAndReachesTip writes 3 rows with HMACs computed // the same way Rust would, then verifies they chain. This is the local // half of the parity contract — the cross-runtime half (Rust writes, // Go verifies) is covered by scripts/cutover/parity/subject_audit_parity.sh. func TestVerifyChain_ReplaysAndReachesTip(t *testing.T) { key := deterministicKey() r1 := mkRow("CAND-PARITY", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z") e1 := buildEntry(r1, key, GenesisHash) r2 := mkRow("CAND-PARITY", []string{"phone"}, e1.Row.RowHmac, "2026-05-03T12:00:01Z") e2 := buildEntry(r2, key, e1.Row.RowHmac) r3 := mkRow("CAND-PARITY", []string{"email"}, e2.Row.RowHmac, "2026-05-03T12:00:02Z") e3 := buildEntry(r3, key, e2.Row.RowHmac) count, tip, err := VerifyChain([]AuditLogEntry{e1, e2, e3}, key) if err != nil { t.Fatalf("verify failed: %v", err) } if count != 3 { t.Fatalf("expected 3 rows verified, got %d", count) } if tip != e3.Row.RowHmac { t.Fatalf("chain tip wrong: tip=%s expected=%s", tip, e3.Row.RowHmac) } } // TestVerifyChain_EmptyLogIsTriviallyValid mirrors Rust's empty-log // special case: 0 rows, GENESIS tip, no error. func TestVerifyChain_EmptyLogIsTriviallyValid(t *testing.T) { count, tip, err := VerifyChain(nil, deterministicKey()) if err != nil { t.Fatalf("empty log returned error: %v", err) } if count != 0 { t.Fatalf("expected 0 rows on empty log, got %d", count) } if tip != GenesisHash { t.Fatalf("expected GENESIS tip on empty log, got %q", tip) } } // TestVerifyChain_TamperDetected: tamper the raw line's `result` field // (the canonicalizer sees the new bytes; HMAC mismatches the stored hash). func TestVerifyChain_TamperDetected(t *testing.T) { key := deterministicKey() r1 := mkRow("CAND-T", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z") e1 := buildEntry(r1, key, GenesisHash) // Tamper: replace "success" with "denied" in the raw bytes ONLY. // The struct's row_hmac (used as the "stored" comparator) stays put. e1.Raw = []byte(strings.Replace(string(e1.Raw), `"success"`, `"denied"`, 1)) _, _, err := VerifyChain([]AuditLogEntry{e1}, key) if err == nil { t.Fatal("expected hmac mismatch after tamper, got nil") } if !strings.Contains(err.Error(), "hmac mismatch") { t.Fatalf("expected hmac mismatch, got: %v", err) } } // TestVerifyChain_BadKeyRejectsValidRows: same rows + wrong key = mismatch. func TestVerifyChain_BadKeyRejectsValidRows(t *testing.T) { good := deterministicKey() r1 := mkRow("CAND-BK", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z") e1 := buildEntry(r1, good, GenesisHash) bad := make([]byte, 32) for i := range bad { bad[i] = 0xff } _, _, err := VerifyChain([]AuditLogEntry{e1}, bad) if err == nil { t.Fatal("expected hmac mismatch with wrong key") } } // TestComputeRowHMAC_StableAcrossRuns: same row + same key always = same hash. func TestComputeRowHMAC_StableAcrossRuns(t *testing.T) { key := deterministicKey() r := mkRow("CAND-S", []string{"a", "b"}, GenesisHash, "2026-05-03T12:00:00Z") c1, _ := canonicalRowBytesFromStruct(&r) c2, _ := canonicalRowBytesFromStruct(&r) if string(c1) != string(c2) { t.Fatalf("canonical bytes unstable across runs:\n c1=%s\n c2=%s", c1, c2) } h1 := computeRowHMAC(key, GenesisHash, c1) h2 := computeRowHMAC(key, GenesisHash, c2) if h1 != h2 { t.Fatalf("hmac unstable across runs: %s vs %s", h1, h2) } if len(h1) != 64 { t.Fatalf("hmac wrong length %d", len(h1)) } // Sanity: hex-decodable. if _, err := hex.DecodeString(h1); err != nil { t.Fatalf("hmac not hex: %v", err) } } // TestKnownAnswerVector matches a Go-computed reference. The same // inputs must produce this exact byte string under Rust as well — // scripts/cutover/parity/subject_audit_parity.sh runs the Rust helper // against this exact fixture and asserts byte-identical output. // // If you change the fixture, rebuild Rust's parity_subject_audit + Go's // helper and update both sides together. func TestKnownAnswerVector(t *testing.T) { key := deterministicKey() r := SubjectAuditRow{ Schema: "subject_audit.v1", Ts: time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC), CandidateID: "WORKER-FIXED", Accessor: AuditAccessor{ Kind: "gateway_lookup", Daemon: "gateway", Purpose: "parity_test", TraceID: "trace-fixed", }, FieldsAccessed: []string{"name"}, Result: "success", PrevChainHash: GenesisHash, RowHmac: "", } canon, err := canonicalRowBytesFromStruct(&r) if err != nil { t.Fatalf("canonical: %v", err) } t.Logf("canonical bytes: %s", canon) hmacHex := computeRowHMAC(key, GenesisHash, canon) t.Logf("hmac: %s", hmacHex) // Sanity: round-trip through encoding/json + canonicalization is stable. again, err := canonicalRowBytesFromStruct(&r) if err != nil { t.Fatalf("canonical 2: %v", err) } if string(canon) != string(again) { t.Fatalf("canonical drift: %s vs %s", canon, again) } // Sanity: real HMAC against the canonical bytes. mac := hmac.New(sha256.New, key) mac.Write([]byte(GenesisHash)) mac.Write(canon) expected := hex.EncodeToString(mac.Sum(nil)) if hmacHex != expected { t.Fatalf("computeRowHMAC drift: %s vs %s", hmacHex, expected) } } // TestVerifyChain_HtmlChars_NotEscaped is the regression test for the // 2026-05-03 opus scrum WARN: Go's json.Marshal escapes `<`, `>`, `&` // to `<`, `>`, `&` by default; Rust's serde_json keeps // them literal. Audit rows with these chars in any string field would // silently break the chain across runtimes. Fix is in writeCanonical's // marshalNoEscapeHTML helper. This test asserts canonical bytes contain // the literal `<`, `>`, `&` (proving the fix is in place). func TestVerifyChain_HtmlChars_NotEscaped(t *testing.T) { r := mkRow("CAND-HTML", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z") r.Accessor.Purpose = "error & retry" // & must NOT be & r.Accessor.TraceID = "" // < and > must NOT be < / > canon, err := canonicalRowBytesFromStruct(&r) if err != nil { t.Fatalf("canonical: %v", err) } s := string(canon) // FAIL if the bytes contain Go's HTML-safe < / > / & // escape sequences (six raw chars each: backslash, u, 0, 0, hex, hex). // Those wouldn't match Rust's literal-char output and would silently // break the cross-runtime HMAC chain. Note: the strings below are // raw-string literals — the backslash + u006xx is six literal bytes, // NOT a Go-source unicode escape. if strings.Contains(s, "\\u003c") || strings.Contains(s, "\\u003e") || strings.Contains(s, "\\u0026") { t.Fatalf("canonical bytes contain Go HTML-escape sequences (would diverge from Rust):\n%s", s) } // PASS only if the literal chars survived round-trip. if !strings.Contains(s, "\"\"") || !strings.Contains(s, "\"error & retry\"") { t.Fatalf("canonical bytes missing literal <>&:\n%s", s) } } // TestVerifyChain_RawBytesPreserveTimePrecision is the regression test // for the 2026-05-03 WORKER-5 finding: when a row's nanoseconds end in // 0, time.RFC3339Nano strips the trailing zero on re-marshal, producing // different canonical bytes than Rust's chrono AutoSi (which always // emits 9 digits). VerifyChain MUST canonicalize from the raw line // bytes to avoid this drift. Test feeds a hand-crafted raw line whose // ts has a trailing-zero nano value and asserts verify succeeds when // the chain hash was computed against THOSE EXACT bytes. func TestVerifyChain_RawBytesPreserveTimePrecision(t *testing.T) { key := deterministicKey() // Hand-crafted raw line exactly as Rust would write it, with // nanoseconds=461439210 (trailing zero present). rawNoHmac := `{"schema":"subject_audit.v1","ts":"2026-05-03T09:12:47.461439210Z","candidate_id":"WORKER-5","accessor":{"kind":"validator_lookup","daemon":"gateway","purpose":"validator_worker_lookup","trace_id":""},"fields_accessed":["exists"],"result":"not_found","prev_chain_hash":"GENESIS"}` canonical, err := canonicalRowBytesFromRaw([]byte(rawNoHmac)) if err != nil { t.Fatalf("canonicalize raw: %v", err) } hmacHex := computeRowHMAC(key, GenesisHash, canonical) // Compose the full row by injecting row_hmac at the end (matches // what the Rust writer produces — declaration order + appended hmac). rawFull := strings.TrimSuffix(rawNoHmac, "}") + `,"row_hmac":"` + hmacHex + `"}` var row SubjectAuditRow if err := json.Unmarshal([]byte(rawFull), &row); err != nil { t.Fatalf("unmarshal: %v", err) } entry := AuditLogEntry{Row: row, Raw: []byte(rawFull)} count, tip, err := VerifyChain([]AuditLogEntry{entry}, key) if err != nil { t.Fatalf("verify failed (regression: time-precision drift): %v", err) } if count != 1 { t.Fatalf("expected 1 row verified, got %d", count) } if tip != hmacHex { t.Fatalf("tip mismatch: %s vs %s", tip, hmacHex) } } // TestSubjectManifest_RoundTripJSON: parse a fixture JSON identical in // shape to what crates/catalogd/src/registry.rs::put_subject writes to // data/_catalog/subjects/.json. If this fails, the Go reader is // out of sync with the Rust writer (a Step 8 contract violation). func TestSubjectManifest_RoundTripJSON(t *testing.T) { src := `{ "schema": "subject_manifest.v1", "candidate_id": "WORKER-1", "created_at": "2026-05-03T08:22:24.571647177Z", "updated_at": "2026-05-03T08:22:24.571647177Z", "status": "active", "vertical": "unknown", "consent": { "general_pii": { "status": "pending_backfill_review", "version": "" }, "biometric": { "status": "never_collected" } }, "retention": { "general_pii_until": "2030-05-02T08:22:24.571647177Z", "policy": "4_year_default" }, "datasets": [ {"name": "workers_500k", "key_column": "worker_id", "key_value": "1"} ], "safe_views": ["workers_safe"], "audit_log_path": "_catalog/subjects/WORKER-1.audit.jsonl", "audit_log_chain_root": "" }` var m SubjectManifest if err := json.Unmarshal([]byte(src), &m); err != nil { t.Fatalf("unmarshal: %v", err) } if m.CandidateID != "WORKER-1" { t.Fatalf("candidate_id wrong: %s", m.CandidateID) } if m.Status != "active" { t.Fatalf("status wrong: %s", m.Status) } if m.Consent.GeneralPii.Status != "pending_backfill_review" { t.Fatalf("general_pii.status wrong: %s", m.Consent.GeneralPii.Status) } if m.Consent.Biometric.Status != "never_collected" { t.Fatalf("biometric.status wrong: %s", m.Consent.Biometric.Status) } if m.Retention.Policy != "4_year_default" { t.Fatalf("retention.policy wrong: %s", m.Retention.Policy) } if len(m.Datasets) != 1 || m.Datasets[0].Name != "workers_500k" { t.Fatalf("datasets wrong: %+v", m.Datasets) } }