root 857ca4c971 catalogd: HTML-safe escape fix + decisions tracker entry
Per 2026-05-03 step_7_8_retention_and_parity scrum (opus WARN on
parity_subject_audit.rs:canonical_json):

Go's json.Marshal HTML-escapes < > & to \u003c \u003e \u0026 by
default. Rust's serde_json::to_vec keeps them literal. Any audit
row with these chars in any string field would silently produce
different canonical bytes across runtimes → broken HMAC chain.
Latent because no production audit field has carried <>& yet, but
realistic for purpose strings ("error & retry") or trace_id values
("<HTTP-Request-Id>").

Fix: marshalNoEscapeHTML helper wraps json.Encoder.SetEscapeHTML(false)
+ trims trailing newline. Routed through writeCanonical for both
keys and scalar values.

Regression test: TestVerifyChain_HtmlChars_NotEscaped (purpose has &,
trace_id has <>) asserts the canonical bytes contain literal chars,
not escape sequences.

11 unit tests pass including the new one; parity probe still 6/6
byte-identical against live production audit logs.

Decisions tracker: added 2026-05-03 entry for SUBJECT_MANIFESTS_ON_CATALOGD
Steps 1-8 closure + 6th cross-runtime parity probe (was 5).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 04:29:53 -05:00

365 lines
13 KiB
Go

package catalogd
import (
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"strings"
"testing"
"time"
)
// deterministicKey returns the 32-byte fixture key 0x00, 0x01, ..., 0x1f.
// The Rust tests use the same bytes ((0u8..32).collect()), so a chain
// written by Rust verifies under Go with this key.
func deterministicKey() []byte {
	const keyLen = 32
	key := make([]byte, 0, keyLen)
	for b := 0; b < keyLen; b++ {
		key = append(key, byte(b))
	}
	return key
}
// mkRow builds a SubjectAuditRow fixture for the chain tests.
//
// candidateID, fields and prevHash are copied into the row verbatim; ts
// must be an RFC 3339 timestamp (fractional seconds allowed). The accessor
// is fixed to the gateway_lookup / gateway / fill_validation triple with an
// empty trace id, and Result is always "success". RowHmac is left empty so
// the caller (see buildEntry) can compute it.
//
// A malformed ts is a bug in the test fixture, so it panics instead of
// silently producing a zero-time row that would still hash "successfully".
func mkRow(candidateID string, fields []string, prevHash, ts string) SubjectAuditRow {
	parsed, err := time.Parse(time.RFC3339Nano, ts)
	if err != nil {
		panic("mkRow: invalid timestamp fixture \"" + ts + "\": " + err.Error())
	}
	return SubjectAuditRow{
		Schema:      "subject_audit.v1",
		Ts:          parsed,
		CandidateID: candidateID,
		Accessor: AuditAccessor{
			Kind:    "gateway_lookup",
			Daemon:  "gateway",
			Purpose: "fill_validation",
			TraceID: "",
		},
		FieldsAccessed: fields,
		Result:         "success",
		PrevChainHash:  prevHash,
		RowHmac:        "", // filled in by the caller once the HMAC is known
	}
}
// TestCanonicalJSON_KeysSortedAlphabetically asserts the same property
// the Rust unit test asserts (subject_audit::tests::canonical_json_sorts_keys_alphabetically):
// object keys are emitted in alphabetical order, at the top level and in
// nested objects.
func TestCanonicalJSON_KeysSortedAlphabetically(t *testing.T) {
	v := map[string]any{
		"z": 1,
		"a": 2,
		"m": map[string]any{"y": 1, "b": 2},
	}
	var buf strings.Builder
	if err := writeCanonical(&buf, v); err != nil {
		t.Fatalf("canonical: %v", err)
	}
	s := buf.String()

	// pos returns the offset of the quoted key in the output and fails the
	// test if the key is absent. Without the presence check a missing key
	// yields -1 from strings.Index, which would satisfy the "<" comparisons
	// below and let the test pass vacuously.
	pos := func(key string) int {
		t.Helper()
		i := strings.Index(s, `"`+key+`"`)
		if i < 0 {
			t.Fatalf("key %q missing from canonical output: %s", key, s)
		}
		return i
	}

	a, m, z := pos("a"), pos("m"), pos("z")
	if !(a < m && m < z) {
		t.Fatalf("top-level keys out of order: %s", s)
	}
	b, y := pos("b"), pos("y")
	if !(b < y) {
		t.Fatalf("nested keys out of order: %s", s)
	}
}
// TestCanonicalJSON_ArraysPreserveOrder checks that writeCanonical emits
// array elements in input order rather than sorting them. It is the
// counterpart of Rust's
// subject_audit::tests::canonical_json_arrays_preserve_order.
func TestCanonicalJSON_ArraysPreserveOrder(t *testing.T) {
	// The elements are deliberately unsorted; they must come out unchanged.
	const want = `"c","a","b"`
	input := map[string]any{"k": []any{"c", "a", "b"}}

	var out strings.Builder
	err := writeCanonical(&out, input)
	if err != nil {
		t.Fatalf("canonical: %v", err)
	}
	if got := out.String(); !strings.Contains(got, want) {
		t.Fatalf("array order altered: %s", got)
	}
}
// buildEntry turns a row fixture into an AuditLogEntry. It canonicalizes
// the struct, computes the row HMAC over those bytes chained onto prev,
// stores the HMAC in the row, and uses the row's json.Marshal output as
// the entry's raw bytes.
//
// Test-only helper: any error panics. Production reads raw bytes straight
// from disk, so the time-precision drift that affects struct-derived
// canonical bytes does not apply there.
func buildEntry(row SubjectAuditRow, key []byte, prev string) AuditLogEntry {
	canonical, canonErr := canonicalRowBytesFromStruct(&row)
	if canonErr != nil {
		panic(canonErr)
	}

	// row is a by-value copy, so setting the HMAC here never touches the
	// caller's fixture.
	row.RowHmac = computeRowHMAC(key, prev, canonical)

	encoded, encErr := json.Marshal(row)
	if encErr != nil {
		panic(encErr)
	}
	return AuditLogEntry{Row: row, Raw: encoded}
}
// TestVerifyChain_ReplaysAndReachesTip builds a three-row chain, each row
// linked to the previous row's HMAC starting from GenesisHash, and checks
// that VerifyChain accepts it, reports three rows, and returns the last
// row's HMAC as the tip. This is the local half of the parity contract;
// the cross-runtime half (Rust writes, Go verifies) is covered by
// scripts/cutover/parity/subject_audit_parity.sh.
func TestVerifyChain_ReplaysAndReachesTip(t *testing.T) {
	key := deterministicKey()
	steps := []struct{ field, ts string }{
		{"name", "2026-05-03T12:00:00Z"},
		{"phone", "2026-05-03T12:00:01Z"},
		{"email", "2026-05-03T12:00:02Z"},
	}

	// prev always holds the HMAC of the last row appended, so after the
	// loop it is the expected chain tip.
	prev := GenesisHash
	entries := make([]AuditLogEntry, 0, len(steps))
	for _, s := range steps {
		entry := buildEntry(mkRow("CAND-PARITY", []string{s.field}, prev, s.ts), key, prev)
		entries = append(entries, entry)
		prev = entry.Row.RowHmac
	}

	count, tip, err := VerifyChain(entries, key)
	if err != nil {
		t.Fatalf("verify failed: %v", err)
	}
	if count != 3 {
		t.Fatalf("expected 3 rows verified, got %d", count)
	}
	if tip != prev {
		t.Fatalf("chain tip wrong: tip=%s expected=%s", tip, prev)
	}
}
// TestVerifyChain_EmptyLogIsTriviallyValid mirrors Rust's empty-log
// special case: verifying a nil log must return zero rows, the
// GenesisHash tip, and no error.
func TestVerifyChain_EmptyLogIsTriviallyValid(t *testing.T) {
	count, tip, err := VerifyChain(nil, deterministicKey())

	// Cases are checked in order and t.Fatalf stops the test, so only the
	// first failing expectation is reported.
	switch {
	case err != nil:
		t.Fatalf("empty log returned error: %v", err)
	case count != 0:
		t.Fatalf("expected 0 rows on empty log, got %d", count)
	case tip != GenesisHash:
		t.Fatalf("expected GENESIS tip on empty log, got %q", tip)
	}
}
// TestVerifyChain_TamperDetected rewrites the `result` field in an entry's
// raw bytes after the HMAC was computed and expects VerifyChain to reject
// the row with an "hmac mismatch" error.
func TestVerifyChain_TamperDetected(t *testing.T) {
	key := deterministicKey()
	entry := buildEntry(
		mkRow("CAND-T", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z"),
		key,
		GenesisHash,
	)

	// Change "success" to "denied" in the raw line only. The parsed row,
	// including its stored row_hmac, is left as it was.
	tampered := strings.Replace(string(entry.Raw), `"success"`, `"denied"`, 1)
	entry.Raw = []byte(tampered)

	_, _, err := VerifyChain([]AuditLogEntry{entry}, key)
	if err == nil {
		t.Fatal("expected hmac mismatch after tamper, got nil")
	}
	if msg := err.Error(); !strings.Contains(msg, "hmac mismatch") {
		t.Fatalf("expected hmac mismatch, got: %v", err)
	}
}
// TestVerifyChain_BadKeyRejectsValidRows: same rows + wrong key = mismatch.
func TestVerifyChain_BadKeyRejectsValidRows(t *testing.T) {
good := deterministicKey()
r1 := mkRow("CAND-BK", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z")
e1 := buildEntry(r1, good, GenesisHash)
bad := make([]byte, 32)
for i := range bad {
bad[i] = 0xff
}
_, _, err := VerifyChain([]AuditLogEntry{e1}, bad)
if err == nil {
t.Fatal("expected hmac mismatch with wrong key")
}
}
// TestComputeRowHMAC_StableAcrossRuns: same row + same key always = same hash.
//
// The row is canonicalized twice and the HMAC computed twice. Both pairs
// must be identical, and the HMAC must be 64 hex characters.
func TestComputeRowHMAC_StableAcrossRuns(t *testing.T) {
	key := deterministicKey()
	r := mkRow("CAND-S", []string{"a", "b"}, GenesisHash, "2026-05-03T12:00:00Z")

	// Check both canonicalization errors. If they were ignored, two failing
	// calls would both yield nil bytes and the stability comparison below
	// would pass without testing anything.
	c1, err := canonicalRowBytesFromStruct(&r)
	if err != nil {
		t.Fatalf("canonical (first pass): %v", err)
	}
	c2, err := canonicalRowBytesFromStruct(&r)
	if err != nil {
		t.Fatalf("canonical (second pass): %v", err)
	}
	if string(c1) != string(c2) {
		t.Fatalf("canonical bytes unstable across runs:\n c1=%s\n c2=%s", c1, c2)
	}

	h1 := computeRowHMAC(key, GenesisHash, c1)
	h2 := computeRowHMAC(key, GenesisHash, c2)
	if h1 != h2 {
		t.Fatalf("hmac unstable across runs: %s vs %s", h1, h2)
	}
	if len(h1) != 64 {
		t.Fatalf("hmac wrong length %d", len(h1))
	}
	// Sanity: hex-decodable.
	if _, err := hex.DecodeString(h1); err != nil {
		t.Fatalf("hmac not hex: %v", err)
	}
}
// TestKnownAnswerVector matches a Go-computed reference. The same
// inputs must produce this exact byte string under Rust as well —
// scripts/cutover/parity/subject_audit_parity.sh runs the Rust helper
// against this exact fixture and asserts byte-identical output.
//
// If you change the fixture, rebuild Rust's parity_subject_audit + Go's
// helper and update both sides together.
func TestKnownAnswerVector(t *testing.T) {
key := deterministicKey()
r := SubjectAuditRow{
Schema: "subject_audit.v1",
Ts: time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC),
CandidateID: "WORKER-FIXED",
Accessor: AuditAccessor{
Kind: "gateway_lookup",
Daemon: "gateway",
Purpose: "parity_test",
TraceID: "trace-fixed",
},
FieldsAccessed: []string{"name"},
Result: "success",
PrevChainHash: GenesisHash,
RowHmac: "",
}
canon, err := canonicalRowBytesFromStruct(&r)
if err != nil {
t.Fatalf("canonical: %v", err)
}
t.Logf("canonical bytes: %s", canon)
hmacHex := computeRowHMAC(key, GenesisHash, canon)
t.Logf("hmac: %s", hmacHex)
// Sanity: round-trip through encoding/json + canonicalization is stable.
again, err := canonicalRowBytesFromStruct(&r)
if err != nil {
t.Fatalf("canonical 2: %v", err)
}
if string(canon) != string(again) {
t.Fatalf("canonical drift: %s vs %s", canon, again)
}
// Sanity: real HMAC against the canonical bytes.
mac := hmac.New(sha256.New, key)
mac.Write([]byte(GenesisHash))
mac.Write(canon)
expected := hex.EncodeToString(mac.Sum(nil))
if hmacHex != expected {
t.Fatalf("computeRowHMAC drift: %s vs %s", hmacHex, expected)
}
}
// TestVerifyChain_HtmlChars_NotEscaped is the regression test for the
// 2026-05-03 opus scrum WARN: Go's json.Marshal escapes `<`, `>`, `&`
// to `\u003c`, `\u003e`, `\u0026` by default; Rust's serde_json keeps
// them literal. Audit rows with these chars in any string field would
// silently break the chain across runtimes. The fix is in writeCanonical's
// marshalNoEscapeHTML helper. This test asserts that the canonical bytes
// contain the literal `<`, `>`, `&` (proving the fix is in place).
func TestVerifyChain_HtmlChars_NotEscaped(t *testing.T) {
	row := mkRow("CAND-HTML", []string{"name"}, GenesisHash, "2026-05-03T12:00:00Z")
	row.Accessor.Purpose = "error & retry" // & must stay literal, NOT \u0026
	row.Accessor.TraceID = "<HTTP-Req-Id>" // < and > must stay literal, NOT \u003c / \u003e

	canon, err := canonicalRowBytesFromStruct(&row)
	if err != nil {
		t.Fatalf("canonical: %v", err)
	}
	got := string(canon)

	// FAIL if the bytes contain any of Go's HTML-safe escape sequences.
	// Each literal below is an interpreted string with an escaped backslash,
	// so it matches six bytes (backslash, u, 0, 0, hex, hex) and is not a
	// Go-source unicode escape for the character itself.
	for _, esc := range []string{"\\u003c", "\\u003e", "\\u0026"} {
		if strings.Contains(got, esc) {
			t.Fatalf("canonical bytes contain Go HTML-escape sequences (would diverge from Rust):\n%s", got)
		}
	}

	// PASS only if both quoted values appear with their literal characters.
	for _, lit := range []string{"\"<HTTP-Req-Id>\"", "\"error & retry\""} {
		if !strings.Contains(got, lit) {
			t.Fatalf("canonical bytes missing literal <>&:\n%s", got)
		}
	}
}
// TestVerifyChain_RawBytesPreserveTimePrecision is the regression test
// for the 2026-05-03 WORKER-5 finding. When a timestamp's nanoseconds end
// in 0, Go's time.RFC3339Nano drops the trailing zero on re-marshal, while
// the Rust writer keeps all nine digits. A struct round-trip would
// therefore change the canonical bytes, so VerifyChain MUST canonicalize
// from the raw line.
//
// The test hand-crafts a raw line with such a timestamp, computes the HMAC
// over those exact bytes, and expects verification to succeed.
func TestVerifyChain_RawBytesPreserveTimePrecision(t *testing.T) {
	// Raw line as the Rust writer would emit it, minus row_hmac;
	// nanoseconds=461439210 ends in a zero.
	const unsigned = `{"schema":"subject_audit.v1","ts":"2026-05-03T09:12:47.461439210Z","candidate_id":"WORKER-5","accessor":{"kind":"validator_lookup","daemon":"gateway","purpose":"validator_worker_lookup","trace_id":""},"fields_accessed":["exists"],"result":"not_found","prev_chain_hash":"GENESIS"}`

	key := deterministicKey()
	canon, err := canonicalRowBytesFromRaw([]byte(unsigned))
	if err != nil {
		t.Fatalf("canonicalize raw: %v", err)
	}
	wantTip := computeRowHMAC(key, GenesisHash, canon)

	// Append row_hmac as the last member, matching what the Rust writer
	// produces: declaration order followed by the hmac.
	signed := strings.TrimSuffix(unsigned, "}") + `,"row_hmac":"` + wantTip + `"}`

	var parsed SubjectAuditRow
	if err := json.Unmarshal([]byte(signed), &parsed); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}

	log := []AuditLogEntry{{Row: parsed, Raw: []byte(signed)}}
	count, tip, err := VerifyChain(log, key)
	if err != nil {
		t.Fatalf("verify failed (regression: time-precision drift): %v", err)
	}
	if count != 1 {
		t.Fatalf("expected 1 row verified, got %d", count)
	}
	if tip != wantTip {
		t.Fatalf("tip mismatch: %s vs %s", tip, wantTip)
	}
}
// TestSubjectManifest_RoundTripJSON decodes a fixture shaped like what
// crates/catalogd/src/registry.rs::put_subject writes to
// data/_catalog/subjects/<id>.json, then spot-checks selected fields.
// Despite the name, only the decode direction is exercised. A failure
// means the Go reader is out of sync with the Rust writer (a Step 8
// contract violation).
func TestSubjectManifest_RoundTripJSON(t *testing.T) {
	const fixture = `{
"schema": "subject_manifest.v1",
"candidate_id": "WORKER-1",
"created_at": "2026-05-03T08:22:24.571647177Z",
"updated_at": "2026-05-03T08:22:24.571647177Z",
"status": "active",
"vertical": "unknown",
"consent": {
"general_pii": {
"status": "pending_backfill_review",
"version": ""
},
"biometric": {
"status": "never_collected"
}
},
"retention": {
"general_pii_until": "2030-05-02T08:22:24.571647177Z",
"policy": "4_year_default"
},
"datasets": [
{"name": "workers_500k", "key_column": "worker_id", "key_value": "1"}
],
"safe_views": ["workers_safe"],
"audit_log_path": "_catalog/subjects/WORKER-1.audit.jsonl",
"audit_log_chain_root": ""
}`

	var got SubjectManifest
	if err := json.Unmarshal([]byte(fixture), &got); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}

	// Cases are checked in order and t.Fatalf stops the test, so only the
	// first mismatching field is reported.
	switch {
	case got.CandidateID != "WORKER-1":
		t.Fatalf("candidate_id wrong: %s", got.CandidateID)
	case got.Status != "active":
		t.Fatalf("status wrong: %s", got.Status)
	case got.Consent.GeneralPii.Status != "pending_backfill_review":
		t.Fatalf("general_pii.status wrong: %s", got.Consent.GeneralPii.Status)
	case got.Consent.Biometric.Status != "never_collected":
		t.Fatalf("biometric.status wrong: %s", got.Consent.Biometric.Status)
	case got.Retention.Policy != "4_year_default":
		t.Fatalf("retention.policy wrong: %s", got.Retention.Policy)
	case len(got.Datasets) != 1 || got.Datasets[0].Name != "workers_500k":
		t.Fatalf("datasets wrong: %+v", got.Datasets)
	}
}