#!/usr/bin/env bash
# subject_audit_parity — verify Rust and Go produce byte-identical
# canonical JSON + HMAC-SHA256 chain hashes for subject audit logs.
#
# Why: the SubjectManifest + audit-log substrate (Rust crates/catalogd
# subject_audit.rs) is consumed by the legal-tier endpoint /audit/subject/{id}
# AND by the Go internal/catalogd reader. A canonical-JSON drift between
# the two runtimes would mean a chain written by Rust does not verify
# under Go (or worse, verifies as tampered) — silently breaking the
# defensible-audit guarantee.
#
# The probe runs in two phases:
#
#   Phase 1 — Known-answer vector
#     Both helpers serialize the same hardcoded SubjectAuditRow,
#     emit canonical bytes + HMAC. Outputs MUST be byte-identical.
#     This catches algorithm drift independent of any real data.
#
#   Phase 2 — Real production audit logs
#     Walk every *.audit.jsonl in the live data dir. Run BOTH helpers
#     against each one with the production signing key. Outputs MUST
#     be byte-identical. This catches real-world drift (e.g. struct
#     field tag mismatches that only fire on certain field values).
#
# In both phases an EMPTY output from either helper counts as a failure:
# two helpers that both crash silently would otherwise compare equal.
#
# Outputs: reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md
#
# Exit status:
#   0  every assertion matched, or the Rust helper is unavailable (SKIP)
#   1  at least one assertion diverged, or setup failed (cd / mktemp / Go build)
#
# Env overrides:
#   RUST_REPO=/home/profit/lakehouse
#   RUST_BIN=$RUST_REPO/target/release/parity_subject_audit
#   GO_BIN=./bin/subject_audit_helper
#   AUDIT_DIR=$RUST_REPO/data/_catalog/subjects
#   KEY_PATH=/tmp/lakehouse_audit/subject_audit.key

# No `-e` on purpose: helper failures and `diff` returning 1 are expected
# outcomes that the probe records in the report instead of aborting on.
set -uo pipefail

# All relative paths below (GO_BIN default, OUT_DIR, go build target) assume
# the repository root, so a failed cd must stop the run.
cd "$(dirname "$0")/../../.." || {
  echo "[subject-audit-parity] FAIL: cannot cd to repository root" >&2
  exit 1
}

RUST_REPO="${RUST_REPO:-/home/profit/lakehouse}"
RUST_BIN="${RUST_BIN:-$RUST_REPO/target/release/parity_subject_audit}"
GO_BIN="${GO_BIN:-./bin/subject_audit_helper}"
AUDIT_DIR="${AUDIT_DIR:-$RUST_REPO/data/_catalog/subjects}"
KEY_PATH="${KEY_PATH:-/tmp/lakehouse_audit/subject_audit.key}"

OUT_DIR="reports/cutover/gauntlet_2026-05-02/parity"
mkdir -p "$OUT_DIR"
OUT="$OUT_DIR/subject_audit_parity.md"

export PATH="$PATH:/usr/local/go/bin"

# ── Build / verify both sides ───────────────────────────────────────
if [[ ! -x "$RUST_BIN" ]]; then
  echo "[subject-audit-parity] building Rust helper..."
  (cd "$RUST_REPO" && cargo build -p catalogd --bin parity_subject_audit --release 2>&1 | tail -3)
fi
if [[ ! -x "$RUST_BIN" ]]; then
  # A missing Rust helper is a SKIP (exit 0), not a failure.
  echo "[subject-audit-parity] SKIP: $RUST_BIN missing"
  exit 0
fi

if [[ ! -x "$GO_BIN" ]]; then
  echo "[subject-audit-parity] building Go helper..."
  go build -o "$GO_BIN" ./scripts/cutover/parity/subject_audit_helper/
fi
if [[ ! -x "$GO_BIN" ]]; then
  echo "[subject-audit-parity] FAIL: $GO_BIN missing after build"
  exit 1
fi

# ── Scratch space ───────────────────────────────────────────────────
# One private directory holds every temp file; the EXIT trap removes it on
# any exit path (normal end, early exit, interrupt).
WORK_DIR="$(mktemp -d)" || {
  echo "[subject-audit-parity] FAIL: mktemp -d failed" >&2
  exit 1
}
cleanup() { rm -rf -- "${WORK_DIR:?}"; }
trap cleanup EXIT

RUST_KA="$WORK_DIR/rust_known_answer"
GO_KA="$WORK_DIR/go_known_answer"
RUST_OUT="$WORK_DIR/rust_verify"
GO_OUT="$WORK_DIR/go_verify"
PHASE2_DIFFS="$WORK_DIR/phase2_diffs.md"

# ── Report header ───────────────────────────────────────────────────
{
  echo "# subject_audit_parity"
  echo
  echo "**Generated:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
  echo "**Spec:** $RUST_REPO/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8"
  echo "**Rust helper:** \`$RUST_BIN\`"
  echo "**Go helper:** \`$GO_BIN\`"
  echo "**Audit dir:** \`$AUDIT_DIR\`"
  echo
} > "$OUT"

PASS=0
FAIL=0

# ── Phase 1 — Known-answer vector ───────────────────────────────────
{
  echo "## Phase 1 — Known-answer vector"
  echo
  echo "Hardcoded fixture row, identical inputs, byte-compare canonical-JSON + HMAC."
  echo
} >> "$OUT"

# stdout and stderr are both captured so an error message from one helper
# surfaces as a visible diff. Exit codes are kept for the mismatch report.
rust_rc=0
go_rc=0
"$RUST_BIN" --known-answer > "$RUST_KA" 2>&1 || rust_rc=$?
"$GO_BIN" --known-answer > "$GO_KA" 2>&1 || go_rc=$?

ka_note=""
if [[ ! -s "$RUST_KA" || ! -s "$GO_KA" ]]; then
  ka_note="At least one helper produced no output; empty outputs are not evidence of parity."
fi

if [[ -z "$ka_note" ]] && diff -q "$RUST_KA" "$GO_KA" >/dev/null 2>&1; then
  PASS=$((PASS + 1))
  {
    echo "**MATCH** ✓"
    echo
    echo '```json'
    cat "$RUST_KA"
    echo '```'
    echo
  } >> "$OUT"
else
  FAIL=$((FAIL + 1))
  {
    echo "**MISMATCH** ✗"
    echo
    if [[ -n "$ka_note" ]]; then
      echo "$ka_note"
      echo
    fi
    echo "Exit codes: Rust=$rust_rc, Go=$go_rc"
    echo
    echo "### Rust"
    echo '```json'
    cat "$RUST_KA"
    echo '```'
    echo
    echo "### Go"
    echo '```json'
    cat "$GO_KA"
    echo '```'
    echo
    echo "### Diff"
    echo '```diff'
    diff "$RUST_KA" "$GO_KA" || true # diff exits 1 when files differ; expected here
    echo '```'
    echo
  } >> "$OUT"
fi

# ── Phase 2 — Real production audit logs ────────────────────────────
{
  echo "## Phase 2 — Real production audit logs"
  echo
  echo "Every \`*.audit.jsonl\` in \`$AUDIT_DIR\` verified by both runtimes."
  echo
} >> "$OUT"

if [[ ! -r "$KEY_PATH" ]]; then
  {
    echo "**SKIP** — signing key not readable at \`$KEY_PATH\`."
    echo "Set \`KEY_PATH=...\` or seed /tmp/lakehouse_audit/subject_audit.key (see systemd unit)."
    echo
  } >> "$OUT"
else
  # nullglob: an unmatched pattern expands to an empty array rather than
  # the literal pattern string.
  shopt -s nullglob
  LOGS=("$AUDIT_DIR"/*.audit.jsonl)
  if [[ "${#LOGS[@]}" -eq 0 ]]; then
    {
      echo "**SKIP** — no \`*.audit.jsonl\` files under \`$AUDIT_DIR\`."
      echo "(Trigger one by hitting the gateway with /v1/validate on a candidate_id.)"
      echo
    } >> "$OUT"
  else
    {
      echo "| Audit log | Rust verified | Go verified | Result |"
      echo "|---|---|---|---|"
    } >> "$OUT"

    # Diff details are collected here and appended AFTER the table; writing
    # them between rows would break the Markdown table for later rows.
    : > "$PHASE2_DIFFS"

    for log in "${LOGS[@]}"; do
      label="$(basename "$log")"

      rust_rc=0
      go_rc=0
      "$RUST_BIN" --verify "$log" --key "$KEY_PATH" > "$RUST_OUT" 2>&1 || rust_rc=$?
      "$GO_BIN" --verify "$log" --key "$KEY_PATH" > "$GO_OUT" 2>&1 || go_rc=$?

      # Best-effort summary fields for the table; "?" when jq is missing,
      # the output is not JSON, or jq printed nothing (empty input).
      rust_count="$(jq -r '.count // 0' < "$RUST_OUT" 2>/dev/null || echo "?")"
      go_count="$(jq -r '.count // 0' < "$GO_OUT" 2>/dev/null || echo "?")"
      rust_ok="$(jq -r '.verified // false' < "$RUST_OUT" 2>/dev/null || echo "?")"
      go_ok="$(jq -r '.verified // false' < "$GO_OUT" 2>/dev/null || echo "?")"
      rust_count="${rust_count:-?}"
      go_count="${go_count:-?}"
      rust_ok="${rust_ok:-?}"
      go_ok="${go_ok:-?}"

      log_note=""
      if [[ ! -s "$RUST_OUT" || ! -s "$GO_OUT" ]]; then
        log_note="At least one helper produced no output; empty outputs are not evidence of parity."
      fi

      if [[ -z "$log_note" ]] && diff -q "$RUST_OUT" "$GO_OUT" >/dev/null 2>&1; then
        PASS=$((PASS + 1))
        echo "| \`$label\` | $rust_count rows ($rust_ok) | $go_count rows ($go_ok) | **MATCH** ✓ |" >> "$OUT"
      else
        FAIL=$((FAIL + 1))
        echo "| \`$label\` | $rust_count rows ($rust_ok) | $go_count rows ($go_ok) | **MISMATCH** ✗ |" >> "$OUT"
        {
          echo "### Diff for \`$label\`"
          if [[ -n "$log_note" ]]; then
            echo
            echo "$log_note"
          fi
          echo
          echo "Exit codes: Rust=$rust_rc, Go=$go_rc"
          echo
          echo '```diff'
          diff "$RUST_OUT" "$GO_OUT" || true # diff exits 1 when files differ; expected here
          echo '```'
          echo
        } >> "$PHASE2_DIFFS"
      fi
    done

    if [[ -s "$PHASE2_DIFFS" ]]; then
      {
        echo
        cat "$PHASE2_DIFFS"
      } >> "$OUT"
    fi
  fi
fi

# ── Summary ─────────────────────────────────────────────────────────
TOTAL=$((PASS + FAIL))
{
  echo
  echo "## Summary"
  echo
  echo "**$PASS / $TOTAL** parity assertions passed."
  echo
  if [[ "$FAIL" -gt 0 ]]; then
    echo "**Status: DIVERGED** — Rust and Go disagree on at least one canonical-JSON or HMAC computation."
    echo "Investigate the diff above before declaring cross-runtime parity."
  else
    echo "**Status: PARITY** — every Rust assertion matches Go byte-for-byte."
  fi
} >> "$OUT"

echo "[subject-audit-parity] $PASS / $TOTAL pass — report: $OUT"

# Exit non-zero when any assertion diverged so CI / callers can gate on it.
if [[ "$FAIL" -gt 0 ]]; then
  exit 1
fi
exit 0