golangLAKEHOUSE/scripts/cutover/parity/subject_audit_parity.sh

#!/usr/bin/env bash
# subject_audit_parity — verify Rust and Go produce byte-identical
# canonical JSON + HMAC-SHA256 chain hashes for subject audit logs.
#
# Why: the SubjectManifest + audit-log substrate (Rust crates/catalogd
# subject_audit.rs) is consumed by the legal-tier endpoint /audit/subject/{id}
# AND by the Go internal/catalogd reader. A canonical-JSON drift between
# the two runtimes would mean a chain written by Rust does not verify
# under Go (or worse, verifies as tampered) — silently breaking the
# defensible-audit guarantee.
#
# The probe runs in two phases:
#
#   Phase 1 — Known-answer vector
#     Both helpers serialize the same hardcoded SubjectAuditRow,
#     emit canonical bytes + HMAC. Outputs MUST be byte-identical.
#     This catches algorithm drift independent of any real data.
#
#   Phase 2 — Real production audit logs
#     Walk every *.audit.jsonl in the live data dir. Run BOTH helpers
#     against each one with the production signing key. Outputs MUST
#     be byte-identical. This catches real-world drift (e.g. struct
#     field tag mismatches that only fire on certain field values).
#
# Outputs: reports/cutover/gauntlet_2026-05-02/parity/subject_audit_parity.md
#
# Env overrides:
#   RUST_REPO=/home/profit/lakehouse
#   RUST_BIN=$RUST_REPO/target/release/parity_subject_audit
#   GO_BIN=./bin/subject_audit_helper
#   AUDIT_DIR=$RUST_REPO/data/_catalog/subjects
#   KEY_PATH=/tmp/lakehouse_audit/subject_audit.key

# Shell options: unset variables are errors and a pipeline fails when any
# stage fails. `-e` is not enabled, so every command whose failure matters
# is checked explicitly.
set -uo pipefail

# Every relative path used below (GO_BIN default, OUT_DIR, the `go build`
# package path) is resolved from the repository root, three levels above
# this script. Continuing from the wrong directory would write the report
# to an arbitrary place, so a failed cd is fatal.
cd -- "$(dirname -- "$0")/../../.." || {
  echo "[subject-audit-parity] FAIL: cannot cd to repo root (from $0)" >&2
  exit 1
}

# Env-overridable inputs; defaults match the header comment of this file.
RUST_REPO="${RUST_REPO:-/home/profit/lakehouse}"
RUST_BIN="${RUST_BIN:-$RUST_REPO/target/release/parity_subject_audit}"
GO_BIN="${GO_BIN:-./bin/subject_audit_helper}"
AUDIT_DIR="${AUDIT_DIR:-$RUST_REPO/data/_catalog/subjects}"
KEY_PATH="${KEY_PATH:-/tmp/lakehouse_audit/subject_audit.key}"

# Report destination (relative to the repo root). Without a writable report
# directory every later `>> "$OUT"` would fail while FAIL stays 0, so bail
# out here instead of reporting a pass with no report.
OUT_DIR="reports/cutover/gauntlet_2026-05-02/parity"
mkdir -p -- "$OUT_DIR" || {
  echo "[subject-audit-parity] FAIL: cannot create report dir $OUT_DIR" >&2
  exit 1
}
OUT="$OUT_DIR/subject_audit_parity.md"

# Make a Go toolchain installed under /usr/local/go visible to `go build`.
export PATH="$PATH:/usr/local/go/bin"

# ── Build / verify both sides ───────────────────────────────────────
# Rust helper: build on demand when the binary is not already executable.
# The subshell keeps the directory change local; only the last three lines
# of cargo's combined output are shown.
[[ -x "$RUST_BIN" ]] || {
  echo "[subject-audit-parity] building Rust helper..."
  (
    cd "$RUST_REPO" \
      && cargo build -p catalogd --bin parity_subject_audit --release 2>&1 | tail -3
  )
}
# Still no Rust binary: the probe is skipped and the script exits 0.
[[ -x "$RUST_BIN" ]] || {
  echo "[subject-audit-parity] SKIP: $RUST_BIN missing"
  exit 0
}

# Go helper: same on-demand build, from the package directory in this repo.
[[ -x "$GO_BIN" ]] || {
  echo "[subject-audit-parity] building Go helper..."
  go build -o "$GO_BIN" ./scripts/cutover/parity/subject_audit_helper/
}
# Unlike the Rust side, a missing Go binary is a failure (exit 1).
[[ -x "$GO_BIN" ]] || {
  echo "[subject-audit-parity] FAIL: $GO_BIN missing after build"
  exit 1
}

# ── Report header ───────────────────────────────────────────────────
# write_report_header
#   Print the Markdown preamble of the report on stdout.
#   Globals (read): RUST_REPO, RUST_BIN, GO_BIN, AUDIT_DIR
#   The spec path is derived from RUST_REPO like every other Rust-side path,
#   so overriding RUST_REPO no longer leaves the report pointing at the
#   default checkout. With the default RUST_REPO the output is unchanged.
write_report_header() {
  printf '%s\n' \
    "# subject_audit_parity" \
    "" \
    "**Generated:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')" \
    "**Spec:** $RUST_REPO/docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8" \
    "**Rust helper:** \`$RUST_BIN\`" \
    "**Go helper:** \`$GO_BIN\`" \
    "**Audit dir:** \`$AUDIT_DIR\`" \
    ""
}

# Truncate any previous report and start a fresh one.
write_report_header > "$OUT"

# Running tally of parity assertions across both phases.
PASS=0
FAIL=0

# ── Parity helpers ──────────────────────────────────────────────────

# outputs_match FILE_A FILE_B
#   Succeeds only when both captures are non-empty AND byte-identical.
#   Two empty captures compare equal under diff but prove nothing (both
#   helpers may have died silently or ignored the flag), so they are
#   treated as a mismatch rather than a pass.
outputs_match() {
  [ -s "$1" ] && [ -s "$2" ] && cmp -s "$1" "$2"
}

# json_field FILE JQ_FILTER
#   Print the jq result for a report-table cell, or "?" when jq fails
#   (missing jq, non-JSON capture) or yields nothing. The value is only
#   printed when jq succeeded, so a partial result is never glued to "?".
json_field() {
  local value
  if value="$(jq -r "$2" < "$1" 2>/dev/null)" && [ -n "$value" ]; then
    printf '%s' "$value"
  else
    printf '?'
  fi
}

# empty_capture_note FILE_A FILE_B
#   Emit an explanatory report line when either capture is empty, so a
#   mismatch with an empty diff is not mistaken for a reporting glitch.
empty_capture_note() {
  if [ ! -s "$1" ] || [ ! -s "$2" ]; then
    echo "_At least one helper produced no output; empty captures never count as a match._"
    echo
  fi
}

# All scratch files live in one private directory that is removed on every
# exit path, including interruption.
PARITY_TMP="$(mktemp -d)" || {
  echo "[subject-audit-parity] FAIL: mktemp -d failed" >&2
  exit 1
}
trap 'rm -rf -- "${PARITY_TMP:?}"' EXIT

# ── Phase 1 — Known-answer vector ───────────────────────────────────
{
  echo "## Phase 1 — Known-answer vector"
  echo
  echo "Hardcoded fixture row, identical inputs, byte-compare canonical-JSON + HMAC."
  echo
} >> "$OUT"

RUST_KA="$PARITY_TMP/rust_known_answer.out"
GO_KA="$PARITY_TMP/go_known_answer.out"
# A non-zero helper exit is tolerated here: the combined stdout+stderr
# capture is what gets compared, and it also lands in the report.
"$RUST_BIN" --known-answer > "$RUST_KA" 2>&1 || true
"$GO_BIN" --known-answer > "$GO_KA" 2>&1 || true

if outputs_match "$RUST_KA" "$GO_KA"; then
  PASS=$((PASS+1))
  {
    echo "**MATCH** ✓"
    echo
    echo '```json'
    cat "$RUST_KA"
    echo '```'
    echo
  } >> "$OUT"
else
  FAIL=$((FAIL+1))
  {
    echo "**MISMATCH** ✗"
    echo
    empty_capture_note "$RUST_KA" "$GO_KA"
    echo "### Rust"
    echo '```json'
    cat "$RUST_KA"
    echo '```'
    echo
    echo "### Go"
    echo '```json'
    cat "$GO_KA"
    echo '```'
    echo
    echo "### Diff"
    echo '```diff'
    diff "$RUST_KA" "$GO_KA" || true
    echo '```'
    echo
  } >> "$OUT"
fi

# ── Phase 2 — Real production audit logs ────────────────────────────
{
  echo "## Phase 2 — Real production audit logs"
  echo
  echo "Every \`*.audit.jsonl\` in \`$AUDIT_DIR\` verified by both runtimes."
  echo
} >> "$OUT"

if [ ! -r "$KEY_PATH" ]; then
  {
    echo "**SKIP** — signing key not readable at \`$KEY_PATH\`."
    echo "Set \`KEY_PATH=...\` or seed /tmp/lakehouse_audit/subject_audit.key (see systemd unit)."
    echo
  } >> "$OUT"
else
  # nullglob: an audit dir without matching files yields an empty array
  # instead of the literal pattern.
  shopt -s nullglob
  LOGS=( "$AUDIT_DIR"/*.audit.jsonl )
  if [ "${#LOGS[@]}" -eq 0 ]; then
    {
      echo "**SKIP** — no \`*.audit.jsonl\` files under \`$AUDIT_DIR\`."
      echo "(Trigger one by hitting the gateway with /v1/validate on a candidate_id.)"
      echo
    } >> "$OUT"
  else
    {
      echo "| Audit log | Rust verified | Go verified | Result |"
      echo "|---|---|---|---|"
    } >> "$OUT"

    # Capture files are reused (truncated) on every iteration.
    RUST_OUT="$PARITY_TMP/rust_verify.out"
    GO_OUT="$PARITY_TMP/go_verify.out"
    # Diff sections are buffered and appended after the table: writing them
    # between rows would end the Markdown table at the first mismatch.
    DIFF_NOTES="$PARITY_TMP/phase2_diffs.md"
    : > "$DIFF_NOTES"

    for log in "${LOGS[@]}"; do
      label="${log##*/}"
      "$RUST_BIN" --verify "$log" --key "$KEY_PATH" > "$RUST_OUT" 2>&1 || true
      "$GO_BIN" --verify "$log" --key "$KEY_PATH" > "$GO_OUT" 2>&1 || true
      rust_count="$(json_field "$RUST_OUT" '.count // 0')"
      go_count="$(json_field "$GO_OUT" '.count // 0')"
      rust_ok="$(json_field "$RUST_OUT" '.verified // false')"
      go_ok="$(json_field "$GO_OUT" '.verified // false')"
      if outputs_match "$RUST_OUT" "$GO_OUT"; then
        PASS=$((PASS+1))
        echo "| \`$label\` | $rust_count rows ($rust_ok) | $go_count rows ($go_ok) | **MATCH** ✓ |" >> "$OUT"
      else
        FAIL=$((FAIL+1))
        echo "| \`$label\` | $rust_count rows ($rust_ok) | $go_count rows ($go_ok) | **MISMATCH** ✗ |" >> "$OUT"
        {
          echo "### Diff for \`$label\`"
          echo
          empty_capture_note "$RUST_OUT" "$GO_OUT"
          echo '```diff'
          diff "$RUST_OUT" "$GO_OUT" || true
          echo '```'
          echo
        } >> "$DIFF_NOTES"
      fi
    done

    # Only emitted when something diverged, so an all-match report keeps
    # its previous layout.
    if [ -s "$DIFF_NOTES" ]; then
      {
        echo
        cat "$DIFF_NOTES"
      } >> "$OUT"
    fi
  fi
fi

# ── Summary ─────────────────────────────────────────────────────────
# Tally the assertions, pick the verdict text first, then append the whole
# section to the report in a single write.
TOTAL=$((PASS + FAIL))

if [ "$FAIL" -gt 0 ]; then
  STATUS_TEXT="**Status: DIVERGED** — Rust and Go disagree on at least one canonical-JSON or HMAC computation."
  STATUS_TEXT+=$'\n'"Investigate the diff above before declaring cross-runtime parity."
else
  STATUS_TEXT="**Status: PARITY** — every Rust assertion matches Go byte-for-byte."
fi

printf '%s\n' \
  "" \
  "## Summary" \
  "" \
  "**$PASS / $TOTAL** parity assertions passed." \
  "" \
  "$STATUS_TEXT" >> "$OUT"

# One-line console recap; the script's exit status is that of the final
# test below (0 only when nothing diverged).
echo "[subject-audit-parity] $PASS / $TOTAL pass — report: $OUT"
test "$FAIL" -eq 0