#!/usr/bin/env bash
# materializer_parity — run Bun + Go materializer against an identical
# synthetic root, diff the resulting data/evidence/ JSONL files.
#
# This validates the parity claim from the 2026-05-02 port: "on-wire
# JSON shape matches TS so Bun and Go runs are interchangeable." Any
# divergence here is a finding the architecture comparison should
# record (precedent: 2026-05-02 validator parity probe surfaced the
# Rust serde-tagged-enum vs Go flat-struct error envelope gap).
#
# Approach:
#   1. Set up a temp ROOT with a fixed source data/_kb/distilled_facts.jsonl
#      + observer_escalations.jsonl (small, every transform field exercised)
#   2. Run Bun materializer:
#        bun run /home/profit/lakehouse/scripts/distillation/build_evidence_index.ts
#      against TS-side ROOT (TS expects a real lakehouse repo layout)
#   3. Run Go materializer:
#        ./bin/materializer -root "$GO_ROOT"
#      against its own copy of the fixtures ($ROOT/go_side), so neither
#      run can see the other's data/evidence/ output
#   4. Diff the output JSONL files, normalized for non-deterministic
#      fields (provenance.recorded_at, ordering).
#
# Outputs: reports/cutover/gauntlet_2026-05-02/parity/materializer_parity.md
#
# Exit 0 = byte-equal (modulo timestamps); exit non-zero = drift or a
# failed step. Also exits 0 with a SKIP message when bun or the TS
# materializer is not available.
#
# Requires: go, jq (bun is optional — see SKIP above).
#
# Env overrides:
#   RUST_REPO=/home/profit/lakehouse   # Rust legacy repo
#   GO_BIN=./bin/materializer          # Go binary (built per-call)

# No `set -e`: every step whose failure matters is checked explicitly, and
# the status of the final test is the script's exit status.
set -uo pipefail

cd "$(dirname "$0")/../../.." || {
  echo "[materializer-parity] cannot cd to repo root" >&2
  exit 1
}

RUST_REPO="${RUST_REPO:-/home/profit/lakehouse}"
GO_BIN="${GO_BIN:-./bin/materializer}"
OUT_DIR="reports/cutover/gauntlet_2026-05-02/parity"
OUT="$OUT_DIR/materializer_parity.md"
mkdir -p "$OUT_DIR" || {
  echo "[materializer-parity] cannot create $OUT_DIR" >&2
  exit 1
}

# Build Go materializer fresh. A failed build must stop the probe; otherwise
# a stale (or missing) binary would be compared against Bun.
export PATH="$PATH:/usr/local/go/bin"
go build -o "$GO_BIN" ./cmd/materializer || {
  echo "[materializer-parity] go build failed" >&2
  exit 1
}

# Locate Bun. Skip with an explicit message if it's missing (CI without bun).
if ! command -v bun >/dev/null 2>&1; then
  echo "[materializer-parity] SKIP: bun not on PATH"
  exit 0
fi

# Confirm Rust-side materializer is present.
TS_MAT="$RUST_REPO/scripts/distillation/build_evidence_index.ts"
if [[ ! -f "$TS_MAT" ]]; then
  echo "[materializer-parity] SKIP: $TS_MAT not found"
  exit 0
fi

# jq does the normalization; without it every file would normalize to the
# empty string and be reported as a match.
if ! command -v jq >/dev/null 2>&1; then
  echo "[materializer-parity] jq not on PATH (required to normalize output)" >&2
  exit 1
fi

ROOT="$(mktemp -d)" || {
  echo "[materializer-parity] mktemp failed" >&2
  exit 1
}
cleanup() { rm -rf -- "${ROOT:?}"; }
# Clean up on every exit path. INT/TERM must terminate the script (which
# then fires the EXIT trap); a handler that only deletes ROOT would let the
# run continue against a directory that no longer exists.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

mkdir -p "$ROOT/data/_kb" || exit 1

# Synthetic distilled_facts — exercises the simplest transform shape.
# NOTE(review): the statement below appears to be damaged in this copy of the
# file. The bodies of the two fixture files (distilled_facts.jsonl,
# observer_escalations.jsonl), the assignment of BUN_ROOT, and the Bun
# invocation itself seem to have been lost: BUN_ROOT is read further down but
# never assigned, and the redirections here cannot be the intended command.
# It is kept verbatim rather than reconstructed, to avoid inventing fixture
# data — restore this section from version control before running.
cat > "$ROOT/data/_kb/distilled_facts.jsonl" < "$ROOT/data/_kb/observer_escalations.jsonl" < /tmp/bun_mat.log 2>&1 || {
  echo "[materializer-parity] bun run failed:"
  tail -30 /tmp/bun_mat.log
  exit 1
}

# ── Go run ─────────────────────────────────────────────────────────
GO_ROOT="$ROOT/go_side"
if ! mkdir -p "$GO_ROOT/data/_kb" ||
  ! cp "$ROOT/data/_kb/"* "$GO_ROOT/data/_kb/"; then
  echo "[materializer-parity] could not stage fixtures under $GO_ROOT" >&2
  exit 1
fi
echo "[materializer-parity] running Go materializer..."
"$GO_BIN" -root "$GO_ROOT" > /tmp/go_mat.log 2>&1 || {
  echo "[materializer-parity] go run failed:"
  tail -30 /tmp/go_mat.log
  exit 1
}

# ── Find output day-partition ──────────────────────────────────────
# Both runs use today's UTC date. Look up the partition.
# NOTE(review): the date is sampled after both runs, so a probe that
# straddles UTC midnight would look in the wrong partition and fail with
# "no ... output dir" below.
TODAY="$(date -u +%Y/%m/%d)"
BUN_OUT="$BUN_ROOT/data/evidence/$TODAY"
GO_OUT="$GO_ROOT/data/evidence/$TODAY"

if [[ ! -d "$BUN_OUT" ]]; then
  echo "[materializer-parity] no Bun output dir: $BUN_OUT"
  ls -la "$BUN_ROOT/data/evidence" 2>/dev/null || true
  exit 1
fi
if [[ ! -d "$GO_OUT" ]]; then
  echo "[materializer-parity] no Go output dir: $GO_OUT"
  exit 1
fi

# ── Normalize + diff per source-stem ───────────────────────────────
# Stripped fields:
#   provenance.recorded_at — different per-run wall clock
#
# Object keys are sorted (jq -S) and the resulting lines are sorted as whole
# strings, so neither key order nor record (dedup) order can matter.

#######################################
# Print a canonical form of one JSONL file.
# Arguments: $1 - path to a JSONL file
# Outputs:   one compact JSON document per line on stdout, with
#            provenance.recorded_at removed, keys sorted, lines sorted
#            bytewise (LC_ALL=C, so the order is locale-independent)
# Returns:   non-zero if jq fails (relies on `pipefail`, set above); jq's
#            own error message is left on stderr
#######################################
normalize() {
  jq -c -S 'del(.provenance.recorded_at)' "$1" \
    | LC_ALL=C sort
}

TOTAL=0
MATCH=0
DIFF=0
DIFF_DETAIL=""

# nullglob: an output dir without any *.jsonl yields zero iterations instead
# of one iteration over the literal pattern.
shopt -s nullglob

for f in "$BUN_OUT"/*.jsonl; do
  stem=$(basename -- "$f" .jsonl)
  go_f="$GO_OUT/$stem.jsonl"
  TOTAL=$((TOTAL + 1))
  if [[ ! -f "$go_f" ]]; then
    DIFF=$((DIFF + 1))
    DIFF_DETAIL="$DIFF_DETAIL"$'\n'"- $stem: present in Bun, missing in Go"
    continue
  fi
  # A file that cannot be normalized counts as a divergence; comparing two
  # empty strings would otherwise report a match.
  if ! bun_norm=$(normalize "$f") || ! go_norm=$(normalize "$go_f"); then
    DIFF=$((DIFF + 1))
    DIFF_DETAIL="$DIFF_DETAIL"$'\n'"- $stem: could not be normalized (jq failed; see stderr)"
    continue
  fi
  if [[ "$bun_norm" == "$go_norm" ]]; then
    MATCH=$((MATCH + 1))
  else
    DIFF=$((DIFF + 1))
    # Capture a small diff for the report.
    diff_block="$(diff <(printf '%s\n' "$bun_norm") <(printf '%s\n' "$go_norm") | head -40)"
    DIFF_DETAIL="$DIFF_DETAIL"$'\n\n'"### $stem"$'\n''```diff'$'\n'"$diff_block"$'\n''```'
  fi
done

# Second pass: stems that only Go produced. The loop above walks Bun's
# files, so without this an extra Go-side file would go unreported.
for g in "$GO_OUT"/*.jsonl; do
  stem=$(basename -- "$g" .jsonl)
  if [[ -f "$BUN_OUT/$stem.jsonl" ]]; then
    continue
  fi
  TOTAL=$((TOTAL + 1))
  DIFF=$((DIFF + 1))
  DIFF_DETAIL="$DIFF_DETAIL"$'\n'"- $stem: present in Go, missing in Bun"
done

shopt -u nullglob

# Nothing compared is not parity; surface it in the report and fail below.
if ((TOTAL == 0)); then
  DIFF_DETAIL=$'\n'"- no .jsonl files were produced by either runtime"
fi

# ── Write report ───────────────────────────────────────────────────
{
  echo "# Materializer parity probe — Bun vs Go"
  echo
  echo "**Date:** $(date -u +%Y-%m-%dT%H:%M:%SZ)"
  echo "**Bun:** \`$TS_MAT\`"
  echo "**Go:**  \`$GO_BIN\`"
  echo
  echo "Identical \`data/_kb/\` source → both runtimes' materializer."
  echo "Match = JSONL byte-equal after normalizing \`provenance.recorded_at\`"
  echo "(per-run wall clock) + sorted line order (dedup ordering)."
  echo
  echo "**Tally:** $MATCH match · $DIFF diff (out of $TOTAL stems)"
  if [[ -n "$DIFF_DETAIL" ]]; then
    echo
    echo "## Divergences"
    printf '%s\n' "$DIFF_DETAIL"
  else
    echo
    echo "_No divergences — on-wire JSON parity holds._"
  fi
} > "$OUT" || {
  echo "[materializer-parity] could not write $OUT" >&2
  exit 1
}

echo "[parity] materializer: $MATCH match / $DIFF diff (out of $TOTAL) → $OUT"

# Exit status of the script: success only if something was compared and
# nothing diverged.
((DIFF == 0 && TOTAL > 0))