#!/usr/bin/env bash
# extract_json_parity — feed identical model-output strings through
# both Rust extract_json AND Go ExtractJSON; diff outputs.
#
# Why: the iteration loop's correctness hinges on extract_json finding
# the same JSON object in the same model output regardless of runtime.
# A divergence here means a model output that one runtime accepts and
# the other rejects (or worse, both accept but parse differently).
#
# Approach:
#   1. Run cargo test -p gateway extract_json to assert the LIVE Rust
#      function still passes its own unit tests (substrate gate)
#   2. For each fixture (label, input) tuple:
#        Rust: ./target/release/parity_extract_json < fixture
#        Go:   ./bin/parity_extract_json_go < fixture
#      Compare {matched, value} JSON outputs
#   3. Emit a markdown report with per-fixture matches/diffs
#
# Outputs: reports/cutover/gauntlet_2026-05-02/parity/extract_json_parity.md
#
# Exit status: 0 when every fixture matches (or the Rust helper is
# unavailable and the probe is skipped); 1 on any divergence, on a failed
# substrate gate, or on a failed Go helper build.
#
# Env overrides:
#   RUST_REPO=/home/profit/lakehouse
#   RUST_BIN=$RUST_REPO/target/release/parity_extract_json
#   GO_BIN=./bin/parity_extract_json_go

# No `-e` on purpose: helper/jq failures inside the fixture loop are handled
# explicitly, and every critical step below checks its own status.
set -uo pipefail

cd "$(dirname "$0")/../../.." || {
  echo "[extract-json-parity] cannot cd to repo root" >&2
  exit 1
}

RUST_REPO="${RUST_REPO:-/home/profit/lakehouse}"
RUST_BIN="${RUST_BIN:-$RUST_REPO/target/release/parity_extract_json}"
GO_BIN="${GO_BIN:-./bin/parity_extract_json_go}"
OUT_DIR="reports/cutover/gauntlet_2026-05-02/parity"
mkdir -p "$OUT_DIR"
OUT="$OUT_DIR/extract_json_parity.md"

export PATH="$PATH:/usr/local/go/bin"

# Run one helper binary with the fixture on stdin.
# Arguments: $1 - helper binary, $2 - sentinel printed on failure, $3 - input
# Outputs:   the helper's stdout, or ONLY the sentinel when it exits non-zero
#            (never partial output glued to the sentinel).
run_helper() {
  local bin=$1 sentinel=$2 input=$3 captured
  if captured=$(printf '%s' "$input" | "$bin" 2>/dev/null); then
    printf '%s' "$captured"
  else
    printf '%s' "$sentinel"
  fi
}

# Canonicalize JSON (compact, sorted keys) so serialization differences do
# not count as divergences.
# Arguments: $1 - helper output
# Outputs:   jq-normalized text, or the input unchanged when jq cannot parse
#            it (or is unavailable).
normalize_json() {
  local payload=$1 canonical
  if canonical=$(printf '%s\n' "$payload" | jq -cS . 2>/dev/null); then
    printf '%s' "$canonical"
  else
    printf '%s' "$payload"
  fi
}

# ── Build / verify both sides ───────────────────────────────────────
if [[ ! -x "$RUST_BIN" ]]; then
  echo "[extract-json-parity] building Rust helper..."
  (cd "$RUST_REPO" && cargo build -p gateway --bin parity_extract_json --release 2>&1 | tail -3)
fi
if [[ ! -x "$RUST_BIN" ]]; then
  # Deliberate soft skip: without the Rust helper there is nothing to compare.
  echo "[extract-json-parity] SKIP: $RUST_BIN missing"
  exit 0
fi

# Run live Rust unit tests (substrate gate) — ensures our helper
# matches the production extract_json behavior.
echo "[extract-json-parity] running cargo test extract_json (substrate gate)..."
TEST_LOG="$(mktemp "${TMPDIR:-/tmp}/rust_extract_test.XXXXXX")" || {
  echo "[extract-json-parity] mktemp failed" >&2
  exit 1
}
trap 'rm -f -- "$TEST_LOG"' EXIT

# Trust cargo's exit status first: grepping a tail of the log for
# "test result: ok" alone can pass when one test binary reports ok and a
# later one FAILED.
gate_ok=1
(cd "$RUST_REPO" && cargo test -p gateway --release extract_json) >"$TEST_LOG" 2>&1 || gate_ok=0
grep -q "test result: ok" "$TEST_LOG" || gate_ok=0
if ((!gate_ok)); then
  echo "[extract-json-parity] live Rust tests FAILED — aborting probe"
  tail -n 40 "$TEST_LOG"
  exit 1
fi
echo "  ✓ live Rust extract_json tests PASS"

# Build Go-side helper from internal/validator.ExtractJSON. Abort on failure
# so a stale binary from an earlier run is never compared by accident.
mkdir -p "$(dirname "$GO_BIN")"
if ! go build -o "$GO_BIN" ./scripts/cutover/parity/extract_json_helper; then
  echo "[extract-json-parity] Go helper build FAILED — aborting probe"
  exit 1
fi

# ── Fixture set ─────────────────────────────────────────────────────
# Inline as label||raw pairs. Curated to exercise every documented
# branch:
#   - fenced ```json``` block
#   - fenced unlabeled ``` block
#   - bare-braces with stray prose
#   - first-balanced-of-many
#   - nested object
#   - unicode in string values
#   - escaped quotes
#   - empty object
#   - top-level array (both runtimes return first inner object)
#   - no JSON at all
#   - malformed JSON-shaped text (depth balanced but invalid syntax)
#   - trailing garbage with unbalanced closing braces
# TODO: add the very-large input case (~10KB of prose around a tiny object).
declare -a FIXTURES=(
  "fenced_json_block||Here's my answer: \`\`\`json {\"fills\":[{\"candidate_id\":\"W-1\"}]} \`\`\` Done."
  "fenced_unlabeled||result: \`\`\` {\"k\":\"v\"} \`\`\`"
  "bare_braces||Here you go: {\"fills\":[{\"candidate_id\":\"W-2\"}]}"
  "first_of_many||{\"a\":1} then {\"b\":2}"
  "nested||prefix {\"outer\":{\"inner\":[1,2,3]},\"x\":\"y\"} suffix"
  "unicode||{\"name\":\"Café résumé\",\"emoji\":\"⭐\"}"
  "escaped_quotes||{\"msg\":\"she said \\\"hello\\\"\"}"
  "empty_object||{}"
  "array_of_objects||[{\"a\":1},{\"b\":2}]"
  "no_json||just prose, no json"
  "depth_balanced_invalid||{not a key: still not}"
  "trailing_garbage||{\"k\":\"v\"} and then 5} more } stuff"
)

TOTAL=0
MATCH=0
DIFF=0
DIFF_DETAIL=""
for entry in "${FIXTURES[@]}"; do
  # Split on the FIRST literal "||". `IFS='||' read` must not be used here:
  # IFS is a character set, so it leaves a stray leading "|" on the input and
  # `read` also drops everything after the first newline of a fixture.
  label=${entry%%||*}
  raw=${entry#*||}
  TOTAL=$((TOTAL + 1))

  rust_out=$(run_helper "$RUST_BIN" "RUST_ERROR" "$raw")
  go_out=$(run_helper "$GO_BIN" "GO_ERROR" "$raw")

  # Normalize JSON serialization (key order) before comparing.
  rust_norm=$(normalize_json "$rust_out")
  go_norm=$(normalize_json "$go_out")

  if [[ "$rust_norm" == "$go_norm" ]]; then
    MATCH=$((MATCH + 1))
  else
    DIFF=$((DIFF + 1))
    # Character-based truncation (not bytes) so a multi-byte UTF-8 sequence
    # is never cut in half; newlines are flattened to keep it on one line.
    raw_short=${raw:0:120}
    raw_short=${raw_short//$'\n'/ }
    DIFF_DETAIL+=$'\n\n'"### $label"$'\n'
    DIFF_DETAIL+='**Input (first 120 chars):** `'"$raw_short"'`'$'\n\n'
    DIFF_DETAIL+='**Rust:**'$'\n''```json'$'\n'"$rust_norm"$'\n''```'$'\n\n'
    DIFF_DETAIL+='**Go:**'$'\n''```json'$'\n'"$go_norm"$'\n''```'
  fi
done

# ── Report ──────────────────────────────────────────────────────────
{
  echo "# extract_json parity probe — Rust vs Go"
  echo
  echo "**Date:** $(date -u +%Y-%m-%dT%H:%M:%SZ)"
  echo "**Rust helper:** \`$RUST_BIN\` (links live \`gateway::v1::iterate::extract_json\`)"
  echo "**Go helper:** \`$GO_BIN\` (links live \`internal/validator.ExtractJSON\`)"
  echo
  echo "Identical model-output strings → both runtimes' \`extract_json\`."
  echo "Match = identical \`{matched, value}\` JSON output."
  echo
  echo "**Substrate gate:** \`cargo test -p gateway extract_json\` PASS before probe."
  echo
  echo "**Tally:** $MATCH match · $DIFF diff (out of $TOTAL fixtures)"
  if [[ -n "$DIFF_DETAIL" ]]; then
    echo
    echo "## Divergences"
    printf '%s\n' "$DIFF_DETAIL"
  else
    echo
    echo "_No divergences — extract_json parity holds across all fixtures._"
  fi
} >"$OUT"

echo "[parity] extract_json: $MATCH match / $DIFF diff (out of $TOTAL) → $OUT"

# Exit non-zero on any divergence so callers can use this probe as a gate.
[[ "$DIFF" -eq 0 ]] || exit 1