#!/usr/bin/env bash
# session_log_parity — verify Rust gateway and Go validatord write
# byte-equivalent SessionRecord JSONL rows from identical input.
#
# Why: the longitudinal session log (one row per /v1/iterate session)
# is the offline-analysis layer. Cross-runtime drift means a DuckDB
# query that works against one log fails against the other — the
# whole "unified longitudinal view" theory unravels. This probe is
# the regression gate.
#
# Approach (pure schema check, no live daemon needed):
#   1. Build both runtimes' session_log writers as standalone helper
#      binaries that take a fixture JSON on stdin and emit one row.
#   2. Feed identical fixture inputs through each.
#   3. Diff the resulting rows after normalizing the few fields that
#      MUST differ (timestamp, daemon).
#
# Environment overrides: RUST_REPO, RUST_BIN, GO_BIN.
#
# Outputs: reports/cutover/gauntlet_2026-05-02/parity/session_log_parity.md
#
# Exit 0 on schema match (or SKIP when the Rust helper is unavailable);
# exit 1 on drift, on unparseable helper output, or when a prerequisite
# (jq, the Go build, the working directory) is missing.

set -uo pipefail

# All relative paths below are resolved from three directories above
# this script; bail out rather than run from the wrong directory.
cd "$(dirname "$0")/../../.." || {
  echo "[session-log-parity] FATAL: cannot cd to repo root" >&2
  exit 1
}

RUST_REPO="${RUST_REPO:-/home/profit/lakehouse}"
RUST_BIN="${RUST_BIN:-$RUST_REPO/target/release/parity_session_log}"
GO_BIN="${GO_BIN:-./bin/parity_session_log_go}"

OUT_DIR="reports/cutover/gauntlet_2026-05-02/parity"
mkdir -p "$OUT_DIR" || {
  echo "[session-log-parity] FATAL: cannot create $OUT_DIR" >&2
  exit 1
}
OUT="$OUT_DIR/session_log_parity.md"

export PATH="$PATH:/usr/local/go/bin"

# Without jq every row would fail to parse on BOTH sides; the gate must
# fail loudly instead of comparing two empty strings.
if ! command -v jq >/dev/null 2>&1; then
  echo "[session-log-parity] FATAL: jq not found in PATH" >&2
  exit 1
fi

# ── Build both helpers ─────────────────────────────────────────────
# A failed Go build must abort: otherwise a stale $GO_BIN left over from
# an earlier run would be tested in place of the current sources.
if ! go build -o "$GO_BIN" ./scripts/cutover/parity/session_log_helper; then
  echo "[session-log-parity] FATAL: go build of session_log_helper failed" >&2
  exit 1
fi

if [ ! -x "$RUST_BIN" ]; then
  echo "[session-log-parity] building Rust helper..."
  (cd "$RUST_REPO" && cargo build -p gateway --bin parity_session_log --release 2>&1 | tail -3)
fi
if [ ! -x "$RUST_BIN" ]; then
  # Skipping is reported as success (exit 0) and no report is written.
  echo "[session-log-parity] SKIP: $RUST_BIN missing"
  exit 0
fi

# ── Fixtures ───────────────────────────────────────────────────────
# Parallel arrays — labels and fixtures keyed by index. Avoids a
# separator-in-string scheme (the prior `||` form silently injected
# a `|` character into every fixture body, which both helpers then
# rejected with empty output → trivially-equal "match" — caught
# during initial probe authoring 2026-05-02).
LABELS=(
  "accepted_grounded"
  "max_iter_exhausted"
  "infra_error"
  "unicode_in_prompt"
)
FIXTURES=(
  '{"session_id":"trace-1","kind":"fill","model":"qwen3.5:latest","provider":"ollama","prompt":"produce a fill","iterations":1,"max_iterations":3,"final_verdict":"accepted","attempts":[{"iteration":0,"verdict_kind":"accepted","span_id":"sp-0"}],"artifact":{"fills":[{"candidate_id":"W-1"}]},"grounded_in_roster":true,"duration_ms":50}'
  '{"session_id":"trace-2","kind":"fill","model":"qwen3.5:latest","provider":"ollama","prompt":"P","iterations":3,"max_iterations":3,"final_verdict":"max_iter_exhausted","attempts":[{"iteration":0,"verdict_kind":"validation_failed","error":"phantom W-X","span_id":"sp-a"},{"iteration":1,"verdict_kind":"validation_failed","error":"phantom W-Y","span_id":"sp-b"},{"iteration":2,"verdict_kind":"no_json","span_id":"sp-c"}],"duration_ms":3200}'
  '{"session_id":"trace-3","kind":"playbook","model":"qwen","provider":"ollama","prompt":"P","iterations":0,"max_iterations":3,"final_verdict":"infra_error","attempts":[{"iteration":0,"verdict_kind":"infra_error","error":"connection refused"}],"duration_ms":12}'
  '{"session_id":"trace-4","kind":"playbook","model":"qwen","provider":"ollama","prompt":"Café résumé ⭐ 你好","iterations":1,"max_iterations":3,"final_verdict":"accepted","attempts":[{"iteration":0,"verdict_kind":"accepted","span_id":"sp-u"}],"artifact":{"endorsed_names":["W-1"],"operation":"fill: X x1 in A, B","fingerprint":"abc"},"duration_ms":80}'
)

# The loop indexes LABELS by FIXTURES' indices; a length mismatch would
# otherwise surface as an unbound-variable abort mid-run.
if [ "${#LABELS[@]}" -ne "${#FIXTURES[@]}" ]; then
  echo "[session-log-parity] FATAL: LABELS/FIXTURES length mismatch" >&2
  exit 1
fi

#######################################
# Canonicalize one helper row for comparison.
# Arguments: $1 - raw JSON row emitted by a helper
# Outputs:   compact, key-sorted JSON on stdout
# Returns:   jq's exit status
#######################################
normalize() {
  # Strip the few fields that MUST differ between Rust+Go runs
  # (timestamp = now; daemon = producer name). Sort the keys so the
  # JSON serialization order doesn't break a byte-equal diff.
  jq -cS 'del(.timestamp) | del(.daemon)' <<<"$1"
}

#######################################
# Validate and canonicalize a helper's raw output.
# Arguments: $1 - raw helper output
# Outputs:   normalized row on stdout (only on success)
# Returns:   0 if the output is usable JSON that normalizes to a
#            non-empty row, 1 otherwise
#######################################
canonical_row() {
  local raw=$1
  local norm
  printf '%s\n' "$raw" | jq -e . >/dev/null 2>&1 || return 1
  norm=$(normalize "$raw" 2>/dev/null) || return 1
  [ -n "$norm" ] || return 1
  printf '%s' "$norm"
}

TOTAL=0
MATCH=0
DIFF=0
DIFF_DETAIL=""

for i in "${!FIXTURES[@]}"; do
  label="${LABELS[$i]}"
  fixture="${FIXTURES[$i]}"
  TOTAL=$((TOTAL + 1))

  # stderr is captured together with stdout, and a failing helper gets a
  # runtime-specific error row appended, so failures never look alike.
  rust_row=$(printf '%s' "$fixture" | "$RUST_BIN" 2>&1 || echo '{"error":"rust_helper_failed"}')
  go_row=$(printf '%s' "$fixture" | "$GO_BIN" 2>&1 || echo '{"error":"go_helper_failed"}')

  # Refuse trivial-equal-empty matches: a side that produced no
  # parseable JSON can never contribute to a match, even when the other
  # side is equally broken. Its raw output is kept for the report.
  rust_ok=1
  if ! rust_norm=$(canonical_row "$rust_row"); then
    rust_ok=0
    rust_norm="(unparseable helper output)"$'\n'"$rust_row"
  fi
  go_ok=1
  if ! go_norm=$(canonical_row "$go_row"); then
    go_ok=0
    go_norm="(unparseable helper output)"$'\n'"$go_row"
  fi

  if [ "$rust_ok" -eq 1 ] && [ "$go_ok" -eq 1 ] && [ "$rust_norm" = "$go_norm" ]; then
    MATCH=$((MATCH + 1))
  else
    DIFF=$((DIFF + 1))
    printf -v section \
      '\n\n### %s\n\n**Rust:**\n```json\n%s\n```\n\n**Go:**\n```json\n%s\n```' \
      "$label" "$rust_norm" "$go_norm"
    DIFF_DETAIL+="$section"
  fi
done

# ── Report ─────────────────────────────────────────────────────────
{
  echo "# session_log parity probe — Rust gateway vs Go validatord"
  echo
  echo "**Date:** $(date -u +%Y-%m-%dT%H:%M:%SZ)"
  echo "**Rust helper:** \`$RUST_BIN\`"
  echo "**Go helper:** \`$GO_BIN\`"
  echo
  echo "Identical fixture inputs through each runtime's"
  echo "\`SessionRecord\` builder + JSON marshaler. Match = byte-equal"
  echo "after stripping \`timestamp\` (per-run wall clock) + \`daemon\`"
  echo "(\"gateway\" on Rust side, \"validatord\" on Go side; both are"
  echo "valid producers in the same longitudinal log)."
  echo
  echo "**Tally:** $MATCH match · $DIFF diff (out of $TOTAL fixtures)"
  if [ -n "$DIFF_DETAIL" ]; then
    echo
    echo "## Divergences"
    printf '%s\n' "$DIFF_DETAIL"
  else
    echo
    echo "_No divergences — schema parity holds across all fixtures._"
  fi
} > "$OUT"

echo "[parity] session_log: $MATCH match / $DIFF diff (out of $TOTAL) → $OUT"

# Exit status is the gate: success only when no fixture diverged.
[ "$DIFF" -eq 0 ]