Companion to lakehouse commit 57bde63 (Rust gateway gains
trace-id propagation + coordinator session JSONL). The
cross-runtime parity probe is the regression gate that prevents
silent schema drift between the two runtimes.
scripts/cutover/parity/session_log_parity.sh:
- 4 fixtures (accepted_grounded, max_iter_exhausted, infra_error,
unicode_in_prompt) feed identical input to both helpers
- jq -e validity gate + non-trivial-equal guard prevents the
"both sides fail identically → spurious match" failure mode
(caught one IFS='||' bug during initial authoring — recorded
in the script comment)
- normalize() strips timestamp + daemon (legitimate per-producer
differences); everything else must be byte-equal
- Result: 4/4 fixtures match, including unicode
scripts/cutover/parity/session_log_helper/main.go:
- Tiny stdin/stdout Go helper that round-trips a fixture
through validator.SessionRecord serde
- Counterpart to crates/gateway/src/bin/parity_session_log.rs
docs/ARCHITECTURE_COMPARISON.md decisions tracker:
- "Rust observability parity" row added (DONE 2026-05-02)
- Cross-runtime probe documented as reusable gate
STATE_OF_PLAY refreshed.
Both observability pieces (trace-id propagation, session JSONL)
now exist on both runtimes. Operators who point Rust gateway and
Go validatord at the same session-log path get a unified
longitudinal stream queryable via DuckDB.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
128 lines
6.1 KiB
Bash
Executable File
128 lines
6.1 KiB
Bash
Executable File
#!/usr/bin/env bash
# session_log_parity — verify Rust gateway and Go validatord write
# byte-equivalent SessionRecord JSONL rows from identical input.
#
# Why: the longitudinal session log (one row per /v1/iterate session)
# is the offline-analysis layer. Cross-runtime drift means a DuckDB
# query that works against one log fails against the other — the
# whole "unified longitudinal view" theory unravels. This probe is
# the regression gate.
#
# Approach (pure schema check, no live daemon needed):
#   1. Build both runtimes' session_log writers as standalone helper
#      binaries that take a fixture JSON on stdin and emit one row.
#   2. Feed identical fixture inputs through each.
#   3. Diff the resulting rows after normalizing the few fields that
#      MUST differ (timestamp, daemon).
#
# Requires: jq, go (and cargo when the Rust helper has to be built).
# Environment overrides: RUST_REPO, RUST_BIN, GO_BIN.
#
# Outputs: reports/cutover/gauntlet_2026-05-02/parity/session_log_parity.md
#
# Exit 0 on schema match; exit 1 on drift; exit 2 when the probe
# cannot be set up (jq missing, repo root or report dir unavailable).

# errexit is intentionally not enabled; every step whose failure
# matters is checked explicitly.
set -uo pipefail

# Run from the repo root (three levels above this script) so the
# relative paths below resolve. Continuing from the wrong directory
# would build and report against the wrong tree.
cd -- "$(dirname -- "$0")/../../.." || {
  echo "[session-log-parity] ERROR: cannot cd to repo root" >&2
  exit 2
}

RUST_REPO="${RUST_REPO:-/home/profit/lakehouse}"
RUST_BIN="${RUST_BIN:-$RUST_REPO/target/release/parity_session_log}"
GO_BIN="${GO_BIN:-./bin/parity_session_log_go}"
OUT_DIR="reports/cutover/gauntlet_2026-05-02/parity"
OUT="$OUT_DIR/session_log_parity.md"

mkdir -p -- "$OUT_DIR" || {
  echo "[session-log-parity] ERROR: cannot create $OUT_DIR" >&2
  exit 2
}

# Fallback location for the Go toolchain when it is not already on PATH.
export PATH="$PATH:/usr/local/go/bin"

# Without jq every row fails the validity gate and the run would be
# reported as schema drift rather than as a tooling problem.
if ! command -v jq >/dev/null 2>&1; then
  echo "[session-log-parity] ERROR: jq is required but was not found on PATH" >&2
  exit 2
fi
# ── Build both helpers ─────────────────────────────────────────────
# The Go helper is rebuilt on every run. A failed build must abort the
# probe: a stale $GO_BIN left over from an earlier run would otherwise
# be exercised and could report parity for code that no longer builds.
if ! go build -o "$GO_BIN" ./scripts/cutover/parity/session_log_helper; then
  echo "[session-log-parity] ERROR: go build of session_log_helper failed" >&2
  exit 2
fi

# The Rust helper is only built when no executable exists at $RUST_BIN;
# an existing binary is used as-is. Only the last lines of cargo's
# output are shown to keep the log short.
if [ ! -x "$RUST_BIN" ]; then
  echo "[session-log-parity] building Rust helper..."
  (cd "$RUST_REPO" && cargo build -p gateway --bin parity_session_log --release 2>&1 | tail -3)
fi

# Still no Rust helper after the build attempt: skip the probe (exit 0)
# instead of failing.
if [ ! -x "$RUST_BIN" ]; then
  echo "[session-log-parity] SKIP: $RUST_BIN missing"
  exit 0
fi
# ── Fixtures ───────────────────────────────────────────────────────
# Parallel arrays — LABELS[i] names FIXTURES[i]. This avoids a
# separator-in-string scheme (the prior `||` form silently injected
# a `|` character into every fixture body, which both helpers then
# rejected with empty output → trivially-equal "match" — caught
# during initial probe authoring 2026-05-02).
LABELS=(
  "accepted_grounded"
  "max_iter_exhausted"
  "infra_error"
  "unicode_in_prompt"
)
FIXTURES=(
  '{"session_id":"trace-1","kind":"fill","model":"qwen3.5:latest","provider":"ollama","prompt":"produce a fill","iterations":1,"max_iterations":3,"final_verdict":"accepted","attempts":[{"iteration":0,"verdict_kind":"accepted","span_id":"sp-0"}],"artifact":{"fills":[{"candidate_id":"W-1"}]},"grounded_in_roster":true,"duration_ms":50}'
  '{"session_id":"trace-2","kind":"fill","model":"qwen3.5:latest","provider":"ollama","prompt":"P","iterations":3,"max_iterations":3,"final_verdict":"max_iter_exhausted","attempts":[{"iteration":0,"verdict_kind":"validation_failed","error":"phantom W-X","span_id":"sp-a"},{"iteration":1,"verdict_kind":"validation_failed","error":"phantom W-Y","span_id":"sp-b"},{"iteration":2,"verdict_kind":"no_json","span_id":"sp-c"}],"duration_ms":3200}'
  '{"session_id":"trace-3","kind":"playbook","model":"qwen","provider":"ollama","prompt":"P","iterations":0,"max_iterations":3,"final_verdict":"infra_error","attempts":[{"iteration":0,"verdict_kind":"infra_error","error":"connection refused"}],"duration_ms":12}'
  '{"session_id":"trace-4","kind":"playbook","model":"qwen","provider":"ollama","prompt":"Café résumé ⭐ 你好","iterations":1,"max_iterations":3,"final_verdict":"accepted","attempts":[{"iteration":0,"verdict_kind":"accepted","span_id":"sp-u"}],"artifact":{"endorsed_names":["W-1"],"operation":"fill: X x1 in A, B","fingerprint":"abc"},"duration_ms":80}'
)

# The arrays are consumed by index, so a fixture added without its
# label (or vice versa) must be caught before any comparison runs.
if [ "${#LABELS[@]}" -ne "${#FIXTURES[@]}" ]; then
  echo "[session-log-parity] ERROR: ${#LABELS[@]} labels vs ${#FIXTURES[@]} fixtures" >&2
  exit 2
fi
#######################################
# Strip the fields that MUST differ between Rust and Go runs
# (timestamp = now; daemon = producer name) and emit the row in
# compact form with sorted keys, so JSON serialization order does not
# break a byte-equal comparison.
# Arguments: $1 - one JSON row, as text
# Outputs:   the normalized row on stdout
# Returns:   jq's exit status; input that is not a JSON object is
#            rejected with an error and nothing is written to stdout
#######################################
normalize() {
  local row=$1
  jq -cS '
    if type == "object" then del(.timestamp, .daemon)
    else error("session row must be a JSON object, got \(type)")
    end' <<<"$row"
}
# ── Compare ────────────────────────────────────────────────────────
TOTAL=0; MATCH=0; DIFF=0
DIFF_DETAIL=""

#######################################
# Run one helper on one fixture and print the comparable form of its row.
# Only the helper's stdout is treated as the row; stderr is captured
# separately so a log or warning line cannot corrupt otherwise valid
# JSON. The row is accepted only if the helper exited 0, emitted
# exactly one JSON object, and normalize produced non-empty output.
# Anything else yields a marker string that embeds the side name, so a
# failure on both sides can never compare equal (the "both sides fail
# identically → spurious match" trap).
# Arguments: $1 - side name used in the failure marker (rust | go)
#            $2 - helper command to execute
#            $3 - fixture JSON fed to the helper on stdin
# Outputs:   normalized row, or "<SIDE helper failed ...>", on stdout
#######################################
helper_row() {
  local side=$1 bin=$2 fixture=$3
  local err_file raw status norm detail
  err_file=$(mktemp) || err_file=/dev/null
  raw=$(printf '%s' "$fixture" | "$bin" 2>"$err_file")
  status=$?
  if [ "$status" -eq 0 ] \
    && jq -e -s 'length == 1 and (.[0] | type == "object")' <<<"$raw" >/dev/null 2>&1 \
    && norm=$(normalize "$raw" 2>/dev/null) \
    && [ -n "$norm" ]; then
    printf '%s' "$norm"
  else
    # Keep the report readable: first 200 bytes of stdout + stderr.
    detail=$({ [ -z "$raw" ] || printf '%s\n' "$raw"; cat -- "$err_file"; } 2>/dev/null | head -c 200)
    printf '<%s helper failed (exit %s): %s>' "$side" "$status" "$detail"
  fi
  [ "$err_file" = /dev/null ] || rm -f -- "$err_file"
}

for i in "${!FIXTURES[@]}"; do
  label=${LABELS[$i]}
  fixture=${FIXTURES[$i]}
  TOTAL=$((TOTAL + 1))
  rust_norm=$(helper_row rust "$RUST_BIN" "$fixture")
  go_norm=$(helper_row go "$GO_BIN" "$fixture")
  if [ "$rust_norm" = "$go_norm" ]; then
    MATCH=$((MATCH + 1))
  else
    DIFF=$((DIFF + 1))
    # One markdown section per diverging fixture, appended to the report body.
    printf -v section '\n\n### %s\n\n**Rust:**\n```json\n%s\n```\n\n**Go:**\n```json\n%s\n```' \
      "$label" "$rust_norm" "$go_norm"
    DIFF_DETAIL+=$section
  fi
done
# ── Report ─────────────────────────────────────────────────────────
# Write the markdown report to $OUT. A report that cannot be written
# is treated as a failure of the probe itself, because the exit
# status below would otherwise be reported without its evidence.
if ! {
  printf '%s\n' \
    "# session_log parity probe — Rust gateway vs Go validatord" \
    "" \
    "**Date:** $(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    "**Rust helper:** \`$RUST_BIN\`" \
    "**Go helper:** \`$GO_BIN\`" \
    "" \
    "Identical fixture inputs through each runtime's" \
    "\`SessionRecord\` builder + JSON marshaler. Match = byte-equal" \
    "after stripping \`timestamp\` (per-run wall clock) + \`daemon\`" \
    "(\"gateway\" on Rust side, \"validatord\" on Go side; both are" \
    "valid producers in the same longitudinal log)." \
    "" \
    "**Tally:** $MATCH match · $DIFF diff (out of $TOTAL fixtures)"
  if [ -n "$DIFF_DETAIL" ]; then
    printf '%s\n' "" "## Divergences"
    # DIFF_DETAIL carries helper output verbatim; print it as data.
    printf '%s\n' "$DIFF_DETAIL"
  else
    printf '%s\n' "" "_No divergences — schema parity holds across all fixtures._"
  fi
} > "$OUT"; then
  echo "[parity] session_log: ERROR: cannot write report to $OUT" >&2
  exit 2
fi

echo "[parity] session_log: $MATCH match / $DIFF diff (out of $TOTAL) → $OUT"

# Script exit status: 0 when every fixture matched, 1 on any drift.
[ "$DIFF" -eq 0 ]