Caught by the audit rerun: with cache-warm binaries, 04 fires its
first SELECT faster than queryd's 500ms refresh tick — Q1 returned
400 ("table not found") even though 03_ingest had registered the
manifest. Subsequent queries (after the next tick) succeeded.
This is an eventual-consistency wait, not a retry — queryd's
contract is that views appear within one tick of catalogd having the
manifest. Production code does not need changing.
Added to lib/http.sh:
proof_wait_for_sql <budget_sec> <sql>
polls a SQL probe until it returns 200 or budget elapses; emits
no evidence (test setup, not a claim).
Used in 04_query_correctness:
Wait up to 5s for queryd to have the view before running the 5
SQL assertions. Skip-with-loud-reason if the view never appears.
Verified: integration mode back to 104 pass / 0 fail / 1 skip after
fix. The skip is the unchanged GOLAKE-085 informational record.
This is exactly the kind of finding the harness was designed to
surface — the regression existed in the codebase the moment Phase D
shipped, but only fired when the next compare run hit cache-warm
timing. Without the harness, it would have surfaced on a CI run
weeks from now and been hard to bisect.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
79 lines
3.3 KiB
Bash
Executable File
#!/usr/bin/env bash
# 04_query_correctness.sh — GOLAKE-040.
# Runs the 5 SQL assertions from fixtures/expected/queries.json against
# the workers dataset ingested by 03_ingest_csv_to_parquet. Each query
# is recorded with full evidence; this case is the canonical "does the
# SQL path return correct results" claim.

# NOTE(review): no `-e` here — presumably deliberate, so the harness can
# record individual assertion failures instead of aborting on the first
# non-zero status. Confirm against the other proof cases.
set -uo pipefail

# Resolve our own directory so the lib/ sources work from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/../lib/env.sh"
source "${SCRIPT_DIR}/../lib/http.sh"
source "${SCRIPT_DIR}/../lib/assert.sh"

# Case identity, read by the harness (also via --metadata-only below).
CASE_ID="GOLAKE-040"
CASE_NAME="Query correctness — 5 SQL assertions on workers fixture"
CASE_TYPE="integration"

# Metadata-only invocation: the harness just wants the CASE_* variables
# above. Bail out now — `return` when sourced, `exit` when executed.
if [[ "${1:-}" == "--metadata-only" ]]; then
  return 0 2>/dev/null || exit 0
fi

DATASET="proof_workers"
EXPECTED_FILE="${PROOF_REPO_ROOT}/tests/proof/fixtures/expected/queries.json"

# Spec's SQL fixtures use unquoted table name "workers" but ingestd
# registers under whatever ?name= we passed in 03 — proof_workers.
# Substitute on the fly so the queries still reference the right view.
# Rewrite whole-word "workers" table references (FROM/from) on stdin to
# ${DATASET}. Whole-word guards mean a longer identifier such as
# "workers_other" is left untouched — the old bare `s/FROM workers/…/g`
# would have mangled it into "${DATASET}_other".
# Portability: GNU-only `\b` and `\|` are avoided; instead there is one
# POSIX BRE expression per case (upper/lower) and position (end-of-line /
# followed by a non-identifier character).
# NOTE(review): assumes ${DATASET} contains no sed-special characters
# (/ & \) — true for the literal proof_workers set above.
substitute_table() {
  sed \
    -e "s/FROM workers\$/FROM ${DATASET}/" \
    -e "s/from workers\$/from ${DATASET}/" \
    -e "s/FROM workers\([^A-Za-z0-9_]\)/FROM ${DATASET}\1/g" \
    -e "s/from workers\([^A-Za-z0-9_]\)/from ${DATASET}\1/g"
}
# Wait for queryd to pick up the view registered by 03's ingest. queryd
# refreshes every 500ms (proof override of the 30s prod default), so on
# cache-warm runs this case can fire before the next tick. 5s budget;
# if the view never appears, skip loudly rather than fail 5 assertions.
proof_wait_for_sql 5 "SELECT 1 FROM ${DATASET} LIMIT 0" || {
  proof_skip "$CASE_ID" "queryd view ${DATASET} never appeared in 5s" \
    "queryd refresh ticker may be stalled or 03_ingest registration failed"
  return 0 2>/dev/null || exit 0
}
# Iterate the queries in the expected-results fixture (5 in the current
# fixture; the loop is driven by the file so additions just work).
n=$(jq '.queries | length' "$EXPECTED_FILE")
for ((i = 0; i < n; i++)); do
  qid=$(jq -r ".queries[$i].id" "$EXPECTED_FILE")
  qclaim=$(jq -r ".queries[$i].claim" "$EXPECTED_FILE")
  qsql=$(jq -r ".queries[$i].sql" "$EXPECTED_FILE" | substitute_table)

  # Build a minimal JSON body — escape the SQL via jq.
  body=$(jq -nc --arg sql "$qsql" '{sql:$sql}')

  proof_post "$CASE_ID" "${qid}_query" \
    "${PROOF_GATEWAY_URL}/v1/sql" \
    "application/json" "$body" >/dev/null

  qstatus=$(proof_status_of "$CASE_ID" "${qid}_query")
  proof_assert_eq "$CASE_ID" "${qid}: ${qclaim} — query status 200" \
    "200" "$qstatus"

  # Skip the value assertions if the query failed.
  if [ "$qstatus" != "200" ]; then continue; fi

  qbody="${PROOF_REPORT_DIR}/raw/http/${CASE_ID}/${qid}_query.body"

  # queryd response shape: {columns: [{name,type}], rows: [[...]], row_count: N}
  # Each expected key/value drives one assertion: look up the column
  # index for the key in the response, compare row[0][idx].
  # Keys are read line-by-line via process substitution (runs the loop
  # in the current shell, so `continue` works) instead of word-splitting
  # an unquoted variable — key names with spaces survive.
  while IFS= read -r ek; do
    # Pass the key via --arg so jq treats it as data, not program text;
    # the old ".expected.\"$ek\"" interpolation broke on quotes in keys.
    expected=$(jq -r --arg k "$ek" ".queries[$i].expected[\$k]" "$EXPECTED_FILE")
    col_idx=$(jq -r --arg n "$ek" '.columns | map(.name) | index($n)' "$qbody")
    # index() yields null when absent; also guard an empty string in
    # case jq itself failed (e.g. unreadable/garbled body file).
    if [ -z "$col_idx" ] || [ "$col_idx" = "null" ]; then
      _proof_record "$CASE_ID" "${qid}: column ${ek} present in response" \
        fail "${ek}" "<missing>" "column not found in response"
      continue
    fi
    actual=$(jq -r ".rows[0][$col_idx]" "$qbody")
    proof_assert_eq "$CASE_ID" "${qid}: ${qclaim}" "$expected" "$actual"
  done < <(jq -r ".queries[$i].expected | keys[]" "$EXPECTED_FILE")
done