golangLAKEHOUSE/tests/proof/cases/04_query_correctness.sh
root 4840c10311 proof harness: fix queryd refresh-tick race in 04_query_correctness
Caught by the audit rerun: with cache-warm binaries, 04 fires its
first SELECT faster than queryd's 500ms refresh tick — Q1 returned
400 ("table not found") even though 03_ingest had registered the
manifest. Subsequent queries (after the next tick) succeeded.

This is an eventual-consistency wait, not a retry — queryd's
contract is that views appear within one tick of catalogd having the
manifest. Production code does not need changing.

Added to lib/http.sh:
  proof_wait_for_sql <budget_sec> <sql>
    polls a SQL probe until it returns 200 or budget elapses; emits
    no evidence (test setup, not a claim).

Used in 04_query_correctness:
  Wait up to 5s for queryd to have the view before running the 5
  SQL assertions. Skip-with-loud-reason if the view never appears.

Verified: integration mode back to 104 pass / 0 fail / 1 skip after
fix. The skip is the unchanged GOLAKE-085 informational record.

This is exactly the kind of finding the harness was designed to
surface — the regression existed in the codebase the moment Phase D
shipped, but only fired when the next compare run hit cache-warm
timing. Without the harness, it would have surfaced on a CI run
weeks from now and been hard to bisect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 05:36:28 -05:00

79 lines
3.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# 04_query_correctness.sh — GOLAKE-040.
# Runs the 5 SQL assertions from fixtures/expected/queries.json against
# the workers dataset ingested by 03_ingest_csv_to_parquet. Each query
# is recorded with full evidence; this case is the canonical "does the
# SQL path return correct results" claim.
# -u: unset variables are errors; pipefail: a pipeline fails if any stage fails.
# Note there is no -e — presumably deliberate, so that a failed assertion is
# recorded as evidence and the case keeps running the remaining claims
# (TODO confirm against lib/assert.sh conventions).
set -uo pipefail
# Absolute directory of this script, so the lib/ sources resolve regardless
# of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/../lib/env.sh"     # PROOF_* environment (gateway URL, report dir, repo root)
source "${SCRIPT_DIR}/../lib/http.sh"    # proof_post / proof_status_of / proof_wait_for_sql
source "${SCRIPT_DIR}/../lib/assert.sh"  # presumably proof_assert_eq / proof_skip / _proof_record
# Case metadata — read by the harness via the --metadata-only probe below.
CASE_ID="GOLAKE-040"
CASE_NAME="Query correctness — 5 SQL assertions on workers fixture"
CASE_TYPE="integration"
# Metadata-only probe: the harness invokes the case with --metadata-only to
# read the CASE_* variables without executing it. "return" succeeds when the
# script is sourced; when executed directly, return fails (stderr silenced)
# and we fall through to exit.
if [ "${1:-}" = "--metadata-only" ]; then return 0 2>/dev/null || exit 0; fi
DATASET="proof_workers"
EXPECTED_FILE="${PROOF_REPO_ROOT}/tests/proof/fixtures/expected/queries.json"
# Spec's SQL fixtures use unquoted table name "workers" but ingestd
# registers under whatever ?name= we passed in 03 — proof_workers.
# Substitute on the fly so the queries still reference the right view.
# Rewrite the spec's canonical table name "workers" to the view that
# 03_ingest actually registered (${DATASET}). Filter: SQL in on stdin,
# rewritten SQL out on stdout. Handles both the upper- and lower-case
# FROM spellings that appear in the fixtures.
substitute_table() {
  sed -e "s/FROM workers/FROM ${DATASET}/g" \
      -e "s/from workers/from ${DATASET}/g"
}
# Wait for queryd to have the view from 03's ingest. queryd refreshes
# every 500ms (proof override of the 30s prod default); on cache-warm
# runs cases fire faster than the next tick. Up to 5s budget.
# The LIMIT 0 probe is side-effect-free: it only answers "does the view
# exist yet", it never has to return rows. proof_wait_for_sql (lib/http.sh)
# polls until the probe returns 200 or the budget elapses, emitting no
# evidence — this is test setup, not a claim.
if ! proof_wait_for_sql 5 "SELECT 1 FROM ${DATASET} LIMIT 0"; then
# Skip loudly rather than let 5 assertions fail with the same root cause;
# the reason names both plausible culprits so the report is actionable.
proof_skip "$CASE_ID" "queryd view ${DATASET} never appeared in 5s" \
"queryd refresh ticker may be stalled or 03_ingest registration failed"
# return when sourced by the harness, exit when executed directly
# (2>/dev/null swallows the "can only return from a function or sourced
# script" error in the executed case).
return 0 2>/dev/null || exit 0
fi
# Iterate the queries in the expected-results fixture. Each entry carries
# an id, a human-readable claim, the SQL to run, and an "expected" object
# mapping column name -> expected value in row 0 of the result.
n=$(jq '.queries | length' "$EXPECTED_FILE")
for ((i = 0; i < n; i++)); do
  qid=$(jq -r ".queries[$i].id" "$EXPECTED_FILE")
  qclaim=$(jq -r ".queries[$i].claim" "$EXPECTED_FILE")
  qsql=$(jq -r ".queries[$i].sql" "$EXPECTED_FILE" | substitute_table)
  # Build a minimal JSON body — escape the SQL via jq.
  body=$(jq -nc --arg sql "$qsql" '{sql:$sql}')
  proof_post "$CASE_ID" "${qid}_query" \
    "${PROOF_GATEWAY_URL}/v1/sql" \
    "application/json" "$body" >/dev/null
  qstatus=$(proof_status_of "$CASE_ID" "${qid}_query")
  proof_assert_eq "$CASE_ID" "${qid}: ${qclaim} — query status 200" \
    "200" "$qstatus"
  # A failed query already produced its fail record above; the per-value
  # assertions would only add noise, so move on to the next query.
  if [ "$qstatus" != "200" ]; then continue; fi
  qbody="${PROOF_REPORT_DIR}/raw/http/${CASE_ID}/${qid}_query.body"
  # queryd response shape: {columns: [{name,type}], rows: [[...]], row_count: N}
  # We compare each expected key against the value at the column index for
  # that key in row 0. Keys are streamed one per line into a while-read
  # loop (not a word-split for-in) so keys containing whitespace survive.
  while IFS= read -r ek; do
    # Pass the key via --arg instead of interpolating it into the jq
    # program: a key containing quotes or backslashes would otherwise
    # break (or inject into) the filter.
    expected=$(jq -r --arg k "$ek" ".queries[$i].expected[\$k]" "$EXPECTED_FILE")
    # Find the column index for $ek in the response, then read row[0][idx].
    col_idx=$(jq -r --arg n "$ek" '.columns | map(.name) | index($n)' "$qbody")
    if [ "$col_idx" = "null" ]; then
      _proof_record "$CASE_ID" "${qid}: column ${ek} present in response" \
        fail "${ek}" "<missing>" "column not found in response"
      continue
    fi
    actual=$(jq -r ".rows[0][$col_idx]" "$qbody")
    proof_assert_eq "$CASE_ID" "${qid}: ${qclaim}" "$expected" "$actual"
  done < <(jq -r ".queries[$i].expected | keys[]" "$EXPECTED_FILE")
done