#!/usr/bin/env bash # 02_catalog_manifest.sh — GOLAKE-020 + GOLAKE-021 + GOLAKE-022. # Catalog register idempotency + manifest read + list inclusion + # schema-drift 409 (the ADR-020 contract). Uses a synthetic manifest # referencing a fake parquet object so we don't depend on prior ingest. set -uo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "${SCRIPT_DIR}/../lib/env.sh" source "${SCRIPT_DIR}/../lib/http.sh" source "${SCRIPT_DIR}/../lib/assert.sh" CASE_ID="GOLAKE-020-022" CASE_NAME="Catalog manifest — register idempotent + drift 409" CASE_TYPE="integration" if [ "${1:-}" = "--metadata-only" ]; then return 0 2>/dev/null || exit 0; fi # Fresh-each-run name so the existing=false assertion is meaningful. # Catalog dataset_id is deterministic UUIDv5 from name; reusing the # same name across runs would always show existing=true on second run. NAME="proof_catalog_${PROOF_RUN_ID}" FP_A="sha256:proof_test_fp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" FP_B="sha256:proof_test_fp_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" reg_body() { local name="$1" fp="$2" cat </dev/null proof_assert_eq "$CASE_ID" "first register → 200" "200" \ "$(proof_status_of "$CASE_ID" "register_first")" first_body="${PROOF_REPORT_DIR}/raw/http/${CASE_ID}/register_first.body" existing_first=$(jq -r '.existing' "$first_body") proof_assert_eq "$CASE_ID" "first register existing=false" \ "false" "$existing_first" dataset_id_first=$(jq -r '.manifest.dataset_id' "$first_body") proof_assert_ne "$CASE_ID" "first register dataset_id non-empty" "" "$dataset_id_first" # Manifest read matches what was registered. 
# ---------------------------------------------------------------------------
# Follow-up checks against the dataset registered earlier in this script:
#   1. manifest read-back   2. list inclusion
#   3. idempotent re-register   4. schema-drift rejection
# Every proof_get/proof_post call has its stdout discarded; the status is
# fetched afterwards via proof_status_of and bodies are read from the
# per-case raw capture directory below.
# ---------------------------------------------------------------------------
catalog_base="${PROOF_GATEWAY_URL}/v1/catalog"
raw_http_dir="${PROOF_REPORT_DIR}/raw/http/${CASE_ID}"

# 1. Manifest read-back: fingerprint and dataset_id must equal the values
#    from the first registration (FP_A / dataset_id_first).
proof_get "$CASE_ID" "manifest_read" "${catalog_base}/manifest/${NAME}" >/dev/null
manifest_read_status="$(proof_status_of "$CASE_ID" "manifest_read")"
proof_assert_eq "$CASE_ID" "manifest read → 200" "200" "$manifest_read_status"

read_body="${raw_http_dir}/manifest_read.body"
read_fp=$(jq -r '.schema_fingerprint' "$read_body")
proof_assert_eq "$CASE_ID" "manifest schema_fingerprint matches" "$FP_A" "$read_fp"
read_id=$(jq -r '.dataset_id' "$read_body")
proof_assert_eq "$CASE_ID" "manifest dataset_id matches" "$dataset_id_first" "$read_id"

# 2. List inclusion: the registered dataset_id must appear somewhere in the
#    list response body (substring check, not a structural JSON match).
proof_get "$CASE_ID" "list" "${catalog_base}/list" >/dev/null
list_status="$(proof_status_of "$CASE_ID" "list")"
proof_assert_eq "$CASE_ID" "list → 200" "200" "$list_status"

list_body=$(proof_body_of "$CASE_ID" "list")
proof_assert_contains "$CASE_ID" "list contains dataset_id" "$dataset_id_first" "$list_body"

# 3. Idempotent re-register: same name + same fingerprint must report
#    existing=true and hand back the same dataset_id as the first call.
same_fp_payload="$(reg_body "$NAME" "$FP_A")"
proof_post "$CASE_ID" "register_second" "${catalog_base}/register" \
  "application/json" "$same_fp_payload" >/dev/null
register_second_status="$(proof_status_of "$CASE_ID" "register_second")"
proof_assert_eq "$CASE_ID" "second register → 200" "200" "$register_second_status"

second_body="${raw_http_dir}/register_second.body"
existing_second=$(jq -r '.existing' "$second_body")
proof_assert_eq "$CASE_ID" "second register existing=true (idempotent)" "true" "$existing_second"
dataset_id_second=$(jq -r '.manifest.dataset_id' "$second_body")
proof_assert_eq "$CASE_ID" "dataset_id stable across re-register" "$dataset_id_first" "$dataset_id_second"

# 4. Schema drift: same name with a different fingerprint (FP_B) must be
#    refused with HTTP 409, per ADR-020.
drift_payload="$(reg_body "$NAME" "$FP_B")"
proof_post "$CASE_ID" "register_drift" "${catalog_base}/register" \
  "application/json" "$drift_payload" >/dev/null
register_drift_status="$(proof_status_of "$CASE_ID" "register_drift")"
proof_assert_eq "$CASE_ID" "drift register → 409 (ADR-020)" "409" "$register_drift_status"