#!/usr/bin/env bash
#
# End-to-end test: upload Parquet → register dataset → SQL query.
#
# Steps:
#   1. Generate a small Parquet file with pyarrow.
#   2. PUT it to the storage endpoint.
#   3. Register it as the "scores" dataset in the catalog.
#   4. Run a few SQL queries against it and pretty-print the JSON replies.
#
# Environment:
#   BASE  Base URL of the service under test
#         (default: http://localhost:3100).
#
# Requires: curl, python3 (with the pyarrow package installed).
#
# Exits non-zero as soon as any step fails, including HTTP error statuses
# and failures in the left-hand side of a pipeline.

set -euo pipefail

BASE="${BASE:-http://localhost:3100}"
readonly BASE

# Private scratch directory instead of a predictable /tmp path; removed on
# every exit path by the trap below.
workdir=$(mktemp -d)
readonly workdir
readonly parquet_file="${workdir}/test_data.parquet"

cleanup() {
  rm -rf -- "${workdir:?}"
}
trap cleanup EXIT

#######################################
# Write the five-row test table (id, name, score) as a Parquet file.
# Arguments: $1 - destination path
# Outputs:   a confirmation line on stdout; a diagnostic on stderr when
#            pyarrow cannot be imported
# Returns:   non-zero when pyarrow is missing or the write fails
#######################################
generate_parquet() {
  python3 - "$1" <<'PY'
import sys

try:
    import pyarrow as pa
    import pyarrow.parquet as pq
except ImportError:
    print('pyarrow not available; cannot generate the test Parquet file',
          file=sys.stderr)
    sys.exit(1)

table = pa.table({
    'id': [1, 2, 3, 4, 5],
    'name': ['alice', 'bob', 'carol', 'dave', 'eve'],
    'score': [9.5, 8.2, 7.8, 6.1, 9.9],
})
pq.write_table(table, sys.argv[1])
print('generated with pyarrow')
PY
}

#######################################
# POST one SQL statement to the query endpoint and pretty-print the reply.
# Globals:   BASE (read)
# Arguments: $1 - SQL text; it is embedded verbatim in a JSON string, so it
#                 must not contain double quotes or backslashes
# Outputs:   formatted JSON response on stdout
# Returns:   non-zero on transport error, HTTP error status, or non-JSON reply
#######################################
run_sql() {
  local sql=$1
  # -f turns HTTP >= 400 into a non-zero exit; -S keeps the error message
  # visible even though -s silences the progress meter.
  curl -fsS -X POST "${BASE}/query/sql" \
    -H "Content-Type: application/json" \
    -d "{\"sql\":\"${sql}\"}" \
    | python3 -m json.tool
}

main() {
  local size_bytes

  echo "=== Generate test Parquet file ==="
  generate_parquet "${parquet_file}"

  echo "=== Upload Parquet to storage ==="
  curl -fsS -X PUT "${BASE}/storage/objects/datasets/scores.parquet" \
    --data-binary "@${parquet_file}"
  echo ""

  echo "=== Register dataset in catalog ==="
  # wc -c works on both GNU and BSD userlands (stat -c%s is GNU-only); the
  # arithmetic expansion strips the padding some wc implementations emit.
  size_bytes=$(wc -c <"${parquet_file}")
  size_bytes=$((size_bytes))
  curl -fsS -X POST "${BASE}/catalog/datasets" \
    -H "Content-Type: application/json" \
    -d "{\"name\":\"scores\",\"schema_fingerprint\":\"test\",\"objects\":[{\"bucket\":\"data\",\"key\":\"datasets/scores.parquet\",\"size_bytes\":${size_bytes}}]}" \
    | python3 -m json.tool
  echo ""

  echo "=== SQL: SELECT * FROM scores ==="
  run_sql "SELECT * FROM scores"
  echo ""

  echo "=== SQL: SELECT name, score FROM scores WHERE score > 8.0 ORDER BY score DESC ==="
  run_sql "SELECT name, score FROM scores WHERE score > 8.0 ORDER BY score DESC"
  echo ""

  echo "=== SQL: SELECT COUNT(*), AVG(score) FROM scores ==="
  run_sql "SELECT COUNT(*) as cnt, AVG(score) as avg_score FROM scores"
  echo ""

  echo "=== DONE ==="
}

main "$@"