lakehouse/tests/e2e_query.sh
root 19bdfab227 Phase 2: DataFusion query engine over Parquet
- queryd: SessionContext with custom URL scheme to avoid path doubling with LocalFileSystem
- queryd: ListingTable registration from catalog ObjectRefs with schema inference
- queryd: POST /query/sql returns JSON {columns, rows, row_count}
- queryd→catalogd wiring: reads all datasets, registers as named tables
- gateway: wires QueryEngine with shared store + registry
- e2e verified: SELECT *, WHERE/ORDER BY, COUNT/AVG all correct

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 05:48:20 -05:00

58 lines
1.9 KiB
Bash

#!/usr/bin/env bash
# End-to-end test: upload Parquet → register dataset → SQL query
#
# Talks to the HTTP service at $BASE and needs curl plus python3 with the
# pyarrow package. Steps:
#   1. generate a 5-row Parquet file (columns: id, name, score)
#   2. PUT it to /storage/objects/datasets/scores.parquet
#   3. POST a "scores" dataset entry referencing that object to /catalog/datasets
#   4. POST three SQL statements to /query/sql and pretty-print each response
#
# Any failing step (missing tool, curl/HTTP error, response that is not valid
# JSON) aborts the script with a non-zero exit status.
set -euo pipefail

readonly BASE="http://localhost:3100"

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# POST a JSON document and pretty-print the JSON response.
# Globals:   BASE (read)
# Arguments: $1 - URL path appended to BASE (e.g. /query/sql)
#            $2 - JSON request body
# Outputs:   formatted response on stdout
# Returns:   exits non-zero if curl fails, the server answers with an HTTP
#            error status, or the response is not valid JSON
#######################################
post_json() {
  local path=$1 body=$2 response
  # --fail maps HTTP >= 400 to a non-zero curl exit; -S keeps curl's error
  # message visible despite -s. The response is captured before formatting so
  # a transport/HTTP failure is reported as such instead of surfacing as a
  # JSON parse error from json.tool.
  response=$(curl -sS --fail -X POST "${BASE}${path}" \
    -H "Content-Type: application/json" \
    -d "$body") || die "POST ${path} failed"
  printf '%s\n' "$response" | python3 -m json.tool
}

main() {
  local tool
  for tool in curl python3; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  # Private scratch directory instead of a fixed name in /tmp; removed on any
  # exit path. Deliberately not 'local' so the EXIT trap can still expand it
  # after main has returned.
  workdir=$(mktemp -d) || die "mktemp failed"
  trap 'rm -rf -- "$workdir"' EXIT
  local parquet_file="$workdir/test_data.parquet"

  echo "=== Generate test Parquet file ==="
  # Quoted heredoc: the Python source is passed verbatim (no shell expansion);
  # the output path is handed over as argv[1].
  python3 - "$parquet_file" <<'PY'
import sys

try:
    import pyarrow as pa
    import pyarrow.parquet as pq
except ImportError:
    # sys.exit(str) prints the message to stderr and exits with status 1.
    sys.exit('pyarrow not available; cannot generate the test Parquet file')

table = pa.table({
    'id': [1, 2, 3, 4, 5],
    'name': ['alice', 'bob', 'carol', 'dave', 'eve'],
    'score': [9.5, 8.2, 7.8, 6.1, 9.9],
})
pq.write_table(table, sys.argv[1])
print('generated with pyarrow')
PY

  echo "=== Upload Parquet to storage ==="
  curl -sS --fail -X PUT "$BASE/storage/objects/datasets/scores.parquet" \
    --data-binary "@${parquet_file}" \
    || die "upload of ${parquet_file} failed"
  echo ""

  echo "=== Register dataset in catalog ==="
  # wc -c is portable (stat -c%s is GNU-only); the arithmetic expansion strips
  # the leading padding some wc implementations emit.
  local size_bytes payload
  size_bytes=$(( $(wc -c <"$parquet_file") ))
  payload=$(printf '{"name":"scores","schema_fingerprint":"test","objects":[{"bucket":"data","key":"datasets/scores.parquet","size_bytes":%d}]}' "$size_bytes")
  post_json /catalog/datasets "$payload"
  echo ""

  echo "=== SQL: SELECT * FROM scores ==="
  post_json /query/sql '{"sql":"SELECT * FROM scores"}'
  echo ""

  echo "=== SQL: SELECT name, score FROM scores WHERE score > 8.0 ORDER BY score DESC ==="
  post_json /query/sql '{"sql":"SELECT name, score FROM scores WHERE score > 8.0 ORDER BY score DESC"}'
  echo ""

  echo "=== SQL: SELECT COUNT(*), AVG(score) FROM scores ==="
  post_json /query/sql '{"sql":"SELECT COUNT(*) as cnt, AVG(score) as avg_score FROM scores"}'
  echo ""

  echo "=== DONE ==="
}

main "$@"