use arrow::array::RecordBatch;
use arrow::json::writer::{JsonArray, Writer as JsonWriter};
use axum::{
    Json, Router,
    extract::{Path, Query, State},
    http::StatusCode,
    response::IntoResponse,
    routing::{get, post},
};
use serde::{Deserialize, Serialize};

use crate::context::QueryEngine;
use crate::delta;
use crate::paged::ResultStore;

/// Shared state cloned into every handler.
#[derive(Clone)]
pub struct QueryState {
    pub engine: QueryEngine,
    pub result_store: ResultStore,
}

pub fn router(engine: QueryEngine) -> Router {
    let state = QueryState {
        engine: engine.clone(),
        result_store: ResultStore::new(100, 50), // 100 rows/page, keep 50 results
    };
    Router::new()
        .route("/health", get(health))
        .route("/sql", post(execute_query))
        .route("/paged", post(paged_query))
        .route("/page/{query_id}/{page}", get(get_page))
        .route("/cache/pin", post(pin_dataset))
        .route("/cache/evict", post(evict_dataset))
        .route("/cache/stats", get(cache_stats))
        .route("/compact", post(compact_dataset))
        .with_state(state)
}

async fn health() -> &'static str {
    "queryd ok"
}

// --- SQL Query ---

#[derive(Deserialize)]
struct QueryRequest {
    sql: String,
}

#[derive(Serialize)]
struct QueryResponse {
    columns: Vec<ColumnInfo>,
    rows: serde_json::Value,
    row_count: usize,
}

#[derive(Serialize)]
struct ColumnInfo {
    name: String,
    data_type: String,
}

/// Serialize a slice of record batches into a single JSON array value.
fn batches_to_json(batches: &[RecordBatch]) -> Result<serde_json::Value, String> {
    let mut buf = Vec::new();
    let mut writer = JsonWriter::<_, JsonArray>::new(&mut buf);
    for batch in batches {
        writer.write(batch).map_err(|e| format!("JSON write error: {e}"))?;
    }
    writer.finish().map_err(|e| format!("JSON finish error: {e}"))?;
    // The writer holds a mutable borrow of `buf`; release it before parsing.
    drop(writer);
    serde_json::from_slice(&buf).map_err(|e| format!("JSON parse error: {e}"))
}

async fn execute_query(
    State(state): State<QueryState>,
    Json(req): Json<QueryRequest>,
) -> impl IntoResponse {
    tracing::info!("executing query: {}", req.sql);
    match state.engine.query(&req.sql).await {
        Ok(batches) => {
            if batches.is_empty() {
                return Ok(Json(QueryResponse {
                    columns: vec![],
                    rows: serde_json::Value::Array(vec![]),
                    row_count: 0,
                }));
            }
            let schema = batches[0].schema();
            let columns: Vec<ColumnInfo> = schema
                .fields()
                .iter()
                .map(|f| ColumnInfo {
                    name: f.name().clone(),
                    data_type: f.data_type().to_string(),
                })
                .collect();
            let rows = match batches_to_json(&batches) {
                Ok(rows) => rows,
                Err(e) => return Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
            };
            let row_count = rows.as_array().map(|a| a.len()).unwrap_or(0);
            Ok(Json(QueryResponse {
                columns,
                rows,
                row_count,
            }))
        }
        Err(e) => Err((StatusCode::BAD_REQUEST, e)),
    }
}

// --- Paged Queries (large result sets) ---

async fn paged_query(
    State(state): State<QueryState>,
    Json(req): Json<QueryRequest>,
) -> impl IntoResponse {
    tracing::info!("paged query: {}", req.sql);
    match state.result_store.execute_and_store(&state.engine, &req.sql).await {
        Ok(handle) => Ok(Json(handle)),
        Err(e) => Err((StatusCode::BAD_REQUEST, e)),
    }
}

#[derive(Deserialize)]
struct PageQuery {
    size: Option<usize>,
}

async fn get_page(
    State(state): State<QueryState>,
    Path((query_id, page)): Path<(String, usize)>,
    Query(q): Query<PageQuery>,
) -> impl IntoResponse {
    match state.result_store.get_page(&query_id, page, q.size).await {
        Ok(result) => Ok(Json(result)),
        Err(e) => Err((StatusCode::NOT_FOUND, e)),
    }
}
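// Illustrative client flow for the paged endpoints above. This is a sketch,
// not a contract: the JSON shape of the stored-result handle and of each page
// is whatever crate::paged::ResultStore serializes, so the "query_id" field
// named here is an assumption.
//
//   POST /paged                        {"sql": "SELECT * FROM events"}
//     -> handle JSON, assumed to carry the query_id used in the page URLs
//   GET  /page/{query_id}/0?size=25    first page, overriding the default
//                                      page size configured above (100 rows)
//   GET  /page/{query_id}/1            next page at the default size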
// --- Cache Management ---

#[derive(Deserialize)]
struct CacheRequest {
    dataset: String,
}

async fn pin_dataset(
    State(state): State<QueryState>,
    Json(req): Json<CacheRequest>,
) -> impl IntoResponse {
    match state.engine.pin_dataset(&req.dataset).await {
        Ok(()) => Ok((StatusCode::OK, format!("pinned: {}", req.dataset))),
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
}

async fn evict_dataset(
    State(state): State<QueryState>,
    Json(req): Json<CacheRequest>,
) -> impl IntoResponse {
    if state.engine.cache().evict(&req.dataset).await {
        (StatusCode::OK, format!("evicted: {}", req.dataset))
    } else {
        (StatusCode::NOT_FOUND, format!("not cached: {}", req.dataset))
    }
}

async fn cache_stats(State(state): State<QueryState>) -> impl IntoResponse {
    let stats = state.engine.cache().stats().await;
    Json(stats)
}

// --- Compaction ---

#[derive(Deserialize)]
struct CompactRequest {
    dataset: String,
    base_key: String,
    primary_key: Option<String>,
}

async fn compact_dataset(
    State(state): State<QueryState>,
    Json(req): Json<CompactRequest>,
) -> impl IntoResponse {
    // Phase E: pull tombstones for this dataset and let compact physically
    // drop those rows. After a successful rewrite, clear the tombstone log:
    // the rows are gone from disk, so the tombstones have done their job.
    let tombstones = state
        .engine
        .catalog()
        .list_tombstones(&req.dataset)
        .await
        .unwrap_or_default();
    match delta::compact(
        state.engine.store(),
        &req.dataset,
        &req.base_key,
        req.primary_key.as_deref(),
        &tombstones,
    )
    .await
    {
        Ok(result) => {
            if result.rows_dropped_by_tombstones > 0 {
                if let Err(e) = state.engine.catalog().tombstones().clear(&req.dataset).await {
                    tracing::warn!("post-compact tombstone clear failed: {e}");
                }
            }
            Ok(Json(result))
        }
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
}
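// Minimal serving sketch, assuming a tokio runtime; the QueryEngine
// constructor is hypothetical (it lives in crate::context, exact API unknown):
//
//   let engine = QueryEngine::new(/* ... */).await?;
//   let app = router(engine);
//   let listener = tokio::net::TcpListener::bind("0.0.0.0:8080").await?;
//   axum::serve(listener, app).await?;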