Server-side pagination for large result sets

- ResultStore: execute query, store batches server-side, serve pages on demand
- POST /query/paged → returns query_id + total_rows + page count (no rows)
- GET /query/page/{id}/{page}?size=100 → returns one page of rows (see the client sketch below)
- RecordBatch slicing for efficient page extraction from Arrow batches
- Bounded result store: keeps the 50 most recent query results in memory, evicting the oldest when full
- Tested: 100K rows → 1,000 pages of 100, any page fetchable by number
- Supervisor pattern: chunk results, serve on demand, retry-safe (idempotent GET)
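
A minimal client sketch of the two-step flow above. It assumes the service is mounted under /query at http://localhost:8080 and uses reqwest (with its json feature), tokio, and serde_json, none of which this commit adds; the field names (query_id, total_pages, row_count, sql) match the structs introduced in paged.rs and service.rs.

```rust
// Hedged sketch: POST /query/paged for a handle, then GET /query/page/{id}/{page}.
// Base URL and the reqwest/tokio/serde_json dependencies are assumptions.
use serde_json::{Value, json};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let base = "http://localhost:8080/query"; // hypothetical mount point
    let client = reqwest::Client::new();

    // Step 1: submit the SQL; the response is a handle with metadata, no rows.
    let handle: Value = client
        .post(format!("{base}/paged"))
        .json(&json!({ "sql": "SELECT * FROM call_log" }))
        .send()
        .await?
        .json()
        .await?;
    let query_id = handle["query_id"].as_str().unwrap_or_default().to_string();
    let total_pages = handle["total_pages"].as_u64().unwrap_or(0);

    // Step 2: fetch pages on demand. The GET is read-only, so retrying a
    // failed page fetch is safe.
    for page in 0..total_pages {
        let page_result: Value = client
            .get(format!("{base}/page/{query_id}/{page}?size=100"))
            .send()
            .await?
            .json()
            .await?;
        println!("page {page}: {} rows", page_result["row_count"]);
    }
    Ok(())
}
```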

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
root committed 2026-03-27 20:54:44 -05:00
commit 238cb84d26 (parent ed17216005)
18 changed files with 441 additions and 15 deletions


@@ -1,6 +1,7 @@
 pub mod cache;
 pub mod context;
 pub mod delta;
+pub mod paged;
 pub mod service;
 pub mod workspace;
 pub mod workspace_service;

crates/queryd/src/paged.rs (new file, 222 lines)

@@ -0,0 +1,222 @@
//! Paged query results with a server-side cursor.
//! Large results are stored server-side; the client fetches pages on demand.
//! Pattern matches the embedding supervisor: chunk → process → retry.
use arrow::array::RecordBatch;
use arrow::json::writer::{JsonArray, Writer as JsonWriter};
use serde::Serialize;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::context::QueryEngine;
/// A stored query result that clients page through.
#[derive(Clone)]
pub struct StoredResult {
pub query_id: String,
pub sql: String,
pub columns: Vec<ColumnInfo>,
pub total_rows: usize,
pub batches: Vec<RecordBatch>,
pub created_at: std::time::Instant,
}
#[derive(Clone, Serialize)]
pub struct ColumnInfo {
pub name: String,
pub data_type: String,
}
/// Response for initial query — metadata only, no rows.
#[derive(Serialize)]
pub struct QueryHandle {
pub query_id: String,
pub total_rows: usize,
pub columns: Vec<ColumnInfo>,
pub total_pages: usize,
pub page_size: usize,
}
/// A page of results.
#[derive(Serialize)]
pub struct PageResult {
pub query_id: String,
pub page: usize,
pub page_size: usize,
pub total_rows: usize,
pub total_pages: usize,
pub rows: serde_json::Value,
pub row_count: usize,
}
/// Server-side result store with bounded capacity (oldest result evicted when full).
#[derive(Clone)]
pub struct ResultStore {
results: Arc<RwLock<HashMap<String, StoredResult>>>,
default_page_size: usize,
max_results: usize,
}
impl ResultStore {
pub fn new(default_page_size: usize, max_results: usize) -> Self {
Self {
results: Arc::new(RwLock::new(HashMap::new())),
default_page_size,
max_results,
}
}
/// Execute query and store results. Returns handle with metadata.
pub async fn execute_and_store(
&self,
engine: &QueryEngine,
sql: &str,
) -> Result<QueryHandle, String> {
let batches = engine.query(sql).await?;
if batches.is_empty() {
let qid = format!("qr-{}", chrono::Utc::now().timestamp_millis());
return Ok(QueryHandle {
query_id: qid,
total_rows: 0,
columns: vec![],
total_pages: 0,
page_size: self.default_page_size,
});
}
let schema = batches[0].schema();
let columns: Vec<ColumnInfo> = schema.fields().iter().map(|f| ColumnInfo {
name: f.name().clone(),
data_type: f.data_type().to_string(),
}).collect();
let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
let total_pages = (total_rows + self.default_page_size - 1) / self.default_page_size;
let qid = format!("qr-{}", chrono::Utc::now().timestamp_millis());
let result = StoredResult {
query_id: qid.clone(),
sql: sql.to_string(),
columns: columns.clone(),
total_rows,
batches,
created_at: std::time::Instant::now(),
};
// Store (evict old results if at capacity)
let mut store = self.results.write().await;
if store.len() >= self.max_results {
// Evict oldest
if let Some(oldest_key) = store.iter()
.min_by_key(|(_, v)| v.created_at)
.map(|(k, _)| k.clone())
{
store.remove(&oldest_key);
}
}
store.insert(qid.clone(), result);
Ok(QueryHandle {
query_id: qid,
total_rows,
columns,
total_pages,
page_size: self.default_page_size,
})
}
/// Get a page of results. Read-only and idempotent, so client retries are safe.
pub async fn get_page(
&self,
query_id: &str,
page: usize,
page_size: Option<usize>,
) -> Result<PageResult, String> {
let store = self.results.read().await;
let result = store.get(query_id)
.ok_or_else(|| format!("query result not found: {query_id} (may have expired)"))?;
let ps = page_size.unwrap_or(self.default_page_size);
let start_row = page * ps;
if start_row >= result.total_rows {
return Ok(PageResult {
query_id: query_id.to_string(),
page,
page_size: ps,
total_rows: result.total_rows,
total_pages: (result.total_rows + ps - 1) / ps,
rows: serde_json::Value::Array(vec![]),
row_count: 0,
});
}
let end_row = (start_row + ps).min(result.total_rows);
// Extract the right rows from batches
let page_batches = slice_batches(&result.batches, start_row, end_row);
let rows = batches_to_json(&page_batches)?;
let row_count = rows.as_array().map(|a| a.len()).unwrap_or(0);
Ok(PageResult {
query_id: query_id.to_string(),
page,
page_size: ps,
total_rows: result.total_rows,
total_pages: (result.total_rows + ps - 1) / ps,
rows,
row_count,
})
}
pub fn default_page_size(&self) -> usize {
self.default_page_size
}
}
/// Slice record batches to extract rows [start, end).
fn slice_batches(batches: &[RecordBatch], start: usize, end: usize) -> Vec<RecordBatch> {
let mut result = Vec::new();
let mut current_offset = 0;
for batch in batches {
let batch_end = current_offset + batch.num_rows();
if batch_end <= start {
current_offset = batch_end;
continue;
}
if current_offset >= end {
break;
}
let local_start = if start > current_offset { start - current_offset } else { 0 };
let local_end = if end < batch_end { end - current_offset } else { batch.num_rows() };
let length = local_end - local_start;
if length > 0 {
result.push(batch.slice(local_start, length));
}
current_offset = batch_end;
}
result
}
fn batches_to_json(batches: &[RecordBatch]) -> Result<serde_json::Value, String> {
if batches.is_empty() {
return Ok(serde_json::Value::Array(vec![]));
}
let mut buf = Vec::new();
let mut writer = JsonWriter::<_, JsonArray>::new(&mut buf);
for batch in batches {
writer.write(batch).map_err(|e| format!("JSON write: {e}"))?;
}
writer.finish().map_err(|e| format!("JSON finish: {e}"))?;
drop(writer);
serde_json::from_slice(&buf).map_err(|e| format!("JSON parse: {e}"))
}
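
To illustrate the batch-boundary behavior of slice_batches above, a minimal test sketch. It is not part of this commit: it assumes a single Int32 column and a #[cfg(test)] module at the bottom of paged.rs.

```rust
#[cfg(test)]
mod paging_sketch {
    use std::sync::Arc;

    use arrow::array::{Int32Array, RecordBatch};
    use arrow::datatypes::{DataType, Field, Schema};

    /// Build a single-column batch of `n` consecutive integers starting at `start`.
    fn batch(start: i32, n: i32) -> RecordBatch {
        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
        let ids = Int32Array::from((start..start + n).collect::<Vec<_>>());
        RecordBatch::try_new(schema, vec![Arc::new(ids)]).unwrap()
    }

    #[test]
    fn page_slice_spans_batch_boundaries() {
        // Two 150-row batches; the page covering rows 100..200 must take the
        // tail of the first batch and the head of the second.
        let batches = vec![batch(0, 150), batch(150, 150)];
        let page = super::slice_batches(&batches, 100, 200);
        assert_eq!(page.len(), 2);
        let total: usize = page.iter().map(|b| b.num_rows()).sum();
        assert_eq!(total, 100);
    }
}
```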


@@ -2,7 +2,7 @@ use arrow::array::RecordBatch;
 use arrow::json::writer::{JsonArray, Writer as JsonWriter};
 use axum::{
     Json, Router,
-    extract::State,
+    extract::{Path, Query, State},
     http::StatusCode,
     response::IntoResponse,
     routing::{get, post},
@@ -12,16 +12,29 @@ use serde::{Deserialize, Serialize};
 use crate::cache::CacheStats;
 use crate::context::QueryEngine;
 use crate::delta;
+use crate::paged::ResultStore;
+
+#[derive(Clone)]
+pub struct QueryState {
+    pub engine: QueryEngine,
+    pub result_store: ResultStore,
+}
 
 pub fn router(engine: QueryEngine) -> Router {
+    let state = QueryState {
+        engine: engine.clone(),
+        result_store: ResultStore::new(100, 50), // 100 rows/page, keep 50 results
+    };
     Router::new()
         .route("/health", get(health))
         .route("/sql", post(execute_query))
+        .route("/paged", post(paged_query))
+        .route("/page/{query_id}/{page}", get(get_page))
         .route("/cache/pin", post(pin_dataset))
         .route("/cache/evict", post(evict_dataset))
         .route("/cache/stats", get(cache_stats))
         .route("/compact", post(compact_dataset))
-        .with_state(engine)
+        .with_state(state)
 }
 
 async fn health() -> &'static str {
@@ -60,12 +73,12 @@ fn batches_to_json(batches: &[RecordBatch]) -> Result<serde_json::Value, String>
 }
 
 async fn execute_query(
-    State(engine): State<QueryEngine>,
+    State(state): State<QueryState>,
     Json(req): Json<QueryRequest>,
 ) -> impl IntoResponse {
     tracing::info!("executing query: {}", req.sql);
-    match engine.query(&req.sql).await {
+    match state.engine.query(&req.sql).await {
         Ok(batches) => {
             if batches.is_empty() {
                 return Ok(Json(QueryResponse {
@@ -96,6 +109,35 @@ async fn execute_query(
     }
 }
 
+// --- Paged Queries (large result sets) ---
+
+async fn paged_query(
+    State(state): State<QueryState>,
+    Json(req): Json<QueryRequest>,
+) -> impl IntoResponse {
+    tracing::info!("paged query: {}", req.sql);
+    match state.result_store.execute_and_store(&state.engine, &req.sql).await {
+        Ok(handle) => Ok(Json(handle)),
+        Err(e) => Err((StatusCode::BAD_REQUEST, e)),
+    }
+}
+
+#[derive(Deserialize)]
+struct PageQuery {
+    size: Option<usize>,
+}
+
+async fn get_page(
+    State(state): State<QueryState>,
+    Path((query_id, page)): Path<(String, usize)>,
+    Query(q): Query<PageQuery>,
+) -> impl IntoResponse {
+    match state.result_store.get_page(&query_id, page, q.size).await {
+        Ok(result) => Ok(Json(result)),
+        Err(e) => Err((StatusCode::NOT_FOUND, e)),
+    }
+}
+
 // --- Cache Management ---
 
 #[derive(Deserialize)]
@@ -104,28 +146,28 @@ struct CacheRequest {
 }
 
 async fn pin_dataset(
-    State(engine): State<QueryEngine>,
+    State(state): State<QueryState>,
     Json(req): Json<CacheRequest>,
 ) -> impl IntoResponse {
-    match engine.pin_dataset(&req.dataset).await {
+    match state.engine.pin_dataset(&req.dataset).await {
         Ok(()) => Ok((StatusCode::OK, format!("pinned: {}", req.dataset))),
         Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
     }
 }
 
 async fn evict_dataset(
-    State(engine): State<QueryEngine>,
+    State(state): State<QueryState>,
     Json(req): Json<CacheRequest>,
 ) -> impl IntoResponse {
-    if engine.cache().evict(&req.dataset).await {
+    if state.engine.cache().evict(&req.dataset).await {
         (StatusCode::OK, format!("evicted: {}", req.dataset))
     } else {
         (StatusCode::NOT_FOUND, format!("not cached: {}", req.dataset))
     }
 }
 
-async fn cache_stats(State(engine): State<QueryEngine>) -> impl IntoResponse {
-    let stats = engine.cache().stats().await;
+async fn cache_stats(State(state): State<QueryState>) -> impl IntoResponse {
+    let stats = state.engine.cache().stats().await;
     Json(stats)
 }
 
@@ -139,11 +181,11 @@ struct CompactRequest {
 }
 
 async fn compact_dataset(
-    State(engine): State<QueryEngine>,
+    State(state): State<QueryState>,
     Json(req): Json<CompactRequest>,
 ) -> impl IntoResponse {
     match delta::compact(
-        engine.store(),
+        state.engine.store(),
         &req.dataset,
         &req.base_key,
         req.primary_key.as_deref(),


@@ -0,0 +1,23 @@
{
"id": "6d0002ef-28de-4b92-abed-0bfb7ab5fb6f",
"name": "candidates",
"schema_fingerprint": "auto",
"objects": [
{
"bucket": "data",
"key": "datasets/candidates.parquet",
"size_bytes": 10592165,
"created_at": "2026-03-28T01:54:06.898505222Z"
}
],
"created_at": "2026-03-28T01:54:06.898506121Z",
"updated_at": "2026-03-28T01:54:06.898506121Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [],
"lineage": null,
"freshness": null,
"tags": [],
"row_count": null
}


@@ -0,0 +1,23 @@
{
"id": "73a4ef71-5b2a-4124-a2c2-fcefdde99624",
"name": "job_orders",
"schema_fingerprint": "auto",
"objects": [
{
"bucket": "data",
"key": "datasets/job_orders.parquet",
"size_bytes": 905534,
"created_at": "2026-03-28T01:54:07.007332060Z"
}
],
"created_at": "2026-03-28T01:54:07.007332775Z",
"updated_at": "2026-03-28T01:54:07.007332775Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [],
"lineage": null,
"freshness": null,
"tags": [],
"row_count": null
}


@@ -0,0 +1,23 @@
{
"id": "b56694bb-f4f7-447d-b4d6-ac89103a6a3a",
"name": "call_log",
"schema_fingerprint": "auto",
"objects": [
{
"bucket": "data",
"key": "datasets/call_log.parquet",
"size_bytes": 35951077,
"created_at": "2026-03-28T01:54:15.457573021Z"
}
],
"created_at": "2026-03-28T01:54:15.457573701Z",
"updated_at": "2026-03-28T01:54:15.457573701Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [],
"lineage": null,
"freshness": null,
"tags": [],
"row_count": null
}


@@ -0,0 +1,23 @@
{
"id": "daa5a11b-6898-40ba-a811-3bd4b6f5b750",
"name": "email_log",
"schema_fingerprint": "auto",
"objects": [
{
"bucket": "data",
"key": "datasets/email_log.parquet",
"size_bytes": 16768671,
"created_at": "2026-03-28T01:54:17.356056562Z"
}
],
"created_at": "2026-03-28T01:54:17.356057529Z",
"updated_at": "2026-03-28T01:54:17.356057529Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [],
"lineage": null,
"freshness": null,
"tags": [],
"row_count": null
}


@@ -0,0 +1,23 @@
{
"id": "dd396ee4-3a1a-4939-af60-84d409eebed4",
"name": "clients",
"schema_fingerprint": "auto",
"objects": [
{
"bucket": "data",
"key": "datasets/clients.parquet",
"size_bytes": 21971,
"created_at": "2026-03-28T01:54:06.904379300Z"
}
],
"created_at": "2026-03-28T01:54:06.904379793Z",
"updated_at": "2026-03-28T01:54:06.904379793Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [],
"lineage": null,
"freshness": null,
"tags": [],
"row_count": null
}


@@ -0,0 +1,23 @@
{
"id": "f85285e3-111a-4a55-aa58-05fd740ed862",
"name": "timesheets",
"schema_fingerprint": "auto",
"objects": [
{
"bucket": "data",
"key": "datasets/timesheets.parquet",
"size_bytes": 17539932,
"created_at": "2026-03-28T01:54:11.947773535Z"
}
],
"created_at": "2026-03-28T01:54:11.947778065Z",
"updated_at": "2026-03-28T01:54:11.947778065Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [],
"lineage": null,
"freshness": null,
"tags": [],
"row_count": null
}


@@ -0,0 +1,23 @@
{
"id": "ffa72582-dc11-4454-a46e-35839a7d04bb",
"name": "placements",
"schema_fingerprint": "auto",
"objects": [
{
"bucket": "data",
"key": "datasets/placements.parquet",
"size_bytes": 1213820,
"created_at": "2026-03-28T01:54:07.109218264Z"
}
],
"created_at": "2026-03-28T01:54:07.109219070Z",
"updated_at": "2026-03-28T01:54:07.109219070Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [],
"lineage": null,
"freshness": null,
"tags": [],
"row_count": null
}

7 binary files not shown.


@@ -1003,8 +1003,8 @@ fn ResultsTable(response: QueryResponse) -> Element {
         if response.row_count == 0 {
             div { class: "empty-sm", "no rows returned" }
         } else if let Some(rows) = rows {
-            if rows.len() > 200 {
-                div { class: "results-info", "Showing first 200 of {response.row_count} rows" }
+            if rows.len() > 500 {
+                div { class: "results-info", "Showing first 500 of {response.row_count} rows (use SQL tab with LIMIT for larger)" }
             }
             div { class: "table-wrap",
                 table {
@@ -1016,7 +1016,7 @@ fn ResultsTable(response: QueryResponse) -> Element {
                     }
                 }
                 tbody {
-                    for row in rows.iter().take(200) {
+                    for row in rows.iter().take(500) {
                         tr {
                             for col in response.columns.iter() {
                                 td { {format_cell(row.get(&col.name))} }