Scrum iter 9 flagged that gateway's audit row stores null for `latency_ms` — required for PRD audit-log parity. The field didn't exist; adding it now with a single Instant captured at handler entry, populated on both response paths (empty batches + non-empty result). No behavior change for existing clients — they read the JSON and ignore unknown fields. Audit-log consumers can now surface p50/p99 latency from the response body instead of inferring from tracing. Narrow fingerprint on crates/queryd already has this as a known BoundaryViolation pattern (`latency_ms-row_count` key) — iter 10 on any queryd file will see the preamble say "this was fixed in iter 10" when it runs. Workspace warnings unchanged at 11. 7 policy tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
333 lines
10 KiB
Rust
333 lines
10 KiB
Rust
use arrow::array::RecordBatch;
|
|
use arrow::json::writer::{JsonArray, Writer as JsonWriter};
|
|
use axum::{
|
|
Json, Router,
|
|
extract::{Path, Query, State},
|
|
http::StatusCode,
|
|
response::IntoResponse,
|
|
routing::{get, post},
|
|
};
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use std::sync::Arc;
|
|
use truth::{RuleAction, TruthStore};
|
|
|
|
use crate::context::QueryEngine;
|
|
use crate::delta;
|
|
use crate::paged::ResultStore;
|
|
|
|
#[derive(Clone)]
|
|
pub struct QueryState {
|
|
pub engine: QueryEngine,
|
|
pub result_store: ResultStore,
|
|
// Policy gate for incoming SQL. Every /sql and /paged request is
|
|
// evaluated against this store before hitting DataFusion. Added for
|
|
// P42-002 ("raw SQL forwarded without schema or policy gate") after
|
|
// the scrum master's queryd/service.rs finding looped across iters
|
|
// 3-5 without ever being reachable by the 6-line auto-applier.
|
|
pub truth: Arc<TruthStore>,
|
|
}
|
|
|
|
pub fn router(engine: QueryEngine) -> Router {
|
|
router_with_truth(engine, Arc::new(truth::sql_query_guard_store()))
|
|
}
|
|
|
|
/// Test/integration hook: construct the router with a caller-supplied
|
|
/// TruthStore so tests can assert reject/pass behavior deterministically
|
|
/// without depending on the default needle list.
|
|
pub fn router_with_truth(engine: QueryEngine, truth: Arc<TruthStore>) -> Router {
|
|
let state = QueryState {
|
|
engine: engine.clone(),
|
|
result_store: ResultStore::new(100, 50), // 100 rows/page, keep 50 results
|
|
truth,
|
|
};
|
|
Router::new()
|
|
.route("/health", get(health))
|
|
.route("/sql", post(execute_query))
|
|
.route("/paged", post(paged_query))
|
|
.route("/page/{query_id}/{page}", get(get_page))
|
|
.route("/cache/pin", post(pin_dataset))
|
|
.route("/cache/evict", post(evict_dataset))
|
|
.route("/cache/stats", get(cache_stats))
|
|
.route("/compact", post(compact_dataset))
|
|
.with_state(state)
|
|
}
|
|
|
|
async fn health() -> &'static str {
|
|
"queryd ok"
|
|
}
|
|
|
|
// --- SQL Query ---
|
|
|
|
#[derive(Deserialize)]
|
|
struct QueryRequest {
|
|
sql: String,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
struct QueryResponse {
|
|
columns: Vec<ColumnInfo>,
|
|
rows: serde_json::Value,
|
|
row_count: usize,
|
|
// Elapsed wall time from handler entry to response. Required for
|
|
// audit-log parity — gateway's audit row previously stored null here.
|
|
// Scrum iter 9 finding, populated from std::time::Instant captured
|
|
// at the top of execute_query / paged_query.
|
|
latency_ms: u64,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
struct ColumnInfo {
|
|
name: String,
|
|
data_type: String,
|
|
}
|
|
|
|
fn batches_to_json(batches: &[RecordBatch]) -> Result<serde_json::Value, String> {
|
|
let mut buf = Vec::new();
|
|
let mut writer = JsonWriter::<_, JsonArray>::new(&mut buf);
|
|
for batch in batches {
|
|
writer.write(batch).map_err(|e| format!("JSON write error: {e}"))?;
|
|
}
|
|
writer.finish().map_err(|e| format!("JSON finish error: {e}"))?;
|
|
drop(writer);
|
|
serde_json::from_slice(&buf).map_err(|e| format!("JSON parse error: {e}"))
|
|
}
|
|
|
|
/// Evaluate the request SQL against the configured TruthStore. Returns
|
|
/// the Reject/Block message on the first failing mandatory rule so the
|
|
/// handler can short-circuit. Returns None when all rules pass (or when
|
|
/// the failures' declared action is non-mandatory like Redact/Pass).
|
|
fn sql_policy_check(truth: &TruthStore, sql: &str) -> Option<String> {
|
|
let ctx = serde_json::json!({ "sql": sql });
|
|
for outcome in truth.evaluate("sql_query", &ctx) {
|
|
if !outcome.passed {
|
|
// FieldEmpty / FieldContainsAny etc. are enforced only when
|
|
// condition HOLDS (i.e. passed=true). Below means "passed=false",
|
|
// so the rule condition did not hold — no enforcement.
|
|
continue;
|
|
}
|
|
match &outcome.action {
|
|
RuleAction::Reject { message } | RuleAction::Block { message } => {
|
|
return Some(message.clone());
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
async fn execute_query(
|
|
State(state): State<QueryState>,
|
|
Json(req): Json<QueryRequest>,
|
|
) -> impl IntoResponse {
|
|
let started = std::time::Instant::now();
|
|
tracing::info!("executing query: {}", req.sql);
|
|
|
|
if let Some(reason) = sql_policy_check(&state.truth, &req.sql) {
|
|
tracing::warn!("sql rejected by truth gate: {reason}");
|
|
return Err((StatusCode::FORBIDDEN, reason));
|
|
}
|
|
|
|
match state.engine.query(&req.sql).await {
|
|
Ok(batches) => {
|
|
if batches.is_empty() {
|
|
return Ok(Json(QueryResponse {
|
|
columns: vec![],
|
|
rows: serde_json::Value::Array(vec![]),
|
|
row_count: 0,
|
|
latency_ms: started.elapsed().as_millis() as u64,
|
|
}));
|
|
}
|
|
|
|
let schema = batches[0].schema();
|
|
let columns: Vec<ColumnInfo> = schema.fields().iter().map(|f| ColumnInfo {
|
|
name: f.name().clone(),
|
|
data_type: f.data_type().to_string(),
|
|
}).collect();
|
|
|
|
let rows = batches_to_json(&batches)
|
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
|
|
|
|
let row_count = rows.as_array().map(|a| a.len()).unwrap_or(0);
|
|
|
|
Ok(Json(QueryResponse {
|
|
columns,
|
|
rows,
|
|
row_count,
|
|
latency_ms: started.elapsed().as_millis() as u64,
|
|
}))
|
|
}
|
|
Err(e) => Err((StatusCode::BAD_REQUEST, e)),
|
|
}
|
|
}
|
|
|
|
// --- Paged Queries (large result sets) ---
|
|
|
|
async fn paged_query(
|
|
State(state): State<QueryState>,
|
|
Json(req): Json<QueryRequest>,
|
|
) -> impl IntoResponse {
|
|
tracing::info!("paged query: {}", req.sql);
|
|
if let Some(reason) = sql_policy_check(&state.truth, &req.sql) {
|
|
tracing::warn!("paged sql rejected by truth gate: {reason}");
|
|
return Err((StatusCode::FORBIDDEN, reason));
|
|
}
|
|
match state.result_store.execute_and_store(&state.engine, &req.sql).await {
|
|
Ok(handle) => Ok(Json(handle)),
|
|
Err(e) => Err((StatusCode::BAD_REQUEST, e)),
|
|
}
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
struct PageQuery {
|
|
size: Option<usize>,
|
|
}
|
|
|
|
async fn get_page(
|
|
State(state): State<QueryState>,
|
|
Path((query_id, page)): Path<(String, usize)>,
|
|
Query(q): Query<PageQuery>,
|
|
) -> impl IntoResponse {
|
|
match state.result_store.get_page(&query_id, page, q.size).await {
|
|
Ok(result) => Ok(Json(result)),
|
|
Err(e) => Err((StatusCode::NOT_FOUND, e)),
|
|
}
|
|
}
|
|
|
|
// --- Cache Management ---
|
|
|
|
#[derive(Deserialize)]
|
|
struct CacheRequest {
|
|
dataset: String,
|
|
}
|
|
|
|
async fn pin_dataset(
|
|
State(state): State<QueryState>,
|
|
Json(req): Json<CacheRequest>,
|
|
) -> impl IntoResponse {
|
|
match state.engine.pin_dataset(&req.dataset).await {
|
|
Ok(()) => Ok((StatusCode::OK, format!("pinned: {}", req.dataset))),
|
|
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
|
}
|
|
}
|
|
|
|
async fn evict_dataset(
|
|
State(state): State<QueryState>,
|
|
Json(req): Json<CacheRequest>,
|
|
) -> impl IntoResponse {
|
|
if state.engine.cache().evict(&req.dataset).await {
|
|
(StatusCode::OK, format!("evicted: {}", req.dataset))
|
|
} else {
|
|
(StatusCode::NOT_FOUND, format!("not cached: {}", req.dataset))
|
|
}
|
|
}
|
|
|
|
async fn cache_stats(State(state): State<QueryState>) -> impl IntoResponse {
|
|
let stats = state.engine.cache().stats().await;
|
|
Json(stats)
|
|
}
|
|
|
|
// --- Compaction ---
|
|
|
|
#[derive(Deserialize)]
|
|
struct CompactRequest {
|
|
dataset: String,
|
|
base_key: String,
|
|
primary_key: Option<String>,
|
|
}
|
|
|
|
async fn compact_dataset(
|
|
State(state): State<QueryState>,
|
|
Json(req): Json<CompactRequest>,
|
|
) -> impl IntoResponse {
|
|
// Phase E: pull tombstones for this dataset and let compact physically
|
|
// drop those rows. After a successful rewrite, clear the tombstone log
|
|
// — the rows are gone from disk, the tombstones have done their job.
|
|
let tombstones = state
|
|
.engine
|
|
.catalog()
|
|
.list_tombstones(&req.dataset)
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
match delta::compact(
|
|
state.engine.store(),
|
|
&req.dataset,
|
|
&req.base_key,
|
|
req.primary_key.as_deref(),
|
|
&tombstones,
|
|
).await {
|
|
Ok(result) => {
|
|
if result.rows_dropped_by_tombstones > 0 {
|
|
if let Err(e) = state.engine.catalog().tombstones().clear(&req.dataset).await {
|
|
tracing::warn!("post-compact tombstone clear failed: {e}");
|
|
}
|
|
}
|
|
Ok(Json(result))
|
|
}
|
|
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod sql_policy_tests {
|
|
use super::*;
|
|
use truth::sql_query_guard_store;
|
|
|
|
// These tests exercise the policy gate without spinning up a DataFusion
|
|
// engine — they only need `TruthStore`. Purpose: prove the P42-002
|
|
// enforcement point actually rejects destructive SQL. This is the
|
|
// regression guard for the queryd/service.rs finding that looped
|
|
// across scrum iters 3-5.
|
|
|
|
#[test]
|
|
fn blocks_drop_table() {
|
|
let store = sql_query_guard_store();
|
|
let reason = sql_policy_check(&store, "DROP TABLE users").expect("must reject");
|
|
assert!(reason.contains("destructive"), "reason: {reason}");
|
|
}
|
|
|
|
#[test]
|
|
fn blocks_delete_from() {
|
|
let store = sql_query_guard_store();
|
|
assert!(sql_policy_check(&store, "delete from t where 1=1").is_some());
|
|
}
|
|
|
|
#[test]
|
|
fn blocks_truncate() {
|
|
let store = sql_query_guard_store();
|
|
assert!(sql_policy_check(&store, "TRUNCATE workers").is_some());
|
|
}
|
|
|
|
#[test]
|
|
fn blocks_empty_sql() {
|
|
let store = sql_query_guard_store();
|
|
assert!(sql_policy_check(&store, "").is_some());
|
|
}
|
|
|
|
#[test]
|
|
fn allows_benign_select() {
|
|
let store = sql_query_guard_store();
|
|
assert!(sql_policy_check(&store, "SELECT count(*) FROM workers").is_none());
|
|
}
|
|
|
|
#[test]
|
|
fn allows_select_with_deleted_word_in_column() {
|
|
// Substring match is narrow ("delete from", not "delete"), so a
|
|
// column named `deleted_at` doesn't trip the guard. Important
|
|
// check — false positives on benign queries would make the gate
|
|
// unusable in practice.
|
|
let store = sql_query_guard_store();
|
|
assert!(
|
|
sql_policy_check(&store, "SELECT deleted_at FROM t").is_none(),
|
|
"column names containing 'delete' must not be rejected"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn case_insensitive_match_catches_mixed_case() {
|
|
let store = sql_query_guard_store();
|
|
assert!(sql_policy_check(&store, "Drop Table X").is_some());
|
|
}
|
|
}
|