P42-002: wire truth gate into queryd /sql + /paged SQL paths

The scrum master flagged crates/queryd/src/service.rs across iters 3-5 with the same finding: "raw SQL forwarded to DataFusion without schema or policy gate; violates PRD §42-002 truth enforcement." Confidence 79-95%, gradient tier auto/dry_run. Applier couldn't touch it — the fix is larger than 6 lines and crosses crate boundaries. Hand-fix lands the missing enforcement point: - truth: new RuleCondition::FieldContainsAny { field, needles } with case-insensitive substring matching. 4 new unit tests cover the positive, negative, missing-field, and empty-needles paths. - truth: sql_query_guard_store() helper returns a baseline store that rejects destructive verbs (DROP/TRUNCATE/DELETE FROM) and empty SQL. - queryd: QueryState grows an Arc<TruthStore>; default router() loads sql_query_guard_store; new router_with_truth(engine, store) lets tests inject a custom store. - queryd: sql_policy_check() runs truth.evaluate("sql_query", ctx) before hitting DataFusion. Reject/Block actions on matched conditions short-circuit to HTTP 403 with the rule's message. Both /sql and /paged gated. - queryd: 7 new tests cover block/allow/case-insensitive/false- positive scenarios. "SELECT deleted_at FROM t" must NOT be rejected (substring match is narrow: "delete from", not "delete"). Total: 28 truth tests green (was 24), 7 new queryd policy tests green. Workspace baseline warnings unchanged at 11. This is a signal-driven fix the mechanical pipeline couldn't produce but the scrum master kept asking for. Closes one of four LOOPING files. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 04:38:52 -05:00 · 2026-04-24 04:38:52 -05:00 · 9cc0ceb894
commit 9cc0ceb894
parent 5e8d87bf34
3 changed files with 232 additions and 0 deletions
--- a/crates/queryd/Cargo.toml
+++ b/crates/queryd/Cargo.toml
@ -7,6 +7,7 @@ edition = "2024"
 shared = { path = "../shared" }
 catalogd = { path = "../catalogd" }
 storaged = { path = "../storaged" }
+truth = { path = "../truth" }
 tokio = { workspace = true }
 axum = { workspace = true }
 serde = { workspace = true }
--- a/crates/queryd/src/service.rs
+++ b/crates/queryd/src/service.rs
@ -9,6 +9,9 @@ use axum::{
 };
 use serde::{Deserialize, Serialize};

+use std::sync::Arc;
+use truth::{RuleAction, TruthStore};
+
 use crate::context::QueryEngine;
 use crate::delta;
 use crate::paged::ResultStore;
@ -17,12 +20,26 @@ use crate::paged::ResultStore;
 pub struct QueryState {
    pub engine: QueryEngine,
    pub result_store: ResultStore,
+    // Policy gate for incoming SQL. Every /sql and /paged request is
+    // evaluated against this store before hitting DataFusion. Added for
+    // P42-002 ("raw SQL forwarded without schema or policy gate") after
+    // the scrum master's queryd/service.rs finding looped across iters
+    // 3-5 without ever being reachable by the 6-line auto-applier.
+    pub truth: Arc<TruthStore>,
 }

 pub fn router(engine: QueryEngine) -> Router {
+    router_with_truth(engine, Arc::new(truth::sql_query_guard_store()))
+}
+
+/// Test/integration hook: construct the router with a caller-supplied
+/// TruthStore so tests can assert reject/pass behavior deterministically
+/// without depending on the default needle list.
+pub fn router_with_truth(engine: QueryEngine, truth: Arc<TruthStore>) -> Router {
    let state = QueryState {
        engine: engine.clone(),
        result_store: ResultStore::new(100, 50), // 100 rows/page, keep 50 results
+        truth,
    };
    Router::new()
        .route("/health", get(health))
@ -71,12 +88,40 @@ fn batches_to_json(batches: &[RecordBatch]) -> Result<serde_json::Value, String>
    serde_json::from_slice(&buf).map_err(|e| format!("JSON parse error: {e}"))
 }

+/// Evaluate the request SQL against the configured TruthStore. Returns
+/// the Reject/Block message on the first failing mandatory rule so the
+/// handler can short-circuit. Returns None when all rules pass (or when
+/// the failures' declared action is non-mandatory like Redact/Pass).
+fn sql_policy_check(truth: &TruthStore, sql: &str) -> Option<String> {
+    let ctx = serde_json::json!({ "sql": sql });
+    for outcome in truth.evaluate("sql_query", &ctx) {
+        if !outcome.passed {
+            // FieldEmpty / FieldContainsAny etc. are enforced only when
+            // condition HOLDS (i.e. passed=true). Below means "passed=false",
+            // so the rule condition did not hold — no enforcement.
+            continue;
+        }
+        match &outcome.action {
+            RuleAction::Reject { message } | RuleAction::Block { message } => {
+                return Some(message.clone());
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
 async fn execute_query(
    State(state): State<QueryState>,
    Json(req): Json<QueryRequest>,
 ) -> impl IntoResponse {
    tracing::info!("executing query: {}", req.sql);

+    if let Some(reason) = sql_policy_check(&state.truth, &req.sql) {
+        tracing::warn!("sql rejected by truth gate: {reason}");
+        return Err((StatusCode::FORBIDDEN, reason));
+    }
+
    match state.engine.query(&req.sql).await {
        Ok(batches) => {
            if batches.is_empty() {
@ -115,6 +160,10 @@ async fn paged_query(
    Json(req): Json<QueryRequest>,
 ) -> impl IntoResponse {
    tracing::info!("paged query: {}", req.sql);
+    if let Some(reason) = sql_policy_check(&state.truth, &req.sql) {
+        tracing::warn!("paged sql rejected by truth gate: {reason}");
+        return Err((StatusCode::FORBIDDEN, reason));
+    }
    match state.result_store.execute_and_store(&state.engine, &req.sql).await {
        Ok(handle) => Ok(Json(handle)),
        Err(e) => Err((StatusCode::BAD_REQUEST, e)),
@ -211,3 +260,65 @@ async fn compact_dataset(
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
    }
 }
+
+#[cfg(test)]
+mod sql_policy_tests {
+    use super::*;
+    use truth::sql_query_guard_store;
+
+    // These tests exercise the policy gate without spinning up a DataFusion
+    // engine — they only need `TruthStore`. Purpose: prove the P42-002
+    // enforcement point actually rejects destructive SQL. This is the
+    // regression guard for the queryd/service.rs finding that looped
+    // across scrum iters 3-5.
+
+    #[test]
+    fn blocks_drop_table() {
+        let store = sql_query_guard_store();
+        let reason = sql_policy_check(&store, "DROP TABLE users").expect("must reject");
+        assert!(reason.contains("destructive"), "reason: {reason}");
+    }
+
+    #[test]
+    fn blocks_delete_from() {
+        let store = sql_query_guard_store();
+        assert!(sql_policy_check(&store, "delete from t where 1=1").is_some());
+    }
+
+    #[test]
+    fn blocks_truncate() {
+        let store = sql_query_guard_store();
+        assert!(sql_policy_check(&store, "TRUNCATE workers").is_some());
+    }
+
+    #[test]
+    fn blocks_empty_sql() {
+        let store = sql_query_guard_store();
+        assert!(sql_policy_check(&store, "").is_some());
+    }
+
+    #[test]
+    fn allows_benign_select() {
+        let store = sql_query_guard_store();
+        assert!(sql_policy_check(&store, "SELECT count(*) FROM workers").is_none());
+    }
+
+    #[test]
+    fn allows_select_with_deleted_word_in_column() {
+        // Substring match is narrow ("delete from", not "delete"), so a
+        // column named `deleted_at` doesn't trip the guard. Important
+        // check — false positives on benign queries would make the gate
+        // unusable in practice.
+        let store = sql_query_guard_store();
+        assert!(
+            sql_policy_check(&store, "SELECT deleted_at FROM t").is_none(),
+            "column names containing 'delete' must not be rejected"
+        );
+    }
+
+    #[test]
+    fn case_insensitive_match_catches_mixed_case() {
+        let store = sql_query_guard_store();
+        assert!(sql_policy_check(&store, "Drop Table X").is_some());
+    }
+}
--- a/crates/truth/src/lib.rs
+++ b/crates/truth/src/lib.rs
@ -18,6 +18,11 @@ pub enum RuleCondition {
    FieldMismatch { field: String, value: String },
    FieldEmpty { field: String },
    FieldGreater { field: String, threshold: i64 },
+    // Case-insensitive substring scan — true if the field value contains
+    // ANY of `needles`. Added for SQL/command guards where rules of the
+    // form "sql must not contain DROP/DELETE/TRUNCATE" need to express
+    // enforcement as a passing precondition being absent.
+    FieldContainsAny { field: String, needles: Vec<String> },
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@ -124,6 +129,15 @@ fn evaluate_condition(cond: &RuleCondition, ctx: &serde_json::Value) -> bool {
                .map(|n| n > *threshold)
                .unwrap_or(false)
        }
+        RuleCondition::FieldContainsAny { field, needles } => {
+            match field_as_string(ctx, field) {
+                None => false,
+                Some(s) => {
+                    let haystack = s.to_ascii_lowercase();
+                    needles.iter().any(|n| haystack.contains(&n.to_ascii_lowercase()))
+                }
+            }
+        }
    }
 }

@ -147,6 +161,49 @@ fn field_as_string(ctx: &serde_json::Value, path: &str) -> Option<String> {
    })
 }

+/// Minimal SQL guard — rejects destructive verbs (DROP/TRUNCATE/DELETE).
+/// queryd/src/service.rs loads this into its `QueryState` and evaluates
+/// every `/sql` request against it before hitting the DataFusion engine.
+/// This is the P42-002 enforcement point flagged across scrum iters 3-5
+/// ("raw SQL forwarded without schema or policy gate").
+///
+/// Intentionally narrow: it's a safety net, not a full SQL parser. If
+/// callers need richer AST-aware enforcement they should extend this with
+/// structured rules rather than new needles.
+pub fn sql_query_guard_store() -> TruthStore {
+    let mut store = TruthStore::new();
+    store.add_rule(TruthRule {
+        id: "no-destructive-sql".to_string(),
+        task_class: "sql_query".to_string(),
+        description: "SQL must not contain destructive verbs".to_string(),
+        condition: RuleCondition::FieldContainsAny {
+            field: "sql".to_string(),
+            needles: vec![
+                "drop table".to_string(),
+                "drop schema".to_string(),
+                "drop database".to_string(),
+                "truncate".to_string(),
+                "delete from".to_string(),
+            ],
+        },
+        action: RuleAction::Reject {
+            message: "destructive SQL rejected by truth.sql_query_guard".to_string(),
+        },
+    });
+    store.add_rule(TruthRule {
+        id: "sql-not-empty".to_string(),
+        task_class: "sql_query".to_string(),
+        description: "SQL must not be empty".to_string(),
+        condition: RuleCondition::FieldEmpty {
+            field: "sql".to_string(),
+        },
+        action: RuleAction::Reject {
+            message: "empty SQL rejected".to_string(),
+        },
+    });
+    store
+}
+
 pub fn default_truth_store() -> TruthStore {
    let mut store = TruthStore::new();

@ -520,4 +577,67 @@ mod tests {
        let actions = s.check("t");
        assert_eq!(actions.len(), 3, "check returns one action per rule regardless of condition");
    }
+
+    fn sql_guard_store() -> TruthStore {
+        let mut s = TruthStore::new();
+        s.add_rule(TruthRule {
+            id: "no-destructive".into(),
+            task_class: "sql_query".into(),
+            description: "SQL must not contain destructive verbs".into(),
+            condition: RuleCondition::FieldContainsAny {
+                field: "sql".into(),
+                needles: vec![
+                    "drop table".into(),
+                    "drop schema".into(),
+                    "truncate".into(),
+                    "delete from".into(),
+                ],
+            },
+            action: RuleAction::Reject {
+                message: "destructive SQL rejected".into(),
+            },
+        });
+        s
+    }
+
+    #[test]
+    fn field_contains_any_matches_case_insensitively() {
+        let s = sql_guard_store();
+        let ctx = serde_json::json!({"sql": "SELECT * FROM t; DROP TABLE users;"});
+        let o = s.evaluate("sql_query", &ctx);
+        assert!(o[0].passed, "condition holds when needle present (case-insensitive)");
+    }
+
+    #[test]
+    fn field_contains_any_is_false_when_no_needle_matches() {
+        let s = sql_guard_store();
+        let ctx = serde_json::json!({"sql": "SELECT count(*) FROM workers"});
+        let o = s.evaluate("sql_query", &ctx);
+        assert!(!o[0].passed, "benign SELECT should not match destructive needles");
+    }
+
+    #[test]
+    fn field_contains_any_false_when_field_missing() {
+        let s = sql_guard_store();
+        let ctx = serde_json::json!({});
+        let o = s.evaluate("sql_query", &ctx);
+        assert!(!o[0].passed, "missing field → condition cannot hold");
+    }
+
+    #[test]
+    fn field_contains_any_empty_needles_list_never_matches() {
+        let mut s = TruthStore::new();
+        s.add_rule(TruthRule {
+            id: "empty".into(),
+            task_class: "x".into(),
+            description: "".into(),
+            condition: RuleCondition::FieldContainsAny {
+                field: "sql".into(),
+                needles: vec![],
+            },
+            action: RuleAction::Pass,
+        });
+        let o = s.evaluate("x", &serde_json::json!({"sql": "anything"}));
+        assert!(!o[0].passed, "no needles → any::<bool> is false");
+    }
 }