// lakehouse/crates/truth/src/lib.rs

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

pub mod staffing;
pub mod devops;
pub mod loader;

/// A single policy rule: a condition to test against a JSON context plus
/// the action declared for it.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TruthRule {
    /// Rule identifier. `TruthStore::all_rule_ids` collects these so the
    /// loader can detect duplicates; `TruthStore::add_rule` itself does not
    /// enforce uniqueness.
    pub id: String,
    /// Task class the rule belongs to; `TruthStore` groups rules by this key.
    pub task_class: String,
    /// Human-readable summary of what the rule is for.
    pub description: String,
    /// Predicate evaluated against the caller-supplied context.
    pub condition: RuleCondition,
    /// Action reported alongside the evaluation result.
    pub action: RuleAction,
}

/// Predicate a rule tests against a JSON context.
///
/// Serialized as an internally tagged enum, e.g.
/// `{"type":"FieldGreater","field":"count","threshold":10}`.
/// `field` is a dot-separated path into the context (`"worker.status"`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RuleCondition {
    /// Holds for every context.
    Always,
    /// Holds when the field resolves to a string, boolean or number whose
    /// string form equals `value`. A missing field does not hold.
    FieldEquals { field: String, value: String },
    /// Holds when the field resolves to a string, boolean or number whose
    /// string form differs from `value`. A missing field does not hold.
    FieldMismatch { field: String, value: String },
    /// Holds when the field is missing, `null`, or an empty string.
    FieldEmpty { field: String },
    /// Holds when the field is a JSON number strictly greater than
    /// `threshold`. Missing or non-numeric fields do not hold.
    FieldGreater { field: String, threshold: i64 },
    /// ASCII case-insensitive substring scan: holds when the field value
    /// contains ANY of `needles`. Added for SQL/command guards such as
    /// "sql must not contain DROP/DELETE/TRUNCATE". Note the polarity: the
    /// condition holding means a forbidden substring WAS found, so a clean
    /// input is one for which this condition does not hold.
    FieldContainsAny { field: String, needles: Vec<String> },
}

/// Action declared by a rule.
///
/// Serialized as an internally tagged enum, e.g.
/// `{"type":"Reject","message":"..."}`. Nothing in this file applies an
/// action; the store only returns it to the caller.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RuleAction {
    /// No enforcement; `TruthStore::evaluate` documents it as a no-op.
    Pass,
    /// Refuse the request, with `message` as the explanation.
    /// NOTE(review): how `Reject` differs from `Block` is not visible in
    /// this file — confirm against the consumers.
    Reject { message: String },
    /// Names the fields to redact. Presumably the caller performs the
    /// redaction — TODO confirm.
    Redact { fields: Vec<String> },
    /// Block the request, with `message` as the explanation.
    Block { message: String },
}

/// In-memory registry of `TruthRule`s, grouped by task class.
#[derive(Default)]
pub struct TruthStore {
    /// Rules keyed by `TruthRule::task_class`; each bucket keeps its rules
    /// in the order they were added.
    rules: HashMap<String, Vec<TruthRule>>,
}

impl TruthStore {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn add_rule(&mut self, rule: TruthRule) {
        self.rules
            .entry(rule.task_class.clone())
            .or_default()
            .push(rule);
    }

    /// All rule IDs across every task class. Used by the file loader
    /// to detect duplicate-ID collisions before registering new rules.
    pub fn all_rule_ids(&self) -> std::collections::HashSet<String> {
        self.rules
            .values()
            .flat_map(|v| v.iter().map(|r| r.id.clone()))
            .collect()
    }

    pub fn get_rules(&self, task_class: &str) -> Vec<&TruthRule> {
        self.rules
            .get(task_class)
            .map(|v| v.iter().collect())
            .unwrap_or_default()
    }

    /// Legacy API: returns the list of actions registered for a task class
    /// without evaluating conditions. Retained for backward compatibility
    /// with callers that only want the action catalog. New callers should
    /// prefer `evaluate()`, which actually walks `RuleCondition` against
    /// a context and reports per-rule pass/fail.
    pub fn check(&self, task_class: &str) -> Vec<RuleAction> {
        let rules = self.get_rules(task_class);
        rules
            .into_iter()
            .map(|r| r.action.clone())
            .collect()
    }

    /// Evaluate every rule registered for `task_class` against `ctx`,
    /// returning one `RuleOutcome` per rule. `passed = true` means the
    /// rule's `condition` held; the rule's action is still attached so
    /// callers can distinguish "passed and therefore no-op" (RuleAction::Pass)
    /// from "passed and apply Redact". `passed = false` means the condition
    /// failed — callers should treat the attached action as the enforcement
    /// response (Reject/Block).
    ///
    /// Fixed P42-001 (2026-04-23): previously `check()` returned all actions
    /// unconditionally — the `RuleCondition` field was ignored. Now every
    /// rule is actually walked against the provided context.
    pub fn evaluate(&self, task_class: &str, ctx: &serde_json::Value) -> Vec<RuleOutcome> {
        self.get_rules(task_class)
            .into_iter()
            .map(|r| RuleOutcome {
                rule_id: r.id.clone(),
                passed: evaluate_condition(&r.condition, ctx),
                action: r.action.clone(),
            })
            .collect()
    }
}

/// Result of evaluating one rule against a context. `passed` reports
/// whether the condition held; `action` is the rule's declared action
/// regardless (callers decide how to apply it based on `passed`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct RuleOutcome {
    /// ID of the rule that produced this outcome (copied from `TruthRule::id`).
    pub rule_id: String,
    /// `true` when the rule's condition held for the evaluated context.
    pub passed: bool,
    /// The rule's declared action, cloned as-is whether or not the
    /// condition held.
    pub action: RuleAction,
}

/// Decide whether `cond` holds for the JSON context `ctx`.
///
/// Field paths are resolved with `lookup` (dot-separated object keys).
/// A field that cannot be resolved makes every field condition false
/// except `FieldEmpty`, which treats a missing field as empty.
///
/// String-based conditions (`FieldEquals`, `FieldMismatch`,
/// `FieldContainsAny`) go through `field_as_string`, so they only ever
/// hold for string, boolean and number values.
///
/// `FieldGreater` compares integers that fit in `i64` exactly. Any other
/// JSON number (a fractional value, or an unsigned value above
/// `i64::MAX`) is compared as `f64`, so `0.5` is greater than a threshold
/// of `0`. The previous implementation truncated such values to `i64`
/// before comparing, which turned `0.5 > 0` into `0 > 0`.
fn evaluate_condition(cond: &RuleCondition, ctx: &serde_json::Value) -> bool {
    match cond {
        RuleCondition::Always => true,
        RuleCondition::FieldEquals { field, value } => {
            field_as_string(ctx, field)
                .map(|s| s == *value)
                .unwrap_or(false)
        }
        RuleCondition::FieldMismatch { field, value } => {
            // A missing field is deliberately NOT a mismatch.
            field_as_string(ctx, field)
                .map(|s| s != *value)
                .unwrap_or(false)
        }
        RuleCondition::FieldEmpty { field } => {
            match lookup(ctx, field) {
                None => true,
                // Only null and "" count as empty; empty arrays/objects do not.
                Some(v) => v.is_null() || v.as_str().map(|s| s.is_empty()).unwrap_or(false),
            }
        }
        RuleCondition::FieldGreater { field, threshold } => {
            match lookup(ctx, field) {
                None => false,
                Some(v) => match v.as_i64() {
                    // Exact integer comparison whenever the value fits in i64.
                    Some(n) => n > *threshold,
                    // Otherwise compare in floating point rather than
                    // truncating the value. The threshold conversion is
                    // only inexact for magnitudes beyond 2^53.
                    None => v
                        .as_f64()
                        .map(|f| f > *threshold as f64)
                        .unwrap_or(false),
                },
            }
        }
        RuleCondition::FieldContainsAny { field, needles } => {
            match field_as_string(ctx, field) {
                None => false,
                Some(s) => {
                    // ASCII-only case folding on both sides.
                    let haystack = s.to_ascii_lowercase();
                    needles.iter().any(|n| haystack.contains(&n.to_ascii_lowercase()))
                }
            }
        }
    }
}

/// Resolve a dot-separated `path` inside a `serde_json::Value`:
/// `"worker.status"` maps to `ctx["worker"]["status"]`. Each segment is
/// looked up with `Value::get`; the walk stops and yields `None` as soon
/// as a segment is missing or the current value cannot be indexed by key.
fn lookup<'a>(ctx: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
    path.split('.')
        .try_fold(ctx, |node, segment| node.get(segment))
}

/// Resolve `path` with `lookup` and render the value as a `String`.
/// Strings are returned as-is, booleans and numbers via `to_string()`.
/// Returns `None` when the path does not resolve or the value is null,
/// an array, or an object.
fn field_as_string(ctx: &serde_json::Value, path: &str) -> Option<String> {
    let rendered = match lookup(ctx, path)? {
        serde_json::Value::String(text) => text.clone(),
        serde_json::Value::Bool(flag) => flag.to_string(),
        serde_json::Value::Number(num) => num.to_string(),
        _ => return None,
    };
    Some(rendered)
}

/// Build the minimal SQL guard store for task class `sql_query`.
///
/// Two rules are registered, in this order:
/// 1. `no-destructive-sql` — holds when the `sql` field contains a
///    destructive phrase (DROP TABLE/SCHEMA/DATABASE, TRUNCATE, DELETE FROM).
/// 2. `sql-not-empty` — holds when the `sql` field is missing, null or "".
///
/// Both conditions describe the violation, so `passed == true` in the
/// resulting `RuleOutcome` means the request matched the rule's `Reject`.
///
/// queryd/src/service.rs loads this into its `QueryState` and evaluates
/// every `/sql` request against it before hitting the DataFusion engine.
/// This is the P42-002 enforcement point flagged across scrum iters 3-5
/// ("raw SQL forwarded without schema or policy gate").
///
/// Intentionally narrow: this is a safety net, not a SQL parser. The
/// match is a plain substring scan, so variants such as a double space or
/// newline between `DROP` and `TABLE` are not caught. Callers needing
/// richer, AST-aware enforcement should extend this with structured rules
/// rather than more needles.
pub fn sql_query_guard_store() -> TruthStore {
    let destructive_phrases = [
        "drop table",
        "drop schema",
        "drop database",
        "truncate",
        "delete from",
    ];

    let no_destructive_sql = TruthRule {
        id: String::from("no-destructive-sql"),
        task_class: String::from("sql_query"),
        description: String::from("SQL must not contain destructive verbs"),
        condition: RuleCondition::FieldContainsAny {
            field: String::from("sql"),
            needles: destructive_phrases
                .iter()
                .map(|&phrase| String::from(phrase))
                .collect(),
        },
        action: RuleAction::Reject {
            message: String::from("destructive SQL rejected by truth.sql_query_guard"),
        },
    };

    let sql_not_empty = TruthRule {
        id: String::from("sql-not-empty"),
        task_class: String::from("sql_query"),
        description: String::from("SQL must not be empty"),
        condition: RuleCondition::FieldEmpty {
            field: String::from("sql"),
        },
        action: RuleAction::Reject {
            message: String::from("empty SQL rejected"),
        },
    };

    // Registration order is preserved in `evaluate()` output.
    let mut guard = TruthStore::new();
    guard.add_rule(no_destructive_sql);
    guard.add_rule(sql_not_empty);
    guard
}

/// Phase 42 default store: an empty `TruthStore` with the staffing rules
/// applied first and the DevOps scaffold composed on top.
///
/// Per the PRD: "Staffing rules ship first; Terraform/Ansible rule shapes
/// are scaffolded but unpopulated until the long-horizon phase." Because
/// the DevOps set is empty the order does not matter today, but it is
/// kept so the shape matches the PRD's "compose on top" pattern.
///
/// The rules used to be registered inline in this function; on 2026-04-24
/// they moved into `staffing.rs` and `devops.rs`, which each own their
/// task-class rule sets (the Phase 42 module split). Behavior is
/// unchanged for existing callers.
pub fn default_truth_store() -> TruthStore {
    let empty = TruthStore::new();
    let with_staffing = staffing::staffing_rules(empty);
    devops::devops_rules(with_staffing)
}

/// Unit tests for the rule types, `TruthStore`, condition evaluation and
/// the built-in store factories.
#[cfg(test)]
mod tests {
    use super::*;

    // ── Store basics ──

    #[test]
    fn truth_store_new_is_empty() {
        let store = TruthStore::new();
        assert!(store.rules.is_empty());
    }

    #[test]
    fn add_rule_inserts_into_correct_task_class() {
        let mut store = TruthStore::new();
        store.add_rule(TruthRule {
            id: "test-rule".to_string(),
            task_class: "test.task".to_string(),
            description: "Test rule".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        let rules = store.get_rules("test.task");
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].id, "test-rule");
    }

    #[test]
    fn get_rules_returns_empty_for_unknown_class() {
        let store = TruthStore::new();
        let rules = store.get_rules("unknown.class");
        assert!(rules.is_empty());
    }

    #[test]
    fn check_returns_actions_for_task_class() {
        let mut store = TruthStore::new();
        store.add_rule(TruthRule {
            id: "a1".to_string(),
            task_class: "test".to_string(),
            description: "A1".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        store.add_rule(TruthRule {
            id: "a2".to_string(),
            task_class: "test".to_string(),
            description: "A2".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Reject {
                message: "test reject".to_string(),
            },
        });
        let actions = store.check("test");
        assert_eq!(actions.len(), 2);
    }

    // ── Serde wire format ──

    #[test]
    fn rule_condition_serialize_always() {
        let cond = RuleCondition::Always;
        let json = serde_json::to_string(&cond).unwrap();
        // Closing quote included so the tag value is matched exactly,
        // consistent with the other serialization tests.
        assert!(json.contains(r#""type":"Always""#));
    }

    #[test]
    fn rule_condition_serialize_field_equals() {
        let cond = RuleCondition::FieldEquals {
            field: "foo".to_string(),
            value: "bar".to_string(),
        };
        let json = serde_json::to_string(&cond).unwrap();
        assert!(json.contains(r#""type":"FieldEquals""#));
        assert!(json.contains(r#""field":"foo""#));
        assert!(json.contains(r#""value":"bar""#));
    }

    #[test]
    fn rule_action_serialize_redact() {
        let action = RuleAction::Redact {
            fields: vec!["ssn".to_string()],
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains(r#""type":"Redact""#));
        assert!(json.contains("ssn"));
    }

    #[test]
    fn rule_action_serialize_reject() {
        let action = RuleAction::Reject {
            message: "test".to_string(),
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains(r#""type":"Reject""#));
    }

    #[test]
    fn default_truth_store_has_staffing_rules() {
        let store = default_truth_store();
        let fill_rules = store.get_rules("staffing.fill");
        assert!(!fill_rules.is_empty());
        let any_rules = store.get_rules("staffing.any");
        assert!(!any_rules.is_empty());
    }

    #[test]
    fn multiple_rules_same_task_class() {
        let mut store = TruthStore::new();
        for i in 0..5 {
            store.add_rule(TruthRule {
                id: format!("rule-{}", i),
                task_class: "test".to_string(),
                description: format!("Rule {}", i),
                condition: RuleCondition::Always,
                action: RuleAction::Pass,
            });
        }
        let rules = store.get_rules("test");
        assert_eq!(rules.len(), 5);
    }

    #[test]
    fn truth_rule_clone_preserves_data() {
        let rule = TruthRule {
            id: "clone-test".to_string(),
            task_class: "clone.task".to_string(),
            description: "Clone test".to_string(),
            condition: RuleCondition::FieldEquals {
                field: "x".to_string(),
                value: "y".to_string(),
            },
            action: RuleAction::Block {
                message: "blocked".to_string(),
            },
        };
        let cloned = rule.clone();
        assert_eq!(cloned.id, rule.id);
        assert_eq!(cloned.condition, rule.condition);
        assert_eq!(cloned.action, rule.action);
    }

    #[test]
    fn field_greater_condition_parse() {
        let json = r#"{"type":"FieldGreater","field":"count","threshold":10}"#;
        let cond: RuleCondition = serde_json::from_str(json).unwrap();
        match cond {
            RuleCondition::FieldGreater { field, threshold } => {
                assert_eq!(field, "count");
                assert_eq!(threshold, 10);
            }
            _ => panic!("Expected FieldGreater"),
        }
    }

    #[test]
    fn block_action_blocks_with_message() {
        let action = RuleAction::Block {
            message: "Rate limited".to_string(),
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains("Rate limited"));
    }

    #[test]
    fn empty_store_check_returns_empty() {
        let store = TruthStore::new();
        let actions = store.check("empty.class");
        assert!(actions.is_empty());
    }

    // ── P42-001 evaluate() tests — actually walk RuleCondition ──

    /// Store with three rules under task class `"t"`: `active`
    /// (FieldEquals), `deadline` (FieldEmpty) and `budget` (FieldGreater).
    fn fill_store() -> TruthStore {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "active".into(),
            task_class: "t".into(),
            description: "must be active".into(),
            condition: RuleCondition::FieldEquals {
                field: "worker.status".into(),
                value: "active".into(),
            },
            action: RuleAction::Reject {
                message: "worker not active".into(),
            },
        });
        s.add_rule(TruthRule {
            id: "deadline".into(),
            task_class: "t".into(),
            description: "deadline required".into(),
            condition: RuleCondition::FieldEmpty {
                field: "contract.deadline".into(),
            },
            action: RuleAction::Reject {
                message: "missing deadline".into(),
            },
        });
        s.add_rule(TruthRule {
            id: "budget".into(),
            task_class: "t".into(),
            description: "budget positive".into(),
            condition: RuleCondition::FieldGreater {
                field: "contract.budget".into(),
                threshold: 0,
            },
            action: RuleAction::Block {
                message: "budget must be positive".into(),
            },
        });
        s
    }

    #[test]
    fn evaluate_field_equals_pass_on_match() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "active"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(active.passed, "active condition should hold");
    }

    #[test]
    fn evaluate_field_equals_fail_on_mismatch() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "terminated"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(!active.passed, "terminated should fail active condition");
    }

    #[test]
    fn evaluate_field_equals_fail_on_missing() {
        let s = fill_store();
        let ctx = serde_json::json!({});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(!active.passed, "missing worker.status should fail");
    }

    #[test]
    fn evaluate_field_mismatch_requires_present_field() {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "not-banned".into(),
            task_class: "m".into(),
            description: "".into(),
            condition: RuleCondition::FieldMismatch {
                field: "worker.status".into(),
                value: "banned".into(),
            },
            action: RuleAction::Pass,
        });
        let differs = s.evaluate("m", &serde_json::json!({"worker": {"status": "active"}}));
        let same = s.evaluate("m", &serde_json::json!({"worker": {"status": "banned"}}));
        let missing = s.evaluate("m", &serde_json::json!({}));
        assert!(differs[0].passed, "different value is a mismatch");
        assert!(!same[0].passed, "equal value is not a mismatch");
        assert!(!missing[0].passed, "missing field is not treated as a mismatch");
    }

    #[test]
    fn evaluate_field_empty_pass_when_absent() {
        let s = fill_store();
        // FieldEmpty passes when the field is missing/null/empty string.
        // Deadline rule says "field empty means action fires" — so passed=true
        // here means the rule's condition held (deadline IS empty).
        let ctx = serde_json::json!({});
        let o = s.evaluate("t", &ctx);
        let deadline = o.iter().find(|r| r.rule_id == "deadline").unwrap();
        assert!(deadline.passed);
    }

    #[test]
    fn evaluate_field_empty_fail_when_present() {
        let s = fill_store();
        let ctx = serde_json::json!({"contract": {"deadline": "2026-05-01"}});
        let o = s.evaluate("t", &ctx);
        let deadline = o.iter().find(|r| r.rule_id == "deadline").unwrap();
        assert!(!deadline.passed, "non-empty deadline should fail FieldEmpty check");
    }

    #[test]
    fn evaluate_field_greater_pass_and_fail() {
        let s = fill_store();
        let ctx_ok = serde_json::json!({"contract": {"budget": 100}});
        let ctx_bad = serde_json::json!({"contract": {"budget": 0}});
        let ok = s.evaluate("t", &ctx_ok);
        let bad = s.evaluate("t", &ctx_bad);
        assert!(ok.iter().find(|r| r.rule_id == "budget").unwrap().passed);
        assert!(!bad.iter().find(|r| r.rule_id == "budget").unwrap().passed);
    }

    #[test]
    fn evaluate_always_condition_passes_unconditionally() {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "always".into(),
            task_class: "x".into(),
            description: "".into(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        let o = s.evaluate("x", &serde_json::json!(null));
        assert!(o[0].passed);
    }

    #[test]
    fn evaluate_preserves_action_regardless_of_outcome() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "active"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        // Action is attached whether the rule passed or not — the consumer
        // decides how to use it.
        assert_eq!(
            active.action,
            RuleAction::Reject {
                message: "worker not active".into()
            }
        );
    }

    #[test]
    fn evaluate_on_unknown_task_class_returns_empty() {
        let s = fill_store();
        let o = s.evaluate("nonexistent", &serde_json::json!({}));
        assert!(o.is_empty());
    }

    #[test]
    fn check_still_returns_actions_unconditionally_for_back_compat() {
        // Legacy API should still behave the same — no condition walking.
        let s = fill_store();
        let actions = s.check("t");
        assert_eq!(actions.len(), 3, "check returns one action per rule regardless of condition");
    }

    // ── FieldContainsAny in isolation ──

    /// Single-rule store used to exercise `FieldContainsAny` on its own;
    /// the production guard is covered separately below.
    fn sql_guard_store() -> TruthStore {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "no-destructive".into(),
            task_class: "sql_query".into(),
            description: "SQL must not contain destructive verbs".into(),
            condition: RuleCondition::FieldContainsAny {
                field: "sql".into(),
                needles: vec![
                    "drop table".into(),
                    "drop schema".into(),
                    "truncate".into(),
                    "delete from".into(),
                ],
            },
            action: RuleAction::Reject {
                message: "destructive SQL rejected".into(),
            },
        });
        s
    }

    #[test]
    fn field_contains_any_matches_case_insensitively() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({"sql": "SELECT * FROM t; DROP TABLE users;"});
        let o = s.evaluate("sql_query", &ctx);
        assert!(o[0].passed, "condition holds when needle present (case-insensitive)");
    }

    #[test]
    fn field_contains_any_is_false_when_no_needle_matches() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({"sql": "SELECT count(*) FROM workers"});
        let o = s.evaluate("sql_query", &ctx);
        assert!(!o[0].passed, "benign SELECT should not match destructive needles");
    }

    #[test]
    fn field_contains_any_false_when_field_missing() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({});
        let o = s.evaluate("sql_query", &ctx);
        assert!(!o[0].passed, "missing field → condition cannot hold");
    }

    #[test]
    fn field_contains_any_empty_needles_list_never_matches() {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "empty".into(),
            task_class: "x".into(),
            description: "".into(),
            condition: RuleCondition::FieldContainsAny {
                field: "sql".into(),
                needles: vec![],
            },
            action: RuleAction::Pass,
        });
        let o = s.evaluate("x", &serde_json::json!({"sql": "anything"}));
        assert!(!o[0].passed, "no needles → any::<bool> is false");
    }

    // ── Production guard: sql_query_guard_store() ──

    /// Find the outcome for `rule_id`, panicking if the rule is not present.
    fn outcome<'a>(outcomes: &'a [RuleOutcome], rule_id: &str) -> &'a RuleOutcome {
        outcomes
            .iter()
            .find(|r| r.rule_id == rule_id)
            .expect("rule should be registered in the guard store")
    }

    #[test]
    fn sql_query_guard_store_flags_destructive_sql() {
        let s = sql_query_guard_store();
        let ctx = serde_json::json!({"sql": "DROP DATABASE prod"});
        let o = s.evaluate("sql_query", &ctx);
        assert_eq!(o.len(), 2);
        assert!(outcome(&o, "no-destructive-sql").passed);
        assert!(!outcome(&o, "sql-not-empty").passed);
    }

    #[test]
    fn sql_query_guard_store_flags_empty_sql() {
        let s = sql_query_guard_store();
        let contexts = vec![
            serde_json::json!({}),
            serde_json::json!({"sql": ""}),
            serde_json::json!({"sql": null}),
        ];
        for ctx in &contexts {
            let o = s.evaluate("sql_query", ctx);
            assert!(outcome(&o, "sql-not-empty").passed);
            assert!(!outcome(&o, "no-destructive-sql").passed);
        }
    }

    #[test]
    fn sql_query_guard_store_allows_benign_select() {
        let s = sql_query_guard_store();
        let ctx = serde_json::json!({"sql": "SELECT count(*) FROM workers"});
        let o = s.evaluate("sql_query", &ctx);
        assert!(o.iter().all(|r| !r.passed), "no guard condition should hold");
    }
}