root de8fb10f52
Some checks failed
lakehouse/auditor 4 blocking issues: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
phase-42: truth/ repo-root dir + TOML rule loader
Phase 42 PRD (docs/CONTROL_PLANE_PRD.md:144): "truth/ dir at repo
root — rule files, versioned in git." Didn't exist. Landing both the
dir + its loader.

New files:

  truth/
    README.md                — documents file format, rule shape,
                               composition model (file rules are
                               additive on top of in-code default_
                               truth_store), explicit non-goals
                               (no hot reload, no inheritance)
    staffing.fill.toml       — 2 staffing.fill rules:
                               endorsed-count-matches-target,
                               city-required (both Reject via
                               FieldEmpty)
    staffing.any.toml        — 1 staffing.any rule:
                               no-destructive-sql-in-context via
                               FieldContainsAny (parallel to the
                               queryd SQL gate we already ship)

  crates/truth/src/loader.rs — load_from_dir(store, dir)
                             — 5 tests: happy path, duplicate-ID
                               rejection within files, duplicate-ID
                               rejection against in-code rules,
                               non-toml files skipped, missing-dir
                               error. Alphabetical file order for
                               reproducible error messages.

  crates/truth/src/lib.rs    — new pub fn all_rule_ids() helper on
                               TruthStore so the loader can detect
                               collisions without breaching the
                               private `rules` field.

  crates/truth/Cargo.toml    — adds `toml` workspace dep.

Composition model: file rules are ADDITIVE on top of what
default_truth_store() registers in code. Operators can tune
thresholds/needles/descriptions at the file layer without a code
deploy. Schema changes (new RuleCondition variants) still need a
code bump.

Integration hook (not in this commit, flagged for follow-up):
main.rs should call loader::load_from_dir(&mut store, "truth/")
after default_truth_store() so file-backed rules take effect on
gateway boot. Deliberately separate: this commit lands the
machinery; wiring it on happens when the team is ready to own
the rule file lifecycle.

Total: 37 truth tests green (was 32). Workspace warnings still 0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 13:44:23 -05:00

610 lines
21 KiB
Rust

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
pub mod staffing;
pub mod devops;
pub mod loader;
/// A single declarative rule: a condition to evaluate plus the action
/// associated with it, registered under one task class.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TruthRule {
/// Rule identifier. `TruthStore::all_rule_ids` collects these so the
/// file loader can detect duplicate-ID collisions.
pub id: String,
/// Key under which `TruthStore::add_rule` buckets this rule.
pub task_class: String,
/// Human-readable summary of the rule; evaluation never reads it.
pub description: String,
/// Predicate that `TruthStore::evaluate` walks against the context.
pub condition: RuleCondition,
/// Action reported alongside the evaluation result.
pub action: RuleAction,
}
/// Predicate attached to a `TruthRule`, evaluated against a JSON context.
/// Serialized with an internal `"type"` tag naming the variant. Every
/// `field` is a dot-separated path into the context (e.g. `worker.status`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RuleCondition {
/// Holds for every context.
Always,
/// Holds when the field is a string, bool or number whose string form
/// equals `value`. A field that is missing, or of any other JSON type,
/// never satisfies it.
FieldEquals { field: String, value: String },
/// Holds when the field is a string, bool or number whose string form
/// differs from `value`. A field that is missing, or of any other JSON
/// type, never satisfies it.
FieldMismatch { field: String, value: String },
/// Holds when the field is missing, null, or an empty string.
FieldEmpty { field: String },
/// Holds when the field is numeric and greater than `threshold`. A
/// missing or non-numeric field never satisfies it.
FieldGreater { field: String, threshold: i64 },
/// Case-insensitive (ASCII) substring scan — holds if the field value
/// contains ANY of `needles`. Added for SQL/command guards: a rule such
/// as "sql must not contain DROP/DELETE/TRUNCATE" is written as a
/// condition that holds when a forbidden needle IS present.
FieldContainsAny { field: String, needles: Vec<String> },
}
/// What a rule prescribes. Serialized with an internal `"type"` tag naming
/// the variant. This file only stores and returns actions (via
/// `TruthStore::check` and `TruthStore::evaluate`); applying them is left
/// to the caller.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RuleAction {
/// No enforcement attached to the rule.
Pass,
/// Refusal, carrying a `message` for the caller to surface.
Reject { message: String },
/// Names the `fields` to redact.
/// NOTE(review): redaction itself is not implemented in this file —
/// confirm where it is applied.
Redact { fields: Vec<String> },
/// Carries a `message`, like `Reject`.
/// NOTE(review): how consumers treat `Block` differently from `Reject`
/// is not visible here — confirm against the callers.
Block { message: String },
}
/// In-memory registry of `TruthRule`s, grouped by task class.
#[derive(Default)]
pub struct TruthStore {
/// Task class → rules registered for it, in the order `add_rule`
/// received them. Kept private; `all_rule_ids` and `get_rules` are the
/// read paths.
rules: HashMap<String, Vec<TruthRule>>,
}
impl TruthStore {
pub fn new() -> Self {
Self::default()
}
pub fn add_rule(&mut self, rule: TruthRule) {
self.rules
.entry(rule.task_class.clone())
.or_default()
.push(rule);
}
/// All rule IDs across every task class. Used by the file loader
/// to detect duplicate-ID collisions before registering new rules.
pub fn all_rule_ids(&self) -> std::collections::HashSet<String> {
self.rules
.values()
.flat_map(|v| v.iter().map(|r| r.id.clone()))
.collect()
}
pub fn get_rules(&self, task_class: &str) -> Vec<&TruthRule> {
self.rules
.get(task_class)
.map(|v| v.iter().collect())
.unwrap_or_default()
}
/// Legacy API: returns the list of actions registered for a task class
/// without evaluating conditions. Retained for backward compatibility
/// with callers that only want the action catalog. New callers should
/// prefer `evaluate()`, which actually walks `RuleCondition` against
/// a context and reports per-rule pass/fail.
pub fn check(&self, task_class: &str) -> Vec<RuleAction> {
let rules = self.get_rules(task_class);
rules
.into_iter()
.map(|r| r.action.clone())
.collect()
}
/// Evaluate every rule registered for `task_class` against `ctx`,
/// returning one `RuleOutcome` per rule. `passed = true` means the
/// rule's `condition` held; the rule's action is still attached so
/// callers can distinguish "passed and therefore no-op" (RuleAction::Pass)
/// from "passed and apply Redact". `passed = false` means the condition
/// failed — callers should treat the attached action as the enforcement
/// response (Reject/Block).
///
/// Fixed P42-001 (2026-04-23): previously `check()` returned all actions
/// unconditionally — the `RuleCondition` field was ignored. Now every
/// rule is actually walked against the provided context.
pub fn evaluate(&self, task_class: &str, ctx: &serde_json::Value) -> Vec<RuleOutcome> {
self.get_rules(task_class)
.into_iter()
.map(|r| RuleOutcome {
rule_id: r.id.clone(),
passed: evaluate_condition(&r.condition, ctx),
action: r.action.clone(),
})
.collect()
}
}
/// Result of evaluating one rule against a context, as produced by
/// `TruthStore::evaluate`. `passed` reports whether the condition held;
/// `action` is the rule's declared action regardless (callers decide how
/// to apply it based on `passed`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct RuleOutcome {
/// Copy of the originating rule's `id`.
pub rule_id: String,
/// `true` when the rule's condition held for the evaluated context.
pub passed: bool,
/// Clone of the rule's action, attached whether or not `passed` is true.
pub action: RuleAction,
}
/// Decide whether `cond` holds for the JSON context `ctx`.
///
/// Field paths are dot-separated and resolved with `lookup`. Per variant:
///
/// * `Always` — always `true`.
/// * `FieldEquals` / `FieldMismatch` — compare the field's string form
///   (see `field_as_string`) with `value`. A field that is missing, null,
///   an array or an object yields `false` for BOTH variants.
/// * `FieldEmpty` — `true` when the field is missing, null, or `""`.
/// * `FieldGreater` — `true` when the field is a number strictly greater
///   than `threshold`. Integers are compared exactly; fractional values
///   are compared as floats rather than being truncated first, so `0.5`
///   is greater than `0`. Missing or non-numeric fields yield `false`.
/// * `FieldContainsAny` — ASCII-case-insensitive substring match of the
///   field's string form against each needle. Note that an empty needle
///   matches any string value.
fn evaluate_condition(cond: &RuleCondition, ctx: &serde_json::Value) -> bool {
    match cond {
        RuleCondition::Always => true,
        RuleCondition::FieldEquals { field, value } => field_as_string(ctx, field)
            .map(|s| s == *value)
            .unwrap_or(false),
        RuleCondition::FieldMismatch { field, value } => field_as_string(ctx, field)
            .map(|s| s != *value)
            .unwrap_or(false),
        RuleCondition::FieldEmpty { field } => match lookup(ctx, field) {
            None => true,
            Some(v) => v.is_null() || v.as_str().map(|s| s.is_empty()).unwrap_or(false),
        },
        RuleCondition::FieldGreater { field, threshold } => match lookup(ctx, field) {
            None => false,
            Some(v) => {
                if let Some(n) = v.as_i64() {
                    // Integer that fits i64: exact comparison.
                    n > *threshold
                } else if v.as_u64().is_some() {
                    // Unsigned integer that did not fit i64, hence larger
                    // than i64::MAX and therefore than any threshold.
                    true
                } else {
                    // Fractional value: compare as floats. Casting to i64
                    // first would truncate toward zero and misjudge e.g.
                    // 0.5 > 0 or 10.9 > 10.
                    v.as_f64()
                        .map(|f| f > (*threshold as f64))
                        .unwrap_or(false)
                }
            }
        },
        RuleCondition::FieldContainsAny { field, needles } => {
            match field_as_string(ctx, field) {
                None => false,
                Some(s) => {
                    let haystack = s.to_ascii_lowercase();
                    needles
                        .iter()
                        .any(|n| haystack.contains(&n.to_ascii_lowercase()))
                }
            }
        }
    }
}
/// Resolve a dot-separated `path` inside a `serde_json::Value`, one segment
/// per nesting level: `"worker.status"` → `ctx["worker"]["status"]`.
/// Yields `None` as soon as a segment is missing or the value reached
/// mid-path is not an object.
fn lookup<'a>(ctx: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
    path.split('.')
        .try_fold(ctx, |node, segment| node.get(segment))
}
fn field_as_string(ctx: &serde_json::Value, path: &str) -> Option<String> {
lookup(ctx, path).and_then(|v| match v {
serde_json::Value::String(s) => Some(s.clone()),
serde_json::Value::Bool(b) => Some(b.to_string()),
serde_json::Value::Number(n) => Some(n.to_string()),
_ => None,
})
}
/// Build the minimal SQL guard: a store holding two `sql_query` rules,
/// one flagging destructive verbs (DROP/TRUNCATE/DELETE) and one flagging
/// an empty `sql` field. Both carry a `Reject` action.
///
/// queryd/src/service.rs loads this into its `QueryState` and evaluates
/// every `/sql` request against it before the DataFusion engine sees the
/// query. This is the P42-002 enforcement point flagged across scrum
/// iters 3-5 ("raw SQL forwarded without schema or policy gate").
///
/// Deliberately narrow — a safety net built on substring needles, not a
/// SQL parser. Callers needing AST-aware enforcement should add
/// structured rules instead of growing the needle list.
pub fn sql_query_guard_store() -> TruthStore {
    let destructive_needles: Vec<String> = [
        "drop table",
        "drop schema",
        "drop database",
        "truncate",
        "delete from",
    ]
    .iter()
    .map(|needle| needle.to_string())
    .collect();

    let no_destructive_sql = TruthRule {
        id: String::from("no-destructive-sql"),
        task_class: String::from("sql_query"),
        description: String::from("SQL must not contain destructive verbs"),
        condition: RuleCondition::FieldContainsAny {
            field: String::from("sql"),
            needles: destructive_needles,
        },
        action: RuleAction::Reject {
            message: String::from("destructive SQL rejected by truth.sql_query_guard"),
        },
    };

    let sql_not_empty = TruthRule {
        id: String::from("sql-not-empty"),
        task_class: String::from("sql_query"),
        description: String::from("SQL must not be empty"),
        condition: RuleCondition::FieldEmpty {
            field: String::from("sql"),
        },
        action: RuleAction::Reject {
            message: String::from("empty SQL rejected"),
        },
    };

    // Registration order is preserved by the store: destructive check
    // first, emptiness check second.
    let mut guard = TruthStore::new();
    guard.add_rule(no_destructive_sql);
    guard.add_rule(sql_not_empty);
    guard
}
/// Phase 42 default store: an empty `TruthStore` passed first through
/// `staffing::staffing_rules` and then through `devops::devops_rules`.
///
/// Per the PRD: "Staffing rules ship first; Terraform/Ansible rule shapes
/// are scaffolded but unpopulated until the long-horizon phase." Because
/// the DevOps set is empty the composition order does not matter today;
/// it is kept so the shape matches the PRD's "compose on top" pattern.
///
/// Rule registration moved out of this function on 2026-04-24 as part of
/// the Phase 42 module split (`staffing.rs` and `devops.rs` each own
/// their task-class rule sets). Behavior is unchanged for existing
/// callers.
pub fn default_truth_store() -> TruthStore {
    let with_staffing = staffing::staffing_rules(TruthStore::new());
    devops::devops_rules(with_staffing)
}
#[cfg(test)]
mod tests {
    //! Unit tests for the truth store: registration, serde shape of
    //! conditions/actions, condition evaluation, and the SQL guard.
    use super::*;

    #[test]
    fn truth_store_new_is_empty() {
        let store = TruthStore::new();
        assert!(store.rules.is_empty());
    }

    #[test]
    fn add_rule_inserts_into_correct_task_class() {
        let mut store = TruthStore::new();
        store.add_rule(TruthRule {
            id: "test-rule".to_string(),
            task_class: "test.task".to_string(),
            description: "Test rule".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        let rules = store.get_rules("test.task");
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].id, "test-rule");
    }

    #[test]
    fn get_rules_returns_empty_for_unknown_class() {
        let store = TruthStore::new();
        let rules = store.get_rules("unknown.class");
        assert!(rules.is_empty());
    }

    #[test]
    fn check_returns_actions_for_task_class() {
        let mut store = TruthStore::new();
        store.add_rule(TruthRule {
            id: "a1".to_string(),
            task_class: "test".to_string(),
            description: "A1".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        store.add_rule(TruthRule {
            id: "a2".to_string(),
            task_class: "test".to_string(),
            description: "A2".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Reject {
                message: "test reject".to_string(),
            },
        });
        let actions = store.check("test");
        assert_eq!(actions.len(), 2);
    }

    #[test]
    fn rule_condition_serialize_always() {
        let cond = RuleCondition::Always;
        let json = serde_json::to_string(&cond).unwrap();
        // Closing quote included so the assertion pins the full tag value,
        // matching the sibling serialization tests.
        assert!(json.contains(r#""type":"Always""#));
    }

    #[test]
    fn rule_condition_serialize_field_equals() {
        let cond = RuleCondition::FieldEquals {
            field: "foo".to_string(),
            value: "bar".to_string(),
        };
        let json = serde_json::to_string(&cond).unwrap();
        assert!(json.contains(r#""type":"FieldEquals""#));
        assert!(json.contains(r#""field":"foo""#));
        assert!(json.contains(r#""value":"bar""#));
    }

    #[test]
    fn rule_action_serialize_redact() {
        let action = RuleAction::Redact {
            fields: vec!["ssn".to_string()],
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains(r#""type":"Redact""#));
        assert!(json.contains("ssn"));
    }

    #[test]
    fn rule_action_serialize_reject() {
        let action = RuleAction::Reject {
            message: "test".to_string(),
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains(r#""type":"Reject""#));
    }

    #[test]
    fn default_truth_store_has_staffing_rules() {
        let store = default_truth_store();
        let fill_rules = store.get_rules("staffing.fill");
        assert!(!fill_rules.is_empty());
        let any_rules = store.get_rules("staffing.any");
        assert!(!any_rules.is_empty());
    }

    #[test]
    fn multiple_rules_same_task_class() {
        let mut store = TruthStore::new();
        for i in 0..5 {
            store.add_rule(TruthRule {
                id: format!("rule-{}", i),
                task_class: "test".to_string(),
                description: format!("Rule {}", i),
                condition: RuleCondition::Always,
                action: RuleAction::Pass,
            });
        }
        let rules = store.get_rules("test");
        assert_eq!(rules.len(), 5);
    }

    #[test]
    fn truth_rule_clone_preserves_data() {
        let rule = TruthRule {
            id: "clone-test".to_string(),
            task_class: "clone.task".to_string(),
            description: "Clone test".to_string(),
            condition: RuleCondition::FieldEquals {
                field: "x".to_string(),
                value: "y".to_string(),
            },
            action: RuleAction::Block {
                message: "blocked".to_string(),
            },
        };
        let cloned = rule.clone();
        assert_eq!(cloned.id, rule.id);
        assert_eq!(cloned.condition, rule.condition);
        assert_eq!(cloned.action, rule.action);
    }

    #[test]
    fn field_greater_condition_parse() {
        let json = r#"{"type":"FieldGreater","field":"count","threshold":10}"#;
        let cond: RuleCondition = serde_json::from_str(json).unwrap();
        match cond {
            RuleCondition::FieldGreater { field, threshold } => {
                assert_eq!(field, "count");
                assert_eq!(threshold, 10);
            }
            _ => panic!("Expected FieldGreater"),
        }
    }

    #[test]
    fn block_action_blocks_with_message() {
        let action = RuleAction::Block {
            message: "Rate limited".to_string(),
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains("Rate limited"));
    }

    #[test]
    fn empty_store_check_returns_empty() {
        let store = TruthStore::new();
        let actions = store.check("empty.class");
        assert!(actions.is_empty());
    }

    // ── P42-001 evaluate() tests — actually walk RuleCondition ──

    /// Store with three rules under task class "t": FieldEquals on
    /// `worker.status`, FieldEmpty on `contract.deadline`, and
    /// FieldGreater on `contract.budget`.
    fn fill_store() -> TruthStore {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "active".into(),
            task_class: "t".into(),
            description: "must be active".into(),
            condition: RuleCondition::FieldEquals {
                field: "worker.status".into(),
                value: "active".into(),
            },
            action: RuleAction::Reject {
                message: "worker not active".into(),
            },
        });
        s.add_rule(TruthRule {
            id: "deadline".into(),
            task_class: "t".into(),
            description: "deadline required".into(),
            condition: RuleCondition::FieldEmpty {
                field: "contract.deadline".into(),
            },
            action: RuleAction::Reject {
                message: "missing deadline".into(),
            },
        });
        s.add_rule(TruthRule {
            id: "budget".into(),
            task_class: "t".into(),
            description: "budget positive".into(),
            condition: RuleCondition::FieldGreater {
                field: "contract.budget".into(),
                threshold: 0,
            },
            action: RuleAction::Block {
                message: "budget must be positive".into(),
            },
        });
        s
    }

    #[test]
    fn evaluate_field_equals_pass_on_match() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "active"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(active.passed, "active condition should hold");
    }

    #[test]
    fn evaluate_field_equals_fail_on_mismatch() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "terminated"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(!active.passed, "terminated should fail active condition");
    }

    #[test]
    fn evaluate_field_equals_fail_on_missing() {
        let s = fill_store();
        let ctx = serde_json::json!({});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(!active.passed, "missing worker.status should fail");
    }

    #[test]
    fn evaluate_field_empty_pass_when_absent() {
        let s = fill_store();
        // FieldEmpty passes when the field is missing/null/empty string.
        // Deadline rule says "field empty means action fires" — so passed=true
        // here means the rule's condition held (deadline IS empty).
        let ctx = serde_json::json!({});
        let o = s.evaluate("t", &ctx);
        let deadline = o.iter().find(|r| r.rule_id == "deadline").unwrap();
        assert!(deadline.passed);
    }

    #[test]
    fn evaluate_field_empty_fail_when_present() {
        let s = fill_store();
        let ctx = serde_json::json!({"contract": {"deadline": "2026-05-01"}});
        let o = s.evaluate("t", &ctx);
        let deadline = o.iter().find(|r| r.rule_id == "deadline").unwrap();
        assert!(!deadline.passed, "non-empty deadline should fail FieldEmpty check");
    }

    #[test]
    fn evaluate_field_greater_pass_and_fail() {
        let s = fill_store();
        let ctx_ok = serde_json::json!({"contract": {"budget": 100}});
        let ctx_bad = serde_json::json!({"contract": {"budget": 0}});
        let ok = s.evaluate("t", &ctx_ok);
        let bad = s.evaluate("t", &ctx_bad);
        assert!(ok.iter().find(|r| r.rule_id == "budget").unwrap().passed);
        assert!(!bad.iter().find(|r| r.rule_id == "budget").unwrap().passed);
    }

    #[test]
    fn evaluate_always_condition_passes_unconditionally() {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "always".into(),
            task_class: "x".into(),
            description: "".into(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        let o = s.evaluate("x", &serde_json::json!(null));
        assert!(o[0].passed);
    }

    #[test]
    fn evaluate_preserves_action_regardless_of_outcome() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "active"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        // Action is attached whether the rule passed or not — the consumer
        // decides how to use it.
        assert_eq!(
            active.action,
            RuleAction::Reject {
                message: "worker not active".into()
            }
        );
    }

    #[test]
    fn evaluate_on_unknown_task_class_returns_empty() {
        let s = fill_store();
        let o = s.evaluate("nonexistent", &serde_json::json!({}));
        assert!(o.is_empty());
    }

    #[test]
    fn check_still_returns_actions_unconditionally_for_back_compat() {
        // Legacy API should still behave the same — no condition walking.
        let s = fill_store();
        let actions = s.check("t");
        assert_eq!(actions.len(), 3, "check returns one action per rule regardless of condition");
    }

    /// Local single-rule store used to test `FieldContainsAny` in
    /// isolation. The shipped `sql_query_guard_store()` is covered by the
    /// `shipped_sql_guard_*` tests further down.
    fn sql_guard_store() -> TruthStore {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "no-destructive".into(),
            task_class: "sql_query".into(),
            description: "SQL must not contain destructive verbs".into(),
            condition: RuleCondition::FieldContainsAny {
                field: "sql".into(),
                needles: vec![
                    "drop table".into(),
                    "drop schema".into(),
                    "truncate".into(),
                    "delete from".into(),
                ],
            },
            action: RuleAction::Reject {
                message: "destructive SQL rejected".into(),
            },
        });
        s
    }

    #[test]
    fn field_contains_any_matches_case_insensitively() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({"sql": "SELECT * FROM t; DROP TABLE users;"});
        let o = s.evaluate("sql_query", &ctx);
        assert!(o[0].passed, "condition holds when needle present (case-insensitive)");
    }

    #[test]
    fn field_contains_any_is_false_when_no_needle_matches() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({"sql": "SELECT count(*) FROM workers"});
        let o = s.evaluate("sql_query", &ctx);
        assert!(!o[0].passed, "benign SELECT should not match destructive needles");
    }

    #[test]
    fn field_contains_any_false_when_field_missing() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({});
        let o = s.evaluate("sql_query", &ctx);
        assert!(!o[0].passed, "missing field → condition cannot hold");
    }

    #[test]
    fn field_contains_any_empty_needles_list_never_matches() {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "empty".into(),
            task_class: "x".into(),
            description: "".into(),
            condition: RuleCondition::FieldContainsAny {
                field: "sql".into(),
                needles: vec![],
            },
            action: RuleAction::Pass,
        });
        let o = s.evaluate("x", &serde_json::json!({"sql": "anything"}));
        assert!(!o[0].passed, "no needles → any::<bool> is false");
    }

    // ── Shipped guard: exercise sql_query_guard_store() itself, not the
    //    local copy above (which lacks the "drop database" needle) ──

    #[test]
    fn shipped_sql_guard_flags_destructive_statement() {
        let s = sql_query_guard_store();
        let ctx = serde_json::json!({"sql": "DROP DATABASE prod"});
        let o = s.evaluate("sql_query", &ctx);
        let destructive = o
            .iter()
            .find(|r| r.rule_id == "no-destructive-sql")
            .unwrap();
        assert!(destructive.passed, "DROP DATABASE should match a destructive needle");
        let not_empty = o.iter().find(|r| r.rule_id == "sql-not-empty").unwrap();
        assert!(!not_empty.passed, "non-empty sql should not satisfy FieldEmpty");
    }

    #[test]
    fn shipped_sql_guard_flags_empty_sql() {
        let s = sql_query_guard_store();
        let o = s.evaluate("sql_query", &serde_json::json!({"sql": ""}));
        let destructive = o
            .iter()
            .find(|r| r.rule_id == "no-destructive-sql")
            .unwrap();
        assert!(!destructive.passed, "empty sql contains no destructive needle");
        let not_empty = o.iter().find(|r| r.rule_id == "sql-not-empty").unwrap();
        assert!(not_empty.passed, "empty sql should satisfy FieldEmpty");
    }
}