diff --git a/crates/truth/Cargo.toml b/crates/truth/Cargo.toml index dbdbabb..756fe2d 100644 --- a/crates/truth/Cargo.toml +++ b/crates/truth/Cargo.toml @@ -7,4 +7,5 @@ edition = "2024" serde = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true } -tracing = { workspace = true } \ No newline at end of file +tracing = { workspace = true } +toml = { workspace = true } \ No newline at end of file diff --git a/crates/truth/src/lib.rs b/crates/truth/src/lib.rs index 6a48b72..78fb8c5 100644 --- a/crates/truth/src/lib.rs +++ b/crates/truth/src/lib.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; pub mod staffing; pub mod devops; +pub mod loader; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct TruthRule { @@ -54,6 +55,15 @@ impl TruthStore { .push(rule); } + /// All rule IDs across every task class. Used by the file loader + /// to detect duplicate-ID collisions before registering new rules. + pub fn all_rule_ids(&self) -> std::collections::HashSet { + self.rules + .values() + .flat_map(|v| v.iter().map(|r| r.id.clone())) + .collect() + } + pub fn get_rules(&self, task_class: &str) -> Vec<&TruthRule> { self.rules .get(task_class) diff --git a/crates/truth/src/loader.rs b/crates/truth/src/loader.rs new file mode 100644 index 0000000..79741da --- /dev/null +++ b/crates/truth/src/loader.rs @@ -0,0 +1,187 @@ +//! File-backed TruthRule loader (Phase 42 PRD). +//! +//! PRD: "truth/ dir at repo root — rule files, versioned in git." +//! This module walks a directory, parses every `*.toml` file it finds, +//! and registers the rules into a caller-supplied store. Rule IDs must +//! be unique across the combined set — duplicate-ID collisions are +//! load-time errors. +//! +//! The TOML format matches the shape at `truth/README.md`. The same +//! `RuleCondition` + `RuleAction` enums used by the in-code registrars +//! deserialize directly from `condition = { type = "FieldEquals", ... }` +//! thanks to `#[serde(tag = "type")]`. 
+ +use std::fs; +use std::path::Path; +use serde::Deserialize; + +use crate::{TruthRule, TruthStore}; + +/// Deserialization wrapper — a TOML file is a list of [[rule]] blocks. +#[derive(Deserialize)] +struct RuleFile { + #[serde(default)] + rule: Vec, +} + +/// Load every `*.toml` file in `dir` and add its rules to `store`. +/// Returns the number of rules loaded across all files. +/// +/// Errors: +/// - directory doesn't exist or can't be read +/// - any `.toml` file fails to parse +/// - any rule ID collides with an existing rule (same ID already +/// registered in the store) +/// +/// Non-goals: recursive walk (flat dir only), hot reload (one-shot load). +pub fn load_from_dir(store: &mut TruthStore, dir: impl AsRef) -> Result { + let dir = dir.as_ref(); + let entries = fs::read_dir(dir) + .map_err(|e| format!("read_dir {}: {e}", dir.display()))?; + + let mut loaded_ids = store.all_rule_ids(); + let mut count = 0usize; + + let mut paths: Vec<_> = entries + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| p.extension().and_then(|s| s.to_str()) == Some("toml")) + .collect(); + // Deterministic order — alphabetical by filename. Matters when a + // cross-file ID collision happens; the earlier filename wins + // nothing (both error), but the error message is reproducible. 
+ paths.sort(); + + for path in paths { + let raw = fs::read_to_string(&path) + .map_err(|e| format!("read {}: {e}", path.display()))?; + let file: RuleFile = toml::from_str(&raw) + .map_err(|e| format!("parse {}: {e}", path.display()))?; + for rule in file.rule { + if !loaded_ids.insert(rule.id.clone()) { + return Err(format!( + "duplicate rule id '{}' from {}", + rule.id, + path.display() + )); + } + store.add_rule(rule); + count += 1; + } + } + + Ok(count) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + fn write_file(dir: &Path, name: &str, content: &str) { + let path = dir.join(name); + let mut f = fs::File::create(&path).unwrap(); + f.write_all(content.as_bytes()).unwrap(); + } + + #[test] + fn loads_rules_from_toml_files() { + let tmp = tempdir_for("loader_test"); + write_file(&tmp, "a.toml", r#" +[[rule]] +id = "a-rule" +task_class = "test" +description = "test rule" +action = { type = "Pass" } + +[rule.condition] +type = "Always" +"#); + let mut store = TruthStore::new(); + let n = load_from_dir(&mut store, &tmp).unwrap(); + assert_eq!(n, 1); + assert_eq!(store.get_rules("test").len(), 1); + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + fn rejects_duplicate_rule_ids() { + let tmp = tempdir_for("dup_ids"); + write_file(&tmp, "a.toml", r#" +[[rule]] +id = "same" +task_class = "t" +description = "" +action = { type = "Pass" } +[rule.condition] +type = "Always" +"#); + write_file(&tmp, "b.toml", r#" +[[rule]] +id = "same" +task_class = "t" +description = "" +action = { type = "Pass" } +[rule.condition] +type = "Always" +"#); + let mut store = TruthStore::new(); + let err = load_from_dir(&mut store, &tmp).unwrap_err(); + assert!(err.contains("duplicate"), "got: {err}"); + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + fn duplicate_with_in_code_rule_is_rejected() { + // Existing in-store IDs count as "already registered." Operator + // can't shadow an in-code rule by file without changing the ID. 
+ let tmp = tempdir_for("dup_in_code"); + write_file(&tmp, "conflict.toml", r#" +[[rule]] +id = "worker-active" +task_class = "staffing.fill" +description = "file attempt" +action = { type = "Pass" } +[rule.condition] +type = "Always" +"#); + // staffing_rules registers "worker-active" + let mut store = crate::staffing::staffing_rules(TruthStore::new()); + let err = load_from_dir(&mut store, &tmp).unwrap_err(); + assert!(err.contains("duplicate") && err.contains("worker-active")); + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + fn skips_non_toml_files() { + let tmp = tempdir_for("skip_non_toml"); + write_file(&tmp, "a.toml", r#" +[[rule]] +id = "x" +task_class = "t" +description = "" +action = { type = "Pass" } +[rule.condition] +type = "Always" +"#); + write_file(&tmp, "README.md", "not a toml file"); + let mut store = TruthStore::new(); + let n = load_from_dir(&mut store, &tmp).unwrap(); + assert_eq!(n, 1); // README.md ignored + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + fn missing_dir_returns_error() { + let mut store = TruthStore::new(); + let err = load_from_dir(&mut store, "/nonexistent/path/here").unwrap_err(); + assert!(err.contains("read_dir")); + } + + fn tempdir_for(tag: &str) -> std::path::PathBuf { + let dir = std::env::temp_dir().join(format!("truth_loader_{}_{}", tag, + std::process::id())); + fs::create_dir_all(&dir).unwrap(); + dir + } +} diff --git a/truth/README.md b/truth/README.md new file mode 100644 index 0000000..e259107 --- /dev/null +++ b/truth/README.md @@ -0,0 +1,71 @@ +# Truth rules — file-backed policy + +Phase 42 PRD: *"truth/ dir at repo root — rule files, versioned in git."* + +This directory is the canonical home for TruthStore rules loaded from +disk. Each `*.toml` file holds a set of `TruthRule` records for one +task class. The truth crate's `load_from_dir(path)` walks this +directory, parses every `.toml` file, and registers the rules it finds. 
+ +## Structure + +``` +truth/ +├── README.md ← this file +├── staffing.fill.toml ← rules for task_class="staffing.fill" +└── staffing.any.toml ← rules for task_class="staffing.any" +``` + +File naming is informational — `load_from_dir` respects whatever +`task_class` the rule declares internally, NOT the filename. Using +task-class-matching filenames is a convention for humans reading the +git tree. + +## Rule shape + +```toml +[[rule]] +id = "worker-active" +task_class = "staffing.fill" +description = "Worker must be active" +condition = { type = "FieldEquals", field = "worker.status", value = "active" } +action = { type = "Pass" } +``` + +`condition.type` is one of: + - `Always` — always true + - `FieldEquals { field, value }` + - `FieldMismatch { field, value }` + - `FieldEmpty { field }` + - `FieldGreater { field, threshold }` + - `FieldContainsAny { field, needles }` + +`action.type` is one of: + - `Pass` — rule informational; no enforcement + - `Reject { message }` — short-circuit with error + - `Redact { fields }` — mutate the context, strip fields + - `Block { message }` — hard stop, alert + +## Composition + +The crate's `default_truth_store()` continues to register rules +**in code** for backward-compat. Operators can layer file-backed +rules ON TOP via `loader::load_from_dir`, which mutates the store in +place and returns the number of rules it loaded: + +```rust +let mut store = truth::default_truth_store(); +let loaded = truth::loader::load_from_dir(&mut store, "/home/profit/lakehouse/truth")?; +``` + +File-loaded rules are additive — they do NOT replace in-code rules. +This lets the staffing team tune rules at the file level (edit a +threshold, add a new `FieldContainsAny` blocklist) without waiting +for a code deploy. + +## Explicit non-goals + +- **No hot reload** — per Phase 42 PRD ("Truth reload is explicit + in this phase"). Operators restart the gateway (or, in a future + phase, POST to a `/v1/context` refresh endpoint) to pick up changes. +- **No inheritance** — each file stands alone; rule IDs must be unique + across all files. 
Duplicate-ID detection is a load-time error. diff --git a/truth/staffing.any.toml b/truth/staffing.any.toml new file mode 100644 index 0000000..a199d42 --- /dev/null +++ b/truth/staffing.any.toml @@ -0,0 +1,20 @@ +# Phase 42 — staffing.any task class rules (file-backed). +# +# Rules that apply across ALL staffing task classes (fill, rescue, +# sms_draft, etc). The router registers them once and evaluates them +# on every staffing.* call. + +[[rule]] +id = "any.no-destructive-sql-in-context" +task_class = "staffing.any" +description = "Reject staffing calls whose SQL context contains destructive verbs" +action = { type = "Reject", message = "destructive SQL rejected by staffing.any gate" } + +[rule.condition] +type = "FieldContainsAny" +field = "sql_context" +needles = ["drop table", "truncate", "delete from", "drop schema", "drop database"] + +# Additional staffing.any rules (e.g. PII scrubbing, rate limits) can be +# layered here; see the in-code staffing_rules() for the pii-redact rule +# that ships by default. diff --git a/truth/staffing.fill.toml b/truth/staffing.fill.toml new file mode 100644 index 0000000..e9e71f9 --- /dev/null +++ b/truth/staffing.fill.toml @@ -0,0 +1,32 @@ +# Phase 42 — staffing.fill task class rules (file-backed). +# +# These rules mirror what crates/truth/src/staffing.rs registers in code +# via staffing_rules(). Both sets load at gateway startup; rule IDs MUST +# be unique across the combined set, so operator-file overrides require +# changing the in-code registration (or vice versa). +# +# Edit a threshold / add a needle / change a description? File-level +# edits land without a code deploy. Schema changes (new RuleCondition +# variants) still need a code bump. 

[[rule]]
# The id is kept stable for anything that already references it, but note
# that the condition below only checks that contract.target_count is
# present. It does not compare the endorsed count against target_count,
# so the description and reject message describe the presence check that
# actually runs.
id = "fill.endorsed-count-matches-target"
task_class = "staffing.fill"
description = "contract.target_count must be present and non-empty"
action = { type = "Reject", message = "target_count is required" }

[rule.condition]
type = "FieldEmpty"
field = "contract.target_count"
# When target_count is absent entirely, the rule fires and we reject.
# Callers must include target_count; this is the first gate.

[[rule]]
id = "fill.city-required"
task_class = "staffing.fill"
description = "contract.target_city must be present and non-empty"
action = { type = "Reject", message = "target_city is required" }

[rule.condition]
type = "FieldEmpty"
field = "contract.target_city"