//! Phase 43 Validation Pipeline. //! //! PRD: "Staffing outputs run through schema / completeness / //! consistency / policy gates. Plug into Layer 5 execution loop — //! failure triggers observer-correction iteration." //! //! This crate provides the `Validator` trait + `Artifact` enum + //! Report/ValidationError types. Staffing validators (fill, email, //! playbook) and the DevOps scaffold live in submodules. //! //! Landed 2026-04-24 as a scaffold — the trait + types + module //! layout match the PRD; individual validator implementations are //! `Unimplemented` stubs that return a clear "phase 43 not wired" //! error rather than silently passing. The execution-loop integration //! (generate → validate → correct → retry) comes in a follow-up //! commit once the stubs are filled. use serde::{Deserialize, Serialize}; use thiserror::Error; pub mod staffing; pub mod devops; /// What a validator saw. One variant per artifact class we validate. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind")] pub enum Artifact { /// A fill proposal from the staffing executor — shape is /// `{fills: [{candidate_id, name}]}` per PRD. FillProposal(serde_json::Value), /// An email/SMS draft for outreach. EmailDraft(serde_json::Value), /// A playbook being sealed for memory. Playbook(serde_json::Value), /// Terraform plan output (scaffold, long-horizon). TerraformPlan(serde_json::Value), /// Ansible playbook (scaffold, long-horizon). AnsiblePlaybook(serde_json::Value), } /// Success report. Empty `findings` means a clean pass. Populated /// findings with `Severity::Warning` means "acceptable but notable" — /// the artifact passes. `Severity::Error` means validation failed; /// the validator should return `Err(...)` in that case, not `Ok`. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Report { pub findings: Vec, pub elapsed_ms: u64, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Finding { pub field: String, pub severity: Severity, pub message: String, } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] pub enum Severity { Warning, Error, } /// Validation failure — what went wrong + where + why. Returned as /// `Err` from `validate`. Execution loop catches these and feeds them /// to the observer-correction retry loop. #[derive(Debug, Clone, Error, Serialize, Deserialize)] pub enum ValidationError { /// Artifact schema doesn't match what we expected. #[error("schema mismatch at {field}: {reason}")] Schema { field: String, reason: String }, /// Required data missing (e.g. endorsed count != target count). #[error("completeness: {reason}")] Completeness { reason: String }, /// Data that's inconsistent with another source of truth /// (e.g. worker_id doesn't exist in the workers table). #[error("consistency: {reason}")] Consistency { reason: String }, /// Policy violation — truth rule or access control said no. #[error("policy: {reason}")] Policy { reason: String }, /// Validator hasn't been implemented yet — scaffold stub. #[error("validator not yet implemented for {artifact} — phase 43 scaffold")] Unimplemented { artifact: &'static str }, } /// Core validation contract. Implementations live in `staffing::*` and /// `devops::*`. The execution loop dispatches to the right impl based /// on the Artifact variant. pub trait Validator: Send + Sync { fn validate(&self, artifact: &Artifact) -> Result; /// Human-readable name for logs + Langfuse traces. fn name(&self) -> &'static str; } // ─── Worker lookup (Phase 43 v2) ──────────────────────────────────────── // // Validators that cross-check artifacts against the worker roster // (FillValidator, EmailValidator) take an `Arc` at // construction. Keeping the trait sync + in-memory mirrors the // lakehouse pattern of "load truth into memory, validate against // snapshot, refresh periodically" rather than per-call DB hits. // // Production impl: wrap a parquet snapshot loaded from // `data/datasets/workers_500k.parquet` (or its safe view counterpart // once Track A.B lands). Tests use `InMemoryWorkerLookup`. /// One worker row from the staffing roster — the fields validators /// actually read. Anything not on this struct (resume_text, scores, /// communications) is intentionally hidden from the validator path. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct WorkerRecord { pub candidate_id: String, pub name: String, /// Free-form. Validators check for `"active"` (any other value /// fails the status check). Common values from existing data: /// "active", "inactive", "placed", "blacklisted". pub status: String, pub city: Option, pub state: Option, pub role: Option, /// Client ids this worker has been blacklisted from. Populated /// from joining a blacklist table; empty when not provided. #[serde(default)] pub blacklisted_clients: Vec, } /// Worker lookup contract. Sync by design — implementations should /// hold an in-memory snapshot, not perform per-call I/O. pub trait WorkerLookup: Send + Sync { fn find(&self, candidate_id: &str) -> Option; /// Number of workers in the snapshot. Default 0 for impls that /// genuinely don't know (e.g. a future SQL-backed lookup that /// counts on demand). InMemoryWorkerLookup overrides with the /// HashMap size; ParquetWorkerLookup constructs an /// InMemoryWorkerLookup so it inherits the override. Used by /// /v1/health to report data-load status during production /// switchover (the Chicago dataset replaces synthetic test data; /// the health endpoint is how operators verify the new file /// loaded correctly without restart-and-pray). fn len(&self) -> usize { 0 } } /// HashMap-backed lookup. Used by validator unit tests + as a /// reasonable bootstrap impl for production once the parquet loader /// fills it on startup. pub struct InMemoryWorkerLookup { rows: std::collections::HashMap, } impl InMemoryWorkerLookup { pub fn new() -> Self { Self { rows: Default::default() } } pub fn from_records(records: Vec) -> Self { let mut rows = std::collections::HashMap::with_capacity(records.len()); for r in records { rows.insert(r.candidate_id.clone(), r); } Self { rows } } pub fn insert(&mut self, record: WorkerRecord) { self.rows.insert(record.candidate_id.clone(), record); } pub fn len(&self) -> usize { self.rows.len() } pub fn is_empty(&self) -> bool { self.rows.is_empty() } } impl Default for InMemoryWorkerLookup { fn default() -> Self { Self::new() } } impl WorkerLookup for InMemoryWorkerLookup { fn find(&self, candidate_id: &str) -> Option { self.rows.get(candidate_id).cloned() } fn len(&self) -> usize { self.rows.len() } }