diff --git a/Cargo.toml b/Cargo.toml index d41d84f..c4566ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "crates/lance-bench", "crates/vectord-lance", "crates/truth", + "crates/validator", ] [workspace.dependencies] diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml new file mode 100644 index 0000000..7b7f585 --- /dev/null +++ b/crates/validator/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "validator" +version = "0.1.0" +edition = "2024" + +[dependencies] +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } diff --git a/crates/validator/src/devops.rs b/crates/validator/src/devops.rs new file mode 100644 index 0000000..796b448 --- /dev/null +++ b/crates/validator/src/devops.rs @@ -0,0 +1,44 @@ +//! DevOps validator scaffold — long-horizon. +//! +//! PRD: "scaffold only: stubbed Terraform/Ansible validators +//! (`terraform validate`, `ansible-lint`) for the long-horizon phase." +//! Shipped as Unimplemented stubs so the execution-loop dispatcher +//! has a consistent failure shape to surface ("phase 43 not wired") +//! instead of a missing-impl panic. 
+
+use crate::{Artifact, Report, Validator, ValidationError};
+
+/// Placeholder validator for Terraform plans.
+///
+/// Never inspects the artifact it is handed: every call fails with
+/// [`ValidationError::Unimplemented`].
+#[derive(Debug, Clone, Copy, Default)]
+pub struct TerraformValidator;
+
+impl Validator for TerraformValidator {
+    fn name(&self) -> &'static str {
+        "devops.terraform"
+    }
+
+    /// Always returns `Err(ValidationError::Unimplemented)` tagged with
+    /// `"terraform_plan"`, whatever artifact is passed in.
+    fn validate(&self, _artifact: &Artifact) -> Result<Report, ValidationError> {
+        Err(ValidationError::Unimplemented { artifact: "terraform_plan" })
+    }
+}
+
+/// Placeholder validator for Ansible playbooks.
+///
+/// Never inspects the artifact it is handed: every call fails with
+/// [`ValidationError::Unimplemented`].
+#[derive(Debug, Clone, Copy, Default)]
+pub struct AnsibleValidator;
+
+impl Validator for AnsibleValidator {
+    fn name(&self) -> &'static str {
+        "devops.ansible"
+    }
+
+    /// Always returns `Err(ValidationError::Unimplemented)` tagged with
+    /// `"ansible_playbook"`, whatever artifact is passed in.
+    fn validate(&self, _artifact: &Artifact) -> Result<Report, ValidationError> {
+        Err(ValidationError::Unimplemented { artifact: "ansible_playbook" })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn terraform_scaffold_returns_unimplemented() {
+        let r = TerraformValidator.validate(&Artifact::TerraformPlan(serde_json::json!({})));
+        assert!(matches!(r, Err(ValidationError::Unimplemented { .. })));
+    }
+
+    #[test]
+    fn ansible_scaffold_returns_unimplemented() {
+        let r = AnsibleValidator.validate(&Artifact::AnsiblePlaybook(serde_json::json!({})));
+        assert!(matches!(r, Err(ValidationError::Unimplemented { .. })));
+    }
+}
diff --git a/crates/validator/src/lib.rs b/crates/validator/src/lib.rs
new file mode 100644
index 0000000..4be1559
--- /dev/null
+++ b/crates/validator/src/lib.rs
@@ -0,0 +1,95 @@
+//! Phase 43 Validation Pipeline.
+//!
+//! PRD: "Staffing outputs run through schema / completeness /
+//! consistency / policy gates. Plug into Layer 5 execution loop —
+//! failure triggers observer-correction iteration."
+//!
+//! This crate provides the `Validator` trait + `Artifact` enum +
+//! Report/ValidationError types. Staffing validators (fill, email,
+//! playbook) and the DevOps scaffold live in submodules.
+//!
+//! Landed 2026-04-24 as a scaffold — the trait + types + module
+//! layout match the PRD. The staffing validators ship their cheap
+//! schema/length checks; the DevOps validators are `Unimplemented`
+//! stubs that return a clear "not yet implemented — phase 43
+//! scaffold" error rather than silently passing. The execution-loop
+//! integration
+//! 
(generate → validate → correct → retry) comes in a follow-up
+//! commit once the stubs are filled.
+
+use serde::{Deserialize, Serialize};
+use thiserror::Error;
+
+pub mod staffing;
+pub mod devops;
+
+/// What a validator saw. One variant per artifact class we validate.
+///
+/// Serialized adjacently tagged (`{"kind": "...", "payload": ...}`).
+/// With an internal tag the variant name would be merged into the
+/// payload object itself, where it clashes with payload keys of the
+/// same name (the email validator reads a payload-level `kind` key)
+/// and cannot represent payloads that are not JSON objects.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "kind", content = "payload")]
+pub enum Artifact {
+    /// A fill proposal from the staffing executor — shape is
+    /// `{fills: [{candidate_id, name}]}` per PRD.
+    FillProposal(serde_json::Value),
+    /// An email/SMS draft for outreach.
+    EmailDraft(serde_json::Value),
+    /// A playbook being sealed for memory.
+    Playbook(serde_json::Value),
+    /// Terraform plan output (scaffold, long-horizon).
+    TerraformPlan(serde_json::Value),
+    /// Ansible playbook (scaffold, long-horizon).
+    AnsiblePlaybook(serde_json::Value),
+}
+
+/// Success report. Empty `findings` means a clean pass. Populated
+/// findings with `Severity::Warning` means "acceptable but notable" —
+/// the artifact passes. `Severity::Error` means validation failed;
+/// the validator should return `Err(...)` in that case, not `Ok`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Report {
+    /// Observations collected while validating; empty on a clean pass.
+    pub findings: Vec<Finding>,
+    /// Wall-clock time the validator spent, in milliseconds.
+    pub elapsed_ms: u64,
+}
+
+/// One observation attached to a [`Report`].
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Finding {
+    /// Which field the finding is about.
+    // NOTE(review): presumably a path such as `fills[0].name`, as used
+    // by `ValidationError::Schema` — no validator emits findings yet.
+    pub field: String,
+    /// How serious the finding is.
+    pub severity: Severity,
+    /// Human-readable explanation.
+    pub message: String,
+}
+
+/// Severity of a [`Finding`]; serialized as `"warning"` / `"error"`.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum Severity {
+    Warning,
+    Error,
+}
+
+/// Validation failure — what went wrong + where + why. Returned as
+/// `Err` from `validate`. Execution loop catches these and feeds them
+/// to the observer-correction retry loop.
+// NOTE(review): `Unimplemented` holds a `&'static str` while the enum
+// derives `Deserialize`; that presumably limits deserialization to
+// `'static` input — confirm before reading these back from the wire.
+#[derive(Debug, Clone, Error, Serialize, Deserialize)]
+pub enum ValidationError {
+    /// Artifact schema doesn't match what we expected.
+    #[error("schema mismatch at {field}: {reason}")]
+    Schema { field: String, reason: String },
+    /// Required data missing (e.g. endorsed count != target count).
+    #[error("completeness: {reason}")]
+    Completeness { reason: String },
+    /// Data that's inconsistent with another source of truth
+    /// (e.g. worker_id doesn't exist in the workers table).
+    #[error("consistency: {reason}")]
+    Consistency { reason: String },
+    /// Policy violation — truth rule or access control said no.
+    #[error("policy: {reason}")]
+    Policy { reason: String },
+    /// Validator hasn't been implemented yet — scaffold stub.
+    #[error("validator not yet implemented for {artifact} — phase 43 scaffold")]
+    Unimplemented { artifact: &'static str },
+}
+
+/// Core validation contract. Implementations live in `staffing::*` and
+/// `devops::*`. The execution loop dispatches to the right impl based
+/// on the Artifact variant.
+pub trait Validator: Send + Sync {
+    /// Checks `artifact`, returning a [`Report`] when it passes and a
+    /// [`ValidationError`] describing why it was rejected otherwise.
+    fn validate(&self, artifact: &Artifact) -> Result<Report, ValidationError>;
+    /// Human-readable name for logs + Langfuse traces.
+    fn name(&self) -> &'static str;
+}
diff --git a/crates/validator/src/staffing/email.rs b/crates/validator/src/staffing/email.rs
new file mode 100644
index 0000000..264d491
--- /dev/null
+++ b/crates/validator/src/staffing/email.rs
@@ -0,0 +1,116 @@
+//! Email/SMS draft validator.
+//!
+//! PRD checks:
+//! - Schema (TO/BODY fields present)
+//! - Length (SMS ≤ 160 chars; email subject ≤ 78 chars)
+//! - PII absence (no SSN / salary leaked into outgoing text)
+//! - Worker-name consistency (name in message matches worker record)
+//!
+//! Scaffold implements schema + length. PII regex (SSN pattern,
+//! salary-number pattern) lives in `shared::pii::strip_pii` — to be
+//! plugged in by a follow-up, once the validator caller knows the
+//! worker record to cross-check name consistency.
+
+use crate::{Artifact, Report, Validator, ValidationError};
+use std::time::Instant;
+
+/// Validates outgoing email/SMS drafts carried in
+/// [`Artifact::EmailDraft`].
+#[derive(Debug, Clone, Copy, Default)]
+pub struct EmailValidator;
+
+/// Longest accepted SMS body, in characters.
+const SMS_MAX_CHARS: usize = 160;
+/// Longest accepted email subject, in characters.
+const EMAIL_SUBJECT_MAX_CHARS: usize = 78;
+
+/// Reads `field` from `value` as a string slice.
+///
+/// Returns `ValidationError::Schema` naming `field` when the key is
+/// absent or its value is not a JSON string. The error is built lazily,
+/// so the success path allocates nothing.
+fn required_str<'a>(
+    value: &'a serde_json::Value,
+    field: &str,
+) -> Result<&'a str, ValidationError> {
+    value
+        .get(field)
+        .and_then(|v| v.as_str())
+        .ok_or_else(|| ValidationError::Schema {
+            field: field.into(),
+            reason: "missing or not a string".into(),
+        })
+}
+
+impl Validator for EmailValidator {
+    fn name(&self) -> &'static str {
+        "staffing.email"
+    }
+
+    /// Checks a draft's schema and length limits.
+    ///
+    /// # Errors
+    /// - `Schema` when `artifact` is not an `EmailDraft`, or when `to`
+    ///   or `body` is missing or not a string (`to` is checked first).
+    /// - `Completeness` when the payload has `"kind": "sms"` and the
+    ///   body exceeds `SMS_MAX_CHARS`, or when a string `subject`
+    ///   exceeds `EMAIL_SUBJECT_MAX_CHARS`.
+    fn validate(&self, artifact: &Artifact) -> Result<Report, ValidationError> {
+        let started = Instant::now();
+        let Artifact::EmailDraft(value) = artifact else {
+            return Err(ValidationError::Schema {
+                field: "artifact".into(),
+                reason: format!("EmailValidator expects EmailDraft, got {artifact:?}"),
+            });
+        };
+
+        // Presence check only; the value is reserved for the future
+        // name-consistency check.
+        let _to = required_str(value, "to")?;
+        let body = required_str(value, "body")?;
+
+        // The limits are stated in characters, so count Unicode scalar
+        // values. `str::len` is a UTF-8 byte count and would reject
+        // non-ASCII text that is well within the limit.
+        let is_sms = value.get("kind").and_then(|k| k.as_str()) == Some("sms");
+        if is_sms {
+            let body_chars = body.chars().count();
+            if body_chars > SMS_MAX_CHARS {
+                return Err(ValidationError::Completeness {
+                    reason: format!("SMS body is {body_chars} chars, max {SMS_MAX_CHARS}"),
+                });
+            }
+        }
+
+        if let Some(subject) = value.get("subject").and_then(|v| v.as_str()) {
+            let subject_chars = subject.chars().count();
+            if subject_chars > EMAIL_SUBJECT_MAX_CHARS {
+                return Err(ValidationError::Completeness {
+                    reason: format!(
+                        "email subject is {subject_chars} chars, max {EMAIL_SUBJECT_MAX_CHARS}"
+                    ),
+                });
+            }
+        }
+
+        // TODO(phase-43 v2): PII scan + worker-name consistency.
+
+        Ok(Report {
+            findings: vec![],
+            // `as_millis` yields u128; clamp rather than silently truncate.
+            elapsed_ms: u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn long_sms_fails_completeness() {
+        let body = "x".repeat(200);
+        let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({
+            "to": "+15555550123",
+            "body": body,
+            "kind": "sms"
+        })));
+        assert!(matches!(r, Err(ValidationError::Completeness { .. })));
+    }
+
+    #[test]
+    fn multibyte_sms_within_char_limit_passes() {
+        // 160 characters but 320 UTF-8 bytes.
+        let body = "é".repeat(160);
+        let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({
+            "to": "+15555550123",
+            "body": body,
+            "kind": "sms"
+        })));
+        assert!(r.is_ok(), "160-char SMS should pass: {:?}", r);
+    }
+
+    #[test]
+    fn long_email_subject_fails_completeness() {
+        let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({
+            "to": "a@b.com",
+            "body": "hi",
+            "subject": "x".repeat(100)
+        })));
+        assert!(matches!(r, Err(ValidationError::Completeness { .. })));
+    }
+
+    #[test]
+    fn multibyte_subject_within_char_limit_passes() {
+        // 78 characters but 156 UTF-8 bytes.
+        let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({
+            "to": "a@b.com",
+            "body": "hi",
+            "subject": "ü".repeat(78)
+        })));
+        assert!(r.is_ok(), "78-char subject should pass: {:?}", r);
+    }
+
+    #[test]
+    fn missing_to_fails_schema() {
+        let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({"body": "hi"})));
+        assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "to"));
+    }
+
+    #[test]
+    fn well_formed_email_passes() {
+        let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({
+            "to": "hiring@example.com",
+            "subject": "Interview: Friday 10am",
+            "body": "Hi Jane — confirming interview Friday 10am."
+        })));
+        assert!(r.is_ok(), "well-formed email should pass: {:?}", r);
+    }
+}
diff --git a/crates/validator/src/staffing/fill.rs b/crates/validator/src/staffing/fill.rs
new file mode 100644
index 0000000..bbe4bf6
--- /dev/null
+++ b/crates/validator/src/staffing/fill.rs
@@ -0,0 +1,103 @@
+//! Fill-proposal validator.
+//!
+//! PRD checks:
+//! - Schema compliance (propose_done shape matches
+//!   `{fills: [{candidate_id, name}]}`)
+//! - Completeness (endorsed count == target_count)
+//! - Worker existence (every candidate_id present in workers_500k)
+//! - Status check (active, not_on_client_blacklist)
+//! - Geo/role match (worker city/state/role matches contract)
+//!
+//! Today this is a scaffold — schema check is real (it's cheap); the
+//! 
worker-existence / status / geo checks need a catalog lookup and
+//! land in a follow-up when the catalog query helper is wired into
+//! this crate.
+
+use crate::{Artifact, Report, Validator, ValidationError};
+use std::time::Instant;
+
+/// Validates fill proposals carried in [`Artifact::FillProposal`].
+#[derive(Debug, Clone, Copy, Default)]
+pub struct FillValidator;
+
+/// Keys every entry of `fills` must carry, in the order they are checked.
+const REQUIRED_FILL_KEYS: [&str; 2] = ["candidate_id", "name"];
+
+impl Validator for FillValidator {
+    fn name(&self) -> &'static str {
+        "staffing.fill"
+    }
+
+    /// Checks that a proposal has the `{fills: [{candidate_id, name}]}`
+    /// shape.
+    ///
+    /// # Errors
+    /// `Schema` when `artifact` is not a `FillProposal`, when the
+    /// top-level `fills` key is absent or not an array, or when any
+    /// entry lacks `candidate_id` or `name` (absent or JSON `null`). The
+    /// error's `field` names the first offending entry, e.g.
+    /// `fills[0].candidate_id`.
+    fn validate(&self, artifact: &Artifact) -> Result<Report, ValidationError> {
+        let started = Instant::now();
+        let Artifact::FillProposal(value) = artifact else {
+            return Err(ValidationError::Schema {
+                field: "artifact".into(),
+                reason: format!("FillValidator expects FillProposal, got {artifact:?}"),
+            });
+        };
+
+        // Schema check — the only real validation shipped in this
+        // scaffold. Catches the common "model emitted prose instead of
+        // JSON" failure mode before the consistency checks even run.
+        let fills = value
+            .get("fills")
+            .and_then(|f| f.as_array())
+            .ok_or_else(|| ValidationError::Schema {
+                field: "fills".into(),
+                reason: "expected top-level `fills` array".into(),
+            })?;
+
+        for (i, fill) in fills.iter().enumerate() {
+            for key in REQUIRED_FILL_KEYS {
+                // An explicit `null` identifies nothing, so it is
+                // rejected the same way as an absent key.
+                if matches!(fill.get(key), None | Some(serde_json::Value::Null)) {
+                    return Err(ValidationError::Schema {
+                        field: format!("fills[{i}].{key}"),
+                        reason: "missing or null".into(),
+                    });
+                }
+            }
+        }
+
+        // TODO(phase-43 v2): worker-existence / status / geo checks.
+        // Need a catalog query handle injected into FillValidator's
+        // constructor — out of scope for the scaffold.
+
+        Ok(Report {
+            findings: vec![],
+            // `as_millis` yields u128; clamp rather than silently truncate.
+            elapsed_ms: u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn wrong_artifact_type_fails_schema() {
+        let r = FillValidator.validate(&Artifact::EmailDraft(serde_json::json!({})));
+        assert!(matches!(r, Err(ValidationError::Schema { .. })));
+    }
+
+    #[test]
+    fn missing_fills_array_fails_schema() {
+        let r = FillValidator.validate(&Artifact::FillProposal(serde_json::json!({})));
+        assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "fills"));
+    }
+
+    #[test]
+    fn fill_without_candidate_id_fails() {
+        let r = FillValidator.validate(&Artifact::FillProposal(serde_json::json!({
+            "fills": [{"name": "Jane"}]
+        })));
+        assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field.contains("candidate_id")));
+    }
+
+    #[test]
+    fn fill_with_null_name_fails() {
+        let r = FillValidator.validate(&Artifact::FillProposal(serde_json::json!({
+            "fills": [{"candidate_id": "W-123", "name": null}]
+        })));
+        assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "fills[0].name"));
+    }
+
+    #[test]
+    fn well_formed_proposal_passes_schema() {
+        let r = FillValidator.validate(&Artifact::FillProposal(serde_json::json!({
+            "fills": [
+                {"candidate_id": "W-123", "name": "Jane Doe"},
+                {"candidate_id": "W-456", "name": "John Smith"}
+            ]
+        })));
+        assert!(r.is_ok(), "well-formed proposal should pass schema: {:?}", r);
+    }
+}
diff --git a/crates/validator/src/staffing/mod.rs b/crates/validator/src/staffing/mod.rs
new file mode 100644
index 0000000..c959cd2
--- /dev/null
+++ b/crates/validator/src/staffing/mod.rs
@@ -0,0 +1,8 @@
+//! Staffing validators — fill proposals, email/SMS drafts, sealed
+//! playbooks. Phase 43 PRD: "the 0→85% pattern reproduces on real
+//! staffing tasks — the iteration loop with validation in place is
+//! what made small models successful."
+
+pub mod fill;
+pub mod email;
+pub mod playbook;
diff --git a/crates/validator/src/staffing/playbook.rs b/crates/validator/src/staffing/playbook.rs
new file mode 100644
index 0000000..0f8bb5d
--- /dev/null
+++ b/crates/validator/src/staffing/playbook.rs
@@ -0,0 +1,134 @@
+//! Sealed playbook validator.
+//!
+//! PRD checks:
+//! - Operation format (`fill: Role xN in City, ST`)
+//! - endorsed_names non-empty, ≤ target_count × 2
+//! 
- fingerprint populated (Phase 25 validity window requirement)
+
+use crate::{Artifact, Report, Validator, ValidationError};
+use std::time::Instant;
+
+/// Validates sealed playbooks carried in [`Artifact::Playbook`].
+#[derive(Debug, Clone, Copy, Default)]
+pub struct PlaybookValidator;
+
+impl Validator for PlaybookValidator {
+    fn name(&self) -> &'static str {
+        "staffing.playbook"
+    }
+
+    /// Checks a playbook's operation prefix, endorsed names and
+    /// fingerprint, in that order.
+    ///
+    /// # Errors
+    /// - `Schema` when `artifact` is not a `Playbook`; when `operation`
+    ///   is missing, not a string, or lacks the `fill:` prefix; when
+    ///   `endorsed_names` is missing or not an array; or when
+    ///   `fingerprint` is missing, not a string, or empty.
+    /// - `Completeness` when `endorsed_names` is empty, or holds more
+    ///   than `target_count × 2` entries (checked only when
+    ///   `target_count` is present as an unsigned integer).
+    fn validate(&self, artifact: &Artifact) -> Result<Report, ValidationError> {
+        let started = Instant::now();
+        let Artifact::Playbook(value) = artifact else {
+            return Err(ValidationError::Schema {
+                field: "artifact".into(),
+                reason: format!("PlaybookValidator expects Playbook, got {artifact:?}"),
+            });
+        };
+
+        // Operation format: "fill: Role xN in City, ST" — at minimum
+        // we check the string-shape. Fuller grammar parse lives in
+        // phase 25 code where operations are structured beyond strings.
+        let op = value
+            .get("operation")
+            .and_then(|v| v.as_str())
+            .ok_or_else(|| ValidationError::Schema {
+                field: "operation".into(),
+                reason: "missing or not a string".into(),
+            })?;
+        if !op.starts_with("fill:") {
+            return Err(ValidationError::Schema {
+                field: "operation".into(),
+                reason: format!("expected `fill: ...` prefix, got {op:?}"),
+            });
+        }
+
+        let endorsed = value
+            .get("endorsed_names")
+            .and_then(|v| v.as_array())
+            .ok_or_else(|| ValidationError::Schema {
+                field: "endorsed_names".into(),
+                reason: "missing or not an array".into(),
+            })?;
+        if endorsed.is_empty() {
+            return Err(ValidationError::Completeness {
+                reason: "endorsed_names must be non-empty".into(),
+            });
+        }
+
+        if let Some(target) = value.get("target_count").and_then(|v| v.as_u64()) {
+            // `target_count` comes from the artifact, so it may be
+            // arbitrarily large. Saturate the doubling and the
+            // narrowing: a plain `(target * 2) as usize` overflows
+            // (panic in debug, wrap in release) and truncates on
+            // 32-bit targets.
+            let max = usize::try_from(target.saturating_mul(2)).unwrap_or(usize::MAX);
+            if endorsed.len() > max {
+                return Err(ValidationError::Completeness {
+                    reason: format!(
+                        "endorsed_names ({}) exceeds target_count × 2 ({max})",
+                        endorsed.len()
+                    ),
+                });
+            }
+        }
+
+        // Absent, non-string and empty fingerprints are all rejected.
+        let fingerprint = value.get("fingerprint").and_then(|v| v.as_str());
+        if !matches!(fingerprint, Some(s) if !s.is_empty()) {
+            return Err(ValidationError::Schema {
+                field: "fingerprint".into(),
+                reason: "missing — required for Phase 25 validity window".into(),
+            });
+        }
+
+        Ok(Report {
+            findings: vec![],
+            // `as_millis` yields u128; clamp rather than silently truncate.
+            elapsed_ms: u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn well_formed_playbook_passes() {
+        let r = PlaybookValidator.validate(&Artifact::Playbook(serde_json::json!({
+            "operation": "fill: Welder x2 in Toledo, OH",
+            "endorsed_names": ["W-123", "W-456"],
+            "target_count": 2,
+            "fingerprint": "abc123"
+        })));
+        assert!(r.is_ok(), "got {:?}", r);
+    }
+
+    #[test]
+    fn empty_endorsed_names_fails_completeness() {
+        let r = PlaybookValidator.validate(&Artifact::Playbook(serde_json::json!({
+            "operation": "fill: Welder x2 in Toledo, OH",
+            "endorsed_names": [],
+            "fingerprint": "abc"
+        })));
+        assert!(matches!(r, Err(ValidationError::Completeness { .. })));
+    }
+
+    #[test]
+    fn overfull_endorsed_names_fails_completeness() {
+        let r = PlaybookValidator.validate(&Artifact::Playbook(serde_json::json!({
+            "operation": "fill: Welder x1 in Toledo, OH",
+            "endorsed_names": ["a", "b", "c"],
+            "target_count": 1,
+            "fingerprint": "abc"
+        })));
+        assert!(matches!(r, Err(ValidationError::Completeness { .. })));
+    }
+
+    #[test]
+    fn huge_target_count_does_not_overflow() {
+        let r = PlaybookValidator.validate(&Artifact::Playbook(serde_json::json!({
+            "operation": "fill: Welder x2 in Toledo, OH",
+            "endorsed_names": ["W-123"],
+            "target_count": u64::MAX,
+            "fingerprint": "abc123"
+        })));
+        assert!(r.is_ok(), "got {:?}", r);
+    }
+
+    #[test]
+    fn missing_fingerprint_fails_schema() {
+        let r = PlaybookValidator.validate(&Artifact::Playbook(serde_json::json!({
+            "operation": "fill: X x1 in A, B",
+            "endorsed_names": ["a"]
+        })));
+        assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "fingerprint"));
+    }
+
+    #[test]
+    fn wrong_operation_prefix_fails_schema() {
+        let r = PlaybookValidator.validate(&Artifact::Playbook(serde_json::json!({
+            "operation": "sms_draft: hello",
+            "endorsed_names": ["a"],
+            "fingerprint": "x"
+        })));
+        assert!(matches!(r, Err(ValidationError::Schema { .. })));
+    }
+}