//! Email/SMS draft validator (Phase 43 v2 — real PII + name checks). //! //! PRD checks: //! - Schema (TO/BODY fields present) //! - Length (SMS ≤ 160 chars; email subject ≤ 78 chars) //! - PII absence (no SSN / salary leaked into outgoing text) //! - Worker-name consistency (name in message matches worker record) //! //! Like FillValidator, EmailValidator takes `Arc` at //! construction. The contract metadata (which worker the message is //! about) travels under `_context.candidate_id` in the JSON payload. //! When `_context.candidate_id` is present and resolves, the validator //! cross-checks that the worker's name appears verbatim in the body. //! //! PII detection is std-only (no regex dep) — a hand-rolled scan //! covers the patterns we actually care about: SSN (NNN-NN-NNNN), //! salary statements ("salary" / "compensation" near a $ amount). use crate::{ Artifact, Report, Validator, ValidationError, WorkerLookup, }; use std::sync::Arc; use std::time::Instant; pub struct EmailValidator { workers: Arc, } impl EmailValidator { pub fn new(workers: Arc) -> Self { Self { workers } } } const SMS_MAX_CHARS: usize = 160; const EMAIL_SUBJECT_MAX_CHARS: usize = 78; impl Validator for EmailValidator { fn name(&self) -> &'static str { "staffing.email" } fn validate(&self, artifact: &Artifact) -> Result { let started = Instant::now(); let value = match artifact { Artifact::EmailDraft(v) => v, other => return Err(ValidationError::Schema { field: "artifact".into(), reason: format!("EmailValidator expects EmailDraft, got {other:?}"), }), }; let _to = value.get("to").and_then(|v| v.as_str()).ok_or( ValidationError::Schema { field: "to".into(), reason: "missing or not a string".into(), }, )?; let body = value.get("body").and_then(|v| v.as_str()).ok_or( ValidationError::Schema { field: "body".into(), reason: "missing or not a string".into(), }, )?; let is_sms = value.get("kind").and_then(|k| k.as_str()) == Some("sms"); if is_sms && body.len() > SMS_MAX_CHARS { return Err(ValidationError::Completeness { reason: format!("SMS body is {} chars, max {SMS_MAX_CHARS}", body.len()), }); } if let Some(subject) = value.get("subject").and_then(|v| v.as_str()) { if subject.len() > EMAIL_SUBJECT_MAX_CHARS { return Err(ValidationError::Completeness { reason: format!( "email subject is {} chars, max {EMAIL_SUBJECT_MAX_CHARS}", subject.len() ), }); } } // ── PII scan on body + subject combined ── let scanned = format!( "{} {}", value.get("subject").and_then(|v| v.as_str()).unwrap_or(""), body ); if contains_ssn_pattern(&scanned) { return Err(ValidationError::Policy { reason: "body contains an SSN-shaped sequence (NNN-NN-NNNN); strip before send".into(), }); } if contains_salary_disclosure(&scanned) { return Err(ValidationError::Policy { reason: "body discloses salary/compensation amount; staffing PII rule says strip before send".into(), }); } // ── Worker-name consistency ── let candidate_id = value.get("_context") .and_then(|c| c.get("candidate_id")) .and_then(|v| v.as_str()); let mut findings: Vec = vec![]; if let Some(cid) = candidate_id { match self.workers.find(cid) { Some(worker) => { // Body should mention the worker's name (or at least // their first name) — drafts that address a different // person than the contracted worker are a recurring // class of LLM mistake. let first = worker.name.split_whitespace().next().unwrap_or(&worker.name); let body_lower = body.to_lowercase(); let first_lower = first.to_lowercase(); if !first_lower.is_empty() && !body_lower.contains(&first_lower) { findings.push(crate::Finding { field: "body".into(), severity: crate::Severity::Warning, message: format!( "body doesn't mention worker first name {first:?} (candidate_id {cid:?})" ), }); } // Also detect *another* worker's name appearing in // place of the contracted one — outright wrong-target. // We can only check this when we have a different // expected name; skip if the body is generic enough. } None => { return Err(ValidationError::Consistency { reason: format!( "_context.candidate_id {cid:?} not found in worker roster" ), }); } } } Ok(Report { findings, elapsed_ms: started.elapsed().as_millis() as u64, }) } } // ─── PII scanners (std-only) ──────────────────────────────────────────── /// Detects an SSN-shaped sequence: 3 digits, dash, 2 digits, dash, 4 digits. /// Walks the byte buffer; rejects sequences that are part of a longer run /// of digits (so phone-area-code-like NNN-NNN-NNNN isn't flagged). Tight /// false-positive surface: it's specifically the NNN-NN-NNNN shape. fn contains_ssn_pattern(s: &str) -> bool { let bytes = s.as_bytes(); if bytes.len() < 11 { return false; } for i in 0..=bytes.len().saturating_sub(11) { let win = &bytes[i..i + 11]; let shape = win.iter().enumerate().all(|(j, &b)| match j { 0 | 1 | 2 | 4 | 5 | 7 | 8 | 9 | 10 => b.is_ascii_digit(), 3 | 6 => b == b'-', _ => unreachable!(), }); if !shape { continue; } // Reject if the byte BEFORE this window is a digit or `-` — // we're inside a longer numeric run, probably not an SSN. if i > 0 { let prev = bytes[i - 1]; if prev.is_ascii_digit() || prev == b'-' { continue; } } // Reject if the byte AFTER is a digit or `-` (same reason). if i + 11 < bytes.len() { let next = bytes[i + 11]; if next.is_ascii_digit() || next == b'-' { continue; } } return true; } false } /// Detects salary/compensation disclosure: the keywords "salary", /// "compensation", "pay rate", "bill rate", "hourly rate" appearing /// within ~40 chars of a `$` followed by digits. Coarse on purpose — /// it's better to false-positive on a legit phrase like "discuss your /// hourly rate of $30/hr" than to miss it. fn contains_salary_disclosure(s: &str) -> bool { let lower = s.to_lowercase(); const KEYWORDS: &[&str] = &[ "salary", "compensation", "pay rate", "bill rate", "hourly rate", ]; let mut keyword_positions: Vec = vec![]; for kw in KEYWORDS { let mut start = 0; while let Some(found) = lower[start..].find(kw) { let abs = start + found; keyword_positions.push(abs); start = abs + kw.len(); } } if keyword_positions.is_empty() { return false; } // Find every `$NNN+` in the text. let bytes = lower.as_bytes(); let mut dollar_positions: Vec = vec![]; for (i, &b) in bytes.iter().enumerate() { if b == b'$' && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit() { dollar_positions.push(i); } } if dollar_positions.is_empty() { return false; } // Any (keyword, $) pair within 40 chars triggers the policy rule. for &kp in &keyword_positions { for &dp in &dollar_positions { if kp.abs_diff(dp) <= 40 { return true; } } } false } #[cfg(test)] mod tests { use super::*; use crate::{InMemoryWorkerLookup, WorkerRecord}; use serde_json::json; fn lookup(records: Vec) -> Arc { Arc::new(InMemoryWorkerLookup::from_records(records)) } fn worker(id: &str, name: &str) -> WorkerRecord { WorkerRecord { candidate_id: id.into(), name: name.into(), status: "active".into(), city: None, state: None, role: None, blacklisted_clients: vec![], } } #[test] fn long_sms_fails_completeness() { let v = EmailValidator::new(lookup(vec![])); let body = "x".repeat(200); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "+15555550123", "body": body, "kind": "sms" }))); assert!(matches!(r, Err(ValidationError::Completeness { .. }))); } #[test] fn long_email_subject_fails_completeness() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "a@b.com", "body": "hi", "subject": "x".repeat(100) }))); assert!(matches!(r, Err(ValidationError::Completeness { .. }))); } #[test] fn missing_to_fails_schema() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({"body": "hi"}))); assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "to")); } #[test] fn well_formed_email_passes() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "hiring@example.com", "subject": "Interview: Friday 10am", "body": "Hi Jane — confirming interview Friday 10am." }))); assert!(r.is_ok(), "well-formed email should pass: {:?}", r); } #[test] fn ssn_in_body_fails_policy() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "x@y.com", "body": "Hi Jane — your file shows 123-45-6789 on record." }))); match r { Err(ValidationError::Policy { reason }) => assert!(reason.contains("SSN")), other => panic!("expected Policy SSN error, got {other:?}"), } } #[test] fn ssn_in_subject_fails_policy() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "x@y.com", "subject": "Re: ID 123-45-6789", "body": "details inside" }))); assert!(matches!(r, Err(ValidationError::Policy { .. }))); } #[test] fn phone_number_does_not_trigger_ssn_false_positive() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "x@y.com", "body": "Call me at 555-123-4567 to confirm." }))); assert!(r.is_ok(), "phone NNN-NNN-NNNN should NOT match SSN NNN-NN-NNNN: {:?}", r); } #[test] fn salary_disclosure_fails_policy() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "x@y.com", "body": "Confirming your hourly rate of $32.50 per hour." }))); assert!(matches!(r, Err(ValidationError::Policy { .. }))); } #[test] fn discussing_dollars_without_salary_keyword_passes() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "x@y.com", "body": "The $20 parking pass is at the front desk." }))); assert!(r.is_ok(), "non-salary $ should pass: {:?}", r); } #[test] fn unknown_candidate_id_fails_consistency() { let v = EmailValidator::new(lookup(vec![])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "x@y.com", "body": "Hi Jane", "_context": {"candidate_id": "W-FAKE"} }))); match r { Err(ValidationError::Consistency { reason }) => assert!(reason.contains("not found")), other => panic!("expected Consistency, got {other:?}"), } } #[test] fn missing_first_name_in_body_is_warning() { let v = EmailValidator::new(lookup(vec![worker("W-1", "Jane Doe")])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "x@y.com", "body": "Hi there — confirming your interview Friday.", "_context": {"candidate_id": "W-1"} }))); let report = r.expect("missing name should be warning, not error"); assert_eq!(report.findings.len(), 1); assert_eq!(report.findings[0].severity, crate::Severity::Warning); assert!(report.findings[0].message.to_lowercase().contains("first name")); } #[test] fn matching_first_name_passes_clean() { let v = EmailValidator::new(lookup(vec![worker("W-1", "Jane Doe")])); let r = v.validate(&Artifact::EmailDraft(json!({ "to": "x@y.com", "body": "Hi Jane — confirming your interview Friday.", "_context": {"candidate_id": "W-1"} }))); let report = r.expect("matching name should pass"); assert!(report.findings.is_empty(), "expected no findings, got {:?}", report.findings); } }