package validator import ( "fmt" "strings" "time" ) // EmailValidator is the Go port of Rust's EmailValidator. Per // `crates/validator/src/staffing/email.rs`: // // - Schema (TO/BODY fields present) // - Length (SMS ≤ 160 chars; email subject ≤ 78 chars) // - PII absence (no SSN-shape / salary leakage) // - Worker-name consistency (body mentions worker first name) // // PII detection is std-only — no regex dependency. Two scanners: // - SSN-shape: NNN-NN-NNNN with run-of-digits guards (so phone // numbers like NNN-NNN-NNNN don't false-positive). // - Salary disclosure: keywords near a `$amount` substring. // // Both mirror Rust byte-for-byte so cross-runtime audit logs // agree on which messages get flagged. type EmailValidator struct { workers WorkerLookup } // NewEmailValidator constructs an EmailValidator with the given // lookup. If you don't need the name-consistency check (e.g. // generic broadcast templates), pass NewInMemoryWorkerLookup(nil) // — the validator skips the worker check when _context.candidate_id // is absent. func NewEmailValidator(workers WorkerLookup) *EmailValidator { return &EmailValidator{workers: workers} } // Name satisfies Validator. Stable string used for audit trail / // receipts. Matches Rust output "staffing.email". func (v *EmailValidator) Name() string { return "staffing.email" } // Channel-shape limits — match Rust exactly. const ( smsMaxChars = 160 emailSubjectMaxChars = 78 ) // Validate implements Validator. Order: schema → length → // PII → worker-name consistency. func (v *EmailValidator) Validate(artifact Artifact) (Report, error) { started := time.Now() value := artifact.EmailDraft if value == nil { return Report{}, &ValidationError{ Kind: ErrSchema, Field: "artifact", Reason: fmt.Sprintf("EmailValidator expects EmailDraft, got %s", artifact.Kind()), } } // ── Schema (`to` + `body` required) ── if _, ok := value["to"].(string); !ok { return Report{}, &ValidationError{ Kind: ErrSchema, Field: "to", Reason: "missing or not a string", } } body, ok := value["body"].(string) if !ok { return Report{}, &ValidationError{ Kind: ErrSchema, Field: "body", Reason: "missing or not a string", } } // ── Length checks ── isSMS := false if k, ok := value["kind"].(string); ok && k == "sms" { isSMS = true } if isSMS && len(body) > smsMaxChars { return Report{}, &ValidationError{ Kind: ErrCompleteness, Reason: fmt.Sprintf("SMS body is %d chars, max %d", len(body), smsMaxChars), } } if subject, ok := value["subject"].(string); ok && len(subject) > emailSubjectMaxChars { return Report{}, &ValidationError{ Kind: ErrCompleteness, Reason: fmt.Sprintf("email subject is %d chars, max %d", len(subject), emailSubjectMaxChars), } } // ── PII scan over subject + body combined ── var subjectStr string if s, ok := value["subject"].(string); ok { subjectStr = s } scanned := subjectStr + " " + body if containsSSNPattern(scanned) { return Report{}, &ValidationError{ Kind: ErrPolicy, Reason: "body contains an SSN-shaped sequence (NNN-NN-NNNN); strip before send", } } if containsSalaryDisclosure(scanned) { return Report{}, &ValidationError{ Kind: ErrPolicy, Reason: "body discloses salary/compensation amount; staffing PII rule says strip before send", } } // ── Worker-name consistency ── var findings []Finding if ctx, ok := value["_context"].(map[string]any); ok { if cid, ok := ctx["candidate_id"].(string); ok && cid != "" { worker, found := v.workers.Find(cid) if !found { return Report{}, &ValidationError{ Kind: ErrConsistency, Reason: fmt.Sprintf("_context.candidate_id %q not found in worker roster", cid), } } // Body should mention the worker's name (or at least // their first name) — drafts that address a different // person than the contracted worker are a recurring // LLM mistake. first := strings.Fields(worker.Name) firstLower := "" if len(first) > 0 { firstLower = strings.ToLower(first[0]) } bodyLower := strings.ToLower(body) if firstLower != "" && !strings.Contains(bodyLower, firstLower) { findings = append(findings, Finding{ Field: "body", Severity: SeverityWarning, Message: fmt.Sprintf( "body doesn't mention worker first name %q (candidate_id %q)", first[0], cid, ), }) } } } return Report{ Findings: findings, ElapsedMs: elapsed(started), }, nil } // ── PII scanners — std-only, mirror Rust byte-for-byte ────────── // containsSSNPattern detects an SSN-shaped sequence: 3 digits, dash, // 2 digits, dash, 4 digits. Runs-of-digits guards: rejects sequences // flanked by digit/dash (so phone-area-code-like NNN-NNN-NNNN isn't // flagged). Tight false-positive surface: specifically the // NNN-NN-NNNN shape used by U.S. SSNs. // // Critical: this fires on PII in real-world drafts. Don't relax the // flanking guards without a regression test that exercises both // cases (an actual SSN should fire, a phone-NNN-NNN-NNNN should not). func containsSSNPattern(s string) bool { bytes := []byte(s) if len(bytes) < 11 { return false } for i := 0; i+11 <= len(bytes); i++ { win := bytes[i : i+11] shape := true for j := 0; j < 11; j++ { switch j { case 0, 1, 2, 4, 5, 7, 8, 9, 10: if !isAsciiDigit(win[j]) { shape = false } case 3, 6: if win[j] != '-' { shape = false } } if !shape { break } } if !shape { continue } // Reject if previous byte is digit or dash — we're // inside a longer numeric run, probably not an SSN. if i > 0 { prev := bytes[i-1] if isAsciiDigit(prev) || prev == '-' { continue } } // Reject if next byte is digit or dash — same reason. if i+11 < len(bytes) { next := bytes[i+11] if isAsciiDigit(next) || next == '-' { continue } } return true } return false } func isAsciiDigit(b byte) bool { return b >= '0' && b <= '9' } // containsSalaryDisclosure detects salary/compensation disclosure: // the keywords "salary", "compensation", "pay rate", "bill rate", // "hourly rate" appearing within ~40 chars of a `$NNN+` substring. // // Coarse on purpose — better to false-positive on a legit phrase // like "discuss your hourly rate of $30/hr" than to miss a real // disclosure. Operators tuning this should add tests, not loosen // the check. func containsSalaryDisclosure(s string) bool { lower := strings.ToLower(s) keywords := []string{"salary", "compensation", "pay rate", "bill rate", "hourly rate"} var keywordPositions []int for _, kw := range keywords { start := 0 for { idx := strings.Index(lower[start:], kw) if idx < 0 { break } abs := start + idx keywordPositions = append(keywordPositions, abs) start = abs + len(kw) } } if len(keywordPositions) == 0 { return false } var dollarPositions []int bytes := []byte(lower) for i := 0; i+1 < len(bytes); i++ { if bytes[i] == '$' && isAsciiDigit(bytes[i+1]) { dollarPositions = append(dollarPositions, i) } } if len(dollarPositions) == 0 { return false } for _, kp := range keywordPositions { for _, dp := range dollarPositions { if absDiff(kp, dp) <= 40 { return true } } } return false } func absDiff(a, b int) int { if a > b { return a - b } return b - a }