root b03521a506 validator: port FillValidator + EmailValidator from Rust validator crate
Per architecture_comparison.md (a universal win for the Go side), this
ports the Rust crates/validator/src/staffing/ module to
internal/validator/. It is the production safety net the Go side was
missing: FillValidator catches phantom worker IDs plus
status/blacklist/geo/role mismatches; EmailValidator catches
SSN-shaped PII, salary disclosure, and a wrong-target name in
email/SMS drafts.

Files:
- types.go: Artifact (FillProposal | EmailDraft), Validator interface,
  WorkerLookup interface, ValidationError + Finding + Severity
- lookup.go: InMemoryWorkerLookup with case-insensitive ID lookup
- fill.go: FillValidator — schema → completeness → cross-roster
  (phantom ID / status / blacklist / geo / role)
- email.go: EmailValidator — schema → length → PII (SSN + salary)
  → worker-name consistency
- fill_test.go + email_test.go: 24 tests covering happy path +
  every error variant + the load-bearing edge cases (phone-pattern
  not flagged as SSN, flanking-digit guard rejects extended
  numeric runs)

Validator names match Rust (staffing.fill / staffing.email) so
cross-runtime audit logs share the same identifier. PII scanners
(containsSSNPattern, containsSalaryDisclosure) ported byte-for-byte
so a draft flagged by one runtime is flagged by the other.

Caveat: the Rust validator crate also has parquet_lookup.rs (loads
workers_500k.parquet at startup) and playbook.rs (additional
checks). Those weren't ported in this wave — only the two
load-bearing validators that were named in the comparison doc.

Closes one of the two universal-win items for Go side. The other
(materializer port) remains deferred — it's a bigger surface change
and depends on transforms.ts source-class adapters.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 04:49:55 -05:00

221 lines
6.8 KiB
Go

package validator
import "testing"
// ── Schema ──
func TestEmail_WrongArtifactType_FailsSchema(t *testing.T) {
	// An artifact that carries a fill proposal instead of an email draft
	// must be rejected by the email validator with a schema error.
	ev := NewEmailValidator(mkLookup())
	wrongKind := Artifact{FillProposal: map[string]any{}}

	_, err := ev.Validate(wrongKind)
	got, _ := asValidationError(err)
	if got != nil && got.Kind == ErrSchema {
		return
	}
	t.Errorf("expected schema error on wrong artifact, got %+v", got)
}
func TestEmail_MissingTo_FailsSchema(t *testing.T) {
	// A draft with a body but no "to" key must fail with a schema error
	// whose Field names the missing key.
	ev := NewEmailValidator(mkLookup())
	draft := map[string]any{"body": "hi"}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	got, _ := asValidationError(err)
	if got != nil && got.Kind == ErrSchema && got.Field == "to" {
		return
	}
	t.Errorf("expected schema/to error, got %+v", got)
}
func TestEmail_MissingBody_FailsSchema(t *testing.T) {
	// A draft with a recipient but no "body" key must fail with a schema
	// error whose Field names the missing key.
	ev := NewEmailValidator(mkLookup())
	draft := map[string]any{"to": "a@b.com"}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	got, _ := asValidationError(err)

	// Case expressions are evaluated left to right and stop at the first
	// match, so got is never dereferenced when it is nil.
	switch {
	case got == nil, got.Kind != ErrSchema, got.Field != "body":
		t.Errorf("expected schema/body error, got %+v", got)
	}
}
// ── Length limits ──
func TestEmail_LongSMS_FailsCompleteness(t *testing.T) {
	// A 200-byte body on a draft of kind "sms" is expected to be rejected
	// with a completeness error.
	const bodyLen = 200
	ev := NewEmailValidator(mkLookup())

	filler := make([]byte, 0, bodyLen)
	for len(filler) < bodyLen {
		filler = append(filler, 'x')
	}
	draft := map[string]any{
		"to":   "+15555550123",
		"body": string(filler),
		"kind": "sms",
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	got, _ := asValidationError(err)
	if got != nil && got.Kind == ErrCompleteness {
		return
	}
	t.Errorf("expected completeness error on long SMS, got %+v", got)
}
func TestEmail_LongSubject_FailsCompleteness(t *testing.T) {
	// A 100-byte subject line is expected to be rejected with a
	// completeness error even though recipient and body are fine.
	const subjectLen = 100
	ev := NewEmailValidator(mkLookup())

	var longSubject [subjectLen]byte
	for i := 0; i < subjectLen; i++ {
		longSubject[i] = 'x'
	}
	draft := map[string]any{
		"to":      "a@b.com",
		"body":    "hi",
		"subject": string(longSubject[:]),
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	got, _ := asValidationError(err)
	if got != nil && got.Kind == ErrCompleteness {
		return
	}
	t.Errorf("expected completeness error on long subject, got %+v", got)
}
// ── PII: SSN ──
func TestEmail_SSNInBody_FailsPolicy(t *testing.T) {
	// A body containing an NNN-NN-NNNN number must be rejected with a
	// policy error.
	ev := NewEmailValidator(mkLookup())
	draft := map[string]any{
		"to":   "a@b.com",
		"body": "Their SSN is 123-45-6789, please file accordingly.",
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	got, _ := asValidationError(err)
	if got != nil && got.Kind == ErrPolicy {
		return
	}
	t.Errorf("expected policy error on SSN, got %+v", got)
}
func TestEmail_PhonePatternNotFlaggedAsSSN(t *testing.T) {
	// False-positive guard: a phone number (NNN-NNN-NNNN) must not be
	// mistaken for an SSN (NNN-NN-NNNN). Per the original note, this
	// mirrors the phone-pattern test on the Rust side.
	ev := NewEmailValidator(mkLookup())
	draft := map[string]any{
		"to":   "a@b.com",
		"body": "Call me at 555-123-4567 to confirm.",
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	if err == nil {
		return
	}
	t.Errorf("phone pattern should NOT trigger SSN policy, got %v", err)
}
func TestEmail_SSNInsideLongerNumericRun_NotFlagged(t *testing.T) {
	// The token 1234-56-78901 embeds an NNN-NN-NNNN shape ("234-56-7890"),
	// but that match has a digit directly before and after it, so it is
	// part of a longer numeric run and must not be treated as an SSN.
	// Per the original note, this mirrors Rust's flanking-digit guard test.
	ev := NewEmailValidator(mkLookup())
	draft := map[string]any{
		"to":   "a@b.com",
		"body": "ID 1234-56-78901 is the new format.",
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	if err == nil {
		return
	}
	t.Errorf("flanking-digit guard should reject this, got %v", err)
}
// ── PII: salary ──
func TestEmail_SalaryDisclosure_FailsPolicy(t *testing.T) {
	// The word "salary" next to a dollar amount must be rejected with a
	// policy error.
	ev := NewEmailValidator(mkLookup())
	draft := map[string]any{
		"to":   "a@b.com",
		"body": "Their salary is $45000 — please confirm before sending offer.",
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	got, _ := asValidationError(err)
	if got != nil && got.Kind == ErrPolicy {
		return
	}
	t.Errorf("expected policy error on salary disclosure, got %+v", got)
}
func TestEmail_HourlyRateDisclosure_FailsPolicy(t *testing.T) {
	// "hourly rate" followed closely by a dollar amount must be rejected
	// with a policy error, just like an explicit salary figure.
	ev := NewEmailValidator(mkLookup())
	draft := map[string]any{
		"to":   "a@b.com",
		"body": "Discuss your hourly rate of $30 with the client when you arrive.",
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	got, _ := asValidationError(err)
	if got != nil && got.Kind == ErrPolicy {
		return
	}
	t.Errorf("expected policy error on hourly rate, got %+v", got)
}
func TestEmail_DollarFar_NotFlagged(t *testing.T) {
	// The word "salary" and the "$50" amount both appear, but far apart
	// (more than 40 characters, the cutoff given by the original note), so
	// the draft must pass without a policy error.
	ev := NewEmailValidator(mkLookup())
	farApart := "We're paid by salary, but the parking validation costs " +
		"about three more sentences worth of text appearing in between, " +
		"and then much later at $50 the trip is too expensive."
	draft := map[string]any{
		"to":   "a@b.com",
		"body": farApart,
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	if err == nil {
		return
	}
	t.Errorf("salary keyword far from $ amount should not flag, got %v", err)
}
// ── Worker-name consistency ──
func TestEmail_NameMissingFromBody_EmitsWarning(t *testing.T) {
v := NewEmailValidator(mkLookup(mkWorker("w1", "Alice Smith", "active", "Toledo", "OH", "Welder")))
report, err := v.Validate(Artifact{EmailDraft: map[string]any{
"to": "a@b.com",
"body": "Hello, please confirm your shift tomorrow.",
"_context": map[string]any{"candidate_id": "w1"},
}})
if err != nil {
t.Fatalf("name mismatch should NOT error (warning only), got %v", err)
}
if len(report.Findings) != 1 || report.Findings[0].Severity != SeverityWarning {
t.Errorf("expected 1 warning finding, got %v", report.Findings)
}
}
func TestEmail_NameInBody_NoFinding(t *testing.T) {
v := NewEmailValidator(mkLookup(mkWorker("w1", "Alice Smith", "active", "Toledo", "OH", "Welder")))
report, err := v.Validate(Artifact{EmailDraft: map[string]any{
"to": "a@b.com",
"body": "Hi Alice, please confirm tomorrow.",
"_context": map[string]any{"candidate_id": "w1"},
}})
if err != nil {
t.Fatalf("expected pass, got %v", err)
}
if len(report.Findings) != 0 {
t.Errorf("expected zero findings, got %v", report.Findings)
}
}
func TestEmail_PhantomCandidateID_FailsConsistency(t *testing.T) {
	// The lookup is empty, so the candidate_id in _context refers to no
	// known worker; validation must fail with a consistency error.
	ev := NewEmailValidator(mkLookup())
	draft := map[string]any{
		"to":       "a@b.com",
		"body":     "Hi Alice",
		"_context": map[string]any{"candidate_id": "phantom"},
	}

	_, err := ev.Validate(Artifact{EmailDraft: draft})
	got, _ := asValidationError(err)
	if got != nil && got.Kind == ErrConsistency {
		return
	}
	t.Errorf("expected consistency error on phantom ID, got %+v", got)
}
// ── Happy path ──
func TestEmail_WellFormed_Passes(t *testing.T) {
	// Happy path: a draft with recipient, subject and an innocuous body,
	// and no _context, must validate cleanly with zero findings.
	v := NewEmailValidator(mkLookup())
	report, err := v.Validate(Artifact{EmailDraft: map[string]any{
		"to":      "alice@example.com",
		"subject": "Shift confirmation",
		"body":    "Please confirm your shift starts at 9am tomorrow.",
	}})
	if err != nil {
		// Fatalf rather than Errorf: when validation fails the report is not
		// meaningful, and reading report.Findings below could dereference a
		// nil report or add a misleading second failure. This matches the
		// other report-inspecting tests in this file.
		t.Fatalf("well-formed email should pass, got %v", err)
	}
	if len(report.Findings) != 0 {
		t.Errorf("expected zero findings, got %v", report.Findings)
	}
}
// ── Validator name is stable ──
func TestEmail_NameMatchesRust(t *testing.T) {
	// Pins the validator's name to the literal "staffing.email"; the test
	// name and failure message say this must match the Rust implementation.
	const want = "staffing.email"
	ev := NewEmailValidator(mkLookup())
	if got := ev.Name(); got != want {
		t.Errorf("name should match Rust 'staffing.email', got %q", got)
	}
}