validator: port FillValidator + EmailValidator from Rust validator crate

Per architecture_comparison.md universal-win for Go side: ports the Rust crates/validator/src/staffing/ to internal/validator/. Production safety net Go was missing — FillValidator catches phantom worker IDs + status/blacklist/geo/role mismatches; EmailValidator catches SSN-shape PII + salary disclosure + wrong-target name in email/SMS drafts. Files: - types.go: Artifact (FillProposal | EmailDraft), Validator interface, WorkerLookup interface, ValidationError + Finding + Severity - lookup.go: InMemoryWorkerLookup with case-insensitive ID lookup - fill.go: FillValidator — schema → completeness → cross-roster (phantom ID / status / blacklist / geo / role) - email.go: EmailValidator — schema → length → PII (SSN + salary) → worker-name consistency - fill_test.go + email_test.go: 24 tests covering happy path + every error variant + the load-bearing edge cases (phone-pattern not flagged as SSN, flanking-digit guard rejects extended numeric runs) Validator names match Rust (staffing.fill / staffing.email) so cross-runtime audit logs share the same identifier. PII scanners (containsSSNPattern, containsSalaryDisclosure) ported byte-for-byte so a draft flagged by one runtime is flagged by the other. Caveat: the Rust validator crate also has parquet_lookup.rs (loads workers_500k.parquet at startup) and playbook.rs (additional checks). Those weren't ported in this wave — only the two load-bearing validators that were named in the comparison doc. Closes one of the two universal-win items for Go side. The other (materializer port) remains deferred — it's a bigger surface change and depends on transforms.ts source-class adapters. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 04:49:55 -05:00 · 2026-05-01 04:49:55 -05:00 · b03521a506
commit b03521a506
parent b3ad14832d
6 changed files with 1190 additions and 0 deletions
--- a/internal/validator/email.go
+++ b/internal/validator/email.go
@ -0,0 +1,270 @@
 package validator
 import (
 	"fmt"
 	"strings"
 	"time"
 )
 // EmailValidator is the Go port of Rust's EmailValidator. Per
 // `crates/validator/src/staffing/email.rs`, Validate runs these checks
 // in order and stops at the first failure:
 //
 //   - Schema (`to` and `body` present and string-typed)
 //   - Length (SMS body ≤ 160; email subject ≤ 78). Both limits are
 //     compared against len(), i.e. UTF-8 byte length, so non-ASCII
 //     text reaches the cap before its visible character count does.
 //   - PII absence (no SSN-shape / salary leakage in subject or body)
 //   - Worker-name consistency (body mentions the worker's first name;
 //     a miss is a warning Finding, not an error)
 //
 // PII detection is std-only — no regex dependency. Two scanners:
 //   - SSN-shape: NNN-NN-NNNN with run-of-digits guards (so phone
 //     numbers like NNN-NNN-NNNN don't false-positive).
 //   - Salary disclosure: keywords near a `$amount` substring.
 //
 // Both mirror Rust byte-for-byte so cross-runtime audit logs
 // agree on which messages get flagged.
 type EmailValidator struct {
 	// workers resolves _context.candidate_id to a roster record for
 	// the name-consistency check.
 	workers WorkerLookup
 }
 // NewEmailValidator returns an EmailValidator backed by the supplied
 // roster lookup. Callers that have no use for the name-consistency
 // check (for instance generic broadcast templates) can hand in
 // NewInMemoryWorkerLookup(nil): the lookup is only consulted when a
 // draft carries a non-empty _context.candidate_id.
 func NewEmailValidator(workers WorkerLookup) *EmailValidator {
 	validator := new(EmailValidator)
 	validator.workers = workers
 	return validator
 }
 // Name satisfies Validator. The returned identifier is the stable
 // string written to audit trails / receipts and is kept equal to the
 // Rust validator's "staffing.email".
 func (v *EmailValidator) Name() string {
 	const auditName = "staffing.email"
 	return auditName
 }
 // Channel-shape limits — match Rust exactly.
 //
 // Validate compares both limits against len() of the string, so they
 // are effectively byte budgets rather than character counts.
 const (
 	// smsMaxChars caps the body of drafts whose `kind` is "sms".
 	smsMaxChars          = 160
 	// emailSubjectMaxChars caps `subject` whenever one is present.
 	emailSubjectMaxChars = 78
 )
 // Validate implements Validator for email/SMS drafts. Checks run in a
 // fixed order and the first failure wins: schema → length → PII →
 // worker-name consistency.
 //
 // Every failed check is returned as a *ValidationError next to a zero
 // Report. The only non-fatal outcome is a body that never mentions the
 // worker's first name: that is recorded as a warning Finding on an
 // otherwise successful Report.
 func (v *EmailValidator) Validate(artifact Artifact) (Report, error) {
 	started := time.Now()
 	draft := artifact.EmailDraft
 	if draft == nil {
 		return Report{}, &ValidationError{
 			Kind:   ErrSchema,
 			Field:  "artifact",
 			Reason: fmt.Sprintf("EmailValidator expects EmailDraft, got %s", artifact.Kind()),
 		}
 	}
 	// Schema: `to` and `body` must both be present as strings.
 	if _, isString := draft["to"].(string); !isString {
 		return Report{}, &ValidationError{
 			Kind:   ErrSchema,
 			Field:  "to",
 			Reason: "missing or not a string",
 		}
 	}
 	body, isString := draft["body"].(string)
 	if !isString {
 		return Report{}, &ValidationError{
 			Kind:   ErrSchema,
 			Field:  "body",
 			Reason: "missing or not a string",
 		}
 	}
 	// Length: limits are compared against byte length (len). The SMS
 	// cap applies only when kind is exactly "sms".
 	channel, _ := draft["kind"].(string)
 	if channel == "sms" && len(body) > smsMaxChars {
 		return Report{}, &ValidationError{
 			Kind: ErrCompleteness,
 			Reason: fmt.Sprintf("SMS body is %d chars, max %d",
 				len(body), smsMaxChars),
 		}
 	}
 	// A missing or non-string subject reads as "", which can neither
 	// exceed the cap nor contribute text to the PII scan.
 	subject, _ := draft["subject"].(string)
 	if len(subject) > emailSubjectMaxChars {
 		return Report{}, &ValidationError{
 			Kind: ErrCompleteness,
 			Reason: fmt.Sprintf("email subject is %d chars, max %d",
 				len(subject), emailSubjectMaxChars),
 		}
 	}
 	// PII: subject and body are scanned as one space-joined string.
 	combined := subject + " " + body
 	switch {
 	case containsSSNPattern(combined):
 		return Report{}, &ValidationError{
 			Kind:   ErrPolicy,
 			Reason: "body contains an SSN-shaped sequence (NNN-NN-NNNN); strip before send",
 		}
 	case containsSalaryDisclosure(combined):
 		return Report{}, &ValidationError{
 			Kind:   ErrPolicy,
 			Reason: "body discloses salary/compensation amount; staffing PII rule says strip before send",
 		}
 	}
 	// Worker-name consistency: only runs when the draft names a
 	// candidate. Indexing a nil context map simply yields no ID.
 	var findings []Finding
 	ctx, _ := draft["_context"].(map[string]any)
 	if cid, _ := ctx["candidate_id"].(string); cid != "" {
 		worker, found := v.workers.Find(cid)
 		if !found {
 			return Report{}, &ValidationError{
 				Kind:   ErrConsistency,
 				Reason: fmt.Sprintf("_context.candidate_id %q not found in worker roster", cid),
 			}
 		}
 		// Drafts addressed to someone other than the contracted worker
 		// are a recurring LLM mistake, so the body is expected to carry
 		// at least the worker's first name (case-insensitive substring).
 		if nameParts := strings.Fields(worker.Name); len(nameParts) > 0 {
 			firstName := nameParts[0]
 			mentioned := strings.Contains(strings.ToLower(body), strings.ToLower(firstName))
 			if !mentioned {
 				findings = append(findings, Finding{
 					Field:    "body",
 					Severity: SeverityWarning,
 					Message: fmt.Sprintf(
 						"body doesn't mention worker first name %q (candidate_id %q)",
 						firstName, cid,
 					),
 				})
 			}
 		}
 	}
 	return Report{
 		Findings:  findings,
 		ElapsedMs: elapsed(started),
 	}, nil
 }
 // ── PII scanners — std-only, mirror Rust byte-for-byte ──────────
 // containsSSNPattern reports whether s holds a U.S.-SSN-shaped run:
 // three digits, a dash, two digits, a dash, four digits
 // (NNN-NN-NNNN), scanned byte-wise at every offset.
 //
 // A shape match is discarded when the byte immediately before or after
 // it is a digit or a dash, because the match is then part of a longer
 // numeric run rather than a standalone SSN.
 //
 // Critical: this fires on PII in real-world drafts. Don't relax the
 // flanking guards without a regression test that exercises both
 // cases (an actual SSN should fire, a phone-NNN-NNN-NNNN should not).
 func containsSSNPattern(s string) bool {
 	const ssnLen = 11 // len("NNN-NN-NNNN")
 	for start := 0; start+ssnLen <= len(s); start++ {
 		matches := true
 		for off := 0; off < ssnLen && matches; off++ {
 			c := s[start+off]
 			if off == 3 || off == 6 {
 				matches = c == '-'
 			} else {
 				matches = isAsciiDigit(c)
 			}
 		}
 		if !matches {
 			continue
 		}
 		// Left guard: a preceding digit or dash means the window sits
 		// inside a longer numeric run.
 		if start > 0 {
 			if before := s[start-1]; isAsciiDigit(before) || before == '-' {
 				continue
 			}
 		}
 		// Right guard: same reasoning for the byte that follows.
 		if end := start + ssnLen; end < len(s) {
 			if after := s[end]; isAsciiDigit(after) || after == '-' {
 				continue
 			}
 		}
 		return true
 	}
 	return false
 }
 // isAsciiDigit reports whether b is one of the ASCII bytes '0'..'9'.
 func isAsciiDigit(b byte) bool {
 	// Unsigned subtraction wraps for bytes below '0', so one comparison
 	// covers both ends of the range.
 	return b-'0' <= 9
 }
 // containsSalaryDisclosure reports whether s pairs a compensation
 // keyword with a nearby dollar amount. The keywords are "salary",
 // "compensation", "pay rate", "bill rate" and "hourly rate"
 // (case-insensitive); an amount is a '$' immediately followed by a
 // digit. The pair counts when the keyword's start and the '$' are at
 // most 40 bytes apart, in either direction.
 //
 // Coarse on purpose — better to false-positive on a legit phrase
 // like "discuss your hourly rate of $30/hr" than to miss a real
 // disclosure. Operators tuning this should add tests, not loosen
 // the check.
 func containsSalaryDisclosure(s string) bool {
 	const window = 40
 	text := strings.ToLower(s)
 	// Collect the offsets of every `$<digit>` first; without one there
 	// is nothing a keyword could be paired with.
 	var amounts []int
 	for i := 0; i+1 < len(text); i++ {
 		if text[i] == '$' && isAsciiDigit(text[i+1]) {
 			amounts = append(amounts, i)
 		}
 	}
 	if len(amounts) == 0 {
 		return false
 	}
 	for _, kw := range [...]string{"salary", "compensation", "pay rate", "bill rate", "hourly rate"} {
 		from := 0
 		for {
 			rel := strings.Index(text[from:], kw)
 			if rel < 0 {
 				break
 			}
 			at := from + rel
 			for _, dollar := range amounts {
 				if absDiff(at, dollar) <= window {
 					return true
 				}
 			}
 			from = at + len(kw)
 		}
 	}
 	return false
 }
 // absDiff returns the absolute distance between a and b.
 func absDiff(a, b int) int {
 	delta := a - b
 	if delta < 0 {
 		delta = -delta
 	}
 	return delta
 }
--- a/internal/validator/email_test.go
+++ b/internal/validator/email_test.go
@ -0,0 +1,220 @@
 package validator
 import "testing"
 // ── Schema ──
 func TestEmail_WrongArtifactType_FailsSchema(t *testing.T) {
 	v := NewEmailValidator(mkLookup())
 	_, err := v.Validate(Artifact{FillProposal: map[string]any{}})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrSchema {
 		t.Errorf("expected schema error on wrong artifact, got %+v", ve)
 	}
 }
 // TestEmail_MissingTo_FailsSchema checks that a draft without `to` is
 // rejected with a schema error pointing at that field.
 func TestEmail_MissingTo_FailsSchema(t *testing.T) {
 	draft := map[string]any{"body": "hi"}
 	_, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrSchema || ve.Field != "to" {
 		t.Errorf("expected schema/to error, got %+v", ve)
 	}
 }
 // TestEmail_MissingBody_FailsSchema checks that a draft without `body`
 // is rejected with a schema error pointing at that field.
 func TestEmail_MissingBody_FailsSchema(t *testing.T) {
 	draft := map[string]any{"to": "a@b.com"}
 	_, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrSchema || ve.Field != "body" {
 		t.Errorf("expected schema/body error, got %+v", ve)
 	}
 }
 // ── Length limits ──
 // TestEmail_LongSMS_FailsCompleteness sends a 200-byte SMS body, which
 // is over the SMS limit, and expects a completeness error.
 func TestEmail_LongSMS_FailsCompleteness(t *testing.T) {
 	var oversized []byte
 	for len(oversized) < 200 {
 		oversized = append(oversized, 'x')
 	}
 	draft := map[string]any{
 		"to":   "+15555550123",
 		"body": string(oversized),
 		"kind": "sms",
 	}
 	_, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrCompleteness {
 		t.Errorf("expected completeness error on long SMS, got %+v", ve)
 	}
 }
 // TestEmail_LongSubject_FailsCompleteness sends a 100-byte subject,
 // which is over the subject limit, and expects a completeness error.
 func TestEmail_LongSubject_FailsCompleteness(t *testing.T) {
 	var oversized []byte
 	for len(oversized) < 100 {
 		oversized = append(oversized, 'x')
 	}
 	draft := map[string]any{
 		"to":      "a@b.com",
 		"body":    "hi",
 		"subject": string(oversized),
 	}
 	_, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrCompleteness {
 		t.Errorf("expected completeness error on long subject, got %+v", ve)
 	}
 }
 // ── PII: SSN ──
 // TestEmail_SSNInBody_FailsPolicy puts a standalone NNN-NN-NNNN
 // sequence in the body and expects a policy rejection.
 func TestEmail_SSNInBody_FailsPolicy(t *testing.T) {
 	draft := map[string]any{
 		"to":   "a@b.com",
 		"body": "Their SSN is 123-45-6789, please file accordingly.",
 	}
 	_, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrPolicy {
 		t.Errorf("expected policy error on SSN, got %+v", ve)
 	}
 }
 // TestEmail_PhonePatternNotFlaggedAsSSN guards the critical
 // false-positive case carried over from the Rust phone-pattern test:
 // a phone number shaped NNN-NNN-NNNN must not trip the NNN-NN-NNNN
 // check.
 func TestEmail_PhonePatternNotFlaggedAsSSN(t *testing.T) {
 	draft := map[string]any{
 		"to":   "a@b.com",
 		"body": "Call me at 555-123-4567 to confirm.",
 	}
 	if _, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft}); err != nil {
 		t.Errorf("phone pattern should NOT trigger SSN policy, got %v", err)
 	}
 }
 // TestEmail_SSNInsideLongerNumericRun_NotFlagged mirrors Rust's
 // flanking-digit guard test. Inside "1234-56-78901" the substring
 // "234-56-7890" (starting at offset 1 of the ID) has the SSN shape,
 // but it has a digit on both sides, so it must not be flagged.
 func TestEmail_SSNInsideLongerNumericRun_NotFlagged(t *testing.T) {
 	draft := map[string]any{
 		"to":   "a@b.com",
 		"body": "ID 1234-56-78901 is the new format.",
 	}
 	if _, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft}); err != nil {
 		t.Errorf("flanking-digit guard should reject this, got %v", err)
 	}
 }
 // ── PII: salary ──
 // TestEmail_SalaryDisclosure_FailsPolicy pairs the "salary" keyword
 // with an adjacent dollar amount and expects a policy rejection.
 func TestEmail_SalaryDisclosure_FailsPolicy(t *testing.T) {
 	draft := map[string]any{
 		"to":   "a@b.com",
 		"body": "Their salary is $45000 — please confirm before sending offer.",
 	}
 	_, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrPolicy {
 		t.Errorf("expected policy error on salary disclosure, got %+v", ve)
 	}
 }
 // TestEmail_HourlyRateDisclosure_FailsPolicy pairs the "hourly rate"
 // keyword with an adjacent dollar amount and expects a policy
 // rejection.
 func TestEmail_HourlyRateDisclosure_FailsPolicy(t *testing.T) {
 	draft := map[string]any{
 		"to":   "a@b.com",
 		"body": "Discuss your hourly rate of $30 with the client when you arrive.",
 	}
 	_, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrPolicy {
 		t.Errorf("expected policy error on hourly rate, got %+v", ve)
 	}
 }
 // TestEmail_DollarFar_NotFlagged places the dollar amount more than 40
 // bytes away from the salary keyword, which must not be flagged.
 func TestEmail_DollarFar_NotFlagged(t *testing.T) {
 	const farBody = "We're paid by salary, but the parking validation costs " +
 		"about three more sentences worth of text appearing in between, " +
 		"and then much later at $50 the trip is too expensive."
 	draft := map[string]any{"to": "a@b.com", "body": farBody}
 	if _, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft}); err != nil {
 		t.Errorf("salary keyword far from $ amount should not flag, got %v", err)
 	}
 }
 // ── Worker-name consistency ──
 func TestEmail_NameMissingFromBody_EmitsWarning(t *testing.T) {
 	v := NewEmailValidator(mkLookup(mkWorker("w1", "Alice Smith", "active", "Toledo", "OH", "Welder")))
 	report, err := v.Validate(Artifact{EmailDraft: map[string]any{
 		"to":       "a@b.com",
 		"body":     "Hello, please confirm your shift tomorrow.",
 		"_context": map[string]any{"candidate_id": "w1"},
 	}})
 	if err != nil {
 		t.Fatalf("name mismatch should NOT error (warning only), got %v", err)
 	}
 	if len(report.Findings) != 1 || report.Findings[0].Severity != SeverityWarning {
 		t.Errorf("expected 1 warning finding, got %v", report.Findings)
 	}
 }
 func TestEmail_NameInBody_NoFinding(t *testing.T) {
 	v := NewEmailValidator(mkLookup(mkWorker("w1", "Alice Smith", "active", "Toledo", "OH", "Welder")))
 	report, err := v.Validate(Artifact{EmailDraft: map[string]any{
 		"to":       "a@b.com",
 		"body":     "Hi Alice, please confirm tomorrow.",
 		"_context": map[string]any{"candidate_id": "w1"},
 	}})
 	if err != nil {
 		t.Fatalf("expected pass, got %v", err)
 	}
 	if len(report.Findings) != 0 {
 		t.Errorf("expected zero findings, got %v", report.Findings)
 	}
 }
 // TestEmail_PhantomCandidateID_FailsConsistency references a
 // candidate_id that the (empty) roster does not contain and expects a
 // consistency error.
 func TestEmail_PhantomCandidateID_FailsConsistency(t *testing.T) {
 	draft := map[string]any{
 		"to":       "a@b.com",
 		"body":     "Hi Alice",
 		"_context": map[string]any{"candidate_id": "phantom"},
 	}
 	_, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrConsistency {
 		t.Errorf("expected consistency error on phantom ID, got %+v", ve)
 	}
 }
 // ── Happy path ──
 // TestEmail_WellFormed_Passes is the happy path: a clean email with no
 // candidate context validates without error or findings.
 func TestEmail_WellFormed_Passes(t *testing.T) {
 	draft := map[string]any{
 		"to":      "alice@example.com",
 		"subject": "Shift confirmation",
 		"body":    "Please confirm your shift starts at 9am tomorrow.",
 	}
 	report, err := NewEmailValidator(mkLookup()).Validate(Artifact{EmailDraft: draft})
 	if err != nil {
 		t.Errorf("well-formed email should pass, got %v", err)
 	}
 	if len(report.Findings) != 0 {
 		t.Errorf("expected zero findings, got %v", report.Findings)
 	}
 }
 // ── Validator name is stable ──
 // TestEmail_NameMatchesRust pins the audit identifier so it cannot
 // drift from the Rust validator's name.
 func TestEmail_NameMatchesRust(t *testing.T) {
 	got := NewEmailValidator(mkLookup()).Name()
 	if got != "staffing.email" {
 		t.Errorf("name should match Rust 'staffing.email', got %q", got)
 	}
 }
--- a/internal/validator/fill.go
+++ b/internal/validator/fill.go
@ -0,0 +1,274 @@
 package validator
 import (
 	"fmt"
 	"strings"
 	"time"
 )
 // FillValidator is the Go port of Rust's FillValidator. Per
 // `crates/validator/src/staffing/fill.rs`:
 //
 //   - Schema compliance (propose_done shape: {fills: [{candidate_id, name}]};
 //     every fill must be an object carrying both keys)
 //   - Completeness (endorsed count == target_count)
 //   - Worker existence (every candidate_id present in workers roster)
 //   - Status check (worker.status == "active")
 //   - Client blacklist (contract client_id must NOT appear in the
 //     worker's BlacklistedClients list)
 //   - Geo/role match (worker city/state/role matches contract)
 //
 // Contract metadata travels alongside the JSON payload under a
 // `_context` key:
 //
 //	{"_context": {"target_count": 2, "city": "Toledo", "state": "OH",
 //	  "role": "Welder", "client_id": "CLI-00099"}, "fills": [...]}
 //
 // The duplicate-ID guard across the fills of one proposal catches the
 // LLM mistake of repeating the same candidate twice to satisfy a
 // higher target_count.
 type FillValidator struct {
 	// workers is the roster used for existence, status, blacklist and
 	// geo/role checks.
 	workers WorkerLookup
 }
 // NewFillValidator returns a FillValidator backed by the supplied
 // roster lookup. The lookup must be non-nil; tests that don't exercise
 // existence checks can pass NewInMemoryWorkerLookup(nil).
 func NewFillValidator(workers WorkerLookup) *FillValidator {
 	validator := new(FillValidator)
 	validator.workers = workers
 	return validator
 }
 // Name satisfies Validator. The returned identifier is the stable
 // string written to audit trails / receipts; it is kept equal to the
 // Rust validator's "staffing.fill" so cross-runtime audit logs share
 // the same name.
 func (v *FillValidator) Name() string {
 	const auditName = "staffing.fill"
 	return auditName
 }
 // fillContext is the optional contract metadata extracted from
 // _context. Each field is independently nil-able (Rust's Option<T>
 // pattern) — a nil field means the key was absent or had the wrong
 // type, and the corresponding check is skipped. City/State/Role are
 // additionally enforced only when the roster side has a value too.
 type fillContext struct {
 	TargetCount *int    // required number of fills (completeness check)
 	City        *string // contract city, compared with worker.City
 	State       *string // contract state, compared with worker.State
 	Role        *string // contract role, compared with worker.Role
 	ClientID    *string // client matched against worker.BlacklistedClients
 }
 // extractContext reads the optional `_context` object out of a fill
 // proposal. Keys that are absent or carry an unexpected type leave the
 // matching field nil; a missing or non-object `_context` yields the
 // zero fillContext.
 func extractContext(value map[string]any) fillContext {
 	var out fillContext
 	meta, isObject := value["_context"].(map[string]any)
 	if !isObject {
 		return out
 	}
 	if raw, present := meta["target_count"]; present {
 		if count, valid := toInt(raw); valid {
 			out.TargetCount = &count
 		}
 	}
 	// optString returns a pointer to the string stored under key, or
 	// nil when the key is missing or not a string.
 	optString := func(key string) *string {
 		if s, isString := meta[key].(string); isString {
 			return &s
 		}
 		return nil
 	}
 	out.City = optString("city")
 	out.State = optString("state")
 	out.Role = optString("role")
 	out.ClientID = optString("client_id")
 	return out
 }
 // toInt accepts JSON numbers (float64) and Go integers (int, int64),
 // returning the int form when the value is a whole number ≥ 0 that fits
 // in an int. Anything else — negatives, fractions, NaN/Inf, values too
 // large for int, or other types — yields (0, false).
 func toInt(v any) (int, bool) {
 	var wide int64
 	switch n := v.(type) {
 	case int:
 		wide = int64(n)
 	case int64:
 		wide = n
 	case float64:
 		// JSON unmarshals all numbers as float64. Converting an
 		// out-of-range float to an integer is implementation-dependent
 		// in Go, so the range is checked before converting. NaN passes
 		// both comparisons but fails the round-trip check below.
 		if n < 0 || n >= 1<<63 {
 			return 0, false
 		}
 		wide = int64(n)
 		// Whole-number check is mandatory because target_count=2.5
 		// makes no sense.
 		if float64(wide) != n {
 			return 0, false
 		}
 	default:
 		return 0, false
 	}
 	// Enforce the ≥ 0 contract for the integer inputs as well, and make
 	// sure narrowing to int (32 bits on some platforms) loses nothing.
 	if wide < 0 || int64(int(wide)) != wide {
 		return 0, false
 	}
 	return int(wide), true
 }
 // eqCI is the case-insensitive equality used everywhere validators
 // compare strings (status, role, city, etc.). Both operands are
 // whitespace-trimmed first. This is the Go counterpart of Rust's
 // `.trim().eq_ignore_ascii_case(other.trim())`; note that
 // strings.EqualFold applies Unicode simple case folding, which is
 // broader than an ASCII-only comparison.
 func eqCI(a, b string) bool {
 	left := strings.TrimSpace(a)
 	right := strings.TrimSpace(b)
 	return strings.EqualFold(left, right)
 }
 // Validate implements the Validator interface. Mirrors the Rust
 // validation order exactly: schema → completeness → cross-roster
 // per-fill checks. The first failing check is returned as a
 // *ValidationError next to a zero Report; a proposed name that differs
 // from the roster name is the only non-fatal outcome and is reported as
 // a warning Finding.
 //
 // The duplicate-ID guard compares candidate IDs after trimming and
 // lower-casing, the same normalization eqCI applies to every other
 // string comparison here. Without it, "w1" and "W1" would both resolve
 // to the same worker through a case-insensitive lookup and slip past
 // the guard.
 func (v *FillValidator) Validate(artifact Artifact) (Report, error) {
 	started := time.Now()
 	value := artifact.FillProposal
 	if value == nil {
 		return Report{}, &ValidationError{
 			Kind:   ErrSchema,
 			Field:  "artifact",
 			Reason: fmt.Sprintf("FillValidator expects FillProposal, got %s", artifact.Kind()),
 		}
 	}
 	// ── Schema check ──
 	fillsRaw, ok := value["fills"].([]any)
 	if !ok {
 		return Report{}, &ValidationError{
 			Kind:   ErrSchema,
 			Field:  "fills",
 			Reason: "expected top-level `fills` array",
 		}
 	}
 	for i, fillRaw := range fillsRaw {
 		fill, ok := fillRaw.(map[string]any)
 		if !ok {
 			return Report{}, &ValidationError{
 				Kind:   ErrSchema,
 				Field:  fmt.Sprintf("fills[%d]", i),
 				Reason: "expected object",
 			}
 		}
 		// Only key presence is checked here; value types are handled in
 		// the cross-roster pass below.
 		if _, ok := fill["candidate_id"]; !ok {
 			return Report{}, &ValidationError{
 				Kind:   ErrSchema,
 				Field:  fmt.Sprintf("fills[%d].candidate_id", i),
 				Reason: "missing",
 			}
 		}
 		if _, ok := fill["name"]; !ok {
 			return Report{}, &ValidationError{
 				Kind:   ErrSchema,
 				Field:  fmt.Sprintf("fills[%d].name", i),
 				Reason: "missing",
 			}
 		}
 	}
 	ctx := extractContext(value)
 	// ── Completeness ──
 	if ctx.TargetCount != nil && len(fillsRaw) != *ctx.TargetCount {
 		return Report{}, &ValidationError{
 			Kind: ErrCompleteness,
 			Reason: fmt.Sprintf("endorsed count %d != target_count %d",
 				len(fillsRaw), *ctx.TargetCount),
 		}
 	}
 	// ── Cross-roster checks ──
 	var findings []Finding
 	seenIDs := make(map[string]struct{}, len(fillsRaw))
 	for i, fillRaw := range fillsRaw {
 		fill := fillRaw.(map[string]any) // already type-checked in schema pass
 		// Non-string values degrade to "" for both fields.
 		candidateID, _ := fill["candidate_id"].(string)
 		proposedName, _ := fill["name"].(string)
 		// Duplicate-ID guard across the fills of one proposal, keyed on
 		// the normalized ID so case/whitespace variants of one candidate
 		// count as the same entry.
 		idKey := strings.ToLower(strings.TrimSpace(candidateID))
 		if _, dup := seenIDs[idKey]; dup {
 			return Report{}, &ValidationError{
 				Kind: ErrConsistency,
 				Reason: fmt.Sprintf(
 					"duplicate candidate_id %q appears multiple times in fills",
 					candidateID,
 				),
 			}
 		}
 		seenIDs[idKey] = struct{}{}
 		// Worker existence — load-bearing check for the 0→85% pattern.
 		worker, ok := v.workers.Find(candidateID)
 		if !ok {
 			return Report{}, &ValidationError{
 				Kind: ErrConsistency,
 				Reason: fmt.Sprintf(
 					"fills[%d].candidate_id %q does not exist in worker roster",
 					i, candidateID,
 				),
 			}
 		}
 		// Status — only "active" workers can be endorsed.
 		if !eqCI(worker.Status, "active") {
 			return Report{}, &ValidationError{
 				Kind: ErrConsistency,
 				Reason: fmt.Sprintf(
 					"fills[%d] worker %q has status %q, expected \"active\"",
 					i, candidateID, worker.Status,
 				),
 			}
 		}
 		// Client blacklist — skipped when the contract names no client.
 		if ctx.ClientID != nil {
 			for _, b := range worker.BlacklistedClients {
 				if eqCI(b, *ctx.ClientID) {
 					return Report{}, &ValidationError{
 						Kind: ErrPolicy,
 						Reason: fmt.Sprintf(
 							"fills[%d] worker %q blacklisted for client %q",
 							i, candidateID, *ctx.ClientID,
 						),
 					}
 				}
 			}
 		}
 		// Geo / role match — only when BOTH sides have a value.
 		if ctx.City != nil && worker.City != nil && !eqCI(*ctx.City, *worker.City) {
 			return Report{}, &ValidationError{
 				Kind: ErrConsistency,
 				Reason: fmt.Sprintf(
 					"fills[%d] worker %q city %q doesn't match contract city %q",
 					i, candidateID, *worker.City, *ctx.City,
 				),
 			}
 		}
 		if ctx.State != nil && worker.State != nil && !eqCI(*ctx.State, *worker.State) {
 			return Report{}, &ValidationError{
 				Kind: ErrConsistency,
 				Reason: fmt.Sprintf(
 					"fills[%d] worker %q state %q doesn't match contract state %q",
 					i, candidateID, *worker.State, *ctx.State,
 				),
 			}
 		}
 		if ctx.Role != nil && worker.Role != nil && !eqCI(*ctx.Role, *worker.Role) {
 			return Report{}, &ValidationError{
 				Kind: ErrConsistency,
 				Reason: fmt.Sprintf(
 					"fills[%d] worker %q role %q doesn't match contract role %q",
 					i, candidateID, *worker.Role, *ctx.Role,
 				),
 			}
 		}
 		// Name-mismatch is a warning, not an error — recruiters
 		// sometimes send updated names through the proposal layer
 		// before the roster catches up.
 		if proposedName != "" && !eqCI(proposedName, worker.Name) {
 			findings = append(findings, Finding{
 				Field:    fmt.Sprintf("fills[%d].name", i),
 				Severity: SeverityWarning,
 				Message: fmt.Sprintf(
 					"proposed name %q differs from roster name %q for %q",
 					proposedName, worker.Name, candidateID,
 				),
 			})
 		}
 	}
 	return Report{
 		Findings:  findings,
 		ElapsedMs: elapsed(started),
 	}, nil
 }
--- a/internal/validator/fill_test.go
+++ b/internal/validator/fill_test.go
@ -0,0 +1,226 @@
 package validator
 import (
 	"errors"
 	"testing"
 )
 // Helpers — mirror the Rust test helpers.
 //
 // mkLookup wraps the given records in an in-memory roster and returns
 // it as the WorkerLookup interface the validators consume.
 func mkLookup(records ...WorkerRecord) WorkerLookup {
 	var roster WorkerLookup = NewInMemoryWorkerLookup(records)
 	return roster
 }
 // mkWorker builds a roster record with every geo/role field populated;
 // BlacklistedClients is left empty for the caller to set if needed.
 func mkWorker(id, name, status, city, state, role string) WorkerRecord {
 	rec := WorkerRecord{CandidateID: id, Name: name, Status: status}
 	rec.City = strPtr(city)
 	rec.State = strPtr(state)
 	rec.Role = strPtr(role)
 	return rec
 }
 // asValidationError unwraps err looking for a *ValidationError. It
 // returns (ve, true) when one is found in the chain and (nil, false)
 // otherwise, including for a nil err.
 func asValidationError(err error) (*ValidationError, bool) {
 	var target *ValidationError
 	if !errors.As(err, &target) {
 		return nil, false
 	}
 	return target, true
 }
 // ── Schema-level errors ──
 func TestFill_WrongArtifactType_FailsSchema(t *testing.T) {
 	v := NewFillValidator(mkLookup())
 	_, err := v.Validate(Artifact{EmailDraft: map[string]any{}})
 	ve, ok := asValidationError(err)
 	if !ok {
 		t.Fatalf("expected ValidationError, got %v", err)
 	}
 	if ve.Kind != ErrSchema || ve.Field != "artifact" {
 		t.Errorf("expected schema/artifact error, got %+v", ve)
 	}
 }
 // TestFill_MissingFillsArray_FailsSchema checks that a proposal with
 // no `fills` key is rejected with a schema error on that field.
 func TestFill_MissingFillsArray_FailsSchema(t *testing.T) {
 	proposal := map[string]any{}
 	_, err := NewFillValidator(mkLookup()).Validate(Artifact{FillProposal: proposal})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrSchema || ve.Field != "fills" {
 		t.Errorf("expected schema/fills error, got %+v", ve)
 	}
 }
 // TestFill_MissingCandidateID_FailsSchema checks that a fill without a
 // candidate_id key is rejected with an indexed schema error.
 func TestFill_MissingCandidateID_FailsSchema(t *testing.T) {
 	proposal := map[string]any{
 		"fills": []any{
 			map[string]any{"name": "Alice"},
 		},
 	}
 	_, err := NewFillValidator(mkLookup()).Validate(Artifact{FillProposal: proposal})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrSchema || ve.Field != "fills[0].candidate_id" {
 		t.Errorf("expected schema/fills[0].candidate_id error, got %+v", ve)
 	}
 }
 // ── Completeness ──
 func TestFill_TargetCountMismatch_FailsCompleteness(t *testing.T) {
 	v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "active", "Toledo", "OH", "Welder")))
 	_, err := v.Validate(Artifact{FillProposal: map[string]any{
 		"_context": map[string]any{"target_count": float64(2)},
 		"fills":    []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
 	}})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrCompleteness {
 		t.Errorf("expected completeness error, got %+v", ve)
 	}
 }
 // ── Cross-roster checks ──
 // TestFill_PhantomID_FailsConsistency runs against an empty roster, so
 // any candidate_id is "phantom" — the load-bearing check for the
 // 0→85% pattern.
 func TestFill_PhantomID_FailsConsistency(t *testing.T) {
 	proposal := map[string]any{
 		"fills": []any{map[string]any{"candidate_id": "phantom-id", "name": "Alice"}},
 	}
 	_, err := NewFillValidator(mkLookup()).Validate(Artifact{FillProposal: proposal})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrConsistency {
 		t.Errorf("expected consistency error on phantom ID, got %+v", ve)
 	}
 }
 func TestFill_DuplicateID_FailsConsistency(t *testing.T) {
 	v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "active", "Toledo", "OH", "Welder")))
 	_, err := v.Validate(Artifact{FillProposal: map[string]any{
 		"fills": []any{
 			map[string]any{"candidate_id": "w1", "name": "Alice"},
 			map[string]any{"candidate_id": "w1", "name": "Alice"},
 		},
 	}})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrConsistency {
 		t.Errorf("expected consistency error on duplicate ID, got %+v", ve)
 	}
 }
 func TestFill_InactiveStatus_FailsConsistency(t *testing.T) {
 	v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "inactive", "Toledo", "OH", "Welder")))
 	_, err := v.Validate(Artifact{FillProposal: map[string]any{
 		"fills": []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
 	}})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrConsistency {
 		t.Errorf("expected consistency error on inactive status, got %+v", ve)
 	}
 }
 // TestFill_Blacklist_FailsPolicy endorses a worker who has blacklisted
 // the contract's client; the client ID differs only in case, which
 // must still match.
 func TestFill_Blacklist_FailsPolicy(t *testing.T) {
 	blocked := mkWorker("w1", "Alice", "active", "Toledo", "OH", "Welder")
 	blocked.BlacklistedClients = []string{"CLI-99"}
 	proposal := map[string]any{
 		"_context": map[string]any{"client_id": "cli-99"}, // case-insensitive
 		"fills":    []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
 	}
 	_, err := NewFillValidator(mkLookup(blocked)).Validate(Artifact{FillProposal: proposal})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrPolicy {
 		t.Errorf("expected policy error on blacklist, got %+v", ve)
 	}
 }
 func TestFill_GeoMismatch_FailsConsistency(t *testing.T) {
 	// Worker in Detroit, contract says Toledo.
 	v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "active", "Detroit", "MI", "Welder")))
 	_, err := v.Validate(Artifact{FillProposal: map[string]any{
 		"_context": map[string]any{"city": "Toledo", "state": "OH"},
 		"fills":    []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
 	}})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrConsistency {
 		t.Errorf("expected consistency error on geo mismatch, got %+v", ve)
 	}
 }
 func TestFill_RoleMismatch_FailsConsistency(t *testing.T) {
 	v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "active", "Toledo", "OH", "Forklift Operator")))
 	_, err := v.Validate(Artifact{FillProposal: map[string]any{
 		"_context": map[string]any{"role": "Welder"},
 		"fills":    []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
 	}})
 	ve, _ := asValidationError(err)
 	if ve == nil || ve.Kind != ErrConsistency {
 		t.Errorf("expected consistency error on role mismatch, got %+v", ve)
 	}
 }
 // ── Happy path ──
 func TestFill_WellFormed_Passes(t *testing.T) {
 	v := NewFillValidator(mkLookup(
 		mkWorker("w1", "Alice", "active", "Toledo", "OH", "Welder"),
 		mkWorker("w2", "Bob", "active", "Toledo", "OH", "Welder"),
 	))
 	report, err := v.Validate(Artifact{FillProposal: map[string]any{
 		"_context": map[string]any{
 			"target_count": float64(2),
 			"city":         "Toledo",
 			"state":        "OH",
 			"role":         "Welder",
 		},
 		"fills": []any{
 			map[string]any{"candidate_id": "w1", "name": "Alice"},
 			map[string]any{"candidate_id": "w2", "name": "Bob"},
 		},
 	}})
 	if err != nil {
 		t.Fatalf("expected pass, got %v", err)
 	}
 	if len(report.Findings) != 0 {
 		t.Errorf("expected zero findings, got %v", report.Findings)
 	}
 }
 // ── Name mismatch is a Finding (warning), not an error ──
 func TestFill_NameMismatch_EmitsWarning(t *testing.T) {
 	v := NewFillValidator(mkLookup(mkWorker("w1", "Alice Smith", "active", "Toledo", "OH", "Welder")))
 	report, err := v.Validate(Artifact{FillProposal: map[string]any{
 		"fills": []any{
 			map[string]any{"candidate_id": "w1", "name": "Alyssa Smith"}, // typo / outdated
 		},
 	}})
 	if err != nil {
 		t.Fatalf("name mismatch should NOT error, got %v", err)
 	}
 	if len(report.Findings) != 1 || report.Findings[0].Severity != SeverityWarning {
 		t.Errorf("expected 1 warning finding, got %v", report.Findings)
 	}
 }
 // ── Case-insensitive matches ──
 func TestFill_CaseInsensitiveMatch_Passes(t *testing.T) {
 	v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "ACTIVE", "TOLEDO", "oh", "Welder")))
 	_, err := v.Validate(Artifact{FillProposal: map[string]any{
 		"_context": map[string]any{"city": "Toledo", "state": "OH"},
 		"fills":    []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
 	}})
 	if err != nil {
 		t.Errorf("case-insensitive comparisons should pass, got %v", err)
 	}
 }
 // ── Validator name is stable ──
 // TestFill_NameMatchesRust pins the audit identifier so it cannot
 // drift from the Rust validator's name.
 func TestFill_NameMatchesRust(t *testing.T) {
 	got := NewFillValidator(mkLookup()).Name()
 	if got != "staffing.fill" {
 		t.Errorf("name should match Rust 'staffing.fill', got %q", got)
 	}
 }
--- a/internal/validator/lookup.go
+++ b/internal/validator/lookup.go
@ -0,0 +1,56 @@
 package validator
 import "strings"
 // InMemoryWorkerLookup is a zero-deps WorkerLookup useful for tests
 // and small-fixture validation. Mirrors Rust's
 // `InMemoryWorkerLookup::from_records`.
 //
 // Lookup is case-insensitive on candidate_id: per the original port
 // notes, the source data's casing is inconsistent (some IDs
 // uppercase, some lowercase, some mixed), so a case-sensitive lookup
 // misses real matches. Keys are trimmed and lower-cased on insert and
 // again on lookup to keep the contract.
 type InMemoryWorkerLookup struct {
 	// byID maps a normalized candidate_id to its record.
 	byID map[string]WorkerRecord
 }
 // NewInMemoryWorkerLookup builds a lookup from a list of records.
 //
 // Each candidate_id is normalized (surrounding whitespace trimmed, then
 // lower-cased) before it is used as the map key, which is the same
 // normalization Find applies to its query. Records whose normalized ID
 // is empty are skipped, so a whitespace-only ID is never stored under
 // the "" key. Duplicate candidate_ids (after normalization):
 // last-write-wins. A nil or empty slice yields an empty, usable lookup.
 func NewInMemoryWorkerLookup(records []WorkerRecord) *InMemoryWorkerLookup {
 	byID := make(map[string]WorkerRecord, len(records))
 	for _, rec := range records {
 		// Normalize before the emptiness check; checking the raw field
 		// would let "   " through and register it as "".
 		key := strings.ToLower(strings.TrimSpace(rec.CandidateID))
 		if key == "" {
 			continue
 		}
 		byID[key] = rec
 	}
 	return &InMemoryWorkerLookup{byID: byID}
 }
 // Find satisfies WorkerLookup. The candidateID is trimmed and
 // lower-cased before the map lookup. Returns (rec, true) on hit,
 // (nil, false) on miss or when the receiver is nil.
 //
 // The returned record is detached from the lookup: the pointer fields
 // (City, State, Role) and the BlacklistedClients slice are duplicated,
 // so writes through the result cannot alter what later Find calls see.
 func (l *InMemoryWorkerLookup) Find(candidateID string) (*WorkerRecord, bool) {
 	if l == nil {
 		return nil, false
 	}
 	rec, ok := l.byID[strings.ToLower(strings.TrimSpace(candidateID))]
 	if !ok {
 		return nil, false
 	}
 	// rec is already a by-value copy of the map entry, but a plain struct
 	// copy still shares the *string targets and the slice backing array
 	// with the stored record. Duplicate them so the copy is independent.
 	dup := func(p *string) *string {
 		if p == nil {
 			return nil
 		}
 		v := *p
 		return &v
 	}
 	rec.City, rec.State, rec.Role = dup(rec.City), dup(rec.State), dup(rec.Role)
 	if rec.BlacklistedClients != nil {
 		clients := make([]string, len(rec.BlacklistedClients))
 		copy(clients, rec.BlacklistedClients)
 		rec.BlacklistedClients = clients
 	}
 	return &rec, true
 }
 // Len reports how many records the lookup holds; a nil receiver
 // counts as empty. Intended for tests and admin endpoints.
 func (l *InMemoryWorkerLookup) Len() int {
 	if l != nil {
 		return len(l.byID)
 	}
 	return 0
 }
 // strPtr returns a pointer to a fresh copy of s. Handy for tests that
 // need to populate the *string fields WorkerRecord.City/State/Role.
 func strPtr(s string) *string {
 	p := new(string)
 	*p = s
 	return p
 }
--- a/internal/validator/types.go
+++ b/internal/validator/types.go
@ -0,0 +1,144 @@
 // Package validator is the Go port of Rust's `validator` crate
 // (`/home/profit/lakehouse/crates/validator/`). Production safety
 // nets for staffing-domain LLM outputs:
 //
 //   - FillValidator: catches phantom IDs / wrong-status workers /
 //     blacklist violations / geo-or-role mismatches in fill proposals
 //   - EmailValidator: catches SSN-shape sequences / salary
 //     disclosure / wrong-target name in email/SMS drafts
 //
 // Per `reports/cutover/architecture_comparison.md`'s "Go missing"
 // section: these were Rust-only until this port. Closes one of the
 // two named gaps for Go-primary operation (the other being the
 // materializer port).
 //
 // Architectural choice: we mirror the Rust shape exactly so the
 // Validator + Artifact + Finding interfaces are call-compatible
 // across runtimes. A future "validator service" daemon could expose
 // either runtime's implementation behind a uniform HTTP contract.
 package validator
 import "time"
 // Artifact is the discriminated union of input shapes a Validator
 // can receive. Mirrors Rust's `enum Artifact`. There is no explicit
 // tag: Kind checks FillProposal first, then EmailDraft, so the first
 // non-nil field selects the kind and an Artifact with neither set
 // reports "Unknown".
 type Artifact struct {
 	// FillProposal: {fills: [{candidate_id, name}], _context: {...}}
 	FillProposal map[string]any
 	// EmailDraft: {to, body, subject?, kind?, _context?: {candidate_id?}}
 	EmailDraft map[string]any
 }
 // Kind names the populated variant for use in error messages such as
 // "expected FillProposal, got X". FillProposal takes precedence when
 // both maps are non-nil; "Unknown" is returned when neither is set.
 func (a Artifact) Kind() string {
 	if a.FillProposal != nil {
 		return "FillProposal"
 	}
 	if a.EmailDraft != nil {
 		return "EmailDraft"
 	}
 	return "Unknown"
 }
 // Severity grades a Finding. The set of values matches Rust's enum
 // {Error, Warning, Info}; the string forms below are what the
 // Finding.Severity field serializes as under its `json:"severity"` tag.
 type Severity string
 // Recognized Severity values.
 const (
 	SeverityError   Severity = "error"
 	SeverityWarning Severity = "warning"
 	SeverityInfo    Severity = "info"
 )
 // Finding is one warning-or-info note attached to a successful
 // validation. Errors abort validation; findings come back alongside
 // a passing report. Mirrors Rust's Finding shape exactly so JSON
 // round-trips between runtimes.
 type Finding struct {
 	// Field names the part of the artifact the note refers to.
 	Field    string   `json:"field"`
 	// Severity grades the note (see the Severity constants).
 	Severity Severity `json:"severity"`
 	// Message is the human-readable explanation.
 	Message  string   `json:"message"`
 }
 // Report is the success-path return value: zero or more findings
 // + per-validator wall-clock cost.
 type Report struct {
 	// Findings holds the non-fatal notes collected during validation.
 	Findings  []Finding `json:"findings"`
 	// ElapsedMs is the validation's wall-clock duration in milliseconds.
 	ElapsedMs int64     `json:"elapsed_ms"`
 }
 // ValidationErrorKind discriminates the failure modes. Mirrors
 // Rust's ValidationError variants:
 //   - Schema: input shape doesn't match contract
 //   - Completeness: structural counts wrong (e.g. 3 fills, target_count=5)
 //   - Consistency: cross-source disagreement (phantom worker, wrong city)
 //   - Policy: org-level rule violation (blacklist, PII leak)
 //
 // The string value of each kind is the prefix ValidationError.Error
 // emits, so changing one changes the error text.
 type ValidationErrorKind string
 // Recognized ValidationErrorKind values.
 const (
 	ErrSchema       ValidationErrorKind = "schema"
 	ErrCompleteness ValidationErrorKind = "completeness"
 	ErrConsistency  ValidationErrorKind = "consistency"
 	ErrPolicy       ValidationErrorKind = "policy"
 )
 // ValidationError is the Go equivalent of Rust's enum + variant
 // fields. Field is set for Schema errors (the failing field name);
 // Reason carries the human-readable message for all variants.
 // Error() includes Field in its output whenever it is non-empty,
 // regardless of Kind.
 type ValidationError struct {
 	// Kind is the failure category.
 	Kind   ValidationErrorKind
 	// Field is the offending field name; may be empty.
 	Field  string
 	// Reason is the human-readable explanation.
 	Reason string
 }
 // Error implements the error interface. The text is
 // "<kind> (<field>): <reason>" when Field is non-empty and
 // "<kind>: <reason>" otherwise; the original port notes say this
 // mirrors the Rust Debug print so log scraping behaves the same.
 func (e *ValidationError) Error() string {
 	prefix := string(e.Kind)
 	if e.Field == "" {
 		return prefix + ": " + e.Reason
 	}
 	return prefix + " (" + e.Field + "): " + e.Reason
 }
 // Validator is the interface every validator implements.
 // Intended to be stateless — construction takes any deps (e.g.
 // WorkerLookup) upfront, and Validate is meant to be pure on its
 // inputs.
 type Validator interface {
 	// Name returns the validator's stable identifier (e.g. "staffing.fill").
 	Name() string
 	// Validate checks one artifact, returning a Report on success or a
 	// non-nil error when validation fails.
 	Validate(artifact Artifact) (Report, error)
 }
 // WorkerRecord describes a worker as seen by a WorkerLookup.
 //
 // City, State and Role are pointers to mirror Rust's Option<String>:
 // nil means "we don't know", which is operationally distinct from a
 // pointer to the empty string ("we know it's empty"). Validators only
 // enforce city/state/role matches when both the expected value (from
 // the contract) and the actual value (from the lookup) are non-nil.
 type WorkerRecord struct {
 	CandidateID        string
 	Name               string
 	Status             string // "active" / "inactive" / etc.
 	City               *string
 	State              *string
 	Role               *string
 	BlacklistedClients []string
 }
 // WorkerLookup is the gate validators go through to ask "does
 // this candidate_id exist + what's their record?" Implementations
 // can be in-memory (test fixture), DuckDB-backed (production
 // queryd), or HTTP-backed (cross-daemon). FillValidator + EmailValidator
 // take Arc<dyn WorkerLookup> on Rust side; in Go, an interface value.
 type WorkerLookup interface {
 	// Find returns the record for candidateID and true on a hit, or
 	// (nil, false) when no such worker is known.
 	Find(candidateID string) (*WorkerRecord, bool)
 }
 // elapsed converts a start time into the milliseconds-elapsed
 // shape matched in Report.ElapsedMs (mirrors Rust's
 // .elapsed().as_millis() as u64).
 func elapsed(start time.Time) int64 {
 	return time.Since(start).Milliseconds()
 }