root b03521a506 validator: port FillValidator + EmailValidator from Rust validator crate
Per architecture_comparison.md, this is a universal win for the Go side: it
ports the Rust crates/validator/src/staffing/ module to internal/validator/.
This is the production safety net the Go side was missing — FillValidator
catches phantom worker IDs plus status/blacklist/geo/role mismatches;
EmailValidator catches SSN-shaped PII, salary disclosure, and wrong-target
names in email/SMS drafts.

Files:
- types.go: Artifact (FillProposal | EmailDraft), Validator interface,
  WorkerLookup interface, ValidationError + Finding + Severity
- lookup.go: InMemoryWorkerLookup with case-insensitive ID lookup
- fill.go: FillValidator — schema → completeness → cross-roster
  (phantom ID / status / blacklist / geo / role)
- email.go: EmailValidator — schema → length → PII (SSN + salary)
  → worker-name consistency
- fill_test.go + email_test.go: 24 tests covering happy path +
  every error variant + the load-bearing edge cases (phone-pattern
  not flagged as SSN, flanking-digit guard rejects extended
  numeric runs)

Validator names match Rust (staffing.fill / staffing.email) so
cross-runtime audit logs share the same identifier. PII scanners
(containsSSNPattern, containsSalaryDisclosure) ported byte-for-byte
so a draft flagged by one runtime is flagged by the other.

Caveat: the Rust validator crate also has parquet_lookup.rs (loads
workers_500k.parquet at startup) and playbook.rs (additional
checks). Those weren't ported in this wave — only the two
load-bearing validators that were named in the comparison doc.

Closes one of the two universal-win items for the Go side. The other
(the materializer port) remains deferred — it is a bigger surface change
and depends on the transforms.ts source-class adapters.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 04:49:55 -05:00

275 lines
7.6 KiB
Go

package validator
import (
"fmt"
"strings"
"time"
)
// FillValidator is the Go port of Rust's FillValidator
// (`crates/validator/src/staffing/fill.rs`). Validate applies the
// following checks in order and stops at the first one that fails:
//
//   - Schema compliance (propose_done shape: {fills: [{candidate_id, name}]})
//   - Completeness (number of fills == target_count, when target_count is given)
//   - Duplicate guard (no candidate_id repeated inside one proposal)
//   - Worker existence (every candidate_id resolves through the WorkerLookup)
//   - Status check (worker status equals "active", ignoring case and
//     surrounding whitespace)
//   - Client blacklist (contract client_id is not among the worker's
//     blacklisted clients)
//   - Geo/role match (worker city/state/role matches the contract, enforced
//     only when both sides carry a value)
//
// A proposed name that differs from the roster name is reported as a
// warning finding rather than an error.
//
// Contract metadata travels alongside the JSON payload under a
// `_context` key:
//
//	{"_context": {"target_count": 2, "city": "Toledo", "state": "OH",
//	  "role": "Welder", "client_id": "CLI-00099"}, "fills": [...]}
//
// The duplicate-ID guard inside one fill catches the LLM mistake
// of repeating the same candidate twice to satisfy a higher
// target_count.
type FillValidator struct {
// workers resolves a candidate ID to its roster entry; Validate calls
// workers.Find for every fill.
workers WorkerLookup
}
// NewFillValidator returns a FillValidator backed by the supplied
// roster lookup. The lookup is stored as-is and is not checked here,
// so callers must pass a non-nil value; tests that never exercise the
// existence check can pass NewInMemoryWorkerLookup(nil).
func NewFillValidator(workers WorkerLookup) *FillValidator {
	fv := new(FillValidator)
	fv.workers = workers
	return fv
}
// Name satisfies Validator. It returns the stable identifier written
// to audit trails and receipts; the value matches the Rust output
// "staffing.fill" so cross-runtime audit logs share one name.
func (v *FillValidator) Name() string {
	const name = "staffing.fill"
	return name
}
// fillContext is the optional contract metadata extracted from the
// payload's `_context` object. Each field is independently nil-able
// (Rust's Option<T> pattern): a nil field means the key was absent or
// had an unusable type, and the validator skips the corresponding
// check. Geo/role checks additionally require the roster side to
// carry a value.
type fillContext struct {
// TargetCount is `_context.target_count`; set only when toInt accepts the value.
TargetCount *int
// City is `_context.city` when it is a JSON string.
City *string
// State is `_context.state` when it is a JSON string.
State *string
// Role is `_context.role` when it is a JSON string.
Role *string
// ClientID is `_context.client_id` when it is a JSON string; used for
// the blacklist check.
ClientID *string
}
// extractContext pulls the optional contract metadata out of the
// payload's `_context` object. A missing or non-object `_context`
// yields the zero fillContext; inside it, any key that is absent or
// holds the wrong type simply leaves the matching field nil.
func extractContext(value map[string]any) fillContext {
	var parsed fillContext

	meta, isObject := value["_context"].(map[string]any)
	if !isObject {
		return parsed
	}

	// optString returns a pointer to a copy of meta[key] when that
	// entry holds a string, and nil otherwise.
	optString := func(key string) *string {
		text, isString := meta[key].(string)
		if !isString {
			return nil
		}
		return &text
	}

	if raw, present := meta["target_count"]; present {
		if count, valid := toInt(raw); valid {
			parsed.TargetCount = &count
		}
	}
	parsed.City = optString("city")
	parsed.State = optString("state")
	parsed.Role = optString("role")
	parsed.ClientID = optString("client_id")

	return parsed
}
// toInt converts a decoded JSON number into an int count.
//
// It accepts int, int64 and float64 (encoding/json stores every JSON
// number as float64 when decoding into `any`). The boolean result is
// true only when the value is a whole number ≥ 0 that fits in int.
// Negative values, fractions, NaN, ±Inf, magnitudes outside the int
// range and every other dynamic type yield (0, false).
func toInt(v any) (int, bool) {
	// Largest int on this platform, derived without importing math.
	const maxInt = int(^uint(0) >> 1)

	switch n := v.(type) {
	case int:
		if n < 0 {
			return 0, false
		}
		return n, true
	case int64:
		// The round-trip comparison rejects values that would be
		// truncated on platforms where int is 32 bits wide.
		if n < 0 || int64(int(n)) != n {
			return 0, false
		}
		return int(n), true
	case float64:
		// Converting an out-of-range float64 to int is
		// implementation-defined in Go, so bound the value first.
		// -float64(minInt) is exactly 2^(bits-1), the first float64
		// beyond the int range.
		if n < 0 || n >= -float64(-maxInt-1) {
			return 0, false
		}
		// Whole-number check: target_count=2.5 makes no sense. NaN
		// also lands here and fails, because NaN compares unequal to
		// everything.
		i := int(n)
		if float64(i) != n {
			return 0, false
		}
		return i, true
	}
	return 0, false
}
// eqCI reports whether a and b are equal once leading/trailing
// whitespace is stripped and case is ignored. It is the comparison
// validators use for status, role, city and similar fields, and is
// the Go counterpart of Rust's
// `.trim().eq_ignore_ascii_case(other.trim())`. Note that
// strings.EqualFold applies Unicode simple case folding rather than
// ASCII-only folding.
func eqCI(a, b string) bool {
	left := strings.TrimSpace(a)
	right := strings.TrimSpace(b)
	return strings.EqualFold(left, right)
}
// Validate implements the Validator interface for fill proposals.
//
// Checks run in a fixed order — schema → completeness → per-fill
// cross-roster checks — mirroring the Rust validation order. The
// first failing check aborts validation: the method returns a zero
// Report together with a *ValidationError whose Kind is ErrSchema,
// ErrCompleteness, ErrConsistency or ErrPolicy.
//
// On success the error is nil and the Report carries ElapsedMs plus
// any warning findings (currently only proposed-name vs. roster-name
// mismatches).
//
// The receiver's lookup must be non-nil; it is called for every fill.
func (v *FillValidator) Validate(artifact Artifact) (Report, error) {
	started := time.Now()
	value := artifact.FillProposal
	if value == nil {
		return Report{}, &ValidationError{
			Kind:   ErrSchema,
			Field:  "artifact",
			Reason: fmt.Sprintf("FillValidator expects FillProposal, got %s", artifact.Kind()),
		}
	}

	// ── Schema check ──
	fillsRaw, ok := value["fills"].([]any)
	if !ok {
		return Report{}, &ValidationError{
			Kind:   ErrSchema,
			Field:  "fills",
			Reason: "expected top-level `fills` array",
		}
	}
	for i, fillRaw := range fillsRaw {
		fill, ok := fillRaw.(map[string]any)
		if !ok {
			return Report{}, &ValidationError{
				Kind:   ErrSchema,
				Field:  fmt.Sprintf("fills[%d]", i),
				Reason: "expected object",
			}
		}
		// Only key presence is enforced here; the value types are
		// handled leniently in the cross-roster pass below.
		if _, ok := fill["candidate_id"]; !ok {
			return Report{}, &ValidationError{
				Kind:   ErrSchema,
				Field:  fmt.Sprintf("fills[%d].candidate_id", i),
				Reason: "missing",
			}
		}
		if _, ok := fill["name"]; !ok {
			return Report{}, &ValidationError{
				Kind:   ErrSchema,
				Field:  fmt.Sprintf("fills[%d].name", i),
				Reason: "missing",
			}
		}
	}

	ctx := extractContext(value)

	// ── Completeness ──
	// Enforced only when the contract supplied a usable target_count.
	if ctx.TargetCount != nil && len(fillsRaw) != *ctx.TargetCount {
		return Report{}, &ValidationError{
			Kind: ErrCompleteness,
			Reason: fmt.Sprintf("endorsed count %d != target_count %d",
				len(fillsRaw), *ctx.TargetCount),
		}
	}

	// ── Cross-roster checks ──
	var findings []Finding
	seenIDs := make(map[string]struct{}, len(fillsRaw))
	for i, fillRaw := range fillsRaw {
		fill := fillRaw.(map[string]any) // already type-checked in schema pass
		// A non-string candidate_id or name degrades to "" here; an
		// empty ID then fails the roster lookup like any unknown ID.
		candidateID, _ := fill["candidate_id"].(string)
		proposedName, _ := fill["name"].(string)

		// Duplicate-ID guard inside one fill. The key is case-folded
		// because the in-memory roster lookup is described as matching
		// IDs case-insensitively: without folding, "W-001" and "w-001"
		// would resolve to the same worker yet slip past this guard.
		idKey := strings.ToLower(candidateID)
		if _, dup := seenIDs[idKey]; dup {
			return Report{}, &ValidationError{
				Kind: ErrConsistency,
				Reason: fmt.Sprintf(
					"duplicate candidate_id %q appears multiple times in fills",
					candidateID,
				),
			}
		}
		seenIDs[idKey] = struct{}{}

		// Worker existence — load-bearing check for the 0→85% pattern.
		worker, ok := v.workers.Find(candidateID)
		if !ok {
			return Report{}, &ValidationError{
				Kind: ErrConsistency,
				Reason: fmt.Sprintf(
					"fills[%d].candidate_id %q does not exist in worker roster",
					i, candidateID,
				),
			}
		}

		// Status — only "active" workers can be endorsed.
		if !eqCI(worker.Status, "active") {
			return Report{}, &ValidationError{
				Kind: ErrConsistency,
				Reason: fmt.Sprintf(
					"fills[%d] worker %q has status %q, expected \"active\"",
					i, candidateID, worker.Status,
				),
			}
		}

		// Client blacklist — skipped when the contract names no client.
		if ctx.ClientID != nil {
			for _, b := range worker.BlacklistedClients {
				if eqCI(b, *ctx.ClientID) {
					return Report{}, &ValidationError{
						Kind: ErrPolicy,
						Reason: fmt.Sprintf(
							"fills[%d] worker %q blacklisted for client %q",
							i, candidateID, *ctx.ClientID,
						),
					}
				}
			}
		}

		// Geo / role match — only when BOTH sides have a value.
		if ctx.City != nil && worker.City != nil && !eqCI(*ctx.City, *worker.City) {
			return Report{}, &ValidationError{
				Kind: ErrConsistency,
				Reason: fmt.Sprintf(
					"fills[%d] worker %q city %q doesn't match contract city %q",
					i, candidateID, *worker.City, *ctx.City,
				),
			}
		}
		if ctx.State != nil && worker.State != nil && !eqCI(*ctx.State, *worker.State) {
			return Report{}, &ValidationError{
				Kind: ErrConsistency,
				Reason: fmt.Sprintf(
					"fills[%d] worker %q state %q doesn't match contract state %q",
					i, candidateID, *worker.State, *ctx.State,
				),
			}
		}
		if ctx.Role != nil && worker.Role != nil && !eqCI(*ctx.Role, *worker.Role) {
			return Report{}, &ValidationError{
				Kind: ErrConsistency,
				Reason: fmt.Sprintf(
					"fills[%d] worker %q role %q doesn't match contract role %q",
					i, candidateID, *worker.Role, *ctx.Role,
				),
			}
		}

		// Name-mismatch is a warning, not an error — recruiters
		// sometimes send updated names through the proposal layer
		// before the roster catches up. An empty proposed name is
		// not compared at all.
		if proposedName != "" && !eqCI(proposedName, worker.Name) {
			findings = append(findings, Finding{
				Field:    fmt.Sprintf("fills[%d].name", i),
				Severity: SeverityWarning,
				Message: fmt.Sprintf(
					"proposed name %q differs from roster name %q for %q",
					proposedName, worker.Name, candidateID,
				),
			})
		}
	}

	return Report{
		Findings:  findings,
		ElapsedMs: elapsed(started),
	}, nil
}