Per architecture_comparison.md universal-win for Go side: ports the Rust crates/validator/src/staffing/ to internal/validator/. Production safety net Go was missing — FillValidator catches phantom worker IDs + status/blacklist/geo/role mismatches; EmailValidator catches SSN-shape PII + salary disclosure + wrong-target name in email/SMS drafts. Files: - types.go: Artifact (FillProposal | EmailDraft), Validator interface, WorkerLookup interface, ValidationError + Finding + Severity - lookup.go: InMemoryWorkerLookup with case-insensitive ID lookup - fill.go: FillValidator — schema → completeness → cross-roster (phantom ID / status / blacklist / geo / role) - email.go: EmailValidator — schema → length → PII (SSN + salary) → worker-name consistency - fill_test.go + email_test.go: 24 tests covering happy path + every error variant + the load-bearing edge cases (phone-pattern not flagged as SSN, flanking-digit guard rejects extended numeric runs) Validator names match Rust (staffing.fill / staffing.email) so cross-runtime audit logs share the same identifier. PII scanners (containsSSNPattern, containsSalaryDisclosure) ported byte-for-byte so a draft flagged by one runtime is flagged by the other. Caveat: the Rust validator crate also has parquet_lookup.rs (loads workers_500k.parquet at startup) and playbook.rs (additional checks). Those weren't ported in this wave — only the two load-bearing validators that were named in the comparison doc. Closes one of the two universal-win items for Go side. The other (materializer port) remains deferred — it's a bigger surface change and depends on transforms.ts source-class adapters. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
275 lines
7.6 KiB
Go
275 lines
7.6 KiB
Go
package validator
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// FillValidator is the Go port of Rust's FillValidator. Per
|
|
// `crates/validator/src/staffing/fill.rs`:
|
|
//
|
|
// - Schema compliance (propose_done shape: {fills: [{candidate_id, name}]})
|
|
// - Completeness (endorsed count == target_count)
|
|
// - Worker existence (every candidate_id present in workers roster)
|
|
// - Status check (worker.status == "active")
|
|
// - Client blacklist (worker NOT in client.blacklisted_clients)
|
|
// - Geo/role match (worker city/state/role matches contract)
|
|
//
|
|
// Contract metadata travels alongside the JSON payload under a
|
|
// `_context` key:
|
|
//
|
|
// {"_context": {"target_count": 2, "city": "Toledo", "state": "OH",
|
|
// "role": "Welder", "client_id": "CLI-00099"}, "fills": [...]}
|
|
//
|
|
// The duplicate-ID guard inside one fill catches the LLM mistake
|
|
// of repeating the same candidate twice to satisfy a higher
|
|
// target_count.
|
|
type FillValidator struct {
|
|
workers WorkerLookup
|
|
}
|
|
|
|
// NewFillValidator constructs a FillValidator with the given lookup.
|
|
// Lookup must be non-nil; pass NewInMemoryWorkerLookup(nil) for
|
|
// tests that don't exercise existence checks.
|
|
func NewFillValidator(workers WorkerLookup) *FillValidator {
|
|
return &FillValidator{workers: workers}
|
|
}
|
|
|
|
// Name satisfies Validator. Stable string used for audit
|
|
// trail / receipts. Matches Rust output "staffing.fill" so
|
|
// cross-runtime audit logs share the same name.
|
|
func (v *FillValidator) Name() string { return "staffing.fill" }
|
|
|
|
// fillContext holds the contract metadata read from the payload's
// `_context` object. Every field is a pointer so that "absent" (nil) is
// distinguishable from a zero value, mirroring Rust's Option<T>; a check is
// only enforced when the corresponding field is non-nil.
type fillContext struct {
	// TargetCount is the number of fills the contract asks for.
	TargetCount *int
	// City, State and Role describe the contract's location and position;
	// ClientID identifies the client for the blacklist check.
	City, State, Role, ClientID *string
}
|
|
|
|
func extractContext(value map[string]any) fillContext {
|
|
ctx, ok := value["_context"].(map[string]any)
|
|
if !ok {
|
|
return fillContext{}
|
|
}
|
|
out := fillContext{}
|
|
if v, ok := ctx["target_count"]; ok {
|
|
if n, ok := toInt(v); ok {
|
|
out.TargetCount = &n
|
|
}
|
|
}
|
|
if s, ok := ctx["city"].(string); ok {
|
|
out.City = &s
|
|
}
|
|
if s, ok := ctx["state"].(string); ok {
|
|
out.State = &s
|
|
}
|
|
if s, ok := ctx["role"].(string); ok {
|
|
out.Role = &s
|
|
}
|
|
if s, ok := ctx["client_id"].(string); ok {
|
|
out.ClientID = &s
|
|
}
|
|
return out
|
|
}
|
|
|
|
// toInt converts a decoded JSON value to a non-negative int.
//
// It accepts int, int64 and float64 (encoding/json decodes every number
// into float64 when the target is `any`). The boolean result is false, and
// the int result 0, when v is any other type, is negative, has a
// fractional part, or does not survive the conversion to int unchanged.
func toInt(v any) (int, bool) {
	switch n := v.(type) {
	case int:
		if n < 0 {
			return 0, false
		}
		return n, true
	case int64:
		i := int(n)
		// The round-trip comparison rejects values that were truncated on
		// platforms where int is narrower than int64.
		if n < 0 || int64(i) != n {
			return 0, false
		}
		return i, true
	case float64:
		// A negative count makes no sense; reject it rather than hand the
		// caller a target that can never be met.
		if n < 0 {
			return 0, false
		}
		// The round-trip comparison rejects fractional values (2.5), NaN
		// (never equal to anything) and magnitudes int cannot represent.
		i := int(n)
		if i < 0 || float64(i) != n {
			return 0, false
		}
		return i, true
	}
	return 0, false
}
|
|
|
|
// eqCI reports whether a and b are equal once surrounding whitespace is
// trimmed and case is ignored. Validators use it for every string
// comparison (status, role, city, and so on). It plays the role of Rust's
// `.trim().eq_ignore_ascii_case(other.trim())`; note that
// strings.EqualFold applies Unicode case folding rather than ASCII-only
// folding.
func eqCI(a, b string) bool {
	left := strings.TrimSpace(a)
	right := strings.TrimSpace(b)
	return strings.EqualFold(left, right)
}
|
|
|
|
// Validate implements the Validator interface. Mirrors the Rust
|
|
// validation order exactly: schema → completeness → cross-roster
|
|
// per-fill checks.
|
|
func (v *FillValidator) Validate(artifact Artifact) (Report, error) {
|
|
started := time.Now()
|
|
value := artifact.FillProposal
|
|
if value == nil {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrSchema,
|
|
Field: "artifact",
|
|
Reason: fmt.Sprintf("FillValidator expects FillProposal, got %s", artifact.Kind()),
|
|
}
|
|
}
|
|
|
|
// ── Schema check ──
|
|
fillsRaw, ok := value["fills"].([]any)
|
|
if !ok {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrSchema,
|
|
Field: "fills",
|
|
Reason: "expected top-level `fills` array",
|
|
}
|
|
}
|
|
for i, fillRaw := range fillsRaw {
|
|
fill, ok := fillRaw.(map[string]any)
|
|
if !ok {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrSchema,
|
|
Field: fmt.Sprintf("fills[%d]", i),
|
|
Reason: "expected object",
|
|
}
|
|
}
|
|
if _, ok := fill["candidate_id"]; !ok {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrSchema,
|
|
Field: fmt.Sprintf("fills[%d].candidate_id", i),
|
|
Reason: "missing",
|
|
}
|
|
}
|
|
if _, ok := fill["name"]; !ok {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrSchema,
|
|
Field: fmt.Sprintf("fills[%d].name", i),
|
|
Reason: "missing",
|
|
}
|
|
}
|
|
}
|
|
|
|
ctx := extractContext(value)
|
|
|
|
// ── Completeness ──
|
|
if ctx.TargetCount != nil && len(fillsRaw) != *ctx.TargetCount {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrCompleteness,
|
|
Reason: fmt.Sprintf("endorsed count %d != target_count %d",
|
|
len(fillsRaw), *ctx.TargetCount),
|
|
}
|
|
}
|
|
|
|
// ── Cross-roster checks ──
|
|
var findings []Finding
|
|
seenIDs := make(map[string]bool, len(fillsRaw))
|
|
for i, fillRaw := range fillsRaw {
|
|
fill := fillRaw.(map[string]any) // already type-checked in schema pass
|
|
candidateID, _ := fill["candidate_id"].(string)
|
|
proposedName, _ := fill["name"].(string)
|
|
|
|
// Duplicate-ID guard inside one fill.
|
|
if seenIDs[candidateID] {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrConsistency,
|
|
Reason: fmt.Sprintf(
|
|
"duplicate candidate_id %q appears multiple times in fills",
|
|
candidateID,
|
|
),
|
|
}
|
|
}
|
|
seenIDs[candidateID] = true
|
|
|
|
// Worker existence — load-bearing check for the 0→85% pattern.
|
|
worker, ok := v.workers.Find(candidateID)
|
|
if !ok {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrConsistency,
|
|
Reason: fmt.Sprintf(
|
|
"fills[%d].candidate_id %q does not exist in worker roster",
|
|
i, candidateID,
|
|
),
|
|
}
|
|
}
|
|
|
|
// Status — only "active" workers can be endorsed.
|
|
if !eqCI(worker.Status, "active") {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrConsistency,
|
|
Reason: fmt.Sprintf(
|
|
"fills[%d] worker %q has status %q, expected \"active\"",
|
|
i, candidateID, worker.Status,
|
|
),
|
|
}
|
|
}
|
|
|
|
// Client blacklist.
|
|
if ctx.ClientID != nil {
|
|
for _, b := range worker.BlacklistedClients {
|
|
if eqCI(b, *ctx.ClientID) {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrPolicy,
|
|
Reason: fmt.Sprintf(
|
|
"fills[%d] worker %q blacklisted for client %q",
|
|
i, candidateID, *ctx.ClientID,
|
|
),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Geo / role match — only when BOTH sides have a value.
|
|
if ctx.City != nil && worker.City != nil && !eqCI(*ctx.City, *worker.City) {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrConsistency,
|
|
Reason: fmt.Sprintf(
|
|
"fills[%d] worker %q city %q doesn't match contract city %q",
|
|
i, candidateID, *worker.City, *ctx.City,
|
|
),
|
|
}
|
|
}
|
|
if ctx.State != nil && worker.State != nil && !eqCI(*ctx.State, *worker.State) {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrConsistency,
|
|
Reason: fmt.Sprintf(
|
|
"fills[%d] worker %q state %q doesn't match contract state %q",
|
|
i, candidateID, *worker.State, *ctx.State,
|
|
),
|
|
}
|
|
}
|
|
if ctx.Role != nil && worker.Role != nil && !eqCI(*ctx.Role, *worker.Role) {
|
|
return Report{}, &ValidationError{
|
|
Kind: ErrConsistency,
|
|
Reason: fmt.Sprintf(
|
|
"fills[%d] worker %q role %q doesn't match contract role %q",
|
|
i, candidateID, *worker.Role, *ctx.Role,
|
|
),
|
|
}
|
|
}
|
|
|
|
// Name-mismatch is a warning, not an error — recruiters
|
|
// sometimes send updated names through the proposal layer
|
|
// before the roster catches up.
|
|
if proposedName != "" && !eqCI(proposedName, worker.Name) {
|
|
findings = append(findings, Finding{
|
|
Field: fmt.Sprintf("fills[%d].name", i),
|
|
Severity: SeverityWarning,
|
|
Message: fmt.Sprintf(
|
|
"proposed name %q differs from roster name %q for %q",
|
|
proposedName, worker.Name, candidateID,
|
|
),
|
|
})
|
|
}
|
|
}
|
|
|
|
return Report{
|
|
Findings: findings,
|
|
ElapsedMs: elapsed(started),
|
|
}, nil
|
|
}
|