validator: port FillValidator + EmailValidator from Rust validator crate
Per architecture_comparison.md universal-win for Go side: ports the Rust crates/validator/src/staffing/ to internal/validator/. Production safety net Go was missing — FillValidator catches phantom worker IDs + status/blacklist/geo/role mismatches; EmailValidator catches SSN-shape PII + salary disclosure + wrong-target name in email/SMS drafts. Files: - types.go: Artifact (FillProposal | EmailDraft), Validator interface, WorkerLookup interface, ValidationError + Finding + Severity - lookup.go: InMemoryWorkerLookup with case-insensitive ID lookup - fill.go: FillValidator — schema → completeness → cross-roster (phantom ID / status / blacklist / geo / role) - email.go: EmailValidator — schema → length → PII (SSN + salary) → worker-name consistency - fill_test.go + email_test.go: 24 tests covering happy path + every error variant + the load-bearing edge cases (phone-pattern not flagged as SSN, flanking-digit guard rejects extended numeric runs) Validator names match Rust (staffing.fill / staffing.email) so cross-runtime audit logs share the same identifier. PII scanners (containsSSNPattern, containsSalaryDisclosure) ported byte-for-byte so a draft flagged by one runtime is flagged by the other. Caveat: the Rust validator crate also has parquet_lookup.rs (loads workers_500k.parquet at startup) and playbook.rs (additional checks). Those weren't ported in this wave — only the two load-bearing validators that were named in the comparison doc. Closes one of the two universal-win items for Go side. The other (materializer port) remains deferred — it's a bigger surface change and depends on transforms.ts source-class adapters. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b3ad14832d
commit
b03521a506
270
internal/validator/email.go
Normal file
270
internal/validator/email.go
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
package validator
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// EmailValidator is the Go port of Rust's EmailValidator. Per
|
||||||
|
// `crates/validator/src/staffing/email.rs`:
|
||||||
|
//
|
||||||
|
// - Schema (TO/BODY fields present)
|
||||||
|
// - Length (SMS ≤ 160 chars; email subject ≤ 78 chars)
|
||||||
|
// - PII absence (no SSN-shape / salary leakage)
|
||||||
|
// - Worker-name consistency (body mentions worker first name)
|
||||||
|
//
|
||||||
|
// PII detection is std-only — no regex dependency. Two scanners:
|
||||||
|
// - SSN-shape: NNN-NN-NNNN with run-of-digits guards (so phone
|
||||||
|
// numbers like NNN-NNN-NNNN don't false-positive).
|
||||||
|
// - Salary disclosure: keywords near a `$amount` substring.
|
||||||
|
//
|
||||||
|
// Both mirror Rust byte-for-byte so cross-runtime audit logs
|
||||||
|
// agree on which messages get flagged.
|
||||||
|
type EmailValidator struct {
|
||||||
|
workers WorkerLookup
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewEmailValidator constructs an EmailValidator with the given
|
||||||
|
// lookup. If you don't need the name-consistency check (e.g.
|
||||||
|
// generic broadcast templates), pass NewInMemoryWorkerLookup(nil)
|
||||||
|
// — the validator skips the worker check when _context.candidate_id
|
||||||
|
// is absent.
|
||||||
|
func NewEmailValidator(workers WorkerLookup) *EmailValidator {
|
||||||
|
return &EmailValidator{workers: workers}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name satisfies Validator. Stable string used for audit trail /
|
||||||
|
// receipts. Matches Rust output "staffing.email".
|
||||||
|
func (v *EmailValidator) Name() string { return "staffing.email" }
|
||||||
|
|
||||||
|
// Per-channel size limits; the values are kept equal to the Rust
// validator's.

// smsMaxChars is the largest SMS body length Validate accepts.
const smsMaxChars = 160

// emailSubjectMaxChars is the largest email subject length Validate
// accepts.
const emailSubjectMaxChars = 78
|
||||||
|
|
||||||
|
// Validate implements Validator. Order: schema → length →
|
||||||
|
// PII → worker-name consistency.
|
||||||
|
func (v *EmailValidator) Validate(artifact Artifact) (Report, error) {
|
||||||
|
started := time.Now()
|
||||||
|
value := artifact.EmailDraft
|
||||||
|
if value == nil {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrSchema,
|
||||||
|
Field: "artifact",
|
||||||
|
Reason: fmt.Sprintf("EmailValidator expects EmailDraft, got %s", artifact.Kind()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Schema (`to` + `body` required) ──
|
||||||
|
if _, ok := value["to"].(string); !ok {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrSchema,
|
||||||
|
Field: "to",
|
||||||
|
Reason: "missing or not a string",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
body, ok := value["body"].(string)
|
||||||
|
if !ok {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrSchema,
|
||||||
|
Field: "body",
|
||||||
|
Reason: "missing or not a string",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Length checks ──
|
||||||
|
isSMS := false
|
||||||
|
if k, ok := value["kind"].(string); ok && k == "sms" {
|
||||||
|
isSMS = true
|
||||||
|
}
|
||||||
|
if isSMS && len(body) > smsMaxChars {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrCompleteness,
|
||||||
|
Reason: fmt.Sprintf("SMS body is %d chars, max %d",
|
||||||
|
len(body), smsMaxChars),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if subject, ok := value["subject"].(string); ok && len(subject) > emailSubjectMaxChars {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrCompleteness,
|
||||||
|
Reason: fmt.Sprintf("email subject is %d chars, max %d",
|
||||||
|
len(subject), emailSubjectMaxChars),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── PII scan over subject + body combined ──
|
||||||
|
var subjectStr string
|
||||||
|
if s, ok := value["subject"].(string); ok {
|
||||||
|
subjectStr = s
|
||||||
|
}
|
||||||
|
scanned := subjectStr + " " + body
|
||||||
|
if containsSSNPattern(scanned) {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrPolicy,
|
||||||
|
Reason: "body contains an SSN-shaped sequence (NNN-NN-NNNN); strip before send",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if containsSalaryDisclosure(scanned) {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrPolicy,
|
||||||
|
Reason: "body discloses salary/compensation amount; staffing PII rule says strip before send",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Worker-name consistency ──
|
||||||
|
var findings []Finding
|
||||||
|
if ctx, ok := value["_context"].(map[string]any); ok {
|
||||||
|
if cid, ok := ctx["candidate_id"].(string); ok && cid != "" {
|
||||||
|
worker, found := v.workers.Find(cid)
|
||||||
|
if !found {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrConsistency,
|
||||||
|
Reason: fmt.Sprintf("_context.candidate_id %q not found in worker roster", cid),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Body should mention the worker's name (or at least
|
||||||
|
// their first name) — drafts that address a different
|
||||||
|
// person than the contracted worker are a recurring
|
||||||
|
// LLM mistake.
|
||||||
|
first := strings.Fields(worker.Name)
|
||||||
|
firstLower := ""
|
||||||
|
if len(first) > 0 {
|
||||||
|
firstLower = strings.ToLower(first[0])
|
||||||
|
}
|
||||||
|
bodyLower := strings.ToLower(body)
|
||||||
|
if firstLower != "" && !strings.Contains(bodyLower, firstLower) {
|
||||||
|
findings = append(findings, Finding{
|
||||||
|
Field: "body",
|
||||||
|
Severity: SeverityWarning,
|
||||||
|
Message: fmt.Sprintf(
|
||||||
|
"body doesn't mention worker first name %q (candidate_id %q)",
|
||||||
|
first[0], cid,
|
||||||
|
),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Report{
|
||||||
|
Findings: findings,
|
||||||
|
ElapsedMs: elapsed(started),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── PII scanners — std-only, mirror Rust byte-for-byte ──────────
|
||||||
|
|
||||||
|
// containsSSNPattern reports whether s contains a U.S. SSN-shaped
// sequence: three ASCII digits, a dash, two digits, a dash, four digits
// (NNN-NN-NNNN).
//
// A candidate match is discarded when the byte immediately before or
// after it is a digit or a dash, because it is then part of a longer
// numeric run rather than a standalone SSN. The shape itself already
// excludes phone numbers of the form NNN-NNN-NNNN.
//
// This fires on real PII in production drafts. Do not relax the
// flanking guards without a regression test covering both directions:
// a genuine SSN must match and a phone-style NNN-NNN-NNNN must not.
func containsSSNPattern(s string) bool {
	const width = len("NNN-NN-NNNN")

	digit := func(c byte) bool { return c >= '0' && c <= '9' }
	// extendsRun reports whether c would make a match part of a longer
	// digit/dash sequence.
	extendsRun := func(c byte) bool { return c == '-' || digit(c) }

	for start := 0; start+width <= len(s); start++ {
		end := start + width

		// Offsets 3 and 6 must be dashes; every other offset a digit.
		shaped := true
		for off := 0; off < width && shaped; off++ {
			c := s[start+off]
			if off == 3 || off == 6 {
				shaped = c == '-'
			} else {
				shaped = digit(c)
			}
		}
		if !shaped {
			continue
		}

		if start > 0 && extendsRun(s[start-1]) {
			continue
		}
		if end < len(s) && extendsRun(s[end]) {
			continue
		}
		return true
	}
	return false
}
|
||||||
|
|
||||||
|
// isAsciiDigit reports whether b is one of the ASCII bytes '0'..'9'.
func isAsciiDigit(b byte) bool {
	switch {
	case b < '0', b > '9':
		return false
	}
	return true
}
|
||||||
|
|
||||||
|
// containsSalaryDisclosure reports whether s appears to disclose a
// salary or rate. It lower-cases s and returns true when any occurrence
// of "salary", "compensation", "pay rate", "bill rate" or "hourly rate"
// starts within 40 bytes (in either direction) of a '$' that is
// immediately followed by an ASCII digit.
//
// The check is deliberately coarse: flagging a harmless phrase such as
// "discuss your hourly rate of $30/hr" is preferred over missing a real
// disclosure. Tune it by adding tests, not by loosening the match.
func containsSalaryDisclosure(s string) bool {
	const window = 40
	text := strings.ToLower(s)

	// Offsets of every "$<digit>" in the lower-cased text.
	var amounts []int
	for i := 0; i+1 < len(text); i++ {
		if next := text[i+1]; text[i] == '$' && next >= '0' && next <= '9' {
			amounts = append(amounts, i)
		}
	}
	if len(amounts) == 0 {
		return false
	}

	for _, kw := range [...]string{"salary", "compensation", "pay rate", "bill rate", "hourly rate"} {
		// Walk every non-overlapping occurrence of kw.
		for from := 0; ; {
			rel := strings.Index(text[from:], kw)
			if rel < 0 {
				break
			}
			at := from + rel
			for _, amt := range amounts {
				gap := at - amt
				if gap < 0 {
					gap = -gap
				}
				if gap <= window {
					return true
				}
			}
			from = at + len(kw)
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// absDiff returns the distance between a and b, computed as the larger
// value minus the smaller one.
func absDiff(a, b int) int {
	lo, hi := a, b
	if lo > hi {
		lo, hi = hi, lo
	}
	return hi - lo
}
|
||||||
220
internal/validator/email_test.go
Normal file
220
internal/validator/email_test.go
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
package validator
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
// ── Schema ──
|
||||||
|
|
||||||
|
func TestEmail_WrongArtifactType_FailsSchema(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrSchema {
|
||||||
|
t.Errorf("expected schema error on wrong artifact, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_MissingTo_FailsSchema(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{"body": "hi"}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrSchema || ve.Field != "to" {
|
||||||
|
t.Errorf("expected schema/to error, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_MissingBody_FailsSchema(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{"to": "a@b.com"}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrSchema || ve.Field != "body" {
|
||||||
|
t.Errorf("expected schema/body error, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Length limits ──
|
||||||
|
|
||||||
|
func TestEmail_LongSMS_FailsCompleteness(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
body := make([]byte, 200)
|
||||||
|
for i := range body {
|
||||||
|
body[i] = 'x'
|
||||||
|
}
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "+15555550123",
|
||||||
|
"body": string(body),
|
||||||
|
"kind": "sms",
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrCompleteness {
|
||||||
|
t.Errorf("expected completeness error on long SMS, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_LongSubject_FailsCompleteness(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
subject := make([]byte, 100)
|
||||||
|
for i := range subject {
|
||||||
|
subject[i] = 'x'
|
||||||
|
}
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "hi",
|
||||||
|
"subject": string(subject),
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrCompleteness {
|
||||||
|
t.Errorf("expected completeness error on long subject, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── PII: SSN ──
|
||||||
|
|
||||||
|
func TestEmail_SSNInBody_FailsPolicy(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "Their SSN is 123-45-6789, please file accordingly.",
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrPolicy {
|
||||||
|
t.Errorf("expected policy error on SSN, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_PhonePatternNotFlaggedAsSSN(t *testing.T) {
|
||||||
|
// NNN-NNN-NNNN (phone) must NOT trigger the NNN-NN-NNNN check.
|
||||||
|
// Critical false-positive case from Rust phone-pattern test.
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "Call me at 555-123-4567 to confirm.",
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("phone pattern should NOT trigger SSN policy, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_SSNInsideLongerNumericRun_NotFlagged(t *testing.T) {
|
||||||
|
// 1234-56-78901 has the right shape pattern at offset 0 but
|
||||||
|
// flanking digits → not an SSN. Mirrors Rust's flanking-digit
|
||||||
|
// guard test.
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "ID 1234-56-78901 is the new format.",
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("flanking-digit guard should reject this, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── PII: salary ──
|
||||||
|
|
||||||
|
func TestEmail_SalaryDisclosure_FailsPolicy(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "Their salary is $45000 — please confirm before sending offer.",
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrPolicy {
|
||||||
|
t.Errorf("expected policy error on salary disclosure, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_HourlyRateDisclosure_FailsPolicy(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "Discuss your hourly rate of $30 with the client when you arrive.",
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrPolicy {
|
||||||
|
t.Errorf("expected policy error on hourly rate, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_DollarFar_NotFlagged(t *testing.T) {
|
||||||
|
// $ amount > 40 chars from the keyword → not flagged.
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
body := "We're paid by salary, but the parking validation costs " +
|
||||||
|
"about three more sentences worth of text appearing in between, " +
|
||||||
|
"and then much later at $50 the trip is too expensive."
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com", "body": body,
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("salary keyword far from $ amount should not flag, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Worker-name consistency ──
|
||||||
|
|
||||||
|
func TestEmail_NameMissingFromBody_EmitsWarning(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup(mkWorker("w1", "Alice Smith", "active", "Toledo", "OH", "Welder")))
|
||||||
|
report, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "Hello, please confirm your shift tomorrow.",
|
||||||
|
"_context": map[string]any{"candidate_id": "w1"},
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("name mismatch should NOT error (warning only), got %v", err)
|
||||||
|
}
|
||||||
|
if len(report.Findings) != 1 || report.Findings[0].Severity != SeverityWarning {
|
||||||
|
t.Errorf("expected 1 warning finding, got %v", report.Findings)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_NameInBody_NoFinding(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup(mkWorker("w1", "Alice Smith", "active", "Toledo", "OH", "Welder")))
|
||||||
|
report, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "Hi Alice, please confirm tomorrow.",
|
||||||
|
"_context": map[string]any{"candidate_id": "w1"},
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected pass, got %v", err)
|
||||||
|
}
|
||||||
|
if len(report.Findings) != 0 {
|
||||||
|
t.Errorf("expected zero findings, got %v", report.Findings)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmail_PhantomCandidateID_FailsConsistency(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "a@b.com",
|
||||||
|
"body": "Hi Alice",
|
||||||
|
"_context": map[string]any{"candidate_id": "phantom"},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrConsistency {
|
||||||
|
t.Errorf("expected consistency error on phantom ID, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Happy path ──
|
||||||
|
|
||||||
|
func TestEmail_WellFormed_Passes(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
report, err := v.Validate(Artifact{EmailDraft: map[string]any{
|
||||||
|
"to": "alice@example.com",
|
||||||
|
"subject": "Shift confirmation",
|
||||||
|
"body": "Please confirm your shift starts at 9am tomorrow.",
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("well-formed email should pass, got %v", err)
|
||||||
|
}
|
||||||
|
if len(report.Findings) != 0 {
|
||||||
|
t.Errorf("expected zero findings, got %v", report.Findings)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Validator name is stable ──
|
||||||
|
|
||||||
|
func TestEmail_NameMatchesRust(t *testing.T) {
|
||||||
|
v := NewEmailValidator(mkLookup())
|
||||||
|
if v.Name() != "staffing.email" {
|
||||||
|
t.Errorf("name should match Rust 'staffing.email', got %q", v.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
274
internal/validator/fill.go
Normal file
274
internal/validator/fill.go
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
package validator
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FillValidator is the Go port of Rust's FillValidator. Per
|
||||||
|
// `crates/validator/src/staffing/fill.rs`:
|
||||||
|
//
|
||||||
|
// - Schema compliance (propose_done shape: {fills: [{candidate_id, name}]})
|
||||||
|
// - Completeness (endorsed count == target_count)
|
||||||
|
// - Worker existence (every candidate_id present in workers roster)
|
||||||
|
// - Status check (worker.status == "active")
|
||||||
|
// - Client blacklist (worker NOT in client.blacklisted_clients)
|
||||||
|
// - Geo/role match (worker city/state/role matches contract)
|
||||||
|
//
|
||||||
|
// Contract metadata travels alongside the JSON payload under a
|
||||||
|
// `_context` key:
|
||||||
|
//
|
||||||
|
// {"_context": {"target_count": 2, "city": "Toledo", "state": "OH",
|
||||||
|
// "role": "Welder", "client_id": "CLI-00099"}, "fills": [...]}
|
||||||
|
//
|
||||||
|
// The duplicate-ID guard inside one fill catches the LLM mistake
|
||||||
|
// of repeating the same candidate twice to satisfy a higher
|
||||||
|
// target_count.
|
||||||
|
type FillValidator struct {
|
||||||
|
workers WorkerLookup
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFillValidator constructs a FillValidator with the given lookup.
|
||||||
|
// Lookup must be non-nil; pass NewInMemoryWorkerLookup(nil) for
|
||||||
|
// tests that don't exercise existence checks.
|
||||||
|
func NewFillValidator(workers WorkerLookup) *FillValidator {
|
||||||
|
return &FillValidator{workers: workers}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name satisfies Validator. Stable string used for audit
|
||||||
|
// trail / receipts. Matches Rust output "staffing.fill" so
|
||||||
|
// cross-runtime audit logs share the same name.
|
||||||
|
func (v *FillValidator) Name() string { return "staffing.fill" }
|
||||||
|
|
||||||
|
// fillContext holds the optional contract metadata read from a
// proposal's `_context` object. Every field is a pointer so that
// "absent" can be told apart from a zero value (the Go stand-in for
// Rust's Option<T>); a check is only enforced when the relevant field
// is non-nil.
type fillContext struct {
	TargetCount *int    // `target_count`: number of fills the contract expects
	City        *string // `city`
	State       *string // `state`
	Role        *string // `role`
	ClientID    *string // `client_id`
}
|
||||||
|
|
||||||
|
func extractContext(value map[string]any) fillContext {
|
||||||
|
ctx, ok := value["_context"].(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return fillContext{}
|
||||||
|
}
|
||||||
|
out := fillContext{}
|
||||||
|
if v, ok := ctx["target_count"]; ok {
|
||||||
|
if n, ok := toInt(v); ok {
|
||||||
|
out.TargetCount = &n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if s, ok := ctx["city"].(string); ok {
|
||||||
|
out.City = &s
|
||||||
|
}
|
||||||
|
if s, ok := ctx["state"].(string); ok {
|
||||||
|
out.State = &s
|
||||||
|
}
|
||||||
|
if s, ok := ctx["role"].(string); ok {
|
||||||
|
out.Role = &s
|
||||||
|
}
|
||||||
|
if s, ok := ctx["client_id"].(string); ok {
|
||||||
|
out.ClientID = &s
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// toInt converts a decoded JSON number to an int. It accepts int, int64
// and float64 (encoding/json decodes every number into float64 when the
// target is `any`) and succeeds only when the value is a whole number
// that is ≥ 0 and fits in an int.
//
// It returns (0, false) for any other type, for negative values, for
// fractional values such as 2.5, and for NaN or out-of-range floats.
// Callers treat a false result as "value not provided".
func toInt(v any) (int, bool) {
	const maxInt = int(^uint(0) >> 1)

	switch n := v.(type) {
	case int:
		if n < 0 {
			return 0, false
		}
		return n, true
	case int64:
		// The upper bound only matters where int is narrower than int64.
		if n < 0 || n > int64(maxInt) {
			return 0, false
		}
		return int(n), true
	case float64:
		// Range-check before converting: the result of converting an
		// out-of-range float to int is implementation-defined. The
		// negated form also rejects NaN, for which both comparisons are
		// false.
		if !(n >= 0 && n < float64(maxInt)) {
			return 0, false
		}
		// Whole-number check: a count like 2.5 makes no sense.
		i := int(n)
		if float64(i) != n {
			return 0, false
		}
		return i, true
	}
	return 0, false
}
|
||||||
|
|
||||||
|
// eqCI reports whether a and b are equal once surrounding whitespace is
// trimmed and case is ignored. It is the comparison used wherever the
// validators match strings (status, role, city, ...) and stands in for
// Rust's `.trim().eq_ignore_ascii_case(other.trim())`.
//
// NOTE(review): strings.EqualFold folds case for all of Unicode, whereas
// the Rust call named above is ASCII-only — the two can disagree on
// non-ASCII input; confirm whether that matters for roster data.
func eqCI(a, b string) bool {
	left := strings.TrimSpace(a)
	right := strings.TrimSpace(b)
	return strings.EqualFold(left, right)
}
|
||||||
|
|
||||||
|
// Validate implements the Validator interface. Mirrors the Rust
|
||||||
|
// validation order exactly: schema → completeness → cross-roster
|
||||||
|
// per-fill checks.
|
||||||
|
func (v *FillValidator) Validate(artifact Artifact) (Report, error) {
|
||||||
|
started := time.Now()
|
||||||
|
value := artifact.FillProposal
|
||||||
|
if value == nil {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrSchema,
|
||||||
|
Field: "artifact",
|
||||||
|
Reason: fmt.Sprintf("FillValidator expects FillProposal, got %s", artifact.Kind()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Schema check ──
|
||||||
|
fillsRaw, ok := value["fills"].([]any)
|
||||||
|
if !ok {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrSchema,
|
||||||
|
Field: "fills",
|
||||||
|
Reason: "expected top-level `fills` array",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i, fillRaw := range fillsRaw {
|
||||||
|
fill, ok := fillRaw.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrSchema,
|
||||||
|
Field: fmt.Sprintf("fills[%d]", i),
|
||||||
|
Reason: "expected object",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, ok := fill["candidate_id"]; !ok {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrSchema,
|
||||||
|
Field: fmt.Sprintf("fills[%d].candidate_id", i),
|
||||||
|
Reason: "missing",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, ok := fill["name"]; !ok {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrSchema,
|
||||||
|
Field: fmt.Sprintf("fills[%d].name", i),
|
||||||
|
Reason: "missing",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := extractContext(value)
|
||||||
|
|
||||||
|
// ── Completeness ──
|
||||||
|
if ctx.TargetCount != nil && len(fillsRaw) != *ctx.TargetCount {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrCompleteness,
|
||||||
|
Reason: fmt.Sprintf("endorsed count %d != target_count %d",
|
||||||
|
len(fillsRaw), *ctx.TargetCount),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Cross-roster checks ──
|
||||||
|
var findings []Finding
|
||||||
|
seenIDs := make(map[string]bool, len(fillsRaw))
|
||||||
|
for i, fillRaw := range fillsRaw {
|
||||||
|
fill := fillRaw.(map[string]any) // already type-checked in schema pass
|
||||||
|
candidateID, _ := fill["candidate_id"].(string)
|
||||||
|
proposedName, _ := fill["name"].(string)
|
||||||
|
|
||||||
|
// Duplicate-ID guard inside one fill.
|
||||||
|
if seenIDs[candidateID] {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrConsistency,
|
||||||
|
Reason: fmt.Sprintf(
|
||||||
|
"duplicate candidate_id %q appears multiple times in fills",
|
||||||
|
candidateID,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seenIDs[candidateID] = true
|
||||||
|
|
||||||
|
// Worker existence — load-bearing check for the 0→85% pattern.
|
||||||
|
worker, ok := v.workers.Find(candidateID)
|
||||||
|
if !ok {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrConsistency,
|
||||||
|
Reason: fmt.Sprintf(
|
||||||
|
"fills[%d].candidate_id %q does not exist in worker roster",
|
||||||
|
i, candidateID,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Status — only "active" workers can be endorsed.
|
||||||
|
if !eqCI(worker.Status, "active") {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrConsistency,
|
||||||
|
Reason: fmt.Sprintf(
|
||||||
|
"fills[%d] worker %q has status %q, expected \"active\"",
|
||||||
|
i, candidateID, worker.Status,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Client blacklist.
|
||||||
|
if ctx.ClientID != nil {
|
||||||
|
for _, b := range worker.BlacklistedClients {
|
||||||
|
if eqCI(b, *ctx.ClientID) {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrPolicy,
|
||||||
|
Reason: fmt.Sprintf(
|
||||||
|
"fills[%d] worker %q blacklisted for client %q",
|
||||||
|
i, candidateID, *ctx.ClientID,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Geo / role match — only when BOTH sides have a value.
|
||||||
|
if ctx.City != nil && worker.City != nil && !eqCI(*ctx.City, *worker.City) {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrConsistency,
|
||||||
|
Reason: fmt.Sprintf(
|
||||||
|
"fills[%d] worker %q city %q doesn't match contract city %q",
|
||||||
|
i, candidateID, *worker.City, *ctx.City,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ctx.State != nil && worker.State != nil && !eqCI(*ctx.State, *worker.State) {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrConsistency,
|
||||||
|
Reason: fmt.Sprintf(
|
||||||
|
"fills[%d] worker %q state %q doesn't match contract state %q",
|
||||||
|
i, candidateID, *worker.State, *ctx.State,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ctx.Role != nil && worker.Role != nil && !eqCI(*ctx.Role, *worker.Role) {
|
||||||
|
return Report{}, &ValidationError{
|
||||||
|
Kind: ErrConsistency,
|
||||||
|
Reason: fmt.Sprintf(
|
||||||
|
"fills[%d] worker %q role %q doesn't match contract role %q",
|
||||||
|
i, candidateID, *worker.Role, *ctx.Role,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name-mismatch is a warning, not an error — recruiters
|
||||||
|
// sometimes send updated names through the proposal layer
|
||||||
|
// before the roster catches up.
|
||||||
|
if proposedName != "" && !eqCI(proposedName, worker.Name) {
|
||||||
|
findings = append(findings, Finding{
|
||||||
|
Field: fmt.Sprintf("fills[%d].name", i),
|
||||||
|
Severity: SeverityWarning,
|
||||||
|
Message: fmt.Sprintf(
|
||||||
|
"proposed name %q differs from roster name %q for %q",
|
||||||
|
proposedName, worker.Name, candidateID,
|
||||||
|
),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Report{
|
||||||
|
Findings: findings,
|
||||||
|
ElapsedMs: elapsed(started),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
226
internal/validator/fill_test.go
Normal file
226
internal/validator/fill_test.go
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
package validator
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Helpers — mirror the Rust test helpers.
|
||||||
|
|
||||||
|
func mkLookup(records ...WorkerRecord) WorkerLookup {
|
||||||
|
return NewInMemoryWorkerLookup(records)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mkWorker(id, name, status, city, state, role string) WorkerRecord {
|
||||||
|
return WorkerRecord{
|
||||||
|
CandidateID: id,
|
||||||
|
Name: name,
|
||||||
|
Status: status,
|
||||||
|
City: strPtr(city),
|
||||||
|
State: strPtr(state),
|
||||||
|
Role: strPtr(role),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func asValidationError(err error) (*ValidationError, bool) {
|
||||||
|
var ve *ValidationError
|
||||||
|
if errors.As(err, &ve) {
|
||||||
|
return ve, true
|
||||||
|
}
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Schema-level errors ──
|
||||||
|
|
||||||
|
func TestFill_WrongArtifactType_FailsSchema(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{EmailDraft: map[string]any{}})
|
||||||
|
ve, ok := asValidationError(err)
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("expected ValidationError, got %v", err)
|
||||||
|
}
|
||||||
|
if ve.Kind != ErrSchema || ve.Field != "artifact" {
|
||||||
|
t.Errorf("expected schema/artifact error, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFill_MissingFillsArray_FailsSchema(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrSchema || ve.Field != "fills" {
|
||||||
|
t.Errorf("expected schema/fills error, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFill_MissingCandidateID_FailsSchema(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"fills": []any{
|
||||||
|
map[string]any{"name": "Alice"},
|
||||||
|
},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrSchema || ve.Field != "fills[0].candidate_id" {
|
||||||
|
t.Errorf("expected schema/fills[0].candidate_id error, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Completeness ──
|
||||||
|
|
||||||
|
func TestFill_TargetCountMismatch_FailsCompleteness(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "active", "Toledo", "OH", "Welder")))
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"_context": map[string]any{"target_count": float64(2)},
|
||||||
|
"fills": []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrCompleteness {
|
||||||
|
t.Errorf("expected completeness error, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Cross-roster checks ──
|
||||||
|
|
||||||
|
func TestFill_PhantomID_FailsConsistency(t *testing.T) {
|
||||||
|
// Lookup is empty → any candidate_id is "phantom" — the
|
||||||
|
// load-bearing check for the 0→85% pattern.
|
||||||
|
v := NewFillValidator(mkLookup())
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"fills": []any{map[string]any{"candidate_id": "phantom-id", "name": "Alice"}},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrConsistency {
|
||||||
|
t.Errorf("expected consistency error on phantom ID, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFill_DuplicateID_FailsConsistency(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "active", "Toledo", "OH", "Welder")))
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"fills": []any{
|
||||||
|
map[string]any{"candidate_id": "w1", "name": "Alice"},
|
||||||
|
map[string]any{"candidate_id": "w1", "name": "Alice"},
|
||||||
|
},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrConsistency {
|
||||||
|
t.Errorf("expected consistency error on duplicate ID, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFill_InactiveStatus_FailsConsistency(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "inactive", "Toledo", "OH", "Welder")))
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"fills": []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrConsistency {
|
||||||
|
t.Errorf("expected consistency error on inactive status, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFill_Blacklist_FailsPolicy(t *testing.T) {
|
||||||
|
w := mkWorker("w1", "Alice", "active", "Toledo", "OH", "Welder")
|
||||||
|
w.BlacklistedClients = []string{"CLI-99"}
|
||||||
|
v := NewFillValidator(mkLookup(w))
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"_context": map[string]any{"client_id": "cli-99"}, // case-insensitive
|
||||||
|
"fills": []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrPolicy {
|
||||||
|
t.Errorf("expected policy error on blacklist, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFill_GeoMismatch_FailsConsistency(t *testing.T) {
|
||||||
|
// Worker in Detroit, contract says Toledo.
|
||||||
|
v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "active", "Detroit", "MI", "Welder")))
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"_context": map[string]any{"city": "Toledo", "state": "OH"},
|
||||||
|
"fills": []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrConsistency {
|
||||||
|
t.Errorf("expected consistency error on geo mismatch, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFill_RoleMismatch_FailsConsistency(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "active", "Toledo", "OH", "Forklift Operator")))
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"_context": map[string]any{"role": "Welder"},
|
||||||
|
"fills": []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
|
||||||
|
}})
|
||||||
|
ve, _ := asValidationError(err)
|
||||||
|
if ve == nil || ve.Kind != ErrConsistency {
|
||||||
|
t.Errorf("expected consistency error on role mismatch, got %+v", ve)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Happy path ──
|
||||||
|
|
||||||
|
func TestFill_WellFormed_Passes(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup(
|
||||||
|
mkWorker("w1", "Alice", "active", "Toledo", "OH", "Welder"),
|
||||||
|
mkWorker("w2", "Bob", "active", "Toledo", "OH", "Welder"),
|
||||||
|
))
|
||||||
|
report, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"_context": map[string]any{
|
||||||
|
"target_count": float64(2),
|
||||||
|
"city": "Toledo",
|
||||||
|
"state": "OH",
|
||||||
|
"role": "Welder",
|
||||||
|
},
|
||||||
|
"fills": []any{
|
||||||
|
map[string]any{"candidate_id": "w1", "name": "Alice"},
|
||||||
|
map[string]any{"candidate_id": "w2", "name": "Bob"},
|
||||||
|
},
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected pass, got %v", err)
|
||||||
|
}
|
||||||
|
if len(report.Findings) != 0 {
|
||||||
|
t.Errorf("expected zero findings, got %v", report.Findings)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Name mismatch is a Finding (warning), not an error ──
|
||||||
|
|
||||||
|
func TestFill_NameMismatch_EmitsWarning(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup(mkWorker("w1", "Alice Smith", "active", "Toledo", "OH", "Welder")))
|
||||||
|
report, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"fills": []any{
|
||||||
|
map[string]any{"candidate_id": "w1", "name": "Alyssa Smith"}, // typo / outdated
|
||||||
|
},
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("name mismatch should NOT error, got %v", err)
|
||||||
|
}
|
||||||
|
if len(report.Findings) != 1 || report.Findings[0].Severity != SeverityWarning {
|
||||||
|
t.Errorf("expected 1 warning finding, got %v", report.Findings)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Case-insensitive matches ──
|
||||||
|
|
||||||
|
func TestFill_CaseInsensitiveMatch_Passes(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup(mkWorker("w1", "Alice", "ACTIVE", "TOLEDO", "oh", "Welder")))
|
||||||
|
_, err := v.Validate(Artifact{FillProposal: map[string]any{
|
||||||
|
"_context": map[string]any{"city": "Toledo", "state": "OH"},
|
||||||
|
"fills": []any{map[string]any{"candidate_id": "w1", "name": "Alice"}},
|
||||||
|
}})
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("case-insensitive comparisons should pass, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Validator name is stable ──
|
||||||
|
|
||||||
|
func TestFill_NameMatchesRust(t *testing.T) {
|
||||||
|
v := NewFillValidator(mkLookup())
|
||||||
|
if v.Name() != "staffing.fill" {
|
||||||
|
t.Errorf("name should match Rust 'staffing.fill', got %q", v.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
56
internal/validator/lookup.go
Normal file
56
internal/validator/lookup.go
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
package validator
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// InMemoryWorkerLookup is a zero-deps WorkerLookup useful for tests
|
||||||
|
// and small-fixture validation. Mirrors Rust's
|
||||||
|
// `InMemoryWorkerLookup::from_records`.
|
||||||
|
//
|
||||||
|
// Lookup is case-insensitive on candidate_id since Rust's
|
||||||
|
// HashMap with PartialEq + the source data's casing inconsistency
|
||||||
|
// (some IDs uppercase, some lowercase, some mixed) means
|
||||||
|
// case-sensitive lookup misses real matches. Lower-casing on
|
||||||
|
// insert keeps the contract.
|
||||||
|
type InMemoryWorkerLookup struct {
|
||||||
|
byID map[string]WorkerRecord
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewInMemoryWorkerLookup builds a lookup from a list of records.
|
||||||
|
// Duplicate candidate_ids: last-write-wins. Empty candidate_id: skipped.
|
||||||
|
func NewInMemoryWorkerLookup(records []WorkerRecord) *InMemoryWorkerLookup {
|
||||||
|
m := make(map[string]WorkerRecord, len(records))
|
||||||
|
for _, r := range records {
|
||||||
|
if r.CandidateID == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
m[strings.ToLower(strings.TrimSpace(r.CandidateID))] = r
|
||||||
|
}
|
||||||
|
return &InMemoryWorkerLookup{byID: m}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find satisfies WorkerLookup. Returns (rec, true) on hit,
|
||||||
|
// (nil, false) on miss.
|
||||||
|
func (l *InMemoryWorkerLookup) Find(candidateID string) (*WorkerRecord, bool) {
|
||||||
|
if l == nil {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
r, ok := l.byID[strings.ToLower(strings.TrimSpace(candidateID))]
|
||||||
|
if !ok {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
// Return a copy so callers can't mutate the lookup's internal state.
|
||||||
|
cp := r
|
||||||
|
return &cp, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Len exposes the size for tests + admin endpoints.
|
||||||
|
func (l *InMemoryWorkerLookup) Len() int {
|
||||||
|
if l == nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return len(l.byID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// strPtr returns a pointer to a fresh copy of s. It is a small helper for
// tests that need to fill the *string fields WorkerRecord.City/State/Role.
func strPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}
|
||||||
144
internal/validator/types.go
Normal file
144
internal/validator/types.go
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
// Package validator is the Go port of Rust's `validator` crate
|
||||||
|
// (`/home/profit/lakehouse/crates/validator/`). Production safety
|
||||||
|
// nets for staffing-domain LLM outputs:
|
||||||
|
//
|
||||||
|
// - FillValidator: catches phantom IDs / wrong-status workers /
|
||||||
|
// blacklist violations / geo-or-role mismatches in fill proposals
|
||||||
|
// - EmailValidator: catches SSN-shape sequences / salary
|
||||||
|
// disclosure / wrong-target name in email/SMS drafts
|
||||||
|
//
|
||||||
|
// Per `reports/cutover/architecture_comparison.md`'s "Go missing"
|
||||||
|
// section: these were Rust-only until this port. Closes one of the
|
||||||
|
// two named gaps for Go-primary operation (the other being the
|
||||||
|
// materializer port).
|
||||||
|
//
|
||||||
|
// Architectural choice: we mirror the Rust shape exactly so the
|
||||||
|
// Validator + Artifact + Finding interfaces are call-compatible
|
||||||
|
// across runtimes. A future "validator service" daemon could expose
|
||||||
|
// either runtime's implementation behind a uniform HTTP contract.
|
||||||
|
|
||||||
|
package validator
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
// Artifact is the discriminated union of inputs a Validator can be given;
// the original author notes it mirrors Rust's `enum Artifact`. The variant
// is selected by the first non-nil field, in declaration order.
type Artifact struct {
	// FillProposal: {fills: [{candidate_id, name}], _context: {...}}
	FillProposal map[string]any
	// EmailDraft: {to, body, subject?, kind?, _context?: {candidate_id?}}
	EmailDraft map[string]any
}

// Kind names the active variant for use in error messages such as
// "expected FillProposal, got X". FillProposal takes precedence when both
// fields are set; "Unknown" is returned when neither is.
func (a Artifact) Kind() string {
	if a.FillProposal != nil {
		return "FillProposal"
	}
	if a.EmailDraft != nil {
		return "EmailDraft"
	}
	return "Unknown"
}
|
||||||
|
|
||||||
|
// Severity grades a Finding. The original author notes the three levels
// match Rust's enum {Error, Warning, Info}; the string values are what
// appear in serialized findings.
type Severity string

// Severity levels, from most to least serious.
const (
	SeverityError   = Severity("error")
	SeverityWarning = Severity("warning")
	SeverityInfo    = Severity("info")
)
|
||||||
|
|
||||||
|
// Finding is one warning-or-info note attached to a successful
|
||||||
|
// validation. Errors abort validation; findings come back alongside
|
||||||
|
// a passing report. Mirrors Rust's Finding shape exactly so JSON
|
||||||
|
// round-trips between runtimes.
|
||||||
|
type Finding struct {
|
||||||
|
Field string `json:"field"`
|
||||||
|
Severity Severity `json:"severity"`
|
||||||
|
Message string `json:"message"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report is the success-path return value: zero or more findings
|
||||||
|
// + per-validator wall-clock cost.
|
||||||
|
type Report struct {
|
||||||
|
Findings []Finding `json:"findings"`
|
||||||
|
ElapsedMs int64 `json:"elapsed_ms"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidationErrorKind tells the failure modes of a ValidationError apart.
// The original author notes the set mirrors Rust's ValidationError variants.
type ValidationErrorKind string

// The failure modes a validator can report.
const (
	// ErrSchema: input shape doesn't match the contract.
	ErrSchema = ValidationErrorKind("schema")
	// ErrCompleteness: structural counts are wrong (e.g. 3 fills, target_count=5).
	ErrCompleteness = ValidationErrorKind("completeness")
	// ErrConsistency: cross-source disagreement (phantom worker, wrong city).
	ErrConsistency = ValidationErrorKind("consistency")
	// ErrPolicy: org-level rule violation (blacklist, PII leak).
	ErrPolicy = ValidationErrorKind("policy")
)
|
||||||
|
|
||||||
|
// ValidationError is the Go equivalent of Rust's enum + variant
|
||||||
|
// fields. Field is set for Schema errors (the failing field name);
|
||||||
|
// Reason carries the human-readable message for all variants.
|
||||||
|
type ValidationError struct {
|
||||||
|
Kind ValidationErrorKind
|
||||||
|
Field string
|
||||||
|
Reason string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error makes ValidationError a Go error value. Format mirrors the
|
||||||
|
// Rust Debug print so log scraping behaves the same.
|
||||||
|
func (e *ValidationError) Error() string {
|
||||||
|
if e.Field != "" {
|
||||||
|
return string(e.Kind) + " (" + e.Field + "): " + e.Reason
|
||||||
|
}
|
||||||
|
return string(e.Kind) + ": " + e.Reason
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validator is the interface every validator implements.
|
||||||
|
// Stateless — construction takes any deps (e.g. WorkerLookup)
|
||||||
|
// upfront, validate() is pure on its inputs.
|
||||||
|
type Validator interface {
|
||||||
|
Name() string
|
||||||
|
Validate(artifact Artifact) (Report, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// WorkerRecord is the worker shape as seen from the lookup side.
//
// City, State and Role are pointers to mirror Rust's Option<String>: nil
// means "unknown", which is operationally different from a known empty
// string. Validators enforce a city/state/role match only when both the
// expected value (from the contract) and the actual value (from the
// lookup) are non-nil.
type WorkerRecord struct {
	CandidateID string
	Name        string
	Status      string // "active" / "inactive" / etc.

	City  *string
	State *string
	Role  *string

	BlacklistedClients []string
}
|
||||||
|
|
||||||
|
// WorkerLookup is the gate validators go through to ask "does
|
||||||
|
// this candidate_id exist + what's their record?" Implementations
|
||||||
|
// can be in-memory (test fixture), DuckDB-backed (production
|
||||||
|
// queryd), or HTTP-backed (cross-daemon). FillValidator + EmailValidator
|
||||||
|
// take Arc<dyn WorkerLookup> on Rust side; in Go, an interface value.
|
||||||
|
type WorkerLookup interface {
|
||||||
|
Find(candidateID string) (*WorkerRecord, bool)
|
||||||
|
}
|
||||||
|
|
||||||
|
// elapsed returns the whole milliseconds that have passed since start, in
// the form stored in Report.ElapsedMs. The original author notes it is the
// counterpart of Rust's .elapsed().as_millis() as u64.
func elapsed(start time.Time) int64 {
	spent := time.Since(start)
	// Integer division truncates toward zero, same as Duration.Milliseconds.
	return int64(spent / time.Millisecond)
}
|
||||||
Loading…
x
Reference in New Issue
Block a user