Closes the last "Go primary" backlog item in docs/ARCHITECTURE_COMPARISON.md. Go now owns the entire validator path end-to-end — no Rust dep for staffing safety net. Architecture: cmd/validatord on :3221 hosts both endpoints. Calls chatd directly for the iterate loop's LLM hop (no gateway self-loopback like the Rust shape). Gateway proxies /v1/validate + /v1/iterate to validatord. What's in: - internal/validator/playbook.go — 3rd validator kind (PRD checks: fill: prefix, endorsed_names ≤ target_count×2, fingerprint required) - internal/validator/lookup_jsonl.go — JSONL roster loader (Parquet deferred; producer one-liner documented in package comment) - internal/validator/iterate.go — ExtractJSON helper + Iterate orchestrator with ChatCaller seam for unit tests - cmd/validatord/main.go — HTTP routes, roster load, chat client - internal/shared/config.go — ValidatordConfig + gateway URL field - lakehouse.toml — [validatord] section - cmd/gateway/main.go — proxy routes for /v1/validate + /v1/iterate Smoke: 5/5 PASS through gateway :3110: ✓ playbook happy path ✓ playbook missing fingerprint → 422 schema/fingerprint ✓ phantom candidate W-PHANTOM → 422 consistency ✓ unknown kind → 400 ✓ roster loaded with 3 records go test ./... green across 33 packages. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
87 lines
2.8 KiB
Go
87 lines
2.8 KiB
Go
package validator
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
)
|
|
|
|
// rosterRow describes a single decoded line of a roster JSONL file.
//
// Decoding is deliberately lenient. The optional attributes (city,
// state, role) are pointers, so a key that is missing or explicitly
// null decodes to nil rather than "". That lets the validators tell
// "we don't know" apart from "we know it's empty."
//
// The field set mirrors the projection used in the Rust
// ParquetWorkerLookup, so JSONL exported from `workers_500k.parquet`
// (or a synthetic dataset) loads here without translation. A file in
// this shape can be produced with:
//
//	duckdb -c "COPY (SELECT candidate_id, name, status, city, state,
//	  role, blacklisted_clients FROM workers) TO 'roster.jsonl'
//	  (FORMAT JSON, ARRAY false)"
type rosterRow struct {
	// Identity and lifecycle; a missing key leaves the zero value "".
	CandidateID string `json:"candidate_id"`
	Name        string `json:"name"`
	Status      string `json:"status"`

	// Optional attributes; nil when the key is absent or null.
	City  *string `json:"city"`
	State *string `json:"state"`
	Role  *string `json:"role"`

	// Nil when the key is absent or null.
	BlacklistedClients []string `json:"blacklisted_clients"`
}
|
|
|
|
// LoadJSONLRoster reads a roster JSONL file and returns an
|
|
// InMemoryWorkerLookup. The validators accept any WorkerLookup, so
|
|
// callers that need a different backing store (e.g. queryd-backed
|
|
// lookup against the live Parquet view) can plug in their own
|
|
// implementation without changing this function.
|
|
//
|
|
// Parse errors on individual lines are skipped, not fatal — the
|
|
// roster is operator-supplied and a corrupted line shouldn't
|
|
// disable the whole validator surface. The return error is for
|
|
// I/O failures (path missing, unreadable).
|
|
//
|
|
// Empty path returns an empty lookup + nil — gives the daemon a
|
|
// "no roster configured" mode where worker-existence checks fail
|
|
// Consistency. Matches the Rust gateway's default.
|
|
func LoadJSONLRoster(path string) (*InMemoryWorkerLookup, error) {
|
|
if path == "" {
|
|
return NewInMemoryWorkerLookup(nil), nil
|
|
}
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("open roster: %w", err)
|
|
}
|
|
defer f.Close()
|
|
|
|
var records []WorkerRecord
|
|
scanner := bufio.NewScanner(f)
|
|
scanner.Buffer(make([]byte, 0, 1<<16), 1<<24)
|
|
for scanner.Scan() {
|
|
line := scanner.Bytes()
|
|
if len(line) == 0 {
|
|
continue
|
|
}
|
|
var row rosterRow
|
|
if err := json.Unmarshal(line, &row); err != nil {
|
|
continue // tolerate malformed lines
|
|
}
|
|
if strings.TrimSpace(row.CandidateID) == "" {
|
|
continue
|
|
}
|
|
records = append(records, WorkerRecord{
|
|
CandidateID: row.CandidateID,
|
|
Name: row.Name,
|
|
Status: row.Status,
|
|
City: row.City,
|
|
State: row.State,
|
|
Role: row.Role,
|
|
BlacklistedClients: row.BlacklistedClients,
|
|
})
|
|
}
|
|
if err := scanner.Err(); err != nil {
|
|
return nil, fmt.Errorf("scan roster: %w", err)
|
|
}
|
|
return NewInMemoryWorkerLookup(records), nil
|
|
}
|