package validator

import (
	"bufio"
	"encoding/json"
	"fmt"
	"os"
	"strings"
)

// rosterRow is the on-disk shape of one line in a roster JSONL file.
// Fields are tolerant — string-valued city/state/role become *string
// on WorkerRecord; absent or null fields stay nil so the validators
// can tell "we don't know" apart from "we know it's empty."
//
// Mirrors the projection used in the Rust ParquetWorkerLookup so
// JSONL exported from `workers_500k.parquet` (or a synthetic dataset)
// loads here without translation. Producer:
//
//	duckdb -c "COPY (SELECT candidate_id, name, status, city, state,
//	  role, blacklisted_clients FROM workers) TO 'roster.jsonl'
//	  (FORMAT JSON, ARRAY false)"
type rosterRow struct {
	// Plain strings: an absent or null key decodes to "". LoadJSONLRoster
	// drops any row whose candidate_id is empty or whitespace-only.
	CandidateID string `json:"candidate_id"`
	Name        string `json:"name"`
	Status      string `json:"status"`

	// Optional strings: nil when the key is absent or null, non-nil
	// (possibly pointing at "") when the key carries a string value.
	City  *string `json:"city"`
	State *string `json:"state"`
	Role  *string `json:"role"`

	// Copied onto WorkerRecord as-is; nil when the key is absent or null.
	BlacklistedClients []string `json:"blacklisted_clients"`
}

// LoadJSONLRoster reads a roster JSONL file (one JSON object per line)
// and returns an InMemoryWorkerLookup built from its rows. The
// validators accept any WorkerLookup, so callers that need a different
// backing store (e.g. a queryd-backed lookup against the live Parquet
// view) can plug in their own implementation without changing this
// function.
//
// Parse errors on individual lines are skipped, not fatal — the
// roster is operator-supplied and a corrupted line shouldn't
// disable the whole validator surface. Blank lines and rows whose
// candidate_id is empty or whitespace-only are skipped the same way.
//
// The returned error is for I/O failures (path missing, unreadable).
// A single line exceeding the 16 MiB scanner limit is reported through
// this error as well, because the scanner cannot continue past it.
//
// An empty path returns an empty lookup and a nil error — this gives
// the daemon a "no roster configured" mode where worker-existence
// checks fail Consistency. Matches the Rust gateway's default.
func LoadJSONLRoster(path string) (*InMemoryWorkerLookup, error) { if path == "" { return NewInMemoryWorkerLookup(nil), nil } f, err := os.Open(path) if err != nil { return nil, fmt.Errorf("open roster: %w", err) } defer f.Close() var records []WorkerRecord scanner := bufio.NewScanner(f) scanner.Buffer(make([]byte, 0, 1<<16), 1<<24) for scanner.Scan() { line := scanner.Bytes() if len(line) == 0 { continue } var row rosterRow if err := json.Unmarshal(line, &row); err != nil { continue // tolerate malformed lines } if strings.TrimSpace(row.CandidateID) == "" { continue } records = append(records, WorkerRecord{ CandidateID: row.CandidateID, Name: row.Name, Status: row.Status, City: row.City, State: row.State, Role: row.Role, BlacklistedClients: row.BlacklistedClients, }) } if err := scanner.Err(); err != nil { return nil, fmt.Errorf("scan roster: %w", err) } return NewInMemoryWorkerLookup(records), nil }