// validatord is the staffing-validator service daemon. Hosts:
//
//	POST /validate — dispatch a single artifact to FillValidator,
//	                 EmailValidator, or PlaybookValidator
//	POST /iterate  — generate→validate→correct loop (Phase 43 PRD).
//	                 Calls chatd for the LLM hop and runs the
//	                 validator in-process for the gate.
//	GET  /health   — readiness (always 200; roster status reported
//	                 in /validate responses)
//
// Per docs/SPEC.md and architecture_comparison.md "Go primary path":
// this closes the last bounded item — the now-Go-side validators get
// a network surface so any caller (TS code path, other daemons, agents)
// can validate artifacts via gateway /v1/validate or /v1/iterate.
//
// The roster (worker existence + city/state/role/blacklist) loads
// from a JSONL file at startup. Empty path = no roster, worker-existence
// checks fail Consistency. Production points this at a roster that's
// regenerated from workers_500k.parquet on a schedule.
package main

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"os"
	"time"

	"github.com/go-chi/chi/v5"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/langfuse"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/validator"
)

// maxRequestBytes caps /validate and /iterate request bodies (enforced
// via http.MaxBytesReader) so oversized payloads fail fast.
const maxRequestBytes = 4 << 20 // 4 MiB

func main() {
	configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
	flag.Parse()

	// Config and roster are both load-or-die: the daemon is useless
	// without them, so exit non-zero rather than limp along.
	cfg, err := shared.LoadConfig(*configPath)
	if err != nil {
		slog.Error("config", "err", err)
		os.Exit(1)
	}
	lookup, err := validator.LoadJSONLRoster(cfg.Validatord.RosterPath)
	if err != nil {
		slog.Error("roster load", "path", cfg.Validatord.RosterPath, "err", err)
		os.Exit(1)
	}
	slog.Info("validatord roster",
		"path", cfg.Validatord.RosterPath,
		"records", lookup.Len(),
	)

	// Chat hops get a generous default timeout (LLM calls are slow);
	// a non-positive configured value falls back to 240s.
	chatTimeout := time.Duration(cfg.Validatord.ChatTimeoutSecs) * time.Second
	if chatTimeout <= 0 {
		chatTimeout = 240 * time.Second
	}

	h := &handlers{
		lookup:     lookup,
		chatdURL:   cfg.Validatord.ChatdURL,
		chatClient: &http.Client{Timeout: chatTimeout},
		iterCfg: validator.IterateConfig{
			DefaultMaxIterations: cfg.Validatord.DefaultMaxIterations,
			DefaultMaxTokens:     cfg.Validatord.DefaultMaxTokens,
		},
		// Same env loader the middleware uses — getting our own
		// reference lets us emit child spans of the per-request trace.
		// nil when Langfuse env isn't set; the iterate handler skips
		// span emission gracefully in that case.
		lf:         shared.LoadLangfuseFromEnv(),
		sessionLog: validator.NewSessionLogger(cfg.Validatord.SessionLogPath),
	}
	if h.sessionLog != nil {
		slog.Info("validatord session log", "path", cfg.Validatord.SessionLogPath)
	}

	// shared.Run owns the HTTP server lifecycle (bind, middleware,
	// auth); it blocks until shutdown or a fatal error.
	if err := shared.Run("validatord", cfg.Validatord.Bind, h.register, cfg.Auth); err != nil {
		slog.Error("server", "err", err)
		os.Exit(1)
	}
}

// handlers bundles the daemon's per-process dependencies; one instance
// serves all requests.
type handlers struct {
	lookup     validator.WorkerLookup  // roster-backed worker existence/attribute lookup
	chatdURL   string                  // base URL of the chatd service (no trailing path)
	chatClient *http.Client            // shared client for chat hops; carries the chat timeout
	iterCfg    validator.IterateConfig // defaults for the iterate loop (max iterations/tokens)
	// lf is the Langfuse client (nil when unconfigured — best-effort
	// posture matching the rest of the stack). Per-attempt iteration
	// spans get emitted via lf when set. The HTTP middleware that
	// creates the parent trace lives in shared.Run; this client lets
	// validatord's handler emit application-level child spans.
	lf *langfuse.Client
	// sessionLog appends one SessionRecord per /v1/iterate session to
	// `coordinator_sessions.jsonl` for offline DuckDB analysis. nil
	// when the session log is unconfigured (best-effort posture).
	sessionLog *validator.SessionLogger
}

// register mounts the two POST endpoints on the router supplied by
// shared.Run (which also provides /health and middleware).
func (h *handlers) register(r chi.Router) {
	r.Post("/validate", h.handleValidate)
	r.Post("/iterate", h.handleIterate)
}

// validateRequest is the request body for POST /validate. Mirrors
// Rust's ValidateRequest in `crates/gateway/src/v1/validate.rs`.
type validateRequest struct { Kind string `json:"kind"` // "fill" | "email" | "playbook" Artifact map[string]any `json:"artifact"` Context map[string]any `json:"context,omitempty"` } func (h *handlers) handleValidate(w http.ResponseWriter, r *http.Request) { r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes) defer r.Body.Close() var req validateRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { http.Error(w, "invalid JSON: "+err.Error(), http.StatusBadRequest) return } if req.Kind == "" { http.Error(w, "kind is required", http.StatusBadRequest) return } if req.Artifact == nil { http.Error(w, "artifact is required", http.StatusBadRequest) return } report, vErr, kindErr := h.runValidator(req.Kind, req.Artifact, req.Context) switch { case kindErr != nil: http.Error(w, kindErr.Error(), http.StatusBadRequest) case vErr != nil: writeJSON(w, http.StatusUnprocessableEntity, vErr) default: writeJSON(w, http.StatusOK, report) } } // runValidator dispatches by kind. Returns (Report, ValidationError, kindErr). // kindErr is non-nil only for unknown kind strings (400). func (h *handlers) runValidator(kind string, artifact, ctx map[string]any) (*validator.Report, *validator.ValidationError, error) { merged := mergeContext(artifact, ctx) a, kindErr := buildArtifact(kind, merged) if kindErr != nil { return nil, nil, kindErr } v, vErr := pickValidator(kind, h.lookup) if vErr != nil { return nil, nil, vErr } report, err := v.Validate(a) if err != nil { var ve *validator.ValidationError if errors.As(err, &ve) { return nil, ve, nil } // Validators only ever return ValidationError; an "any other // error" path means the validator violated its own contract. // Surface as 500 rather than silently coercing. return nil, &validator.ValidationError{ Kind: validator.ErrSchema, Reason: "internal validator error: " + err.Error(), }, nil } return &report, nil, nil } // buildArtifact maps the kind string to the right Artifact union arm. 
// Unknown kinds return a 400-friendly error. func buildArtifact(kind string, body map[string]any) (validator.Artifact, error) { switch kind { case "fill": return validator.Artifact{FillProposal: body}, nil case "email": return validator.Artifact{EmailDraft: body}, nil case "playbook": return validator.Artifact{Playbook: body}, nil default: return validator.Artifact{}, fmt.Errorf("unknown kind %q — expected fill | email | playbook", kind) } } func pickValidator(kind string, lookup validator.WorkerLookup) (validator.Validator, error) { switch kind { case "fill": return validator.NewFillValidator(lookup), nil case "email": return validator.NewEmailValidator(lookup), nil case "playbook": return validator.PlaybookValidator{}, nil default: return nil, fmt.Errorf("unknown kind %q", kind) } } // mergeContext folds `context` into `artifact._context` so validators // pull contract metadata uniformly. Caller-supplied artifact._context // wins on key collision (caller knows their own contract). func mergeContext(artifact, ctx map[string]any) map[string]any { if ctx == nil { return artifact } out := make(map[string]any, len(artifact)+1) for k, v := range artifact { out[k] = v } existing, _ := out["_context"].(map[string]any) merged := make(map[string]any, len(ctx)+len(existing)) for k, v := range ctx { merged[k] = v } for k, v := range existing { merged[k] = v // existing wins } out["_context"] = merged return out } func (h *handlers) handleIterate(w http.ResponseWriter, r *http.Request) { r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes) defer r.Body.Close() var req validator.IterateRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { http.Error(w, "invalid JSON: "+err.Error(), http.StatusBadRequest) return } if req.Kind == "" || req.Prompt == "" || req.Provider == "" || req.Model == "" { http.Error(w, "kind, prompt, provider, and model are required", http.StatusBadRequest) return } // Pull the per-request trace id from the langfuse middleware. 
If // the caller forwarded an upstream trace via X-Lakehouse-Trace-Id // the middleware reuses that one; otherwise it minted a fresh trace // at HTTP entry. Either way, we propagate it so chat hops nest // under the same parent and operators can pivot from the iterate // response's trace_id straight into the full Langfuse tree. if req.TraceID == "" { req.TraceID = shared.TraceIDFromCtx(r.Context()) } chat := h.chatCaller() validate := func(kind string, artifact map[string]any) (validator.Report, error) { report, vErr, kindErr := h.runValidator(kind, artifact, req.Context) if kindErr != nil { return validator.Report{}, &validator.ValidationError{ Kind: validator.ErrSchema, Reason: kindErr.Error(), } } if vErr != nil { return validator.Report{}, vErr } return *report, nil } // Wire per-attempt span emission when Langfuse is configured — // each chat→validate cycle becomes a child span of the iterate // trace, with prompt/raw/verdict captured for inspection. cfg := h.iterCfg if h.lf != nil { cfg.Tracer = h.iterTracer(r.Context()) } t0 := time.Now() resp, fail, err := validator.Iterate(r.Context(), req, cfg, chat, validate) durationMs := time.Since(t0).Milliseconds() // Emit one session row regardless of success/failure/infra-error. // Best-effort: a failing session log never blocks the response. rosterCheck := h.rosterCheckFor(req.Kind) rec := validator.BuildSessionRecord(req, resp, fail, err, rosterCheck, durationMs) h.sessionLog.Append(rec) if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) return } if fail != nil { writeJSON(w, http.StatusUnprocessableEntity, fail) return } writeJSON(w, http.StatusOK, resp) } // rosterCheckFor returns a closure that verifies an accepted artifact's // candidate IDs all exist in the roster. Used by BuildSessionRecord to // stamp `grounded_in_roster` on each session row — answers the "did // the model produce real workers, not phantoms?" question at offline- // query time. 
// // Only the `fill` kind has worker IDs to verify. Other kinds return // nil (omits the field from the session record). // // Note: FillValidator already enforces this constraint on every // successful iteration (phantom IDs raise ValidationError::Consistency). // Stamping the bool here surfaces the verified-grounded property as // queryable data, not relying on validator semantics being recalled // during analysis. A future scrum may want to query "show me sessions // where the model guessed real worker IDs without ever needing a // retry" — that's a `iterations=1 AND grounded_in_roster=true` query. func (h *handlers) rosterCheckFor(kind string) func(map[string]any) *bool { if kind != "fill" { return nil } return func(artifact map[string]any) *bool { fills, ok := artifact["fills"].([]any) if !ok { f := false return &f } for _, raw := range fills { fill, ok := raw.(map[string]any) if !ok { f := false return &f } id, _ := fill["candidate_id"].(string) if id == "" { f := false return &f } if _, found := h.lookup.Find(id); !found { f := false return &f } } t := true return &t } } // chatCaller wires the iteration loop to chatd via HTTP. Builds the // chat.Request shape, posts to ${chatdURL}/chat, returns the content // string (no choices wrapper — chatd's response is already flat). // // When traceID is non-empty, forwards it as the `X-Lakehouse-Trace-Id` // header so chatd's middleware attaches its trace to the same // Langfuse parent. Closes the multi-call observability gap: a // /v1/iterate session shows in Langfuse as one trace with N child // chat spans, instead of N+1 disconnected traces. 
func (h *handlers) chatCaller() validator.ChatCaller { return func(ctx context.Context, system, user, _, model string, temp *float64, maxTokens int, traceID string) (string, error) { messages := make([]map[string]string, 0, 2) if system != "" { messages = append(messages, map[string]string{"role": "system", "content": system}) } messages = append(messages, map[string]string{"role": "user", "content": user}) body := map[string]any{ "model": model, "messages": messages, "max_tokens": maxTokens, } if temp != nil { body["temperature"] = *temp } buf, err := json.Marshal(body) if err != nil { return "", fmt.Errorf("marshal chat req: %w", err) } req, err := http.NewRequestWithContext(ctx, "POST", h.chatdURL+"/chat", bytes.NewReader(buf)) if err != nil { return "", fmt.Errorf("build chat req: %w", err) } req.Header.Set("Content-Type", "application/json") if traceID != "" { req.Header.Set(shared.TraceIDHeader, traceID) } resp, err := h.chatClient.Do(req) if err != nil { return "", fmt.Errorf("chat hop: %w", err) } defer resp.Body.Close() raw, _ := io.ReadAll(resp.Body) if resp.StatusCode >= 400 { return "", fmt.Errorf("chat %d: %s", resp.StatusCode, trim(string(raw), 300)) } var parsed struct { Content string `json:"content"` } if err := json.Unmarshal(raw, &parsed); err != nil { return "", fmt.Errorf("parse chat resp: %w", err) } return parsed.Content, nil } } // iterTracer adapts validator.Tracer → langfuse.Client.Span. Each // iteration attempt becomes one Langfuse span on the parent trace // with input={prompt, model, provider}, output={raw, verdict}. // Operators reading Langfuse can scroll the iterate session and see // the full retry chain — which prompt produced which raw, which // validator verdict landed, which retry the model recovered on. // // Returns the span id so it lands in IterateAttempt.SpanID — the // /v1/iterate response carries that id back to the caller for log // correlation. // // Errors are silent (best-effort posture per package langfuse). 
A // dropped span never blocks the iterate loop. func (h *handlers) iterTracer(ctx context.Context) validator.Tracer { return func(s validator.AttemptSpan) string { level := "DEFAULT" if s.Verdict.Kind != "accepted" { level = "WARNING" } return h.lf.Span(ctx, langfuse.SpanInput{ TraceID: s.TraceID, Name: fmt.Sprintf("iterate.attempt[%d]", s.Iteration), Input: map[string]any{ "iteration": s.Iteration, "model": s.Model, "provider": s.Provider, "prompt": trim(s.Prompt, 4000), }, Output: map[string]any{ "verdict": s.Verdict.Kind, "error": s.Verdict.Error, "raw": trim(s.Raw, 4000), }, Metadata: map[string]any{ "verdict_error": s.Verdict.Error, }, StartTime: s.StartTime, EndTime: s.EndTime, Level: level, }) } } func writeJSON(w http.ResponseWriter, status int, body any) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(status) if err := json.NewEncoder(w).Encode(body); err != nil { slog.Error("encode", "err", err) } } func trim(s string, n int) string { if len(s) <= n { return s } return s[:n] }