Closes the second half of J's 2026-05-02 multi-call observability
concern. Trace-id propagation (commit d6d2fdf) gave us the *live*
view in Langfuse; this gives us the *longitudinal* view for ad-hoc
DuckDB queries over thousands of sessions:
"show me every session where the model produced a real candidate
without ever needing a retry"
"find sessions where validation rejected three times in a row"
"first-shot success rate per model — did we feed it enough corpus?"
## What's in
internal/validator/session_log.go:
- SessionRecord type (schema=session.iterate.v1)
- SessionLogger writer — mutex-guarded append, best-effort posture,
nil-safe (NewSessionLogger("") = nil = no-op on Append)
- BuildSessionRecord helper — assembles a row from any
iterate response/failure/infra-error combination, callable from
other daemons that wrap iterate (cross-daemon shared schema)
- 7 unit tests including concurrent-append safety + the three
code paths (success / max_iter_exhausted / infra_error)
cmd/validatord/main.go:
- handlers.sessionLog field + wiring from cfg.Validatord.SessionLogPath
- Iterate handler: build + append a SessionRecord on every call
- rosterCheckFor("fill") closure stamps grounded_in_roster — the
load-bearing forensic property J flagged ("we can never
hallucinate available staff members to contracts")
internal/shared/config.go + lakehouse.toml:
- [validatord].session_log_path field; empty = disabled
- Production: /var/lib/lakehouse/validator/sessions.jsonl
scripts/validatord_smoke.sh:
- Adds a probe verifying validatord announces session log path on
startup. Smoke is now 6/6 (was 5/5).
docs/SESSION_LOG.md:
- Schema reference + 5 worked DuckDB query examples including the
"alarm" query (sessions where grounded_in_roster=false on an
accepted fill — should always be empty; if not, something is
bypassing FillValidator).
## What this is NOT
This is NOT a duplicate of replay_runs.jsonl. They're siblings:
- replay_runs.jsonl: replay tool's per-task retrieval+model output
- sessions.jsonl: validatord's per-iterate full retry chain +
grounded-in-roster verdict
A single coordinator session can produce rows in both streams; the
session_id (= Langfuse trace_id) is the join key.
## Layered observability now in place
Live view: Langfuse trace tree (X-Lakehouse-Trace-Id propagation)
`iterate.attempt[N]` spans with prompt/raw/verdict
Offline: sessions.jsonl (this commit)
DuckDB-queryable; longitudinal forensics
Hard gate: FillValidator + WorkerLookup (existing)
phantom IDs structurally rejected, never reach
session log's grounded_in_roster=true bucket
Per the architecture invariant in STATE_OF_PLAY's DO NOT RELITIGATE
section — these layers are wired; future work targets the data, not
the wiring.
## Verification
- internal/validator: 7 new tests (session_log_test.go) — all PASS
- cmd/validatord: 3 new integration tests covering the success,
failure, and grounded=false paths — all PASS
- validatord_smoke.sh: 6/6 PASS through gateway :3110
- Full go test ./... green across 33 packages
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
460 lines
15 KiB
Go
460 lines
15 KiB
Go
// validatord is the staffing-validator service daemon. Hosts:
|
|
//
|
|
// POST /validate — dispatch a single artifact to FillValidator,
|
|
// EmailValidator, or PlaybookValidator
|
|
// POST /iterate — generate→validate→correct loop (Phase 43 PRD).
|
|
// Calls chatd for the LLM hop and runs the
|
|
// validator in-process for the gate.
|
|
// GET /health — readiness (always 200; roster status reported
|
|
// in /validate responses)
|
|
//
|
|
// Per docs/SPEC.md and architecture_comparison.md "Go primary path":
|
|
// this closes the last bounded item — the now-Go-side validators get
|
|
// a network surface so any caller (TS code path, other daemons, agents)
|
|
// can validate artifacts via gateway /v1/validate or /v1/iterate.
|
|
//
|
|
// The roster (worker existence + city/state/role/blacklist) loads
|
|
// from a JSONL file at startup. Empty path = no roster, worker-existence
|
|
// checks fail Consistency. Production points this at a roster that's
|
|
// regenerated from workers_500k.parquet on a schedule.
|
|
package main
|
|
|
|
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"os"
	"time"
	"unicode/utf8"

	"github.com/go-chi/chi/v5"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/langfuse"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/validator"
)
|
|
|
|
const maxRequestBytes = 4 << 20 // 4 MiB
|
|
|
|
func main() {
|
|
configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
|
|
flag.Parse()
|
|
|
|
cfg, err := shared.LoadConfig(*configPath)
|
|
if err != nil {
|
|
slog.Error("config", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
lookup, err := validator.LoadJSONLRoster(cfg.Validatord.RosterPath)
|
|
if err != nil {
|
|
slog.Error("roster load", "path", cfg.Validatord.RosterPath, "err", err)
|
|
os.Exit(1)
|
|
}
|
|
slog.Info("validatord roster",
|
|
"path", cfg.Validatord.RosterPath,
|
|
"records", lookup.Len(),
|
|
)
|
|
|
|
chatTimeout := time.Duration(cfg.Validatord.ChatTimeoutSecs) * time.Second
|
|
if chatTimeout <= 0 {
|
|
chatTimeout = 240 * time.Second
|
|
}
|
|
|
|
h := &handlers{
|
|
lookup: lookup,
|
|
chatdURL: cfg.Validatord.ChatdURL,
|
|
chatClient: &http.Client{Timeout: chatTimeout},
|
|
iterCfg: validator.IterateConfig{
|
|
DefaultMaxIterations: cfg.Validatord.DefaultMaxIterations,
|
|
DefaultMaxTokens: cfg.Validatord.DefaultMaxTokens,
|
|
},
|
|
// Same env loader the middleware uses — getting our own
|
|
// reference lets us emit child spans of the per-request trace.
|
|
// nil when Langfuse env isn't set; the iterate handler skips
|
|
// span emission gracefully in that case.
|
|
lf: shared.LoadLangfuseFromEnv(),
|
|
sessionLog: validator.NewSessionLogger(cfg.Validatord.SessionLogPath),
|
|
}
|
|
if h.sessionLog != nil {
|
|
slog.Info("validatord session log", "path", cfg.Validatord.SessionLogPath)
|
|
}
|
|
|
|
if err := shared.Run("validatord", cfg.Validatord.Bind, h.register, cfg.Auth); err != nil {
|
|
slog.Error("server", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// handlers bundles the dependencies behind validatord's HTTP endpoints.
// All fields are wired once in main and treated as read-only afterwards.
type handlers struct {
	lookup     validator.WorkerLookup  // roster used for worker-existence checks
	chatdURL   string                  // base URL of chatd (the LLM hop for /iterate)
	chatClient *http.Client            // shared HTTP client for chatd calls; timeout set in main
	iterCfg    validator.IterateConfig // per-daemon defaults for the iterate loop (max iterations / tokens)

	// lf is the Langfuse client (nil when unconfigured — best-effort
	// posture matching the rest of the stack). Per-attempt iteration
	// spans get emitted via lf when set. The HTTP middleware that
	// creates the parent trace lives in shared.Run; this client lets
	// validatord's handler emit application-level child spans.
	lf *langfuse.Client

	// sessionLog appends one SessionRecord per /iterate session to the
	// JSONL file named by cfg.Validatord.SessionLogPath, for offline
	// DuckDB analysis. nil when that path is unconfigured; Append on a
	// nil logger is a no-op (best-effort posture).
	sessionLog *validator.SessionLogger
}
|
|
|
|
func (h *handlers) register(r chi.Router) {
|
|
r.Post("/validate", h.handleValidate)
|
|
r.Post("/iterate", h.handleIterate)
|
|
}
|
|
|
|
// validateRequest is the request body for POST /validate. Mirrors
// Rust's ValidateRequest in `crates/gateway/src/v1/validate.rs`.
type validateRequest struct {
	Kind     string         `json:"kind"`              // "fill" | "email" | "playbook"; required
	Artifact map[string]any `json:"artifact"`          // the artifact to validate; required
	Context  map[string]any `json:"context,omitempty"` // optional contract metadata, folded into artifact._context
}
|
|
|
|
func (h *handlers) handleValidate(w http.ResponseWriter, r *http.Request) {
|
|
r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)
|
|
defer r.Body.Close()
|
|
|
|
var req validateRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
http.Error(w, "invalid JSON: "+err.Error(), http.StatusBadRequest)
|
|
return
|
|
}
|
|
if req.Kind == "" {
|
|
http.Error(w, "kind is required", http.StatusBadRequest)
|
|
return
|
|
}
|
|
if req.Artifact == nil {
|
|
http.Error(w, "artifact is required", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
report, vErr, kindErr := h.runValidator(req.Kind, req.Artifact, req.Context)
|
|
switch {
|
|
case kindErr != nil:
|
|
http.Error(w, kindErr.Error(), http.StatusBadRequest)
|
|
case vErr != nil:
|
|
writeJSON(w, http.StatusUnprocessableEntity, vErr)
|
|
default:
|
|
writeJSON(w, http.StatusOK, report)
|
|
}
|
|
}
|
|
|
|
// runValidator dispatches by kind. Returns (Report, ValidationError, kindErr).
|
|
// kindErr is non-nil only for unknown kind strings (400).
|
|
func (h *handlers) runValidator(kind string, artifact, ctx map[string]any) (*validator.Report, *validator.ValidationError, error) {
|
|
merged := mergeContext(artifact, ctx)
|
|
a, kindErr := buildArtifact(kind, merged)
|
|
if kindErr != nil {
|
|
return nil, nil, kindErr
|
|
}
|
|
v, vErr := pickValidator(kind, h.lookup)
|
|
if vErr != nil {
|
|
return nil, nil, vErr
|
|
}
|
|
report, err := v.Validate(a)
|
|
if err != nil {
|
|
var ve *validator.ValidationError
|
|
if errors.As(err, &ve) {
|
|
return nil, ve, nil
|
|
}
|
|
// Validators only ever return ValidationError; an "any other
|
|
// error" path means the validator violated its own contract.
|
|
// Surface as 500 rather than silently coercing.
|
|
return nil, &validator.ValidationError{
|
|
Kind: validator.ErrSchema,
|
|
Reason: "internal validator error: " + err.Error(),
|
|
}, nil
|
|
}
|
|
return &report, nil, nil
|
|
}
|
|
|
|
// buildArtifact maps the kind string to the right Artifact union arm.
|
|
// Unknown kinds return a 400-friendly error.
|
|
func buildArtifact(kind string, body map[string]any) (validator.Artifact, error) {
|
|
switch kind {
|
|
case "fill":
|
|
return validator.Artifact{FillProposal: body}, nil
|
|
case "email":
|
|
return validator.Artifact{EmailDraft: body}, nil
|
|
case "playbook":
|
|
return validator.Artifact{Playbook: body}, nil
|
|
default:
|
|
return validator.Artifact{}, fmt.Errorf("unknown kind %q — expected fill | email | playbook", kind)
|
|
}
|
|
}
|
|
|
|
func pickValidator(kind string, lookup validator.WorkerLookup) (validator.Validator, error) {
|
|
switch kind {
|
|
case "fill":
|
|
return validator.NewFillValidator(lookup), nil
|
|
case "email":
|
|
return validator.NewEmailValidator(lookup), nil
|
|
case "playbook":
|
|
return validator.PlaybookValidator{}, nil
|
|
default:
|
|
return nil, fmt.Errorf("unknown kind %q", kind)
|
|
}
|
|
}
|
|
|
|
// mergeContext folds `context` into `artifact._context` so validators
// read contract metadata from one uniform place. On key collision the
// caller-supplied artifact._context wins (the caller knows their own
// contract). A nil ctx returns the artifact untouched.
func mergeContext(artifact, ctx map[string]any) map[string]any {
	if ctx == nil {
		return artifact
	}

	merged := make(map[string]any, len(ctx))
	for key, val := range ctx {
		merged[key] = val
	}

	out := make(map[string]any, len(artifact)+1)
	for key, val := range artifact {
		out[key] = val
	}

	if own, ok := out["_context"].(map[string]any); ok {
		for key, val := range own {
			merged[key] = val // artifact's own _context wins
		}
	}
	out["_context"] = merged
	return out
}
|
|
|
|
// handleIterate serves POST /iterate: the generate→validate→correct loop.
// It decodes the request, propagates the per-request trace id, runs
// validator.Iterate with a chatd-backed chat caller and an in-process
// validate callback, appends one SessionRecord regardless of outcome,
// and maps the three result paths to 502 (infra error), 422 (validation
// exhausted), or 200 (accepted).
func (h *handlers) handleIterate(w http.ResponseWriter, r *http.Request) {
	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)
	defer r.Body.Close()

	var req validator.IterateRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "invalid JSON: "+err.Error(), http.StatusBadRequest)
		return
	}
	if req.Kind == "" || req.Prompt == "" || req.Provider == "" || req.Model == "" {
		http.Error(w, "kind, prompt, provider, and model are required", http.StatusBadRequest)
		return
	}

	// Pull the per-request trace id from the langfuse middleware. If
	// the caller forwarded an upstream trace via X-Lakehouse-Trace-Id
	// the middleware reuses that one; otherwise it minted a fresh trace
	// at HTTP entry. Either way, we propagate it so chat hops nest
	// under the same parent and operators can pivot from the iterate
	// response's trace_id straight into the full Langfuse tree.
	// A caller-supplied trace id in the body takes precedence.
	if req.TraceID == "" {
		req.TraceID = shared.TraceIDFromCtx(r.Context())
	}

	chat := h.chatCaller()
	// validate adapts runValidator's three-way result into the
	// (Report, error) shape the iterate loop expects: unknown-kind
	// errors are coerced into a schema ValidationError so the loop
	// treats them like any other validation failure.
	validate := func(kind string, artifact map[string]any) (validator.Report, error) {
		report, vErr, kindErr := h.runValidator(kind, artifact, req.Context)
		if kindErr != nil {
			return validator.Report{}, &validator.ValidationError{
				Kind:   validator.ErrSchema,
				Reason: kindErr.Error(),
			}
		}
		if vErr != nil {
			return validator.Report{}, vErr
		}
		return *report, nil
	}

	// Wire per-attempt span emission when Langfuse is configured —
	// each chat→validate cycle becomes a child span of the iterate
	// trace, with prompt/raw/verdict captured for inspection.
	// (cfg is a copy of h.iterCfg, so setting Tracer is per-request.)
	cfg := h.iterCfg
	if h.lf != nil {
		cfg.Tracer = h.iterTracer(r.Context())
	}

	t0 := time.Now()
	resp, fail, err := validator.Iterate(r.Context(), req, cfg, chat, validate)
	durationMs := time.Since(t0).Milliseconds()

	// Emit one session row regardless of success/failure/infra-error.
	// Best-effort: a failing session log never blocks the response.
	// Append on a nil sessionLog is a no-op (unconfigured path).
	rosterCheck := h.rosterCheckFor(req.Kind)
	rec := validator.BuildSessionRecord(req, resp, fail, err, rosterCheck, durationMs)
	h.sessionLog.Append(rec)

	if err != nil {
		// Infra error (chatd unreachable, context cancelled, ...):
		// surface as 502 — the gate itself never ran to completion.
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}
	if fail != nil {
		// Loop exhausted its iterations without an accepted artifact.
		writeJSON(w, http.StatusUnprocessableEntity, fail)
		return
	}
	writeJSON(w, http.StatusOK, resp)
}
|
|
|
|
// rosterCheckFor returns a closure that verifies an accepted artifact's
|
|
// candidate IDs all exist in the roster. Used by BuildSessionRecord to
|
|
// stamp `grounded_in_roster` on each session row — answers the "did
|
|
// the model produce real workers, not phantoms?" question at offline-
|
|
// query time.
|
|
//
|
|
// Only the `fill` kind has worker IDs to verify. Other kinds return
|
|
// nil (omits the field from the session record).
|
|
//
|
|
// Note: FillValidator already enforces this constraint on every
|
|
// successful iteration (phantom IDs raise ValidationError::Consistency).
|
|
// Stamping the bool here surfaces the verified-grounded property as
|
|
// queryable data, not relying on validator semantics being recalled
|
|
// during analysis. A future scrum may want to query "show me sessions
|
|
// where the model guessed real worker IDs without ever needing a
|
|
// retry" — that's a `iterations=1 AND grounded_in_roster=true` query.
|
|
func (h *handlers) rosterCheckFor(kind string) func(map[string]any) *bool {
|
|
if kind != "fill" {
|
|
return nil
|
|
}
|
|
return func(artifact map[string]any) *bool {
|
|
fills, ok := artifact["fills"].([]any)
|
|
if !ok {
|
|
f := false
|
|
return &f
|
|
}
|
|
for _, raw := range fills {
|
|
fill, ok := raw.(map[string]any)
|
|
if !ok {
|
|
f := false
|
|
return &f
|
|
}
|
|
id, _ := fill["candidate_id"].(string)
|
|
if id == "" {
|
|
f := false
|
|
return &f
|
|
}
|
|
if _, found := h.lookup.Find(id); !found {
|
|
f := false
|
|
return &f
|
|
}
|
|
}
|
|
t := true
|
|
return &t
|
|
}
|
|
}
|
|
|
|
// chatCaller wires the iteration loop to chatd via HTTP. Builds the
|
|
// chat.Request shape, posts to ${chatdURL}/chat, returns the content
|
|
// string (no choices wrapper — chatd's response is already flat).
|
|
//
|
|
// When traceID is non-empty, forwards it as the `X-Lakehouse-Trace-Id`
|
|
// header so chatd's middleware attaches its trace to the same
|
|
// Langfuse parent. Closes the multi-call observability gap: a
|
|
// /v1/iterate session shows in Langfuse as one trace with N child
|
|
// chat spans, instead of N+1 disconnected traces.
|
|
func (h *handlers) chatCaller() validator.ChatCaller {
|
|
return func(ctx context.Context, system, user, _, model string, temp *float64, maxTokens int, traceID string) (string, error) {
|
|
messages := make([]map[string]string, 0, 2)
|
|
if system != "" {
|
|
messages = append(messages, map[string]string{"role": "system", "content": system})
|
|
}
|
|
messages = append(messages, map[string]string{"role": "user", "content": user})
|
|
body := map[string]any{
|
|
"model": model,
|
|
"messages": messages,
|
|
"max_tokens": maxTokens,
|
|
}
|
|
if temp != nil {
|
|
body["temperature"] = *temp
|
|
}
|
|
buf, err := json.Marshal(body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal chat req: %w", err)
|
|
}
|
|
req, err := http.NewRequestWithContext(ctx, "POST", h.chatdURL+"/chat", bytes.NewReader(buf))
|
|
if err != nil {
|
|
return "", fmt.Errorf("build chat req: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
if traceID != "" {
|
|
req.Header.Set(shared.TraceIDHeader, traceID)
|
|
}
|
|
resp, err := h.chatClient.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("chat hop: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
raw, _ := io.ReadAll(resp.Body)
|
|
if resp.StatusCode >= 400 {
|
|
return "", fmt.Errorf("chat %d: %s", resp.StatusCode, trim(string(raw), 300))
|
|
}
|
|
var parsed struct {
|
|
Content string `json:"content"`
|
|
}
|
|
if err := json.Unmarshal(raw, &parsed); err != nil {
|
|
return "", fmt.Errorf("parse chat resp: %w", err)
|
|
}
|
|
return parsed.Content, nil
|
|
}
|
|
}
|
|
|
|
// iterTracer adapts validator.Tracer → langfuse.Client.Span. Each
|
|
// iteration attempt becomes one Langfuse span on the parent trace
|
|
// with input={prompt, model, provider}, output={raw, verdict}.
|
|
// Operators reading Langfuse can scroll the iterate session and see
|
|
// the full retry chain — which prompt produced which raw, which
|
|
// validator verdict landed, which retry the model recovered on.
|
|
//
|
|
// Returns the span id so it lands in IterateAttempt.SpanID — the
|
|
// /v1/iterate response carries that id back to the caller for log
|
|
// correlation.
|
|
//
|
|
// Errors are silent (best-effort posture per package langfuse). A
|
|
// dropped span never blocks the iterate loop.
|
|
func (h *handlers) iterTracer(ctx context.Context) validator.Tracer {
|
|
return func(s validator.AttemptSpan) string {
|
|
level := "DEFAULT"
|
|
if s.Verdict.Kind != "accepted" {
|
|
level = "WARNING"
|
|
}
|
|
return h.lf.Span(ctx, langfuse.SpanInput{
|
|
TraceID: s.TraceID,
|
|
Name: fmt.Sprintf("iterate.attempt[%d]", s.Iteration),
|
|
Input: map[string]any{
|
|
"iteration": s.Iteration,
|
|
"model": s.Model,
|
|
"provider": s.Provider,
|
|
"prompt": trim(s.Prompt, 4000),
|
|
},
|
|
Output: map[string]any{
|
|
"verdict": s.Verdict.Kind,
|
|
"error": s.Verdict.Error,
|
|
"raw": trim(s.Raw, 4000),
|
|
},
|
|
Metadata: map[string]any{
|
|
"verdict_error": s.Verdict.Error,
|
|
},
|
|
StartTime: s.StartTime,
|
|
EndTime: s.EndTime,
|
|
Level: level,
|
|
})
|
|
}
|
|
}
|
|
|
|
func writeJSON(w http.ResponseWriter, status int, body any) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(status)
|
|
if err := json.NewEncoder(w).Encode(body); err != nil {
|
|
slog.Error("encode", "err", err)
|
|
}
|
|
}
|
|
|
|
// trim returns s truncated to at most n bytes. If a plain byte cut
// would split a multi-byte UTF-8 sequence, the cut backs up to the
// previous rune boundary so the result is always valid UTF-8 — the
// trimmed text feeds JSON payloads (Langfuse span input/output and
// chat error messages), where a torn rune would be mangled into U+FFFD.
func trim(s string, n int) string {
	if len(s) <= n {
		return s
	}
	// n < len(s) here, so s[n] is in range. Back up past UTF-8
	// continuation bytes so we never cut a rune in half.
	for n > 0 && !utf8.RuneStart(s[n]) {
		n--
	}
	return s[:n]
}
|