root f9e72412c1 validatord: /v1/validate + /v1/iterate HTTP surface (port 3221)
Closes the last "Go primary" backlog item in
docs/ARCHITECTURE_COMPARISON.md. Go now owns the entire validator
path end-to-end — no Rust dependency for the staffing safety net.

Architecture: cmd/validatord on :3221 hosts both endpoints. It calls
chatd directly for the iterate loop's LLM hop (no gateway
self-loopback as in the Rust shape). The gateway proxies /v1/validate +
/v1/iterate to validatord.

What's in:
- internal/validator/playbook.go — 3rd validator kind (PRD checks:
  fill: prefix, endorsed_names ≤ target_count×2, fingerprint required)
- internal/validator/lookup_jsonl.go — JSONL roster loader (Parquet
  deferred; producer one-liner documented in package comment)
- internal/validator/iterate.go — ExtractJSON helper + Iterate
  orchestrator with ChatCaller seam for unit tests
- cmd/validatord/main.go — HTTP routes, roster load, chat client
- internal/shared/config.go — ValidatordConfig + gateway URL field
- lakehouse.toml — [validatord] section
- cmd/gateway/main.go — proxy routes for /v1/validate + /v1/iterate

Smoke: 5/5 PASS through gateway :3110:
  ✓ playbook happy path
  ✓ playbook missing fingerprint → 422 schema/fingerprint
  ✓ phantom candidate W-PHANTOM → 422 consistency
  ✓ unknown kind → 400
  ✓ roster loaded with 3 records

go test ./... green across 33 packages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 03:53:20 -05:00

238 lines
6.8 KiB
Go

package validator
import (
"context"
"encoding/json"
"fmt"
"strings"
)
// IterateRequest is the JSON input accepted by Iterate. It mirrors Rust's
// IterateRequest in `crates/gateway/src/v1/iterate.rs` so that JSONL
// captured from one runtime parses on the other.
//
// MaxIterations, MaxTokens and Temperature are optional; Iterate falls
// back to IterateConfig and then to package defaults when they are unset.
type IterateRequest struct {
	// Kind is handed to the validate callback alongside the artifact.
	Kind string `json:"kind"`
	// Prompt is the user message for the first attempt and the base of
	// every corrective retry prompt.
	Prompt string `json:"prompt"`
	// Provider and Model are forwarded unchanged to the ChatCaller.
	Provider string `json:"provider"`
	Model    string `json:"model"`
	// System is the optional system message forwarded to the ChatCaller.
	System string `json:"system,omitempty"`
	// Context is optional free-form data; Iterate itself does not read it.
	Context map[string]any `json:"context,omitempty"`
	// MaxIterations bounds the number of attempts; <= 0 means "use default".
	MaxIterations int `json:"max_iterations,omitempty"`
	// Temperature is a pointer so an explicit 0 is distinguishable from unset.
	Temperature *float64 `json:"temperature,omitempty"`
	// MaxTokens is forwarded to the ChatCaller; <= 0 means "use default".
	MaxTokens int `json:"max_tokens,omitempty"`
}
// IterateAttempt is one row of the attempt history returned to the caller.
type IterateAttempt struct {
	// Iteration is the zero-based index of the attempt within the loop.
	Iteration int `json:"iteration"`
	// Raw is the model's reply. Iterate truncates it (to at most 2000
	// bytes) before recording it, to keep responses bounded.
	Raw string `json:"raw"`
	// Status is the verdict for this attempt.
	Status AttemptStatus `json:"status"`
}
// AttemptStatus is the per-attempt verdict. It is tagged JSON so that
// consumers can switch on `kind` without having to parse the optional
// error text.
type AttemptStatus struct {
	// Kind is one of "no_json", "validation_failed" or "accepted".
	Kind string `json:"kind"`
	// Error carries the validator's message; Iterate sets it only for
	// "validation_failed" and it is omitted from JSON when empty.
	Error string `json:"error,omitempty"`
}
// IterateResponse is the success payload (200 + Report + accepted artifact).
type IterateResponse struct {
	// Artifact is the JSON object that passed validation.
	Artifact map[string]any `json:"artifact"`
	// Validation is the Report returned by the validate callback for Artifact.
	Validation Report `json:"validation"`
	// Iterations is the number of attempts made, counting the accepted one.
	Iterations int `json:"iterations"`
	// History lists every attempt in order; the last entry is the accepted one.
	History []IterateAttempt `json:"history"`
}
// IterateFailure is the max-iter-exhausted payload (422 + history).
type IterateFailure struct {
	// Error is a human-readable summary of why the loop gave up.
	Error string `json:"error"`
	// Iterations is the number of attempts that were made.
	Iterations int `json:"iterations"`
	// History lists every failed attempt in order.
	History []IterateAttempt `json:"history"`
}
// ChatCaller is the seam through which Iterate invokes an LLM. Tests
// inject scripted callers; production wires it to the chatd /v1/chat HTTP
// endpoint.
//
// Implementations must return only the model's textual content — no
// choices wrapper and no message envelope. A non-nil error is treated by
// Iterate as an infrastructure failure and aborts the loop.
type ChatCaller func(
	ctx context.Context,
	system, user, provider, model string,
	temperature *float64,
	maxTokens int,
) (string, error)
// IterateConfig threads daemon-level settings into the orchestrator.
// Each field is a fallback used by Iterate when the request leaves the
// matching value unset; a zero (or negative, for the ints) value here
// makes Iterate fall through to the package defaults.
type IterateConfig struct {
	// DefaultMaxIterations is used when the request's MaxIterations is <= 0.
	DefaultMaxIterations int
	// DefaultMaxTokens is used when the request's MaxTokens is <= 0.
	DefaultMaxTokens int
	// DefaultTemperature is used when the request's Temperature is nil.
	// Iterate treats 0 here as "not configured".
	DefaultTemperature float64
}
// Package-level fallbacks, applied by Iterate when neither the request nor
// the IterateConfig supplies a usable value.
const (
	// defaultMaxIterations is the number of generate→validate attempts.
	defaultMaxIterations = 3
	// defaultMaxTokens is the token budget passed to the ChatCaller.
	defaultMaxTokens = 4096
	// defaultTemperature is the sampling temperature passed to the ChatCaller.
	defaultTemperature = 0.2
)
// Iterate runs the generate→validate→correct loop.
//
// Each round sends the current prompt to chat, extracts the first JSON
// object from the reply with ExtractJSON, and passes it to validate
// together with req.Kind. A reply without a JSON object, or one that
// validate rejects, is recorded in the history and the next round's prompt
// is rebuilt from req.Prompt plus a corrective hint (hints do not
// accumulate across rounds).
//
// Exactly one of the three results is non-nil:
//   - *IterateResponse when an attempt passes validation; it carries the
//     artifact, the validator's Report and the full history.
//   - *IterateFailure when the iteration budget is exhausted.
//   - error for infrastructure problems — the chat hop failed or a
//     required callback is nil — so the HTTP layer can return 502.
//
// Unset request values fall back to cfg and then to the package defaults:
// MaxIterations and MaxTokens when <= 0, Temperature when nil. A
// cfg.DefaultTemperature of 0 is treated as "not configured"; a request
// can still ask for temperature 0 explicitly via its pointer field.
//
// History entries use a zero-based Iteration index, while the Iterations
// count in the response is the number of attempts made.
func Iterate(ctx context.Context, req IterateRequest, cfg IterateConfig, chat ChatCaller, validate func(string, map[string]any) (Report, error)) (*IterateResponse, *IterateFailure, error) {
	const (
		// maxRawLen bounds the raw model output kept per history row.
		maxRawLen = 2000
		// historyPrealloc caps the up-front history allocation. maxIter can
		// come straight from the request body, so it must not size an
		// allocation directly; append grows the slice if more is needed.
		historyPrealloc = 16
	)

	// Fail with an error rather than a nil-func panic inside the loop.
	if chat == nil {
		return nil, nil, fmt.Errorf("iterate: chat caller is nil")
	}
	if validate == nil {
		return nil, nil, fmt.Errorf("iterate: validate func is nil")
	}

	maxIter := req.MaxIterations
	if maxIter <= 0 {
		maxIter = cfg.DefaultMaxIterations
	}
	if maxIter <= 0 {
		maxIter = defaultMaxIterations
	}

	maxTokens := req.MaxTokens
	if maxTokens <= 0 {
		maxTokens = cfg.DefaultMaxTokens
	}
	if maxTokens <= 0 {
		maxTokens = defaultMaxTokens
	}

	temp := req.Temperature
	if temp == nil {
		t := cfg.DefaultTemperature
		if t == 0 {
			t = defaultTemperature
		}
		temp = &t
	}

	capHint := maxIter
	if capHint > historyPrealloc {
		capHint = historyPrealloc
	}
	history := make([]IterateAttempt, 0, capHint)

	// record appends one history row, truncating the raw model output.
	record := func(iter int, raw string, status AttemptStatus) {
		history = append(history, IterateAttempt{
			Iteration: iter,
			Raw:       trim(raw, maxRawLen),
			Status:    status,
		})
	}

	currentPrompt := req.Prompt
	for i := 0; i < maxIter; i++ {
		raw, err := chat(ctx, req.System, currentPrompt, req.Provider, req.Model, temp, maxTokens)
		if err != nil {
			return nil, nil, fmt.Errorf("/v1/chat hop failed at iter %d: %w", i, err)
		}

		artifact := ExtractJSON(raw)
		if artifact == nil {
			record(i, raw, AttemptStatus{Kind: "no_json"})
			currentPrompt = req.Prompt + "\n\nYour previous attempt did not contain a JSON object. Reply with ONLY a valid JSON object matching the requested artifact shape."
			continue
		}

		report, vErr := validate(req.Kind, artifact)
		if vErr == nil {
			record(i, raw, AttemptStatus{Kind: "accepted"})
			return &IterateResponse{
				Artifact:   artifact,
				Validation: report,
				Iterations: i + 1,
				History:    history,
			}, nil, nil
		}

		// Validation failed — append the error to the prompt for the next
		// iteration so the model sees the concrete failure mode and retries
		// with corrective context. Same "validator IS the observer" shape
		// as Phase 43.
		errSummary := vErr.Error()
		record(i, raw, AttemptStatus{Kind: "validation_failed", Error: errSummary})
		currentPrompt = req.Prompt + "\n\nPrior attempt failed validation:\n" + errSummary + "\n\nFix the specific issue above and respond with a corrected JSON object."
	}

	return nil, &IterateFailure{
		Error:      fmt.Sprintf("max iterations reached (%d) without passing validation", maxIter),
		Iterations: maxIter,
		History:    history,
	}, nil
}
// ExtractJSON pulls the first JSON object from a model's output.
// It handles fenced code blocks (```json ... ```), bare braces, and stray
// prose around the JSON. It returns nil when no object can be extracted.
//
// Strategy:
//  1. Every fenced block is tried first, in order; the first one that
//     parses as a JSON object wins.
//  2. Otherwise the text is scanned for top-level `{` candidates. Each
//     candidate is decoded with a json.Decoder, which stops at the end of
//     the first complete value, so trailing prose is ignored and braces
//     inside JSON string literals (e.g. {"a": "}"}) do not confuse the
//     match. A candidate that fails to decode is skipped up to its
//     brace-balanced end and the scan resumes after it; objects nested
//     inside a failed candidate are not tried on their own.
//
// An unmatched `}` in the prose is ignored rather than counted, so a stray
// closing brace earlier in the text cannot hide a valid object later on.
//
// NOTE(review): the original comment states this follows the same
// algorithm shape as Rust's extract_json so both runtimes accept the same
// output. The Rust side is not visible here — confirm it also tolerates
// stray `}` and braces inside strings if strict parity matters.
func ExtractJSON(raw string) map[string]any {
	// Try fenced blocks first.
	for _, c := range fencedCandidates(raw) {
		if v, ok := parseObject(c); ok {
			return v
		}
	}

	// Fall back to scanning the raw text for outermost {...} candidates.
	// depth counts open braces of the candidate being skipped; 0 means the
	// scan is currently outside any candidate.
	depth := 0
	for i := 0; i < len(raw); i++ {
		switch raw[i] {
		case '{':
			if depth == 0 {
				// Decode one JSON value starting at this brace. Decode
				// reads exactly one value and leaves anything after it
				// untouched.
				var v any
				if err := json.NewDecoder(strings.NewReader(raw[i:])).Decode(&v); err == nil {
					if obj, ok := v.(map[string]any); ok {
						return obj
					}
				}
			}
			depth++
		case '}':
			// Never go negative: an unmatched closer is just prose.
			if depth > 0 {
				depth--
			}
		}
	}
	return nil
}
// fencedCandidates returns the whitespace-trimmed bodies of every
// ``` fenced block in `raw`, in order of appearance.
//
// Fences are paired left to right: each opening ``` is matched with the
// next ``` after it, and an opening fence with no closing fence ends the
// scan. When a block spans more than one line, its first line (the
// optional language tag, e.g. ```json) is dropped. A block that sits on a
// single line has no tag line to drop and is returned whole.
//
// The closing fence is located before the tag line is looked for, so a
// newline that appears after an inline block (```x``` ...\n) is never
// mistaken for the end of that block's tag line.
func fencedCandidates(raw string) []string {
	const fence = "```"

	var out []string
	rest := raw
	for {
		open := strings.Index(rest, fence)
		if open < 0 {
			break
		}
		after := rest[open+len(fence):]

		closeIdx := strings.Index(after, fence)
		if closeIdx < 0 {
			// Unterminated fence: nothing more to collect.
			break
		}
		body := after[:closeIdx]

		// Skip the optional language tag, i.e. everything up to the first
		// newline inside this block.
		if nl := strings.IndexByte(body, '\n'); nl >= 0 {
			body = body[nl+1:]
		}

		out = append(out, strings.TrimSpace(body))
		rest = after[closeIdx+len(fence):]
	}
	return out
}
// parseObject decodes s as JSON and reports whether the top-level value is
// an object. It returns (nil, false) when s is not valid JSON or when the
// value is anything other than an object (array, string, number, null...).
func parseObject(s string) (map[string]any, bool) {
	var decoded any
	if json.Unmarshal([]byte(s), &decoded) != nil {
		return nil, false
	}
	switch obj := decoded.(type) {
	case map[string]any:
		return obj, true
	default:
		return nil, false
	}
}
// trim returns s shortened to at most n bytes.
//
// Strings that already fit are returned unchanged. Otherwise the cut point
// is moved back (by at most three bytes) so it does not land in the middle
// of a multi-byte UTF-8 sequence; cutting mid-rune would leave invalid
// UTF-8 at the end of the result. The result may therefore be slightly
// shorter than n bytes. A non-positive n yields the empty string.
func trim(s string, n int) string {
	if n <= 0 {
		return ""
	}
	if len(s) <= n {
		return s
	}
	cut := n
	// UTF-8 continuation bytes have the bit pattern 10xxxxxx and a rune has
	// at most three of them. If s[cut] is a continuation byte, the rune it
	// belongs to started earlier, so step back to that rune's first byte.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut]
}