Closes the last "Go primary" backlog item in docs/ARCHITECTURE_COMPARISON.md.
Go now owns the entire validator path end-to-end — no Rust dep for the staffing safety net.

Architecture: cmd/validatord on :3221 hosts both endpoints. It calls chatd directly for the iterate loop's LLM hop (no gateway self-loopback like the Rust shape). The gateway proxies /v1/validate + /v1/iterate to validatord.

What's in:
- internal/validator/playbook.go — 3rd validator kind (PRD checks: fill: prefix, endorsed_names ≤ target_count×2, fingerprint required)
- internal/validator/lookup_jsonl.go — JSONL roster loader (Parquet deferred; producer one-liner documented in package comment)
- internal/validator/iterate.go — ExtractJSON helper + Iterate orchestrator with ChatCaller seam for unit tests
- cmd/validatord/main.go — HTTP routes, roster load, chat client
- internal/shared/config.go — ValidatordConfig + gateway URL field
- lakehouse.toml — [validatord] section
- cmd/gateway/main.go — proxy routes for /v1/validate + /v1/iterate

Smoke: 5/5 PASS through gateway :3110:
✓ playbook happy path
✓ playbook missing fingerprint → 422 schema/fingerprint
✓ phantom candidate W-PHANTOM → 422 consistency
✓ unknown kind → 400
✓ roster loaded with 3 records

go test ./... green across 33 packages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
238 lines
6.8 KiB
Go
238 lines
6.8 KiB
Go
package validator
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// IterateRequest carries everything a single Iterate call needs. Its
// JSON field names follow Rust's IterateRequest in
// `crates/gateway/src/v1/iterate.rs`, so JSONL captured from one
// runtime parses on the other.
type IterateRequest struct {
	// Kind names the validator; Iterate passes it to the validate callback.
	Kind string `json:"kind"`
	// Prompt is the base user prompt. Retry prompts are this text plus a
	// corrective suffix.
	Prompt string `json:"prompt"`
	// Provider and Model are forwarded to the chat caller unchanged.
	Provider string `json:"provider"`
	Model    string `json:"model"`
	// System is the optional system prompt forwarded to the chat caller.
	System string `json:"system,omitempty"`
	// Context is an optional free-form payload; Iterate in this file does
	// not read it.
	Context map[string]any `json:"context,omitempty"`
	// MaxIterations bounds the number of chat rounds. Values <= 0 fall back
	// to the daemon config, then to the built-in default.
	MaxIterations int `json:"max_iterations,omitempty"`
	// Temperature is a pointer so "not supplied" (nil) can be told apart
	// from an explicit 0.
	Temperature *float64 `json:"temperature,omitempty"`
	// MaxTokens is forwarded to the chat caller. Values <= 0 fall back to
	// the daemon config, then to the built-in default.
	MaxTokens int `json:"max_tokens,omitempty"`
}
|
|
|
|
// IterateAttempt is one row in the history. raw is capped at 2000
|
|
// chars on the wire to keep responses bounded.
|
|
type IterateAttempt struct {
|
|
Iteration int `json:"iteration"`
|
|
Raw string `json:"raw"`
|
|
Status AttemptStatus `json:"status"`
|
|
}
|
|
|
|
// AttemptStatus is the verdict for a single attempt. It is a tagged
// shape: consumers can switch on `kind` without having to parse the
// optional error text.
type AttemptStatus struct {
	// Kind is one of "no_json", "validation_failed" or "accepted".
	Kind string `json:"kind"`
	// Error carries the validator's message; Iterate sets it only for
	// "validation_failed" and it is omitted from JSON when empty.
	Error string `json:"error,omitempty"`
}
|
|
|
|
// IterateResponse is the success payload (200 + Report + accepted artifact).
|
|
type IterateResponse struct {
|
|
Artifact map[string]any `json:"artifact"`
|
|
Validation Report `json:"validation"`
|
|
Iterations int `json:"iterations"`
|
|
History []IterateAttempt `json:"history"`
|
|
}
|
|
|
|
// IterateFailure is the max-iter-exhausted payload (422 + history).
|
|
type IterateFailure struct {
|
|
Error string `json:"error"`
|
|
Iterations int `json:"iterations"`
|
|
History []IterateAttempt `json:"history"`
|
|
}
|
|
|
|
// ChatCaller is the seam through which Iterate reaches an LLM.
// Production wires it to the chatd /v1/chat HTTP endpoint; tests inject
// scripted callers instead.
//
// An implementation must return only the model's textual content — no
// choices wrapper and no message envelope.
type ChatCaller func(
	ctx context.Context,
	system, user, provider, model string,
	temperature *float64,
	maxTokens int,
) (string, error)
|
|
|
|
// IterateConfig carries daemon-level defaults into Iterate. Each field
// is consulted only when the request leaves the matching value unset,
// and a zero field here falls through to the built-in default.
type IterateConfig struct {
	// DefaultMaxIterations is used when the request's value is <= 0.
	DefaultMaxIterations int
	// DefaultMaxTokens is used when the request's value is <= 0.
	DefaultMaxTokens int
	// DefaultTemperature is used when the request's temperature is nil.
	// A value of exactly 0 is treated as "not configured".
	DefaultTemperature float64
}
|
|
|
|
// Built-in fallbacks Iterate applies when neither the request nor the
// IterateConfig supplies a usable value.
const (
	defaultMaxIterations = 3    // chat rounds per Iterate call
	defaultMaxTokens     = 4096 // completion budget handed to the chat caller
	defaultTemperature   = 0.2  // sampling temperature handed to the chat caller
)
|
|
|
|
// Iterate runs the generate→validate→correct loop. Returns
|
|
// IterateResponse on success (with full history) or IterateFailure
|
|
// on max-iter exhaustion. Infrastructure errors (chat hop fails)
|
|
// surface as Go errors so the HTTP layer can return 502.
|
|
func Iterate(ctx context.Context, req IterateRequest, cfg IterateConfig, chat ChatCaller, validate func(string, map[string]any) (Report, error)) (*IterateResponse, *IterateFailure, error) {
|
|
maxIter := req.MaxIterations
|
|
if maxIter <= 0 {
|
|
maxIter = cfg.DefaultMaxIterations
|
|
}
|
|
if maxIter <= 0 {
|
|
maxIter = defaultMaxIterations
|
|
}
|
|
maxTokens := req.MaxTokens
|
|
if maxTokens <= 0 {
|
|
maxTokens = cfg.DefaultMaxTokens
|
|
}
|
|
if maxTokens <= 0 {
|
|
maxTokens = defaultMaxTokens
|
|
}
|
|
temp := req.Temperature
|
|
if temp == nil {
|
|
t := cfg.DefaultTemperature
|
|
if t == 0 {
|
|
t = defaultTemperature
|
|
}
|
|
temp = &t
|
|
}
|
|
|
|
currentPrompt := req.Prompt
|
|
history := make([]IterateAttempt, 0, maxIter)
|
|
|
|
for i := 0; i < maxIter; i++ {
|
|
raw, err := chat(ctx, req.System, currentPrompt, req.Provider, req.Model, temp, maxTokens)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("/v1/chat hop failed at iter %d: %w", i, err)
|
|
}
|
|
|
|
artifact := ExtractJSON(raw)
|
|
if artifact == nil {
|
|
history = append(history, IterateAttempt{
|
|
Iteration: i,
|
|
Raw: trim(raw, 2000),
|
|
Status: AttemptStatus{Kind: "no_json"},
|
|
})
|
|
currentPrompt = req.Prompt + "\n\nYour previous attempt did not contain a JSON object. Reply with ONLY a valid JSON object matching the requested artifact shape."
|
|
continue
|
|
}
|
|
|
|
report, vErr := validate(req.Kind, artifact)
|
|
if vErr == nil {
|
|
history = append(history, IterateAttempt{
|
|
Iteration: i,
|
|
Raw: trim(raw, 2000),
|
|
Status: AttemptStatus{Kind: "accepted"},
|
|
})
|
|
return &IterateResponse{
|
|
Artifact: artifact,
|
|
Validation: report,
|
|
Iterations: i + 1,
|
|
History: history,
|
|
}, nil, nil
|
|
}
|
|
|
|
// Validation failed — append error to prompt for next iter.
|
|
// The model sees concrete failure mode + retries with corrective
|
|
// context. Same "validator IS the observer" shape as Phase 43.
|
|
errSummary := vErr.Error()
|
|
history = append(history, IterateAttempt{
|
|
Iteration: i,
|
|
Raw: trim(raw, 2000),
|
|
Status: AttemptStatus{Kind: "validation_failed", Error: errSummary},
|
|
})
|
|
currentPrompt = req.Prompt + "\n\nPrior attempt failed validation:\n" + errSummary + "\n\nFix the specific issue above and respond with a corrected JSON object."
|
|
}
|
|
|
|
return nil, &IterateFailure{
|
|
Error: fmt.Sprintf("max iterations reached (%d) without passing validation", maxIter),
|
|
Iterations: maxIter,
|
|
History: history,
|
|
}, nil
|
|
}
|
|
|
|
// ExtractJSON pulls the first JSON object from a model's output.
// It handles fenced code blocks (```json ... ```), bare braces, and
// stray prose around the JSON, and returns nil when no object can be
// extracted.
//
// Fenced blocks are tried first: the first fence whose whole body is a
// JSON object wins. Otherwise the text is scanned for top-level '{'
// characters and a JSON decoder is run from each one, so braces inside
// string values (e.g. {"a": "}"}) do not cut the object short, and a
// stray '}' in prose does not stop later objects from being found. A
// brace group that is not valid JSON is skipped as a whole; objects
// nested inside it are not returned on their own.
//
// NOTE(review): the previous comment said this mirrors Rust's
// extract_json. Everything plain brace balancing accepted is still
// accepted, but this version accepts more — confirm against the Rust
// side if strict parity matters.
func ExtractJSON(raw string) map[string]any {
	for _, body := range fencedCandidates(raw) {
		if obj, ok := parseObject(body); ok {
			return obj
		}
	}

	// depth tracks naive brace nesting and is used only to decide which
	// '{' characters are top-level and to skip past groups that failed
	// to decode.
	depth := 0
	for i := 0; i < len(raw); i++ {
		switch raw[i] {
		case '{':
			if depth == 0 {
				if obj, ok := decodeLeadingObject(raw[i:]); ok {
					return obj
				}
			}
			depth++
		case '}':
			// Clamp at zero: an unmatched '}' in prose must not push the
			// counter negative and hide every later object.
			if depth > 0 {
				depth--
			}
		}
	}
	return nil
}

// decodeLeadingObject decodes the single JSON value at the start of s
// and reports whether it is an object. Anything after that value is
// ignored, which lets the JSON scanner — rather than brace counting —
// decide where the object ends.
func decodeLeadingObject(s string) (map[string]any, bool) {
	var v any
	if err := json.NewDecoder(strings.NewReader(s)).Decode(&v); err != nil {
		return nil, false
	}
	obj, ok := v.(map[string]any)
	return obj, ok
}

// fencedCandidates returns the trimmed bodies of every ``` fenced block
// in raw, in order of appearance. Text on the opening fence's line (an
// optional language tag such as ```json) is dropped. An opening fence
// with no closing fence ends the scan.
func fencedCandidates(raw string) []string {
	const fence = "```"

	var out []string
	rest := raw
	for {
		open := strings.Index(rest, fence)
		if open < 0 {
			return out
		}
		after := rest[open+len(fence):]
		end := strings.Index(after, fence)
		if end < 0 {
			return out
		}

		// Look for the tag line only inside this fence's body. Searching
		// past the closing fence would swallow a single-line block and
		// mis-pair every fence after it.
		body := after[:end]
		if nl := strings.IndexByte(body, '\n'); nl >= 0 {
			body = body[nl+1:]
		}
		out = append(out, strings.TrimSpace(body))
		rest = after[end+len(fence):]
	}
}

// parseObject reports whether s, taken as a whole, is a JSON object and
// returns it. Any other JSON value, or invalid JSON, yields (nil, false).
func parseObject(s string) (map[string]any, bool) {
	var v any
	if err := json.Unmarshal([]byte(s), &v); err != nil {
		return nil, false
	}
	obj, ok := v.(map[string]any)
	return obj, ok
}
|
|
|
|
// trim returns s shortened to at most n bytes. The cut never lands in
// the middle of a UTF-8 sequence: if byte n is a continuation byte, the
// cut moves back to the start of that rune, so the result can be up to
// three bytes shorter than n. For n <= 0 it returns "".
func trim(s string, n int) string {
	if n <= 0 {
		return ""
	}
	if len(s) <= n {
		return s
	}

	cut := n
	// s[cut] is the first byte that would be dropped. A continuation byte
	// (bit pattern 10xxxxxx) there means the rune started earlier. A rune
	// is at most four bytes, so three steps back is enough for valid
	// UTF-8; the bound also stops the loop on malformed input.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut]
}
|