// golangLAKEHOUSE/internal/validator/iterate.go

package validator

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
)

// IterateRequest is the input to Iterate. Mirrors Rust's
// IterateRequest in `crates/gateway/src/v1/iterate.rs` so JSONL
// captured from one runtime parses on the other.
//
// MaxIterations, Temperature and MaxTokens are optional: when they are
// unset (non-positive, or nil for Temperature) Iterate falls back to
// IterateConfig and then to the package defaults.
type IterateRequest struct {
	// Kind is passed to the validate callback as its first argument.
	Kind          string         `json:"kind"`
	// Prompt is the base user prompt. Every retry prompt is rebuilt from
	// it plus a corrective suffix describing the previous failure.
	Prompt        string         `json:"prompt"`
	// Provider and Model are forwarded unchanged to the ChatCaller.
	Provider      string         `json:"provider"`
	Model         string         `json:"model"`
	// System is forwarded unchanged to the ChatCaller as the system prompt.
	System        string         `json:"system,omitempty"`
	// Context is not read by Iterate in this file.
	// NOTE(review): presumably consumed elsewhere or kept for wire
	// parity with the Rust struct — confirm.
	Context       map[string]any `json:"context,omitempty"`
	// MaxIterations bounds the number of chat attempts; <= 0 means "use
	// the configured default".
	MaxIterations int            `json:"max_iterations,omitempty"`
	// Temperature is a pointer so an explicit 0 can be told apart from
	// "not set" (nil).
	Temperature   *float64       `json:"temperature,omitempty"`
	// MaxTokens is forwarded to the ChatCaller; <= 0 means "use the
	// configured default".
	MaxTokens     int            `json:"max_tokens,omitempty"`
}

// IterateAttempt is one row in the history: what the model returned on
// a given attempt and how that attempt was judged. Iterate truncates
// Raw to at most 2000 bytes before recording it, to keep responses
// bounded.
type IterateAttempt struct {
	// Iteration is the 0-based index of the attempt.
	Iteration int            `json:"iteration"`
	// Raw is the (possibly truncated) text returned by the ChatCaller.
	Raw       string         `json:"raw"`
	// Status is the verdict for this attempt.
	Status    AttemptStatus  `json:"status"`
}

// AttemptStatus is the per-attempt verdict. Tagged JSON so consumers
// can switch on `kind` without trying to parse the optional error.
//
// Iterate fills Error only for "validation_failed", where it carries
// the text of the error returned by the validate callback; it is
// omitted from the JSON for the other kinds.
type AttemptStatus struct {
	Kind  string `json:"kind"` // "no_json" | "validation_failed" | "accepted"
	Error string `json:"error,omitempty"`
}

// IterateResponse is the success payload (200 + Report + accepted artifact).
type IterateResponse struct {
	// Artifact is the JSON object extracted from the accepted attempt.
	Artifact   map[string]any   `json:"artifact"`
	// Validation is the Report returned by the validate callback for
	// the accepted artifact.
	Validation Report           `json:"validation"`
	// Iterations is the number of attempts made, counting the accepted
	// one (1-based, unlike IterateAttempt.Iteration).
	Iterations int              `json:"iterations"`
	// History lists every attempt in order; the accepted attempt is last.
	History    []IterateAttempt `json:"history"`
}

// IterateFailure is the max-iter-exhausted payload (422 + history).
type IterateFailure struct {
	// Error is a human-readable summary naming the iteration limit that
	// was reached.
	Error      string           `json:"error"`
	// Iterations is the iteration limit that was in effect, which is
	// also the number of attempts made.
	Iterations int              `json:"iterations"`
	// History lists every failed attempt in order.
	History    []IterateAttempt `json:"history"`
}

// ChatCaller is the seam Iterate uses to invoke an LLM. Tests inject
// scripted callers; production wires this to the chatd /v1/chat HTTP
// endpoint. Implementations must return the model's textual content
// (no choices wrapper, no message envelope).
//
// Iterate calls it once per attempt with the request's system prompt,
// the current user prompt (the base prompt plus any corrective
// suffix), the request's provider and model, and the resolved
// temperature and max-token values. When called from Iterate the
// temperature pointer is never nil and maxTokens is always positive.
// A non-nil error aborts the whole Iterate call.
type ChatCaller func(ctx context.Context, system, user, provider, model string, temperature *float64, maxTokens int) (string, error)

// IterateConfig threads daemon-level settings into the orchestrator.
//
// Each field is consulted only when the request leaves the matching
// value unset. A non-positive DefaultMaxIterations or DefaultMaxTokens,
// or a DefaultTemperature of exactly 0, is treated as "not configured"
// and replaced by the package-level default. A temperature of 0 can
// therefore only be requested per call, not configured here.
type IterateConfig struct {
	DefaultMaxIterations int
	DefaultMaxTokens     int
	DefaultTemperature   float64
}

// Last-resort fallbacks used by Iterate when neither the request nor
// IterateConfig supplies a usable value.
const (
	// defaultMaxIterations is the number of chat attempts before giving up.
	defaultMaxIterations = 3
	// defaultMaxTokens is the max-token value passed to the ChatCaller.
	defaultMaxTokens     = 4096
	// defaultTemperature is the sampling temperature passed to the ChatCaller.
	defaultTemperature   = 0.2
)

// Iterate runs the generate→validate→correct loop.
//
// Each round calls chat with the current prompt, extracts a JSON object
// from the reply with ExtractJSON and hands it to validate together
// with req.Kind. A reply without a JSON object, or one that validate
// rejects, is recorded in the history and the next round is prompted
// with req.Prompt plus a corrective suffix describing the failure (the
// suffix replaces the previous one; it does not accumulate).
//
// Iteration count, max tokens and temperature are resolved in the order
// request → cfg → package default.
//
// Exactly one of the three results is non-nil:
//   - *IterateResponse when an attempt passes validation (with the full
//     history, accepted attempt last);
//   - *IterateFailure when the iteration limit is reached without an
//     accepted attempt;
//   - error for infrastructure problems (the chat hop fails, or a
//     required callback is nil) so the HTTP layer can return 502.
func Iterate(ctx context.Context, req IterateRequest, cfg IterateConfig, chat ChatCaller, validate func(string, map[string]any) (Report, error)) (*IterateResponse, *IterateFailure, error) {
	// Fail cleanly instead of panicking on a nil function value.
	if chat == nil || validate == nil {
		return nil, nil, fmt.Errorf("iterate: chat and validate callbacks must be non-nil")
	}

	maxIter := req.MaxIterations
	if maxIter <= 0 {
		maxIter = cfg.DefaultMaxIterations
	}
	if maxIter <= 0 {
		maxIter = defaultMaxIterations
	}
	maxTokens := req.MaxTokens
	if maxTokens <= 0 {
		maxTokens = cfg.DefaultMaxTokens
	}
	if maxTokens <= 0 {
		maxTokens = defaultMaxTokens
	}
	temp := req.Temperature
	if temp == nil {
		t := cfg.DefaultTemperature
		if t == 0 {
			t = defaultTemperature
		}
		temp = &t
	}

	const (
		// maxRawBytes bounds the raw model output kept per history row.
		maxRawBytes = 2000
		// maxHistoryPrealloc bounds the up-front history allocation.
		// maxIter can come straight from the request, so it must not
		// size an allocation directly: a huge max_iterations would
		// otherwise exhaust memory before the first chat hop. append
		// grows the slice if more rows are really needed.
		maxHistoryPrealloc = 16
	)
	capHint := maxIter
	if capHint > maxHistoryPrealloc {
		capHint = maxHistoryPrealloc
	}
	history := make([]IterateAttempt, 0, capHint)

	// record appends one attempt to the history with its raw output capped.
	record := func(iter int, raw string, status AttemptStatus) {
		history = append(history, IterateAttempt{
			Iteration: iter,
			Raw:       trim(raw, maxRawBytes),
			Status:    status,
		})
	}

	currentPrompt := req.Prompt
	for i := 0; i < maxIter; i++ {
		raw, err := chat(ctx, req.System, currentPrompt, req.Provider, req.Model, temp, maxTokens)
		if err != nil {
			return nil, nil, fmt.Errorf("/v1/chat hop failed at iter %d: %w", i, err)
		}

		artifact := ExtractJSON(raw)
		if artifact == nil {
			record(i, raw, AttemptStatus{Kind: "no_json"})
			currentPrompt = req.Prompt + "\n\nYour previous attempt did not contain a JSON object. Reply with ONLY a valid JSON object matching the requested artifact shape."
			continue
		}

		report, vErr := validate(req.Kind, artifact)
		if vErr == nil {
			record(i, raw, AttemptStatus{Kind: "accepted"})
			return &IterateResponse{
				Artifact:   artifact,
				Validation: report,
				Iterations: i + 1,
				History:    history,
			}, nil, nil
		}

		// Validation failed — append error to prompt for next iter.
		// The model sees concrete failure mode + retries with corrective
		// context. Same "validator IS the observer" shape as Phase 43.
		errSummary := vErr.Error()
		record(i, raw, AttemptStatus{Kind: "validation_failed", Error: errSummary})
		currentPrompt = req.Prompt + "\n\nPrior attempt failed validation:\n" + errSummary + "\n\nFix the specific issue above and respond with a corrected JSON object."
	}

	return nil, &IterateFailure{
		Error:      fmt.Sprintf("max iterations reached (%d) without passing validation", maxIter),
		Iterations: maxIter,
		History:    history,
	}, nil
}

// ExtractJSON pulls the first JSON object from a model's output.
// Handles fenced code blocks (```json ... ```), bare braces, and
// stray prose around the JSON. Returns nil on no extractable object.
//
// Strategy:
//  1. Every fenced block is tried in order; the first whose body parses
//     as a JSON object wins.
//  2. Otherwise the text is scanned for outermost balanced {...} spans,
//     and the first span that parses as a JSON object wins.
//
// A '}' seen while no span is open is ignored. Counting it would push
// the depth negative, and a well-formed object later in the text would
// then never be seen as balanced.
//
// Known limitation: the balance scan counts braces inside JSON string
// literals too, so an object whose strings hold unbalanced braces is
// only found when it sits in a fenced block.
//
// Same algorithm shape as Rust's extract_json so a model producing
// output that one runtime accepts will be accepted by the other.
// NOTE(review): confirm the Rust side also skips stray closing braces.
func ExtractJSON(raw string) map[string]any {
	// Try fenced first.
	for _, candidate := range fencedCandidates(raw) {
		if obj, ok := parseObject(candidate); ok {
			return obj
		}
	}

	// Fall back to outermost {...} balance. '{' and '}' are ASCII, so
	// scanning bytes cannot match inside a multi-byte UTF-8 sequence.
	depth := 0
	start := 0
	for i := 0; i < len(raw); i++ {
		switch raw[i] {
		case '{':
			if depth == 0 {
				start = i
			}
			depth++
		case '}':
			if depth == 0 {
				// Stray closer outside any candidate span.
				continue
			}
			depth--
			if depth == 0 {
				if obj, ok := parseObject(raw[start : i+1]); ok {
					return obj
				}
			}
		}
	}
	return nil
}

// fencedCandidates returns the bodies of every ``` fenced block in
// `raw`, in order of appearance, each with surrounding whitespace
// trimmed. An unterminated trailing fence is ignored.
//
// An optional language tag on the opening fence (e.g. ```json) is
// skipped by dropping everything up to the first newline inside the
// block. The newline is searched for only between the opening and
// closing fence: an inline block such as ```{"a":1}``` has no tag line,
// and a newline found after its closing fence must not be taken for
// one, or the block is lost and later fences are paired wrongly.
func fencedCandidates(raw string) []string {
	const fence = "```"
	var out []string
	rest := raw
	for {
		_, afterOpen, found := strings.Cut(rest, fence)
		if !found {
			break
		}
		block, afterClose, closed := strings.Cut(afterOpen, fence)
		if !closed {
			// Opening fence without a matching close: nothing to extract.
			break
		}
		// Drop the opening line (language tag) when the block has one.
		if _, body, hasTagLine := strings.Cut(block, "\n"); hasTagLine {
			block = body
		}
		out = append(out, strings.TrimSpace(block))
		rest = afterClose
	}
	return out
}

// parseObject decodes s as JSON and reports whether the top-level value
// is an object. It returns the decoded object and true on success.
// Invalid JSON, or any other top-level value (array, string, number,
// bool, null), yields a nil map and false.
func parseObject(s string) (map[string]any, bool) {
	var decoded any
	if json.Unmarshal([]byte(s), &decoded) != nil {
		return nil, false
	}
	if object, isObject := decoded.(map[string]any); isObject {
		return object, true
	}
	return nil, false
}

// trim returns s shortened to at most n bytes. s is returned unchanged
// when it already fits; a non-positive n yields "".
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte n is
// a continuation byte, the cut backs up to the start of that rune. The
// result may therefore be a few bytes shorter than n. Cutting mid-rune
// would leave invalid UTF-8 in the string.
func trim(s string, n int) string {
	if n <= 0 {
		return ""
	}
	if len(s) <= n {
		return s
	}
	// UTF-8 continuation bytes have the bit pattern 10xxxxxx. While the
	// first dropped byte is one, the cut sits inside a rune.
	for n > 0 && s[n]&0xC0 == 0x80 {
		n--
	}
	return s[:n]
}