Two threads landing together — the doc edits interleave so they ship in a single commit. 1. **vectord substrate fix verified at original scale** (closes the 2026-05-01 thread). Re-ran multitier 5min @ conc=50: 132,211 scenarios at 438/sec, 6/6 classes at 0% failure (was 4/6 pre-fix). Throughput dropped 1,115 → 438/sec because previously-broken scenarios now do real HNSW Add work — honest cost of correctness. The fix (i.vectors side-store + safeGraphAdd recover wrappers + smallIndexRebuildThreshold=32 + saveTask coalescing) holds at the footprint that originally surfaced the bug. 2. **Materializer port** — internal/materializer + cmd/materializer + scripts/materializer_smoke.sh. Ports scripts/distillation/transforms.ts (12 transforms) + build_evidence_index.ts (idempotency, day-partition, receipt). On-wire JSON shape matches TS so Bun and Go runs are interchangeable. 14 tests green. 3. **Replay port** — internal/replay + cmd/replay + scripts/replay_smoke.sh. Ports scripts/distillation/replay.ts (retrieve → bundle → /v1/chat → validate → log). Closes audit-FULL phase 7 live invocation on the Go side. Both runtimes append to the same data/_kb/replay_runs.jsonl (schema=replay_run.v1). 14 tests green. Side effect on internal/distillation/types.go: EvidenceRecord gained prompt_tokens, completion_tokens, and metadata fields to mirror the TS shape the materializer transforms produce. STATE_OF_PLAY refreshed to 2026-05-02; ARCHITECTURE_COMPARISON decisions tracker moves the materializer + replay items from _open_ to DONE and adds the substrate-fix scale verification row. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
132 lines
3.9 KiB
Go
132 lines
3.9 KiB
Go
package replay
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// callModelResult is what the gateway round-trip returns.
type callModelResult struct {
	// Content is the first choice's message content; empty when the
	// response carried no choices or the call failed.
	Content string
	// OK is true only when the HTTP round-trip and JSON decode both
	// succeeded.
	OK bool
	// Error is a short failure description (trimmed to keep logs
	// readable); empty when OK is true.
	Error string
}
|
|
|
|
// ModelCaller is the seam tests use to swap out HTTP. Production
// supplies httpModelCaller; tests can supply scripted responses.
//
// ctx bounds the request; model selects the LLM; system and user are
// the two chat messages posted to the gateway.
type ModelCaller func(ctx context.Context, model, system, user string) callModelResult
|
|
|
|
// httpModelCaller posts to ${gatewayURL}/v1/chat with provider derived
|
|
// from model name. Mirrors replay.ts:callModel.
|
|
func httpModelCaller(gatewayURL string) ModelCaller {
|
|
client := &http.Client{Timeout: 180 * time.Second}
|
|
return func(ctx context.Context, model, system, user string) callModelResult {
|
|
provider := inferProvider(model)
|
|
body, err := json.Marshal(map[string]any{
|
|
"provider": provider,
|
|
"model": model,
|
|
"messages": []map[string]string{
|
|
{"role": "system", "content": system},
|
|
{"role": "user", "content": user},
|
|
},
|
|
"max_tokens": 1500,
|
|
"temperature": 0.1,
|
|
})
|
|
if err != nil {
|
|
return callModelResult{Error: "marshal request: " + err.Error()}
|
|
}
|
|
req, err := http.NewRequestWithContext(ctx, "POST", gatewayURL+"/v1/chat", bytes.NewReader(body))
|
|
if err != nil {
|
|
return callModelResult{Error: "build request: " + err.Error()}
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return callModelResult{Error: trim(err.Error(), 240)}
|
|
}
|
|
defer resp.Body.Close()
|
|
buf, _ := io.ReadAll(resp.Body)
|
|
if resp.StatusCode >= 400 {
|
|
return callModelResult{Error: fmt.Sprintf("HTTP %d: %s", resp.StatusCode, trim(string(buf), 240))}
|
|
}
|
|
var parsed struct {
|
|
Choices []struct {
|
|
Message struct {
|
|
Content string `json:"content"`
|
|
} `json:"message"`
|
|
} `json:"choices"`
|
|
}
|
|
if err := json.Unmarshal(buf, &parsed); err != nil {
|
|
return callModelResult{Error: "parse response: " + err.Error()}
|
|
}
|
|
content := ""
|
|
if len(parsed.Choices) > 0 {
|
|
content = parsed.Choices[0].Message.Content
|
|
}
|
|
return callModelResult{Content: content, OK: true}
|
|
}
|
|
}
|
|
|
|
// inferProvider picks the right /v1/chat provider for a given model
// name. Mirrors replay.ts:callModel's branching exactly so the gateway
// sees the same request shape regardless of caller runtime.
//
// "/" in name → openrouter
// kimi-/qwen3-coder/... → ollama_cloud
// else → ollama (local)
func inferProvider(model string) string {
	// Slash-qualified names (org/model) always route via OpenRouter.
	if strings.Contains(model, "/") {
		return "openrouter"
	}
	// Known hosted-model families go to ollama_cloud: some match by
	// prefix, two only by exact name.
	cloudPrefixes := []string{"kimi-", "qwen3-coder", "deepseek-v", "mistral-large"}
	for _, prefix := range cloudPrefixes {
		if strings.HasPrefix(model, prefix) {
			return "ollama_cloud"
		}
	}
	if model == "gpt-oss:120b" || model == "qwen3.5:397b" {
		return "ollama_cloud"
	}
	// Everything else is assumed to be a local Ollama model.
	return "ollama"
}
|
|
|
|
// dryRunSynthesize produces a deterministic synthetic response that
|
|
// echoes context-bundle signals. Used by tests + dry-run mode to
|
|
// exercise retrieval + validation without a live LLM.
|
|
func dryRunSynthesize(task string, bundle *ContextBundle) string {
|
|
parts := []string{
|
|
"Synthetic dry-run response for task: " + trim(task, 120),
|
|
"",
|
|
}
|
|
if bundle != nil {
|
|
parts = append(parts, fmt.Sprintf(
|
|
"Retrieved %d playbooks; %d accepted, %d partial.",
|
|
len(bundle.RetrievedPlaybooks),
|
|
len(bundle.PriorSuccessfulOutputs),
|
|
len(bundle.FailurePatterns),
|
|
))
|
|
if len(bundle.ValidationSteps) > 0 {
|
|
parts = append(parts, "Following validation checklist:")
|
|
for i, s := range bundle.ValidationSteps {
|
|
if i >= 3 {
|
|
break
|
|
}
|
|
parts = append(parts, "- "+s)
|
|
}
|
|
}
|
|
if len(bundle.PriorSuccessfulOutputs) > 0 {
|
|
parts = append(parts, "")
|
|
parts = append(parts, "Anchored on prior accepted: "+bundle.PriorSuccessfulOutputs[0].Title)
|
|
}
|
|
} else {
|
|
parts = append(parts, "No retrieval context — answering from task alone. Verify and check produced output before approving.")
|
|
}
|
|
return strings.Join(parts, "\n")
|
|
}
|