package replay

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

// callModelResult is what the gateway round-trip returns.
type callModelResult struct {
	Content string // model completion text (empty on failure)
	OK      bool   // true only when a well-formed 2xx response was parsed
	Error   string // human-readable failure description, empty on success
}

// ModelCaller is the seam tests use to swap out HTTP. Production
// supplies httpModelCaller; tests can supply scripted responses.
type ModelCaller func(ctx context.Context, model, system, user string) callModelResult

// httpModelCaller posts to ${gatewayURL}/v1/chat with provider derived
// from model name. Mirrors replay.ts:callModel.
func httpModelCaller(gatewayURL string) ModelCaller {
	// One shared client so connections are reused across calls; the long
	// timeout allows for slow large-model completions.
	client := &http.Client{Timeout: 180 * time.Second}
	return func(ctx context.Context, model, system, user string) callModelResult {
		provider := inferProvider(model)
		body, err := json.Marshal(map[string]any{
			"provider": provider,
			"model":    model,
			"messages": []map[string]string{
				{"role": "system", "content": system},
				{"role": "user", "content": user},
			},
			"max_tokens":  1500,
			"temperature": 0.1,
		})
		if err != nil {
			return callModelResult{Error: "marshal request: " + err.Error()}
		}
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, gatewayURL+"/v1/chat", bytes.NewReader(body))
		if err != nil {
			return callModelResult{Error: "build request: " + err.Error()}
		}
		req.Header.Set("Content-Type", "application/json")
		resp, err := client.Do(req)
		if err != nil {
			return callModelResult{Error: trim(err.Error(), 240)}
		}
		defer resp.Body.Close()
		buf, readErr := io.ReadAll(resp.Body)
		if resp.StatusCode >= 400 {
			// Error bodies are surfaced (truncated) even if the read was
			// partial — whatever bytes arrived are still useful diagnostics.
			return callModelResult{Error: fmt.Sprintf("HTTP %d: %s", resp.StatusCode, trim(string(buf), 240))}
		}
		if readErr != nil {
			// Previously ignored: a truncated 2xx body would fall through to
			// json.Unmarshal and report a misleading "parse response" error.
			return callModelResult{Error: "read response: " + readErr.Error()}
		}
		var parsed struct {
			Choices []struct {
				Message struct {
					Content string `json:"content"`
				} `json:"message"`
			} `json:"choices"`
		}
		if err := json.Unmarshal(buf, &parsed); err != nil {
			return callModelResult{Error: "parse response: " + err.Error()}
		}
		// An empty choices array yields OK with empty Content — callers
		// decide whether that counts as a usable completion.
		content := ""
		if len(parsed.Choices) > 0 {
			content = parsed.Choices[0].Message.Content
		}
		return callModelResult{Content: content, OK: true}
	}
}

// inferProvider picks the right /v1/chat provider for a given model
// name. Mirrors replay.ts:callModel's branching exactly so the gateway
// sees the same request shape regardless of caller runtime.
//
//	"/" in name            → openrouter
//	kimi-/qwen3-coder/...  → ollama_cloud
//	else                   → ollama (local)
func inferProvider(model string) string {
	if strings.Contains(model, "/") {
		return "openrouter"
	}
	switch {
	case strings.HasPrefix(model, "kimi-"),
		strings.HasPrefix(model, "qwen3-coder"),
		strings.HasPrefix(model, "deepseek-v"),
		strings.HasPrefix(model, "mistral-large"),
		model == "gpt-oss:120b",
		model == "qwen3.5:397b":
		return "ollama_cloud"
	}
	return "ollama"
}

// dryRunSynthesize produces a deterministic synthetic response that
// echoes context-bundle signals. Used by tests + dry-run mode to
// exercise retrieval + validation without a live LLM.
func dryRunSynthesize(task string, bundle *ContextBundle) string {
	parts := []string{
		"Synthetic dry-run response for task: " + trim(task, 120),
		"",
	}
	if bundle != nil {
		// NOTE(review): the labels "accepted"/"partial" are fed from
		// PriorSuccessfulOutputs and FailurePatterns respectively — confirm
		// the wording matches the intended fields before relying on it.
		parts = append(parts, fmt.Sprintf(
			"Retrieved %d playbooks; %d accepted, %d partial.",
			len(bundle.RetrievedPlaybooks),
			len(bundle.PriorSuccessfulOutputs),
			len(bundle.FailurePatterns),
		))
		if len(bundle.ValidationSteps) > 0 {
			parts = append(parts, "Following validation checklist:")
			// Echo at most three checklist entries to keep output compact.
			for i, s := range bundle.ValidationSteps {
				if i >= 3 {
					break
				}
				parts = append(parts, "- "+s)
			}
		}
		if len(bundle.PriorSuccessfulOutputs) > 0 {
			parts = append(parts, "")
			parts = append(parts, "Anchored on prior accepted: "+bundle.PriorSuccessfulOutputs[0].Title)
		}
	} else {
		parts = append(parts, "No retrieval context — answering from task alone. Verify and check produced output before approving.")
	}
	return strings.Join(parts, "\n")
}