// Package chat provides the LLM chat abstraction over multiple
// providers (local Ollama, Ollama Cloud, OpenRouter, OpenCode, Kimi).
//
// Architecture per docs/SPEC.md §1 (chatd) + the small-model pipeline
// vision (project_small_model_pipeline_vision.md):
//
//   - Hot path runs on local Ollama via cheap `qwen3.5:latest` repeats.
//   - Oversight tier reaches Ollama Cloud (Pro plan) for tasks needing
//     more capacity (kimi-k2.6:cloud, qwen3-coder:480b, deepseek-v3.2).
//   - Frontier escalation goes through OpenRouter (claude-opus-4-7,
//     gpt-5, kimi-k2-0905) or OpenCode (free-tier opus-4-7) for
//     blockers that need full-scope reasoning.
//
// Provider resolution is by model-name prefix (mirroring the Rust
// gateway's pattern from crates/gateway/src/v1/mod.rs):
//
//	ollama/       → ollama (or bare names like "qwen3.5:latest")
//	ollama_cloud/ → ollama_cloud
//	openrouter/   → openrouter
//	opencode/     → opencode
//	kimi/         → kimi
//
// The prefix is stripped before the upstream call.
package chat

import (
	"context"
	"errors"
)

// Sentinel errors for the upper layers (gateway response codes,
// observability, retry decisions). Compare with errors.Is.
var (
	// ErrProviderNotFound — model name didn't resolve to any registered
	// provider. Maps to 404 at the HTTP layer.
	ErrProviderNotFound = errors.New("provider not found for model")

	// ErrProviderDisabled — the provider was registered but its key
	// resolved empty. Maps to 503 at the HTTP layer (operator can fix
	// by setting the env var; not a client bug).
	ErrProviderDisabled = errors.New("provider disabled (no auth key)")

	// ErrUpstream — the provider returned a non-2xx. Body included in
	// the wrapped error message. Maps to 502.
	ErrUpstream = errors.New("provider upstream error")

	// ErrTimeout — provider call exceeded the configured timeout.
	// Maps to 504.
	ErrTimeout = errors.New("provider timeout")
)

// Message is one entry in a Chat request's conversation. Role is
// "system", "user", or "assistant" — matches OpenAI/Anthropic shapes.
type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// Request is the provider-neutral chat request. Providers translate
// to their wire format (OpenAI Chat Completions, Ollama /api/chat,
// Ollama Cloud /api/generate, etc.). Format="json" asks the provider
// to return JSON-only output (constrained decoding when supported).
//
// Temperature is *float64 (pointer) so callers can distinguish "not
// set" (nil) from "set to 0" (deterministic). Anthropic 4.7 rejects
// the temperature field entirely, so providers omit it from the
// upstream call when this is nil.
type Request struct {
	Model       string    `json:"model"`
	Messages    []Message `json:"messages"`
	Temperature *float64  `json:"temperature,omitempty"`
	MaxTokens   int       `json:"max_tokens,omitempty"`
	Format      string    `json:"format,omitempty"` // "" or "json"
	Stream      bool      `json:"stream,omitempty"` // ignored in G0 — chatd is synchronous
}

// Temp is a convenience for callers building a Request literal:
//
//	chat.Request{Model: "...", Temperature: chat.Temp(0.0), ...}
func Temp(v float64) *float64 { return &v }

// Response is the provider-neutral chat response. LatencyMs is filled
// by the dispatcher (Provider implementations don't track it).
// Provider names the resolved provider for telemetry / debug.
type Response struct {
	Model        string `json:"model"`
	Content      string `json:"content"`
	InputTokens  int    `json:"input_tokens,omitempty"`
	OutputTokens int    `json:"output_tokens,omitempty"`
	FinishReason string `json:"finish_reason,omitempty"`
	Provider     string `json:"provider"`
	LatencyMs    int64  `json:"latency_ms"`
}

// Provider is the contract every backend must implement. Implementations
// must be safe for concurrent calls — the dispatcher shares one
// instance across all incoming requests.
//
// Chat receives the full Request including the prefixed model name
// (e.g. "openrouter/anthropic/claude-opus-4-7"). Implementations
// strip their prefix before the upstream call.
type Provider interface {
	// Name returns the provider's short identifier (matches the prefix
	// in model names). Used for telemetry and Response.Provider.
	Name() string

	// Chat performs one round-trip. Should respect ctx cancellation.
	Chat(ctx context.Context, req Request) (*Response, error)

	// Available reports whether the provider is ready to serve. False
	// means missing auth key or unreachable upstream — dispatcher
	// returns ErrProviderDisabled for callers using this provider.
	Available() bool
}