root 0efc7363c5 scrum 2026-04-30: 4 real fixes + 2 INFOs from cross-lineage review
3-lineage scrum (Opus 4.7 / Kimi K2.6 / Qwen3-coder) on today's wave
landed 4 real findings (2 BLOCK + 2 WARN) and 2 INFO touch-ups.
Verbatim verdicts + disposition table at:
  reports/scrum/_evidence/2026-04-30/

B-1 (BLOCK Opus + INFO Kimi convergent) — ResolveKey API:
  collapse from 3-arg (envVar, envFileName, envFilePath) to 2-arg
  (envVar, envFilePath). Pre-fix, every chatd caller passed the env
  var name twice; if an operator renamed *_key_env in lakehouse.toml
  while keeping the canonical KEY= line in the .env file, the
  fallback silently missed it.
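
  Post-fix shape, as a sketch (only the 2-arg signature comes from
  the fix; the body and the readKeyFromEnvFile helper are
  illustrative):

    // ResolveKey: the env var wins; otherwise fall back to the
    // canonical KEY= line in the .env file at envFilePath.
    func ResolveKey(envVar, envFilePath string) string {
        if v := os.Getenv(envVar); v != "" {
            return v
        }
        return readKeyFromEnvFile(envFilePath) // hypothetical helper
    }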

B-2 (WARN Opus + WARN Kimi convergent) — handleProviders probe:
  drop the synthesize-then-Resolve probe; look up by name directly
  via Registry.Available(name). The prior probe synthesized
  "<name>/probe" model strings and routed them through Resolve,
  which was fragile to any future routing rule (e.g. a cloud-suffix
  special case).
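
  In miniature (handler shape assumed; Registry.Available and
  Registry.Names are the real lookups, per this fix and B-5):

    status := map[string]bool{}
    for _, name := range reg.Names() {
        status[name] = reg.Available(name) // no synthetic "<name>/probe"
    }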

B-3 (BLOCK Opus single — verified by trace + end-to-end probe) —
  OllamaCloud.Chat's StripPrefix used "cloud", but the registry
  routes "ollama_cloud/<m>". Result: the upstream got the
  still-prefixed model name and 400'd. Smoke missed it because
  chatd_smoke runs without ollama_cloud registered. Chat now strips
  the right prefix, and the new TestOllamaCloud_StripsCorrectPrefix
  locks both the prefix and suffix cases. Verified live:
  ollama_cloud/deepseek-v3.2 round-trips cleanly through the real
  ollama.com endpoint.
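
  The one-line fix, assuming strings.TrimPrefix (receiver and field
  names illustrative):

    // before (bug): stripped "cloud", which never matches, so
    // "ollama_cloud/<m>" reached the upstream intact and 400'd
    model := strings.TrimPrefix(req.Model, "ollama_cloud/")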

B-4 (WARN Opus single) — Ollama finishReason: read the done_reason
  field instead of inferring from the done bool alone. Newer Ollama
  reports done=true with done_reason="length" on truncation; the
  prior code mapped that to "stop" and lost the truncation signal
  the playbook_lift judge needs in order to retry. The new
  TestFinishReasonFromOllama_PrefersDoneReason covers the fallback
  ladder.
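
  The fallback ladder, sketched (finishReasonFromOllama matches the
  test name; the exact value mapping is an assumption):

    func finishReasonFromOllama(doneReason string, done bool) string {
        if doneReason != "" {
            return doneReason // "stop", "length", ...; trust upstream
        }
        if done {
            return "stop" // older Ollama: done=true, no done_reason
        }
        return ""
    }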

INFOs:
- B-5: replace the hand-rolled insertion sort in Registry.Names with
  sort.Strings (Opus called the "avoid sort import" comment a false
  economy — correct; sketch after this list).
- A-1: clarify the playbook_lift.sh comment around passing an empty
  -judge "" arg (Opus noted the comment said "env priority" but
  didn't reflect that the empty arg also flows through the Go
  driver's resolution chain).
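
B-5 in miniature (the Registry's internal providers map is assumed):

  func (r *Registry) Names() []string {
      names := make([]string, 0, len(r.providers))
      for name := range r.providers {
          names = append(names, name)
      }
      sort.Strings(names) // replaces the hand-rolled insertion sort
      return names
  }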

False positives dismissed (3, documented in disposition.md):
- Kimi: TestMaybeDowngrade_WithConfigList wrong assertion (the test
  IS correct per design — a model excluded from the weak list counts
  as strong, so it gets downgraded)
- Qwen: nil-deref claim (the defensive code already handles nil)
- Opus: qwen3.5:latest doesn't exist on the Ollama hub (true for the
  public hub, but the local install has it)

just verify: PASS. chatd_smoke 6/6 PASS. New regression tests:
3 (B-2, B-3, B-4 each get a focused test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 00:28:08 -05:00


// Package chat provides the LLM chat abstraction over multiple
// providers (local Ollama, Ollama Cloud, OpenRouter, OpenCode, Kimi).
//
// Architecture per docs/SPEC.md §1 (chatd) + the small-model pipeline
// vision (project_small_model_pipeline_vision.md):
//
//   - Hot path runs on local Ollama via cheap `qwen3.5:latest` repeats.
//   - Oversight tier reaches Ollama Cloud (Pro plan) for tasks needing
//     more capacity (kimi-k2.6:cloud, qwen3-coder:480b, deepseek-v3.2).
//   - Frontier escalation goes through OpenRouter (claude-opus-4-7,
//     gpt-5, kimi-k2-0905) or OpenCode (free-tier opus-4-7) for
//     blockers that need full-scope reasoning.
//
// Provider resolution is by model-name prefix (mirroring the Rust
// gateway's pattern from crates/gateway/src/v1/mod.rs):
//
//     ollama/<model>          → ollama (or bare names like "qwen3.5:latest")
//     ollama_cloud/<model>    → ollama_cloud
//     openrouter/<vendor>/<m> → openrouter
//     opencode/<model>        → opencode
//     kimi/<model>            → kimi
//
// The prefix is stripped before the upstream call.
package chat

import (
    "context"
    "errors"
)

// Sentinel errors for the upper layers (gateway response codes,
// observability, retry decisions).
var (
    // ErrProviderNotFound — model name didn't resolve to any registered
    // provider. Maps to 404 at the HTTP layer.
    ErrProviderNotFound = errors.New("provider not found for model")

    // ErrProviderDisabled — the provider was registered but its key
    // resolved empty. Maps to 503 at the HTTP layer (operator can fix
    // by setting the env var; not a client bug).
    ErrProviderDisabled = errors.New("provider disabled (no auth key)")

    // ErrUpstream — the provider returned a non-2xx. Body included in
    // the wrapped error message. Maps to 502.
    ErrUpstream = errors.New("provider upstream error")

    // ErrTimeout — provider call exceeded the configured timeout.
    // Maps to 504.
    ErrTimeout = errors.New("provider timeout")
)

// Message is one entry in a Chat request's conversation. Role is
// "system", "user", or "assistant" — matches OpenAI/Anthropic shapes.
type Message struct {
    Role    string `json:"role"`
    Content string `json:"content"`
}

// Request is the provider-neutral chat request. Providers translate
// to their wire format (OpenAI Chat Completions, Ollama /api/chat,
// Ollama Cloud /api/generate, etc.). Format="json" asks the provider
// to return JSON-only output (constrained decoding when supported).
//
// Temperature is *float64 (pointer) so callers can distinguish "not
// set" (nil) from "set to 0" (deterministic). Anthropic 4.7 rejects
// the temperature field entirely, so providers omit it from the
// upstream call when this is nil.
type Request struct {
    Model       string    `json:"model"`
    Messages    []Message `json:"messages"`
    Temperature *float64  `json:"temperature,omitempty"`
    MaxTokens   int       `json:"max_tokens,omitempty"`
    Format      string    `json:"format,omitempty"` // "" or "json"
    Stream      bool      `json:"stream,omitempty"` // ignored in G0 — chatd is synchronous
}

// Temp is a convenience for callers building a Request literal:
//
//     chat.Request{Model: "...", Temperature: chat.Temp(0.0), ...}
func Temp(v float64) *float64 { return &v }

// Response is the provider-neutral chat response. LatencyMs is filled
// by the dispatcher (Provider implementations don't track it).
// Provider names the resolved provider for telemetry / debug.
type Response struct {
    Model        string `json:"model"`
    Content      string `json:"content"`
    InputTokens  int    `json:"input_tokens,omitempty"`
    OutputTokens int    `json:"output_tokens,omitempty"`
    FinishReason string `json:"finish_reason,omitempty"`
    Provider     string `json:"provider"`
    LatencyMs    int64  `json:"latency_ms"`
}

// Provider is the contract every backend must implement. Implementations
// must be safe for concurrent calls — the dispatcher shares one
// instance across all incoming requests.
//
// Chat receives the full Request including the prefixed model name
// (e.g. "openrouter/anthropic/claude-opus-4-7"). Implementations
// strip their prefix before the upstream call.
type Provider interface {
    // Name returns the provider's short identifier (matches the prefix
    // in model names). Used for telemetry and Response.Provider.
    Name() string

    // Chat performs one round-trip. Should respect ctx cancellation.
    Chat(ctx context.Context, req Request) (*Response, error)

    // Available reports whether the provider is ready to serve. False
    // means missing auth key or unreachable upstream — dispatcher
    // returns ErrProviderDisabled for callers using this provider.
    Available() bool
}