// Package llm defines the model-provider abstraction. Phase A ships
// the interface only; Phase C adds the Ollama implementation.
//
// The Provider interface mirrors PROMPT.md / docs/LOCAL_MODEL_SETUP.md:
//
//	list_models()
//	complete(prompt, options)
//	complete_json(prompt, schema, options)
//	health_check()
//
// Phase A's stub doctor uses this only for HealthCheck; the rest
// is wired in Phase C.
package llm

import "context"

// HealthStatus is what HealthCheck returns. The shape is stable so
// the model-doctor JSON schema doesn't shift between phases.
type HealthStatus struct {
	ServerAvailable        bool     `json:"server_available"`
	PrimaryModelAvailable  bool     `json:"primary_model_available"`
	FallbackModelAvailable bool     `json:"fallback_model_available"`
	BasicPromptOK          bool     `json:"basic_prompt_ok"`
	JSONModeOK             bool     `json:"json_mode_ok"`
	Errors                 []string `json:"errors"`
}

// CompleteOptions tunes a non-streaming completion call.
type CompleteOptions struct {
	Temperature    float64
	MaxTokens      int
	TimeoutSeconds int
}

// Provider is the abstraction every model backend implements.
// G0 ships Ollama; OpenAI-compatible local endpoints land in
// Phase F+ when the harness needs them.
type Provider interface {
	// Name returns the short identifier (e.g. "ollama").
	Name() string

	// HealthCheck probes server reachability, primary and fallback
	// model availability, a basic prompt, and JSON-mode output.
	// Used by `model doctor`.
	HealthCheck(ctx context.Context, primaryModel, fallbackModel string) HealthStatus

	// Complete performs a non-streaming completion. Phase C wires this.
	Complete(ctx context.Context, model, prompt string, opts CompleteOptions) (string, error)

	// CompleteJSON requests strict JSON output. Phase C wires this.
	// Implementations should set Ollama's `format: "json"` or its
	// upstream-equivalent constrained-decoding flag.
	CompleteJSON(ctx context.Context, model, prompt string, opts CompleteOptions) (string, error)
}
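
// The sketch below is illustrative only, not part of the shipped API:
// it shows the minimal shape of a canned test double that a Phase A
// caller (e.g. the stub model-doctor) could program against before the
// real Ollama provider lands in Phase C. The name FakeProvider and its
// fields are assumptions, not identifiers from PROMPT.md.
type FakeProvider struct {
	Health   HealthStatus // returned verbatim by HealthCheck
	Response string       // returned verbatim by Complete and CompleteJSON
}

// Name identifies the fake backend.
func (f FakeProvider) Name() string { return "fake" }

// HealthCheck ignores the model names and returns the canned status.
func (f FakeProvider) HealthCheck(_ context.Context, _, _ string) HealthStatus {
	return f.Health
}

// Complete returns the canned response without contacting any server.
func (f FakeProvider) Complete(_ context.Context, _, _ string, _ CompleteOptions) (string, error) {
	return f.Response, nil
}

// CompleteJSON returns the canned response; callers are responsible
// for stocking it with valid JSON.
func (f FakeProvider) CompleteJSON(_ context.Context, _, _ string, _ CompleteOptions) (string, error) {
	return f.Response, nil
}

// Compile-time assertion that FakeProvider satisfies Provider.
var _ Provider = FakeProvider{}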