Fix Ollama timeout: set num_ctx dynamically, truncate oversized prompts
Root cause: query_ollama() sent no num_ctx option, so Ollama defaulted to 2048 tokens. Research mode with 15 questions builds prompts that exceed model context windows, causing Ollama to hang until the 300s timeout.

Fix:
- Calculate num_ctx from the prompt size plus a 1024-token response buffer
- Cap num_ctx at the model's actual context limit
- Truncate prompts that exceed the context window minus 512 response tokens, using smart_truncate() to preserve the start and end of the prompt
- Update the MODEL_CONTEXT map with accurate limits for all local models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1ac7a436e6
commit
59379c624d
@ -2751,8 +2751,16 @@ def _get_timeout(model_id):
|
||||
def query_ollama(model, prompt, timeout):
    """Send one non-streaming generation request to a local Ollama server.

    num_ctx is sized explicitly from the estimated prompt length plus a
    1024-token response buffer, clamped between Ollama's 2048 default and
    the model's context limit; prompts too large for the window are
    shortened via smart_truncate() before sending.

    Returns the generated text from the server's "response" field.
    Raises requests.HTTPError on a non-2xx reply.
    """
    settings = load_config()
    endpoint_base = settings["providers"]["ollama"].get("base_url", "http://localhost:11434")

    # Size num_ctx explicitly — Ollama's 2048-token default is too small.
    estimated = estimate_tokens(prompt)
    window = get_context_limit(model)
    requested_ctx = max(estimated + 1024, 2048)
    num_ctx = min(requested_ctx, window)

    # Keep at least 512 tokens of headroom for the model's response.
    if estimated > window - 512:
        prompt = smart_truncate(prompt, window - 512)

    payload = {
        "model": model, "prompt": prompt, "stream": False,
        "options": {"num_ctx": num_ctx}
    }
    resp = requests.post(f"{endpoint_base}/api/generate", json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["response"]
|
||||
@ -2803,7 +2811,9 @@ def query_model(model_id, prompt):
|
||||
|
||||
# Context window sizes (tokens) — conservative estimates for safe prompting.
# Keys are model-name prefixes matched against the configured model id;
# values feed num_ctx clamping and prompt truncation in query_ollama().
# (Removed a stale duplicate entry line that repeated llama3.2 / mistral /
# gemma2 / qwen2.5 — duplicate dict keys silently overwrite and invite
# inconsistent edits.)
MODEL_CONTEXT = {
    "llama3.2": 4096, "llama3.1": 8192, "llama3": 8192,
    "mistral": 8192, "gemma2": 8192, "gemma3": 32768,
    "qwen2.5": 8192, "qwen3": 32768,
    "gpt-oss": 4096, "gpt-4o": 128000, "gpt-4o-mini": 128000,
    "claude-3": 200000, "claude-sonnet": 200000, "claude-haiku": 200000,
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user