From 12ab39167932e1ada5b42855e87fce75a05456f0 Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 24 Apr 2026 06:09:34 -0500
Subject: [PATCH] scrum: swap mistral:latest defaults to ollama_cloud::gpt-oss:120b
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three default model lists hardcode mistral:latest as the fallback when
config.get("model_sets") / config.get("models") returns nothing. Per
feedback_no_mistral.md, mistral 7B has decoder-level JSON malformation
issues (0/5 fill rate on A/B) and is a liability in any path that depends
on structured output from the model.

Swapping to ollama_cloud::gpt-oss:120b (Phase 20 T3 cloud tier) keeps the
defaults reliable for the meta-pipeline orchestrator (line 9959), the
fallback model list for an empty Ollama (line 10084), and the worker pool
default (line 11835). All three are DEFAULTS: any caller passing an
explicit config.model_sets / config.models is unaffected.

Routing works because query_model's "::" provider prefix already resolves
ollama_cloud as of commit fa6ccff. Activation requires OLLAMA_CLOUD_API_KEY
or a key saved via the Admin UI; this PR does not change credential
behavior, only the default model lists.

Surfaced by the lakehouse scrum-master pipeline run on 2026-04-24; findings
confirmed by grep verification against the live code.
---
 llm_team_ui.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llm_team_ui.py b/llm_team_ui.py
index f1bf4db..5fa10fa 100644
--- a/llm_team_ui.py
+++ b/llm_team_ui.py
@@ -9956,7 +9956,7 @@ def _run_meta_pipeline(pipeline_id):
     stages = pipe["stages"] or ["extract", "research", "validate", "synthesize"]
     data_source = pipe["data_source"]
     config = pipe["config"] or {}
-    model_sets = config.get("model_sets", [["qwen2.5:latest"], ["mistral:latest"], ["gemma2:latest"]])
+    model_sets = config.get("model_sets", [["qwen2.5:latest"], ["ollama_cloud::gpt-oss:120b"], ["gemma2:latest"]])
     max_iterations = config.get("max_iterations", len(model_sets))
 
     _meta_status[pipeline_id] = {"stage": 0, "substep": "Gathering data...", "progress": 0, "iteration": 0}
@@ -10081,7 +10081,7 @@ def create_meta_pipeline():
         all_m = [m["name"] for m in resp.json().get("models", []) if m["size"] > 1e9]
         models = [[m] for m in all_m[:4]]
     except Exception:
-        models = [["qwen2.5:latest"], ["mistral:latest"]]
+        models = [["qwen2.5:latest"], ["ollama_cloud::gpt-oss:120b"]]
 
     config = {"model_sets": models, "max_iterations": len(models)}
     with get_db() as conn:
@@ -11832,7 +11832,7 @@ def run_refine(config):
     start = time.time() * 1000
     prompt = config["prompt"]
    orchestrator = config.get("orchestrator", "qwen2.5:latest")
-    workers = config.get("models", ["qwen2.5:latest", "mistral:latest"])
+    workers = config.get("models", ["qwen2.5:latest", "ollama_cloud::gpt-oss:120b"])
     max_stages = config.get("max_stages", 5)
     yield sse({"type": "clear"})
     steps = []
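
Reviewer note: a minimal sketch of how the "::" provider prefix is assumed
to be split before dispatch. This is not code from llm_team_ui.py; the
helper name and the default provider name below are illustrative only, and
the real resolution lives in query_model as of commit fa6ccff.

    import os

    def resolve_provider(model_ref):
        """Split an optional 'provider::model' reference into (provider, model).

        Assumption: a reference without '::' targets the local Ollama provider.
        """
        if "::" in model_ref:
            provider, model = model_ref.split("::", 1)
            return provider, model
        return "ollama", model_ref

    # With the new default entry: the split routes to the cloud tier,
    # but credential handling stays inside query_model itself.
    provider, model = resolve_provider("ollama_cloud::gpt-oss:120b")
    if provider == "ollama_cloud" and not os.environ.get("OLLAMA_CLOUD_API_KEY"):
        print("ollama_cloud selected but OLLAMA_CLOUD_API_KEY is not set")
    print(provider, model)  # -> ollama_cloud gpt-oss:120b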