scrum: swap mistral:latest defaults to ollama_cloud::gpt-oss:120b
Three default model lists hardcoded mistral:latest as the fallback used
when config.get("model_sets") / config.get("models") returns nothing. Per
feedback_no_mistral.md, Mistral 7B has decoder-level JSON malformation
issues (0/5 fill rate on the A/B run) and is a liability in any path that
depends on structured output from the model.
Swapping to ollama_cloud::gpt-oss:120b (Phase 20 T3 cloud tier)
keeps the defaults reliable for the meta-pipeline orchestrator
(line 9959), the fallback model list used when the local Ollama
instance reports no models (line 10084), and the worker pool default
(line 11835). All three are DEFAULTS: any caller passing an explicit
config.model_sets / config.models is unaffected, as in the
hypothetical example below.
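For example, a caller-supplied config (hypothetical payload, not taken
from this diff) bypasses the new default entirely:

    # hypothetical caller config; an explicit model_sets wins over the default
    config = {
        "model_sets": [["qwen2.5:latest"], ["llama3.1:latest"]],
        "max_iterations": 2,
    }
    # config.get("model_sets", <default>) returns the explicit list above,
    # so the ollama_cloud::gpt-oss:120b fallback is never consulted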
Routing works because query_model's "::" provider prefix already
resolves ollama_cloud via commit fa6ccff. Activation requires
OLLAMA_CLOUD_API_KEY or a key saved via the Admin UI; this PR does
not change credential behavior, only the default model list.
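For context, the provider-prefix handling this relies on works roughly
along these lines (a minimal sketch, assuming query_model splits on "::"
and checks OLLAMA_CLOUD_API_KEY in the environment; the provider names
and the Admin UI key path are simplified, and this is not the exact code
from fa6ccff):

    import os

    def resolve_model(name):
        # split an optional "provider::model" prefix, e.g.
        # "ollama_cloud::gpt-oss:120b" -> ("ollama_cloud", "gpt-oss:120b")
        provider, sep, model = name.partition("::")
        if not sep:
            # plain names like "qwen2.5:latest" keep the local provider
            return "ollama_local", name
        if provider == "ollama_cloud" and not os.environ.get("OLLAMA_CLOUD_API_KEY"):
            raise RuntimeError("ollama_cloud model requested but no API key is configured")
        return provider, model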
Surfaced by the lakehouse scrum-master pipeline run on 2026-04-24;
findings confirmed by grep verification against the live code.
parent 205eff64b4
commit 12ab391679
@@ -9956,7 +9956,7 @@ def _run_meta_pipeline(pipeline_id):
     stages = pipe["stages"] or ["extract", "research", "validate", "synthesize"]
     data_source = pipe["data_source"]
     config = pipe["config"] or {}
-    model_sets = config.get("model_sets", [["qwen2.5:latest"], ["mistral:latest"], ["gemma2:latest"]])
+    model_sets = config.get("model_sets", [["qwen2.5:latest"], ["ollama_cloud::gpt-oss:120b"], ["gemma2:latest"]])
     max_iterations = config.get("max_iterations", len(model_sets))

     _meta_status[pipeline_id] = {"stage": 0, "substep": "Gathering data...", "progress": 0, "iteration": 0}
@@ -10081,7 +10081,7 @@ def create_meta_pipeline():
         all_m = [m["name"] for m in resp.json().get("models", []) if m["size"] > 1e9]
         models = [[m] for m in all_m[:4]]
     except Exception:
-        models = [["qwen2.5:latest"], ["mistral:latest"]]
+        models = [["qwen2.5:latest"], ["ollama_cloud::gpt-oss:120b"]]

     config = {"model_sets": models, "max_iterations": len(models)}
     with get_db() as conn:
@@ -11832,7 +11832,7 @@ def run_refine(config):
     start = time.time() * 1000
     prompt = config["prompt"]
     orchestrator = config.get("orchestrator", "qwen2.5:latest")
-    workers = config.get("models", ["qwen2.5:latest", "mistral:latest"])
+    workers = config.get("models", ["qwen2.5:latest", "ollama_cloud::gpt-oss:120b"])
     max_stages = config.get("max_stages", 5)
     yield sse({"type": "clear"})
     steps = []