diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts
index 362d438..8af8fb2 100644
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@@ -37,6 +37,11 @@ const MAX_ATTEMPTS = 9;
 // crates/<crate>/src/*.rs.
 const FILE_TREE_SPLIT_THRESHOLD = Number(process.env.LH_SCRUM_TREE_SPLIT_THRESHOLD ?? 6000);
 const FILE_SHARD_SIZE = Number(process.env.LH_SCRUM_SHARD_SIZE ?? 3500);
+// Same-model retry budget after a quality reject (thin/unstructured
+// output or observer reject/cycle): each model gets 1 initial attempt
+// plus this many retries before the loop advances. Provider errors are
+// not counted (they advance at once). Invalid env values fall back to 2.
+const MAX_QUALITY_RETRIES = ((v: number) => (Number.isInteger(v) && v >= 0 ? v : 2))(Number(process.env.LH_SCRUM_MAX_QUALITY_RETRIES ?? 2));
 // Appended jsonl so auditor's kb_query can surface scrum findings for
 // files touched by a PR under review. Part of cohesion plan Phase C.
 const SCRUM_REVIEWS_JSONL = process.env.LH_SCRUM_REVIEWS_OUT
@@ -94,27 +99,24 @@ const TARGET_FILES: string[] = process.env.LH_SCRUM_FILES
 // Local fallbacks kept for cloud-down scenarios.
 // Hot-path pipelines (scenario.ts / execution_loop) stay local per
 // Phase 20 t1_hot — this scrum is not hot path.
+// 2026-04-25 J architectural correction: stop cascading models on
+// every failure. ONE model handles the work, with same-model retries
+// using enriched context. Quality issues signal that the context needs
+// more enrichment, not a different model, so the loop advances to the
+// next model only on a PROVIDER error (network/auth/5xx), a rate-limit
+// cap, or after MAX_QUALITY_RETRIES same-model retries are exhausted.
+//
+// Context overflow is handled by tree-split (treeSplitFile), which is
+// not a model switch: it re-runs the same model against smaller chunks.
+//
+// This ladder is now a SAFETY chain for provider failures, not the
+// strategy. Kimi K2.6, Gemini, free-tier, local fallback, etc. were
+// removed — they're available as routable tools later (mode router)
+// but not as automatic fallbacks.
 const LADDER: Array<{ provider: "ollama" | "ollama_cloud" | "openrouter"; model: string; note: string }> = [
-  // Paid-OpenRouter top of ladder (2026-04-25 J directive). These give
-  // us reliable cloud access independent of the Ollama Cloud account
-  // throttle that wedged iter 1-9. Kimi K2.6 has a 25/hour hard cap
-  // enforced by checkRateLimit() — when capped, the ladder skips it.
-  { provider: "openrouter",   model: "moonshotai/kimi-k2.6",                 note: "OR paid · Kimi K2.6 · $0.74/$4.66 per M · 256K · 25/hr cap" },
-  { provider: "openrouter",   model: "x-ai/grok-4.1-fast",                   note: "OR paid · Grok 4.1 fast · $0.20/$0.50 per M · 2M ctx" },
-  { provider: "openrouter",   model: "google/gemini-2.5-flash",              note: "OR paid · Gemini 2.5 flash · $0.30/$2.50 per M · 1M ctx" },
-  { provider: "openrouter",   model: "deepseek/deepseek-v4-flash",           note: "OR paid · DeepSeek V4 flash · $0.14/$0.28 per M · 1M ctx" },
-  { provider: "openrouter",   model: "qwen/qwen3-235b-a22b-2507",            note: "OR paid · Qwen3 235B · $0.07/$0.10 per M · 262K ctx" },
-  // Ollama Cloud — kept as middle rungs. May 429 under load (account
-  // throttle); ladder cycles through them quickly.
-  { provider: "ollama_cloud", model: "kimi-k2:1t",                           note: "cloud 1T — biggest available, 1.4s probe" },
-  { provider: "ollama_cloud", model: "qwen3-coder:480b",                     note: "cloud 480B — coding specialist, 0.9s probe" },
-  { provider: "ollama_cloud", model: "deepseek-v3.1:671b",                   note: "cloud 671B — fast reasoning (1.0s probe)" },
-  // Free-tier rescue — kept as later fallback. These hallucinate on
-  // grounding (10-21% verified 2026-04-25) and now must pass observer
-  // hand-review before scrum accepts them.
-  { provider: "openrouter",   model: "openai/gpt-oss-120b:free",             note: "OpenRouter free 120B — rescue (low grounding observed)" },
-  { provider: "openrouter",   model: "google/gemma-3-27b-it:free",           note: "OpenRouter free 27B — fastest rescue, 1.4s probe" },
-  { provider: "ollama",       model: "qwen3.5:latest",                       note: "local qwen3.5 — last-resort if all cloud down" },
+  { provider: "openrouter",   model: "x-ai/grok-4.1-fast",                   note: "PRIMARY · Grok 4.1 fast · $0.20/$0.50 · 2M ctx · single-model strategy" },
+  { provider: "openrouter",   model: "deepseek/deepseek-v4-flash",           note: "FALLBACK on provider error · DeepSeek V4 flash · $0.14/$0.28 · 1M ctx" },
+  { provider: "openrouter",   model: "qwen/qwen3-235b-a22b-2507",            note: "LAST FALLBACK on provider error · Qwen3 235B · $0.07/$0.10 · 262K" },
   // Dropped from the ladder after 2026-04-24 probe:
   //   - kimi-k2.6 — not available on current tier (empty response)
   //   - devstral-2:123b — displaced by qwen3-coder:480b (better coding specialist)
@@ -1122,18 +1124,32 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
   // Collect attempts for the pathway trace sidecar.
   const pathwayAttempts: LadderAttemptRec[] = [];
 
+  // Single-model strategy with same-model retry. modelIdx advances
+  // immediately on PROVIDER errors and rate-limit skips. Quality rejects
+  // (thin output or observer reject/cycle) keep the same model and retry
+  // with enriched context (history feeds back into the `learning`
+  // preamble so the model sees what was wrong). Once MAX_QUALITY_RETRIES
+  // retries are also rejected, advance to the next model in the safety chain.
+  let modelIdx = 0;
+  let qualityRetriesOnCurrentModel = 0;
+
   for (let step = 0; step < MAX_ATTEMPTS; step++) {
-    const i = ladderOrder[step];
+    if (modelIdx >= ladderOrder.length) {
+      log(`  ✗ all ${ladderOrder.length} fallback models exhausted, marking UNRESOLVED`);
+      break;
+    }
+    const i = ladderOrder[modelIdx];
     const n = step + 1;
     const rung = LADDER[i];
 
-    // Per-model rate limit (e.g. Kimi K2.6 capped at 25/hour). When
-    // capped, log + skip the rung. Doesn't increment `n` so subsequent
-    // logs stay readable; just continues to the next rung in ladderOrder.
+    // Per-model rate limit. When capped, advance modelIdx (this model
+    // is unavailable for the rest of the hour) and reset retries.
     const limit = MODEL_RATE_LIMITS[rung.model];
     if (limit && !(await checkRateLimit(rung.model, limit.perHour))) {
       log(`  attempt ${n}/${MAX_ATTEMPTS}: ${rung.provider}::${rung.model} — SKIP (rate-limited: cap ${limit.perHour}/hr reached)`);
       pathwayAttempts.push({ rung: i + 1, model: rung.model, latency_ms: 0, accepted: false, reject_reason: `rate-limited (cap ${limit.perHour}/hr)` });
+      modelIdx++;
+      qualityRetriesOnCurrentModel = 0;
       continue;
     }
 
@@ -1141,7 +1157,10 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
       ? `\n\n═══ PRIOR ATTEMPTS FAILED. Specific issues to fix: ═══\n${history.map(h => `Attempt ${h.n} (${h.model}, ${h.chars} chars): ${h.status} — ${h.error ?? "thin/unstructured answer"}`).join("\n")}\n═══`
       : "";
 
-    log(`  attempt ${n}/${MAX_ATTEMPTS}: ${rung.provider}::${rung.model}${learning ? " [w/ learning]" : ""}${pathwayPreamble ? " [w/ pathway memory]" : ""}`);
+    const retryTag = qualityRetriesOnCurrentModel > 0
+      ? ` [retry ${qualityRetriesOnCurrentModel + 1}/${MAX_QUALITY_RETRIES + 1} same model + enrichment]`
+      : "";
+    log(`  attempt ${n}/${MAX_ATTEMPTS}: ${rung.provider}::${rung.model}${learning ? " [w/ learning]" : ""}${pathwayPreamble ? " [w/ pathway memory]" : ""}${retryTag}`);
     const attemptStarted = Date.now();
     if (limit) await recordRateLimitCall(rung.model);
     const r = await chat({
@@ -1153,15 +1172,28 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
     const attemptMs = Date.now() - attemptStarted;
 
     if (r.error) {
+      // PROVIDER error (network, auth, 5xx) → cycle to next fallback
+      // model. Reset retry counter for the new model.
       history.push({ n, model: rung.model, status: "error", chars: 0, error: r.error.slice(0, 180) });
       pathwayAttempts.push({ rung: i + 1, model: rung.model, latency_ms: attemptMs, accepted: false, reject_reason: `error: ${r.error.slice(0, 100)}` });
-      log(`    ✗ error: ${r.error.slice(0, 80)}`);
+      log(`    ✗ provider error: ${r.error.slice(0, 80)} — advancing to next fallback model`);
+      modelIdx++;
+      qualityRetriesOnCurrentModel = 0;
       continue;
     }
     if (!isAcceptable(r.content)) {
+      // Thin/unstructured response = quality issue. Retry SAME model
+      // with the failure logged to learning so it sees what to fix.
       history.push({ n, model: rung.model, status: "thin", chars: r.content.length, error: `thin/unstructured (${r.content.length} chars)` });
       pathwayAttempts.push({ rung: i + 1, model: rung.model, latency_ms: attemptMs, accepted: false, reject_reason: `thin (${r.content.length} chars)` });
-      log(`    ✗ thin/unstructured (${r.content.length} chars)`);
+      qualityRetriesOnCurrentModel++;
+      if (qualityRetriesOnCurrentModel > MAX_QUALITY_RETRIES) {
+        log(`    ✗ thin (${r.content.length} chars) — quality retries exhausted on ${rung.model}, advancing fallback`);
+        modelIdx++;
+        qualityRetriesOnCurrentModel = 0;
+      } else {
+        log(`    ✗ thin (${r.content.length} chars) — retrying same model with enrichment hint`);
+      }
       continue;
     }
     // Compute grounding stats as DATA — feed to observer for hand-review.
@@ -1184,10 +1216,21 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
       attempt: n,
     });
     if (obsVerdict.verdict === "reject" || obsVerdict.verdict === "cycle") {
+      // Observer rejected on quality grounds → retry SAME model with
+      // the rejection notes feeding into `learning`. This is the
+      // architectural correction (J 2026-04-25): quality issues mean
+      // the context needs more enrichment, not a different model.
       const reason = `observer ${obsVerdict.verdict}: ${obsVerdict.notes ?? "no notes"} (conf=${obsVerdict.confidence ?? "?"})`;
       history.push({ n, model: rung.model, status: "thin", chars: r.content.length, error: reason });
       pathwayAttempts.push({ rung: i + 1, model: rung.model, latency_ms: attemptMs, accepted: false, reject_reason: reason });
-      log(`    ✗ ${reason} — cycling ladder`);
+      qualityRetriesOnCurrentModel++;
+      if (qualityRetriesOnCurrentModel > MAX_QUALITY_RETRIES) {
+        log(`    ✗ ${reason} — quality retries exhausted on ${rung.model}, advancing fallback`);
+        modelIdx++;
+        qualityRetriesOnCurrentModel = 0;
+      } else {
+        log(`    ✗ ${reason} — retrying same model with enrichment hint`);
+      }
       continue;
     }
     history.push({ n, model: rung.model, status: "accepted", chars: r.content.length });