diff --git a/crates/vectord/src/rag.rs b/crates/vectord/src/rag.rs
index 007329a..286c00d 100644
--- a/crates/vectord/src/rag.rs
+++ b/crates/vectord/src/rag.rs
@@ -163,7 +163,11 @@ pub async fn query(
     // production caller of the Phase 21 primitives — see audit finding
     // "Phase 21 Rust primitives are wired but not CALLED by any
     // production surface" from 2026-04-21.
-    let mut cont_opts = ContinuableOpts::new("qwen2.5:latest");
+    // 2026-04-30 model bump: qwen2.5:latest → qwen3.5:latest to match
+    // the small-model-pipeline local-tier default. Same JSON-clean
+    // property, more capacity. think=Some(false) preserved — RAG hot
+    // path doesn't need reasoning traces; direct answers only.
+    let mut cont_opts = ContinuableOpts::new("qwen3.5:latest");
     cont_opts.max_tokens = Some(512);
     cont_opts.temperature = Some(0.2);
     cont_opts.shape = ResponseShape::Text;
@@ -176,7 +180,7 @@ pub async fn query(
         // echoes whatever Ollama loaded). Use the configured tier model
         // for now; if RAG needs to report the actual resolved model,
         // the runner can add a post-call ps probe later.
-        model: "qwen2.5:latest".to_string(),
+        model: "qwen3.5:latest".to_string(),
         sources: results,
         tokens_generated: None,
     })
diff --git a/lakehouse.toml b/lakehouse.toml
index 19061a1..4e828e1 100644
--- a/lakehouse.toml
+++ b/lakehouse.toml
@@ -48,8 +48,13 @@ url = "http://localhost:3200"
 
 [ai]
 embed_model = "nomic-embed-text"
-gen_model = "qwen2.5"
-rerank_model = "qwen2.5"
+# Local-tier defaults bumped 2026-04-30: qwen3.5:latest is the
+# stronger local rung in the 5-loop substrate (per
+# project_small_model_pipeline_vision.md). Same JSON-clean property
+# as qwen2.5, more capacity. Ollama still serves both — bump back
+# in this file if a workload regressed.
+gen_model = "qwen3.5:latest"
+rerank_model = "qwen3.5:latest"
 
 [auth]
 enabled = false
@@ -72,7 +77,9 @@ min_recall = 0.9 # never promote below this
 max_trials_per_hour = 20  # hard budget cap
 
 # Model roster — available for profile hot-swap
+# qwen3.5:latest: stronger local rung — JSON-clean, 8K+ context,
+# default for gen_model and rerank_model
 # qwen3: 8.2B, 40K context, thinking+tools, best for reasoning tasks
-# qwen2.5: 7B, 8K context, fast, good for SQL generation
-# mistral: 7B, 8K context, good for general generation
+# qwen2.5: 7B, 8K context, fast — kept loaded for the 2026-04 era
+# comparison runs; new defaults use qwen3.5:latest
 # nomic-embed-text: 137M, embedding-only, used by all profiles
diff --git a/mcp-server/index.ts b/mcp-server/index.ts
index ffeff3f..5576f79 100644
--- a/mcp-server/index.ts
+++ b/mcp-server/index.ts
@@ -313,9 +313,9 @@ ${(buckets as any[] || []).map((b: any) => `- ${b.name}: ${b.backend} (${b.reach
 - Ollama: :11434
 
 ## Available Models
+- qwen3.5:latest: stronger local rung, JSON-clean (default for gen + rerank)
 - qwen3: 8.2B, 40K context, thinking+tools (best for reasoning)
-- qwen2.5: 7B, 8K context (best for fast SQL generation)
-- mistral: 7B, 8K context (general generation)
+- qwen2.5: 7B, 8K context (legacy — 2026-04 era comparison runs only)
 - nomic-embed-text: 137M (embedding, automatic)
 `;
     return { contents: [{ uri: uri.href, mimeType: "text/plain", text }] };
diff --git a/mcp-server/observer.ts b/mcp-server/observer.ts
index 24e8042..7085b0c 100644
--- a/mcp-server/observer.ts
+++ b/mcp-server/observer.ts
@@ -146,15 +146,16 @@ async function persistOp(op: ObservedOp) {
 // ─── LLM Team escalation (code_review mode) ───
 //
 // When recent failures on a single sig_hash cross a threshold the
-// local qwen2.5 analysis is probably insufficient. J's 2026-04-24
+// local-model analysis is probably insufficient. J's 2026-04-24
 // direction: "the observer would trigger to give more context" —
 // route failure clusters to LLM Team's specialized code_review mode
 // (via /api/run) so richer structured signal lands in the KB for
 // scrum + auditor + playbook memory to consume next pass.
 //
-// Non-destructive: runs in parallel to the existing qwen2.5 analysis,
-// never replaces it. Writes to data/_kb/observer_escalations.jsonl
-// as a dedicated audit surface.
+// Non-destructive: runs in parallel to the existing local diagnose
+// call (qwen3.5:latest after the 2026-04-30 bump), never replaces
+// it. Writes to data/_kb/observer_escalations.jsonl as a dedicated
+// audit surface.
 
 const LLM_TEAM = process.env.LH_LLM_TEAM_URL ?? "http://localhost:5000";
 const LLM_TEAM_ESCALATIONS = "/home/profit/lakehouse/data/_kb/observer_escalations.jsonl";
@@ -542,7 +543,7 @@ async function analyzeErrors() {
   if (failures.length === 0) return;
 
   // NEW 2026-04-24: escalate recurring sig_hash clusters to LLM Team
-  // code_review mode. Runs in parallel to the local qwen2.5 analysis
+  // code_review mode. Runs in parallel to the local diagnose call
   // below — non-blocking, richer downstream signal for scrum/auditor.
   maybeEscalate(failures).catch(() => {});
 
@@ -552,13 +553,14 @@ async function analyzeErrors() {
 
   // Ask local model to diagnose. Phase 44 migration (2026-04-27):
   // /v1/chat instead of legacy /ai/generate so /v1/usage tracks the
-  // call + Langfuse traces it. Same upstream model (qwen2.5 local).
+  // call + Langfuse traces it. 2026-04-30 model bump: qwen2.5 →
+  // qwen3.5:latest to match the small-model-pipeline local-tier default.
   try {
     const resp = await fetch(`${LAKEHOUSE}/v1/chat`, {
       method: "POST",
       headers: { "Content-Type": "application/json" },
       body: JSON.stringify({
-        model: "qwen2.5",
+        model: "qwen3.5:latest",
         provider: "ollama",
         messages: [{ role: "user",