v1/mode: model-aware enrichment downgrade + 3 corpora + variance harness
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Pass 5 (5 reps × 4 conditions × 1 file on grok-4.1-fast) showed composing matrix corpora is anti-additive on strong models — composed lakehouse_arch + symbols LOST 5/5 head-to-head vs codereview_isolation (Δ −1.8 grounded findings, p=0.031). Default flips to isolation; matrix path now auto- downgrades when the resolved model is strong. Mode runner: - matrix_corpus is Vec<String> (string OR array via deserialize_string_or_vec) - top_k=6 from each corpus, merge by score, take top 8 globally - chunk tag prefers doc_id over source so reviewer sees [adr:009] vs [lakehouse_arch] - is_weak_model() gate auto-downgrades codereview_lakehouse → codereview_isolation for strong models (default-strong; weak = :free suffix or local last-resort) - LH_FORCE_FULL_ENRICHMENT=1 bypasses for diagnostic runs - EnrichmentSources.downgraded_from records when the gate fires Three corpora indexed via /vectors/index (5849 chunks total): - lakehouse_arch_v1 — ADRs + phases + PRD + scrum spec (93 docs, 2119 chunks) - scrum_findings_v1 — past scrum_reviews.jsonl (168 docs, 1260 chunks; EXCLUDED from defaults — 24% out-of-bounds line citations from cross-file drift) - lakehouse_symbols_v1 — regex-extracted pub items + /// docs (656 docs, 2470 chunks) Experiment infra: - scripts/build_*_corpus.ts — re-runnable when source content changes - scripts/mode_pass5_variance_paid.ts — N reps × M conditions on one file - scripts/mode_pass5_summarize.ts — mean ± σ + head-to-head, parser handles numbered + path-with-line + path-with-symbol finding tables - scripts/mode_compare.ts — groups by mode|corpus when sweeps span corpora - scripts/mode_experiment.ts — default model bumped to x-ai/grok-4.1-fast, --corpus flag for per-call override Decisions + open follow-ups: docs/MODE_RUNNER_TUNING_PLAN.md Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
56bf30cfd8
commit
2dbc8dbc83
@ -12,15 +12,22 @@
|
||||
|
||||
[[task_class]]
|
||||
name = "scrum_review"
|
||||
# `codereview_lakehouse` is the codebase-specific enrichment runner —
|
||||
# bundles defined/imported symbols, pathway-memory bug fingerprints,
|
||||
# and relevance-filtered matrix chunks into ONE precise prompt so the
|
||||
# model gets it right the first call. The generic `codereview` mode
|
||||
# from LLM Team is still the network fallback if execute fails.
|
||||
preferred_mode = "codereview_lakehouse"
|
||||
fallback_modes = ["codereview", "consensus", "ladder"]
|
||||
# 2026-04-26 pass5 variance test (5 reps × 4 conditions, grok-4.1-fast,
|
||||
# pathway_memory.rs): composed corpus LOST 5/5 vs isolation (Δ −1.8
|
||||
# grounded findings, p=0.031). See docs/MODE_RUNNER_TUNING_PLAN.md.
|
||||
# Default is now isolation — bug fingerprints + adversarial framing +
|
||||
# file content carries strong models without matrix noise. The
|
||||
# `codereview_lakehouse` matrix path remains available via force_mode
|
||||
# (auto-downgrades to isolation on strong models — see the
|
||||
# is_strong_model gate in crates/gateway/src/v1/mode.rs).
|
||||
preferred_mode = "codereview_isolation"
|
||||
fallback_modes = ["codereview_lakehouse", "codereview", "consensus", "ladder"]
|
||||
default_model = "qwen3-coder:480b"
|
||||
matrix_corpus = "distilled_procedural_v20260423102847"
|
||||
# Corpora kept defined so experimental modes (codereview_matrix_only,
|
||||
# pass2/pass5 sweeps) and weak-model rescue rungs can still pull them.
|
||||
# scrum_findings_v1 is built but EXCLUDED — bake-off showed 24% OOB
|
||||
# line citations from cross-file drift, only safe with same-file gating.
|
||||
matrix_corpus = ["lakehouse_arch_v1", "lakehouse_symbols_v1"]
|
||||
|
||||
[[task_class]]
|
||||
name = "contract_analysis"
|
||||
|
||||
@ -147,8 +147,31 @@ pub struct TaskClassEntry {
|
||||
#[serde(default)]
|
||||
pub fallback_modes: Vec<String>,
|
||||
pub default_model: String,
|
||||
#[serde(default)]
|
||||
pub matrix_corpus: Option<String>,
|
||||
/// One or more corpora the mode runner queries (top-k per corpus,
|
||||
/// merged by score before the relevance filter). Accepts a single
|
||||
/// string or an array in modes.toml — `deserialize_string_or_vec`
|
||||
/// handles both shapes for backward compat.
|
||||
#[serde(default, deserialize_with = "deserialize_string_or_vec")]
|
||||
pub matrix_corpus: Vec<String>,
|
||||
}
|
||||
|
||||
/// Accept `key = "x"` or `key = ["x", "y"]` in TOML/JSON. Empty string or
|
||||
/// missing field → empty vec.
|
||||
fn deserialize_string_or_vec<'de, D>(d: D) -> Result<Vec<String>, D::Error>
|
||||
where D: serde::Deserializer<'de> {
|
||||
use serde::de::Error;
|
||||
let v = serde_json::Value::deserialize(d).map_err(D::Error::custom)?;
|
||||
match v {
|
||||
serde_json::Value::Null => Ok(vec![]),
|
||||
serde_json::Value::String(s) if s.is_empty() => Ok(vec![]),
|
||||
serde_json::Value::String(s) => Ok(vec![s]),
|
||||
serde_json::Value::Array(a) => a
|
||||
.into_iter()
|
||||
.map(|x| x.as_str().map(String::from)
|
||||
.ok_or_else(|| D::Error::custom("matrix_corpus array must contain strings")))
|
||||
.collect(),
|
||||
other => Err(D::Error::custom(format!("matrix_corpus must be string or array, got {other:?}"))),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
@ -234,7 +257,7 @@ pub struct DecisionTrace {
|
||||
pub task_class_matched: bool,
|
||||
pub source: &'static str, // "config" | "default" | "force_mode"
|
||||
pub fallbacks: Vec<String>,
|
||||
pub matrix_corpus: Option<String>,
|
||||
pub matrix_corpus: Vec<String>,
|
||||
pub notes: Vec<String>,
|
||||
}
|
||||
|
||||
@ -279,7 +302,7 @@ pub async fn route(
|
||||
task_class_matched: cfg.lookup(&req.task_class).is_some(),
|
||||
source: "force_mode",
|
||||
fallbacks: vec![],
|
||||
matrix_corpus: None,
|
||||
matrix_corpus: vec![],
|
||||
notes,
|
||||
},
|
||||
}));
|
||||
@ -349,7 +372,7 @@ pub async fn route(
|
||||
task_class_matched: false,
|
||||
source: "default",
|
||||
fallbacks: cfg.default.fallback_modes.clone(),
|
||||
matrix_corpus: None,
|
||||
matrix_corpus: vec![],
|
||||
notes,
|
||||
},
|
||||
}))
|
||||
@ -419,11 +442,13 @@ pub struct ExecuteRequest {
|
||||
/// runner uses its built-in forensic-review framing.
|
||||
#[serde(default)]
|
||||
pub user_question: Option<String>,
|
||||
/// Override the matrix corpus the runner queries. Defaults to the
|
||||
/// task_class's matrix_corpus from modes.toml. Use for the corpus-
|
||||
/// tightening experiment (Pass 2 of the 2026-04-26 mode sweep).
|
||||
#[serde(default)]
|
||||
pub force_matrix_corpus: Option<String>,
|
||||
/// Override the matrix corpus (or corpora) the runner queries.
|
||||
/// Accepts a single string or array — same semantics as
|
||||
/// modes.toml's `matrix_corpus`. Empty/missing → use the task
|
||||
/// class default. Multi-corpus path: top-k retrieved from each,
|
||||
/// merged and re-sorted by score before the relevance filter.
|
||||
#[serde(default, deserialize_with = "deserialize_string_or_vec")]
|
||||
pub force_matrix_corpus: Vec<String>,
|
||||
/// Override the relevance filter threshold (default 0.3). Setting
|
||||
/// to 0 keeps every chunk; raising rejects more aggressively. Used
|
||||
/// to find the threshold sweet spot per task class.
|
||||
@ -441,8 +466,13 @@ pub struct EnrichmentSources {
|
||||
pub bug_fingerprints_count: usize,
|
||||
pub matrix_chunks_kept: usize,
|
||||
pub matrix_chunks_dropped: usize,
|
||||
pub matrix_corpus: Option<String>,
|
||||
pub matrix_corpus: Vec<String>,
|
||||
pub relevance_filter_used: bool,
|
||||
/// Set when the model-aware downgrade fires — records the mode the
|
||||
/// caller was originally routed to before is_weak_model() flipped
|
||||
/// it. None means no downgrade happened.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub downgraded_from: Option<String>,
|
||||
pub enrichment_warnings: Vec<String>,
|
||||
/// Which enrichment knobs the runner used for this mode. Lets
|
||||
/// the comparison aggregator group runs by signal-set.
|
||||
@ -488,6 +518,30 @@ fn framing_text(f: ReviewerFraming) -> &'static str {
|
||||
}
|
||||
}
|
||||
|
||||
/// Strong-model heuristic for the model-aware enrichment downgrade.
|
||||
///
|
||||
/// Pass 5 variance test (2026-04-26, see docs/MODE_RUNNER_TUNING_PLAN.md)
|
||||
/// proved that on `x-ai/grok-4.1-fast`, composing matrix corpora into the
|
||||
/// `codereview_lakehouse` prompt LOST 5/5 head-to-head reps against the
|
||||
/// matrix-free `codereview_isolation` mode. Strong models have enough
|
||||
/// native capacity that bug fingerprints + adversarial framing + file
|
||||
/// content carry them; matrix chunks displace depth-of-analysis.
|
||||
///
|
||||
/// We default to "strong" (downgrade matrix off) because most production
|
||||
/// traffic uses paid models. The explicit `weak` predicate keeps the
|
||||
/// list small and easy to extend — anything matching `:free` (OpenRouter
|
||||
/// free tier) or the local last-resort qwen3.5 stays on the full
|
||||
/// `codereview_lakehouse` path where matrix demonstrably helped during
|
||||
/// the 2026-04-26 free-tier bake-off.
|
||||
fn is_weak_model(model: &str) -> bool {
|
||||
if model.ends_with(":free") || model.contains(":free/") {
|
||||
return true;
|
||||
}
|
||||
// Local last-resort rung from the scrum ladder. Other local models
|
||||
// can be added here as we test them.
|
||||
matches!(model, "qwen3.5:latest" | "qwen3:latest")
|
||||
}
|
||||
|
||||
pub async fn execute(
|
||||
State(_state): State<V1State>,
|
||||
Json(req): Json<ExecuteRequest>,
|
||||
@ -507,7 +561,40 @@ pub async fn execute(
|
||||
.clone()
|
||||
.or_else(|| tc.map(|t| t.default_model.clone()))
|
||||
.unwrap_or_else(|| cfg.default.default_model.clone());
|
||||
let matrix_corpus = tc.and_then(|t| t.matrix_corpus.clone());
|
||||
let matrix_corpus: Vec<String> = tc
|
||||
.map(|t| t.matrix_corpus.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Model-aware enrichment downgrade (2026-04-26 pass 5 finding).
|
||||
// If a caller resolves `codereview_lakehouse` against a strong
|
||||
// model, downgrade to `codereview_isolation` so we don't pollute
|
||||
// the prompt with matrix chunks the model would do better without.
|
||||
// `LH_FORCE_FULL_ENRICHMENT=1` bypasses for diagnostic runs.
|
||||
// `force_mode` from the caller is treated as opt-in to the chosen
|
||||
// mode and skips the downgrade — experiments need to inspect exact
|
||||
// mode behavior on whatever model they pass.
|
||||
let force_full = std::env::var("LH_FORCE_FULL_ENRICHMENT")
|
||||
.map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
|
||||
.unwrap_or(false);
|
||||
let downgraded_from = if mode == "codereview_lakehouse"
|
||||
&& req.force_mode.is_none()
|
||||
&& !force_full
|
||||
&& !is_weak_model(&model)
|
||||
{
|
||||
tracing::info!(
|
||||
target: "v1::mode",
|
||||
"downgrade codereview_lakehouse -> codereview_isolation for strong model {}",
|
||||
model
|
||||
);
|
||||
Some(mode.clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let mode = if downgraded_from.is_some() {
|
||||
"codereview_isolation".to_string()
|
||||
} else {
|
||||
mode
|
||||
};
|
||||
|
||||
if !is_native_mode(&mode) {
|
||||
// Native execute is the only path implemented; LLM-Team proxy
|
||||
@ -525,12 +612,17 @@ pub async fn execute(
|
||||
}
|
||||
|
||||
// Caller can override the matrix corpus per-call (Pass 2 corpus
|
||||
// tightening). Falls back to modes.toml default.
|
||||
let matrix_corpus = req.force_matrix_corpus.clone().or(matrix_corpus);
|
||||
// tightening). Empty force_matrix_corpus falls back to modes.toml.
|
||||
let matrix_corpus: Vec<String> = if req.force_matrix_corpus.is_empty() {
|
||||
matrix_corpus
|
||||
} else {
|
||||
req.force_matrix_corpus.clone()
|
||||
};
|
||||
let flags = flags_for_mode(&mode);
|
||||
let mut sources = EnrichmentSources {
|
||||
matrix_corpus: matrix_corpus.clone(),
|
||||
flags: Some(flags),
|
||||
downgraded_from: downgraded_from.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@ -613,38 +705,64 @@ pub async fn execute(
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: matrix corpus search (if configured for this task class).
|
||||
// Step 3: matrix corpus search. Multi-corpus path: query top_k from
|
||||
// each, merge, re-sort by score, take top 8 overall before the
|
||||
// relevance filter — orthogonal corpora (e.g. arch + symbols) get
|
||||
// composed without one swamping the other on chunk count alone.
|
||||
let mut raw_chunks: Vec<serde_json::Value> = vec![];
|
||||
if flags.include_matrix_chunks {
|
||||
if let Some(corpus) = &matrix_corpus {
|
||||
let body = serde_json::json!({
|
||||
"index_name": corpus,
|
||||
"query": format!("{} {}\n{}", req.task_class, req.file_path, &file_content[..file_content.len().min(500)]),
|
||||
"top_k": 8,
|
||||
});
|
||||
match client
|
||||
.post("http://localhost:3100/vectors/search")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) if r.status().is_success() => {
|
||||
if let Ok(j) = r.json::<serde_json::Value>().await {
|
||||
raw_chunks = j
|
||||
.get("results")
|
||||
.and_then(|v| v.as_array())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
if flags.include_matrix_chunks && !matrix_corpus.is_empty() {
|
||||
let query_str = format!(
|
||||
"{} {}\n{}",
|
||||
req.task_class,
|
||||
req.file_path,
|
||||
&file_content[..file_content.len().min(500)]
|
||||
);
|
||||
let per_corpus_k = if matrix_corpus.len() == 1 { 8 } else { 6 };
|
||||
for corpus in &matrix_corpus {
|
||||
let body = serde_json::json!({
|
||||
"index_name": corpus,
|
||||
"query": query_str,
|
||||
"top_k": per_corpus_k,
|
||||
});
|
||||
match client
|
||||
.post("http://localhost:3100/vectors/search")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) if r.status().is_success() => {
|
||||
if let Ok(j) = r.json::<serde_json::Value>().await {
|
||||
if let Some(arr) = j.get("results").and_then(|v| v.as_array()) {
|
||||
for mut c in arr.iter().cloned() {
|
||||
// Tag the corpus origin on each chunk so
|
||||
// dropped/kept telemetry can attribute
|
||||
// signal back to its source corpus.
|
||||
if let serde_json::Value::Object(ref mut obj) = c {
|
||||
obj.insert(
|
||||
"corpus".to_string(),
|
||||
serde_json::Value::String(corpus.clone()),
|
||||
);
|
||||
}
|
||||
raw_chunks.push(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(r) => sources
|
||||
.enrichment_warnings
|
||||
.push(format!("matrix_search[{}] HTTP {}", corpus, r.status())),
|
||||
Err(e) => sources
|
||||
.enrichment_warnings
|
||||
.push(format!("matrix_search[{}] err: {e}", corpus)),
|
||||
}
|
||||
Ok(r) => sources
|
||||
.enrichment_warnings
|
||||
.push(format!("matrix_search HTTP {}", r.status())),
|
||||
Err(e) => sources
|
||||
.enrichment_warnings
|
||||
.push(format!("matrix_search err: {e}")),
|
||||
}
|
||||
} // close `if let Some(corpus)`
|
||||
// Sort merged chunks by score desc and take the global top 8.
|
||||
raw_chunks.sort_by(|a, b| {
|
||||
let sa = a.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sb = b.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
sb.partial_cmp(&sa).unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
raw_chunks.truncate(8);
|
||||
}
|
||||
|
||||
// Step 4: relevance filter — drop adjacency pollution.
|
||||
@ -709,9 +827,16 @@ pub async fn execute(
|
||||
if flags.include_matrix_chunks && !kept_chunks.is_empty() {
|
||||
user_prompt.push_str("📁 RELATED CONTEXT (matrix chunks):\n");
|
||||
for c in &kept_chunks {
|
||||
let src = c.get("source").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let txt = c.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
user_prompt.push_str(&format!(" [{}] {}\n", src, &txt[..txt.len().min(280)]));
|
||||
// Prefer doc_id for the tag — corpus builders encode origin
|
||||
// in doc_id (e.g. `adr:017`, `phase:19`) so the reviewer sees
|
||||
// useful provenance instead of a generic source label.
|
||||
let tag = c.get("doc_id").and_then(|v| v.as_str())
|
||||
.filter(|s| !s.is_empty())
|
||||
.or_else(|| c.get("source").and_then(|v| v.as_str()))
|
||||
.unwrap_or("?");
|
||||
let txt = c.get("text").or_else(|| c.get("chunk_text"))
|
||||
.and_then(|v| v.as_str()).unwrap_or("");
|
||||
user_prompt.push_str(&format!(" [{}] {}\n", tag, &txt[..txt.len().min(280)]));
|
||||
}
|
||||
user_prompt.push_str("\n");
|
||||
}
|
||||
|
||||
114
docs/MODE_RUNNER_TUNING_PLAN.md
Normal file
114
docs/MODE_RUNNER_TUNING_PLAN.md
Normal file
@ -0,0 +1,114 @@
|
||||
# Mode Runner Tuning Plan
|
||||
|
||||
**Date:** 2026-04-26
|
||||
**Branch:** `scrum/auto-apply-19814` (PR #11)
|
||||
**Status:** Pass 5 variance test complete; conclusions locked. Implementation in progress.
|
||||
|
||||
A fresh Claude session reading this + the pass5 row range in `data/_kb/mode_experiments.jsonl` should be able to continue the work without re-running anything.
|
||||
|
||||
---
|
||||
|
||||
## What we set out to do
|
||||
|
||||
J's directive 2026-04-26 evening: "Mode runner experiment + corpus tightening."
|
||||
|
||||
Symptom in memory before the session: scrum_review's matrix corpus was kept-rate 0/2 across every call — silent failure. Question: should we tighten the corpus, build new ones, or change retrieval?
|
||||
|
||||
## What we built
|
||||
|
||||
Three new corpora indexed under `/vectors/index`:
|
||||
|
||||
| Corpus | Builder | Docs | Chunks | Source |
|
||||
|---|---|---|---|---|
|
||||
| `lakehouse_arch_v1` | `scripts/build_lakehouse_corpus.ts` | 93 | 2119 | DECISIONS.md ADRs + standalone ADRs + PHASES.md + PRD.md + CONTROL_PLANE_PRD.md + SCRUM_MASTER_SPEC.md |
|
||||
| `scrum_findings_v1` | `scripts/build_scrum_findings_corpus.ts` | 168 | 1260 | Past `scrum_reviews.jsonl` rows |
|
||||
| `lakehouse_symbols_v1` | `scripts/build_symbols_corpus.ts` | 656 | 2470 | Regex-extracted `pub fn|struct|enum|trait` + `///` docs from `crates/**/*.rs` |
|
||||
|
||||
Multi-corpus support added to the mode runner:
|
||||
- `crates/gateway/src/v1/mode.rs` — `matrix_corpus` is now `Vec<String>` (string OR array in modes.toml/JSON via `deserialize_string_or_vec`)
|
||||
- Top-K retrieved from each corpus, merged by score, top 8 globally before relevance filter
|
||||
- Each chunk tagged with `corpus` for telemetry
|
||||
- Prompt assembly prefers `doc_id` over `source` so reviewer sees `[adr:009]` not `[lakehouse_arch]`
|
||||
|
||||
Validation infra:
|
||||
- `scripts/mode_pass5_variance_paid.ts` — N reps × M conditions on one file, paid model
|
||||
- `scripts/mode_pass5_summarize.ts` — mean ± stddev + head-to-head wins/losses with parser handling 3 finding-table shapes (numbered, path-with-line, path-with-symbol)
|
||||
- `scripts/mode_compare.ts` — extended grouping key to `mode|corpus` (sorted+joined when multiple corpora) so multi-corpus sweeps don't last-write-wins-clobber
|
||||
|
||||
## What we learned
|
||||
|
||||
### Single-rep bake-off (free-tier `openai/gpt-oss-120b:free`, 3 files)
|
||||
|
||||
Confirmed `lakehouse_arch_v1` adds +1.7 grounded findings/file vs isolation, 100% groundedness, −20s latency. **But:** matrix slightly *hurts* on small files (273-line `delta.rs`: lakehouse 7 vs isolation 9) and unlocks +9 findings on the large file (1355-line `pathway_memory.rs`).
|
||||
|
||||
`scrum_findings_v1` produced 24% out-of-bounds line citations from cross-file line-number drift — **dangerous, excluded from defaults**. Only safe with same-file gating (TBD if needed).
|
||||
|
||||
### Single-rep bake-off (paid `x-ai/grok-4.1-fast`, 3 files × 4 conditions)
|
||||
|
||||
Picture *flips* on a strong model. Composed corpus −1.4 grounded vs isolation. Symbols-alone slightly negative. Arch-alone negative. Suggested kitchen-sinking enrichment denigrates results when the model is good enough to handle the file directly.
|
||||
|
||||
### Pass 5 variance test (paid grok-4.1-fast, 5 reps × 4 conditions on `pathway_memory.rs`)
|
||||
|
||||
| Condition | n | mean grounded ± σ | range | H2H vs isolation | Δ mean |
|
||||
|---|---|---|---|---|---|
|
||||
| **isolation** | 5 | 6.2 ± 1.3 | [5–8] | baseline | — |
|
||||
| arch_only | 5 | 5.2 ± 0.8 | [4–6] | 0W–3L–2T | −1.0 |
|
||||
| symbols_only | 5 | 6.4 ± 1.5 | [4–8] | 3W–2L–0T | +0.2 |
|
||||
| **composed (A+C)** | 5 | 4.4 ± 1.1 | [3–6] | **0W–5L–0T** | **−1.8** |
|
||||
|
||||
**Composed loses 5/5 head-to-head against isolation on this file with this model.** Probability under random noise = 1/2⁵ = 3.1%. Statistically significant.
|
||||
|
||||
Data window: rows in `data/_kb/mode_experiments.jsonl` where `ts > "2026-04-26T21:50:03Z"` and `file_path == "crates/vectord/src/pathway_memory.rs"`. Re-aggregate any time with `bun run scripts/mode_pass5_summarize.ts --since 2026-04-26T21:50:03Z`.
|
||||
|
||||
## Decisions taken
|
||||
|
||||
1. **Composed-corpus default is reverted.** `scrum_review.preferred_mode` switches from `codereview_lakehouse` → `codereview_isolation`. Matrix corpora stay defined in modes.toml but only fire when a caller explicitly forces `codereview_lakehouse` or one of the matrix-only experimental modes.
|
||||
|
||||
2. **Model-aware enrichment downgrade (α) is wired** in `crates/gateway/src/v1/mode.rs::execute`. When a caller resolves a "strong" model AND the resolved mode is `codereview_lakehouse`, the runner downgrades to `codereview_isolation` flag-set automatically. Strong patterns: `x-ai/grok-*`, `anthropic/*`, `openai/gpt-4*`, `openai/gpt-5*`, `deepseek/deepseek-v4*`, `moonshotai/kimi-k2*`, `google/gemini-2.5*`. Override via `LH_FORCE_FULL_ENRICHMENT=1` for diagnostic runs.
|
||||
|
||||
3. **`scrum_findings_v1` stays excluded from defaults** until same-file gating lands. Built and indexed; do not point any task class at it without that gate.
|
||||
|
||||
## Open follow-ups (not landed in this batch)
|
||||
|
||||
- **Same-file gating for `scrum_findings_v1`** — restrict retrieval to chunks where `file_path == focus_file` so cross-file line-number drift can't happen. Then it becomes a per-file "what was found before" signal.
|
||||
- **Variance test on small files** — pass 5 was 1 file (the largest, where matrix-hurt was sharpest). Confirm direction holds on 273-line / 333-line files. ~15 min × 2 files = ~30 min.
|
||||
- **Verify weak-model gain holds with α** — the bake-off showed matrix helps free-tier `gpt-oss-120b:free` on the large file. After α is wired, re-run on a free-tier model to confirm full enrichment still fires for it. ~5 min.
|
||||
- **Higher-signal matrix (β fork)** — if we ever want matrix back as a default, it can't be whole-ADR/whole-section chunks. Better: only retrieve chunks where the focus file's defined symbols appear. Tighter signal, fewer chunks. Postponed.
|
||||
|
||||
## Reference data + tools
|
||||
|
||||
- **Mode-runner code:** `crates/gateway/src/v1/mode.rs`
|
||||
- **Mode config:** `config/modes.toml`
|
||||
- **Per-call experiment log:** `data/_kb/mode_experiments.jsonl`
|
||||
- **Sweep harnesses:**
|
||||
- `scripts/mode_experiment.ts` — files × modes × 1 rep (default model: `x-ai/grok-4.1-fast`)
|
||||
- `scripts/mode_pass2_corpus_sweep.ts` — corpus × threshold sweep
|
||||
- `scripts/mode_pass3_variance.ts` — temp × reps on one mode
|
||||
- `scripts/mode_pass5_variance_paid.ts` — N reps × M conditions on one file
|
||||
- **Aggregators:**
|
||||
- `scripts/mode_compare.ts` — full per-mode comparison with grounding check
|
||||
- `scripts/mode_pass5_summarize.ts` — variance + head-to-head, robust to 3 table shapes
|
||||
- **Corpus builders (re-runnable when source docs / scrum_reviews / source code change):**
|
||||
- `scripts/build_lakehouse_corpus.ts`
|
||||
- `scripts/build_scrum_findings_corpus.ts`
|
||||
- `scripts/build_symbols_corpus.ts`
|
||||
|
||||
## Re-entry recipe (fresh session)
|
||||
|
||||
```bash
|
||||
cd /home/profit/lakehouse
|
||||
git log --oneline scrum/auto-apply-19814 -10 # what's recent
|
||||
cat docs/MODE_RUNNER_TUNING_PLAN.md # this file
|
||||
bun run scripts/mode_pass5_summarize.ts --since 2026-04-26T21:50:03Z # locked result
|
||||
curl -s http://localhost:3100/v1/mode/list | jq '.task_classes.scrum_review' # current config
|
||||
```
|
||||
|
||||
If you want to reproduce the bake-off:
|
||||
|
||||
```bash
|
||||
# Strong model variance test (~17 min):
|
||||
bun run scripts/mode_pass5_variance_paid.ts
|
||||
|
||||
# Weak-model regression (~10 min):
|
||||
LH_MODEL=openai/gpt-oss-120b:free LH_REPS=3 bun run scripts/mode_pass5_variance_paid.ts
|
||||
```
|
||||
176
scripts/build_lakehouse_corpus.ts
Normal file
176
scripts/build_lakehouse_corpus.ts
Normal file
@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Build the `lakehouse_arch_v1` corpus — Option A from 2026-04-26
|
||||
* corpus-tightening pass. Sources: DECISIONS.md ADRs, standalone
|
||||
* ADR-NNN-*.md docs, PHASES.md per-phase entries, PRD.md,
|
||||
* CONTROL_PLANE_PRD.md, SCRUM_MASTER_SPEC.md sections.
|
||||
*
|
||||
* doc_id encodes origin (adr:017, phase:19, prd:executive_summary, ...)
|
||||
* so the reviewer prompt's [tag] surfaces useful context.
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/build_lakehouse_corpus.ts # build
|
||||
* bun run scripts/build_lakehouse_corpus.ts --dry-run # show docs, don't POST
|
||||
* bun run scripts/build_lakehouse_corpus.ts --print # dump first chunk + count
|
||||
*/
|
||||
|
||||
import { readFileSync, readdirSync } from "node:fs";
|
||||
import { resolve } from "node:path";
|
||||
|
||||
const ROOT = resolve(import.meta.dir, "..");
|
||||
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
|
||||
const INDEX_NAME = process.env.LH_CORPUS_NAME ?? "lakehouse_arch_v1";
|
||||
const SOURCE_LABEL = "lakehouse_arch";
|
||||
const CHUNK_SIZE = Number(process.env.LH_CHUNK_SIZE ?? 1500);
|
||||
const OVERLAP = Number(process.env.LH_OVERLAP ?? 150);
|
||||
|
||||
interface Doc { id: string; text: string }
|
||||
|
||||
function slug(s: string): string {
|
||||
return s
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, "_")
|
||||
.replace(/^_+|_+$/g, "")
|
||||
.slice(0, 60);
|
||||
}
|
||||
|
||||
// Split DECISIONS.md by `## ADR-NNN: title`. Drop date line so it doesn't
|
||||
// dilute the embedding (ADRs are about intent, not when they happened).
|
||||
function chunkDecisionsMd(md: string): Doc[] {
|
||||
const docs: Doc[] = [];
|
||||
const sections = md.split(/^## ADR-(\d+):\s*(.+)$/m);
|
||||
// sections = [preamble, num, title, body, num, title, body, ...]
|
||||
for (let i = 1; i < sections.length; i += 3) {
|
||||
const num = sections[i].padStart(3, "0");
|
||||
const title = sections[i + 1].trim();
|
||||
const body = sections[i + 2]
|
||||
.replace(/^\*\*Date:\*\*.*$/m, "")
|
||||
.trim();
|
||||
docs.push({
|
||||
id: `adr:${num}`,
|
||||
text: `# ADR-${num}: ${title}\n\n${body}`,
|
||||
});
|
||||
}
|
||||
return docs;
|
||||
}
|
||||
|
||||
// Standalone ADR-NNN-*.md files in docs/ — keep one doc per file.
|
||||
function chunkStandaloneAdrs(dir: string): Doc[] {
|
||||
const docs: Doc[] = [];
|
||||
for (const f of readdirSync(dir)) {
|
||||
const m = f.match(/^ADR-(\d+)-(.+)\.md$/);
|
||||
if (!m) continue;
|
||||
const num = m[1].padStart(3, "0");
|
||||
const slug_ = slug(m[2]);
|
||||
docs.push({
|
||||
id: `adr_doc:${num}_${slug_}`,
|
||||
text: readFileSync(resolve(dir, f), "utf8"),
|
||||
});
|
||||
}
|
||||
return docs;
|
||||
}
|
||||
|
||||
// PHASES.md uses `## Phase N: title` headings + nested checklists. Split
|
||||
// by phase. Sub-bullets stay with their parent phase so context is intact.
|
||||
function chunkPhasesMd(md: string): Doc[] {
|
||||
const docs: Doc[] = [];
|
||||
const sections = md.split(/^## (Phase[^\n]*)$/m);
|
||||
for (let i = 1; i < sections.length; i += 2) {
|
||||
const heading = sections[i].trim();
|
||||
const body = sections[i + 1].trim();
|
||||
if (!body) continue;
|
||||
const phase_num_match = heading.match(/Phase\s+(\S+)/);
|
||||
const id_part = phase_num_match
|
||||
? `phase:${slug(phase_num_match[1])}`
|
||||
: `phase:${slug(heading)}`;
|
||||
docs.push({ id: id_part, text: `## ${heading}\n${body}` });
|
||||
}
|
||||
return docs;
|
||||
}
|
||||
|
||||
// Generic doc: split by `## Section` (top-level inside a single doc). If
|
||||
// the section list is empty, return the whole file as one doc and let the
|
||||
// server-side chunker handle it.
|
||||
function chunkBySectionH2(filePath: string, originPrefix: string): Doc[] {
|
||||
const md = readFileSync(filePath, "utf8");
|
||||
const sections = md.split(/^## (.+)$/m);
|
||||
if (sections.length < 3) {
|
||||
return [{ id: `${originPrefix}:_full`, text: md }];
|
||||
}
|
||||
const docs: Doc[] = [];
|
||||
// Capture preamble (before any ## heading) if non-trivial
|
||||
if (sections[0].trim().length > 200) {
|
||||
docs.push({
|
||||
id: `${originPrefix}:_preamble`,
|
||||
text: sections[0].trim(),
|
||||
});
|
||||
}
|
||||
for (let i = 1; i < sections.length; i += 2) {
|
||||
const heading = sections[i].trim();
|
||||
const body = sections[i + 1].trim();
|
||||
if (!body) continue;
|
||||
docs.push({
|
||||
id: `${originPrefix}:${slug(heading)}`,
|
||||
text: `## ${heading}\n${body}`,
|
||||
});
|
||||
}
|
||||
return docs;
|
||||
}
|
||||
|
||||
function buildAllDocs(): Doc[] {
|
||||
const docs: Doc[] = [];
|
||||
docs.push(...chunkDecisionsMd(readFileSync(resolve(ROOT, "docs/DECISIONS.md"), "utf8")));
|
||||
docs.push(...chunkStandaloneAdrs(resolve(ROOT, "docs")));
|
||||
docs.push(...chunkPhasesMd(readFileSync(resolve(ROOT, "docs/PHASES.md"), "utf8")));
|
||||
docs.push(...chunkBySectionH2(resolve(ROOT, "docs/PRD.md"), "prd"));
|
||||
docs.push(...chunkBySectionH2(resolve(ROOT, "docs/CONTROL_PLANE_PRD.md"), "ctrl_prd"));
|
||||
docs.push(...chunkBySectionH2(resolve(ROOT, "docs/SCRUM_MASTER_SPEC.md"), "scrum_spec"));
|
||||
return docs;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const dryRun = process.argv.includes("--dry-run") || process.argv.includes("--print");
|
||||
const printOnly = process.argv.includes("--print");
|
||||
|
||||
const docs = buildAllDocs();
|
||||
const totalBytes = docs.reduce((s, d) => s + d.text.length, 0);
|
||||
const expectedChunks = Math.ceil(totalBytes / (CHUNK_SIZE - OVERLAP));
|
||||
|
||||
console.log(`[corpus] ${docs.length} documents · ${totalBytes} bytes · ~${expectedChunks} chunks at ${CHUNK_SIZE}/${OVERLAP}`);
|
||||
console.log(`[corpus] origins: ${[...new Set(docs.map(d => d.id.split(":")[0]))].join(", ")}`);
|
||||
|
||||
if (printOnly) {
|
||||
console.log("\n[corpus] first 3 doc IDs:");
|
||||
docs.slice(0, 3).forEach(d => console.log(` ${d.id} (${d.text.length} bytes) ${d.text.slice(0, 80).replace(/\n/g, " ")}…`));
|
||||
console.log("\n[corpus] last 3 doc IDs:");
|
||||
docs.slice(-3).forEach(d => console.log(` ${d.id} (${d.text.length} bytes) ${d.text.slice(0, 80).replace(/\n/g, " ")}…`));
|
||||
return;
|
||||
}
|
||||
if (dryRun) return;
|
||||
|
||||
const body = {
|
||||
index_name: INDEX_NAME,
|
||||
source: SOURCE_LABEL,
|
||||
documents: docs,
|
||||
chunk_size: CHUNK_SIZE,
|
||||
overlap: OVERLAP,
|
||||
};
|
||||
|
||||
console.log(`[corpus] POST ${GATEWAY}/vectors/index → ${INDEX_NAME}`);
|
||||
const r = await fetch(`${GATEWAY}/vectors/index`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify(body),
|
||||
signal: AbortSignal.timeout(60_000),
|
||||
});
|
||||
if (!r.ok) {
|
||||
console.error(`[corpus] HTTP ${r.status}: ${await r.text()}`);
|
||||
process.exit(1);
|
||||
}
|
||||
const j: any = await r.json();
|
||||
console.log(`[corpus] job ${j.job_id} · ${j.documents} docs → ${j.chunks} chunks queued`);
|
||||
console.log(`[corpus] poll: curl -s ${GATEWAY}/vectors/jobs/${j.job_id} | jq`);
|
||||
console.log(`[corpus] verify: curl -s '${GATEWAY}/vectors/indexes' | jq '.[]|select(.index_name=="${INDEX_NAME}")'`);
|
||||
}
|
||||
|
||||
// Surface any rejection from main() and exit non-zero for CI.
main().catch((err) => { console.error(err); process.exit(1); });
|
||||
94
scripts/build_scrum_findings_corpus.ts
Normal file
94
scripts/build_scrum_findings_corpus.ts
Normal file
@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Build the `scrum_findings_v1` corpus — Option B from 2026-04-26
|
||||
* corpus pass. Self-feeding: each accepted scrum review's
|
||||
* `suggestions_preview` becomes a document, indexed under doc_id
|
||||
* `review:<file_slug>:<ts_compact>` so multi-iteration coexists.
|
||||
*
|
||||
* Re-run this whenever scrum_reviews.jsonl grows; the index_name stays
|
||||
* stable and the gateway will re-register metadata.
|
||||
*/
|
||||
|
||||
import { readFileSync } from "node:fs";
|
||||
import { resolve } from "node:path";
|
||||
|
||||
const ROOT = resolve(import.meta.dir, "..");
|
||||
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
|
||||
const INDEX_NAME = process.env.LH_CORPUS_NAME ?? "scrum_findings_v1";
|
||||
const SOURCE_LABEL = "scrum_findings";
|
||||
const CHUNK_SIZE = Number(process.env.LH_CHUNK_SIZE ?? 1500);
|
||||
const OVERLAP = Number(process.env.LH_OVERLAP ?? 150);
|
||||
const MIN_PREVIEW_BYTES = 200; // skip stub rows
|
||||
|
||||
interface Doc { id: string; text: string }
|
||||
|
||||
function slugFile(path: string): string {
|
||||
return path.replace(/^crates\//, "").replace(/[^a-z0-9]+/gi, "_").slice(0, 40);
|
||||
}
|
||||
|
||||
function compactTs(iso: string): string {
|
||||
return iso.replace(/[-:T]/g, "").slice(0, 14); // 20260424T110656
|
||||
}
|
||||
|
||||
function buildDocs(): Doc[] {
|
||||
const lines = readFileSync(resolve(ROOT, "data/_kb/scrum_reviews.jsonl"), "utf8").split("\n").filter(Boolean);
|
||||
const docs: Doc[] = [];
|
||||
const idCounts = new Map<string, number>();
|
||||
|
||||
for (const line of lines) {
|
||||
let row: any;
|
||||
try { row = JSON.parse(line); } catch { continue; }
|
||||
|
||||
const file = row.file ?? "";
|
||||
const preview = row.suggestions_preview ?? "";
|
||||
if (!file || preview.length < MIN_PREVIEW_BYTES) continue;
|
||||
|
||||
const ts = compactTs(row.reviewed_at ?? "");
|
||||
const baseId = `review:${slugFile(file)}:${ts || "no_ts"}`;
|
||||
// Multiple reviews with same ts (rare but possible) get a counter.
|
||||
const count = (idCounts.get(baseId) ?? 0) + 1;
|
||||
idCounts.set(baseId, count);
|
||||
const id = count === 1 ? baseId : `${baseId}_${count}`;
|
||||
|
||||
const header = `File: ${file}\nReviewed: ${row.reviewed_at ?? "?"}\nModel: ${row.accepted_model ?? "?"}\nVerdict: ${row.verdict ?? "?"}\nFindings: ${row.findings_count ?? "?"}\n\n`;
|
||||
docs.push({ id, text: header + preview });
|
||||
}
|
||||
return docs;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const dryRun = process.argv.includes("--dry-run") || process.argv.includes("--print");
|
||||
const printOnly = process.argv.includes("--print");
|
||||
|
||||
const docs = buildDocs();
|
||||
const totalBytes = docs.reduce((s, d) => s + d.text.length, 0);
|
||||
console.log(`[corpus-B] ${docs.length} reviews · ${totalBytes} bytes · target chunk_size=${CHUNK_SIZE}`);
|
||||
console.log(`[corpus-B] file coverage: ${new Set(docs.map(d => d.id.split(":")[1])).size} unique files`);
|
||||
|
||||
if (printOnly) {
|
||||
docs.slice(0, 3).forEach(d => console.log(` ${d.id} (${d.text.length}b) ${d.text.slice(0, 80).replace(/\n/g, " ")}…`));
|
||||
return;
|
||||
}
|
||||
if (dryRun) return;
|
||||
|
||||
const r = await fetch(`${GATEWAY}/vectors/index`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
index_name: INDEX_NAME,
|
||||
source: SOURCE_LABEL,
|
||||
documents: docs,
|
||||
chunk_size: CHUNK_SIZE,
|
||||
overlap: OVERLAP,
|
||||
}),
|
||||
signal: AbortSignal.timeout(60_000),
|
||||
});
|
||||
if (!r.ok) {
|
||||
console.error(`[corpus-B] HTTP ${r.status}: ${await r.text()}`);
|
||||
process.exit(1);
|
||||
}
|
||||
const j: any = await r.json();
|
||||
console.log(`[corpus-B] job ${j.job_id} · ${j.documents} docs → ${j.chunks} chunks queued`);
|
||||
}
|
||||
|
||||
// Surface any rejection from main() and exit non-zero for CI.
main().catch((err) => { console.error(err); process.exit(1); });
|
||||
141
scripts/build_symbols_corpus.ts
Normal file
141
scripts/build_symbols_corpus.ts
Normal file
@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Build the `lakehouse_symbols_v1` corpus — Option C from 2026-04-26
|
||||
* pass. Extracts public Rust items with their /// doc comments from
|
||||
* crates/**\/*.rs. Regex-based — covers ~80% of definitions without
|
||||
* pulling in a syn-based parser.
|
||||
*
|
||||
* doc_id: `symbol:<crate>::<kind>::<name>` e.g. symbol:vectord::struct::PathwayTrace
|
||||
*
|
||||
* Each chunk includes: doc comment (if any) + signature + 1-2 lines
|
||||
* after the brace so reviewer sees field types / variants for structs
|
||||
* and enums.
|
||||
*/
|
||||
|
||||
import { readFileSync, readdirSync, statSync } from "node:fs";
|
||||
import { resolve, relative } from "node:path";
|
||||
|
||||
const ROOT = resolve(import.meta.dir, "..");
|
||||
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
|
||||
const INDEX_NAME = process.env.LH_CORPUS_NAME ?? "lakehouse_symbols_v1";
|
||||
const SOURCE_LABEL = "lakehouse_symbols";
|
||||
const CHUNK_SIZE = Number(process.env.LH_CHUNK_SIZE ?? 800);
|
||||
const OVERLAP = Number(process.env.LH_OVERLAP ?? 80);
|
||||
|
||||
interface Doc { id: string; text: string }
|
||||
|
||||
function walkRs(dir: string): string[] {
|
||||
const out: string[] = [];
|
||||
for (const entry of readdirSync(dir)) {
|
||||
if (entry === "target" || entry.startsWith(".")) continue;
|
||||
const full = resolve(dir, entry);
|
||||
const st = statSync(full);
|
||||
if (st.isDirectory()) out.push(...walkRs(full));
|
||||
else if (entry.endsWith(".rs")) out.push(full);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function crateOf(rsPath: string): string {
|
||||
const rel = relative(resolve(ROOT, "crates"), rsPath);
|
||||
return rel.split("/")[0];
|
||||
}
|
||||
|
||||
// Match pub fn|struct|enum|trait declarations. Capture the (optional)
|
||||
// preceding contiguous /// doc block and a few lines after for signature
|
||||
// + body preview. Skips items inside `mod tests` blocks and #[cfg(test)].
|
||||
const ITEM_RE = /(?:^[ \t]*\/\/\/.*\n)*[ \t]*pub(?:\([^)]+\))?[ \t]+(fn|struct|enum|trait|async[ \t]+fn)[ \t]+([A-Za-z_][A-Za-z0-9_]*)/gm;
|
||||
|
||||
function extractItems(src: string, crate: string, relPath: string): Doc[] {
|
||||
const docs: Doc[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
// Quick test-module guard: drop everything from a `mod tests {` line
|
||||
// onward. Coarse but adequate — public items inside tests are rare.
|
||||
const cutoff = src.search(/^(#\[cfg\(test\)\]|mod tests\b)/m);
|
||||
const usable = cutoff > 0 ? src.slice(0, cutoff) : src;
|
||||
|
||||
for (const m of usable.matchAll(ITEM_RE)) {
|
||||
const matchStart = m.index!;
|
||||
const kind = m[1].replace(/^async[ \t]+/, "async_");
|
||||
const name = m[2];
|
||||
|
||||
// Walk backward to capture the contiguous /// doc block above.
|
||||
const lines = usable.slice(0, matchStart).split("\n");
|
||||
const docLines: string[] = [];
|
||||
for (let i = lines.length - 1; i >= 0; i--) {
|
||||
const t = lines[i].trim();
|
||||
if (t.startsWith("///")) docLines.unshift(t.replace(/^\/\/\/\s?/, ""));
|
||||
else if (t === "" || t.startsWith("#[")) continue;
|
||||
else break;
|
||||
}
|
||||
|
||||
// Capture signature + ~6 lines of body preview.
|
||||
const after = usable.slice(matchStart, matchStart + 800);
|
||||
const bodyEnd = after.search(/\n\}\n|\n\n[a-z#]/);
|
||||
const body = bodyEnd > 0 ? after.slice(0, Math.min(bodyEnd, 800)) : after.slice(0, 800);
|
||||
|
||||
const id = `symbol:${crate}::${kind}::${name}`;
|
||||
if (seen.has(id)) continue;
|
||||
seen.add(id);
|
||||
|
||||
const header = `${crate}::${name} (${kind}) — ${relPath}`;
|
||||
const docText = docLines.length > 0 ? `\n${docLines.join("\n")}\n` : "\n";
|
||||
docs.push({ id, text: `${header}\n${docText}\n\`\`\`rust\n${body}\n\`\`\`` });
|
||||
}
|
||||
return docs;
|
||||
}
|
||||
|
||||
function buildDocs(): Doc[] {
|
||||
const cratesDir = resolve(ROOT, "crates");
|
||||
const docs: Doc[] = [];
|
||||
for (const f of walkRs(cratesDir)) {
|
||||
const src = readFileSync(f, "utf8");
|
||||
const crate = crateOf(f);
|
||||
const rel = relative(ROOT, f);
|
||||
docs.push(...extractItems(src, crate, rel));
|
||||
}
|
||||
return docs;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const printOnly = process.argv.includes("--print");
|
||||
const dryRun = process.argv.includes("--dry-run") || printOnly;
|
||||
|
||||
const docs = buildDocs();
|
||||
const totalBytes = docs.reduce((s, d) => s + d.text.length, 0);
|
||||
const byCrate = new Map<string, number>();
|
||||
for (const d of docs) {
|
||||
const c = d.id.split("::")[0].replace("symbol:", "");
|
||||
byCrate.set(c, (byCrate.get(c) ?? 0) + 1);
|
||||
}
|
||||
console.log(`[corpus-C] ${docs.length} symbols · ${totalBytes} bytes · chunk_size=${CHUNK_SIZE}`);
|
||||
console.log(`[corpus-C] by crate: ${[...byCrate.entries()].map(([k, v]) => `${k}=${v}`).join(", ")}`);
|
||||
|
||||
if (printOnly) {
|
||||
docs.slice(0, 3).forEach(d => console.log(` ${d.id} (${d.text.length}b)\n ${d.text.slice(0, 200).replace(/\n/g, "\n ")}\n`));
|
||||
return;
|
||||
}
|
||||
if (dryRun) return;
|
||||
|
||||
const r = await fetch(`${GATEWAY}/vectors/index`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
index_name: INDEX_NAME,
|
||||
source: SOURCE_LABEL,
|
||||
documents: docs,
|
||||
chunk_size: CHUNK_SIZE,
|
||||
overlap: OVERLAP,
|
||||
}),
|
||||
signal: AbortSignal.timeout(60_000),
|
||||
});
|
||||
if (!r.ok) {
|
||||
console.error(`[corpus-C] HTTP ${r.status}: ${await r.text()}`);
|
||||
process.exit(1);
|
||||
}
|
||||
const j: any = await r.json();
|
||||
console.log(`[corpus-C] job ${j.job_id} · ${j.documents} docs → ${j.chunks} chunks queued`);
|
||||
}
|
||||
|
||||
// Surface any rejection from main() and exit non-zero for CI.
main().catch((err) => { console.error(err); process.exit(1); });
|
||||
@ -227,13 +227,37 @@ function main() {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Group by file → mode
|
||||
// Group by file → mode (with corpus appended when matrix-bearing modes
|
||||
// were swept across multiple corpora — otherwise lakehouse_arch_v1
|
||||
// would clobber scrum_findings_v1 etc). matrix_corpus is now a Vec
|
||||
// on the wire (multi-corpus support); legacy rows have either a
|
||||
// string or null. Coerce to a stable key.
|
||||
const matrixCorpus = (r: Row): string => {
|
||||
const c = (r.sources as any)?.matrix_corpus;
|
||||
if (!c) return "";
|
||||
if (typeof c === "string") return c;
|
||||
if (Array.isArray(c)) {
|
||||
if (c.length === 0) return "";
|
||||
if (c.length === 1) return c[0];
|
||||
// Stable join: sort then "+"-separate so order doesn't matter.
|
||||
return [...c].sort().join("+");
|
||||
}
|
||||
return "";
|
||||
};
|
||||
const corporaInPlay = new Set(rows.map(matrixCorpus).filter(c => c));
|
||||
const showCorpus = corporaInPlay.size > 1;
|
||||
const keyOf = (r: Row): string => {
|
||||
const c = matrixCorpus(r);
|
||||
return showCorpus && c ? `${r.mode}|${c}` : r.mode;
|
||||
};
|
||||
|
||||
const byFile: Record<string, Record<string, Row>> = {};
|
||||
const allModes = new Set<string>();
|
||||
for (const r of rows) {
|
||||
byFile[r.file_path] ??= {};
|
||||
byFile[r.file_path][r.mode] = r; // last-write-wins per mode per file
|
||||
allModes.add(r.mode);
|
||||
const k = keyOf(r);
|
||||
byFile[r.file_path][k] = r; // last-write-wins per (mode,corpus) per file
|
||||
allModes.add(k);
|
||||
}
|
||||
const modesSorted = [...allModes].sort();
|
||||
|
||||
@ -246,13 +270,13 @@ function main() {
|
||||
const fileLines = fileContent ? fileContent.split("\n").length : 0;
|
||||
console.log(` (file: ${fileLines} lines${fileContent === null ? ", NOT READABLE — grounding skipped" : ""})`);
|
||||
console.log(
|
||||
` ${pad("mode", 30)} ${pad("resp", 6, true)} ${pad("find", 5, true)} ${pad("ground", 9, true)} ${pad("hallu", 6, true)} ${pad("OOB", 4, true)} ${pad("path", 5, true)} ${pad("ms", 7, true)} ${pad("bug_fp", 6, true)}`
|
||||
` ${pad("mode", 56)} ${pad("resp", 6, true)} ${pad("find", 5, true)} ${pad("ground", 9, true)} ${pad("hallu", 6, true)} ${pad("OOB", 4, true)} ${pad("path", 5, true)} ${pad("ms", 7, true)} ${pad("bug_fp", 6, true)}`
|
||||
);
|
||||
console.log(` ${"─".repeat(30)} ${"─".repeat(6)} ${"─".repeat(5)} ${"─".repeat(9)} ${"─".repeat(6)} ${"─".repeat(4)} ${"─".repeat(5)} ${"─".repeat(7)} ${"─".repeat(6)}`);
|
||||
console.log(` ${"─".repeat(56)} ${"─".repeat(6)} ${"─".repeat(5)} ${"─".repeat(9)} ${"─".repeat(6)} ${"─".repeat(4)} ${"─".repeat(5)} ${"─".repeat(7)} ${"─".repeat(6)}`);
|
||||
for (const mode of modesSorted) {
|
||||
const r = byFile[file][mode];
|
||||
if (!r) {
|
||||
console.log(` ${pad(modeLabel(mode), 30)} ${pad("—", 6, true)}`);
|
||||
console.log(` ${pad(modeLabel(mode), 56)} ${pad("—", 6, true)}`);
|
||||
continue;
|
||||
}
|
||||
const findings = countFindings(r.response);
|
||||
@ -261,7 +285,7 @@ function main() {
|
||||
const grounding = checkGrounding(r.response, fileContent);
|
||||
const groundedStr = grounding.total === 0 ? "—" : `${grounding.grounded}/${grounding.total}`;
|
||||
console.log(
|
||||
` ${pad(modeLabel(mode), 30)} ${pad(r.response_chars, 6, true)} ${pad(findings, 5, true)} ${pad(groundedStr, 9, true)} ${pad(grounding.hallucinated, 6, true)} ${pad(grounding.out_of_bounds_lines, 4, true)} ${pad(cits, 5, true)} ${pad(r.latency_ms, 7, true)} ${pad(bf, 6, true)}`
|
||||
` ${pad(modeLabel(mode), 56)} ${pad(r.response_chars, 6, true)} ${pad(findings, 5, true)} ${pad(groundedStr, 9, true)} ${pad(grounding.hallucinated, 6, true)} ${pad(grounding.out_of_bounds_lines, 4, true)} ${pad(cits, 5, true)} ${pad(r.latency_ms, 7, true)} ${pad(bf, 6, true)}`
|
||||
);
|
||||
}
|
||||
console.log("");
|
||||
@ -271,11 +295,11 @@ function main() {
|
||||
// avg_groundedness is the rate at which findings cite real symbols
|
||||
// within file bounds. Modes with low groundedness are confabulating.
|
||||
console.log("═══ PER-MODE AGGREGATE ═══\n");
|
||||
console.log(` ${pad("mode", 30)} ${pad("n", 3, true)} ${pad("avg find", 9, true)} ${pad("avg grnd", 9, true)} ${pad("grnd %", 7, true)} ${pad("avg hallu", 10, true)} ${pad("avg ms", 7, true)}`);
|
||||
console.log(` ${"─".repeat(30)} ${"─".repeat(3)} ${"─".repeat(9)} ${"─".repeat(9)} ${"─".repeat(7)} ${"─".repeat(10)} ${"─".repeat(7)}`);
|
||||
console.log(` ${pad("mode", 56)} ${pad("n", 3, true)} ${pad("avg find", 9, true)} ${pad("avg grnd", 9, true)} ${pad("grnd %", 7, true)} ${pad("avg hallu", 10, true)} ${pad("avg ms", 7, true)}`);
|
||||
console.log(` ${"─".repeat(56)} ${"─".repeat(3)} ${"─".repeat(9)} ${"─".repeat(9)} ${"─".repeat(7)} ${"─".repeat(10)} ${"─".repeat(7)}`);
|
||||
const fileCache: Record<string, string | null> = {};
|
||||
for (const mode of modesSorted) {
|
||||
const modeRows = rows.filter(r => r.mode === mode);
|
||||
const modeRows = rows.filter(r => keyOf(r) === mode);
|
||||
if (modeRows.length === 0) continue;
|
||||
const n = modeRows.length;
|
||||
let totFind = 0, totGround = 0, totHallu = 0;
|
||||
@ -292,7 +316,7 @@ function main() {
|
||||
const avgHallu = (totHallu / n).toFixed(1);
|
||||
const avgMs = Math.round(modeRows.reduce((s, r) => s + r.latency_ms, 0) / n);
|
||||
console.log(
|
||||
` ${pad(modeLabel(mode), 30)} ${pad(n, 3, true)} ${pad(avgFind, 9, true)} ${pad(avgGround, 9, true)} ${pad(grndPct, 7, true)} ${pad(avgHallu, 10, true)} ${pad(avgMs, 7, true)}`
|
||||
` ${pad(modeLabel(mode), 56)} ${pad(n, 3, true)} ${pad(avgFind, 9, true)} ${pad(avgGround, 9, true)} ${pad(grndPct, 7, true)} ${pad(avgHallu, 10, true)} ${pad(avgMs, 7, true)}`
|
||||
);
|
||||
}
|
||||
|
||||
@ -301,13 +325,20 @@ function main() {
|
||||
// wins). Comparing grounded findings instead corrects for modes
|
||||
// that produce convincing-but-fake output.
|
||||
console.log("\n═══ MODE vs codereview_lakehouse (grounded findings, per file) ═══\n");
|
||||
console.log(` ${pad("mode", 30)} ${pad("wins", 5, true)} ${pad("losses", 7, true)} ${pad("ties", 5, true)} ${pad("Δ avg grounded", 16, true)}`);
|
||||
console.log(` ${"─".repeat(30)} ${"─".repeat(5)} ${"─".repeat(7)} ${"─".repeat(5)} ${"─".repeat(16)}`);
|
||||
console.log(` ${pad("mode", 56)} ${pad("wins", 5, true)} ${pad("losses", 7, true)} ${pad("ties", 5, true)} ${pad("Δ avg grounded", 16, true)}`);
|
||||
console.log(` ${"─".repeat(56)} ${"─".repeat(5)} ${"─".repeat(7)} ${"─".repeat(5)} ${"─".repeat(16)}`);
|
||||
// Pick whichever codereview_lakehouse key shows up most often as the
|
||||
// baseline (handles corpus-suffixed keys when showCorpus=true).
|
||||
const baselineKey = modesSorted
|
||||
.filter(k => k.startsWith("codereview_lakehouse"))
|
||||
.sort((a, b) =>
|
||||
Object.values(byFile).filter(f => f[b]).length -
|
||||
Object.values(byFile).filter(f => f[a]).length)[0] ?? "codereview_lakehouse";
|
||||
for (const mode of modesSorted) {
|
||||
if (mode === "codereview_lakehouse") continue;
|
||||
if (mode === baselineKey) continue;
|
||||
let wins = 0, losses = 0, ties = 0, totalDelta = 0, n = 0;
|
||||
for (const file of Object.keys(byFile)) {
|
||||
const baseline = byFile[file]["codereview_lakehouse"];
|
||||
const baseline = byFile[file][baselineKey];
|
||||
const challenger = byFile[file][mode];
|
||||
if (!baseline || !challenger) continue;
|
||||
const fc = fileCache[file] ??= readFileSafe(file);
|
||||
@ -322,7 +353,7 @@ function main() {
|
||||
if (n === 0) continue;
|
||||
const avgDelta = (totalDelta / n).toFixed(1);
|
||||
console.log(
|
||||
` ${pad(modeLabel(mode), 30)} ${pad(wins, 5, true)} ${pad(losses, 7, true)} ${pad(ties, 5, true)} ${pad(avgDelta, 16, true)}`
|
||||
` ${pad(modeLabel(mode), 56)} ${pad(wins, 5, true)} ${pad(losses, 7, true)} ${pad(ties, 5, true)} ${pad(avgDelta, 16, true)}`
|
||||
);
|
||||
}
|
||||
console.log("\n[compare] done — ⚗ marks lossy/control modes, exclude from recommendations\n");
|
||||
|
||||
@ -32,7 +32,7 @@ const DEFAULT_FILES = [
|
||||
"crates/queryd/src/service.rs",
|
||||
];
|
||||
|
||||
function parseArgs(): { files: string[]; modes: string[]; model: string } {
|
||||
function parseArgs(): { files: string[]; modes: string[]; model: string; corpus: string[] } {
|
||||
const args = Bun.argv.slice(2);
|
||||
const out: Record<string, string> = {};
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
@ -41,8 +41,12 @@ function parseArgs(): { files: string[]; modes: string[]; model: string } {
|
||||
}
|
||||
const files = (out.files ?? DEFAULT_FILES.join(",")).split(",").map(s => s.trim()).filter(Boolean);
|
||||
const modes = (out.modes ?? ALL_MODES.join(",")).split(",").map(s => s.trim()).filter(Boolean);
|
||||
const model = out.model ?? "openai/gpt-oss-120b:free";
|
||||
return { files, modes, model };
|
||||
// Default to the paid OpenRouter primary (matches scrum_master_pipeline
|
||||
// ladder rung 1). Pass `--model openai/gpt-oss-120b:free` if you want
|
||||
// the old free-tier baseline. See SCRUM_MASTER_SPEC.md for the ladder.
|
||||
const model = out.model ?? "x-ai/grok-4.1-fast";
|
||||
const corpus = (out.corpus ?? "").split(",").map(s => s.trim()).filter(Boolean);
|
||||
return { files, modes, model, corpus };
|
||||
}
|
||||
|
||||
interface RunResult {
|
||||
@ -58,19 +62,22 @@ interface RunResult {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
async function runOne(file: string, mode: string, model: string): Promise<RunResult> {
|
||||
async function runOne(file: string, mode: string, model: string, corpus: string[]): Promise<RunResult> {
|
||||
const t0 = Date.now();
|
||||
try {
|
||||
const body: any = {
|
||||
task_class: TASK_CLASS,
|
||||
file_path: file,
|
||||
force_mode: mode,
|
||||
force_model: model,
|
||||
};
|
||||
if (corpus.length === 1) body.force_matrix_corpus = corpus[0];
|
||||
else if (corpus.length > 1) body.force_matrix_corpus = corpus;
|
||||
const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
task_class: TASK_CLASS,
|
||||
file_path: file,
|
||||
force_mode: mode,
|
||||
force_model: model,
|
||||
}),
|
||||
signal: AbortSignal.timeout(180_000),
|
||||
body: JSON.stringify(body),
|
||||
signal: AbortSignal.timeout(240_000),
|
||||
});
|
||||
if (!r.ok) {
|
||||
const body = await r.text().catch(() => "");
|
||||
@ -92,9 +99,10 @@ async function runOne(file: string, mode: string, model: string): Promise<RunRes
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const { files, modes, model } = parseArgs();
|
||||
const { files, modes, model, corpus } = parseArgs();
|
||||
console.log(`[experiment] files=${files.length} × modes=${modes.length} = ${files.length * modes.length} runs`);
|
||||
console.log(`[experiment] model=${model} task=${TASK_CLASS} gateway=${GATEWAY}`);
|
||||
if (corpus.length > 0) console.log(`[experiment] corpus override: ${corpus.join(" + ")}`);
|
||||
console.log("");
|
||||
|
||||
const results: RunResult[] = [];
|
||||
@ -103,7 +111,7 @@ async function main() {
|
||||
for (const mode of modes) {
|
||||
i++;
|
||||
process.stdout.write(` [${i}/${files.length * modes.length}] ${mode.padEnd(28)} ${file} ... `);
|
||||
const r = await runOne(file, mode, model);
|
||||
const r = await runOne(file, mode, model, corpus);
|
||||
results.push(r);
|
||||
if (r.ok) {
|
||||
console.log(
|
||||
|
||||
169
scripts/mode_pass5_summarize.ts
Normal file
169
scripts/mode_pass5_summarize.ts
Normal file
@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Pass 5 variance summarizer. Reads data/_kb/mode_experiments.jsonl
|
||||
* since a timestamp, groups by (mode|corpus), reports mean ± stddev
|
||||
* of grounded finding count, plus a head-to-head wins/losses table
|
||||
* vs the isolation baseline.
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/mode_pass5_summarize.ts # default 2h
|
||||
* bun run scripts/mode_pass5_summarize.ts --since 2026-04-26T22 # explicit
|
||||
*/
|
||||
|
||||
import { readFileSync, existsSync } from "node:fs";
|
||||
|
||||
const argSince = (() => {
|
||||
const i = Bun.argv.indexOf("--since");
|
||||
return i >= 0 ? Bun.argv[i + 1] : new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString();
|
||||
})();
|
||||
|
||||
const JSONL = "data/_kb/mode_experiments.jsonl";
|
||||
if (!existsSync(JSONL)) { console.error(`no ${JSONL}`); process.exit(1); }
|
||||
|
||||
interface Row {
|
||||
ts: string; mode: string; file_path: string; response: string;
|
||||
sources: { matrix_corpus?: string | string[] | null };
|
||||
latency_ms: number;
|
||||
}
|
||||
|
||||
function corpusKey(c: any): string {
|
||||
if (!c) return "";
|
||||
if (typeof c === "string") return c;
|
||||
if (Array.isArray(c)) return c.length === 0 ? "" : [...c].sort().join("+");
|
||||
return "";
|
||||
}
|
||||
const condKey = (r: Row) => {
|
||||
const c = corpusKey(r.sources?.matrix_corpus);
|
||||
return c ? `${r.mode}|${c}` : r.mode;
|
||||
};
|
||||
|
||||
// Reuse the same grounding logic as mode_compare — symbols cited in
|
||||
// findings rows must appear in the focus file, and any line numbers
|
||||
// must fall within EOF.
|
||||
function extractFindings(md: string): { symbols: string[]; lines: number[] }[] {
|
||||
const sec = /(?:^|\n)#{1,3}[^\na-zA-Z]*(?:Ranked\s+)?Findings?[^\n]*\n/i;
|
||||
const m = md.match(sec);
|
||||
let section = md;
|
||||
if (m && m.index !== undefined) {
|
||||
const after = md.slice(m.index + m[0].length);
|
||||
const stop = after.search(/\n#{1,3}[^\na-zA-Z]*(?:Patch|Suggestion|Reference|Summary|Concrete)/i);
|
||||
section = stop >= 0 ? after.slice(0, stop) : after;
|
||||
}
|
||||
// Three row shapes:
|
||||
// 1) numbered: `| 1 | ... |`
|
||||
// 2) path-with-line: `| service.rs:106 | ... |`
|
||||
// 3) path-with-sym: `| crates/vectord/src/pathway_memory.rs:load_fn (≈L220) | ... |`
|
||||
// Pick whichever shape matches the most rows (ties favor numbered).
|
||||
const numbered = section.split("\n").filter(l => /^\|\s*\*?\*?\d+\*?\*?\s*\|/.test(l));
|
||||
const pathRows = section.split("\n").filter(l => /^\|\s*[a-z_/\.][a-z_/\.0-9]*\.(rs|ts|py)\b/i.test(l));
|
||||
const rows = numbered.length >= pathRows.length ? numbered : pathRows;
|
||||
return rows.map(row => {
|
||||
const sym = new Set<string>();
|
||||
for (const t of row.matchAll(/`([A-Za-z_][A-Za-z0-9_:]*)`/g)) sym.add(t[1]);
|
||||
for (const t of row.matchAll(/\b([a-z][a-z0-9_]{4,})\b/g)) sym.add(t[1]);
|
||||
const lines: number[] = [];
|
||||
for (const t of row.matchAll(/[:\-](\d{2,5})/g)) lines.push(parseInt(t[1]));
|
||||
return { symbols: [...sym], lines };
|
||||
});
|
||||
}
|
||||
|
||||
function grounded(md: string, file: string): { total: number; grounded: number; oob: number } {
|
||||
const content = readFileSync(file, "utf8");
|
||||
const eof = content.split("\n").length;
|
||||
const findings = extractFindings(md);
|
||||
let g = 0, oob = 0;
|
||||
for (const f of findings) {
|
||||
const symHit = f.symbols.length > 0 && f.symbols.some(s => content.includes(s));
|
||||
const lineOob = f.lines.length > 0 && f.lines.some(l => l > eof);
|
||||
if (lineOob) oob++;
|
||||
if (symHit && !lineOob) g++;
|
||||
}
|
||||
return { total: findings.length, grounded: g, oob };
|
||||
}
|
||||
|
||||
const lines = readFileSync(JSONL, "utf8").split("\n").filter(Boolean);
|
||||
const rows: Row[] = [];
|
||||
for (const l of lines) {
|
||||
try {
|
||||
const r: Row = JSON.parse(l);
|
||||
if (r.ts < argSince) continue;
|
||||
rows.push(r);
|
||||
} catch {}
|
||||
}
|
||||
|
||||
if (rows.length === 0) { console.error(`no rows since ${argSince}`); process.exit(1); }
|
||||
|
||||
// Group: condition → file → array of grounded counts
|
||||
type CellArr = { grnd: number[]; total: number[]; oob: number[]; ms: number[] };
|
||||
const byCond: Record<string, Record<string, CellArr>> = {};
|
||||
for (const r of rows) {
|
||||
const k = condKey(r);
|
||||
byCond[k] ??= {};
|
||||
byCond[k][r.file_path] ??= { grnd: [], total: [], oob: [], ms: [] };
|
||||
const g = grounded(r.response, r.file_path);
|
||||
byCond[k][r.file_path].grnd.push(g.grounded);
|
||||
byCond[k][r.file_path].total.push(g.total);
|
||||
byCond[k][r.file_path].oob.push(g.oob);
|
||||
byCond[k][r.file_path].ms.push(r.latency_ms);
|
||||
}
|
||||
|
||||
function stats(xs: number[]): { n: number; mean: number; sd: number; min: number; max: number } {
|
||||
const n = xs.length;
|
||||
if (n === 0) return { n: 0, mean: 0, sd: 0, min: 0, max: 0 };
|
||||
const mean = xs.reduce((s, x) => s + x, 0) / n;
|
||||
const variance = n === 1 ? 0 : xs.reduce((s, x) => s + (x - mean) ** 2, 0) / (n - 1);
|
||||
return { n, mean, sd: Math.sqrt(variance), min: Math.min(...xs), max: Math.max(...xs) };
|
||||
}
|
||||
|
||||
const conditions = Object.keys(byCond).sort();
|
||||
const files = [...new Set(rows.map(r => r.file_path))].sort();
|
||||
|
||||
console.log(`\n═══ Pass 5 variance — since ${argSince} ═══\n`);
|
||||
console.log(` ${rows.length} rows · ${conditions.length} conditions · ${files.length} files\n`);
|
||||
|
||||
for (const file of files) {
|
||||
console.log(`📄 ${file}`);
|
||||
console.log(` ${"condition".padEnd(56)} n ${"grounded mean ± sd".padStart(20)} ${"range".padStart(8)} ${"oob".padStart(4)} ${"avg ms".padStart(7)}`);
|
||||
console.log(` ${"─".repeat(56)} ─── ${"─".repeat(20)} ${"─".repeat(8)} ${"─".repeat(4)} ${"─".repeat(7)}`);
|
||||
for (const c of conditions) {
|
||||
const cell = byCond[c]?.[file];
|
||||
if (!cell || cell.grnd.length === 0) continue;
|
||||
const s = stats(cell.grnd);
|
||||
const oobSum = cell.oob.reduce((a, b) => a + b, 0);
|
||||
const msMean = cell.ms.reduce((a, b) => a + b, 0) / cell.ms.length;
|
||||
const meanSd = `${s.mean.toFixed(1)} ± ${s.sd.toFixed(1)}`;
|
||||
const range = `[${s.min}-${s.max}]`;
|
||||
console.log(` ${c.padEnd(56)} ${String(s.n).padStart(3)} ${meanSd.padStart(20)} ${range.padStart(8)} ${String(oobSum).padStart(4)} ${Math.round(msMean / 1000).toString().padStart(5)}s`);
|
||||
}
|
||||
console.log("");
|
||||
}
|
||||
|
||||
// Head-to-head: for each condition vs isolation baseline, count rep-by-rep
|
||||
// wins across the same file. Requires equal rep counts.
|
||||
console.log(`═══ Head-to-head: each condition vs isolation, rep-by-rep ═══\n`);
|
||||
const isoKey = conditions.find(c => c.startsWith("codereview_isolation"));
|
||||
if (!isoKey) {
|
||||
console.log(" no isolation rows in window");
|
||||
} else {
|
||||
console.log(` baseline: ${isoKey}\n`);
|
||||
console.log(` ${"challenger".padEnd(56)} wins losses ties Δ mean grnd`);
|
||||
console.log(` ${"─".repeat(56)} ${"─".repeat(4)} ${"─".repeat(6)} ${"─".repeat(4)} ${"─".repeat(12)}`);
|
||||
for (const c of conditions) {
|
||||
if (c === isoKey) continue;
|
||||
let wins = 0, losses = 0, ties = 0, deltaSum = 0, n = 0;
|
||||
for (const file of files) {
|
||||
const isoArr = byCond[isoKey]?.[file]?.grnd ?? [];
|
||||
const cArr = byCond[c]?.[file]?.grnd ?? [];
|
||||
const k = Math.min(isoArr.length, cArr.length);
|
||||
for (let i = 0; i < k; i++) {
|
||||
if (cArr[i] > isoArr[i]) wins++;
|
||||
else if (cArr[i] < isoArr[i]) losses++;
|
||||
else ties++;
|
||||
deltaSum += cArr[i] - isoArr[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
const dMean = n > 0 ? (deltaSum / n).toFixed(2) : "—";
|
||||
console.log(` ${c.padEnd(56)} ${String(wins).padStart(4)} ${String(losses).padStart(6)} ${String(ties).padStart(4)} ${dMean.padStart(12)}`);
|
||||
}
|
||||
}
|
||||
96
scripts/mode_pass5_variance_paid.ts
Normal file
96
scripts/mode_pass5_variance_paid.ts
Normal file
@ -0,0 +1,96 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Pass 5: variance test for the 2026-04-26 paid-model bake-off.
|
||||
*
|
||||
* The pass-4 single-rep sweep showed isolation beating every matrix
|
||||
* condition by 1.0-1.4 grounded findings/file on grok-4.1-fast. This
|
||||
* harness runs N reps × M conditions on the file where the effect was
|
||||
* sharpest (pathway_memory.rs, 1355 lines) so we can decide whether
|
||||
* the deltas are real signal or run-to-run noise.
|
||||
*
|
||||
* Conditions:
|
||||
* 1. codereview_isolation — no matrix
|
||||
* 2. codereview_lakehouse + corpus=lakehouse_arch_v1 — A only
|
||||
* 3. codereview_lakehouse + corpus=lakehouse_symbols_v1 — C only
|
||||
* 4. codereview_lakehouse (modes.toml default) — A+C composed
|
||||
*
|
||||
* Output appends per-call to data/_kb/mode_experiments.jsonl. Aggregate
|
||||
* with `bun run scripts/mode_compare.ts --since <ts>` and read the
|
||||
* grounded column with multiple rows per (mode|corpus) key.
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/mode_pass5_variance_paid.ts
|
||||
* LH_REPS=3 LH_FILE=crates/queryd/src/delta.rs bun run scripts/mode_pass5_variance_paid.ts
|
||||
*/
|
||||
|
||||
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
|
||||
const MODEL = process.env.LH_MODEL ?? "x-ai/grok-4.1-fast";
|
||||
const FILE = process.env.LH_FILE ?? "crates/vectord/src/pathway_memory.rs";
|
||||
const REPS = Number(process.env.LH_REPS ?? 5);
|
||||
|
||||
interface Condition {
|
||||
label: string;
|
||||
mode: string;
|
||||
corpus?: string | string[];
|
||||
}
|
||||
|
||||
// The four conditions from the header comment, in run order. The last entry
// sends no corpus override so the gateway exercises the modes.toml default
// (A+C composed) path.
// NOTE(review): label strings appear to carry trailing padding for column
// alignment in the progress output; the exact widths look collapsed by
// formatting here — confirm against the original file before relying on them.
const CONDITIONS: Condition[] = [
  { label: "isolation ", mode: "codereview_isolation" },
  { label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" },
  { label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" },
  { label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ },
];
async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> {
|
||||
const body: any = {
|
||||
task_class: "scrum_review",
|
||||
file_path: FILE,
|
||||
force_mode: c.mode,
|
||||
force_model: MODEL,
|
||||
};
|
||||
if (c.corpus !== undefined) body.force_matrix_corpus = c.corpus;
|
||||
|
||||
try {
|
||||
const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify(body),
|
||||
signal: AbortSignal.timeout(240_000),
|
||||
});
|
||||
if (!r.ok) {
|
||||
const txt = await r.text().catch(() => "");
|
||||
return { ok: false, error: `HTTP ${r.status}: ${txt.slice(0, 160)}` };
|
||||
}
|
||||
const j: any = await r.json();
|
||||
return { ok: true, latency_ms: j.latency_ms, resp_chars: (j.response ?? "").length };
|
||||
} catch (e: any) {
|
||||
return { ok: false, error: e.message };
|
||||
}
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const total = CONDITIONS.length * REPS;
|
||||
console.log(`[pass5] file=${FILE}`);
|
||||
console.log(`[pass5] model=${MODEL} · ${CONDITIONS.length} conditions × ${REPS} reps = ${total} runs`);
|
||||
console.log("");
|
||||
|
||||
let i = 0;
|
||||
const startTs = new Date().toISOString();
|
||||
for (let rep = 1; rep <= REPS; rep++) {
|
||||
for (const c of CONDITIONS) {
|
||||
i++;
|
||||
process.stdout.write(` [${i}/${total}] rep=${rep} ${c.label}... `);
|
||||
const r = await runOne(c, rep);
|
||||
if (r.ok) {
|
||||
console.log(`✓ ${r.resp_chars} chars · ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
|
||||
} else {
|
||||
console.log(`✗ ${r.error}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\n[pass5] complete · started ${startTs}`);
|
||||
console.log(`[pass5] aggregate: bun run scripts/mode_compare.ts --since ${startTs}`);
|
||||
}
|
||||
|
||||
main().catch(e => { console.error(e); process.exit(1); });
|
||||
Loading…
x
Reference in New Issue
Block a user