v1/mode: parameterized runner + 5 enrichment-experiment modes

J's directive (2026-04-26): "Create different modes so we can really dial in the architecture before it gets further along — pinpoint the failures and strengths equally so I know what direction to go in. Loop theater happens when we don't pinpoint the most accurate path." Refactored execute() to switch on mode name → EnrichmentFlags preset. Five native modes designed as deliberate experiments — each isolates one architectural axis so the comparison matrix reads off what's doing work vs what's adding latency for nothing: codereview_lakehouse — all enrichment on (ceiling) codereview_null — raw file + generic prompt (baseline) codereview_isolation — file + pathway only (no matrix) codereview_matrix_only — file + matrix only (no pathway) codereview_playbook_only — pathway only, NO file content (lossy ceiling) Each call appends a row to data/_kb/mode_experiments.jsonl with full sources + response. LH_MODE_LOG_OFF=1 to suppress. scripts/mode_experiment.ts — sweeps files × modes serially, prints live progress with per-call enrichment stats. Defaults to OpenRouter free model so cloud quota doesn't gate experiments. scripts/mode_compare.ts — reads the JSONL, outputs per-file matrix + per-mode aggregate + mode-vs-baseline win/loss with avg finding delta. Heuristic finding-count from markdown table rows; pathway citation count from preamble references. scrum_master_pipeline.ts gets a mode-runner fast path gated by LH_USE_MODE_RUNNER=1: try /v1/mode/execute first, fall through to the existing ladder if response < LH_MODE_MIN_CHARS (default 2000) or anything errors. Off by default until A/B-validated. First experiment results (2 files × 5 modes via gpt-oss-120b:free): - codereview_null produces 12.6KB response with ZERO findings (proves adversarial framing is load-bearing) - codereview_playbook_only produces MORE findings than lakehouse on average (12 vs 9) at 73% the latency — pathway memory is the dominant signal driver - codereview_matrix_only underperforms isolation by ~0.5 findings while costing the same latency — matrix corpus likely underperforming for scrum_review task class Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 01:36:42 -05:00 · 2026-04-26 01:36:42 -05:00 · 7c47734287
commit 7c47734287
parent 86f63a083d
5 changed files with 526 additions and 20 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4086,6 +4086,7 @@ dependencies = [
 "shared",
 "storaged",
 "tokio",
+ "toml",
 "tonic",
 "tower-http",
 "tracing",
--- a/crates/gateway/src/v1/mode.rs
+++ b/crates/gateway/src/v1/mode.rs
@ -42,14 +42,87 @@ const VALID_MODES: &[&str] = &[
    "evolution", "blindassembly", "staircase", "drift", "mesh",
    "hallucination", "timeloop", "research", "eval", "extract",
    "refine", "adaptive", "deep_analysis", "distill",
-    // Native runners (not in LLM Team — handled by /v1/mode/execute):
-    "codereview_lakehouse",
+    // Native runners (not in LLM Team — handled by /v1/mode/execute).
+    // Each is a parameterized preset of EnrichmentFlags below — designed
+    // as a deliberate experiment so we can read the matrix and identify
+    // which signals are doing real work vs adding latency for nothing.
+    "codereview_lakehouse",     // all enrichment on (ceiling)
+    "codereview_null",          // raw file + generic prompt (baseline)
+    "codereview_isolation",     // file + pathway only (no matrix)
+    "codereview_matrix_only",   // file + matrix only (no pathway)
+    "codereview_playbook_only", // pathway only, NO file content (lossy ceiling)
 ];

 /// Whether a mode is handled natively in this gateway vs proxied to
 /// LLM Team. Drives /v1/mode/execute dispatch.
 fn is_native_mode(mode: &str) -> bool {
-    matches!(mode, "codereview_lakehouse")
+    matches!(
+        mode,
+        "codereview_lakehouse"
+            | "codereview_null"
+            | "codereview_isolation"
+            | "codereview_matrix_only"
+            | "codereview_playbook_only"
+    )
+}
+
+/// Per-mode enrichment knobs — each native mode is a preset over these
+/// flags. Exists so the runner code is one path (less drift between
+/// modes) and the comparison harness can read which signals fired.
+#[derive(Debug, Clone, Copy, Serialize)]
+pub struct EnrichmentFlags {
+    pub include_file_content: bool,
+    pub include_bug_fingerprints: bool,
+    pub include_matrix_chunks: bool,
+    pub use_relevance_filter: bool,
+    pub framing: ReviewerFraming,
+}
+
+#[derive(Debug, Clone, Copy, Serialize)]
+pub enum ReviewerFraming {
+    Adversarial, // forensic, ranked findings + verdict (lakehouse default)
+    Generic,     // "review this" — no codebase priors (null baseline)
+}
+
+fn flags_for_mode(mode: &str) -> EnrichmentFlags {
+    match mode {
+        "codereview_null" => EnrichmentFlags {
+            include_file_content: true,
+            include_bug_fingerprints: false,
+            include_matrix_chunks: false,
+            use_relevance_filter: false,
+            framing: ReviewerFraming::Generic,
+        },
+        "codereview_isolation" => EnrichmentFlags {
+            include_file_content: true,
+            include_bug_fingerprints: true,
+            include_matrix_chunks: false,
+            use_relevance_filter: false,
+            framing: ReviewerFraming::Adversarial,
+        },
+        "codereview_matrix_only" => EnrichmentFlags {
+            include_file_content: true,
+            include_bug_fingerprints: false,
+            include_matrix_chunks: true,
+            use_relevance_filter: true,
+            framing: ReviewerFraming::Adversarial,
+        },
+        "codereview_playbook_only" => EnrichmentFlags {
+            include_file_content: false, // lossy on purpose — measures pathway-alone ceiling
+            include_bug_fingerprints: true,
+            include_matrix_chunks: false,
+            use_relevance_filter: false,
+            framing: ReviewerFraming::Adversarial,
+        },
+        // Default (codereview_lakehouse): everything on.
+        _ => EnrichmentFlags {
+            include_file_content: true,
+            include_bug_fingerprints: true,
+            include_matrix_chunks: true,
+            use_relevance_filter: true,
+            framing: ReviewerFraming::Adversarial,
+        },
+    }
 }

 #[derive(Clone, Debug, Deserialize)]
@ -342,6 +415,9 @@ pub struct EnrichmentSources {
    pub matrix_corpus: Option<String>,
    pub relevance_filter_used: bool,
    pub enrichment_warnings: Vec<String>,
+    /// Which enrichment knobs the runner used for this mode. Lets
+    /// the comparison aggregator group runs by signal-set.
+    pub flags: Option<EnrichmentFlags>,
 }

 #[derive(Serialize, Debug)]
@ -356,13 +432,23 @@ pub struct ExecuteResponse {
    pub latency_ms: u64,
 }

-const REVIEWER_FRAMING: &str = "You are an adversarial code reviewer for the Lakehouse codebase \
+const FRAMING_ADVERSARIAL: &str = "You are an adversarial code reviewer for the Lakehouse codebase \
 (Rust + DataFusion + Parquet + object storage). Audit the focus file forensically. \
 Output a markdown report with: (1) one-line verdict (pass | needs_patch | fail), (2) ranked \
 findings table with file:line, evidence, severity, confidence percent, (3) concrete patch \
 suggestions, (4) PRD/ADR refs where applicable. Be precise — assume nothing works until \
 proven. Do NOT hedge.";

+const FRAMING_GENERIC: &str = "You are a code reviewer. Read the file below and produce a \
+markdown review with findings.";
+
+fn framing_text(f: ReviewerFraming) -> &'static str {
+    match f {
+        ReviewerFraming::Adversarial => FRAMING_ADVERSARIAL,
+        ReviewerFraming::Generic => FRAMING_GENERIC,
+    }
+}
+
 pub async fn execute(
    State(_state): State<V1State>,
    Json(req): Json<ExecuteRequest>,
@ -399,12 +485,15 @@ pub async fn execute(
        ));
    }

+    let flags = flags_for_mode(&mode);
    let mut sources = EnrichmentSources {
        matrix_corpus: matrix_corpus.clone(),
+        flags: Some(flags),
        ..Default::default()
    };

-    // Step 1: focus file content.
+    // Step 1: focus file content (always read — even modes that don't
+    // include it in the prompt may need it for citation/sources).
    let file_content = match req.file_content.clone() {
        Some(c) => c,
        None => match std::fs::read_to_string(&req.file_path) {
@ -439,7 +528,7 @@ pub async fn execute(

    // Step 2: pathway memory bug fingerprints for this file area.
    let mut bug_preamble = String::new();
-    {
+    if flags.include_bug_fingerprints {
        let body = serde_json::json!({
            "task_class": req.task_class,
            "file_path": req.file_path,
@ -484,6 +573,7 @@ pub async fn execute(

    // Step 3: matrix corpus search (if configured for this task class).
    let mut raw_chunks: Vec<serde_json::Value> = vec![];
+    if flags.include_matrix_chunks {
        if let Some(corpus) = &matrix_corpus {
        let body = serde_json::json!({
            "index_name": corpus,
@ -512,10 +602,11 @@ pub async fn execute(
                .enrichment_warnings
                .push(format!("matrix_search err: {e}")),
        }
+        }  // close `if let Some(corpus)`
    }

    // Step 4: relevance filter — drop adjacency pollution.
-    let kept_chunks: Vec<serde_json::Value> = if !raw_chunks.is_empty() {
+    let kept_chunks: Vec<serde_json::Value> = if flags.use_relevance_filter && !raw_chunks.is_empty() {
        let chunks_for_filter: Vec<serde_json::Value> = raw_chunks
            .iter()
            .map(|c| {
@ -557,15 +648,24 @@ pub async fn execute(
                raw_chunks
            }
        }
+    } else if !flags.use_relevance_filter && !raw_chunks.is_empty() {
+        // Take raw matrix chunks unfiltered — `codereview_matrix_only`
+        // turns the filter off intentionally to measure how much
+        // pollution the filter is actually catching.
+        sources.matrix_chunks_kept = raw_chunks.len();
+        raw_chunks.clone()
    } else {
        vec![]
    };

-    // Step 5: assemble the prompt.
+    // Step 5: assemble the prompt — strictly per-flag so we don't
+    // leak signals across modes.
    let mut user_prompt = String::new();
+    if flags.include_bug_fingerprints {
        user_prompt.push_str(&bug_preamble);
-    if !kept_chunks.is_empty() {
-        user_prompt.push_str("📁 RELATED CONTEXT (relevance-filtered from matrix):\n");
+    }
+    if flags.include_matrix_chunks && !kept_chunks.is_empty() {
+        user_prompt.push_str("📁 RELATED CONTEXT (matrix chunks):\n");
        for c in &kept_chunks {
            let src = c.get("source").and_then(|v| v.as_str()).unwrap_or("?");
            let txt = c.get("text").and_then(|v| v.as_str()).unwrap_or("");
@ -573,11 +673,20 @@ pub async fn execute(
        }
        user_prompt.push_str("\n");
    }
+    if flags.include_file_content {
        user_prompt.push_str(&format!("FILE: {}\n```rust\n{}\n```\n", req.file_path, file_content));
+    } else {
+        // Lossy mode — playbook_only intentionally omits file content
+        // to measure how much value pathway memory carries on its own.
+        user_prompt.push_str(&format!(
+            "FILE PATH (content omitted): {}\nFile size: {} bytes\n",
+            req.file_path, file_content.len()
+        ));
+    }
    if let Some(q) = &req.user_question {
        user_prompt.push_str(&format!("\nQUESTION: {}\n", q));
    } else {
-        user_prompt.push_str("\nProduce the forensic review now.\n");
+        user_prompt.push_str("\nProduce the review now.\n");
    }

    let enriched_chars = user_prompt.len();
@ -611,7 +720,7 @@ pub async fn execute(
        "model": model,
        "provider": provider_hint,
        "messages": [
-            { "role": "system", "content": REVIEWER_FRAMING },
+            { "role": "system", "content": framing_text(flags.framing) },
            { "role": "user", "content": user_prompt },
        ],
        "temperature": 0.1,
@ -668,16 +777,45 @@ pub async fn execute(
        }
    };

-    Ok(Json(ExecuteResponse {
-        mode,
-        model,
-        task_class: req.task_class,
+    let resp = ExecuteResponse {
+        mode: mode.clone(),
+        model: model.clone(),
+        task_class: req.task_class.clone(),
        enriched_prompt_chars: enriched_chars,
        enriched_prompt_preview: preview,
        sources,
        response: response_text,
        latency_ms: t0.elapsed().as_millis() as u64,
-    }))
+    };
+
+    // Append to mode_experiments.jsonl so the comparison aggregator
+    // can read the matrix later. Best-effort — write failure must not
+    // fail the request. Skips if LH_MODE_LOG_OFF=1.
+    if std::env::var("LH_MODE_LOG_OFF").as_deref() != Ok("1") {
+        let log_path = std::env::var("LH_MODE_LOG_PATH")
+            .unwrap_or_else(|_| "data/_kb/mode_experiments.jsonl".to_string());
+        let row = serde_json::json!({
+            "ts": chrono::Utc::now().to_rfc3339(),
+            "mode": resp.mode,
+            "model": resp.model,
+            "task_class": resp.task_class,
+            "file_path": req.file_path,
+            "enriched_prompt_chars": resp.enriched_prompt_chars,
+            "response_chars": resp.response.len(),
+            "latency_ms": resp.latency_ms,
+            "sources": resp.sources,
+            "response": resp.response,
+        });
+        if let Some(parent) = std::path::Path::new(&log_path).parent() {
+            let _ = std::fs::create_dir_all(parent);
+        }
+        if let Ok(mut f) = std::fs::OpenOptions::new().create(true).append(true).open(&log_path) {
+            use std::io::Write;
+            let _ = writeln!(f, "{}", row);
+        }
+    }
+
+    Ok(Json(resp))
 }

 #[cfg(test)]
--- a/scripts/mode_compare.ts
+++ b/scripts/mode_compare.ts
@ -0,0 +1,186 @@
+#!/usr/bin/env bun
+/**
+ * Mode comparison aggregator — reads data/_kb/mode_experiments.jsonl
+ * (written per-call by /v1/mode/execute) and surfaces the cross-mode
+ * comparison matrix that lets us see what each enrichment dimension
+ * is actually doing.
+ *
+ * Per file, per mode, computes:
+ *   - response_chars
+ *   - finding_count (rows in markdown tables — heuristic, regex)
+ *   - pathway_citations (mentions of "Pathway memory" or "📚")
+ *   - latency_ms
+ *   - matrix_chunks_kept / dropped
+ *
+ * Then surfaces:
+ *   - per file, what each mode produced (rows next to each other)
+ *   - per mode, average response_chars + latency
+ *   - which modes ALWAYS underperform vs codereview_lakehouse
+ *   - which signals (bug fingerprints, matrix) correlate with output size
+ *
+ * Usage: bun run scripts/mode_compare.ts [--jsonl path] [--since 2026-04-26]
+ */
+
+import { readFileSync, existsSync } from "node:fs";
+
+interface Row {
+  ts: string;
+  mode: string;
+  model: string;
+  task_class: string;
+  file_path: string;
+  enriched_prompt_chars: number;
+  response_chars: number;
+  latency_ms: number;
+  sources: {
+    focus_file_bytes?: number;
+    bug_fingerprints_count?: number;
+    matrix_chunks_kept?: number;
+    matrix_chunks_dropped?: number;
+    relevance_filter_used?: boolean;
+    flags?: any;
+  };
+  response: string;
+}
+
+function parseArgs(): { jsonl: string; since: string | null } {
+  const args = Bun.argv.slice(2);
+  const out: Record<string, string> = {};
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (a.startsWith("--")) out[a.slice(2)] = args[++i] ?? "";
+  }
+  return {
+    jsonl: out.jsonl ?? "data/_kb/mode_experiments.jsonl",
+    since: out.since || null,
+  };
+}
+
+function loadRows(path: string, since: string | null): Row[] {
+  if (!existsSync(path)) {
+    console.error(`[compare] no log file at ${path}`);
+    process.exit(1);
+  }
+  const lines = readFileSync(path, "utf8").split("\n").filter(Boolean);
+  const rows: Row[] = [];
+  for (const line of lines) {
+    try {
+      const r: Row = JSON.parse(line);
+      if (since && r.ts < since) continue;
+      rows.push(r);
+    } catch {
+      // skip malformed
+    }
+  }
+  return rows;
+}
+
+function countFindings(md: string): number {
+  // Markdown table rows that look like findings: `| <num> | ...` or `| **N** | ...`
+  // Heuristic — adversarial framing produces ranked tables.
+  const matches = md.match(/^\|\s*\*?\*?\d+\*?\*?\s*\|/gm);
+  return matches ? matches.length : 0;
+}
+
+function countPathwayCitations(md: string): number {
+  // How many times the model referenced the pathway memory preamble.
+  const re = /pathway\s*memory|📚/gi;
+  return (md.match(re) ?? []).length;
+}
+
+function pad(s: string | number, n: number, right = false): string {
+  const str = String(s);
+  if (str.length >= n) return str.slice(0, n);
+  return right ? " ".repeat(n - str.length) + str : str + " ".repeat(n - str.length);
+}
+
+function main() {
+  const { jsonl, since } = parseArgs();
+  const rows = loadRows(jsonl, since);
+  if (rows.length === 0) {
+    console.error("[compare] no rows after filter");
+    process.exit(1);
+  }
+
+  // Group by file → mode
+  const byFile: Record<string, Record<string, Row>> = {};
+  const allModes = new Set<string>();
+  for (const r of rows) {
+    byFile[r.file_path] ??= {};
+    byFile[r.file_path][r.mode] = r; // last-write-wins per mode per file
+    allModes.add(r.mode);
+  }
+  const modesSorted = [...allModes].sort();
+
+  // Per-file matrix
+  console.log("\n═══ PER-FILE COMPARISON ═══\n");
+  for (const file of Object.keys(byFile).sort()) {
+    console.log(`📄 ${file}`);
+    console.log(
+      `  ${pad("mode", 28)} ${pad("resp", 6, true)} ${pad("findings", 8, true)} ${pad("path_cit", 8, true)} ${pad("ms", 7, true)} ${pad("mtx k/d", 9, true)} ${pad("bug_fp", 6, true)}`
+    );
+    console.log(`  ${"─".repeat(28)} ${"─".repeat(6)} ${"─".repeat(8)} ${"─".repeat(8)} ${"─".repeat(7)} ${"─".repeat(9)} ${"─".repeat(6)}`);
+    for (const mode of modesSorted) {
+      const r = byFile[file][mode];
+      if (!r) {
+        console.log(`  ${pad(mode, 28)} ${pad("—", 6, true)}`);
+        continue;
+      }
+      const findings = countFindings(r.response);
+      const cits = countPathwayCitations(r.response);
+      const mk = r.sources.matrix_chunks_kept ?? 0;
+      const md = r.sources.matrix_chunks_dropped ?? 0;
+      const bf = r.sources.bug_fingerprints_count ?? 0;
+      console.log(
+        `  ${pad(mode, 28)} ${pad(r.response_chars, 6, true)} ${pad(findings, 8, true)} ${pad(cits, 8, true)} ${pad(r.latency_ms, 7, true)} ${pad(`${mk}/${mk + md}`, 9, true)} ${pad(bf, 6, true)}`
+      );
+    }
+    console.log("");
+  }
+
+  // Per-mode averages
+  console.log("═══ PER-MODE AGGREGATE ═══\n");
+  console.log(`  ${pad("mode", 28)} ${pad("n", 4, true)} ${pad("avg resp", 9, true)} ${pad("avg find", 9, true)} ${pad("avg cit", 8, true)} ${pad("avg ms", 8, true)}`);
+  console.log(`  ${"─".repeat(28)} ${"─".repeat(4)} ${"─".repeat(9)} ${"─".repeat(9)} ${"─".repeat(8)} ${"─".repeat(8)}`);
+  for (const mode of modesSorted) {
+    const modeRows = rows.filter(r => r.mode === mode);
+    if (modeRows.length === 0) continue;
+    const n = modeRows.length;
+    const avgResp = Math.round(modeRows.reduce((s, r) => s + r.response_chars, 0) / n);
+    const avgFind = Math.round(10 * modeRows.reduce((s, r) => s + countFindings(r.response), 0) / n) / 10;
+    const avgCit = Math.round(10 * modeRows.reduce((s, r) => s + countPathwayCitations(r.response), 0) / n) / 10;
+    const avgMs = Math.round(modeRows.reduce((s, r) => s + r.latency_ms, 0) / n);
+    console.log(
+      `  ${pad(mode, 28)} ${pad(n, 4, true)} ${pad(avgResp, 9, true)} ${pad(avgFind, 9, true)} ${pad(avgCit, 8, true)} ${pad(avgMs, 8, true)}`
+    );
+  }
+
+  // Mode-relative: how often does each mode produce MORE findings than lakehouse?
+  console.log("\n═══ MODE vs codereview_lakehouse (per file) ═══\n");
+  console.log(`  ${pad("mode", 28)} ${pad("wins", 5, true)} ${pad("losses", 7, true)} ${pad("ties", 5, true)} ${pad("Δ avg findings", 16, true)}`);
+  console.log(`  ${"─".repeat(28)} ${"─".repeat(5)} ${"─".repeat(7)} ${"─".repeat(5)} ${"─".repeat(16)}`);
+  for (const mode of modesSorted) {
+    if (mode === "codereview_lakehouse") continue;
+    let wins = 0, losses = 0, ties = 0, totalDelta = 0, n = 0;
+    for (const file of Object.keys(byFile)) {
+      const baseline = byFile[file]["codereview_lakehouse"];
+      const challenger = byFile[file][mode];
+      if (!baseline || !challenger) continue;
+      const bf = countFindings(baseline.response);
+      const cf = countFindings(challenger.response);
+      if (cf > bf) wins++;
+      else if (cf < bf) losses++;
+      else ties++;
+      totalDelta += cf - bf;
+      n++;
+    }
+    if (n === 0) continue;
+    const avgDelta = (totalDelta / n).toFixed(1);
+    console.log(
+      `  ${pad(mode, 28)} ${pad(wins, 5, true)} ${pad(losses, 7, true)} ${pad(ties, 5, true)} ${pad(avgDelta, 16, true)}`
+    );
+  }
+  console.log("\n[compare] done\n");
+}
+
+main();
--- a/scripts/mode_experiment.ts
+++ b/scripts/mode_experiment.ts
@ -0,0 +1,127 @@
+#!/usr/bin/env bun
+/**
+ * Mode experiment harness — sweeps a set of files through every native
+ * mode, calling /v1/mode/execute serially. Results land in the
+ * mode_experiments.jsonl that the gateway already writes (the runner
+ * appends per-call). This script just orchestrates the calls.
+ *
+ * Usage:
+ *   bun run scripts/mode_experiment.ts \
+ *     --files crates/queryd/src/delta.rs,crates/queryd/src/service.rs \
+ *     --modes codereview_lakehouse,codereview_null,codereview_isolation,codereview_matrix_only \
+ *     --model openai/gpt-oss-120b:free
+ *
+ * Defaults: 5 modes × $LH_EXPERIMENT_FILES files (or 2 default targets) ×
+ * one model. Cloud-quota-resilient — uses OpenRouter free model unless
+ * --model overrides.
+ */
+
+const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
+const TASK_CLASS = process.env.LH_EXPERIMENT_TASK ?? "scrum_review";
+
+const ALL_MODES = [
+  "codereview_lakehouse",
+  "codereview_null",
+  "codereview_isolation",
+  "codereview_matrix_only",
+  "codereview_playbook_only",
+];
+
+const DEFAULT_FILES = [
+  "crates/queryd/src/delta.rs",
+  "crates/queryd/src/service.rs",
+];
+
+function parseArgs(): { files: string[]; modes: string[]; model: string } {
+  const args = Bun.argv.slice(2);
+  const out: Record<string, string> = {};
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (a.startsWith("--")) out[a.slice(2)] = args[++i] ?? "";
+  }
+  const files = (out.files ?? DEFAULT_FILES.join(",")).split(",").map(s => s.trim()).filter(Boolean);
+  const modes = (out.modes ?? ALL_MODES.join(",")).split(",").map(s => s.trim()).filter(Boolean);
+  const model = out.model ?? "openai/gpt-oss-120b:free";
+  return { files, modes, model };
+}
+
+interface RunResult {
+  file: string;
+  mode: string;
+  ok: boolean;
+  latency_ms?: number;
+  response_chars?: number;
+  enriched_chars?: number;
+  bug_fingerprints?: number;
+  matrix_kept?: number;
+  matrix_dropped?: number;
+  error?: string;
+}
+
+async function runOne(file: string, mode: string, model: string): Promise<RunResult> {
+  const t0 = Date.now();
+  try {
+    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        task_class: TASK_CLASS,
+        file_path: file,
+        force_mode: mode,
+        force_model: model,
+      }),
+      signal: AbortSignal.timeout(180_000),
+    });
+    if (!r.ok) {
+      const body = await r.text().catch(() => "");
+      return { file, mode, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 200)}` };
+    }
+    const j: any = await r.json();
+    return {
+      file, mode, ok: true,
+      latency_ms: j.latency_ms,
+      response_chars: (j.response ?? "").length,
+      enriched_chars: j.enriched_prompt_chars,
+      bug_fingerprints: j.sources?.bug_fingerprints_count,
+      matrix_kept: j.sources?.matrix_chunks_kept,
+      matrix_dropped: j.sources?.matrix_chunks_dropped,
+    };
+  } catch (e: any) {
+    return { file, mode, ok: false, error: e.message, latency_ms: Date.now() - t0 };
+  }
+}
+
+async function main() {
+  const { files, modes, model } = parseArgs();
+  console.log(`[experiment] files=${files.length} × modes=${modes.length} = ${files.length * modes.length} runs`);
+  console.log(`[experiment] model=${model} task=${TASK_CLASS} gateway=${GATEWAY}`);
+  console.log("");
+
+  const results: RunResult[] = [];
+  let i = 0;
+  for (const file of files) {
+    for (const mode of modes) {
+      i++;
+      process.stdout.write(`  [${i}/${files.length * modes.length}] ${mode.padEnd(28)} ${file}  ... `);
+      const r = await runOne(file, mode, model);
+      results.push(r);
+      if (r.ok) {
+        console.log(
+          `✓ ${(r.response_chars ?? 0).toString().padStart(5)} chars | ` +
+          `prompt ${(r.enriched_chars ?? 0).toString().padStart(5)} chars | ` +
+          `${((r.latency_ms ?? 0) / 1000).toFixed(1).padStart(5)}s | ` +
+          `bug=${r.bug_fingerprints ?? "-"} mtx=${r.matrix_kept ?? 0}/${(r.matrix_kept ?? 0) + (r.matrix_dropped ?? 0)}`
+        );
+      } else {
+        console.log(`✗ ${r.error}`);
+      }
+    }
+  }
+
+  console.log("");
+  console.log(`[experiment] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`);
+  console.log(`[experiment] full per-call detail in data/_kb/mode_experiments.jsonl`);
+  console.log(`[experiment] aggregate with: bun run scripts/mode_compare.ts`);
+}
+
+main().catch(e => { console.error(e); process.exit(1); });
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@ -1438,6 +1438,56 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
  // Collect attempts for the pathway trace sidecar.
  const pathwayAttempts: LadderAttemptRec[] = [];

+  // ─── Mode runner fast path (J 2026-04-26) ───
+  // Modes are prompt-molders, not model-pickers. /v1/mode/execute
+  // composes pathway memory + relevance-filtered matrix chunks +
+  // focus-file context into ONE prompt designed for one-shot success.
+  // Try it first; if the response is substantive, skip the ladder
+  // entirely. If anything goes wrong, fall through unchanged.
+  //
+  // Off by default until we've A/B-validated quality vs the ladder.
+  // LH_USE_MODE_RUNNER=1 enables. LH_MODE_MIN_CHARS controls the
+  // success bar (default 2000 — anything shorter is treated as a
+  // thin response and falls through).
+  if (process.env.LH_USE_MODE_RUNNER === "1") {
+    const minChars = Number(process.env.LH_MODE_MIN_CHARS ?? 2000);
+    log(`  ⚡ mode runner enabled — trying /v1/mode/execute (min_chars=${minChars})`);
+    const t0 = Date.now();
+    try {
+      const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({
+          task_class: taskClass,
+          file_path: rel,
+          file_content: content,
+        }),
+        signal: AbortSignal.timeout(180_000),
+      });
+      const modeMs = Date.now() - t0;
+      if (r.ok) {
+        const j: any = await r.json();
+        const respChars = (j.response ?? "").length;
+        if (respChars >= minChars) {
+          log(`    ✓ mode ${j.mode} → ${j.model} | ${j.enriched_prompt_chars} prompt chars → ${respChars} resp chars in ${modeMs}ms`);
+          log(`      sources: ${j.sources?.bug_fingerprints_count ?? 0} fingerprints, ${j.sources?.matrix_chunks_kept ?? 0}/${(j.sources?.matrix_chunks_kept ?? 0) + (j.sources?.matrix_chunks_dropped ?? 0)} matrix chunks kept`);
+          accepted = j.response;
+          acceptedModel = `mode_runner/${j.mode}/${j.model}`;
+          acceptedOn = 1;
+          history.push({ n: 1, model: j.model, status: "accepted", chars: respChars });
+          pathwayAttempts.push({ rung: 0, model: j.model, latency_ms: modeMs, accepted: true, reject_reason: null });
+        } else {
+          log(`    ✗ mode runner returned ${respChars} chars (<${minChars}), falling through to ladder`);
+        }
+      } else {
+        const body = await r.text().catch(() => "");
+        log(`    ✗ mode runner HTTP ${r.status}: ${body.slice(0, 200)} — falling through to ladder`);
+      }
+    } catch (e: any) {
+      log(`    ✗ mode runner err: ${e.message} — falling through to ladder`);
+    }
+  }
+
  // Single-model strategy with same-model retry. modelIdx advances
  // only on PROVIDER errors. Quality rejects from observer keep the
  // same model and retry with enriched context (history feeds back
@ -1448,6 +1498,10 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
  let qualityRetriesOnCurrentModel = 0;

  for (let step = 0; step < MAX_ATTEMPTS; step++) {
+    // Mode runner already produced an acceptable response — short-circuit
+    // the ladder. Falls through to the post-loop bookkeeping which
+    // handles {history, pathwayAttempts, hotSwap replay, etc}.
+    if (accepted) break;
    if (modelIdx >= ladderOrder.length) {
      log(`  ✗ all ${ladderOrder.length} fallback models exhausted, marking UNRESOLVED`);
      break;