diff --git a/Cargo.lock b/Cargo.lock
index d2b4532..8e46134 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4086,6 +4086,7 @@ dependencies = [
 "shared",
 "storaged",
 "tokio",
+ "toml",
 "tonic",
 "tower-http",
 "tracing",
diff --git a/crates/gateway/src/v1/mode.rs b/crates/gateway/src/v1/mode.rs
index 294ccd7..ebe74e7 100644
--- a/crates/gateway/src/v1/mode.rs
+++ b/crates/gateway/src/v1/mode.rs
@@ -42,14 +42,87 @@ const VALID_MODES: &[&str] = &[
     "evolution", "blindassembly", "staircase", "drift", "mesh",
     "hallucination", "timeloop", "research", "eval", "extract",
     "refine", "adaptive", "deep_analysis", "distill",
-    // Native runners (not in LLM Team — handled by /v1/mode/execute):
-    "codereview_lakehouse",
+    // Native runners (not in LLM Team — handled by /v1/mode/execute).
+    // Each is a parameterized preset of EnrichmentFlags below — designed
+    // as a deliberate experiment so we can read the matrix and identify
+    // which signals are doing real work vs adding latency for nothing.
+    "codereview_lakehouse",     // all enrichment on (ceiling)
+    "codereview_null",          // raw file + generic prompt (baseline)
+    "codereview_isolation",     // file + pathway only (no matrix)
+    "codereview_matrix_only",   // file + matrix only (no pathway)
+    "codereview_playbook_only", // pathway only, NO file content (lossy ceiling)
 ];
 
 /// Whether a mode is handled natively in this gateway vs proxied to
 /// LLM Team. Drives /v1/mode/execute dispatch.
 fn is_native_mode(mode: &str) -> bool {
-    matches!(mode, "codereview_lakehouse")
+    matches!(
+        mode,
+        "codereview_lakehouse"
+            | "codereview_null"
+            | "codereview_isolation"
+            | "codereview_matrix_only"
+            | "codereview_playbook_only"
+    )
+}
+
+/// Per-mode enrichment knobs — each native mode is a preset over these
+/// flags. Exists so the runner code is one path (less drift between
+/// modes) and the comparison harness can read which signals fired.
+#[derive(Debug, Clone, Copy, Serialize)]
+pub struct EnrichmentFlags {
+    pub include_file_content: bool,
+    pub include_bug_fingerprints: bool,
+    pub include_matrix_chunks: bool,
+    pub use_relevance_filter: bool,
+    pub framing: ReviewerFraming,
+}
+
+#[derive(Debug, Clone, Copy, Serialize)]
+pub enum ReviewerFraming {
+    Adversarial, // forensic, ranked findings + verdict (lakehouse default)
+    Generic,     // "review this" — no codebase priors (null baseline)
+}
+
+fn flags_for_mode(mode: &str) -> EnrichmentFlags {
+    match mode {
+        "codereview_null" => EnrichmentFlags {
+            include_file_content: true,
+            include_bug_fingerprints: false,
+            include_matrix_chunks: false,
+            use_relevance_filter: false,
+            framing: ReviewerFraming::Generic,
+        },
+        "codereview_isolation" => EnrichmentFlags {
+            include_file_content: true,
+            include_bug_fingerprints: true,
+            include_matrix_chunks: false,
+            use_relevance_filter: false,
+            framing: ReviewerFraming::Adversarial,
+        },
+        "codereview_matrix_only" => EnrichmentFlags {
+            include_file_content: true,
+            include_bug_fingerprints: false,
+            include_matrix_chunks: true,
+            use_relevance_filter: true,
+            framing: ReviewerFraming::Adversarial,
+        },
+        "codereview_playbook_only" => EnrichmentFlags {
+            include_file_content: false, // lossy on purpose — measures pathway-alone ceiling
+            include_bug_fingerprints: true,
+            include_matrix_chunks: false,
+            use_relevance_filter: false,
+            framing: ReviewerFraming::Adversarial,
+        },
+        // Default (codereview_lakehouse): everything on.
+        _ => EnrichmentFlags {
+            include_file_content: true,
+            include_bug_fingerprints: true,
+            include_matrix_chunks: true,
+            use_relevance_filter: true,
+            framing: ReviewerFraming::Adversarial,
+        },
+    }
 }
 
 #[derive(Clone, Debug, Deserialize)]
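The five presets above form a small signal matrix. For reference when reading the JSONL log, here is the same matrix from the consumer side, as a TypeScript sketch. It assumes the default `#[derive(Serialize)]` output for `EnrichmentFlags` (Rust field names pass through unchanged, enum variants serialize as `"Adversarial"` / `"Generic"`); the `EXPECTED_FLAGS` name is hypothetical, not part of this change.

```ts
// Hypothetical mirror of flags_for_mode() for log consumers. Field names
// assume serde's default derive(Serialize) behavior for EnrichmentFlags.
type Framing = "Adversarial" | "Generic";
interface Flags {
  include_file_content: boolean;
  include_bug_fingerprints: boolean;
  include_matrix_chunks: boolean;
  use_relevance_filter: boolean;
  framing: Framing;
}

const EXPECTED_FLAGS: Record<string, Flags> = {
  codereview_lakehouse:     { include_file_content: true,  include_bug_fingerprints: true,  include_matrix_chunks: true,  use_relevance_filter: true,  framing: "Adversarial" },
  codereview_null:          { include_file_content: true,  include_bug_fingerprints: false, include_matrix_chunks: false, use_relevance_filter: false, framing: "Generic" },
  codereview_isolation:     { include_file_content: true,  include_bug_fingerprints: true,  include_matrix_chunks: false, use_relevance_filter: false, framing: "Adversarial" },
  codereview_matrix_only:   { include_file_content: true,  include_bug_fingerprints: false, include_matrix_chunks: true,  use_relevance_filter: true,  framing: "Adversarial" },
  codereview_playbook_only: { include_file_content: false, include_bug_fingerprints: true,  include_matrix_chunks: false, use_relevance_filter: false, framing: "Adversarial" },
};
```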
@@ -342,6 +415,9 @@ pub struct EnrichmentSources {
     pub matrix_corpus: Option<String>,
     pub relevance_filter_used: bool,
     pub enrichment_warnings: Vec<String>,
+    /// Which enrichment knobs the runner used for this mode. Lets
+    /// the comparison aggregator group runs by signal-set.
+    pub flags: Option<EnrichmentFlags>,
 }
 
 #[derive(Serialize, Debug)]
@@ -356,13 +432,23 @@ pub struct ExecuteResponse {
     pub latency_ms: u64,
 }
 
-const REVIEWER_FRAMING: &str = "You are an adversarial code reviewer for the Lakehouse codebase \
+const FRAMING_ADVERSARIAL: &str = "You are an adversarial code reviewer for the Lakehouse codebase \
 (Rust + DataFusion + Parquet + object storage). Audit the focus file forensically. \
 Output a markdown report with: (1) one-line verdict (pass | needs_patch | fail), (2) ranked \
 findings table with file:line, evidence, severity, confidence percent, (3) concrete patch \
 suggestions, (4) PRD/ADR refs where applicable. Be precise — assume nothing works until \
 proven. Do NOT hedge.";
 
+const FRAMING_GENERIC: &str = "You are a code reviewer. Read the file below and produce a \
+markdown review with findings.";
+
+fn framing_text(f: ReviewerFraming) -> &'static str {
+    match f {
+        ReviewerFraming::Adversarial => FRAMING_ADVERSARIAL,
+        ReviewerFraming::Generic => FRAMING_GENERIC,
+    }
+}
+
 pub async fn execute(
     State(_state): State<AppState>,
     Json(req): Json<ExecuteRequest>,
@@ -399,12 +485,15 @@ pub async fn execute(
         ));
     }
 
+    let flags = flags_for_mode(&mode);
     let mut sources = EnrichmentSources {
         matrix_corpus: matrix_corpus.clone(),
+        flags: Some(flags),
        ..Default::default()
     };
 
-    // Step 1: focus file content.
+    // Step 1: focus file content (always read — even modes that don't
+    // include it in the prompt may need it for citation/sources).
     let file_content = match req.file_content.clone() {
         Some(c) => c,
         None => match std::fs::read_to_string(&req.file_path) {
@@ -439,7 +528,7 @@ pub async fn execute(
 
     // Step 2: pathway memory bug fingerprints for this file area.
     let mut bug_preamble = String::new();
-    {
+    if flags.include_bug_fingerprints {
         let body = serde_json::json!({
             "task_class": req.task_class,
             "file_path": req.file_path,
@@ -484,7 +573,8 @@ pub async fn execute(
 
     // Step 3: matrix corpus search (if configured for this task class).
     let mut raw_chunks: Vec<serde_json::Value> = vec![];
-    if let Some(corpus) = &matrix_corpus {
+    if flags.include_matrix_chunks {
+        if let Some(corpus) = &matrix_corpus {
             let body = serde_json::json!({
                 "index_name": corpus,
                 "query": format!("{} {}\n{}", req.task_class, req.file_path, &file_content[..file_content.len().min(500)]),
@@ -512,10 +602,11 @@ pub async fn execute(
                     .enrichment_warnings
                     .push(format!("matrix_search err: {e}")),
             }
+        } // close `if let Some(corpus)`
     }
 
     // Step 4: relevance filter — drop adjacency pollution.
-    let kept_chunks: Vec<serde_json::Value> = if !raw_chunks.is_empty() {
+    let kept_chunks: Vec<serde_json::Value> = if flags.use_relevance_filter && !raw_chunks.is_empty() {
        let chunks_for_filter: Vec<serde_json::Value> = raw_chunks
            .iter()
            .map(|c| {
@@ -557,15 +648,24 @@ pub async fn execute(
                 raw_chunks
             }
         }
+    } else if !flags.use_relevance_filter && !raw_chunks.is_empty() {
+        // Take raw matrix chunks unfiltered — lets a preset keep matrix
+        // chunks on with the filter off, to measure how much pollution
+        // the filter is actually catching. (None of the current five
+        // presets do this; codereview_matrix_only keeps the filter on.)
+        sources.matrix_chunks_kept = raw_chunks.len();
+        raw_chunks.clone()
     } else {
         vec![]
     };
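Because `sources` records kept and dropped counts whenever the filter runs, the catch rate is directly readable from the log. A minimal sketch, assuming the JSONL layout the runner writes below (path and field names as in this diff):

```ts
// Sketch: per-mode relevance-filter catch rate, read from the runner's log.
// Assumes data/_kb/mode_experiments.jsonl rows shaped like mode_compare.ts's Row.
import { readFileSync } from "node:fs";

const rows: any[] = readFileSync("data/_kb/mode_experiments.jsonl", "utf8")
  .split("\n")
  .filter(Boolean)
  .map(line => JSON.parse(line));

const tally: Record<string, { kept: number; dropped: number }> = {};
for (const r of rows) {
  const t = (tally[r.mode] ??= { kept: 0, dropped: 0 });
  t.kept += r.sources?.matrix_chunks_kept ?? 0;
  t.dropped += r.sources?.matrix_chunks_dropped ?? 0;
}
for (const [mode, { kept, dropped }] of Object.entries(tally)) {
  const total = kept + dropped;
  if (total === 0) continue; // modes with matrix off never fetch chunks
  console.log(`${mode}: dropped ${dropped}/${total} (${((100 * dropped) / total).toFixed(0)}% caught)`);
}
```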
 
-    // Step 5: assemble the prompt.
+    // Step 5: assemble the prompt — strictly per-flag so we don't
+    // leak signals across modes.
     let mut user_prompt = String::new();
-    user_prompt.push_str(&bug_preamble);
-    if !kept_chunks.is_empty() {
-        user_prompt.push_str("📁 RELATED CONTEXT (relevance-filtered from matrix):\n");
+    if flags.include_bug_fingerprints {
+        user_prompt.push_str(&bug_preamble);
+    }
+    if flags.include_matrix_chunks && !kept_chunks.is_empty() {
+        user_prompt.push_str("📁 RELATED CONTEXT (matrix chunks):\n");
         for c in &kept_chunks {
             let src = c.get("source").and_then(|v| v.as_str()).unwrap_or("?");
             let txt = c.get("text").and_then(|v| v.as_str()).unwrap_or("");
@@ -573,11 +673,20 @@ pub async fn execute(
         }
         user_prompt.push_str("\n");
     }
-    user_prompt.push_str(&format!("FILE: {}\n```rust\n{}\n```\n", req.file_path, file_content));
+    if flags.include_file_content {
+        user_prompt.push_str(&format!("FILE: {}\n```rust\n{}\n```\n", req.file_path, file_content));
+    } else {
+        // Lossy mode — playbook_only intentionally omits file content
+        // to measure how much value pathway memory carries on its own.
+        user_prompt.push_str(&format!(
+            "FILE PATH (content omitted): {}\nFile size: {} bytes\n",
+            req.file_path, file_content.len()
+        ));
+    }
     if let Some(q) = &req.user_question {
         user_prompt.push_str(&format!("\nQUESTION: {}\n", q));
     } else {
-        user_prompt.push_str("\nProduce the forensic review now.\n");
+        user_prompt.push_str("\nProduce the review now.\n");
     }
     let enriched_chars = user_prompt.len();
 
@@ -611,7 +720,7 @@ pub async fn execute(
         "model": model,
         "provider": provider_hint,
         "messages": [
-            { "role": "system", "content": REVIEWER_FRAMING },
+            { "role": "system", "content": framing_text(flags.framing) },
             { "role": "user", "content": user_prompt },
         ],
         "temperature": 0.1,
@@ -668,16 +777,45 @@ pub async fn execute(
         }
     };
 
-    Ok(Json(ExecuteResponse {
-        mode,
-        model,
-        task_class: req.task_class,
+    let resp = ExecuteResponse {
+        mode: mode.clone(),
+        model: model.clone(),
+        task_class: req.task_class.clone(),
         enriched_prompt_chars: enriched_chars,
         enriched_prompt_preview: preview,
         sources,
         response: response_text,
         latency_ms: t0.elapsed().as_millis() as u64,
-    }))
+    };
+
+    // Append to mode_experiments.jsonl so the comparison aggregator
+    // can read the matrix later. Best-effort — write failure must not
+    // fail the request. Skips if LH_MODE_LOG_OFF=1.
+    // (Fields are borrowed here so `resp` is still whole for the
+    // `Ok(Json(resp))` below.)
+    if std::env::var("LH_MODE_LOG_OFF").as_deref() != Ok("1") {
+        let log_path = std::env::var("LH_MODE_LOG_PATH")
+            .unwrap_or_else(|_| "data/_kb/mode_experiments.jsonl".to_string());
+        let row = serde_json::json!({
+            "ts": chrono::Utc::now().to_rfc3339(),
+            "mode": &resp.mode,
+            "model": &resp.model,
+            "task_class": &resp.task_class,
+            "file_path": &req.file_path,
+            "enriched_prompt_chars": resp.enriched_prompt_chars,
+            "response_chars": resp.response.len(),
+            "latency_ms": resp.latency_ms,
+            "sources": &resp.sources,
+            "response": &resp.response,
+        });
+        if let Some(parent) = std::path::Path::new(&log_path).parent() {
+            let _ = std::fs::create_dir_all(parent);
+        }
+        if let Ok(mut f) = std::fs::OpenOptions::new().create(true).append(true).open(&log_path) {
+            use std::io::Write;
+            let _ = writeln!(f, "{}", row);
+        }
+    }
+
+    Ok(Json(resp))
 }
 
 #[cfg(test)]
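The `flags` field logged with each row is what lets the aggregator group by signal-set rather than by mode name, as the doc comment on `EnrichmentSources.flags` intends. A sketch of that grouping, under the same JSONL-layout assumption as above:

```ts
// Sketch: group logged runs by serialized flag-set instead of mode name.
// Assumes sources.flags round-trips as plain JSON with stable key order
// (serde serializes struct fields in declaration order).
import { readFileSync } from "node:fs";

const rows: any[] = readFileSync("data/_kb/mode_experiments.jsonl", "utf8")
  .split("\n")
  .filter(Boolean)
  .map(line => JSON.parse(line));

const groups = new Map<string, { modes: Set<string>; n: number; respChars: number }>();
for (const r of rows) {
  const sig = JSON.stringify(r.sources?.flags ?? null);
  const g = groups.get(sig) ?? { modes: new Set<string>(), n: 0, respChars: 0 };
  g.modes.add(r.mode);
  g.n += 1;
  g.respChars += r.response_chars;
  groups.set(sig, g);
}
for (const [sig, g] of groups) {
  console.log(`${[...g.modes].join(",")} n=${g.n} avg_resp=${Math.round(g.respChars / g.n)}`);
  console.log(`  flags=${sig}`);
}
```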
diff --git a/scripts/mode_compare.ts b/scripts/mode_compare.ts
new file mode 100644
index 0000000..8560675
--- /dev/null
+++ b/scripts/mode_compare.ts
@@ -0,0 +1,186 @@
+#!/usr/bin/env bun
+/**
+ * Mode comparison aggregator — reads data/_kb/mode_experiments.jsonl
+ * (written per-call by /v1/mode/execute) and surfaces the cross-mode
+ * comparison matrix that lets us see what each enrichment dimension
+ * is actually doing.
+ *
+ * Per file, per mode, computes:
+ *   - response_chars
+ *   - finding_count (rows in markdown tables — heuristic, regex)
+ *   - pathway_citations (mentions of "Pathway memory" or "📚")
+ *   - latency_ms
+ *   - matrix_chunks_kept / dropped
+ *
+ * Then surfaces:
+ *   - per file, what each mode produced (rows next to each other)
+ *   - per mode, average response_chars + latency
+ *   - which modes ALWAYS underperform vs codereview_lakehouse
+ *   - which signals (bug fingerprints, matrix) correlate with output size
+ *
+ * Usage: bun run scripts/mode_compare.ts [--jsonl path] [--since 2026-04-26]
+ */
+
+import { readFileSync, existsSync } from "node:fs";
+
+interface Row {
+  ts: string;
+  mode: string;
+  model: string;
+  task_class: string;
+  file_path: string;
+  enriched_prompt_chars: number;
+  response_chars: number;
+  latency_ms: number;
+  sources: {
+    focus_file_bytes?: number;
+    bug_fingerprints_count?: number;
+    matrix_chunks_kept?: number;
+    matrix_chunks_dropped?: number;
+    relevance_filter_used?: boolean;
+    flags?: any;
+  };
+  response: string;
+}
+
+function parseArgs(): { jsonl: string; since: string | null } {
+  const args = Bun.argv.slice(2);
+  const out: Record<string, string> = {};
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (a.startsWith("--")) out[a.slice(2)] = args[++i] ?? "";
+  }
+  return {
+    jsonl: out.jsonl ?? "data/_kb/mode_experiments.jsonl",
+    since: out.since || null,
+  };
+}
+
+function loadRows(path: string, since: string | null): Row[] {
+  if (!existsSync(path)) {
+    console.error(`[compare] no log file at ${path}`);
+    process.exit(1);
+  }
+  const lines = readFileSync(path, "utf8").split("\n").filter(Boolean);
+  const rows: Row[] = [];
+  for (const line of lines) {
+    try {
+      const r: Row = JSON.parse(line);
+      if (since && r.ts < since) continue;
+      rows.push(r);
+    } catch {
+      // skip malformed
+    }
+  }
+  return rows;
+}
+
+function countFindings(md: string): number {
+  // Markdown table rows that look like findings: `| N | ...` or `| **N** | ...`
+  // Heuristic — adversarial framing produces ranked tables.
+  const matches = md.match(/^\|\s*\*?\*?\d+\*?\*?\s*\|/gm);
+  return matches ? matches.length : 0;
+}
+
+function countPathwayCitations(md: string): number {
+  // How many times the model referenced the pathway memory preamble.
+  const re = /pathway\s*memory|📚/gi;
+  return (md.match(re) ?? []).length;
+}
+
+function pad(s: string | number, n: number, right = false): string {
+  const str = String(s);
+  if (str.length >= n) return str.slice(0, n);
+  return right ? " ".repeat(n - str.length) + str : str + " ".repeat(n - str.length);
+}
+
+function main() {
+  const { jsonl, since } = parseArgs();
+  const rows = loadRows(jsonl, since);
+  if (rows.length === 0) {
+    console.error("[compare] no rows after filter");
+    process.exit(1);
+  }
+
+  // Group by file → mode
+  const byFile: Record<string, Record<string, Row>> = {};
+  const allModes = new Set<string>();
+  for (const r of rows) {
+    byFile[r.file_path] ??= {};
+    byFile[r.file_path][r.mode] = r; // last-write-wins per mode per file
+    allModes.add(r.mode);
+  }
+  const modesSorted = [...allModes].sort();
+
+  // Per-file matrix
+  console.log("\n═══ PER-FILE COMPARISON ═══\n");
+  for (const file of Object.keys(byFile).sort()) {
+    console.log(`📄 ${file}`);
+    console.log(
+      `  ${pad("mode", 28)} ${pad("resp", 6, true)} ${pad("findings", 8, true)} ${pad("path_cit", 8, true)} ${pad("ms", 7, true)} ${pad("mtx k/d", 9, true)} ${pad("bug_fp", 6, true)}`
+    );
+    console.log(`  ${"─".repeat(28)} ${"─".repeat(6)} ${"─".repeat(8)} ${"─".repeat(8)} ${"─".repeat(7)} ${"─".repeat(9)} ${"─".repeat(6)}`);
+    for (const mode of modesSorted) {
+      const r = byFile[file][mode];
+      if (!r) {
+        console.log(`  ${pad(mode, 28)} ${pad("—", 6, true)}`);
+        continue;
+      }
+      const findings = countFindings(r.response);
+      const cits = countPathwayCitations(r.response);
+      const mk = r.sources.matrix_chunks_kept ?? 0;
+      const md = r.sources.matrix_chunks_dropped ?? 0;
+      const bf = r.sources.bug_fingerprints_count ?? 0;
+      console.log(
+        `  ${pad(mode, 28)} ${pad(r.response_chars, 6, true)} ${pad(findings, 8, true)} ${pad(cits, 8, true)} ${pad(r.latency_ms, 7, true)} ${pad(`${mk}/${mk + md}`, 9, true)} ${pad(bf, 6, true)}`
+      );
+    }
+    console.log("");
+  }
+
+  // Per-mode averages
+  console.log("═══ PER-MODE AGGREGATE ═══\n");
+  console.log(`  ${pad("mode", 28)} ${pad("n", 4, true)} ${pad("avg resp", 9, true)} ${pad("avg find", 9, true)} ${pad("avg cit", 8, true)} ${pad("avg ms", 8, true)}`);
+  console.log(`  ${"─".repeat(28)} ${"─".repeat(4)} ${"─".repeat(9)} ${"─".repeat(9)} ${"─".repeat(8)} ${"─".repeat(8)}`);
+  for (const mode of modesSorted) {
+    const modeRows = rows.filter(r => r.mode === mode);
+    if (modeRows.length === 0) continue;
+    const n = modeRows.length;
+    const avgResp = Math.round(modeRows.reduce((s, r) => s + r.response_chars, 0) / n);
+    const avgFind = Math.round(10 * modeRows.reduce((s, r) => s + countFindings(r.response), 0) / n) / 10;
+    const avgCit = Math.round(10 * modeRows.reduce((s, r) => s + countPathwayCitations(r.response), 0) / n) / 10;
+    const avgMs = Math.round(modeRows.reduce((s, r) => s + r.latency_ms, 0) / n);
+    console.log(
+      `  ${pad(mode, 28)} ${pad(n, 4, true)} ${pad(avgResp, 9, true)} ${pad(avgFind, 9, true)} ${pad(avgCit, 8, true)} ${pad(avgMs, 8, true)}`
+    );
+  }
+
+  // Mode-relative: how often does each mode produce MORE findings than lakehouse?
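`countFindings` is deliberately a cheap heuristic: it counts markdown table rows whose first cell is a bare or bold-wrapped number, which matches the ranked-findings table the adversarial framing asks for. A quick self-contained check of what it does and does not count (the sample rows are hypothetical):

```ts
// The same pattern countFindings uses, applied to a hypothetical report
// fragment: numbered rows count; header, separator, and non-numeric rows don't.
const sample = [
  "| # | finding | severity |",
  "|---|---------|----------|",
  "| 1 | missing await on flush | high |",
  "| **2** | off-by-one in chunk window | med |",
  "| n/a | style nit | low |",
].join("\n");

const re = /^\|\s*\*?\*?\d+\*?\*?\s*\|/gm;
console.log(sample.match(re)?.length ?? 0); // → 2
```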
+  console.log("\n═══ MODE vs codereview_lakehouse (per file) ═══\n");
+  console.log(`  ${pad("mode", 28)} ${pad("wins", 5, true)} ${pad("losses", 7, true)} ${pad("ties", 5, true)} ${pad("Δ avg findings", 16, true)}`);
+  console.log(`  ${"─".repeat(28)} ${"─".repeat(5)} ${"─".repeat(7)} ${"─".repeat(5)} ${"─".repeat(16)}`);
+  for (const mode of modesSorted) {
+    if (mode === "codereview_lakehouse") continue;
+    let wins = 0, losses = 0, ties = 0, totalDelta = 0, n = 0;
+    for (const file of Object.keys(byFile)) {
+      const baseline = byFile[file]["codereview_lakehouse"];
+      const challenger = byFile[file][mode];
+      if (!baseline || !challenger) continue;
+      const bf = countFindings(baseline.response);
+      const cf = countFindings(challenger.response);
+      if (cf > bf) wins++;
+      else if (cf < bf) losses++;
+      else ties++;
+      totalDelta += cf - bf;
+      n++;
+    }
+    if (n === 0) continue;
+    const avgDelta = (totalDelta / n).toFixed(1);
+    console.log(
+      `  ${pad(mode, 28)} ${pad(wins, 5, true)} ${pad(losses, 7, true)} ${pad(ties, 5, true)} ${pad(avgDelta, 16, true)}`
+    );
+  }
+  console.log("\n[compare] done\n");
+}
+
+main();
diff --git a/scripts/mode_experiment.ts b/scripts/mode_experiment.ts
new file mode 100644
index 0000000..f4a1f76
--- /dev/null
+++ b/scripts/mode_experiment.ts
@@ -0,0 +1,127 @@
+#!/usr/bin/env bun
+/**
+ * Mode experiment harness — sweeps a set of files through every native
+ * mode, calling /v1/mode/execute serially. Results land in the
+ * mode_experiments.jsonl that the gateway already writes (the runner
+ * appends per-call). This script just orchestrates the calls.
+ *
+ * Usage:
+ *   bun run scripts/mode_experiment.ts \
+ *     --files crates/queryd/src/delta.rs,crates/queryd/src/service.rs \
+ *     --modes codereview_lakehouse,codereview_null,codereview_isolation,codereview_matrix_only \
+ *     --model openai/gpt-oss-120b:free
+ *
+ * Defaults: all 5 native modes × the 2 default target files × one model.
+ * Cloud-quota-resilient — uses an OpenRouter free model unless --model
+ * overrides.
+ */
+
+const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
+const TASK_CLASS = process.env.LH_EXPERIMENT_TASK ?? "scrum_review";
+
+const ALL_MODES = [
+  "codereview_lakehouse",
+  "codereview_null",
+  "codereview_isolation",
+  "codereview_matrix_only",
+  "codereview_playbook_only",
+];
+
+const DEFAULT_FILES = [
+  "crates/queryd/src/delta.rs",
+  "crates/queryd/src/service.rs",
+];
+
+function parseArgs(): { files: string[]; modes: string[]; model: string } {
+  const args = Bun.argv.slice(2);
+  const out: Record<string, string> = {};
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (a.startsWith("--")) out[a.slice(2)] = args[++i] ?? "";
+  }
+  const files = (out.files ?? DEFAULT_FILES.join(",")).split(",").map(s => s.trim()).filter(Boolean);
+  const modes = (out.modes ?? ALL_MODES.join(",")).split(",").map(s => s.trim()).filter(Boolean);
+  const model = out.model ?? "openai/gpt-oss-120b:free";
+  return { files, modes, model };
+}
+
+interface RunResult {
+  file: string;
+  mode: string;
+  ok: boolean;
+  latency_ms?: number;
+  response_chars?: number;
+  enriched_chars?: number;
+  bug_fingerprints?: number;
+  matrix_kept?: number;
+  matrix_dropped?: number;
+  error?: string;
+}
+
+async function runOne(file: string, mode: string, model: string): Promise<RunResult> {
+  const t0 = Date.now();
+  try {
+    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        task_class: TASK_CLASS,
+        file_path: file,
+        force_mode: mode,
+        force_model: model,
+      }),
+      signal: AbortSignal.timeout(180_000),
+    });
+    if (!r.ok) {
+      const body = await r.text().catch(() => "");
+      return { file, mode, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 200)}` };
+    }
+    const j: any = await r.json();
+    return {
+      file, mode, ok: true,
+      latency_ms: j.latency_ms,
+      response_chars: (j.response ?? "").length,
+      enriched_chars: j.enriched_prompt_chars,
+      bug_fingerprints: j.sources?.bug_fingerprints_count,
+      matrix_kept: j.sources?.matrix_chunks_kept,
+      matrix_dropped: j.sources?.matrix_chunks_dropped,
+    };
+  } catch (e: any) {
+    return { file, mode, ok: false, error: e.message, latency_ms: Date.now() - t0 };
+  }
+}
+
+async function main() {
+  const { files, modes, model } = parseArgs();
+  console.log(`[experiment] files=${files.length} × modes=${modes.length} = ${files.length * modes.length} runs`);
+  console.log(`[experiment] model=${model} task=${TASK_CLASS} gateway=${GATEWAY}`);
+  console.log("");
+
+  const results: RunResult[] = [];
+  let i = 0;
+  for (const file of files) {
+    for (const mode of modes) {
+      i++;
+      process.stdout.write(`  [${i}/${files.length * modes.length}] ${mode.padEnd(28)} ${file} ... `);
+      const r = await runOne(file, mode, model);
+      results.push(r);
+      if (r.ok) {
+        console.log(
+          `✓ ${(r.response_chars ?? 0).toString().padStart(5)} chars | ` +
+          `prompt ${(r.enriched_chars ?? 0).toString().padStart(5)} chars | ` +
+          `${((r.latency_ms ?? 0) / 1000).toFixed(1).padStart(5)}s | ` +
+          `bug=${r.bug_fingerprints ?? "-"} mtx=${r.matrix_kept ?? 0}/${(r.matrix_kept ?? 0) + (r.matrix_dropped ?? 0)}`
+        );
+      } else {
+        console.log(`✗ ${r.error}`);
+      }
+    }
+  }
+
+  console.log("");
+  console.log(`[experiment] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`);
+  console.log(`[experiment] full per-call detail in data/_kb/mode_experiments.jsonl`);
+  console.log(`[experiment] aggregate with: bun run scripts/mode_compare.ts`);
+}
+
+main().catch(e => { console.error(e); process.exit(1); });
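The harness leans on a free OpenRouter model for quota resilience but has no retry of its own; a 429 simply records a failed run. If that becomes noisy, a wrapper along these lines could retry only rate-limit failures. This is a hypothetical extension, not part of the diff; it relies on `runOne` and `RunResult` from the script above and on the `HTTP <status>` error format `runOne` produces:

```ts
// Hypothetical backoff wrapper around runOne: retries HTTP 429 results
// with exponential delay, passes every other outcome straight through.
async function runOneWithBackoff(
  file: string,
  mode: string,
  model: string,
  maxRetries = 3,
): Promise<RunResult> {
  for (let attempt = 0; ; attempt++) {
    const r = await runOne(file, mode, model);
    const quotaHit = !r.ok && (r.error ?? "").startsWith("HTTP 429");
    if (!quotaHit || attempt >= maxRetries) return r;
    const waitMs = 2_000 * 2 ** attempt; // 2s, 4s, 8s
    console.log(`  quota hit, retrying in ${waitMs / 1000}s`);
    await new Promise(resolve => setTimeout(resolve, waitMs));
  }
}
```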
diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts
index 509180f..9d77e97 100644
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@@ -1438,6 +1438,56 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
   // Collect attempts for the pathway trace sidecar.
   const pathwayAttempts: LadderAttemptRec[] = [];
 
+  // ─── Mode runner fast path (J 2026-04-26) ───
+  // Modes are prompt-molders, not model-pickers. /v1/mode/execute
+  // composes pathway memory + relevance-filtered matrix chunks +
+  // focus-file context into ONE prompt designed for one-shot success.
+  // Try it first; if the response is substantive, skip the ladder
+  // entirely. If anything goes wrong, fall through unchanged.
+  //
+  // Off by default until we've A/B-validated quality vs the ladder.
+  // LH_USE_MODE_RUNNER=1 enables. LH_MODE_MIN_CHARS controls the
+  // success bar (default 2000 — anything shorter is treated as a
+  // thin response and falls through).
+  if (process.env.LH_USE_MODE_RUNNER === "1") {
+    const minChars = Number(process.env.LH_MODE_MIN_CHARS ?? 2000);
+    log(`  ⚡ mode runner enabled — trying /v1/mode/execute (min_chars=${minChars})`);
+    const t0 = Date.now();
+    try {
+      const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({
+          task_class: taskClass,
+          file_path: rel,
+          file_content: content,
+        }),
+        signal: AbortSignal.timeout(180_000),
+      });
+      const modeMs = Date.now() - t0;
+      if (r.ok) {
+        const j: any = await r.json();
+        const respChars = (j.response ?? "").length;
+        if (respChars >= minChars) {
+          log(`  ✓ mode ${j.mode} → ${j.model} | ${j.enriched_prompt_chars} prompt chars → ${respChars} resp chars in ${modeMs}ms`);
+          log(`    sources: ${j.sources?.bug_fingerprints_count ?? 0} fingerprints, ${j.sources?.matrix_chunks_kept ?? 0}/${(j.sources?.matrix_chunks_kept ?? 0) + (j.sources?.matrix_chunks_dropped ?? 0)} matrix chunks kept`);
+          accepted = j.response;
+          acceptedModel = `mode_runner/${j.mode}/${j.model}`;
+          acceptedOn = 1;
+          history.push({ n: 1, model: j.model, status: "accepted", chars: respChars });
+          pathwayAttempts.push({ rung: 0, model: j.model, latency_ms: modeMs, accepted: true, reject_reason: null });
+        } else {
+          log(`  ✗ mode runner returned ${respChars} chars (<${minChars}), falling through to ladder`);
+        }
+      } else {
+        const body = await r.text().catch(() => "");
+        log(`  ✗ mode runner HTTP ${r.status}: ${body.slice(0, 200)} — falling through to ladder`);
+      }
+    } catch (e: any) {
+      log(`  ✗ mode runner err: ${e.message} — falling through to ladder`);
+    }
+  }
+
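One foot-gun in the gate above: `Number(process.env.LH_MODE_MIN_CHARS ?? 2000)` only falls back when the variable is unset, so `LH_MODE_MIN_CHARS=""` yields `Number("") === 0` and accepts everything, while a typo yields `NaN`, which fails every `>=` check and silently disables the fast path. A defensive parse, as a hypothetical helper using the same env name:

```ts
// Hypothetical defensive version of the gate's env parsing: empty strings,
// NaN, and non-positive values all fall back to the documented default.
function modeRunnerMinChars(): number {
  const parsed = Number(process.env.LH_MODE_MIN_CHARS);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 2000;
}
```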
"openai/gpt-oss-120b:free"; + return { files, modes, model }; +} + +interface RunResult { + file: string; + mode: string; + ok: boolean; + latency_ms?: number; + response_chars?: number; + enriched_chars?: number; + bug_fingerprints?: number; + matrix_kept?: number; + matrix_dropped?: number; + error?: string; +} + +async function runOne(file: string, mode: string, model: string): Promise { + const t0 = Date.now(); + try { + const r = await fetch(`${GATEWAY}/v1/mode/execute`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + task_class: TASK_CLASS, + file_path: file, + force_mode: mode, + force_model: model, + }), + signal: AbortSignal.timeout(180_000), + }); + if (!r.ok) { + const body = await r.text().catch(() => ""); + return { file, mode, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 200)}` }; + } + const j: any = await r.json(); + return { + file, mode, ok: true, + latency_ms: j.latency_ms, + response_chars: (j.response ?? "").length, + enriched_chars: j.enriched_prompt_chars, + bug_fingerprints: j.sources?.bug_fingerprints_count, + matrix_kept: j.sources?.matrix_chunks_kept, + matrix_dropped: j.sources?.matrix_chunks_dropped, + }; + } catch (e: any) { + return { file, mode, ok: false, error: e.message, latency_ms: Date.now() - t0 }; + } +} + +async function main() { + const { files, modes, model } = parseArgs(); + console.log(`[experiment] files=${files.length} × modes=${modes.length} = ${files.length * modes.length} runs`); + console.log(`[experiment] model=${model} task=${TASK_CLASS} gateway=${GATEWAY}`); + console.log(""); + + const results: RunResult[] = []; + let i = 0; + for (const file of files) { + for (const mode of modes) { + i++; + process.stdout.write(` [${i}/${files.length * modes.length}] ${mode.padEnd(28)} ${file} ... `); + const r = await runOne(file, mode, model); + results.push(r); + if (r.ok) { + console.log( + `✓ ${(r.response_chars ?? 0).toString().padStart(5)} chars | ` + + `prompt ${(r.enriched_chars ?? 0).toString().padStart(5)} chars | ` + + `${((r.latency_ms ?? 0) / 1000).toFixed(1).padStart(5)}s | ` + + `bug=${r.bug_fingerprints ?? "-"} mtx=${r.matrix_kept ?? 0}/${(r.matrix_kept ?? 0) + (r.matrix_dropped ?? 0)}` + ); + } else { + console.log(`✗ ${r.error}`); + } + } + } + + console.log(""); + console.log(`[experiment] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`); + console.log(`[experiment] full per-call detail in data/_kb/mode_experiments.jsonl`); + console.log(`[experiment] aggregate with: bun run scripts/mode_compare.ts`); +} + +main().catch(e => { console.error(e); process.exit(1); }); diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts index 509180f..9d77e97 100644 --- a/tests/real-world/scrum_master_pipeline.ts +++ b/tests/real-world/scrum_master_pipeline.ts @@ -1438,6 +1438,56 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of // Collect attempts for the pathway trace sidecar. const pathwayAttempts: LadderAttemptRec[] = []; + // ─── Mode runner fast path (J 2026-04-26) ─── + // Modes are prompt-molders, not model-pickers. /v1/mode/execute + // composes pathway memory + relevance-filtered matrix chunks + + // focus-file context into ONE prompt designed for one-shot success. + // Try it first; if the response is substantive, skip the ladder + // entirely. If anything goes wrong, fall through unchanged. + // + // Off by default until we've A/B-validated quality vs the ladder. 
@@ -1448,6 +1498,10 @@
   let qualityRetriesOnCurrentModel = 0;
 
   for (let step = 0; step < MAX_ATTEMPTS; step++) {
+    // Mode runner already produced an acceptable response — short-circuit
+    // the ladder. Falls through to the post-loop bookkeeping which
+    // handles {history, pathwayAttempts, hotSwap replay, etc}.
+    if (accepted) break;
     if (modelIdx >= ladderOrder.length) {
       log(`  ✗ all ${ladderOrder.length} fallback models exhausted, marking UNRESOLVED`);
       break;
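Taken together, the loop this diff sets up is: sweep files through every preset, then read the matrix. A minimal one-shot driver for that loop, assuming a gateway already running at `LH_GATEWAY` and the two scripts at the paths in this diff:

```ts
// Hypothetical end-to-end driver: run the sweep, then print the comparison.
import { execSync } from "node:child_process";

execSync("bun run scripts/mode_experiment.ts", { stdio: "inherit" });
execSync("bun run scripts/mode_compare.ts", { stdio: "inherit" });
```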