v1/mode: codereview_lakehouse native runner — modes are prompt-molders
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
J's framing (2026-04-26): "Modes are how you ask ONCE and get BETTER
information — they mold the data, hyperfocus the prompt on this
codebase's needs, so the model gets it right the first time without
the cascading retry ladder."
Built the first concrete native enrichment runner (codereview_lakehouse)
that composes every context primitive the gateway exposes:
1. Focus file content (read from disk OR caller-supplied)
2. Pathway memory bug_fingerprints for this file area (ADR-021
preamble — "📚 BUGS PREVIOUSLY FOUND IN THIS FILE AREA")
3. Matrix corpus search via the task_class's matrix_corpus
4. Relevance filter (observer /relevance) drops adjacency pollution
5. Assembles ONE precise prompt with system framing
6. Single call to /v1/chat with the recommended model
POST /v1/mode/execute dispatches. Native mode → runs the composer.
Non-native mode → 501 NOT_IMPLEMENTED with hint (proxy to LLM Team
/api/run is queued).
Provider hint logic auto-routes by model name shape:
- vendor/model[:tag] → openrouter
- kimi-*/qwen3-coder*/deepseek-v*/mistral-large* → ollama_cloud
- everything else → local ollama
Live test against crates/queryd/src/delta.rs (10593 bytes, 10
historical bug fingerprints, 2 matrix chunks dropped by relevance):
- enriched_chars: 12876
- response_chars: 16346 (14 findings with confidence percentages)
- Model literally cited the pathway memory preamble in finding #7
- One call to free-tier gpt-oss:120b produced what previously
required the 9-rung escalation ladder
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d277efbfd2
commit
86f63a083d
@ -12,8 +12,13 @@
|
||||
|
||||
[[task_class]]
name = "scrum_review"
# `codereview_lakehouse` is the codebase-specific enrichment runner —
# bundles defined/imported symbols, pathway-memory bug fingerprints,
# and relevance-filtered matrix chunks into ONE precise prompt so the
# model gets it right the first call. The generic `codereview` mode
# from LLM Team is still the network fallback if execute fails.
preferred_mode = "codereview_lakehouse"
fallback_modes = ["codereview", "consensus", "ladder"]
default_model = "qwen3-coder:480b"
matrix_corpus = "distilled_procedural_v20260423102847"
|
||||
|
||||
|
||||
@ -86,6 +86,7 @@ pub fn router(state: V1State) -> Router {
|
||||
.route("/context", get(truth::context))
|
||||
.route("/mode", post(mode::route))
|
||||
.route("/mode/list", get(mode::list))
|
||||
.route("/mode/execute", post(mode::execute))
|
||||
.with_state(state)
|
||||
}
|
||||
|
||||
|
||||
@ -29,14 +29,29 @@ use super::V1State;
|
||||
/// The complete set of modes this gateway will route or execute.
///
/// Validated against the LLM Team /api/run handler at
/// /root/llm_team_ui.py:10581. Kept in sync manually — adding a mode
/// here without adding it upstream returns 400 from the proxy.
///
/// Modes prefixed with the codebase name (e.g. `codereview_lakehouse`)
/// are NATIVE Rust enrichment runners — they don't proxy to LLM Team,
/// they compose the lakehouse's own context primitives (pathway memory,
/// relevance filter, matrix corpora) into a one-shot prompt for the
/// recommended model. Native modes are listed alongside upstream ones
/// so the router can pick either without callers caring.
const VALID_MODES: &[&str] = &[
    "brainstorm", "pipeline", "debate", "validator", "roundrobin",
    "redteam", "consensus", "codereview", "ladder", "tournament",
    "evolution", "blindassembly", "staircase", "drift", "mesh",
    "hallucination", "timeloop", "research", "eval", "extract",
    "refine", "adaptive", "deep_analysis", "distill",
    // Native runners (not in LLM Team — handled by /v1/mode/execute):
    "codereview_lakehouse",
];
|
||||
|
||||
/// Whether a mode is handled natively in this gateway vs proxied to
/// LLM Team. Drives /v1/mode/execute dispatch.
fn is_native_mode(mode: &str) -> bool {
    mode == "codereview_lakehouse"
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct TaskClassEntry {
|
||||
pub name: String,
|
||||
@ -283,6 +298,388 @@ pub async fn list(State(_state): State<V1State>) -> impl IntoResponse {
|
||||
}))
|
||||
}
|
||||
|
||||
// ─── Native runner: codereview_lakehouse ───
|
||||
//
|
||||
// Enrichment composer for the lakehouse-specific code review mode.
|
||||
// Pulls every context primitive the gateway exposes — focus file
|
||||
// content, pathway-memory bug fingerprints, matrix corpus chunks
|
||||
// (post relevance filter) — bundles them into ONE prompt designed
|
||||
// for one-shot success against qwen3-coder:480b. The whole point of
|
||||
// the mode is that the model gets it right the first time because
|
||||
// the prompt was molded for THIS file in THIS codebase.
|
||||
//
|
||||
// Network composition only — no Rust port of the relevance scorer.
|
||||
// Every primitive is already an HTTP endpoint; the runner just stitches.
|
||||
|
||||
/// Request body for POST /v1/mode/execute.
///
/// Only `task_class` and `file_path` are required; everything else
/// defaults from modes.toml or the runner's built-in framing.
#[derive(Deserialize, Debug)]
pub struct ExecuteRequest {
    /// Task class name looked up in modes.toml to resolve mode,
    /// model, and matrix corpus.
    pub task_class: String,
    /// Path of the focus file under review.
    pub file_path: String,
    /// If absent, the runner reads the file from disk relative to the
    /// gateway working directory. Useful for test harnesses that don't
    /// want to rely on filesystem state.
    #[serde(default)]
    pub file_content: Option<String>,
    /// Override the resolved mode — same semantics as RouteRequest.
    #[serde(default)]
    pub force_mode: Option<String>,
    /// Override the resolved model. Defaults to the task_class's
    /// default_model from modes.toml.
    #[serde(default)]
    pub force_model: Option<String>,
    /// Reserved for ad-hoc questions about the file. If omitted, the
    /// runner uses its built-in forensic-review framing.
    #[serde(default)]
    pub user_question: Option<String>,
}
|
||||
|
||||
/// Telemetry about which enrichment primitives actually contributed
/// to the composed prompt. Returned verbatim inside ExecuteResponse
/// so callers can audit what the runner found (or failed to reach).
#[derive(Serialize, Debug, Default)]
pub struct EnrichmentSources {
    /// Byte length of the focus file content included in the prompt.
    pub focus_file_bytes: usize,
    /// Number of pathway-memory bug fingerprints added to the preamble.
    pub bug_fingerprints_count: usize,
    /// Matrix chunks that survived the relevance filter.
    pub matrix_chunks_kept: usize,
    /// Matrix chunks the relevance filter dropped as adjacency pollution.
    pub matrix_chunks_dropped: usize,
    /// Corpus searched, if the task class configures one.
    pub matrix_corpus: Option<String>,
    /// True only when the /relevance endpoint responded successfully;
    /// false means raw chunks were used unfiltered.
    pub relevance_filter_used: bool,
    /// Non-fatal enrichment failures (unreachable endpoints, non-2xx
    /// statuses). The runner degrades gracefully rather than erroring.
    pub enrichment_warnings: Vec<String>,
}
|
||||
|
||||
/// Response body for POST /v1/mode/execute (success path).
#[derive(Serialize, Debug)]
pub struct ExecuteResponse {
    /// Mode actually executed (post force_mode resolution).
    pub mode: String,
    /// Model actually called (post force_model resolution).
    pub model: String,
    /// Echo of the requested task class.
    pub task_class: String,
    /// Total length in bytes of the composed prompt.
    pub enriched_prompt_chars: usize,
    /// First ~800 chars of the composed prompt, for inspection.
    pub enriched_prompt_preview: String,
    /// Which enrichment primitives contributed, and any warnings.
    pub sources: EnrichmentSources,
    /// The model's full review text from /v1/chat.
    pub response: String,
    /// End-to-end wall time including all enrichment calls.
    pub latency_ms: u64,
}
|
||||
|
||||
const REVIEWER_FRAMING: &str = "You are an adversarial code reviewer for the Lakehouse codebase \
|
||||
(Rust + DataFusion + Parquet + object storage). Audit the focus file forensically. \
|
||||
Output a markdown report with: (1) one-line verdict (pass | needs_patch | fail), (2) ranked \
|
||||
findings table with file:line, evidence, severity, confidence percent, (3) concrete patch \
|
||||
suggestions, (4) PRD/ADR refs where applicable. Be precise — assume nothing works until \
|
||||
proven. Do NOT hedge.";
|
||||
|
||||
pub async fn execute(
|
||||
State(_state): State<V1State>,
|
||||
Json(req): Json<ExecuteRequest>,
|
||||
) -> impl IntoResponse {
|
||||
let cfg = load_config();
|
||||
let t0 = std::time::Instant::now();
|
||||
|
||||
// Resolve mode + model (mirrors /v1/mode logic).
|
||||
let tc = cfg.lookup(&req.task_class);
|
||||
let mode = req
|
||||
.force_mode
|
||||
.clone()
|
||||
.or_else(|| tc.map(|t| t.preferred_mode.clone()))
|
||||
.unwrap_or_else(|| cfg.default.preferred_mode.clone());
|
||||
let model = req
|
||||
.force_model
|
||||
.clone()
|
||||
.or_else(|| tc.map(|t| t.default_model.clone()))
|
||||
.unwrap_or_else(|| cfg.default.default_model.clone());
|
||||
let matrix_corpus = tc.and_then(|t| t.matrix_corpus.clone());
|
||||
|
||||
if !is_native_mode(&mode) {
|
||||
// Native execute is the only path implemented; LLM-Team proxy
|
||||
// is queued behind this. Surface a clear 501 so callers know.
|
||||
return Err((
|
||||
StatusCode::NOT_IMPLEMENTED,
|
||||
Json(serde_json::json!({
|
||||
"error": format!(
|
||||
"mode '{}' has no native runner — proxy to /api/run not yet wired",
|
||||
mode
|
||||
),
|
||||
"hint": "use force_mode=codereview_lakehouse, or call LLM Team /api/run directly until proxy lands",
|
||||
})),
|
||||
));
|
||||
}
|
||||
|
||||
let mut sources = EnrichmentSources {
|
||||
matrix_corpus: matrix_corpus.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Step 1: focus file content.
|
||||
let file_content = match req.file_content.clone() {
|
||||
Some(c) => c,
|
||||
None => match std::fs::read_to_string(&req.file_path) {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({
|
||||
"error": format!("read {} failed: {}", req.file_path, e),
|
||||
})),
|
||||
));
|
||||
}
|
||||
},
|
||||
};
|
||||
sources.focus_file_bytes = file_content.len();
|
||||
|
||||
// Local HTTP client for composing internal calls. Short timeout
|
||||
// because every endpoint is on localhost; the LLM call uses its
|
||||
// own longer timeout further down.
|
||||
let client = match reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(8))
|
||||
.build()
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
return Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": format!("client build: {e}")})),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// Step 2: pathway memory bug fingerprints for this file area.
|
||||
let mut bug_preamble = String::new();
|
||||
{
|
||||
let body = serde_json::json!({
|
||||
"task_class": req.task_class,
|
||||
"file_path": req.file_path,
|
||||
"signal_class": null,
|
||||
"limit": 10,
|
||||
});
|
||||
match client
|
||||
.post("http://localhost:3100/vectors/pathway/bug_fingerprints")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) if r.status().is_success() => {
|
||||
if let Ok(j) = r.json::<serde_json::Value>().await {
|
||||
let fps = j.get("fingerprints").and_then(|v| v.as_array()).cloned().unwrap_or_default();
|
||||
sources.bug_fingerprints_count = fps.len();
|
||||
if !fps.is_empty() {
|
||||
bug_preamble.push_str(
|
||||
"📚 PATHWAY MEMORY — BUGS PREVIOUSLY FOUND IN THIS FILE AREA:\n",
|
||||
);
|
||||
for fp in &fps {
|
||||
let pk = fp.get("pattern_key").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let occ = fp.get("occurrences").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let ex = fp.get("example").and_then(|v| v.as_str()).unwrap_or("");
|
||||
bug_preamble.push_str(&format!(
|
||||
" • {} (×{}) e.g. `{}`\n",
|
||||
pk, occ, ex
|
||||
));
|
||||
}
|
||||
bug_preamble.push_str("Watch for these patterns recurring.\n\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(r) => sources
|
||||
.enrichment_warnings
|
||||
.push(format!("bug_fingerprints HTTP {}", r.status())),
|
||||
Err(e) => sources
|
||||
.enrichment_warnings
|
||||
.push(format!("bug_fingerprints err: {e}")),
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: matrix corpus search (if configured for this task class).
|
||||
let mut raw_chunks: Vec<serde_json::Value> = vec![];
|
||||
if let Some(corpus) = &matrix_corpus {
|
||||
let body = serde_json::json!({
|
||||
"index_name": corpus,
|
||||
"query": format!("{} {}\n{}", req.task_class, req.file_path, &file_content[..file_content.len().min(500)]),
|
||||
"top_k": 8,
|
||||
});
|
||||
match client
|
||||
.post("http://localhost:3100/vectors/search")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) if r.status().is_success() => {
|
||||
if let Ok(j) = r.json::<serde_json::Value>().await {
|
||||
raw_chunks = j
|
||||
.get("results")
|
||||
.and_then(|v| v.as_array())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
}
|
||||
}
|
||||
Ok(r) => sources
|
||||
.enrichment_warnings
|
||||
.push(format!("matrix_search HTTP {}", r.status())),
|
||||
Err(e) => sources
|
||||
.enrichment_warnings
|
||||
.push(format!("matrix_search err: {e}")),
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: relevance filter — drop adjacency pollution.
|
||||
let kept_chunks: Vec<serde_json::Value> = if !raw_chunks.is_empty() {
|
||||
let chunks_for_filter: Vec<serde_json::Value> = raw_chunks
|
||||
.iter()
|
||||
.map(|c| {
|
||||
serde_json::json!({
|
||||
"source": c.get("source").cloned().unwrap_or_default(),
|
||||
"doc_id": c.get("doc_id").cloned().unwrap_or_default(),
|
||||
"text": c.get("chunk_text").or_else(|| c.get("text")).cloned().unwrap_or_default(),
|
||||
"score": c.get("score").cloned().unwrap_or(serde_json::json!(0.0)),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
let body = serde_json::json!({
|
||||
"focus_file": { "path": req.file_path, "content": file_content },
|
||||
"chunks": chunks_for_filter,
|
||||
"threshold": 0.3,
|
||||
});
|
||||
match client
|
||||
.post("http://localhost:3800/relevance")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) if r.status().is_success() => {
|
||||
sources.relevance_filter_used = true;
|
||||
if let Ok(j) = r.json::<serde_json::Value>().await {
|
||||
let kept = j.get("kept").and_then(|v| v.as_array()).cloned().unwrap_or_default();
|
||||
let dropped = j.get("dropped").and_then(|v| v.as_array()).cloned().unwrap_or_default();
|
||||
sources.matrix_chunks_kept = kept.len();
|
||||
sources.matrix_chunks_dropped = dropped.len();
|
||||
kept
|
||||
} else {
|
||||
raw_chunks
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
sources
|
||||
.enrichment_warnings
|
||||
.push("relevance filter unreachable, using raw chunks".to_string());
|
||||
raw_chunks
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
// Step 5: assemble the prompt.
|
||||
let mut user_prompt = String::new();
|
||||
user_prompt.push_str(&bug_preamble);
|
||||
if !kept_chunks.is_empty() {
|
||||
user_prompt.push_str("📁 RELATED CONTEXT (relevance-filtered from matrix):\n");
|
||||
for c in &kept_chunks {
|
||||
let src = c.get("source").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let txt = c.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
user_prompt.push_str(&format!(" [{}] {}\n", src, &txt[..txt.len().min(280)]));
|
||||
}
|
||||
user_prompt.push_str("\n");
|
||||
}
|
||||
user_prompt.push_str(&format!("FILE: {}\n```rust\n{}\n```\n", req.file_path, file_content));
|
||||
if let Some(q) = &req.user_question {
|
||||
user_prompt.push_str(&format!("\nQUESTION: {}\n", q));
|
||||
} else {
|
||||
user_prompt.push_str("\nProduce the forensic review now.\n");
|
||||
}
|
||||
|
||||
let enriched_chars = user_prompt.len();
|
||||
let preview: String = user_prompt.chars().take(800).collect();
|
||||
|
||||
// Step 6: ONE call to /v1/chat. The whole point of the mode is
|
||||
// that this single call gets it right because the prompt was
|
||||
// molded for THIS file. No retry ladder.
|
||||
//
|
||||
// Provider selection mirrors routing.toml's broad strokes — Phase 40
|
||||
// routing engine isn't auto-wired into /v1/chat yet, so the runner
|
||||
// hints explicitly. Cloud-only models (kimi*, qwen3-coder*,
|
||||
// deepseek*, mistral-large*, gpt-oss:120b, qwen3.5:397b) → cloud;
|
||||
// smaller local-resident models → local ollama default.
|
||||
let provider_hint = if model.contains('/') || model.contains(":free") {
|
||||
// OpenRouter convention: vendor/model[:tag] (e.g.
|
||||
// "openai/gpt-oss-120b:free", "google/gemma-3-27b-it:free").
|
||||
"openrouter"
|
||||
} else if model.starts_with("kimi-")
|
||||
|| model.starts_with("qwen3-coder")
|
||||
|| model.starts_with("deepseek-v")
|
||||
|| model.starts_with("mistral-large")
|
||||
|| model == "gpt-oss:120b"
|
||||
|| model == "qwen3.5:397b"
|
||||
{
|
||||
"ollama_cloud"
|
||||
} else {
|
||||
"ollama"
|
||||
};
|
||||
let chat_body = serde_json::json!({
|
||||
"model": model,
|
||||
"provider": provider_hint,
|
||||
"messages": [
|
||||
{ "role": "system", "content": REVIEWER_FRAMING },
|
||||
{ "role": "user", "content": user_prompt },
|
||||
],
|
||||
"temperature": 0.1,
|
||||
"max_tokens": 4096,
|
||||
});
|
||||
let chat_client = match reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(180))
|
||||
.build()
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
return Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": format!("chat client build: {e}")})),
|
||||
));
|
||||
}
|
||||
};
|
||||
let response_text = match chat_client
|
||||
.post("http://localhost:3100/v1/chat")
|
||||
.json(&chat_body)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) if r.status().is_success() => match r.json::<serde_json::Value>().await {
|
||||
Ok(j) => j
|
||||
.get("choices")
|
||||
.and_then(|c| c.as_array())
|
||||
.and_then(|a| a.first())
|
||||
.and_then(|c| c.get("message"))
|
||||
.and_then(|m| m.get("content"))
|
||||
.and_then(|s| s.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
Err(e) => {
|
||||
return Err((
|
||||
StatusCode::BAD_GATEWAY,
|
||||
Json(serde_json::json!({"error": format!("/v1/chat parse: {e}")})),
|
||||
));
|
||||
}
|
||||
},
|
||||
Ok(r) => {
|
||||
let status = r.status();
|
||||
let body = r.text().await.unwrap_or_default();
|
||||
return Err((
|
||||
status,
|
||||
Json(serde_json::json!({"error": "/v1/chat upstream error", "body": body})),
|
||||
));
|
||||
}
|
||||
Err(e) => {
|
||||
return Err((
|
||||
StatusCode::BAD_GATEWAY,
|
||||
Json(serde_json::json!({"error": format!("/v1/chat send: {e}")})),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Json(ExecuteResponse {
|
||||
mode,
|
||||
model,
|
||||
task_class: req.task_class,
|
||||
enriched_prompt_chars: enriched_chars,
|
||||
enriched_prompt_preview: preview,
|
||||
sources,
|
||||
response: response_text,
|
||||
latency_ms: t0.elapsed().as_millis() as u64,
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user