diff --git a/crates/gateway/src/v1/iterate.rs b/crates/gateway/src/v1/iterate.rs new file mode 100644 index 0000000..49a3ba6 --- /dev/null +++ b/crates/gateway/src/v1/iterate.rs @@ -0,0 +1,313 @@ +//! /v1/iterate — the Phase 43 PRD's "generate → validate → correct → retry" loop. +//! +//! Closes the "0→85% with iteration" thesis structurally. A caller +//! posts a prompt + artifact kind + validation context; the gateway: +//! 1. Generates a JSON artifact via /v1/chat (any provider/model) +//! 2. Extracts the JSON object from the model output +//! 3. Validates via /v1/validate (FillValidator / EmailValidator / +//! PlaybookValidator with the shared WorkerLookup) +//! 4. On ValidationError, appends the error to the prompt and +//! retries up to `max_iterations` (default 3) +//! 5. Returns the accepted artifact + Report on success, OR the +//! attempt history + final error on max-iter exhaustion +//! +//! Internal calls go via HTTP loopback to localhost:gateway_port so +//! the same /v1/usage tracking and Langfuse traces apply. A small +//! latency cost (~1-3ms per loopback hop) for clean separation of +//! concerns and observability. +//! +//! 2026-04-27 Phase 43 v3 part 3: this endpoint makes the iteration +//! loop a first-class lakehouse capability rather than a per-caller +//! re-implementation. Staffing executors, agent loops, and future +//! validators all reach the same code path. + +use axum::{extract::State, http::StatusCode, response::IntoResponse, Json}; +use serde::{Deserialize, Serialize}; + +const DEFAULT_MAX_ITERATIONS: u32 = 3; +const LOOPBACK_TIMEOUT_SECS: u64 = 240; + +#[derive(Deserialize)] +pub struct IterateRequest { + /// "fill" | "email" | "playbook" — picks which validator runs. + pub kind: String, + /// The prompt to seed generation. Validation errors from prior + /// attempts are appended on retry. + pub prompt: String, + /// Provider/model passed through to /v1/chat. e.g. "ollama_cloud" + /// + "kimi-k2.6", or "opencode" + "claude-haiku-4-5". + pub provider: String, + pub model: String, + /// Optional system prompt — sent to /v1/chat as the system message. + #[serde(default)] + pub system: Option, + /// Validation context (target_count, city, state, role, client_id + /// for fills; candidate_id for emails). Forwarded to /v1/validate. + #[serde(default)] + pub context: Option, + /// Cap on iteration count. Defaults to 3 per the Phase 43 PRD. + #[serde(default)] + pub max_iterations: Option, + /// Forwarded to /v1/chat. Defaults to 0.2 if unset. + #[serde(default)] + pub temperature: Option, + /// Forwarded to /v1/chat. Defaults to 4096 if unset. + #[serde(default)] + pub max_tokens: Option, +} + +#[derive(Serialize)] +pub struct IterateAttempt { + pub iteration: u32, + pub raw: String, + pub status: AttemptStatus, +} + +#[derive(Serialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum AttemptStatus { + /// Model output didn't contain extractable JSON. + NoJson, + /// JSON extracted but failed validation; carries the error. + ValidationFailed { error: serde_json::Value }, + /// Validation passed (last attempt's terminal status). + Accepted, +} + +#[derive(Serialize)] +pub struct IterateResponse { + pub artifact: serde_json::Value, + pub validation: serde_json::Value, + pub iterations: u32, + pub history: Vec, +} + +#[derive(Serialize)] +pub struct IterateFailure { + pub error: String, + pub iterations: u32, + pub history: Vec, +} + +pub async fn iterate( + State(state): State, + Json(req): Json, +) -> impl IntoResponse { + let max_iter = req.max_iterations.unwrap_or(DEFAULT_MAX_ITERATIONS).max(1); + let temperature = req.temperature.unwrap_or(0.2); + let max_tokens = req.max_tokens.unwrap_or(4096); + let mut history: Vec = Vec::with_capacity(max_iter as usize); + let mut current_prompt = req.prompt.clone(); + + let client = match reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(LOOPBACK_TIMEOUT_SECS)) + .build() { + Ok(c) => c, + Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("client build: {e}")).into_response(), + }; + // Self-loopback to the gateway port. Carries gateway internal + // calls through /v1/chat + /v1/validate so /v1/usage tracks them. + let gateway = "http://127.0.0.1:3100"; + + for iteration in 0..max_iter { + // ── Generate ── + let mut messages = Vec::with_capacity(2); + if let Some(sys) = &req.system { + messages.push(serde_json::json!({"role": "system", "content": sys})); + } + messages.push(serde_json::json!({"role": "user", "content": current_prompt})); + let chat_body = serde_json::json!({ + "messages": messages, + "provider": req.provider, + "model": req.model, + "temperature": temperature, + "max_tokens": max_tokens, + }); + let raw = match call_chat(&client, gateway, &chat_body).await { + Ok(r) => r, + Err(e) => return (StatusCode::BAD_GATEWAY, format!("/v1/chat hop failed at iter {iteration}: {e}")).into_response(), + }; + + // ── Extract JSON ── + let artifact = match extract_json(&raw) { + Some(a) => a, + None => { + history.push(IterateAttempt { + iteration, + raw: raw.chars().take(2000).collect(), + status: AttemptStatus::NoJson, + }); + current_prompt = format!( + "{}\n\nYour previous attempt did not contain a JSON object. Reply with ONLY a valid JSON object matching the requested artifact shape.", + req.prompt, + ); + continue; + } + }; + + // ── Validate ── + let validate_body = serde_json::json!({ + "kind": req.kind, + "artifact": artifact, + "context": req.context.clone().unwrap_or(serde_json::Value::Null), + }); + match call_validate(&client, gateway, &validate_body).await { + Ok(report) => { + history.push(IterateAttempt { + iteration, + raw: raw.chars().take(2000).collect(), + status: AttemptStatus::Accepted, + }); + return (StatusCode::OK, Json(IterateResponse { + artifact, + validation: report, + iterations: iteration + 1, + history, + })).into_response(); + } + Err(err) => { + let err_summary = err.to_string(); + history.push(IterateAttempt { + iteration, + raw: raw.chars().take(2000).collect(), + status: AttemptStatus::ValidationFailed { + error: serde_json::to_value(&err_summary).unwrap_or(serde_json::Value::Null), + }, + }); + // Append validation feedback to prompt for next iter. + // The model sees concrete failure mode + retries with + // corrective context. This is the "observer correction" + // in Phase 43 PRD shape, simplified — the validator + // itself IS the observer for now. + current_prompt = format!( + "{}\n\nPrior attempt failed validation:\n{}\n\nFix the specific issue above and respond with a corrected JSON object.", + req.prompt, err_summary, + ); + continue; + } + } + } + + (StatusCode::UNPROCESSABLE_ENTITY, Json(IterateFailure { + error: format!("max iterations reached ({max_iter}) without passing validation"), + iterations: max_iter, + history, + })).into_response() +} + +async fn call_chat(client: &reqwest::Client, gateway: &str, body: &serde_json::Value) -> Result { + let resp = client.post(format!("{gateway}/v1/chat")) + .json(body) + .send() + .await + .map_err(|e| format!("chat hop: {e}"))?; + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + return Err(format!("chat {}: {}", status, body.chars().take(300).collect::())); + } + let parsed: serde_json::Value = resp.json().await.map_err(|e| format!("chat parse: {e}"))?; + Ok(parsed.pointer("/choices/0/message/content") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string()) +} + +async fn call_validate(client: &reqwest::Client, gateway: &str, body: &serde_json::Value) -> Result { + let resp = client.post(format!("{gateway}/v1/validate")) + .json(body) + .send() + .await + .map_err(|e| format!("validate hop: {e}"))?; + let status = resp.status(); + let parsed: serde_json::Value = resp.json().await.map_err(|e| format!("validate parse: {e}"))?; + if status.is_success() { + Ok(parsed) + } else { + // The /v1/validate endpoint returns a ValidationError JSON + // on 422; surface its structure verbatim so the prompt- + // appending step gets specific failure detail. + Err(serde_json::to_string(&parsed).unwrap_or_else(|_| format!("validation {} (unparseable body)", status))) + } +} + +/// Extract the first JSON object from a model's output. Handles +/// fenced code blocks (```json ... ```), bare braces, and stray +/// prose around the JSON. Returns None on no extractable object. +fn extract_json(raw: &str) -> Option { + // Try fenced first. + let candidates: Vec = { + let mut out = vec![]; + let mut s = raw; + while let Some(start) = s.find("```") { + let after = &s[start + 3..]; + // Skip optional language tag (json, etc.) + let body_start = after.find('\n').map(|n| n + 1).unwrap_or(0); + let body = &after[body_start..]; + if let Some(end) = body.find("```") { + out.push(body[..end].trim().to_string()); + s = &body[end + 3..]; + } else { break; } + } + out + }; + for c in &candidates { + if let Ok(v) = serde_json::from_str::(c) { + if v.is_object() { return Some(v); } + } + } + // Fall back to outermost {...} balance. + let bytes = raw.as_bytes(); + let mut depth = 0i32; + let mut start: Option = None; + for (i, &b) in bytes.iter().enumerate() { + match b { + b'{' => { if start.is_none() { start = Some(i); } depth += 1; } + b'}' => { + depth -= 1; + if depth == 0 { + if let Some(s) = start { + let slice = &raw[s..=i]; + if let Ok(v) = serde_json::from_str::(slice) { + if v.is_object() { return Some(v); } + } + start = None; + } + } + } + _ => {} + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extract_json_from_fenced_block() { + let raw = "Here's my answer:\n```json\n{\"fills\": [{\"candidate_id\": \"W-1\"}]}\n```\nDone."; + let v = extract_json(raw).unwrap(); + assert!(v.get("fills").is_some()); + } + + #[test] + fn extract_json_from_bare_braces() { + let raw = "Here you go: {\"fills\": [{\"candidate_id\": \"W-2\"}]}"; + let v = extract_json(raw).unwrap(); + assert!(v.get("fills").is_some()); + } + + #[test] + fn extract_json_returns_none_on_no_object() { + assert!(extract_json("just prose, no json").is_none()); + } + + #[test] + fn extract_json_picks_first_balanced() { + let raw = "{\"a\":1} then {\"b\":2}"; + let v = extract_json(raw).unwrap(); + assert_eq!(v.get("a").and_then(|v| v.as_i64()), Some(1)); + } +} diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs index 9af8d85..50b0990 100644 --- a/crates/gateway/src/v1/mod.rs +++ b/crates/gateway/src/v1/mod.rs @@ -19,6 +19,7 @@ pub mod claude; pub mod kimi; pub mod opencode; pub mod validate; +pub mod iterate; pub mod langfuse_trace; pub mod mode; pub mod respond; @@ -118,6 +119,7 @@ pub fn router(state: V1State) -> Router { .route("/mode/list", get(mode::list)) .route("/mode/execute", post(mode::execute)) .route("/validate", post(validate::validate)) + .route("/iterate", post(iterate::iterate)) .with_state(state) }