//! /v1/iterate — the Phase 43 PRD's "generate → validate → correct → retry" loop. //! //! Closes the "0→85% with iteration" thesis structurally. A caller //! posts a prompt + artifact kind + validation context; the gateway: //! 1. Generates a JSON artifact via /v1/chat (any provider/model) //! 2. Extracts the JSON object from the model output //! 3. Validates via /v1/validate (FillValidator / EmailValidator / //! PlaybookValidator with the shared WorkerLookup) //! 4. On ValidationError, appends the error to the prompt and //! retries up to `max_iterations` (default 3) //! 5. Returns the accepted artifact + Report on success, OR the //! attempt history + final error on max-iter exhaustion //! //! Internal calls go via HTTP loopback to localhost:gateway_port so //! the same /v1/usage tracking and Langfuse traces apply. A small //! latency cost (~1-3ms per loopback hop) for clean separation of //! concerns and observability. //! //! 2026-04-27 Phase 43 v3 part 3: this endpoint makes the iteration //! loop a first-class lakehouse capability rather than a per-caller //! re-implementation. Staffing executors, agent loops, and future //! validators all reach the same code path. use axum::{extract::State, http::{HeaderMap, StatusCode}, response::IntoResponse, Json}; use serde::{Deserialize, Serialize}; const DEFAULT_MAX_ITERATIONS: u32 = 3; const LOOPBACK_TIMEOUT_SECS: u64 = 240; /// Header name used to propagate a Langfuse parent trace id across /// daemon boundaries. Matches Go's `shared.TraceIDHeader` constant /// byte-for-byte (commit d6d2fdf in golangLAKEHOUSE) — same wire /// format means a Go caller can hit Rust's /v1/iterate (or vice /// versa) and the resulting Langfuse trees nest correctly. pub const TRACE_ID_HEADER: &str = "x-lakehouse-trace-id"; #[derive(Deserialize)] pub struct IterateRequest { /// "fill" | "email" | "playbook" — picks which validator runs. pub kind: String, /// The prompt to seed generation. Validation errors from prior /// attempts are appended on retry. pub prompt: String, /// Provider/model passed through to /v1/chat. e.g. "ollama_cloud" /// + "kimi-k2.6", or "opencode" + "claude-haiku-4-5". pub provider: String, pub model: String, /// Optional system prompt — sent to /v1/chat as the system message. #[serde(default)] pub system: Option, /// Validation context (target_count, city, state, role, client_id /// for fills; candidate_id for emails). Forwarded to /v1/validate. #[serde(default)] pub context: Option, /// Cap on iteration count. Defaults to 3 per the Phase 43 PRD. #[serde(default)] pub max_iterations: Option, /// Forwarded to /v1/chat. Defaults to 0.2 if unset. #[serde(default)] pub temperature: Option, /// Forwarded to /v1/chat. Defaults to 4096 if unset. #[serde(default)] pub max_tokens: Option, } #[derive(Serialize)] pub struct IterateAttempt { pub iteration: u32, pub raw: String, pub status: AttemptStatus, } #[derive(Serialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum AttemptStatus { /// Model output didn't contain extractable JSON. NoJson, /// JSON extracted but failed validation; carries the error. ValidationFailed { error: serde_json::Value }, /// Validation passed (last attempt's terminal status). Accepted, } #[derive(Serialize)] pub struct IterateResponse { pub artifact: serde_json::Value, pub validation: serde_json::Value, pub iterations: u32, pub history: Vec, /// Echoes the resolved trace id (caller-forwarded header, body /// field, langfuse-middleware mint, or local fallback). Operators /// pivot from this id straight into Langfuse + the /// coordinator_sessions.jsonl join key. Cross-runtime parity with /// Go's `validator.IterateResponse` (commit 6847bbc in /// golangLAKEHOUSE). #[serde(skip_serializing_if = "Option::is_none")] pub trace_id: Option, } #[derive(Serialize)] pub struct IterateFailure { pub error: String, pub iterations: u32, pub history: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub trace_id: Option, } pub async fn iterate( State(state): State, headers: HeaderMap, Json(req): Json, ) -> impl IntoResponse { let max_iter = req.max_iterations.unwrap_or(DEFAULT_MAX_ITERATIONS).max(1); let temperature = req.temperature.unwrap_or(0.2); let max_tokens = req.max_tokens.unwrap_or(4096); let mut history: Vec = Vec::with_capacity(max_iter as usize); let mut attempt_records: Vec = Vec::with_capacity(max_iter as usize); let mut current_prompt = req.prompt.clone(); // Resolve the parent Langfuse trace id. Caller-forwarded header // wins (cross-daemon tree linkage); otherwise mint a fresh id so // the iterate session is its own tree. Same shape as the Go-side // validatord trace propagation. let trace_id: String = headers .get(TRACE_ID_HEADER) .and_then(|v| v.to_str().ok()) .filter(|s| !s.is_empty()) .map(|s| s.to_string()) .unwrap_or_else(new_trace_id); let client = match reqwest::Client::builder() .timeout(std::time::Duration::from_secs(LOOPBACK_TIMEOUT_SECS)) .build() { Ok(c) => c, Err(e) => { // Even infrastructure failures get a session row so a // missing /v1/iterate event never silently disappears // from the longitudinal log. write_infra_error(&state, &req, &trace_id, max_iter, 0, format!("client build: {e}")).await; return (StatusCode::INTERNAL_SERVER_ERROR, format!("client build: {e}")).into_response(); } }; // Self-loopback to the gateway port. Carries gateway internal // calls through /v1/chat + /v1/validate so /v1/usage tracks them. let gateway = "http://127.0.0.1:3100"; let t0 = std::time::Instant::now(); for iteration in 0..max_iter { let attempt_started = chrono::Utc::now(); // ── Generate ── let mut messages = Vec::with_capacity(2); if let Some(sys) = &req.system { messages.push(serde_json::json!({"role": "system", "content": sys})); } messages.push(serde_json::json!({"role": "user", "content": current_prompt})); let chat_body = serde_json::json!({ "messages": messages, "provider": req.provider, "model": req.model, "temperature": temperature, "max_tokens": max_tokens, }); let raw = match call_chat(&client, gateway, &chat_body, &trace_id).await { Ok(r) => r, Err(e) => { write_infra_error(&state, &req, &trace_id, max_iter, t0.elapsed().as_millis() as u64, format!("/v1/chat hop failed at iter {iteration}: {e}")).await; return (StatusCode::BAD_GATEWAY, format!("/v1/chat hop failed at iter {iteration}: {e}")).into_response(); } }; // ── Extract JSON ── let artifact = match extract_json(&raw) { Some(a) => a, None => { let span_id = emit_attempt_span( &state, &trace_id, iteration, &req, ¤t_prompt, &raw, "no_json", None, attempt_started, chrono::Utc::now(), ); history.push(IterateAttempt { iteration, raw: raw.chars().take(2000).collect(), status: AttemptStatus::NoJson, }); attempt_records.push(super::session_log::SessionAttemptRecord { iteration, verdict_kind: "no_json".to_string(), error: None, span_id, }); current_prompt = format!( "{}\n\nYour previous attempt did not contain a JSON object. Reply with ONLY a valid JSON object matching the requested artifact shape.", req.prompt, ); continue; } }; // ── Validate ── let validate_body = serde_json::json!({ "kind": req.kind, "artifact": artifact, "context": req.context.clone().unwrap_or(serde_json::Value::Null), }); match call_validate(&client, gateway, &validate_body, &trace_id).await { Ok(report) => { let span_id = emit_attempt_span( &state, &trace_id, iteration, &req, ¤t_prompt, &raw, "accepted", None, attempt_started, chrono::Utc::now(), ); history.push(IterateAttempt { iteration, raw: raw.chars().take(2000).collect(), status: AttemptStatus::Accepted, }); attempt_records.push(super::session_log::SessionAttemptRecord { iteration, verdict_kind: "accepted".to_string(), error: None, span_id, }); let duration_ms = t0.elapsed().as_millis() as u64; let grounded = grounded_in_roster(&state, &req.kind, &artifact); write_session_accepted(&state, &req, &trace_id, iteration + 1, max_iter, attempt_records, &artifact, grounded, duration_ms).await; return (StatusCode::OK, Json(IterateResponse { artifact, validation: report, iterations: iteration + 1, history, trace_id: Some(trace_id.clone()), })).into_response(); } Err(err) => { let err_summary = err.to_string(); let span_id = emit_attempt_span( &state, &trace_id, iteration, &req, ¤t_prompt, &raw, "validation_failed", Some(err_summary.clone()), attempt_started, chrono::Utc::now(), ); history.push(IterateAttempt { iteration, raw: raw.chars().take(2000).collect(), status: AttemptStatus::ValidationFailed { error: serde_json::to_value(&err_summary).unwrap_or(serde_json::Value::Null), }, }); attempt_records.push(super::session_log::SessionAttemptRecord { iteration, verdict_kind: "validation_failed".to_string(), error: Some(err_summary.clone()), span_id, }); // Append validation feedback to prompt for next iter. // The model sees concrete failure mode + retries with // corrective context. This is the "observer correction" // in Phase 43 PRD shape, simplified — the validator // itself IS the observer for now. current_prompt = format!( "{}\n\nPrior attempt failed validation:\n{}\n\nFix the specific issue above and respond with a corrected JSON object.", req.prompt, err_summary, ); continue; } } } let duration_ms = t0.elapsed().as_millis() as u64; write_session_failure(&state, &req, &trace_id, max_iter, max_iter, attempt_records, duration_ms).await; (StatusCode::UNPROCESSABLE_ENTITY, Json(IterateFailure { error: format!("max iterations reached ({max_iter}) without passing validation"), iterations: max_iter, history, trace_id: Some(trace_id.clone()), })).into_response() } // ─── Helpers — Langfuse spans + session log + roster check ───────── fn emit_attempt_span( state: &super::V1State, trace_id: &str, iteration: u32, req: &IterateRequest, prompt: &str, raw: &str, verdict: &str, error: Option, started: chrono::DateTime, ended: chrono::DateTime, ) -> Option { let lf = state.langfuse.as_ref()?; Some(lf.emit_attempt_span(super::langfuse_trace::AttemptSpan { trace_id: trace_id.to_string(), iteration, model: req.model.clone(), provider: req.provider.clone(), prompt: prompt.to_string(), raw: raw.to_string(), verdict: verdict.to_string(), error, start_time: started.to_rfc3339(), end_time: ended.to_rfc3339(), })) } /// Verify every fill artifact's candidate IDs exist in the roster. /// Returns Some(true)/Some(false) on the fill kind, None otherwise /// (other kinds don't have worker IDs to ground). Same semantics as /// Go's `handlers.rosterCheckFor("fill")`. fn grounded_in_roster( state: &super::V1State, kind: &str, artifact: &serde_json::Value, ) -> Option { if kind != "fill" { return None; } let fills = artifact.get("fills").and_then(|v| v.as_array())?; for f in fills { let id = match f.get("candidate_id").and_then(|v| v.as_str()) { Some(s) if !s.is_empty() => s, _ => return Some(false), }; if state.validate_workers.find(id).is_none() { return Some(false); } } Some(true) } async fn write_session_accepted( state: &super::V1State, req: &IterateRequest, trace_id: &str, iterations: u32, max_iter: u32, attempts: Vec, artifact: &serde_json::Value, grounded: Option, duration_ms: u64, ) { let Some(logger) = state.session_log.as_ref() else { return }; let rec = build_session_record(req, trace_id, "accepted", iterations, max_iter, attempts, Some(artifact.clone()), grounded, duration_ms); logger.append(rec).await; } async fn write_session_failure( state: &super::V1State, req: &IterateRequest, trace_id: &str, iterations: u32, max_iter: u32, attempts: Vec, duration_ms: u64, ) { let Some(logger) = state.session_log.as_ref() else { return }; let rec = build_session_record(req, trace_id, "max_iter_exhausted", iterations, max_iter, attempts, None, None, duration_ms); logger.append(rec).await; } async fn write_infra_error( state: &super::V1State, req: &IterateRequest, trace_id: &str, max_iter: u32, duration_ms: u64, error: String, ) { let Some(logger) = state.session_log.as_ref() else { return }; let attempts = vec![super::session_log::SessionAttemptRecord { iteration: 0, verdict_kind: "infra_error".to_string(), error: Some(error), span_id: None, }]; let rec = build_session_record(req, trace_id, "infra_error", 0, max_iter, attempts, None, None, duration_ms); logger.append(rec).await; } fn build_session_record( req: &IterateRequest, trace_id: &str, final_verdict: &str, iterations: u32, max_iter: u32, attempts: Vec, artifact: Option, grounded: Option, duration_ms: u64, ) -> super::session_log::SessionRecord { super::session_log::SessionRecord { schema: super::session_log::SESSION_RECORD_SCHEMA.to_string(), session_id: trace_id.to_string(), timestamp: chrono::Utc::now().to_rfc3339(), daemon: "gateway".to_string(), kind: req.kind.clone(), model: req.model.clone(), provider: req.provider.clone(), prompt: super::session_log::truncate(&req.prompt, 4000), iterations, max_iterations: max_iter, final_verdict: final_verdict.to_string(), attempts, artifact, grounded_in_roster: grounded, duration_ms, } } /// Generate a fresh trace id when no parent was forwarded. Same /// time-ordered hex shape Langfuse already accepts elsewhere in this /// crate (see `langfuse_trace::uuid_v7_like`). fn new_trace_id() -> String { let ts = chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0); let rand = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.subsec_nanos()) .unwrap_or(0); format!("{:016x}-{:08x}", ts, rand) } async fn call_chat(client: &reqwest::Client, gateway: &str, body: &serde_json::Value, trace_id: &str) -> Result { let mut req = client.post(format!("{gateway}/v1/chat")).json(body); if !trace_id.is_empty() { req = req.header(TRACE_ID_HEADER, trace_id); } let resp = req.send().await.map_err(|e| format!("chat hop: {e}"))?; let status = resp.status(); if !status.is_success() { let body = resp.text().await.unwrap_or_default(); return Err(format!("chat {}: {}", status, body.chars().take(300).collect::())); } let parsed: serde_json::Value = resp.json().await.map_err(|e| format!("chat parse: {e}"))?; Ok(parsed.pointer("/choices/0/message/content") .and_then(|v| v.as_str()) .unwrap_or("") .to_string()) } async fn call_validate(client: &reqwest::Client, gateway: &str, body: &serde_json::Value, trace_id: &str) -> Result { let mut req = client.post(format!("{gateway}/v1/validate")).json(body); if !trace_id.is_empty() { req = req.header(TRACE_ID_HEADER, trace_id); } let resp = req.send().await.map_err(|e| format!("validate hop: {e}"))?; let status = resp.status(); let parsed: serde_json::Value = resp.json().await.map_err(|e| format!("validate parse: {e}"))?; if status.is_success() { Ok(parsed) } else { // The /v1/validate endpoint returns a ValidationError JSON // on 422; surface its structure verbatim so the prompt- // appending step gets specific failure detail. Err(serde_json::to_string(&parsed).unwrap_or_else(|_| format!("validation {} (unparseable body)", status))) } } /// Extract the first JSON object from a model's output. Handles /// fenced code blocks (```json ... ```), bare braces, and stray /// prose around the JSON. Returns None on no extractable object. /// /// Made `pub` 2026-05-02 to support the cross-runtime parity probe /// at `golangLAKEHOUSE/scripts/cutover/parity/extract_json_parity.sh`. /// The Go counterpart lives at `internal/validator/iterate.go::ExtractJSON`; /// when either runtime's algorithm changes the parity probe surfaces /// the divergence. pub fn extract_json(raw: &str) -> Option { // Try fenced first. let candidates: Vec = { let mut out = vec![]; let mut s = raw; while let Some(start) = s.find("```") { let after = &s[start + 3..]; // Skip optional language tag (json, etc.) let body_start = after.find('\n').map(|n| n + 1).unwrap_or(0); let body = &after[body_start..]; if let Some(end) = body.find("```") { out.push(body[..end].trim().to_string()); s = &body[end + 3..]; } else { break; } } out }; for c in &candidates { if let Ok(v) = serde_json::from_str::(c) { if v.is_object() { return Some(v); } } } // Fall back to outermost {...} balance. let bytes = raw.as_bytes(); let mut depth = 0i32; let mut start: Option = None; for (i, &b) in bytes.iter().enumerate() { match b { b'{' => { if start.is_none() { start = Some(i); } depth += 1; } b'}' => { depth -= 1; if depth == 0 { if let Some(s) = start { let slice = &raw[s..=i]; if let Ok(v) = serde_json::from_str::(slice) { if v.is_object() { return Some(v); } } start = None; } } } _ => {} } } None } #[cfg(test)] mod tests { use super::*; #[test] fn extract_json_from_fenced_block() { let raw = "Here's my answer:\n```json\n{\"fills\": [{\"candidate_id\": \"W-1\"}]}\n```\nDone."; let v = extract_json(raw).unwrap(); assert!(v.get("fills").is_some()); } #[test] fn extract_json_from_bare_braces() { let raw = "Here you go: {\"fills\": [{\"candidate_id\": \"W-2\"}]}"; let v = extract_json(raw).unwrap(); assert!(v.get("fills").is_some()); } #[test] fn extract_json_returns_none_on_no_object() { assert!(extract_json("just prose, no json").is_none()); } #[test] fn extract_json_picks_first_balanced() { let raw = "{\"a\":1} then {\"b\":2}"; let v = extract_json(raw).unwrap(); assert_eq!(v.get("a").and_then(|v| v.as_i64()), Some(1)); } }