lakehouse/crates/gateway/src/v1/iterate.rs

//! /v1/iterate — the Phase 43 PRD's "generate → validate → correct → retry" loop.
//!
//! Closes the "0→85% with iteration" thesis structurally. A caller
//! posts a prompt + artifact kind + validation context; the gateway:
//!   1. Generates a JSON artifact via /v1/chat (any provider/model)
//!   2. Extracts the JSON object from the model output
//!   3. Validates via /v1/validate (FillValidator / EmailValidator /
//!      PlaybookValidator with the shared WorkerLookup)
//!   4. On ValidationError, appends the error to the prompt and
//!      retries up to `max_iterations` (default 3)
//!   5. Returns the accepted artifact + Report on success, OR the
//!      attempt history + final error on max-iter exhaustion
//!
//! Internal calls go via HTTP loopback to localhost:gateway_port so
//! the same /v1/usage tracking and Langfuse traces apply. A small
//! latency cost (~1-3ms per loopback hop) for clean separation of
//! concerns and observability.
//!
//! 2026-04-27 Phase 43 v3 part 3: this endpoint makes the iteration
//! loop a first-class lakehouse capability rather than a per-caller
//! re-implementation. Staffing executors, agent loops, and future
//! validators all reach the same code path.

use axum::{extract::State, http::{HeaderMap, StatusCode}, response::IntoResponse, Json};
use serde::{Deserialize, Serialize};

const DEFAULT_MAX_ITERATIONS: u32 = 3;
const LOOPBACK_TIMEOUT_SECS: u64 = 240;

/// Header name used to propagate a Langfuse parent trace id across
/// daemon boundaries. Matches Go's `shared.TraceIDHeader` constant
/// byte-for-byte (commit d6d2fdf in golangLAKEHOUSE) — same wire
/// format means a Go caller can hit Rust's /v1/iterate (or vice
/// versa) and the resulting Langfuse trees nest correctly.
pub const TRACE_ID_HEADER: &str = "x-lakehouse-trace-id";

#[derive(Deserialize)]
pub struct IterateRequest {
    /// "fill" | "email" | "playbook" — picks which validator runs.
    pub kind: String,
    /// The prompt to seed generation. Validation errors from prior
    /// attempts are appended on retry.
    pub prompt: String,
    /// Provider/model passed through to /v1/chat. e.g. "ollama_cloud"
    /// + "kimi-k2.6", or "opencode" + "claude-haiku-4-5".
    pub provider: String,
    pub model: String,
    /// Optional system prompt — sent to /v1/chat as the system message.
    #[serde(default)]
    pub system: Option<String>,
    /// Validation context (target_count, city, state, role, client_id
    /// for fills; candidate_id for emails). Forwarded to /v1/validate.
    #[serde(default)]
    pub context: Option<serde_json::Value>,
    /// Cap on iteration count. Defaults to 3 per the Phase 43 PRD.
    #[serde(default)]
    pub max_iterations: Option<u32>,
    /// Forwarded to /v1/chat. Defaults to 0.2 if unset.
    #[serde(default)]
    pub temperature: Option<f64>,
    /// Forwarded to /v1/chat. Defaults to 4096 if unset.
    #[serde(default)]
    pub max_tokens: Option<u32>,
}

#[derive(Serialize)]
pub struct IterateAttempt {
    pub iteration: u32,
    pub raw: String,
    pub status: AttemptStatus,
}

#[derive(Serialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum AttemptStatus {
    /// Model output didn't contain extractable JSON.
    NoJson,
    /// JSON extracted but failed validation; carries the error.
    ValidationFailed { error: serde_json::Value },
    /// Validation passed (last attempt's terminal status).
    Accepted,
}

#[derive(Serialize)]
pub struct IterateResponse {
    pub artifact: serde_json::Value,
    pub validation: serde_json::Value,
    pub iterations: u32,
    pub history: Vec<IterateAttempt>,
    /// Echoes the resolved trace id (caller-forwarded header, body
    /// field, langfuse-middleware mint, or local fallback). Operators
    /// pivot from this id straight into Langfuse + the
    /// coordinator_sessions.jsonl join key. Cross-runtime parity with
    /// Go's `validator.IterateResponse` (commit 6847bbc in
    /// golangLAKEHOUSE).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trace_id: Option<String>,
}

#[derive(Serialize)]
pub struct IterateFailure {
    pub error: String,
    pub iterations: u32,
    pub history: Vec<IterateAttempt>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trace_id: Option<String>,
}

pub async fn iterate(
    State(state): State<super::V1State>,
    headers: HeaderMap,
    Json(req): Json<IterateRequest>,
) -> impl IntoResponse {
    let max_iter = req.max_iterations.unwrap_or(DEFAULT_MAX_ITERATIONS).max(1);
    let temperature = req.temperature.unwrap_or(0.2);
    let max_tokens = req.max_tokens.unwrap_or(4096);
    let mut history: Vec<IterateAttempt> = Vec::with_capacity(max_iter as usize);
    let mut attempt_records: Vec<super::session_log::SessionAttemptRecord> = Vec::with_capacity(max_iter as usize);
    let mut current_prompt = req.prompt.clone();

    // Resolve the parent Langfuse trace id. Caller-forwarded header
    // wins (cross-daemon tree linkage); otherwise mint a fresh id so
    // the iterate session is its own tree. Same shape as the Go-side
    // validatord trace propagation.
    let trace_id: String = headers
        .get(TRACE_ID_HEADER)
        .and_then(|v| v.to_str().ok())
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .unwrap_or_else(new_trace_id);

    let client = match reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(LOOPBACK_TIMEOUT_SECS))
        .build() {
        Ok(c) => c,
        Err(e) => {
            // Even infrastructure failures get a session row so a
            // missing /v1/iterate event never silently disappears
            // from the longitudinal log.
            write_infra_error(&state, &req, &trace_id, max_iter, 0, format!("client build: {e}")).await;
            return (StatusCode::INTERNAL_SERVER_ERROR, format!("client build: {e}")).into_response();
        }
    };
    // Self-loopback to the gateway port. Carries gateway internal
    // calls through /v1/chat + /v1/validate so /v1/usage tracks them.
    let gateway = "http://127.0.0.1:3100";
    let t0 = std::time::Instant::now();

    for iteration in 0..max_iter {
        let attempt_started = chrono::Utc::now();
        // ── Generate ──
        let mut messages = Vec::with_capacity(2);
        if let Some(sys) = &req.system {
            messages.push(serde_json::json!({"role": "system", "content": sys}));
        }
        messages.push(serde_json::json!({"role": "user", "content": current_prompt}));
        let chat_body = serde_json::json!({
            "messages": messages,
            "provider": req.provider,
            "model": req.model,
            "temperature": temperature,
            "max_tokens": max_tokens,
        });
        let raw = match call_chat(&client, gateway, &chat_body, &trace_id).await {
            Ok(r) => r,
            Err(e) => {
                write_infra_error(&state, &req, &trace_id, max_iter, t0.elapsed().as_millis() as u64, format!("/v1/chat hop failed at iter {iteration}: {e}")).await;
                return (StatusCode::BAD_GATEWAY, format!("/v1/chat hop failed at iter {iteration}: {e}")).into_response();
            }
        };

        // ── Extract JSON ──
        let artifact = match extract_json(&raw) {
            Some(a) => a,
            None => {
                let span_id = emit_attempt_span(
                    &state, &trace_id, iteration, &req, &current_prompt, &raw, "no_json", None,
                    attempt_started, chrono::Utc::now(),
                );
                history.push(IterateAttempt {
                    iteration,
                    raw: raw.chars().take(2000).collect(),
                    status: AttemptStatus::NoJson,
                });
                attempt_records.push(super::session_log::SessionAttemptRecord {
                    iteration,
                    verdict_kind: "no_json".to_string(),
                    error: None,
                    span_id,
                });
                current_prompt = format!(
                    "{}\n\nYour previous attempt did not contain a JSON object. Reply with ONLY a valid JSON object matching the requested artifact shape.",
                    req.prompt,
                );
                continue;
            }
        };

        // ── Validate ──
        let validate_body = serde_json::json!({
            "kind": req.kind,
            "artifact": artifact,
            "context": req.context.clone().unwrap_or(serde_json::Value::Null),
        });
        match call_validate(&client, gateway, &validate_body, &trace_id).await {
            Ok(report) => {
                let span_id = emit_attempt_span(
                    &state, &trace_id, iteration, &req, &current_prompt, &raw, "accepted", None,
                    attempt_started, chrono::Utc::now(),
                );
                history.push(IterateAttempt {
                    iteration,
                    raw: raw.chars().take(2000).collect(),
                    status: AttemptStatus::Accepted,
                });
                attempt_records.push(super::session_log::SessionAttemptRecord {
                    iteration,
                    verdict_kind: "accepted".to_string(),
                    error: None,
                    span_id,
                });
                let duration_ms = t0.elapsed().as_millis() as u64;
                let grounded = grounded_in_roster(&state, &req.kind, &artifact);
                write_session_accepted(&state, &req, &trace_id, iteration + 1, max_iter, attempt_records, &artifact, grounded, duration_ms).await;
                return (StatusCode::OK, Json(IterateResponse {
                    artifact,
                    validation: report,
                    iterations: iteration + 1,
                    history,
                    trace_id: Some(trace_id.clone()),
                })).into_response();
            }
            Err(err) => {
                let err_summary = err.to_string();
                let span_id = emit_attempt_span(
                    &state, &trace_id, iteration, &req, &current_prompt, &raw, "validation_failed",
                    Some(err_summary.clone()),
                    attempt_started, chrono::Utc::now(),
                );
                history.push(IterateAttempt {
                    iteration,
                    raw: raw.chars().take(2000).collect(),
                    status: AttemptStatus::ValidationFailed {
                        error: serde_json::to_value(&err_summary).unwrap_or(serde_json::Value::Null),
                    },
                });
                attempt_records.push(super::session_log::SessionAttemptRecord {
                    iteration,
                    verdict_kind: "validation_failed".to_string(),
                    error: Some(err_summary.clone()),
                    span_id,
                });
                // Append validation feedback to prompt for next iter.
                // The model sees concrete failure mode + retries with
                // corrective context. This is the "observer correction"
                // in Phase 43 PRD shape, simplified — the validator
                // itself IS the observer for now.
                current_prompt = format!(
                    "{}\n\nPrior attempt failed validation:\n{}\n\nFix the specific issue above and respond with a corrected JSON object.",
                    req.prompt, err_summary,
                );
                continue;
            }
        }
    }

    let duration_ms = t0.elapsed().as_millis() as u64;
    write_session_failure(&state, &req, &trace_id, max_iter, max_iter, attempt_records, duration_ms).await;
    (StatusCode::UNPROCESSABLE_ENTITY, Json(IterateFailure {
        error: format!("max iterations reached ({max_iter}) without passing validation"),
        iterations: max_iter,
        history,
        trace_id: Some(trace_id.clone()),
    })).into_response()
}

// ─── Helpers — Langfuse spans + session log + roster check ─────────

fn emit_attempt_span(
    state: &super::V1State,
    trace_id: &str,
    iteration: u32,
    req: &IterateRequest,
    prompt: &str,
    raw: &str,
    verdict: &str,
    error: Option<String>,
    started: chrono::DateTime<chrono::Utc>,
    ended: chrono::DateTime<chrono::Utc>,
) -> Option<String> {
    let lf = state.langfuse.as_ref()?;
    Some(lf.emit_attempt_span(super::langfuse_trace::AttemptSpan {
        trace_id: trace_id.to_string(),
        iteration,
        model: req.model.clone(),
        provider: req.provider.clone(),
        prompt: prompt.to_string(),
        raw: raw.to_string(),
        verdict: verdict.to_string(),
        error,
        start_time: started.to_rfc3339(),
        end_time: ended.to_rfc3339(),
    }))
}

/// Verify every fill artifact's candidate IDs exist in the roster.
/// Returns Some(true)/Some(false) on the fill kind, None otherwise
/// (other kinds don't have worker IDs to ground). Same semantics as
/// Go's `handlers.rosterCheckFor("fill")`.
fn grounded_in_roster(
    state: &super::V1State,
    kind: &str,
    artifact: &serde_json::Value,
) -> Option<bool> {
    if kind != "fill" {
        return None;
    }
    let fills = artifact.get("fills").and_then(|v| v.as_array())?;
    for f in fills {
        let id = match f.get("candidate_id").and_then(|v| v.as_str()) {
            Some(s) if !s.is_empty() => s,
            _ => return Some(false),
        };
        if state.validate_workers.find(id).is_none() {
            return Some(false);
        }
    }
    Some(true)
}

async fn write_session_accepted(
    state: &super::V1State,
    req: &IterateRequest,
    trace_id: &str,
    iterations: u32,
    max_iter: u32,
    attempts: Vec<super::session_log::SessionAttemptRecord>,
    artifact: &serde_json::Value,
    grounded: Option<bool>,
    duration_ms: u64,
) {
    let Some(logger) = state.session_log.as_ref() else { return };
    let rec = build_session_record(req, trace_id, "accepted", iterations, max_iter, attempts, Some(artifact.clone()), grounded, duration_ms);
    logger.append(rec).await;
}

async fn write_session_failure(
    state: &super::V1State,
    req: &IterateRequest,
    trace_id: &str,
    iterations: u32,
    max_iter: u32,
    attempts: Vec<super::session_log::SessionAttemptRecord>,
    duration_ms: u64,
) {
    let Some(logger) = state.session_log.as_ref() else { return };
    let rec = build_session_record(req, trace_id, "max_iter_exhausted", iterations, max_iter, attempts, None, None, duration_ms);
    logger.append(rec).await;
}

async fn write_infra_error(
    state: &super::V1State,
    req: &IterateRequest,
    trace_id: &str,
    max_iter: u32,
    duration_ms: u64,
    error: String,
) {
    let Some(logger) = state.session_log.as_ref() else { return };
    let attempts = vec![super::session_log::SessionAttemptRecord {
        iteration: 0,
        verdict_kind: "infra_error".to_string(),
        error: Some(error),
        span_id: None,
    }];
    let rec = build_session_record(req, trace_id, "infra_error", 0, max_iter, attempts, None, None, duration_ms);
    logger.append(rec).await;
}

fn build_session_record(
    req: &IterateRequest,
    trace_id: &str,
    final_verdict: &str,
    iterations: u32,
    max_iter: u32,
    attempts: Vec<super::session_log::SessionAttemptRecord>,
    artifact: Option<serde_json::Value>,
    grounded: Option<bool>,
    duration_ms: u64,
) -> super::session_log::SessionRecord {
    super::session_log::SessionRecord {
        schema: super::session_log::SESSION_RECORD_SCHEMA.to_string(),
        session_id: trace_id.to_string(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        daemon: "gateway".to_string(),
        kind: req.kind.clone(),
        model: req.model.clone(),
        provider: req.provider.clone(),
        prompt: super::session_log::truncate(&req.prompt, 4000),
        iterations,
        max_iterations: max_iter,
        final_verdict: final_verdict.to_string(),
        attempts,
        artifact,
        grounded_in_roster: grounded,
        duration_ms,
    }
}

/// Generate a fresh trace id when no parent was forwarded. Same
/// time-ordered hex shape Langfuse already accepts elsewhere in this
/// crate (see `langfuse_trace::uuid_v7_like`).
fn new_trace_id() -> String {
    let ts = chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0);
    let rand = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.subsec_nanos())
        .unwrap_or(0);
    format!("{:016x}-{:08x}", ts, rand)
}

async fn call_chat(client: &reqwest::Client, gateway: &str, body: &serde_json::Value, trace_id: &str) -> Result<String, String> {
    let mut req = client.post(format!("{gateway}/v1/chat")).json(body);
    if !trace_id.is_empty() {
        req = req.header(TRACE_ID_HEADER, trace_id);
    }
    let resp = req.send().await.map_err(|e| format!("chat hop: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_default();
        return Err(format!("chat {}: {}", status, body.chars().take(300).collect::<String>()));
    }
    let parsed: serde_json::Value = resp.json().await.map_err(|e| format!("chat parse: {e}"))?;
    Ok(parsed.pointer("/choices/0/message/content")
        .and_then(|v| v.as_str())
        .unwrap_or("")
        .to_string())
}

async fn call_validate(client: &reqwest::Client, gateway: &str, body: &serde_json::Value, trace_id: &str) -> Result<serde_json::Value, String> {
    let mut req = client.post(format!("{gateway}/v1/validate")).json(body);
    if !trace_id.is_empty() {
        req = req.header(TRACE_ID_HEADER, trace_id);
    }
    let resp = req.send().await.map_err(|e| format!("validate hop: {e}"))?;
    let status = resp.status();
    let parsed: serde_json::Value = resp.json().await.map_err(|e| format!("validate parse: {e}"))?;
    if status.is_success() {
        Ok(parsed)
    } else {
        // The /v1/validate endpoint returns a ValidationError JSON
        // on 422; surface its structure verbatim so the prompt-
        // appending step gets specific failure detail.
        Err(serde_json::to_string(&parsed).unwrap_or_else(|_| format!("validation {} (unparseable body)", status)))
    }
}

/// Extract the first JSON object from a model's output. Handles
/// fenced code blocks (```json ... ```), bare braces, and stray
/// prose around the JSON. Returns None on no extractable object.
///
/// Made `pub` 2026-05-02 to support the cross-runtime parity probe
/// at `golangLAKEHOUSE/scripts/cutover/parity/extract_json_parity.sh`.
/// The Go counterpart lives at `internal/validator/iterate.go::ExtractJSON`;
/// when either runtime's algorithm changes the parity probe surfaces
/// the divergence.
pub fn extract_json(raw: &str) -> Option<serde_json::Value> {
    // Try fenced first.
    let candidates: Vec<String> = {
        let mut out = vec![];
        let mut s = raw;
        while let Some(start) = s.find("```") {
            let after = &s[start + 3..];
            // Skip optional language tag (json, etc.)
            let body_start = after.find('\n').map(|n| n + 1).unwrap_or(0);
            let body = &after[body_start..];
            if let Some(end) = body.find("```") {
                out.push(body[..end].trim().to_string());
                s = &body[end + 3..];
            } else { break; }
        }
        out
    };
    for c in &candidates {
        if let Ok(v) = serde_json::from_str::<serde_json::Value>(c) {
            if v.is_object() { return Some(v); }
        }
    }
    // Fall back to outermost {...} balance.
    let bytes = raw.as_bytes();
    let mut depth = 0i32;
    let mut start: Option<usize> = None;
    for (i, &b) in bytes.iter().enumerate() {
        match b {
            b'{' => { if start.is_none() { start = Some(i); } depth += 1; }
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    if let Some(s) = start {
                        let slice = &raw[s..=i];
                        if let Ok(v) = serde_json::from_str::<serde_json::Value>(slice) {
                            if v.is_object() { return Some(v); }
                        }
                        start = None;
                    }
                }
            }
            _ => {}
        }
    }
    None
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn extract_json_from_fenced_block() {
        let raw = "Here's my answer:\n```json\n{\"fills\": [{\"candidate_id\": \"W-1\"}]}\n```\nDone.";
        let v = extract_json(raw).unwrap();
        assert!(v.get("fills").is_some());
    }

    #[test]
    fn extract_json_from_bare_braces() {
        let raw = "Here you go: {\"fills\": [{\"candidate_id\": \"W-2\"}]}";
        let v = extract_json(raw).unwrap();
        assert!(v.get("fills").is_some());
    }

    #[test]
    fn extract_json_returns_none_on_no_object() {
        assert!(extract_json("just prose, no json").is_none());
    }

    #[test]
    fn extract_json_picks_first_balanced() {
        let raw = "{\"a\":1} then {\"b\":2}";
        let v = extract_json(raw).unwrap();
        assert_eq!(v.get("a").and_then(|v| v.as_i64()), Some(1));
    }
}