use axum::{
    Json, Router,
    extract::State,
    http::StatusCode,
    response::IntoResponse,
    routing::{get, post},
};

use crate::client::{AiClient, EmbedRequest, GenerateRequest, RerankRequest};

/// Builds the HTTP router for the AI endpoints.
///
/// Registers `GET /health`, `POST /embed`, `POST /generate`, `POST /rerank`
/// and `GET /vram`, and installs `client` as the shared state handed to every
/// handler.
pub fn router(client: AiClient) -> Router {
    Router::new()
        .route("/health", get(health))
        .route("/embed", post(embed))
        .route("/generate", post(generate))
        .route("/rerank", post(rerank))
        .route("/vram", get(vram))
        .with_state(client)
}

/// `GET /vram` — returns the client's VRAM snapshot as JSON.
///
/// Any error from the client is returned as the body of a `502 Bad Gateway`
/// response.
async fn vram(State(client): State<AiClient>) -> impl IntoResponse {
    client
        .vram_snapshot()
        .await
        .map(Json)
        .map_err(|e| (StatusCode::BAD_GATEWAY, e))
}

/// `GET /health` — returns the client's health info as JSON.
///
/// On failure responds with `502 Bad Gateway` and a body of the form
/// `sidecar down: <error>`.
async fn health(State(client): State<AiClient>) -> impl IntoResponse {
    client
        .health()
        .await
        .map(Json)
        .map_err(|e| (StatusCode::BAD_GATEWAY, format!("sidecar down: {e}")))
}

/// `POST /embed` — forwards the JSON request body to the client's `embed`
/// call and returns its response as JSON.
///
/// Logs the number of input texts. Any error from the client is returned as
/// the body of a `502 Bad Gateway` response.
async fn embed(
    State(client): State<AiClient>,
    Json(req): Json<EmbedRequest>,
) -> impl IntoResponse {
    tracing::info!("embedding {} texts", req.texts.len());
    client
        .embed(req)
        .await
        .map(Json)
        .map_err(|e| (StatusCode::BAD_GATEWAY, e))
}

/// `POST /generate` — forwards the JSON request body to the client's
/// `generate` call and returns its response as JSON.
///
/// Logs the prompt length as reported by `req.prompt.len()`. Any error from
/// the client is returned as the body of a `502 Bad Gateway` response.
async fn generate(
    State(client): State<AiClient>,
    Json(req): Json<GenerateRequest>,
) -> impl IntoResponse {
    tracing::info!("generating with prompt len={}", req.prompt.len());
    client
        .generate(req)
        .await
        .map(Json)
        .map_err(|e| (StatusCode::BAD_GATEWAY, e))
}

/// `POST /rerank` — forwards the JSON request body to the client's `rerank`
/// call and returns its response as JSON.
///
/// Logs the number of candidate documents. Any error from the client is
/// returned as the body of a `502 Bad Gateway` response.
async fn rerank(
    State(client): State<AiClient>,
    Json(req): Json<RerankRequest>,
) -> impl IntoResponse {
    tracing::info!("reranking {} documents", req.documents.len());
    client
        .rerank(req)
        .await
        .map(Json)
        .map_err(|e| (StatusCode::BAD_GATEWAY, e))
}