/// RAG pipeline: question → embed → search → retrieve → generate answer.
use std::sync::Arc;

use object_store::ObjectStore;

use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};

use crate::search::{self, SearchResult};
use crate::store;

/// Full RAG answer with provenance.
#[derive(Debug, Clone, serde::Serialize)]
pub struct RagResponse {
    pub answer: String,
    pub model: String,
    pub sources: Vec<SearchResult>,
    pub tokens_generated: Option<u32>,
}

/// Execute full RAG: embed question → search index → retrieve context → generate answer.
pub async fn query(
    question: &str,
    index_name: &str,
    top_k: usize,
    object_store: &Arc<dyn ObjectStore>,
    ai_client: &AiClient,
) -> Result<RagResponse, Box<dyn std::error::Error>> {
    // 1. Embed the question
    tracing::info!("RAG: embedding question");
    let embed_resp = ai_client
        .embed(EmbedRequest {
            texts: vec![question.to_string()],
            model: None,
        })
        .await?;
    if embed_resp.embeddings.is_empty() {
        return Err("no embedding returned for question".into());
    }
    let query_vec: Vec<f32> = embed_resp.embeddings[0].iter().map(|&x| x as f32).collect();

    // 2. Load index and search
    tracing::info!("RAG: searching index '{index_name}'");
    let embeddings = store::load_embeddings(object_store, index_name).await?;
    let results = search::search(&query_vec, &embeddings, top_k);
    if results.is_empty() {
        return Ok(RagResponse {
            answer: "No relevant information found.".into(),
            model: String::new(),
            sources: vec![],
            tokens_generated: None,
        });
    }

    // 3. Build context from retrieved chunks, numbered so the model can cite them
    let context: String = results
        .iter()
        .enumerate()
        .map(|(i, r)| {
            format!(
                "[{}] (source: {}, doc: {}) {}",
                i + 1,
                r.source,
                r.doc_id,
                r.chunk_text
            )
        })
        .collect::<Vec<_>>()
        .join("\n\n");

    // 4. Generate answer
    tracing::info!("RAG: generating answer from {} chunks", results.len());
    let prompt = format!(
        "You are a helpful assistant answering questions based on retrieved documents from a data system.\n\n\
         Use ONLY the following context to answer. If the context doesn't contain enough information, say so.\n\
         Cite sources by their number [1], [2], etc.\n\n\
         Context:\n{context}\n\n\
         Question: {question}\n\n\
         Answer:"
    );
    let gen_resp = ai_client
        .generate(GenerateRequest {
            prompt,
            model: None,
            system: None,
            temperature: Some(0.2),
            max_tokens: Some(512),
        })
        .await?;

    Ok(RagResponse {
        answer: gen_resp.text.trim().to_string(),
        model: gen_resp.model,
        sources: results,
        tokens_generated: gen_resp.tokens_generated,
    })
}
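
// --- Illustrative usage sketch ---
// A minimal sketch of how a caller might drive `query` end to end. It is not
// part of the pipeline above: `AiClient::default()` is a hypothetical
// constructor (use whatever builder this crate actually exposes), the "docs"
// index name and the question are placeholders, and the in-memory object
// store stands in for the real backing store. Marked `#[ignore]` because it
// needs a live model backend and a populated index to do anything useful.
#[cfg(test)]
mod rag_example {
    use super::*;
    use object_store::memory::InMemory;

    #[tokio::test]
    #[ignore = "requires a model backend and a populated embeddings index"]
    async fn rag_roundtrip_sketch() {
        // In-memory store from the `object_store` crate; a real deployment
        // would pass its S3/GCS/local store here instead.
        let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());

        // Hypothetical constructor; substitute the crate's real client setup.
        let client = AiClient::default();

        let resp = query("What is the retention policy?", "docs", 5, &store, &client)
            .await
            .expect("RAG query failed");

        // `sources` carries the retrieved chunks, so callers can render
        // citations next to the generated answer.
        println!("{} (model: {}, {} sources)", resp.answer, resp.model, resp.sources.len());
    }
}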