root 26fc98c885 Phase 7: Vector index + RAG pipeline
- vectord crate: chunk → embed → store → search → RAG
- chunker: configurable chunk size + overlap, sentence-boundary aware splitting (see the chunking sketch below)
- store: embeddings as Parquet (binary blob f32 vectors), portable format (blob layout sketched below)
- search: brute-force cosine similarity (works up to ~100K vectors; scoring sketched below)
- rag: full pipeline — embed question → search index → retrieve context → LLM answer
- Endpoints: POST /vectors/index, /vectors/search, /vectors/rag (call-site example after the source listing)
- Gateway wired with vectord service
- Tested: 200 candidate resumes indexed in 5.4s, semantic search + RAG working
- 20 unit tests passing (chunker, search, ingestd, shared)
- AI gives honest "no match found" when context doesn't support an answer

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 08:12:28 -05:00
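The chunker itself is not part of the listing below, so here is a minimal sketch of sentence-boundary-aware splitting with a size budget and overlap. The names (ChunkConfig, chunk_text), the character-based size unit, and the sentence-count overlap are assumptions, not the crate's actual API.

/// Minimal sketch: split on sentence boundaries, pack sentences into chunks
/// up to a size budget, and repeat the last few sentences in the next chunk.
pub struct ChunkConfig {
    /// Target chunk size in characters (assumed unit).
    pub max_chars: usize,
    /// Trailing sentences repeated at the start of the next chunk.
    pub overlap_sentences: usize,
}

pub fn chunk_text(text: &str, cfg: &ChunkConfig) -> Vec<String> {
    // Naive sentence split on terminal punctuation; a real chunker would also
    // handle abbreviations, newlines, and unterminated trailing text.
    let sentences: Vec<&str> = text
        .split_inclusive(|c: char| matches!(c, '.' | '!' | '?'))
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .collect();

    let mut chunks = Vec::new();
    let mut start = 0;
    while start < sentences.len() {
        // Grow the chunk sentence by sentence until the budget is hit.
        let mut end = start;
        let mut len = 0;
        while end < sentences.len() && len + sentences[end].len() <= cfg.max_chars {
            len += sentences[end].len() + 1; // +1 for the joining space
            end += 1;
        }
        if end == start {
            end = start + 1; // an oversized sentence still becomes its own chunk
        }
        chunks.push(sentences[start..end].join(" "));
        if end >= sentences.len() {
            break;
        }
        // Step back by the overlap so neighbouring chunks share context,
        // while always making forward progress.
        start = end.saturating_sub(cfg.overlap_sentences).max(start + 1);
    }
    chunks
}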
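The store keeps each embedding as one binary blob cell in a Parquet column. A minimal sketch of the f32 ↔ blob round trip, assuming packed little-endian bytes (4 bytes per component); the exact byte order and column layout are not spelled out in the commit.

/// Encode a vector as the packed byte blob written to the Parquet column.
fn vector_to_blob(v: &[f32]) -> Vec<u8> {
    v.iter().flat_map(|x| x.to_le_bytes()).collect()
}

/// Decode a blob back into its f32 components.
fn blob_to_vector(blob: &[u8]) -> Vec<f32> {
    blob.chunks_exact(4)
        .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]]))
        .collect()
}

Treating the vector as an opaque blob fits the commit's "portable format" framing: the Parquet schema stays simple, at the cost of decoding on read.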
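Brute-force search simply scores every stored vector against the query, as sketched below. The EmbeddingRecord shape and the tuple return type are assumptions based on the fields the RAG module reads (source, doc_id, chunk_text); the actual search::search returns SearchResult values.

/// Hypothetical record shape for one stored chunk.
pub struct EmbeddingRecord {
    pub doc_id: String,
    pub source: String,
    pub chunk_text: String,
    pub vector: Vec<f32>,
}

fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
}

/// Score every stored vector against the query and keep the top_k best.
pub fn search<'a>(
    query: &[f32],
    records: &'a [EmbeddingRecord],
    top_k: usize,
) -> Vec<(f32, &'a EmbeddingRecord)> {
    let mut scored: Vec<(f32, &EmbeddingRecord)> = records
        .iter()
        .map(|r| (cosine(query, &r.vector), r))
        .collect();
    // Descending by similarity; a full O(n log n) sort is fine at the
    // ~100K-vector scale the commit targets.
    scored.sort_by(|a, b| b.0.total_cmp(&a.0));
    scored.truncate(top_k);
    scored
}

The RAG module from this commit, shown next, consumes these results via search::search(&query_vec, &embeddings, top_k).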


//! RAG pipeline: question → embed → search → retrieve → generate answer.

use object_store::ObjectStore;
use std::sync::Arc;
use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};

use crate::search::{self, SearchResult};
use crate::store;

/// Full RAG answer with provenance.
#[derive(Debug, Clone, serde::Serialize)]
pub struct RagResponse {
    pub answer: String,
    pub model: String,
    pub sources: Vec<SearchResult>,
    pub tokens_generated: Option<u64>,
}

/// Execute full RAG: embed question → search index → retrieve context → generate answer.
pub async fn query(
    question: &str,
    index_name: &str,
    top_k: usize,
    object_store: &Arc<dyn ObjectStore>,
    ai_client: &AiClient,
) -> Result<RagResponse, String> {
    // 1. Embed the question
    tracing::info!("RAG: embedding question");
    let embed_resp = ai_client
        .embed(EmbedRequest {
            texts: vec![question.to_string()],
            model: None,
        })
        .await?;
    if embed_resp.embeddings.is_empty() {
        return Err("no embedding returned for question".into());
    }
    let query_vec: Vec<f32> = embed_resp.embeddings[0].iter().map(|&x| x as f32).collect();

    // 2. Load index and search
    tracing::info!("RAG: searching index '{index_name}'");
    let embeddings = store::load_embeddings(object_store, index_name).await?;
    let results = search::search(&query_vec, &embeddings, top_k);
    if results.is_empty() {
        return Ok(RagResponse {
            answer: "No relevant information found.".into(),
            model: String::new(),
            sources: vec![],
            tokens_generated: None,
        });
    }

    // 3. Build context from retrieved chunks
    let context: String = results
        .iter()
        .enumerate()
        .map(|(i, r)| {
            format!("[{}] (source: {}, doc: {}) {}", i + 1, r.source, r.doc_id, r.chunk_text)
        })
        .collect::<Vec<_>>()
        .join("\n\n");

    // 4. Generate answer
    tracing::info!("RAG: generating answer from {} chunks", results.len());
    let prompt = format!(
        "You are a helpful assistant answering questions based on retrieved documents from a data system.\n\n\
         Use ONLY the following context to answer. If the context doesn't contain enough information, say so.\n\
         Cite sources by their number [1], [2], etc.\n\n\
         Context:\n{context}\n\n\
         Question: {question}\n\n\
         Answer:"
    );
    let gen_resp = ai_client
        .generate(GenerateRequest {
            prompt,
            model: None,
            system: None,
            temperature: Some(0.2),
            max_tokens: Some(512),
        })
        .await?;

    Ok(RagResponse {
        answer: gen_resp.text.trim().to_string(),
        model: gen_resp.model,
        sources: results,
        tokens_generated: gen_resp.tokens_generated,
    })
}
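
A hypothetical call site for query(), roughly what the POST /vectors/rag handler reduces to. The question, index name, top_k value, and the rag module path are illustrative, not taken from the commit.

use std::sync::Arc;
use object_store::ObjectStore;
use aibridge::client::AiClient;

/// Example: answer a question against a previously built index.
async fn answer_example(
    object_store: Arc<dyn ObjectStore>,
    ai_client: AiClient,
) -> Result<(), String> {
    let resp = crate::rag::query(
        "Which candidates have Rust experience?", // illustrative question
        "resumes",                                // illustrative index name
        5,                                        // top_k chunks to retrieve
        &object_store,
        &ai_client,
    )
    .await?;
    println!(
        "{}\n({} sources, model {})",
        resp.answer,
        resp.sources.len(),
        resp.model
    );
    Ok(())
}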