- vectord crate: chunk → embed → store → search → RAG
- chunker: configurable chunk size + overlap, sentence-boundary-aware splitting
- store: embeddings as Parquet (binary-blob f32 vectors), a portable format
- search: brute-force cosine similarity (works up to ~100K vectors)
- rag: full pipeline — embed question → search index → retrieve context → LLM answer
- Endpoints: POST /vectors/index, /vectors/search, /vectors/rag
- Gateway wired with vectord service
- Tested: 200 candidate resumes indexed in 5.4s; semantic search + RAG working
- 20 unit tests passing (chunker, search, ingestd, shared)
- AI gives an honest "no match found" when the context doesn't support an answer

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
19 lines · 458 B · TOML
[package]
name = "vectord"
version = "0.1.0"
edition = "2024"

[dependencies]
shared = { path = "../shared" }
storaged = { path = "../storaged" }
aibridge = { path = "../aibridge" }
tokio = { workspace = true }
axum = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }
bytes = { workspace = true }
object_store = { workspace = true }
parquet = { workspace = true }
arrow = { workspace = true }