From 239e47122327e4baa97ff5a4cd6f95fd8afb205c Mon Sep 17 00:00:00 2001 From: root Date: Fri, 27 Mar 2026 05:53:56 -0500 Subject: [PATCH] Phase 3: AI integration with Ollama via Python sidecar - sidecar: FastAPI app with /embed, /generate, /rerank hitting Ollama - sidecar: Dockerfile, env var config (EMBED_MODEL, GEN_MODEL, RERANK_MODEL) - aibridge: reqwest HTTP client with typed request/response structs - aibridge: Axum proxy endpoints (POST /ai/embed, /ai/generate, /ai/rerank) - gateway: wires AiClient with SIDECAR_URL env var - e2e verified: nomic-embed-text returns 768d vectors, qwen2.5 generates text Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 12 ++ crates/aibridge/Cargo.toml | 1 + crates/aibridge/src/client.rs | 137 ++++++++++++++++++ crates/aibridge/src/lib.rs | 1 + crates/aibridge/src/service.rs | 61 +++++++- crates/gateway/src/main.rs | 6 +- docs/PHASES.md | 12 +- sidecar/Dockerfile | 17 +++ sidecar/pyproject.toml | 14 ++ sidecar/sidecar/__init__.py | 0 .../__pycache__/__init__.cpython-313.pyc | Bin 0 -> 147 bytes .../sidecar/__pycache__/embed.cpython-313.pyc | Bin 0 -> 2412 bytes .../__pycache__/generate.cpython-313.pyc | Bin 0 -> 2753 bytes .../sidecar/__pycache__/main.cpython-313.pyc | Bin 0 -> 1256 bytes .../__pycache__/ollama.cpython-313.pyc | Bin 0 -> 762 bytes .../__pycache__/rerank.cpython-313.pyc | Bin 0 -> 3839 bytes sidecar/sidecar/embed.py | 44 ++++++ sidecar/sidecar/generate.py | 55 +++++++ sidecar/sidecar/main.py | 23 +++ sidecar/sidecar/ollama.py | 12 ++ sidecar/sidecar/rerank.py | 70 +++++++++ 21 files changed, 453 insertions(+), 12 deletions(-) create mode 100644 crates/aibridge/src/client.rs create mode 100644 sidecar/Dockerfile create mode 100644 sidecar/pyproject.toml create mode 100644 sidecar/sidecar/__init__.py create mode 100644 sidecar/sidecar/__pycache__/__init__.cpython-313.pyc create mode 100644 sidecar/sidecar/__pycache__/embed.cpython-313.pyc create mode 100644 sidecar/sidecar/__pycache__/generate.cpython-313.pyc create mode 100644 sidecar/sidecar/__pycache__/main.cpython-313.pyc create mode 100644 sidecar/sidecar/__pycache__/ollama.cpython-313.pyc create mode 100644 sidecar/sidecar/__pycache__/rerank.cpython-313.pyc create mode 100644 sidecar/sidecar/embed.py create mode 100644 sidecar/sidecar/generate.py create mode 100644 sidecar/sidecar/main.py create mode 100644 sidecar/sidecar/ollama.py create mode 100644 sidecar/sidecar/rerank.py diff --git a/Cargo.lock b/Cargo.lock index d88f471..6e12912 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,6 +36,7 @@ name = "aibridge" version = "0.1.0" dependencies = [ "axum", + "reqwest", "serde", "serde_json", "shared", @@ -1740,6 +1741,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -2734,6 +2736,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", + "webpki-roots", ] [[package]] @@ -3672,6 +3675,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "winapi-util" version = "0.1.11" diff --git a/crates/aibridge/Cargo.toml b/crates/aibridge/Cargo.toml index 8f57876..5a1a93b 100644 --- a/crates/aibridge/Cargo.toml +++ b/crates/aibridge/Cargo.toml @@ -10,3 +10,4 @@ axum = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } tracing = { workspace = true } +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } diff --git a/crates/aibridge/src/client.rs b/crates/aibridge/src/client.rs new file mode 100644 index 0000000..97e7cec --- /dev/null +++ b/crates/aibridge/src/client.rs @@ -0,0 +1,137 @@ +use reqwest::Client; +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +/// HTTP client for the Python AI sidecar. +#[derive(Clone)] +pub struct AiClient { + client: Client, + base_url: String, +} + +// -- Request/Response types -- + +#[derive(Serialize, Deserialize)] +pub struct EmbedRequest { + pub texts: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +#[derive(Deserialize, Serialize, Clone)] +pub struct EmbedResponse { + pub embeddings: Vec>, + pub model: String, + pub dimensions: usize, +} + +#[derive(Serialize, Deserialize)] +pub struct GenerateRequest { + pub prompt: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub system: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub temperature: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub max_tokens: Option, +} + +#[derive(Deserialize, Serialize, Clone)] +pub struct GenerateResponse { + pub text: String, + pub model: String, + pub tokens_evaluated: Option, + pub tokens_generated: Option, +} + +#[derive(Serialize, Deserialize)] +pub struct RerankRequest { + pub query: String, + pub documents: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub top_k: Option, +} + +#[derive(Deserialize, Serialize, Clone)] +pub struct ScoredDocument { + pub index: usize, + pub text: String, + pub score: f64, +} + +#[derive(Deserialize, Serialize, Clone)] +pub struct RerankResponse { + pub results: Vec, + pub model: String, +} + +impl AiClient { + pub fn new(base_url: &str) -> Self { + let client = Client::builder() + .timeout(Duration::from_secs(120)) + .build() + .expect("failed to build HTTP client"); + Self { + client, + base_url: base_url.trim_end_matches('/').to_string(), + } + } + + pub async fn health(&self) -> Result { + let resp = self.client + .get(format!("{}/health", self.base_url)) + .send() + .await + .map_err(|e| format!("sidecar unreachable: {e}"))?; + resp.json().await.map_err(|e| format!("invalid response: {e}")) + } + + pub async fn embed(&self, req: EmbedRequest) -> Result { + let resp = self.client + .post(format!("{}/embed", self.base_url)) + .json(&req) + .send() + .await + .map_err(|e| format!("embed request failed: {e}"))?; + + if !resp.status().is_success() { + let text = resp.text().await.unwrap_or_default(); + return Err(format!("embed error ({}): {text}", text.len())); + } + resp.json().await.map_err(|e| format!("embed parse error: {e}")) + } + + pub async fn generate(&self, req: GenerateRequest) -> Result { + let resp = self.client + .post(format!("{}/generate", self.base_url)) + .json(&req) + .send() + .await + .map_err(|e| format!("generate request failed: {e}"))?; + + if !resp.status().is_success() { + let text = resp.text().await.unwrap_or_default(); + return Err(format!("generate error: {text}")); + } + resp.json().await.map_err(|e| format!("generate parse error: {e}")) + } + + pub async fn rerank(&self, req: RerankRequest) -> Result { + let resp = self.client + .post(format!("{}/rerank", self.base_url)) + .json(&req) + .send() + .await + .map_err(|e| format!("rerank request failed: {e}"))?; + + if !resp.status().is_success() { + let text = resp.text().await.unwrap_or_default(); + return Err(format!("rerank error: {text}")); + } + resp.json().await.map_err(|e| format!("rerank parse error: {e}")) + } +} diff --git a/crates/aibridge/src/lib.rs b/crates/aibridge/src/lib.rs index 1f278a4..431311a 100644 --- a/crates/aibridge/src/lib.rs +++ b/crates/aibridge/src/lib.rs @@ -1 +1,2 @@ +pub mod client; pub mod service; diff --git a/crates/aibridge/src/service.rs b/crates/aibridge/src/service.rs index 97c7655..dee4847 100644 --- a/crates/aibridge/src/service.rs +++ b/crates/aibridge/src/service.rs @@ -1,9 +1,60 @@ -use axum::{Router, routing::get}; +use axum::{ + Json, Router, + extract::State, + http::StatusCode, + response::IntoResponse, + routing::{get, post}, +}; -pub fn router() -> Router { - Router::new().route("/health", get(health)) +use crate::client::{ + AiClient, EmbedRequest, GenerateRequest, RerankRequest, +}; + +pub fn router(client: AiClient) -> Router { + Router::new() + .route("/health", get(health)) + .route("/embed", post(embed)) + .route("/generate", post(generate)) + .route("/rerank", post(rerank)) + .with_state(client) } -async fn health() -> &'static str { - "aibridge ok" +async fn health(State(client): State) -> impl IntoResponse { + match client.health().await { + Ok(info) => Ok(Json(info)), + Err(e) => Err((StatusCode::BAD_GATEWAY, format!("sidecar down: {e}"))), + } +} + +async fn embed( + State(client): State, + Json(req): Json, +) -> impl IntoResponse { + tracing::info!("embedding {} texts", req.texts.len()); + match client.embed(req).await { + Ok(resp) => Ok(Json(resp)), + Err(e) => Err((StatusCode::BAD_GATEWAY, e)), + } +} + +async fn generate( + State(client): State, + Json(req): Json, +) -> impl IntoResponse { + tracing::info!("generating with prompt len={}", req.prompt.len()); + match client.generate(req).await { + Ok(resp) => Ok(Json(resp)), + Err(e) => Err((StatusCode::BAD_GATEWAY, e)), + } +} + +async fn rerank( + State(client): State, + Json(req): Json, +) -> impl IntoResponse { + tracing::info!("reranking {} documents", req.documents.len()); + match client.rerank(req).await { + Ok(resp) => Ok(Json(resp)), + Err(e) => Err((StatusCode::BAD_GATEWAY, e)), + } } diff --git a/crates/gateway/src/main.rs b/crates/gateway/src/main.rs index 7206092..4489629 100644 --- a/crates/gateway/src/main.rs +++ b/crates/gateway/src/main.rs @@ -22,12 +22,16 @@ async fn main() { // Query engine — DataFusion over catalog-registered Parquet let engine = queryd::context::QueryEngine::new(registry.clone(), store.clone()); + // AI sidecar client + let sidecar_url = std::env::var("SIDECAR_URL").unwrap_or_else(|_| "http://localhost:3200".to_string()); + let ai_client = aibridge::client::AiClient::new(&sidecar_url); + let app = Router::new() .route("/health", get(health)) .nest("/storage", storaged::service::router(store)) .nest("/catalog", catalogd::service::router(registry)) .nest("/query", queryd::service::router(engine)) - .nest("/ai", aibridge::service::router()) + .nest("/ai", aibridge::service::router(ai_client)) .layer(TraceLayer::new_for_http()); let addr = "0.0.0.0:3100"; diff --git a/docs/PHASES.md b/docs/PHASES.md index 9cc412a..e16ac95 100644 --- a/docs/PHASES.md +++ b/docs/PHASES.md @@ -31,13 +31,13 @@ **Gate: PASSED** — SELECT *, WHERE/ORDER BY, COUNT/AVG all return correct results via catalog. ## Phase 3: AI Integration -- [ ] 3.1 — Python sidecar: FastAPI + Ollama (embed/generate/rerank) -- [ ] 3.2 — Dockerfile for sidecar -- [ ] 3.3 — aibridge/client.rs: HTTP client to sidecar -- [ ] 3.4 — aibridge service: Axum proxy endpoints -- [ ] 3.5 — Model config via env vars +- [x] 3.1 — Python sidecar: FastAPI + Ollama (embed/generate/rerank) — real models, no mocks +- [x] 3.2 — Dockerfile for sidecar +- [x] 3.3 — aibridge/client.rs: reqwest HTTP client with 120s timeout +- [x] 3.4 — aibridge service: Axum proxy endpoints (POST /ai/embed, /ai/generate, /ai/rerank) +- [x] 3.5 — Model config via env vars (EMBED_MODEL, GEN_MODEL, RERANK_MODEL, SIDECAR_URL) -**Gate:** Rust → Python → Ollama → real embeddings return. +**Gate: PASSED** — Gateway → aibridge → sidecar → Ollama → real 768d embeddings + generation. ## Phase 4: Frontend - [ ] 4.1 — Dioxus scaffold, WASM build diff --git a/sidecar/Dockerfile b/sidecar/Dockerfile new file mode 100644 index 0000000..d0a0cd8 --- /dev/null +++ b/sidecar/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.13-slim + +WORKDIR /app + +COPY pyproject.toml . +RUN pip install --no-cache-dir . + +COPY sidecar/ sidecar/ + +ENV OLLAMA_URL=http://host.docker.internal:11434 +ENV EMBED_MODEL=nomic-embed-text +ENV GEN_MODEL=qwen2.5 +ENV RERANK_MODEL=qwen2.5 + +EXPOSE 3200 + +CMD ["uvicorn", "sidecar.main:app", "--host", "0.0.0.0", "--port", "3200"] diff --git a/sidecar/pyproject.toml b/sidecar/pyproject.toml new file mode 100644 index 0000000..559e1b5 --- /dev/null +++ b/sidecar/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "lakehouse-sidecar" +version = "0.1.0" +requires-python = ">=3.11" +dependencies = [ + "fastapi>=0.115", + "uvicorn>=0.34", + "httpx>=0.28", + "pydantic>=2.0", +] + +[tool.uvicorn] +host = "0.0.0.0" +port = 3200 diff --git a/sidecar/sidecar/__init__.py b/sidecar/sidecar/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sidecar/sidecar/__pycache__/__init__.cpython-313.pyc b/sidecar/sidecar/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7654651d1c3dc38d78ecf50e904553e7cc4c9c0e GIT binary patch literal 147 zcmey&%ge<81jiDNWrFC(AOZ#$p^VQgK*m&tbOudEzm*I{OhDdekkl$wwTfTP?1Px&+(^CyG>k&4I* zs(3;g2+%+pirhBX*Kk3DMaXYxI$rDZt!sRfQzbR9kE@Spqz$%#8f*tOR228|4XNx) z!zhj1BaIe=`Oc=7wO*BrQ8k+7)L2p8%c=2pI4i8~D?UP$#4=MJR5&km>~t<~kGlr- zqR*c>bGm<`WK><#wiM!pj_Zzb(k>g7WrFG`kxCWQuv{&6*O+iiYdZmG8~|2&P z%LHF0(%=ZMNS@r!R>&LY#tp|^j-t9JvgMpXPaI>r`~sAa7t}OMA2T$~3u_vR$19AN zHSOHEUTMt4HEr0Wj$1J;!?JN&sF;rH@s3M9oYO7Kc6A1VqiNJ%s8!P(CMs#V>r(So z1dPqaa1$%|gO|yS+G9qhO6_6O%~bT)j8S{sF*1%>HcC2O?R}JXS1(YOgnq;PJMGxK z3$;jmmY>cp2`#m@%~SoC_RO|S_uLRR(~V8#2SfV|PCo$J2DtYETnr%8JcG3uM6L;N zXhc#1UbF$QQ^k`T%Pe`lu)pE5X^l8_B*JBL%&;5=j(_|?B~0U}NfWGjh&9ZRCb99v z;fk%hl%+gvVFKQ?T$*A-23`GJZH!|8_zg35I&!d>-=gAd*P?W*q(KlMJrzAZ8I{Fa`d_J^(4l|#; z)S_~`IhD_HIV>>uc@zs`-Y;7B+o?hidek=o46!8gNflKIwHdnWCabX? zqh^t?qym>`5nM+F>4D`dkb$P>eGS~xaPk5`1Tn;7iC=R*$K8LnG!(d1%;j|_2{x?^ zGQ_TcPX(?(hVabIks4w_X|Ah=v;1}l!r%}1e~SN8U9c1URk7IDV-y4#R@cP+4s&VP ziaSli;B$xhtuRau@WVuj3{HkKdezMMS3u$F`7fGQb=<|Q6V5uewfqyVZT^E!CS#{6 z6@5%k82~KR}4!D&~3~MOAdc2kCdq5o3b0@ z31!{Y9kx)qPLy3mPxLoTpFp0ConNt8hAH?>j?d(#b2sJfpL9(1*B(kv4g48zotb!l zV(!UW%f^p#ALcIg)i$+%mijm~FJCQO-;|TmI!C(Ka*jPXgrBtysrg?_yVWDb}+X?D;d&R!er)V#!;{)a>MZ*X1{t zlHGR#f-KzzL6q)nM8n^c>c67zU}oad#DC6)SxSB)v~bU-8I&-Q9bTzt&26 z_Xe+}j%~xicLyUVU*Jjaeqlk#Fn%vV`NDp}_=8ekJg{(t^bvXCsEqvgkpRk-gvZ&A z7Z}ztepQoVp@4e$g4GLU-NGk@vi$WV+fN@q?w&;JPjbMp&YRS>=;O@H&kO(E@T(=O z;(nCY)lx6`bp72sLJyz{<|a5}*xcp`A-_QAZ`k=OZ21e?e-CYVuVYI3EtZ%$JAHP> znzrU%Ufg+PDRy)!a#!L==WO;aV0Smnla|@sUBK?P3|#Qtj<-54<>#YQ9gAf5Z7xZq S+2RUdx55LLxx)m0iT)oqc=wzD literal 0 HcmV?d00001 diff --git a/sidecar/sidecar/__pycache__/generate.cpython-313.pyc b/sidecar/sidecar/__pycache__/generate.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27dbcf2be5a21e5d3d704c5b97e96d4f5b2eb485 GIT binary patch literal 2753 zcmZ`*O>7&-6`uWHl1oyiC0P<>i2J8%2*w2*k{`0UY!Xx=E1QTzcqxyQCu%&Lj5C z%r|e|zI}hg-DosSfIk2EpUpD@A%Dk3mtgJFt_Q)l<&2f}V;hLM{ za{?7$&a1+ln3JfKB1LwbOm$RHxkw6$&SKwC7}&SyOjS{prr2OXBSU1IsPYI=Lq%bV z>!6B{h5?Q2L!(8xaQGGalwwqko@3QmQTYL@#z)9a{)JufMJC<1g@^KK)(c&FXSQJ1 zU4wejpS=6-J2Oiqqvo2nm1exqW!*8}w#!Ckive*!#8SmHEY}NNnaOK!U(L?^bVa)L zpdmMvB<03_a#CA<2#tN4-jRLNh-G+YEPs3Ct9rph9hj(G7ahGkIQH44V9y5YE6QFwzV)~H>rxp_~h;)i&m zv+TG=b#cL`e_iwzp) z?N(i{bavvJwqR1nt(cZ!*|5zyF7*J_Ez5Rwbf}|g6iuQrWN|c3Sg6>#OZ#xnnc&v} zO>wFvEz$V zwPlLw?lXM5^Cqynq^-p7%(aI`8#AkCn*D3jEq=5;e7Nz}>W`bRt>s$$;r8%oW3D-X z_~1w**W|w7M`*IE>D9q5!1i|s+YdYEhm41?ODf+9dk|tCLi{Zz`f?HtE%IsE>-#pe zP7OS1Y+(_3ylX7EzEAUBUl1_OxUE;}kObuyNY@o*-;L=o#iFDKk@X`(Rdf*9A!J99 z4I>*zHi8WOAAI~M&W>~$9(7It1BZsAcV^p>*qvPa*u*B^-z;rp@7-+i6Z@B6@DqU= z)Zk%3#i$W{5hfO-Cr6E2B`mfsK|+!{@a*1z{&0+>z`^qb9o0Sa9-`hhfdx}-LQ@q8(zTZP%7U;fThunU(#5^0+_;t)gpSoR% zA+yjSZb09-&D`hmf#pu#-K2b?*cCjv`_c3)ED>`418j>l$a5vW zKD##iOd0>(aARg$B#A@q#K6ZV?wx2S4n0pEd3f=`#WoBV9$eTFxc=}?NREq*E87Vo zNA9lNU1^VwJ z>q~nIQAXR*1MBavz2Cg~XuK6Y-i{rFOX1&f?L@)^KJgBq-uG5d?83 z3g^F!Pk+Ptde)a#m;UGc48Hs~m)^)uGM`K`*@XDX84l-X`?Evbr(q`BFMb+15A!E- z1n?&Znd~6{r2it!pY}6=KOJOnJ|t!{($gbM_Nel7Ou_m0B*4>s9&09_R^Vi>@lrtir= z3a&E-7;!*-0Y^4;XGldFZz)EkZDCUbh59bv?c_%4BAmm}=t J4g&t=@qf_iNKya* literal 0 HcmV?d00001 diff --git a/sidecar/sidecar/__pycache__/main.cpython-313.pyc b/sidecar/sidecar/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4396af1de1734ba19a78ced1fa337eb4af1751d5 GIT binary patch literal 1256 zcmbtT%}*0S6rbJg?si*>M1fkoG|0h9=mw!NfFG#=KWIVIi5G0BYZ>UmcDK%M6?-tC z2R+(@>6IhiJoYaz9z0-rkc^P{5f0oSJ$Z9xy9HxROmvcY@ArQ5-p=g%Xof-oK-c^G z@APdRfbYy`FUbSVZxp-+5I`;kFw=aBo8~be1yF##lrZhXqK74lWe@u)R-z!w_X8+G zX^g{xY#pLrG{%S|FmUVmy&b7%l3n#OG*Bc12N_kcqJ7AmVSMimYNvnJe@TS5u;ZG{Y(6y{EZK zrt&%6%@0(Z1yNQG?w)(ZM242`_mvTUX8{l-tqJANLJ_i^3uo zQ)UTot&qU&bUHbc%-zkTE4>BBDUGYDVdXWWVA;<2@bKu!C<#u@T$`HA&D@@xN>{o} ztElIP+)NKSXw@Ogjj37hP=5FrnHS=Tn2$)dqd8@p1T4eQidwFW4H9&taz$$a8APF_ zXu+63pJEdknU8dAnM7DZ&KuBVX(VyAU=@*C!q%ehs7AXDRJ+v!N4lu#X1w%-NUb8` zGc>r1ZaYoIK3D?>5;zs!={;Rrt}fTis=1Nf8c8*J(>vivtx~Pjo>rf36t@yL8sVEe zJ>lA1b*?sFo!>~kjWl{D4i&LOIsl?5{jk||K1=o|2e?=#?x6;e7irgOB^?XY!mMR5 z>jBcy`rTF!V>a*PM7#fT9vftAsMUfVM+B`@BAvRKH_8jB?a0R0{VRN$AvR&Sj|KZL z(R`K-bOrZQlMRl&PQ`)1aolI1?13|TAhrwCT`>4n3T<_t|0oTv315Kl37q^0PQJRd z>95OgpKLKJxqU=m-JGZ|*Rw7?eN128jMUH7`&@dG=9Cv-gxA9_BMt7AmS6t|t+U2@laEbUNLBCU{67V{&Q|#jO#YA!BHU%DUt027Tl%RjeLDPh?@G&e!+9F#p0vso%c>E_vKS&h6`zSI zxcT0tq{%`DI2bi1?R&%y=p1)bk#?A$#sm^+Xo4#i*=&$n+rv-k0;l4g{nuRGh9HDD zAk_!aKA8I*OPubW?4ItO?0qbD@8^55LPxz(6KCjBJy$Q|=ke|x{mXnW`KYHp?kKl1 UgavpeT_=~XlFNUAV8fFC0Ig`CkN^Mx literal 0 HcmV?d00001 diff --git a/sidecar/sidecar/__pycache__/rerank.cpython-313.pyc b/sidecar/sidecar/__pycache__/rerank.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7cda2117bde7afa9a80960b6c1f59455a39234da GIT binary patch literal 3839 zcmaJETTC3+_0DT&_6;mx3?>XVINioxz?j6~*sdQ?1NJ)IF_j}$+KzVzSbLdS-2U!10*r0>2QOZ|`d05mTFYBmLyt1H_nc~KmyL{;+hBkkFrIWStLc^< zNc1Mo#s^cEuUzQ8G$r1;qv^+DUFjVS2q|BOY+eKGeUc=UQ3z!fhH|YW$tMMceHC_+ zo+OXHfNu`J6`qPoUg3{|!vH6gaZ=K|jq^A-k@`mYsALBd8dde13GG%vGp&sv$gl-~ z>7?z=80o?|=xfGpVI1#a3zm^j-TW1a`fVwd($#S-m9l-Q6zmpq2nSNBTLm>&-U+8t zBUx%%xvZ|~25fVtMQy;Ux^7r1+Q|egpUVP11V_^_p3h~$Mm4FFi6rTiYFRXU9c&D= zv;qh=;YaNDwlQN|Ys*t(Bx|+h)SKFvQ82YOGn>)UD&1bGbHP}Cl43YJ6W-E13e!EZ z?hm~?upW-gpP!vvAE5Ov|K6TD4%`3NQ)M3|Meq#o zctN1js3$7gAth~4Epwr4@QrE^WeZt7qfOYnrA=72U?PQc{dm+%D^V3p8XCa~>1iEK zwK!qs*uqH8P%XM25oooN`V9y*x)bu5D9bg3FZgc%y1(MROC@R~-21Zau(Q`?onZ8o zgHud;w!=~y6`~$H2j6QOXi{6GnpwzMW+?l@B2jE@!Z6+W$4-pxz zKSUDl?n~<_=3C{jjE4Z-aJgU+qBCtoXNTS5xm^Mo($6T&7|6ZN+~?v#fau&XWN>M~ zDUu8CtX*Z@BIKe?NCD2s_bM#VIz@Yj4xtc~;UNKLzEjXWLtFwAVAoKDYqeNj6-n{N zSj89P6@NF|4~6|}dWl0~0yA~jPy#)72vH%}hB2I+p#+r>Q14ONL!^^+%eck>%^s!R zEw~Bv`L3aal?sp@zHYXKNaVXr{G3GMUp15A zgYNDyW{NY8vm#N!SfIVAdz(mndW4+fn#c$PJYcFL#JLCWMzcf$#MLJQQ9A-!HvzXy z;`tKa8{7yJt(*GzJT*-7sHUeO1F2jp+*y58zMWO&E4iFHuF9jD4)xs9Vm_Z!Gfi1j z(_^yRs$_Lk&FZElTVtB+v@!Ymq^z1ZQ4Hdo!SPP?t@X-#fF+R=W*2Nb$& z8q_k8%_+HZMl;izo&fNKt7bT!)uH(tb$B6r^&$4 zj!Ja2$K-@lcQW#wtTiTIiC=mfsPw}4b&bLuzlWW;N1h7!d~S1qFR6U_o~dA)n$Nbm zM(G=MeYAlU1KuZoOB>Il!3tEf{UA1#hdz@{TT$MrwzdcQ=Xl<-JGgne^VK4fvOyjG7dLr!%UdSCR5C3TY3|s;d(|rZP%6i`O&EW^5*HGg+I1kfSj?iAS&O*xkf-Aa9S7 z^*kk;(LvZlH<@?Av>RZkVu%DQ=K5#*p9ESzf|t;GFg)G&Ww>T;;wKXe9ZzeU7P(&r zKMp>wjm`GWoGtQfr2bibai-fBWy&zn5Xy!suA{Et7!cD8;rp3;+@ZqPm zhZfH-o&L1t_tD=)KW$iv_O8|TtyJ_qi|qTb`-ASEo&5032WJ+0mgrjK*ptYK)yRpp zNDqi%`QU-4b&U&|hc_PF_(g81KJ)-^1g|0J?_b*XEuxnr&F*t`Ho1|xHq zXD@#gTe|gG>uUYQ$ASLmN6Egn7aXamo0}*eBZB|l;N8L1>eeM|dT^CLzApOiP2QVa zuiUrbUpRyoSgCB8@qFpuzj%1n|N095`oB&B@AE3S)K^=zq^9W;-%8}QO~M4>S=sPY zp~wOBpBGW*fVqj8iT|2E0$2R_+kLMxzh-)4p5Jf){r0TX7vw%a!Sr@WpLd^afR8`a z9P91mABl|sKWgy<(W5q|&(A+V=&b6K#ASg6cv)l+mN=IHuxxUKZGVmkr%?P4Rz^Y6 literal 0 HcmV?d00001 diff --git a/sidecar/sidecar/embed.py b/sidecar/sidecar/embed.py new file mode 100644 index 0000000..2ff9d7b --- /dev/null +++ b/sidecar/sidecar/embed.py @@ -0,0 +1,44 @@ +import os + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from .ollama import client + +router = APIRouter() + +EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text") + + +class EmbedRequest(BaseModel): + texts: list[str] + model: str | None = None + + +class EmbedResponse(BaseModel): + embeddings: list[list[float]] + model: str + dimensions: int + + +@router.post("", response_model=EmbedResponse) +async def embed(req: EmbedRequest): + model = req.model or EMBED_MODEL + embeddings = [] + + async with client() as c: + for text in req.texts: + resp = await c.post("/api/embed", json={"model": model, "input": text}) + if resp.status_code != 200: + raise HTTPException(502, f"Ollama error: {resp.text}") + data = resp.json() + embeddings.extend(data.get("embeddings", [])) + + if not embeddings: + raise HTTPException(502, "No embeddings returned") + + return EmbedResponse( + embeddings=embeddings, + model=model, + dimensions=len(embeddings[0]), + ) diff --git a/sidecar/sidecar/generate.py b/sidecar/sidecar/generate.py new file mode 100644 index 0000000..e7bcc23 --- /dev/null +++ b/sidecar/sidecar/generate.py @@ -0,0 +1,55 @@ +import os + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from .ollama import client + +router = APIRouter() + +GEN_MODEL = os.environ.get("GEN_MODEL", "qwen2.5") + + +class GenerateRequest(BaseModel): + prompt: str + model: str | None = None + system: str | None = None + temperature: float = 0.7 + max_tokens: int = 2048 + + +class GenerateResponse(BaseModel): + text: str + model: str + tokens_evaluated: int | None = None + tokens_generated: int | None = None + + +@router.post("", response_model=GenerateResponse) +async def generate(req: GenerateRequest): + model = req.model or GEN_MODEL + + payload = { + "model": model, + "prompt": req.prompt, + "stream": False, + "options": { + "temperature": req.temperature, + "num_predict": req.max_tokens, + }, + } + if req.system: + payload["system"] = req.system + + async with client() as c: + resp = await c.post("/api/generate", json=payload) + if resp.status_code != 200: + raise HTTPException(502, f"Ollama error: {resp.text}") + data = resp.json() + + return GenerateResponse( + text=data.get("response", ""), + model=model, + tokens_evaluated=data.get("prompt_eval_count"), + tokens_generated=data.get("eval_count"), + ) diff --git a/sidecar/sidecar/main.py b/sidecar/sidecar/main.py new file mode 100644 index 0000000..9555de2 --- /dev/null +++ b/sidecar/sidecar/main.py @@ -0,0 +1,23 @@ +import os + +from fastapi import FastAPI + +from .embed import router as embed_router +from .generate import router as generate_router +from .rerank import router as rerank_router + +app = FastAPI(title="Lakehouse AI Sidecar") + +app.include_router(embed_router, prefix="/embed", tags=["embed"]) +app.include_router(generate_router, prefix="/generate", tags=["generate"]) +app.include_router(rerank_router, prefix="/rerank", tags=["rerank"]) + + +@app.get("/health") +async def health(): + return { + "status": "ok", + "ollama_url": os.environ.get("OLLAMA_URL", "http://localhost:11434"), + "embed_model": os.environ.get("EMBED_MODEL", "nomic-embed-text"), + "gen_model": os.environ.get("GEN_MODEL", "qwen2.5"), + } diff --git a/sidecar/sidecar/ollama.py b/sidecar/sidecar/ollama.py new file mode 100644 index 0000000..bccd7d3 --- /dev/null +++ b/sidecar/sidecar/ollama.py @@ -0,0 +1,12 @@ +"""Shared Ollama HTTP client.""" + +import os + +import httpx + +OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434") +TIMEOUT = float(os.environ.get("OLLAMA_TIMEOUT", "120")) + + +def client() -> httpx.AsyncClient: + return httpx.AsyncClient(base_url=OLLAMA_URL, timeout=TIMEOUT) diff --git a/sidecar/sidecar/rerank.py b/sidecar/sidecar/rerank.py new file mode 100644 index 0000000..eb61a46 --- /dev/null +++ b/sidecar/sidecar/rerank.py @@ -0,0 +1,70 @@ +import os + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from .ollama import client + +router = APIRouter() + +RERANK_MODEL = os.environ.get("RERANK_MODEL", "qwen2.5") + + +class RerankRequest(BaseModel): + query: str + documents: list[str] + model: str | None = None + top_k: int | None = None + + +class ScoredDocument(BaseModel): + index: int + text: str + score: float + + +class RerankResponse(BaseModel): + results: list[ScoredDocument] + model: str + + +@router.post("", response_model=RerankResponse) +async def rerank(req: RerankRequest): + """Cross-encoder reranking via Ollama generate. + + Scores each document against the query by asking the model to rate relevance 0-10, + then sorts by score descending. + """ + model = req.model or RERANK_MODEL + scored = [] + + async with client() as c: + for i, doc in enumerate(req.documents): + prompt = ( + f"Rate the relevance of the following document to the query on a scale of 0 to 10. " + f"Respond with ONLY a number.\n\n" + f"Query: {req.query}\n\n" + f"Document: {doc}\n\n" + f"Score:" + ) + resp = await c.post( + "/api/generate", + json={"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.0, "num_predict": 8}}, + ) + if resp.status_code != 200: + raise HTTPException(502, f"Ollama error: {resp.text}") + + text = resp.json().get("response", "").strip() + try: + score = float(text.split()[0]) + score = max(0.0, min(10.0, score)) + except (ValueError, IndexError): + score = 0.0 + + scored.append(ScoredDocument(index=i, text=doc, score=score)) + + scored.sort(key=lambda x: x.score, reverse=True) + if req.top_k: + scored = scored[: req.top_k] + + return RerankResponse(results=scored, model=model)