root 239e471223 Phase 3: AI integration with Ollama via Python sidecar
- sidecar: FastAPI app with /embed, /generate, /rerank hitting Ollama
- sidecar: Dockerfile, env var config (EMBED_MODEL, GEN_MODEL, RERANK_MODEL)
- aibridge: reqwest HTTP client with typed request/response structs
- aibridge: Axum proxy endpoints (POST /ai/embed, /ai/generate, /ai/rerank)
- gateway: wires AiClient with SIDECAR_URL env var
- e2e verified: nomic-embed-text returns 768-dim vectors, qwen2.5 generates text (sketched below)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 05:53:56 -05:00
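
A minimal sketch of that e2e check, assuming the sidecar listens on http://localhost:8001; the port, payload, and helper name are illustrative, not taken from the commit:

import httpx

def check_embed(base_url: str = "http://localhost:8001") -> None:
    # POST a single text to the sidecar's /embed route and verify the shape.
    resp = httpx.post(f"{base_url}/embed", json={"texts": ["hello world"]}, timeout=60.0)
    resp.raise_for_status()
    body = resp.json()
    assert body["dimensions"] == 768  # nomic-embed-text emits 768-dim vectors
    assert len(body["embeddings"]) == 1

if __name__ == "__main__":
    check_embed()
    print("embed OK")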

Python · 45 lines · 1.1 KiB

import os

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from .ollama import client

router = APIRouter()

# Default embedding model; overridable per-request or via env var.
EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text")


class EmbedRequest(BaseModel):
    texts: list[str]
    model: str | None = None


class EmbedResponse(BaseModel):
    embeddings: list[list[float]]
    model: str
    dimensions: int


@router.post("", response_model=EmbedResponse)
async def embed(req: EmbedRequest):
    model = req.model or EMBED_MODEL
    embeddings: list[list[float]] = []
    async with client() as c:
        # Embed each text with a separate Ollama call; /api/embed returns
        # {"embeddings": [[...]]} even for a single input.
        for text in req.texts:
            resp = await c.post("/api/embed", json={"model": model, "input": text})
            if resp.status_code != 200:
                raise HTTPException(502, f"Ollama error: {resp.text}")
            data = resp.json()
            embeddings.extend(data.get("embeddings", []))
    if not embeddings:
        raise HTTPException(502, "No embeddings returned")
    return EmbedResponse(
        embeddings=embeddings,
        model=model,
        dimensions=len(embeddings[0]),
    )
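
The route imports client from a sibling ollama module not shown in this view. A minimal sketch of what that helper might look like, assuming httpx and an OLLAMA_URL environment variable (both are assumptions; the commit view doesn't include the file):

# Hypothetical sketch of the sidecar's ollama.py -- not shown in this commit.
import os

import httpx

OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")

def client() -> httpx.AsyncClient:
    # Used as `async with client() as c`; httpx.AsyncClient is itself an
    # async context manager, so returning a fresh instance is sufficient.
    return httpx.AsyncClient(base_url=OLLAMA_URL, timeout=120.0)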