- sidecar: FastAPI app with /embed, /generate, /rerank hitting Ollama - sidecar: Dockerfile, env var config (EMBED_MODEL, GEN_MODEL, RERANK_MODEL) - aibridge: reqwest HTTP client with typed request/response structs - aibridge: Axum proxy endpoints (POST /ai/embed, /ai/generate, /ai/rerank) - gateway: wires AiClient with SIDECAR_URL env var - e2e verified: nomic-embed-text returns 768d vectors, qwen2.5 generates text Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
45 lines
1.1 KiB
Python
45 lines
1.1 KiB
Python
import os
|
|
|
|
from fastapi import APIRouter, HTTPException
|
|
from pydantic import BaseModel
|
|
|
|
from .ollama import client
|
|
|
|
# Router for the embedding endpoints; mounted by the app (prefix set at include time,
# hence the empty path string on the POST handler below).
router = APIRouter()
|


|


|

# Default Ollama embedding model; overridable per-request via EmbedRequest.model
# or globally via the EMBED_MODEL environment variable.
EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text")
|
|
|
|
|
|
class EmbedRequest(BaseModel):
    """Request payload for the embedding endpoint."""

    # Texts to embed; one embedding vector is produced per entry.
    texts: list[str]
    # Optional per-request model override; falls back to EMBED_MODEL when None.
    model: str | None = None
|
|
|
|
|
|
class EmbedResponse(BaseModel):
    """Response payload: embeddings plus the model and vector size used."""

    # One embedding vector (list of floats) per input text, in input order.
    embeddings: list[list[float]]
    # Name of the model that actually produced the embeddings.
    model: str
    # Length of each embedding vector (taken from the first result).
    dimensions: int
|
|
|
|
|
|
@router.post("", response_model=EmbedResponse)
|
|
async def embed(req: EmbedRequest):
|
|
model = req.model or EMBED_MODEL
|
|
embeddings = []
|
|
|
|
async with client() as c:
|
|
for text in req.texts:
|
|
resp = await c.post("/api/embed", json={"model": model, "input": text})
|
|
if resp.status_code != 200:
|
|
raise HTTPException(502, f"Ollama error: {resp.text}")
|
|
data = resp.json()
|
|
embeddings.extend(data.get("embeddings", []))
|
|
|
|
if not embeddings:
|
|
raise HTTPException(502, "No embeddings returned")
|
|
|
|
return EmbedResponse(
|
|
embeddings=embeddings,
|
|
model=model,
|
|
dimensions=len(embeddings[0]),
|
|
)
|