LAKEHOUSE/internal/embed/ollama_test.go
root 9ee7fc5550 G2: embedd — text → vector via Ollama · 2 scrum fixes
Supplies the missing piece for the staffing co-pilot: text inputs to
vectord-shaped vectors. Standalone cmd/embedd on :3216 fronted by
gateway at /v1/embed. Pluggable embed.Provider interface (G2 ships
Ollama; OpenAI/Voyage swap in via the same interface in G3+).
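
A sketch of that interface as the tests below exercise it (names and
field set inferred from the tests; the shipped code may differ):

  // Provider is the pluggable boundary OpenAI/Voyage slot into in G3+.
  type Provider interface {
      // Embed maps texts to vectors; model == "" selects the default.
      Embed(ctx context.Context, texts []string, model string) (*Result, error)
  }

  type Result struct {
      Model     string      // model actually used
      Dimension int         // length of every vector
      Vectors   [][]float32 // one vector per text, input order preserved
  }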

Wire format:
  POST /v1/embed {"texts":[...], "model":"..."}  // model optional
  → 200 {"model","dimension","vectors":[[...]]}
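
A minimal client sketch against that shape (hitting embedd on :3216
directly, assuming it serves the same /v1/embed route the gateway
fronts; texts are illustrative):

  package main

  import (
      "bytes"
      "encoding/json"
      "fmt"
      "net/http"
  )

  func main() {
      body, _ := json.Marshal(map[string]any{
          "texts": []string{"senior Go engineer", "staff data scientist"},
          // "model" omitted → server default (nomic-embed-text)
      })
      resp, err := http.Post("http://localhost:3216/v1/embed",
          "application/json", bytes.NewReader(body))
      if err != nil {
          panic(err)
      }
      defer resp.Body.Close()

      var out struct {
          Model     string      `json:"model"`
          Dimension int         `json:"dimension"`
          Vectors   [][]float32 `json:"vectors"`
      }
      if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
          panic(err)
      }
      fmt.Println(out.Model, out.Dimension, len(out.Vectors))
  }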

Default model: nomic-embed-text (768-d). Ollama returns float64;
provider converts to float32 at the boundary so vectors flow through
vectord/HNSW without re-conversion.
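
The narrowing is one tight loop at the provider edge; roughly this
(helper name is illustrative):

  // toFloat32 narrows Ollama's float64 JSON numbers once, here, so
  // nothing downstream ever re-converts.
  func toFloat32(in []float64) []float32 {
      out := make([]float32, len(in))
      for i, v := range in {
          out[i] = float32(v)
      }
      return out
  }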

Acceptance smoke 5/5 PASS — including the architectural payoff:
end-to-end embed → vectord add → search by re-embedded text returns
recall=1 at distance 5.96e-8 (≈2^-24, float32 precision noise on
identical unit vectors). The staffing co-pilot pipeline (text → vector →
similarity search) is now functional end-to-end.

All 9 smokes (D1-D6 + G1 + G1P + G2) PASS deterministically.

Cross-lineage scrum on shipped code:
  - Opus 4.7 (opencode):                    0 BLOCK + 4 WARN + 3 INFO
  - Kimi K2-0905 (openrouter):              0 BLOCK + 2 WARN + 1 INFO
  - Qwen3-coder (openrouter):               "No BLOCKs" (3 tokens)

Fixed (2 — 1 convergent + 1 single-reviewer; both sketched after this list):
  C1 (Opus + Kimi convergent WARN): per-text 60s timeout × N-text
    batch was up to N×60s with no batch-level cap. One stuck Ollama
    call would stall the whole handler indefinitely. Fix:
    context.WithTimeout(r.Context(), 60s) wraps the entire batch.
  O-W3 (Opus WARN): empty strings in texts went to Ollama unchecked,
    producing version-dependent garbage. Fix: reject "" with 400 at
    the handler boundary so callers get a deterministic answer
    instead of an upstream-conditional 502.
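
A minimal sketch of how the two fixes compose in the handler (names are
illustrative, not the shipped code; Provider/Result as sketched above;
imports assumed: context, encoding/json, fmt, net/http, time):

  func handleEmbed(p Provider) http.HandlerFunc {
      return func(w http.ResponseWriter, r *http.Request) {
          var req struct {
              Texts []string `json:"texts"`
              Model string   `json:"model"`
          }
          if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
              http.Error(w, "bad request body", http.StatusBadRequest)
              return
          }
          // O-W3: reject "" at the boundary so callers see a
          // deterministic 400, not an upstream-conditional 502.
          for i, text := range req.Texts {
              if text == "" {
                  http.Error(w, fmt.Sprintf("texts[%d] is empty", i),
                      http.StatusBadRequest)
                  return
              }
          }
          // C1: one 60s budget for the whole batch, so a stuck Ollama
          // call fails the request instead of stalling up to N×60s.
          ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
          defer cancel()
          res, err := p.Embed(ctx, req.Texts, req.Model)
          if err != nil {
              http.Error(w, err.Error(), http.StatusBadGateway)
              return
          }
          w.Header().Set("Content-Type", "application/json")
          _ = json.NewEncoder(w).Encode(res)
      }
  }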

Deferred (4): drainAndClose 64KiB cap (matches the G0 pattern; a sketch
follows below), no concurrency limit on /embed (single-tenant G2),
missing Accept header (exotic-proxy concern), MaxBytesError string-match
redundancy (paranoia layer kept consistent across the codebase).
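
For context, the G0-style drain the first deferred item refers to is
roughly this (a sketch, not the shipped helper):

  // Drain at most 64 KiB of the remaining body before closing so the
  // keep-alive connection can be reused without reading an unbounded
  // body.
  func drainAndClose(body io.ReadCloser) {
      _, _ = io.CopyN(io.Discard, body, 64<<10)
      _ = body.Close()
  }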

Zero false positives this round — Qwen returned 3 tokens "No BLOCKs"
and the other two reviewers' findings were all real.

Setup confirmed: Ollama 0.21.0 on :11434 with nomic-embed-text loaded.
The per-text /api/embeddings endpoint is used (forward-compat with
0.21+); the newer 0.4+ /api/embed batch endpoint can swap in via the
Provider interface.
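
For reference, the per-text wire shapes the tests below assume (field
sets inferred from the tests and Ollama's /api/embeddings; the shipped
structs may carry more fields):

  type ollamaRequest struct {
      Model  string `json:"model"`
      Prompt string `json:"prompt"`
  }

  type ollamaResponse struct {
      // float64 on the wire; narrowed to float32 at the boundary.
      Embedding []float64 `json:"embedding"`
  }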

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 01:42:27 -05:00

package embed

import (
	"context"
	"encoding/json"
	"errors"
	"net/http"
	"net/http/httptest"
	"strings"
	"sync"
	"testing"
)

func TestOllama_EmbedBatch_PreservesOrder(t *testing.T) {
	var mu sync.Mutex
	var seenPrompts []string
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var req ollamaRequest
		_ = json.NewDecoder(r.Body).Decode(&req)
		mu.Lock()
		seenPrompts = append(seenPrompts, req.Prompt)
		mu.Unlock()
		// Return a vector that encodes which prompt this was, so
		// we can assert order at the caller. 4-d vector for cheap.
		var vec [4]float64
		switch req.Prompt {
		case "alpha":
			vec = [4]float64{1, 0, 0, 0}
		case "beta":
			vec = [4]float64{0, 1, 0, 0}
		case "gamma":
			vec = [4]float64{0, 0, 1, 0}
		}
		_ = json.NewEncoder(w).Encode(map[string]any{"embedding": vec[:]})
	}))
	defer srv.Close()

	p := NewOllama(srv.URL, "test-model")
	res, err := p.Embed(context.Background(), []string{"alpha", "beta", "gamma"}, "")
	if err != nil {
		t.Fatal(err)
	}
	if res.Model != "test-model" || res.Dimension != 4 || len(res.Vectors) != 3 {
		t.Fatalf("Result: got %+v", res)
	}
	if res.Vectors[0][0] != 1 || res.Vectors[1][1] != 1 || res.Vectors[2][2] != 1 {
		t.Errorf("vectors out of order: %v", res.Vectors)
	}
	// Sanity: all three prompts hit the server.
	if len(seenPrompts) != 3 {
		t.Errorf("expected 3 upstream calls, got %d", len(seenPrompts))
	}
}

func TestOllama_EmptyTextsErrors(t *testing.T) {
	p := NewOllama("http://nope:0", "x")
	_, err := p.Embed(context.Background(), nil, "")
	if !errors.Is(err, ErrEmptyTexts) {
		t.Errorf("expected ErrEmptyTexts, got %v", err)
	}
}

func TestOllama_NoModelNoDefault(t *testing.T) {
	p := NewOllama("http://nope:0", "") // empty default
	_, err := p.Embed(context.Background(), []string{"hi"}, "")
	if err == nil || !strings.Contains(err.Error(), "no model") {
		t.Errorf("expected no-model error, got %v", err)
	}
}

func TestOllama_UpstreamErrorPropagates(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		http.Error(w, "model not loaded", http.StatusInternalServerError)
	}))
	defer srv.Close()

	p := NewOllama(srv.URL, "x")
	_, err := p.Embed(context.Background(), []string{"hi"}, "")
	if err == nil || !strings.Contains(err.Error(), "upstream status 500") {
		t.Errorf("expected wrapped 500 error, got %v", err)
	}
}

func TestOllama_DimensionMismatchMidBatch(t *testing.T) {
	calls := 0
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		calls++
		// First call returns 4-d, second returns 8-d → server changed
		// model under us. Provider should ErrModelMismatch.
		var v []float64
		if calls == 1 {
			v = []float64{1, 0, 0, 0}
		} else {
			v = []float64{1, 0, 0, 0, 0, 0, 0, 0}
		}
		_ = json.NewEncoder(w).Encode(map[string]any{"embedding": v})
	}))
	defer srv.Close()

	p := NewOllama(srv.URL, "x")
	_, err := p.Embed(context.Background(), []string{"a", "b"}, "")
	if !errors.Is(err, ErrModelMismatch) {
		t.Errorf("expected ErrModelMismatch, got %v", err)
	}
}

func TestOllama_EmptyEmbeddingErrors(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		_ = json.NewEncoder(w).Encode(map[string]any{"embedding": []float64{}})
	}))
	defer srv.Close()

	p := NewOllama(srv.URL, "x")
	_, err := p.Embed(context.Background(), []string{"hi"}, "")
	if err == nil || !strings.Contains(err.Error(), "empty embedding") {
		t.Errorf("expected empty-embedding error, got %v", err)
	}
}