Bridges the missing piece for the staffing co-pilot: text inputs to
vectord-shaped vectors. Standalone cmd/embedd on :3216 fronted by
gateway at /v1/embed. Pluggable embed.Provider interface (G2 ships
Ollama; OpenAI/Voyage swap in via the same interface in G3+).
Wire format:
POST /v1/embed {"texts":[...], "model":"..."} // model optional
→ 200 {"model","dimension","vectors":[[...]]}
Default model: nomic-embed-text (768-d). Ollama returns float64;
provider converts to float32 at the boundary so vectors flow through
vectord/HNSW without re-conversion.
Acceptance smoke 5/5 PASS — including the architectural payoff:
end-to-end embed → vectord add → search by re-embedded text returns
recall=1 at distance 5.96e-8 (float32 precision noise on identical
unit vectors). The staffing co-pilot pipeline (text → vector →
similarity search) is now functional end-to-end.
All 9 smokes (D1-D6 + G1 + G1P + G2) PASS deterministically.
Cross-lineage scrum on shipped code:
- Opus 4.7 (opencode): 0 BLOCK + 4 WARN + 3 INFO
- Kimi K2-0905 (openrouter): 0 BLOCK + 2 WARN + 1 INFO
- Qwen3-coder (openrouter): "No BLOCKs" (3 tokens)
Fixed (2 — 1 convergent + 1 single-reviewer):
C1 (Opus + Kimi convergent WARN): per-text 60s timeout × N-text
batch was up to N×60s with no batch-level cap. One stuck Ollama
call would stall the whole handler indefinitely. Fix:
context.WithTimeout(r.Context(), 60s) wraps the entire batch.
O-W3 (Opus WARN): empty strings in texts went to Ollama unchecked,
producing version-dependent garbage. Fix: reject "" with 400 at
the handler boundary so callers get a deterministic answer
instead of an upstream-conditional 502.
Deferred (4): drainAndClose 64KiB cap (matches G0 pattern), no
concurrency limit on /embed (single-tenant G2), missing Accept
header (exotic-proxy concern), MaxBytesError string-match
redundancy (paranoia layer kept consistent across codebase).
Zero false positives this round — Qwen returned 3 tokens "No BLOCKs"
and the other two reviewers' findings were all real.
Setup confirmed: Ollama 0.21.0 on :11434 with nomic-embed-text loaded.
Per-text /api/embeddings used (forward-compat with 0.21+); newer
0.4+ /api/embed batch endpoint can swap in via the Provider interface.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
116 lines · 3.5 KiB · Go
package embed
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
)
|
|
|
|
func TestOllama_EmbedBatch_PreservesOrder(t *testing.T) {
|
|
var mu sync.Mutex
|
|
var seenPrompts []string
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
var req ollamaRequest
|
|
_ = json.NewDecoder(r.Body).Decode(&req)
|
|
mu.Lock()
|
|
seenPrompts = append(seenPrompts, req.Prompt)
|
|
mu.Unlock()
|
|
// Return a vector that encodes which prompt this was, so
|
|
// we can assert order at the caller. 4-d vector for cheap.
|
|
var vec [4]float64
|
|
switch req.Prompt {
|
|
case "alpha":
|
|
vec = [4]float64{1, 0, 0, 0}
|
|
case "beta":
|
|
vec = [4]float64{0, 1, 0, 0}
|
|
case "gamma":
|
|
vec = [4]float64{0, 0, 1, 0}
|
|
}
|
|
_ = json.NewEncoder(w).Encode(map[string]any{"embedding": vec[:]})
|
|
}))
|
|
defer srv.Close()
|
|
|
|
p := NewOllama(srv.URL, "test-model")
|
|
res, err := p.Embed(context.Background(), []string{"alpha", "beta", "gamma"}, "")
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if res.Model != "test-model" || res.Dimension != 4 || len(res.Vectors) != 3 {
|
|
t.Fatalf("Result: got %+v", res)
|
|
}
|
|
if res.Vectors[0][0] != 1 || res.Vectors[1][1] != 1 || res.Vectors[2][2] != 1 {
|
|
t.Errorf("vectors out of order: %v", res.Vectors)
|
|
}
|
|
// Sanity: all three prompts hit the server.
|
|
if len(seenPrompts) != 3 {
|
|
t.Errorf("expected 3 upstream calls, got %d", len(seenPrompts))
|
|
}
|
|
}
|
|
|
|
func TestOllama_EmptyTextsErrors(t *testing.T) {
|
|
p := NewOllama("http://nope:0", "x")
|
|
_, err := p.Embed(context.Background(), nil, "")
|
|
if !errors.Is(err, ErrEmptyTexts) {
|
|
t.Errorf("expected ErrEmptyTexts, got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOllama_NoModelNoDefault(t *testing.T) {
|
|
p := NewOllama("http://nope:0", "") // empty default
|
|
_, err := p.Embed(context.Background(), []string{"hi"}, "")
|
|
if err == nil || !strings.Contains(err.Error(), "no model") {
|
|
t.Errorf("expected no-model error, got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOllama_UpstreamErrorPropagates(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
http.Error(w, "model not loaded", http.StatusInternalServerError)
|
|
}))
|
|
defer srv.Close()
|
|
p := NewOllama(srv.URL, "x")
|
|
_, err := p.Embed(context.Background(), []string{"hi"}, "")
|
|
if err == nil || !strings.Contains(err.Error(), "upstream status 500") {
|
|
t.Errorf("expected wrapped 500 error, got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOllama_DimensionMismatchMidBatch(t *testing.T) {
|
|
calls := 0
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
calls++
|
|
// First call returns 4-d, second returns 8-d → server changed
|
|
// model under us. Provider should ErrModelMismatch.
|
|
var v []float64
|
|
if calls == 1 {
|
|
v = []float64{1, 0, 0, 0}
|
|
} else {
|
|
v = []float64{1, 0, 0, 0, 0, 0, 0, 0}
|
|
}
|
|
_ = json.NewEncoder(w).Encode(map[string]any{"embedding": v})
|
|
}))
|
|
defer srv.Close()
|
|
p := NewOllama(srv.URL, "x")
|
|
_, err := p.Embed(context.Background(), []string{"a", "b"}, "")
|
|
if !errors.Is(err, ErrModelMismatch) {
|
|
t.Errorf("expected ErrModelMismatch, got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOllama_EmptyEmbeddingErrors(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
_ = json.NewEncoder(w).Encode(map[string]any{"embedding": []float64{}})
|
|
}))
|
|
defer srv.Close()
|
|
p := NewOllama(srv.URL, "x")
|
|
_, err := p.Embed(context.Background(), []string{"hi"}, "")
|
|
if err == nil || !strings.Contains(err.Error(), "empty embedding") {
|
|
t.Errorf("expected empty-embedding error, got %v", err)
|
|
}
|
|
}
|