G2: embedd — text → vector via Ollama · 2 scrum fixes

Fills in the missing piece for the staffing co-pilot: text inputs to
vectord-shaped vectors. Standalone cmd/embedd on :3216, fronted by
the gateway at /v1/embed. Pluggable embed.Provider interface (G2
ships Ollama; OpenAI/Voyage swap in via the same interface in G3+).

Wire format:
  POST /v1/embed {"texts":[...], "model":"..."}  // model optional
  → 200 {"model","dimension","vectors":[[...]]}
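For illustration, a minimal Go caller against this contract. Everything
here except the JSON field names and the gateway address :3110 (the bind
asserted by the smoke script below) is hypothetical:

  package main

  import (
      "bytes"
      "encoding/json"
      "fmt"
      "log"
      "net/http"
  )

  func main() {
      // Illustrative sketch, not shipped code.
      body, _ := json.Marshal(map[string]any{
          "texts": []string{"forklift operator with OSHA-30"},
      })
      resp, err := http.Post("http://127.0.0.1:3110/v1/embed", "application/json", bytes.NewReader(body))
      if err != nil {
          log.Fatal(err)
      }
      defer resp.Body.Close()

      var out struct {
          Model     string      `json:"model"`
          Dimension int         `json:"dimension"`
          Vectors   [][]float32 `json:"vectors"`
      }
      if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
          log.Fatal(err)
      }
      fmt.Println(out.Model, out.Dimension, len(out.Vectors)) // e.g. nomic-embed-text 768 1
  }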

Default model: nomic-embed-text (768-d). Ollama returns float64;
provider converts to float32 at the boundary so vectors flow through
vectord/HNSW without re-conversion.

Acceptance smoke 5/5 PASS — including the architectural payoff:
end-to-end embed → vectord add → search by re-embedded text returns
recall=1 at distance 5.96e-8 (float32 precision noise on identical
unit vectors). The staffing co-pilot pipeline (text → vector →
similarity search) is now functional end-to-end.
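For scale, 5.96e-8 is 2^-24, one float32 ulp at 1.0. A quick sanity
check (illustrative snippet, not commit code):

  package main

  import "fmt"

  func main() {
      // 1 - 2^-24 is the largest float32 below 1.0; the gap back up
      // to 1.0 is one ulp, the same magnitude as the distance above.
      fmt.Println(1 - float32(1-1.0/(1<<24))) // 5.9604645e-08
  }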

All 9 smokes (D1-D6 + G1 + G1P + G2) PASS deterministically.

Cross-lineage scrum on shipped code:
  - Opus 4.7 (opencode):                    0 BLOCK + 4 WARN + 3 INFO
  - Kimi K2-0905 (openrouter):              0 BLOCK + 2 WARN + 1 INFO
  - Qwen3-coder (openrouter):               "No BLOCKs" (3 tokens)

Fixed (2 — 1 convergent + 1 single-reviewer):
  C1 (Opus + Kimi convergent WARN): per-text 60s timeout × N-text
    batch was up to N×60s with no batch-level cap. One stuck Ollama
    call would stall the whole handler indefinitely. Fix:
    context.WithTimeout(r.Context(), 60s) wraps the entire batch.
  O-W3 (Opus WARN): empty strings in texts went to Ollama unchecked,
    producing version-dependent garbage. Fix: reject "" with 400 at
    the handler boundary so callers get a deterministic answer
    instead of an upstream-conditional 502.

Deferred (4): drainAndClose 64KiB cap (matches G0 pattern), no
concurrency limit on /embed (single-tenant G2), missing Accept
header (exotic-proxy concern), MaxBytesError string-match
redundancy (paranoia layer kept consistent across codebase).

Zero false positives this round — Qwen returned 3 tokens "No BLOCKs"
and the other two reviewers' findings were all real.

Setup confirmed: Ollama 0.21.0 on :11434 with nomic-embed-text loaded.
The per-text /api/embeddings endpoint is used (compatible with 0.21+);
the newer /api/embed batch endpoint (0.4+) can swap in via the
Provider interface.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
root 2026-04-29 01:42:27 -05:00
parent 8b92518d21
commit 9ee7fc5550
8 changed files with 624 additions and 3 deletions

cmd/embedd/main.go (new file, 140 lines)

@@ -0,0 +1,140 @@
// embedd is the embedding service. Turns text into vectors via a
// pluggable Provider (G2: Ollama at :11434). Vectors flow through
// the rest of the stack as float32 — see internal/embed for the
// boundary conversion. Default model is config-resolved; callers
// can override per request.
package main

import (
	"context"
	"encoding/json"
	"errors"
	"flag"
	"log/slog"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/go-chi/chi/v5"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/embed"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
)

const (
	maxRequestBytes = 4 << 20          // 4 MiB cap on /embed body — texts plural
	batchDeadline   = 60 * time.Second // upper bound on a single /embed batch
)

func main() {
	configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
	flag.Parse()

	cfg, err := shared.LoadConfig(*configPath)
	if err != nil {
		slog.Error("config", "err", err)
		os.Exit(1)
	}
	if cfg.Embedd.ProviderURL == "" {
		slog.Error("config", "err", "embedd.provider_url is required")
		os.Exit(1)
	}

	h := &handlers{
		provider: embed.NewOllama(cfg.Embedd.ProviderURL, cfg.Embedd.DefaultModel),
	}
	if err := shared.Run("embedd", cfg.Embedd.Bind, h.register); err != nil {
		slog.Error("server", "err", err)
		os.Exit(1)
	}
}

type handlers struct {
	provider embed.Provider
}

func (h *handlers) register(r chi.Router) {
	r.Post("/embed", h.handleEmbed)
}

// embedRequest is the POST /embed body. Texts is the list to
// embed; Model is optional (empty → use server default).
type embedRequest struct {
	Texts []string `json:"texts"`
	Model string   `json:"model,omitempty"`
}

func (h *handlers) handleEmbed(w http.ResponseWriter, r *http.Request) {
	defer r.Body.Close()
	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)

	var req embedRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		var maxErr *http.MaxBytesError
		if errors.As(err, &maxErr) || strings.Contains(err.Error(), "http: request body too large") {
			http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
			return
		}
		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
		return
	}

	// Per scrum O-W3 (Opus): reject empty strings up front. Ollama's
	// behavior on empty prompt is version-dependent (some return
	// errors, some return zero vectors); rejecting at the boundary
	// gives callers a deterministic 400 instead of 502.
	for j, t := range req.Texts {
		if t == "" {
			http.Error(w, "texts["+itoa(j)+"]: empty string", http.StatusBadRequest)
			return
		}
	}

	// Per scrum C1 (Opus + Kimi convergent): per-text 60s timeout
	// without a batch-level cap means a 100-text batch with one
	// stuck call can pin the handler for ~6000s. Set a hard batch
	// ceiling derived from the request ctx so a wedged Ollama
	// surfaces as 504-ish (mapped to 502 by the upstream-error
	// path below) rather than holding the connection forever.
	ctx, cancel := context.WithTimeout(r.Context(), batchDeadline)
	defer cancel()

	res, err := h.provider.Embed(ctx, req.Texts, req.Model)
	if errors.Is(err, embed.ErrEmptyTexts) {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	if errors.Is(err, embed.ErrModelMismatch) {
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}
	if err != nil {
		// Upstream-shape errors (Ollama down, model missing,
		// 5xx body) bubble up as 502 — distinguishes "your input
		// was wrong" (400) from "the embedding backend was wrong" (502).
		slog.Warn("embed", "err", err)
		http.Error(w, "embed: "+err.Error(), http.StatusBadGateway)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(res); err != nil {
		slog.Warn("embed encode", "err", err)
	}
}

// itoa is a tiny helper for error messages without pulling strconv
// in just for one call site.
func itoa(i int) string {
	if i == 0 {
		return "0"
	}
	var buf [20]byte
	pos := len(buf)
	for i > 0 {
		pos--
		buf[pos] = byte('0' + i%10)
		i /= 10
	}
	return string(buf[pos:])
}


@@ -43,6 +43,7 @@ func main() {
		"ingestd_url": cfg.Gateway.IngestdURL,
		"queryd_url":  cfg.Gateway.QuerydURL,
		"vectord_url": cfg.Gateway.VectordURL,
		"embedd_url":  cfg.Gateway.EmbeddURL,
	}
	for k, v := range upstreams {
		if v == "" {
@@ -61,12 +62,14 @@ func main() {
	ingestdURL := mustParseUpstream("ingestd_url", cfg.Gateway.IngestdURL)
	querydURL := mustParseUpstream("queryd_url", cfg.Gateway.QuerydURL)
	vectordURL := mustParseUpstream("vectord_url", cfg.Gateway.VectordURL)
	embeddURL := mustParseUpstream("embedd_url", cfg.Gateway.EmbeddURL)

	storagedProxy := gateway.NewProxyHandler(storagedURL)
	catalogdProxy := gateway.NewProxyHandler(catalogdURL)
	ingestdProxy := gateway.NewProxyHandler(ingestdURL)
	querydProxy := gateway.NewProxyHandler(querydURL)
	vectordProxy := gateway.NewProxyHandler(vectordURL)
	embeddProxy := gateway.NewProxyHandler(embeddURL)

	if err := shared.Run("gateway", cfg.Gateway.Bind, func(r chi.Router) {
		// Storage / catalog have multi-segment paths under their
@@ -82,6 +85,8 @@ func main() {
		r.Handle("/v1/sql", querydProxy)
		// Vector search routes — /v1/vectors/index, /v1/vectors/index/{name}/...
		r.Handle("/v1/vectors/*", vectordProxy)
		// Embedding service — /v1/embed
		r.Handle("/v1/embed", embeddProxy)
	}); err != nil {
		slog.Error("server", "err", err)
		os.Exit(1)

internal/embed/embed.go (new file, 43 lines)

@@ -0,0 +1,43 @@
// Package embed turns text into vectors. G2 ships an Ollama-backed
// Provider (local model on :11434, no network call to a hosted
// service); the Provider interface keeps the door open for OpenAI /
// Voyage / etc. without per-call switching at the HTTP boundary.
//
// Vectors are float32 throughout the system (matches vectord +
// coder/hnsw types). Ollama returns float64; we convert at this
// package's boundary so callers don't have to think about it.
package embed

import (
	"context"
	"errors"
)

// Provider is the embed boundary. Embed takes a list of input
// texts and returns the same number of vectors, in input order,
// using the named model. Empty model = use the Provider's default.
type Provider interface {
	Embed(ctx context.Context, texts []string, model string) (Result, error)
}

// Result is the per-call response shape. Model echoes which model
// was actually used (after default-resolution); Dimension is the
// vector dimension reported by the model. The Provider guarantees
// every Vector in Vectors has Dimension components.
type Result struct {
	Model     string      `json:"model"`
	Dimension int         `json:"dimension"`
	Vectors   [][]float32 `json:"vectors"`
}

// ErrEmptyTexts is returned when the caller passed no inputs.
// Empty-batch is an error because returning an empty result
// silently wastes the round trip and hides caller bugs.
var ErrEmptyTexts = errors.New("embed: empty texts")

// ErrModelMismatch is returned when one call to the upstream
// returns a different vector dimension than the previous call in
// the same batch — only possible if the model actually switched
// mid-batch (load swap on the server). Surfaced as a 502 by
// callers because the upstream's behavior was inconsistent.
var ErrModelMismatch = errors.New("embed: dimension changed mid-batch")
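To make the plug-point concrete, a hypothetical stub Provider of the
kind a wiring test or future backend would supply. Written as if inside
package embed; illustrative only, not part of this commit:

// stubProvider returns zero vectors of a fixed dimension, just
// enough to satisfy Provider without a live backend. Hypothetical.
type stubProvider struct{ dim int }

func (p stubProvider) Embed(_ context.Context, texts []string, model string) (Result, error) {
	if len(texts) == 0 {
		return Result{}, ErrEmptyTexts
	}
	out := Result{Model: model, Dimension: p.dim, Vectors: make([][]float32, len(texts))}
	for i := range texts {
		out.Vectors[i] = make([]float32, p.dim)
	}
	return out, nil
}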

internal/embed/ollama.go (new file, 127 lines)

@@ -0,0 +1,127 @@
// ollama.go — Provider backed by an Ollama HTTP server. Compatible
// with Ollama 0.21+ via the per-text /api/embeddings endpoint.
// Newer Ollama (0.4+) exposes /api/embed for batched calls; the
// per-text endpoint works on both.
package embed

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

// OllamaProvider hits an Ollama server at the configured base URL.
type OllamaProvider struct {
	baseURL      string
	defaultModel string
	hc           *http.Client
}

// NewOllama builds a provider against baseURL (e.g.
// "http://localhost:11434"). defaultModel is what gets used when
// callers pass an empty model name.
func NewOllama(baseURL, defaultModel string) *OllamaProvider {
	return &OllamaProvider{
		baseURL:      strings.TrimRight(baseURL, "/"),
		defaultModel: defaultModel,
		hc: &http.Client{
			// Embeddings are CPU-bound on the server side; 60s gives
			// plenty of headroom for a single-text call. Caller can
			// add an outer ctx deadline for a batch-level cap.
			Timeout: 60 * time.Second,
		},
	}
}

// ollamaRequest is Ollama's /api/embeddings body shape.
type ollamaRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
}

// ollamaResponse mirrors the success body. Embedding is float64
// from Ollama; we convert to float32 at the boundary.
type ollamaResponse struct {
	Embedding []float64 `json:"embedding"`
}

// Embed loops over texts, issuing one HTTP call per text. Errors
// short-circuit — if call N fails, we return the error and the
// caller sees no partial Result.
func (p *OllamaProvider) Embed(ctx context.Context, texts []string, model string) (Result, error) {
	if len(texts) == 0 {
		return Result{}, ErrEmptyTexts
	}
	if model == "" {
		model = p.defaultModel
	}
	if model == "" {
		return Result{}, fmt.Errorf("embed: no model (empty request, no default)")
	}

	out := Result{Model: model, Vectors: make([][]float32, 0, len(texts))}
	for i, text := range texts {
		vec, err := p.embedOne(ctx, model, text)
		if err != nil {
			return Result{}, fmt.Errorf("embed text[%d]: %w", i, err)
		}
		// Per-text vectors must agree on dimension. Lock on first.
		if out.Dimension == 0 {
			out.Dimension = len(vec)
		} else if len(vec) != out.Dimension {
			return Result{}, fmt.Errorf("%w: text[%d] returned %d, prior were %d",
				ErrModelMismatch, i, len(vec), out.Dimension)
		}
		out.Vectors = append(out.Vectors, vec)
	}
	return out, nil
}

func (p *OllamaProvider) embedOne(ctx context.Context, model, text string) ([]float32, error) {
	body, err := json.Marshal(ollamaRequest{Model: model, Prompt: text})
	if err != nil {
		return nil, fmt.Errorf("marshal: %w", err)
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
		p.baseURL+"/api/embeddings", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("req: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.ContentLength = int64(len(body))

	resp, err := p.hc.Do(req)
	if err != nil {
		return nil, fmt.Errorf("do: %w", err)
	}
	defer drainAndClose(resp.Body)

	if resp.StatusCode != http.StatusOK {
		preview, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
		return nil, fmt.Errorf("upstream status %d: %s", resp.StatusCode, string(preview))
	}

	var or ollamaResponse
	if err := json.NewDecoder(resp.Body).Decode(&or); err != nil {
		return nil, fmt.Errorf("decode: %w", err)
	}
	if len(or.Embedding) == 0 {
		return nil, fmt.Errorf("upstream returned empty embedding")
	}

	// Float64 → float32. Loss of precision is acceptable for HNSW
	// search; float32 matches the rest of the system.
	out := make([]float32, len(or.Embedding))
	for i, v := range or.Embedding {
		out[i] = float32(v)
	}
	return out, nil
}

func drainAndClose(body io.ReadCloser) {
	_, _ = io.Copy(io.Discard, io.LimitReader(body, 64<<10))
	_ = body.Close()
}
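Caller-side usage sketch (assumes a local Ollama with nomic-embed-text
pulled; illustrative, not shipped code):

package main

import (
	"context"
	"fmt"
	"log"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/embed"
)

func main() {
	p := embed.NewOllama("http://localhost:11434", "nomic-embed-text")
	res, err := p.Embed(context.Background(), []string{"forklift operator with OSHA-30"}, "")
	if err != nil {
		log.Fatal(err) // Ollama down / model missing surfaces here
	}
	fmt.Println(res.Model, res.Dimension) // nomic-embed-text 768
}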


@@ -0,0 +1,115 @@
package embed

import (
	"context"
	"encoding/json"
	"errors"
	"net/http"
	"net/http/httptest"
	"strings"
	"sync"
	"testing"
)

func TestOllama_EmbedBatch_PreservesOrder(t *testing.T) {
	var mu sync.Mutex
	var seenPrompts []string
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var req ollamaRequest
		_ = json.NewDecoder(r.Body).Decode(&req)
		mu.Lock()
		seenPrompts = append(seenPrompts, req.Prompt)
		mu.Unlock()
		// Return a vector that encodes which prompt this was, so
		// we can assert order at the caller. 4-d vector for cheap.
		var vec [4]float64
		switch req.Prompt {
		case "alpha":
			vec = [4]float64{1, 0, 0, 0}
		case "beta":
			vec = [4]float64{0, 1, 0, 0}
		case "gamma":
			vec = [4]float64{0, 0, 1, 0}
		}
		_ = json.NewEncoder(w).Encode(map[string]any{"embedding": vec[:]})
	}))
	defer srv.Close()

	p := NewOllama(srv.URL, "test-model")
	res, err := p.Embed(context.Background(), []string{"alpha", "beta", "gamma"}, "")
	if err != nil {
		t.Fatal(err)
	}
	if res.Model != "test-model" || res.Dimension != 4 || len(res.Vectors) != 3 {
		t.Fatalf("Result: got %+v", res)
	}
	if res.Vectors[0][0] != 1 || res.Vectors[1][1] != 1 || res.Vectors[2][2] != 1 {
		t.Errorf("vectors out of order: %v", res.Vectors)
	}
	// Sanity: all three prompts hit the server.
	if len(seenPrompts) != 3 {
		t.Errorf("expected 3 upstream calls, got %d", len(seenPrompts))
	}
}

func TestOllama_EmptyTextsErrors(t *testing.T) {
	p := NewOllama("http://nope:0", "x")
	_, err := p.Embed(context.Background(), nil, "")
	if !errors.Is(err, ErrEmptyTexts) {
		t.Errorf("expected ErrEmptyTexts, got %v", err)
	}
}

func TestOllama_NoModelNoDefault(t *testing.T) {
	p := NewOllama("http://nope:0", "") // empty default
	_, err := p.Embed(context.Background(), []string{"hi"}, "")
	if err == nil || !strings.Contains(err.Error(), "no model") {
		t.Errorf("expected no-model error, got %v", err)
	}
}

func TestOllama_UpstreamErrorPropagates(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		http.Error(w, "model not loaded", http.StatusInternalServerError)
	}))
	defer srv.Close()

	p := NewOllama(srv.URL, "x")
	_, err := p.Embed(context.Background(), []string{"hi"}, "")
	if err == nil || !strings.Contains(err.Error(), "upstream status 500") {
		t.Errorf("expected wrapped 500 error, got %v", err)
	}
}

func TestOllama_DimensionMismatchMidBatch(t *testing.T) {
	calls := 0
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		calls++
		// First call returns 4-d, second returns 8-d → server changed
		// model under us. Provider should ErrModelMismatch.
		var v []float64
		if calls == 1 {
			v = []float64{1, 0, 0, 0}
		} else {
			v = []float64{1, 0, 0, 0, 0, 0, 0, 0}
		}
		_ = json.NewEncoder(w).Encode(map[string]any{"embedding": v})
	}))
	defer srv.Close()

	p := NewOllama(srv.URL, "x")
	_, err := p.Embed(context.Background(), []string{"a", "b"}, "")
	if !errors.Is(err, ErrModelMismatch) {
		t.Errorf("expected ErrModelMismatch, got %v", err)
	}
}

func TestOllama_EmptyEmbeddingErrors(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		_ = json.NewEncoder(w).Encode(map[string]any{"embedding": []float64{}})
	}))
	defer srv.Close()

	p := NewOllama(srv.URL, "x")
	_, err := p.Embed(context.Background(), []string{"hi"}, "")
	if err == nil || !strings.Contains(err.Error(), "empty embedding") {
		t.Errorf("expected empty-embedding error, got %v", err)
	}
}
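These tests stand up their own httptest servers, so they need no live
Ollama and run with the stock toolchain:

  go test ./internal/embed/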


@@ -25,6 +25,7 @@ type Config struct {
	Ingestd IngestConfig  `toml:"ingestd"`
	Queryd  QuerydConfig  `toml:"queryd"`
	Vectord VectordConfig `toml:"vectord"`
	Embedd  EmbeddConfig  `toml:"embedd"`
	S3      S3Config      `toml:"s3"`
	Log     LogConfig     `toml:"log"`
}
@@ -48,9 +49,9 @@ type IngestConfig struct {
// GatewayConfig adds the upstream URLs the reverse proxy fronts.
// Each route family (/v1/storage, /v1/catalog, /v1/ingest, /v1/sql,
-// /v1/vectors) has its own upstream so we can scale services
-// independently or move them to different boxes without touching
-// gateway code.
+// /v1/vectors, /v1/embed) has its own upstream so we can scale
+// services independently or move them to different boxes without
+// touching gateway code.
type GatewayConfig struct {
	Bind        string `toml:"bind"`
	StoragedURL string `toml:"storaged_url"`
@@ -58,6 +59,17 @@ type GatewayConfig struct {
	IngestdURL string `toml:"ingestd_url"`
	QuerydURL  string `toml:"queryd_url"`
	VectordURL string `toml:"vectord_url"`
	EmbeddURL  string `toml:"embedd_url"`
}

// EmbeddConfig drives the embed service. ProviderURL points at the
// embedding backend (Ollama in G2, possibly OpenAI/Voyage in G3+).
// DefaultModel is what gets used when callers don't specify a
// model in their request body.
type EmbeddConfig struct {
	Bind         string `toml:"bind"`
	ProviderURL  string `toml:"provider_url"`
	DefaultModel string `toml:"default_model"`
}

// VectordConfig adds vectord-specific knobs. StoragedURL is
@@ -123,6 +135,7 @@ func DefaultConfig() Config {
		IngestdURL: "http://127.0.0.1:3213",
		QuerydURL:  "http://127.0.0.1:3214",
		VectordURL: "http://127.0.0.1:3215",
		EmbeddURL:  "http://127.0.0.1:3216",
	},
	Storaged: ServiceConfig{Bind: "127.0.0.1:3211"},
	Catalogd: CatalogConfig{Bind: "127.0.0.1:3212", StoragedURL: "http://127.0.0.1:3211"},
@@ -136,6 +149,11 @@ func DefaultConfig() Config {
		Bind:        "127.0.0.1:3215",
		StoragedURL: "http://127.0.0.1:3211",
	},
	Embedd: EmbeddConfig{
		Bind:         "127.0.0.1:3216",
		ProviderURL:  "http://localhost:11434", // local Ollama
		DefaultModel: "nomic-embed-text",
	},
	Queryd: QuerydConfig{
		Bind:        "127.0.0.1:3214",
		CatalogdURL: "http://127.0.0.1:3212",


@@ -11,6 +11,7 @@ catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"

[storaged]
bind = "127.0.0.1:3211"
@@ -33,6 +34,13 @@ bind = "127.0.0.1:3215"
# Optional — set to empty string to disable persistence (dev/test).
storaged_url = "http://127.0.0.1:3211"

[embedd]
bind = "127.0.0.1:3216"
# G2: Ollama local. G3+ may swap in OpenAI/Voyage by changing
# this URL + the wire format inside the provider.
provider_url = "http://localhost:11434"
default_model = "nomic-embed-text"

[queryd]
bind = "127.0.0.1:3214"
catalogd_url = "http://127.0.0.1:3212"

scripts/g2_smoke.sh (new executable file, 165 lines)

@@ -0,0 +1,165 @@
#!/usr/bin/env bash
# G2 smoke — embedd service. All assertions go through gateway :3110.
#
# Validates:
#   - POST /v1/embed with 2 texts → 200, dim=768 (nomic-embed-text),
#     vectors[0] != vectors[1] (different texts → different vectors)
#   - Same text twice → byte-identical vectors (deterministic)
#   - Empty texts → 400
#   - Bad model → 502 from upstream Ollama
#   - End-to-end with vectord: embed text → store → search by text →
#     same text round-trips at distance ≈ 0 (proves embed→vectord
#     pipeline works)
#
# Requires Ollama running at :11434 with nomic-embed-text loaded.
#
# Usage: ./scripts/g2_smoke.sh
set -euo pipefail
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"

echo "[g2-smoke] building embedd + vectord + gateway..."
go build -o bin/ ./cmd/embedd ./cmd/vectord ./cmd/gateway

pkill -f "bin/(embedd|vectord|gateway)" 2>/dev/null || true
sleep 0.3

PIDS=()
TMP="$(mktemp -d)"
cleanup() {
  echo "[g2-smoke] cleanup"
  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
  rm -rf "$TMP"
}
trap cleanup EXIT INT TERM

poll_health() {
  local port="$1" deadline=$(($(date +%s) + 5))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
    sleep 0.05
  done
  return 1
}

# Verify Ollama is up before the test even starts — otherwise every
# embed call would 502 and the smoke would be misleading.
if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
  echo "[g2-smoke] Ollama not reachable on :11434 — skipping"
  exit 0
fi

echo "[g2-smoke] launching embedd → vectord (no persist) → gateway..."
./bin/embedd > /tmp/embedd.log 2>&1 &
PIDS+=($!)
poll_health 3216 || { echo "embedd failed"; tail /tmp/embedd.log; exit 1; }

# vectord with persistence disabled (matches g1_smoke pattern —
# this smoke doesn't touch storaged).
./bin/vectord -config scripts/g1_smoke.toml > /tmp/vectord.log 2>&1 &
PIDS+=($!)
poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }

./bin/gateway > /tmp/gateway.log 2>&1 &
PIDS+=($!)
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }

FAILED=0

echo "[g2-smoke] /v1/embed — two distinct texts:"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' \
  -d '{"texts":["forklift operator with OSHA-30","CNC machinist precision parts"]}')"
DIM="$(echo "$RESP" | jq -r '.dimension')"
N="$(echo "$RESP" | jq -r '.vectors | length')"
MODEL="$(echo "$RESP" | jq -r '.model')"
SAME="$(echo "$RESP" | jq -r '.vectors[0][0] == .vectors[1][0]')"
if [ "$DIM" = "768" ] && [ "$N" = "2" ] && [ "$MODEL" = "nomic-embed-text" ] && [ "$SAME" = "false" ]; then
  echo "  ✓ dim=768, model=nomic-embed-text, 2 distinct vectors"
else
  echo "  ✗ resp: dim=$DIM n=$N model=$MODEL same=$SAME"; FAILED=1
fi

echo "[g2-smoke] determinism — same text twice → byte-identical vector:"
RESP1="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' \
  -d '{"texts":["determinism check"]}' | jq -c '.vectors[0]')"
RESP2="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' \
  -d '{"texts":["determinism check"]}' | jq -c '.vectors[0]')"
if [ "$RESP1" = "$RESP2" ]; then
  echo "  ✓ identical text → identical vector"
else
  echo "  ✗ deterministic mismatch"; FAILED=1
fi

echo "[g2-smoke] empty texts → 400:"
HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' -d '{"texts":[]}')"
if [ "$HTTP" = "400" ]; then echo "  ✓ empty → 400"; else echo "  ✗ empty → $HTTP"; FAILED=1; fi

echo "[g2-smoke] bad model → 502:"
HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' -d '{"texts":["x"],"model":"definitely-not-loaded"}')"
if [ "$HTTP" = "502" ]; then echo "  ✓ unknown model → 502"; else echo "  ✗ unknown → $HTTP"; FAILED=1; fi

echo "[g2-smoke] end-to-end: embed → vectord add → search by embed → recall:"
NAME="g2_demo"
# Create index. Default M/EfSearch; cosine distance.
curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index \
  -H 'Content-Type: application/json' \
  -d "{\"name\":\"$NAME\",\"dimension\":768,\"distance\":\"cosine\"}"

# Embed a few staffing-ish texts and add them.
TEXTS='["forklift operator with OSHA-30","CNC machinist precision parts","warehouse picker night shift","dental hygienist 3 years experience"]'
EMBEDS="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' \
  -d "{\"texts\":$TEXTS}")"

# Build the add payload — ids w-0..w-3, each with vector from embeds[i].
python3 - "$EMBEDS" <<'EOF' > "$TMP/add.json"
import json, sys

embeds = json.loads(sys.argv[1])
items = [
    {"id": f"w-{i}", "vector": v, "metadata": {"text": t}}
    for i, (v, t) in enumerate(zip(embeds["vectors"], [
        "forklift operator with OSHA-30",
        "CNC machinist precision parts",
        "warehouse picker night shift",
        "dental hygienist 3 years experience",
    ]))
]
print(json.dumps({"items": items}))
EOF
curl -sS -o /dev/null -X POST "http://127.0.0.1:3110/v1/vectors/index/$NAME/add" \
  -H 'Content-Type: application/json' -d @"$TMP/add.json"

# Search by embedding the FIRST text again — should retrieve w-0 at dist≈0
QUERY_VEC="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' \
  -d '{"texts":["forklift operator with OSHA-30"]}' | jq -c '.vectors[0]')"
SEARCH="$(curl -sS -X POST "http://127.0.0.1:3110/v1/vectors/index/$NAME/search" \
  -H 'Content-Type: application/json' \
  -d "{\"vector\":$QUERY_VEC,\"k\":3}")"
TOP_ID="$(echo "$SEARCH" | jq -r '.results[0].id')"
TOP_DIST="$(echo "$SEARCH" | jq -r '.results[0].distance')"
DIST_OK="$(python3 -c "import sys; sys.exit(0 if abs($TOP_DIST) < 1e-4 else 1)" && echo y || echo n)"
if [ "$TOP_ID" = "w-0" ] && [ "$DIST_OK" = "y" ]; then
  echo "  ✓ embed → store → search round-trip: w-0 at dist=$TOP_DIST"
else
  echo "  ✗ recall: top=$TOP_ID dist=$TOP_DIST"
  echo "    full: $SEARCH"
  FAILED=1
fi

# Clean up the index.
curl -sS -o /dev/null -X DELETE "http://127.0.0.1:3110/v1/vectors/index/$NAME" || true

if [ "$FAILED" -eq 0 ]; then
  echo "[g2-smoke] G2 acceptance gate: PASSED"
  exit 0
else
  echo "[g2-smoke] G2 acceptance gate: FAILED"
  exit 1
fi