diff --git a/cmd/fake_ollama/main.go b/cmd/fake_ollama/main.go
new file mode 100644
index 0000000..02d0a2b
--- /dev/null
+++ b/cmd/fake_ollama/main.go
@@ -0,0 +1,109 @@
+// fake_ollama is a minimal Ollama-API-compatible fake for proof harness
+// fixture-mode smokes (R-006 partial). Implements just enough of the
+// Ollama API surface for embedd to drive end-to-end without a real
+// Ollama installation:
+//
+//	GET  /api/tags       — returns a fixed model list including
+//	                       nomic-embed-text:latest
+//	POST /api/embeddings — returns a deterministic dim-D vector
+//	                       derived from sha256(prompt). Same prompt
+//	                       → bit-identical vector across runs.
+//
+// Vectors are NOT semantically meaningful (the value of similarity
+// search against these is undefined). The fake is for proving the
+// EMBED CONTRACT — dimension echo, response shape, status codes —
+// not for proving real semantic ranking. That requires real Ollama.
+//
+// Why this exists: the proof harness's contract tier already runs
+// against real Ollama (when present). For CI / fresh-clone reviewers
+// without Ollama, this fake unblocks the chain.
+//
+// Usage:
+//
+//	bin/fake_ollama --bind 127.0.0.1:11435 --dim 768
+package main
+
+import (
+	"crypto/sha256"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"log/slog"
+	"net/http"
+	"os"
+)
+
+func main() {
+	bind := flag.String("bind", "127.0.0.1:11435", "bind addr")
+	dim := flag.Int("dim", 768, "embedding dimension to return")
+	model := flag.String("model", "nomic-embed-text", "model name to echo back")
+	flag.Parse()
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/api/tags", func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"models": []map[string]any{
+				{
+					"name":  *model + ":latest",
+					"model": *model + ":latest",
+				},
+			},
+		})
+	})
+	mux.HandleFunc("/api/embeddings", func(w http.ResponseWriter, r *http.Request) {
+		// Real Ollama only accepts POST here; mirror that so an
+		// accidental GET fails loudly instead of embedding "".
+		if r.Method != http.MethodPost {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		var req struct {
+			Model  string `json:"model"`
+			Prompt string `json:"prompt"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			http.Error(w, "decode: "+err.Error(), http.StatusBadRequest)
+			return
+		}
+		// Reject unknown models so embedd's bad-model→502 contract
+		// path is exercisable. The fake recognizes the configured
+		// model name only.
+		if req.Model != "" && req.Model != *model && req.Model != *model+":latest" {
+			http.Error(w, fmt.Sprintf("model %q not found", req.Model), http.StatusNotFound)
+			return
+		}
+		vec := deterministicVector(req.Prompt, *dim)
+		w.Header().Set("Content-Type", "application/json")
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"embedding": vec,
+		})
+	})
+	mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{"status":"ok","service":"fake_ollama"}`))
+	})
+
+	slog.Info("fake_ollama starting", "bind", *bind, "dim", *dim, "model", *model)
+	srv := &http.Server{Addr: *bind, Handler: mux}
+	if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+		slog.Error("fake_ollama serve", "err", err)
+		os.Exit(1)
+	}
+}
+
+// deterministicVector returns a fixed dim-d float64 vector derived
+// from sha256(prompt). Same prompt → same vector across runs and
+// across machines, so smoke assertions can compare to fixtures.
+func deterministicVector(prompt string, dim int) []float64 {
+	h := sha256.Sum256([]byte(prompt))
+	vec := make([]float64, dim)
+	for i := range vec {
+		// Spread the 32 hash bytes across `dim` positions; map to
+		// [-1, 1] so cosine distance is well-defined and the result
+		// looks vaguely like a real embedding.
+		b := h[i%len(h)]
+		vec[i] = (float64(b) - 128.0) / 128.0
+	}
+	return vec
+}
diff --git a/justfile b/justfile
index 3ae07f5..fcb399d 100644
--- a/justfile
+++ b/justfile
@@ -53,6 +53,12 @@ build:
 smoke day:
     @bash scripts/{{day}}_smoke.sh
 
+# Fixture-mode G2 smoke — runs against fake Ollama instead of real,
+# so CI / fresh-clone reviewers without Ollama can verify the embed
+# contract. Closes R-006 partial (embed half; storage half deferred).
+smoke-g2-fixtures:
+    @bash scripts/g2_smoke_fixtures.sh
+
 # All 9 smokes in dependency order. Halts on first failure.
 smoke-all:
     #!/usr/bin/env bash
diff --git a/scripts/g2_smoke_fixtures.sh b/scripts/g2_smoke_fixtures.sh
new file mode 100755
index 0000000..49d8a8c
--- /dev/null
+++ b/scripts/g2_smoke_fixtures.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+# G2 smoke — fixtures variant. Same shape as g2_smoke.sh but points
+# embedd at the Go fake Ollama (cmd/fake_ollama) instead of a real
+# Ollama install. Useful for CI / fresh-clone reviewers who don't
+# have Ollama set up.
+#
+# Validates the embed contract end-to-end:
+#   - POST /v1/embed → 200, dim=768
+#   - Same text twice → byte-identical vector (fake is deterministic)
+#   - Different texts → different vectors
+#   - Bad model → 4xx (fake rejects unknown models with 404 → embedd
+#     maps to 502)
+#
+# What this DOESN'T cover:
+#   - Real semantic similarity (fake vectors are sha256-derived; not
+#     semantically meaningful)
+#   - Real Ollama API quirks (timeouts, version-specific shapes)
+#
+# Closes R-006 partial: embedd no longer needs real Ollama for the
+# CI / fresh-clone path. MinIO mocking is a separate Sprint 0
+# follow-up.
+#
+# Usage: ./scripts/g2_smoke_fixtures.sh
+
+set -euo pipefail
+cd "$(dirname "$0")/.."
+
+export PATH="$PATH:/usr/local/go/bin"
+
+FAKE_PORT=11435   # distinct from real Ollama at 11434
+EMBEDD_PORT=3216
+GATEWAY_PORT=3110
+VECTORD_PORT=3215
+
+echo "[g2-fixtures] building fake_ollama + embedd + vectord + gateway..."
+go build -o bin/ ./cmd/fake_ollama ./cmd/embedd ./cmd/vectord ./cmd/gateway
+
+pkill -f "bin/fake_ollama" 2>/dev/null || true
+pkill -f "bin/(embedd|vectord|gateway)" 2>/dev/null || true
+sleep 0.3
+
+PIDS=()
+TMP="$(mktemp -d)"
+cleanup() {
+    echo "[g2-fixtures] cleanup"
+    # ${PIDS[@]+...} guards the empty-array case: under `set -u`,
+    # bash < 4.4 (e.g. macOS /bin/bash 3.2) treats "${PIDS[@]}" on an
+    # empty array as unbound, which would abort cleanup on early exit.
+    for p in ${PIDS[@]+"${PIDS[@]}"}; do kill "$p" 2>/dev/null || true; done
+    rm -rf "$TMP"
+}
+trap cleanup EXIT INT TERM
+
+poll_health() {
+    local port="$1" deadline=$(($(date +%s) + 5))
+    while [ "$(date +%s)" -lt "$deadline" ]; do
+        if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
+            return 0
+        fi
+        sleep 0.05
+    done
+    return 1
+}
+
+# 1. Start fake_ollama on port 11435
+echo "[g2-fixtures] launching fake_ollama on :${FAKE_PORT}..."
+./bin/fake_ollama --bind "127.0.0.1:${FAKE_PORT}" --dim 768 \
+    > "$TMP/fake_ollama.log" 2>&1 &
+PIDS+=($!)
+poll_health "$FAKE_PORT" || { echo "fake_ollama failed"; cat "$TMP/fake_ollama.log"; exit 1; }
+
+# 2. Write override config pointing embedd at fake_ollama
+CFG="$TMP/lakehouse_fixtures.toml"
+sed "s|provider_url *= *\".*\"|provider_url = \"http://127.0.0.1:${FAKE_PORT}\"|" \
+    lakehouse.toml > "$CFG"
+
+# 3. Start embedd, vectord, gateway with the override config
+echo "[g2-fixtures] launching embedd/vectord/gateway with fixture config..."
+for SPEC in "vectord:${VECTORD_PORT}" "embedd:${EMBEDD_PORT}" "gateway:${GATEWAY_PORT}"; do
+    NAME="${SPEC%:*}"; PORT="${SPEC#*:}"
+    ./bin/"$NAME" --config "$CFG" > "$TMP/${NAME}.log" 2>&1 &
+    PIDS+=($!)
+    if ! poll_health "$PORT"; then
+        echo "[g2-fixtures] $NAME failed to bind on :$PORT"
+        tail -10 "$TMP/${NAME}.log"
+        exit 1
+    fi
+done
+
+# 4. Run the assertions
+FAILED=0
+
+echo "[g2-fixtures] /v1/embed with one text → 200 + dim=768"
+RESP=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
+    -H 'Content-Type: application/json' \
+    -d '{"texts":["hello world"]}')
+DIM=$(echo "$RESP" | jq -r '.dimension // empty')
+N=$(echo "$RESP" | jq -r '.vectors | length')
+MODEL=$(echo "$RESP" | jq -r '.model // empty')
+if [ "$DIM" = "768" ] && [ "$N" = "1" ] && [ "$MODEL" = "nomic-embed-text" ]; then
+    echo "  ✓ dim=768, model=nomic-embed-text"
+else
+    echo "  ✗ dim=$DIM n=$N model=$MODEL"; FAILED=1
+fi
+
+echo "[g2-fixtures] same text twice → byte-identical vector (deterministic)"
+V1=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
+    -H 'Content-Type: application/json' \
+    -d '{"texts":["test"]}' | jq -c '.vectors[0]')
+V2=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
+    -H 'Content-Type: application/json' \
+    -d '{"texts":["test"]}' | jq -c '.vectors[0]')
+if [ "$V1" = "$V2" ]; then
+    echo "  ✓ deterministic"
+else
+    echo "  ✗ same input → different vectors (fake should be deterministic)"; FAILED=1
+fi
+
+echo "[g2-fixtures] different texts → different vectors"
+VA=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
+    -H 'Content-Type: application/json' \
+    -d '{"texts":["alpha"]}' | jq -c '.vectors[0]')
+VB=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
+    -H 'Content-Type: application/json' \
+    -d '{"texts":["beta"]}' | jq -c '.vectors[0]')
+if [ "$VA" != "$VB" ]; then
+    echo "  ✓ different texts diverge"
+else
+    echo "  ✗ different texts produced identical vectors"; FAILED=1
+fi
+
+echo "[g2-fixtures] bad model → 4xx/5xx (fake returns 404, embedd maps to 502)"
+HTTP=$(curl -s -o /dev/null -w "%{http_code}" \
+    -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
+    -H 'Content-Type: application/json' \
+    -d '{"texts":["x"],"model":"definitely-not-loaded"}')
+if [ "$HTTP" -ge 400 ] && [ "$HTTP" -lt 600 ]; then
+    echo "  ✓ unknown model → $HTTP"
+else
+    echo "  ✗ unknown model → $HTTP"; FAILED=1
+fi
+
+if [ "$FAILED" = "0" ]; then
+    echo "[g2-fixtures] ✓ G2 fixture-mode acceptance: PASSED"
+    exit 0
+else
+    echo "[g2-fixtures] ✗ G2 fixture-mode acceptance: FAILED"
+    exit 1
+fi