Batch 4: embed fixture-mode — partial R-006 closure

Adds cmd/fake_ollama, a minimal Ollama-API-compatible fake that
implements just enough surface for embedd to drive end-to-end
without a real Ollama install:

  GET  /api/tags        — fixed model list including nomic-embed-text
  POST /api/embeddings  — deterministic dim-D vector from sha256(prompt)
  GET  /health          — for the smoke's poll_health helper

Same prompt → bit-identical vector across runs, machines, and CI
nodes. Vectors are NOT semantically meaningful; the fake validates
the embed CONTRACT (dimension echo, response shape, status codes,
deterministic round-trip), not real semantic ranking. Real ranking
still requires real Ollama and lives in scripts/g2_smoke.sh + the
integration tier of the proof harness.

scripts/g2_smoke_fixtures.sh — full chain smoke against the fake:
  - Build fake_ollama + embedd + vectord + gateway
  - Start fake on :11435 (distinct from real Ollama at :11434)
  - Generate temp lakehouse.toml with provider_url override
  - Boot embedd/vectord/gateway with --config <override>
  - 4 assertions: dim=768, deterministic same-text, different-text
    divergence, bad-model → 4xx/5xx (fake 404 → embedd 502)
  - Trap-cleanup tears down all 4 binaries + tmp config

Wired into the task runner:
  just smoke-g2-fixtures

Closes R-006 partially:
  - Embed half: ✓ — CI / fresh-clone reviewers without Ollama can
    now run the embed contract smoke
  - Storage half: deferred — mocking S3 protocol is non-trivial
    (multipart, signed URLs, etc.) and MinIO itself is lightweight
    enough to install via Docker in any CI environment. Documented
    as Sprint 0 follow-up if a CI system without Docker shows up.

What this DOESN'T cover:
  - Real semantic similarity (use scripts/g2_smoke.sh + real Ollama)
  - Real Ollama API quirks (timeouts, version-specific shapes,
    /api/embed batch endpoint that newer versions support)

Verified:
  bash scripts/g2_smoke_fixtures.sh — 4/4 assertions PASS, ~3s wall
  just verify                       — vet + test + 9 smokes still green

Doesn't replace the existing g2_smoke.sh (which still requires real
Ollama and exercises the actual embed semantics). Adds an alternate
mode for portability.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-29 06:22:07 -05:00
parent 0f79bce948
commit fb08232f58
3 changed files with 254 additions and 0 deletions

102
cmd/fake_ollama/main.go Normal file
View File

@ -0,0 +1,102 @@
// fake_ollama is a minimal Ollama-API-compatible fake for proof harness
// fixture-mode smokes (R-006 partial). Implements just enough of the
// Ollama API surface for embedd to drive end-to-end without a real
// Ollama installation:
//
// GET /api/tags — returns a fixed model list including
// nomic-embed-text:latest
// POST /api/embeddings — returns a deterministic dim-D vector
// derived from sha256(prompt). Same prompt
// → bit-identical vector across runs.
//
// Vectors are NOT semantically meaningful (the value of similarity
// search against these is undefined). The fake is for proving the
// EMBED CONTRACT — dimension echo, response shape, status codes —
// not for proving real semantic ranking. That requires real Ollama.
//
// Why this exists: the proof harness's contract tier already runs
// against real Ollama (when present). For CI / fresh-clone reviewers
// without Ollama, this fake unblocks the chain.
//
// Usage:
// bin/fake_ollama --bind 127.0.0.1:11435 --dim 768
package main
import (
	"crypto/sha256"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"log/slog"
	"net/http"
	"os"
	"time"
)
// main parses flags, wires up the three fake endpoints on a mux, and
// serves until the process is killed (the smoke's trap-cleanup sends
// SIGTERM). Exits non-zero only on an unexpected serve error.
func main() {
	bind := flag.String("bind", "127.0.0.1:11435", "bind addr")
	dim := flag.Int("dim", 768, "embedding dimension to return")
	model := flag.String("model", "nomic-embed-text", "model name to echo back")
	flag.Parse()

	mux := http.NewServeMux()

	// GET /api/tags — fixed one-model list so embedd's model discovery
	// sees the configured model as installed.
	mux.HandleFunc("/api/tags", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(map[string]any{
			"models": []map[string]any{
				{
					"name":  *model + ":latest",
					"model": *model + ":latest",
				},
			},
		})
	})

	// POST /api/embeddings — deterministic dim-D vector from
	// sha256(prompt); see deterministicVector.
	mux.HandleFunc("/api/embeddings", func(w http.ResponseWriter, r *http.Request) {
		var req struct {
			Model  string `json:"model"`
			Prompt string `json:"prompt"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			http.Error(w, "decode: "+err.Error(), http.StatusBadRequest)
			return
		}
		// Reject unknown models so embedd's bad-model→502 contract
		// path is exercisable. The fake recognizes the configured
		// model name only (bare or with the ":latest" tag).
		if req.Model != "" && req.Model != *model && req.Model != *model+":latest" {
			http.Error(w, fmt.Sprintf("model %q not found", req.Model), http.StatusNotFound)
			return
		}
		vec := deterministicVector(req.Prompt, *dim)
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(map[string]any{
			"embedding": vec,
		})
	})

	// GET /health — consumed by the smoke's poll_health helper.
	mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(`{"status":"ok","service":"fake_ollama"}`))
	})

	slog.Info("fake_ollama starting", "bind", *bind, "dim", *dim, "model", *model)
	srv := &http.Server{
		Addr:    *bind,
		Handler: mux,
		// Bound request read/write time so a stuck or malicious client
		// can't pin a handler goroutine forever. Generous relative to
		// the smoke's curl --max-time 1, so no real request is affected.
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       10 * time.Second,
		WriteTimeout:      10 * time.Second,
	}
	if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
		slog.Error("fake_ollama serve", "err", err)
		os.Exit(1)
	}
}
// deterministicVector returns a fixed dim-d float64 vector derived
// from sha256(prompt). Same prompt → same vector across runs and
// across machines, so smoke assertions can compare to fixtures.
// deterministicVector derives a dim-length float64 vector from
// sha256(prompt). The same prompt always yields the same vector, on
// any machine, so smoke assertions can compare runs against fixtures.
// Values land in [-1, 1) but carry no semantic meaning.
func deterministicVector(prompt string, dim int) []float64 {
	digest := sha256.Sum256([]byte(prompt))
	size := len(digest)

	out := make([]float64, dim)
	for pos := 0; pos < dim; pos++ {
		// Cycle through the 32 digest bytes, centering each on zero and
		// scaling by 128 so cosine distance is well-defined and the
		// result looks vaguely like a real embedding.
		out[pos] = (float64(digest[pos%size]) - 128.0) / 128.0
	}
	return out
}

View File

@ -53,6 +53,12 @@ build:
smoke day:
@bash scripts/{{day}}_smoke.sh
# Fixture-mode G2 smoke — runs against the Go fake Ollama
# (cmd/fake_ollama) instead of a real install, so CI / fresh-clone
# reviewers without Ollama can verify the embed contract. Closes the
# embed half of R-006 (storage half deferred). Does NOT validate real
# semantic ranking — use the plain g2 smoke with real Ollama for that.
smoke-g2-fixtures:
	@bash scripts/g2_smoke_fixtures.sh
# All 9 smokes in dependency order. Halts on first failure.
smoke-all:
#!/usr/bin/env bash

146
scripts/g2_smoke_fixtures.sh Executable file
View File

@ -0,0 +1,146 @@
#!/usr/bin/env bash
# G2 smoke — fixtures variant. Same shape as g2_smoke.sh but points
# embedd at the Go fake Ollama (cmd/fake_ollama) instead of a real
# Ollama install. Useful for CI / fresh-clone reviewers who don't
# have Ollama set up.
#
# Validates the embed contract end-to-end:
#   - POST /v1/embed → 200, dim=768
#   - Same text twice → byte-identical vector (fake is deterministic)
#   - Different texts → different vectors
#   - Bad model → 4xx (fake rejects unknown models with 404 → embedd
#     maps to 502)
#
# What this DOESN'T cover:
#   - Real semantic similarity (fake vectors are sha256-derived; not
#     semantically meaningful)
#   - Real Ollama API quirks (timeouts, version-specific shapes)
#
# Closes R-006 partial: embedd no longer needs real Ollama for the
# CI / fresh-clone path. MinIO mocking is a separate Sprint 0
# follow-up.
#
# Usage: ./scripts/g2_smoke_fixtures.sh
set -euo pipefail
# Run from the repo root regardless of invocation directory.
cd "$(dirname "$0")/.."
# CI images sometimes lack go on PATH; the default install location works.
export PATH="$PATH:/usr/local/go/bin"
FAKE_PORT=11435   # distinct from real Ollama at 11434
EMBEDD_PORT=3216
GATEWAY_PORT=3110
VECTORD_PORT=3215
echo "[g2-fixtures] building fake_ollama + embedd + vectord + gateway..."
go build -o bin/ ./cmd/fake_ollama ./cmd/embedd ./cmd/vectord ./cmd/gateway
# Tear down any stale instances left by a previous (crashed) run so
# the fresh daemons can bind their ports. pkill -f matches the full
# command line; failures (nothing running) are ignored.
pkill -f "bin/fake_ollama" 2>/dev/null || true
pkill -f "bin/(embedd|vectord|gateway)" 2>/dev/null || true
sleep 0.3
# PIDs of all background daemons; reaped by the EXIT trap's cleanup.
PIDS=()
# Scratch dir for daemon logs and the override config; removed on exit.
TMP="$(mktemp -d)"
# cleanup kills every daemon this script started and removes the
# scratch dir. Registered on EXIT/INT/TERM so it runs on success,
# assertion failure, and ctrl-C alike.
cleanup() {
  echo "[g2-fixtures] cleanup"
  # ${PIDS[@]+...} guards the expansion: under `set -u`, bash < 4.4
  # treats an EMPTY array as unset, so a bare "${PIDS[@]}" would abort
  # cleanup if the script dies before the first daemon is launched.
  for p in ${PIDS[@]+"${PIDS[@]}"}; do
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT INT TERM
# poll_health waits (max ~5s) for http://127.0.0.1:<port>/health to
# answer, polling every 50ms. Returns 0 once the endpoint responds,
# 1 if the deadline passes first.
poll_health() {
  local port="$1" give_up=$(($(date +%s) + 5))
  while [ "$(date +%s)" -lt "$give_up" ]; do
    curl -sS --max-time 1 "http://127.0.0.1:${port}/health" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
# 1. Start fake_ollama on port 11435
echo "[g2-fixtures] launching fake_ollama on :${FAKE_PORT}..."
./bin/fake_ollama --bind "127.0.0.1:${FAKE_PORT}" --dim 768 \
  > "$TMP/fake_ollama.log" 2>&1 &
PIDS+=($!)
poll_health "$FAKE_PORT" || { echo "fake_ollama failed"; cat "$TMP/fake_ollama.log"; exit 1; }
# 2. Write override config pointing embedd at fake_ollama.
# Rewrites only the provider_url line of the checked-in lakehouse.toml;
# everything else (ports, model name, dims) is inherited unchanged.
CFG="$TMP/lakehouse_fixtures.toml"
sed "s|provider_url *= *\".*\"|provider_url = \"http://127.0.0.1:${FAKE_PORT}\"|" \
  lakehouse.toml > "$CFG"
# 3. Start embedd, vectord, gateway with the override config.
# Order matters: vectord first (embedd/gateway depend on it), gateway
# last. Each SPEC is "name:port"; we block on its /health before
# starting the next so failures are attributed to the right daemon.
echo "[g2-fixtures] launching embedd/vectord/gateway with fixture config..."
for SPEC in "vectord:${VECTORD_PORT}" "embedd:${EMBEDD_PORT}" "gateway:${GATEWAY_PORT}"; do
  NAME="${SPEC%:*}"; PORT="${SPEC#*:}"
  ./bin/"$NAME" --config "$CFG" > "$TMP/${NAME}.log" 2>&1 &
  PIDS+=($!)
  if ! poll_health "$PORT"; then
    echo "[g2-fixtures] $NAME failed to bind on :$PORT"
    tail -10 "$TMP/${NAME}.log"
    exit 1
  fi
done
# 4. Run the assertions. Each assertion sets FAILED=1 on mismatch but
# keeps going, so one run reports every broken contract at once.
FAILED=0
# Assertion 1: shape contract — one input text yields one 768-dim
# vector and the response echoes the configured model name.
echo "[g2-fixtures] /v1/embed with one text → 200 + dim=768"
RESP=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
  -H 'Content-Type: application/json' \
  -d '{"texts":["hello world"]}')
DIM=$(echo "$RESP" | jq -r '.dimension // empty')
N=$(echo "$RESP" | jq -r '.vectors | length')
MODEL=$(echo "$RESP" | jq -r '.model // empty')
if [ "$DIM" = "768" ] && [ "$N" = "1" ] && [ "$MODEL" = "nomic-embed-text" ]; then
  echo "  ✓ dim=768, model=nomic-embed-text"
else
  echo "  ✗ dim=$DIM n=$N model=$MODEL"; FAILED=1
fi
# Assertion 2: determinism — the fake derives vectors from
# sha256(prompt), so two embeds of the same text must be identical.
# jq -c canonicalizes each vector to one line for a string compare.
echo "[g2-fixtures] same text twice → byte-identical vector (deterministic)"
V1=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
  -H 'Content-Type: application/json' \
  -d '{"texts":["test"]}' | jq -c '.vectors[0]')
V2=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
  -H 'Content-Type: application/json' \
  -d '{"texts":["test"]}' | jq -c '.vectors[0]')
if [ "$V1" = "$V2" ]; then
  echo "  ✓ deterministic"
else
  echo "  ✗ same input → different vectors (fake should be deterministic)"; FAILED=1
fi
# Assertion 3: divergence — different prompts hash differently, so
# their vectors must differ (guards against a stuck/constant fake).
echo "[g2-fixtures] different texts → different vectors"
VA=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
  -H 'Content-Type: application/json' \
  -d '{"texts":["alpha"]}' | jq -c '.vectors[0]')
VB=$(curl -sS -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
  -H 'Content-Type: application/json' \
  -d '{"texts":["beta"]}' | jq -c '.vectors[0]')
if [ "$VA" != "$VB" ]; then
  echo "  ✓ different texts diverge"
else
  echo "  ✗ different texts produced identical vectors"; FAILED=1
fi
# Assertion 4: error contract — unknown model must surface as an HTTP
# error, not a 2xx. Any 4xx/5xx passes (fake 404 → embedd maps to
# 502); a curl transport failure yields 000, which correctly fails.
echo "[g2-fixtures] bad model → 4xx/5xx (fake returns 404, embedd maps to 502)"
HTTP=$(curl -s -o /dev/null -w "%{http_code}" \
  -X POST "http://127.0.0.1:${GATEWAY_PORT}/v1/embed" \
  -H 'Content-Type: application/json' \
  -d '{"texts":["x"],"model":"definitely-not-loaded"}')
if [ "$HTTP" -ge 400 ] && [ "$HTTP" -lt 600 ]; then
  echo "  ✓ unknown model → $HTTP"
else
  echo "  ✗ unknown model → $HTTP"; FAILED=1
fi
# Final verdict: exit 0 only if every assertion above passed.
if [ "$FAILED" = "0" ]; then
  echo "[g2-fixtures] ✓ G2 fixture-mode acceptance: PASSED"
  exit 0
fi
echo "[g2-fixtures] ✗ G2 fixture-mode acceptance: FAILED"
exit 1