Batch 4: embed fixture-mode — partial R-006 closure
Adds cmd/fake_ollama, a minimal Ollama-API-compatible fake that
implements just enough surface for embedd to drive end-to-end
without a real Ollama install:
GET /api/tags — fixed model list including nomic-embed-text
POST /api/embeddings — deterministic dim-D vector from sha256(prompt)
GET /health — for the smoke's poll_health helper
Same prompt → bit-identical vector across runs, machines, and CI
nodes. Vectors are NOT semantically meaningful; the fake validates
the embed CONTRACT (dimension echo, response shape, status codes,
deterministic round-trip), not real semantic ranking. Real ranking
still requires real Ollama and lives in scripts/g2_smoke.sh + the
integration tier of the proof harness.
scripts/g2_smoke_fixtures.sh — full chain smoke against the fake:
- Build fake_ollama + embedd + vectord + gateway
- Start fake on :11435 (distinct from real Ollama at :11434)
- Generate temp lakehouse.toml with provider_url override
- Boot embedd/vectord/gateway with --config <override>
- 4 assertions: dim=768, deterministic same-text, different-text
divergence, bad-model → 4xx/5xx (fake 404 → embedd 502)
- Trap-cleanup tears down all 4 binaries + tmp config
Wired into the task runner:
just smoke-g2-fixtures
Closes R-006 partially:
- Embed half: ✓ — CI / fresh-clone reviewers without Ollama can
now run the embed contract smoke
- Storage half: deferred — mocking S3 protocol is non-trivial
(multipart, signed URLs, etc.) and MinIO itself is lightweight
enough to install via Docker in any CI environment. Documented
as Sprint 0 follow-up if a CI system without Docker shows up.
What this DOESN'T cover:
- Real semantic similarity (use scripts/g2_smoke.sh + real Ollama)
- Real Ollama API quirks (timeouts, version-specific shapes,
/api/embed batch endpoint that newer versions support)
Verified:
bash scripts/g2_smoke_fixtures.sh — 4/4 assertions PASS, ~3s wall
just verify — vet + test + 9 smokes still green
Doesn't replace the existing g2_smoke.sh (which still requires real
Ollama and exercises the actual embed semantics). Adds an alternate
mode for portability.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0f79bce948
commit
fb08232f58
102
cmd/fake_ollama/main.go
Normal file
102
cmd/fake_ollama/main.go
Normal file
@ -0,0 +1,102 @@
|
||||
// fake_ollama is a minimal Ollama-API-compatible fake for proof harness
|
||||
// fixture-mode smokes (R-006 partial). Implements just enough of the
|
||||
// Ollama API surface for embedd to drive end-to-end without a real
|
||||
// Ollama installation:
|
||||
//
|
||||
// GET /api/tags — returns a fixed model list including
|
||||
// nomic-embed-text:latest
|
||||
// POST /api/embeddings — returns a deterministic dim-D vector
|
||||
// derived from sha256(prompt). Same prompt
|
||||
// → bit-identical vector across runs.
// GET /health — returns a static JSON status document so smoke
// scripts can poll for readiness.
|
||||
//
|
||||
// Vectors are NOT semantically meaningful (the value of similarity
|
||||
// search against these is undefined). The fake is for proving the
|
||||
// EMBED CONTRACT — dimension echo, response shape, status codes —
|
||||
// not for proving real semantic ranking. That requires real Ollama.
|
||||
//
|
||||
// Why this exists: the proof harness's contract tier already runs
|
||||
// against real Ollama (when present). For CI / fresh-clone reviewers
|
||||
// without Ollama, this fake unblocks the chain.
|
||||
//
|
||||
// Usage:
|
||||
// bin/fake_ollama --bind 127.0.0.1:11435 --dim 768
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
)
|
||||
|
||||
func main() {
|
||||
bind := flag.String("bind", "127.0.0.1:11435", "bind addr")
|
||||
dim := flag.Int("dim", 768, "embedding dimension to return")
|
||||
model := flag.String("model", "nomic-embed-text", "model name to echo back")
|
||||
flag.Parse()
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/api/tags", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"models": []map[string]any{
|
||||
{
|
||||
"name": *model + ":latest",
|
||||
"model": *model + ":latest",
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
mux.HandleFunc("/api/embeddings", func(w http.ResponseWriter, r *http.Request) {
|
||||
var req struct {
|
||||
Model string `json:"model"`
|
||||
Prompt string `json:"prompt"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
http.Error(w, "decode: "+err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
// Reject unknown models so embedd's bad-model→502 contract
|
||||
// path is exercisable. The fake recognizes the configured
|
||||
// model name only.
|
||||
if req.Model != "" && req.Model != *model && req.Model != *model+":latest" {
|
||||
http.Error(w, fmt.Sprintf("model %q not found", req.Model), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
vec := deterministicVector(req.Prompt, *dim)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"embedding": vec,
|
||||
})
|
||||
})
|
||||
mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"status":"ok","service":"fake_ollama"}`))
|
||||
})
|
||||
|
||||
slog.Info("fake_ollama starting", "bind", *bind, "dim", *dim, "model", *model)
|
||||
srv := &http.Server{Addr: *bind, Handler: mux}
|
||||
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
slog.Error("fake_ollama serve", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// deterministicVector returns a dim-length float64 vector derived from
// sha256(prompt). The same prompt always yields the same vector, across
// runs and across machines, so smoke assertions can compare results.
//
// Element i is hash byte i%32 mapped linearly from [0, 255] onto
// [-1, 127/128], so for dim > 32 the vector repeats with period 32. The
// values only look vaguely like a real embedding; they carry no semantic
// meaning.
//
// A dim of zero or less yields an empty, non-nil slice (which encodes to
// the JSON array [] rather than null) instead of panicking in make.
func deterministicVector(prompt string, dim int) []float64 {
	if dim < 0 {
		dim = 0
	}
	digest := sha256.Sum256([]byte(prompt))
	vec := make([]float64, dim)
	for i := range vec {
		// Cycle through the 32 digest bytes and centre each on zero.
		b := digest[i%len(digest)]
		vec[i] = (float64(b) - 128.0) / 128.0
	}
	return vec
}
|
||||
6
justfile
6
justfile
@ -53,6 +53,12 @@ build:
|
||||
smoke day:
|
||||
@bash scripts/{{day}}_smoke.sh
|
||||
|
||||
# Fixture-mode G2 smoke — runs scripts/g2_smoke_fixtures.sh, which targets
|
||||
# the fake Ollama instead of a real one, so CI / fresh-clone reviewers
|
||||
# without Ollama can verify the embed contract. Partially closes R-006
# (embed half; storage half deferred).
|
||||
smoke-g2-fixtures:
|
||||
@bash scripts/g2_smoke_fixtures.sh
|
||||
|
||||
# All 9 smokes in dependency order. Halts on first failure.
|
||||
smoke-all:
|
||||
#!/usr/bin/env bash
|
||||
|
||||
146
scripts/g2_smoke_fixtures.sh
Executable file
146
scripts/g2_smoke_fixtures.sh
Executable file
@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env bash
|
||||
# G2 smoke — fixtures variant. Same shape as g2_smoke.sh but points
|
||||
# embedd at the Go fake Ollama (cmd/fake_ollama) instead of a real
|
||||
# Ollama install. Useful for CI / fresh-clone reviewers who don't
|
||||
# have Ollama set up.
|
||||
#
|
||||
# Validates the embed contract end-to-end:
|
||||
# - POST /v1/embed → 200, dim=768
|
||||
# - Same text twice → byte-identical vector (fake is deterministic)
|
||||
# - Different texts → different vectors
|
||||
# - Bad model → 4xx/5xx (fake rejects unknown models with 404 → embedd
|
||||
# maps to 502)
|
||||
#
|
||||
# What this DOESN'T cover:
|
||||
# - Real semantic similarity (fake vectors are sha256-derived; not
|
||||
# semantically meaningful)
|
||||
# - Real Ollama API quirks (timeouts, version-specific shapes)
|
||||
#
|
||||
# Closes R-006 partial: embedd no longer needs real Ollama for the
|
||||
# CI / fresh-clone path. MinIO mocking is a separate Sprint 0
|
||||
# follow-up.
|
||||
#
|
||||
# Usage: ./scripts/g2_smoke_fixtures.sh
|
||||
|
||||
set -euo pipefail
cd "$(dirname "$0")/.."

export PATH="$PATH:/usr/local/go/bin"

FAKE_PORT=11435 # distinct from real Ollama at 11434
EMBEDD_PORT=3216
GATEWAY_PORT=3110
VECTORD_PORT=3215

echo "[g2-fixtures] building fake_ollama + embedd + vectord + gateway..."
go build -o bin/ ./cmd/fake_ollama ./cmd/embedd ./cmd/vectord ./cmd/gateway

# Kill leftovers from an earlier run so the fixed ports are free, then give
# the kernel a moment to release the sockets.
pkill -f "bin/fake_ollama" 2>/dev/null || true
pkill -f "bin/(embedd|vectord|gateway)" 2>/dev/null || true
sleep 0.3

PIDS=()
TMP="$(mktemp -d)"

# cleanup — stop every background process recorded in PIDS and remove the
# temp directory holding the override config and the service logs.
# Runs from the EXIT trap, so it fires on success, failure and signals alike.
cleanup() {
  echo "[g2-fixtures] cleanup"
  # ${PIDS[@]+...} expands to nothing when the array is empty; a bare
  # "${PIDS[@]}" aborts under `set -u` on bash older than 4.4.
  for p in ${PIDS[@]+"${PIDS[@]}"}; do
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT
# A signal handler that merely returns lets the script carry on after the
# services are gone. Exit instead (128 + signal number); the EXIT trap then
# runs cleanup exactly once.
trap 'exit 130' INT
trap 'exit 143' TERM
|
||||
|
||||
# poll_health PORT
# Polls http://127.0.0.1:PORT/health every 50 ms until it answers with a
# successful HTTP status, giving up after roughly 5 seconds.
# Returns 0 once the endpoint is healthy, 1 on timeout.
poll_health() {
  local port="$1"
  local deadline=$(( $(date +%s) + 5 ))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    # -f makes curl fail on HTTP >= 400, so an unrelated process that
    # happens to hold the port (answering 404/500) is not taken for a
    # healthy service.
    if curl -fsS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.05
  done
  return 1
}
|
||||
|
||||
# 1. Start fake_ollama on port 11435
echo "[g2-fixtures] launching fake_ollama on :${FAKE_PORT}..."
./bin/fake_ollama --bind "127.0.0.1:${FAKE_PORT}" --dim 768 \
  > "$TMP/fake_ollama.log" 2>&1 &
PIDS+=("$!")
poll_health "$FAKE_PORT" || { echo "fake_ollama failed"; cat "$TMP/fake_ollama.log"; exit 1; }

# 2. Write override config pointing embedd at fake_ollama
CFG="$TMP/lakehouse_fixtures.toml"
FAKE_URL="http://127.0.0.1:${FAKE_PORT}"
sed "s|provider_url *= *\".*\"|provider_url = \"${FAKE_URL}\"|" \
  lakehouse.toml > "$CFG"
# sed exits 0 even when nothing matched. Without this check a lakehouse.toml
# lacking a provider_url line would leave embedd on its non-fixture provider,
# and the smoke would no longer be testing the fake.
if ! grep -qF "provider_url = \"${FAKE_URL}\"" "$CFG"; then
  echo "[g2-fixtures] no provider_url entry found in lakehouse.toml to override"
  exit 1
fi

# 3. Start embedd, vectord, gateway with the override config
echo "[g2-fixtures] launching embedd/vectord/gateway with fixture config..."
for SPEC in "vectord:${VECTORD_PORT}" "embedd:${EMBEDD_PORT}" "gateway:${GATEWAY_PORT}"; do
  # SPEC is "name:port"; split on the colon.
  NAME="${SPEC%:*}"; PORT="${SPEC#*:}"
  ./bin/"$NAME" --config "$CFG" > "$TMP/${NAME}.log" 2>&1 &
  PIDS+=("$!")
  if ! poll_health "$PORT"; then
    echo "[g2-fixtures] $NAME failed to bind on :$PORT"
    tail -10 "$TMP/${NAME}.log"
    exit 1
  fi
done
|
||||
|
||||
# 4. Run the assertions
FAILED=0
EMBED_URL="http://127.0.0.1:${GATEWAY_PORT}/v1/embed"

# post_embed JSON_BODY — POST the body to the gateway's /v1/embed and print
# the response body.
post_embed() {
  curl -sS -X POST "$EMBED_URL" \
    -H 'Content-Type: application/json' \
    -d "$1"
}

# first_vector JSON_BODY — print the first returned vector as compact JSON.
# Prints "null" when the response carries no vectors (e.g. an error body).
first_vector() {
  post_embed "$1" | jq -c '.vectors[0]'
}

# is_vector VALUE — true when VALUE is a real vector rather than empty/null.
is_vector() {
  [ -n "$1" ] && [ "$1" != "null" ]
}

echo "[g2-fixtures] /v1/embed with one text → 200 + dim=768"
RESP=$(post_embed '{"texts":["hello world"]}')
DIM=$(echo "$RESP" | jq -r '.dimension // empty')
N=$(echo "$RESP" | jq -r '.vectors | length')
MODEL=$(echo "$RESP" | jq -r '.model // empty')
if [ "$DIM" = "768" ] && [ "$N" = "1" ] && [ "$MODEL" = "nomic-embed-text" ]; then
  echo " ✓ dim=768, model=nomic-embed-text"
else
  echo " ✗ dim=$DIM n=$N model=$MODEL"; FAILED=1
fi

echo "[g2-fixtures] same text twice → byte-identical vector (deterministic)"
V1=$(first_vector '{"texts":["test"]}')
V2=$(first_vector '{"texts":["test"]}')
# Two failed requests both yield "null", which would compare equal; require
# real vectors before trusting the equality check.
if ! is_vector "$V1" || ! is_vector "$V2"; then
  echo " ✗ response carried no vector"; FAILED=1
elif [ "$V1" = "$V2" ]; then
  echo " ✓ deterministic"
else
  echo " ✗ same input → different vectors (fake should be deterministic)"; FAILED=1
fi

echo "[g2-fixtures] different texts → different vectors"
VA=$(first_vector '{"texts":["alpha"]}')
VB=$(first_vector '{"texts":["beta"]}')
# A vector on one side and "null" on the other would also count as
# "different"; insist on two real vectors first.
if ! is_vector "$VA" || ! is_vector "$VB"; then
  echo " ✗ response carried no vector"; FAILED=1
elif [ "$VA" != "$VB" ]; then
  echo " ✓ different texts diverge"
else
  echo " ✗ different texts produced identical vectors"; FAILED=1
fi

echo "[g2-fixtures] bad model → 4xx/5xx (fake returns 404, embedd maps to 502)"
HTTP=$(curl -s -o /dev/null -w "%{http_code}" \
  -X POST "$EMBED_URL" \
  -H 'Content-Type: application/json' \
  -d '{"texts":["x"],"model":"definitely-not-loaded"}')
if [ "$HTTP" -ge 400 ] && [ "$HTTP" -lt 600 ]; then
  echo " ✓ unknown model → $HTTP"
else
  echo " ✗ unknown model → $HTTP"; FAILED=1
fi

if [ "$FAILED" = "0" ]; then
  echo "[g2-fixtures] ✓ G2 fixture-mode acceptance: PASSED"
  exit 0
else
  echo "[g2-fixtures] ✗ G2 fixture-mode acceptance: FAILED"
  exit 1
fi
|
||||
Loading…
x
Reference in New Issue
Block a user