Adds cmd/fake_ollama, a minimal Ollama-API-compatible fake that
implements just enough surface for embedd to drive end-to-end
without a real Ollama install:
GET /api/tags — fixed model list including nomic-embed-text
POST /api/embeddings — deterministic dim-D vector from sha256(prompt)
GET /health — for the smoke's poll_health helper
Same prompt → bit-identical vector across runs, machines, and CI
nodes. Vectors are NOT semantically meaningful; the fake validates
the embed CONTRACT (dimension echo, response shape, status codes,
deterministic round-trip), not real semantic ranking. Real ranking
still requires real Ollama and lives in scripts/g2_smoke.sh + the
integration tier of the proof harness.
scripts/g2_smoke_fixtures.sh — full chain smoke against the fake:
- Build fake_ollama + embedd + vectord + gateway
- Start fake on :11435 (distinct from real Ollama at :11434)
- Generate temp lakehouse.toml with provider_url override
- Boot embedd/vectord/gateway with --config <override>
- 4 assertions: dim=768, deterministic same-text, different-text
divergence, bad-model → 4xx/5xx (fake 404 → embedd 502)
- Trap-cleanup tears down all 4 binaries + tmp config
Wired into the task runner:
just smoke-g2-fixtures
Closes R-006 partially:
- Embed half: ✓ — CI / fresh-clone reviewers without Ollama can
now run the embed contract smoke
- Storage half: deferred — mocking S3 protocol is non-trivial
(multipart, signed URLs, etc.) and MinIO itself is lightweight
enough to install via Docker in any CI environment. Documented
as Sprint 0 follow-up if a CI system without Docker shows up.
What this DOESN'T cover:
- Real semantic similarity (use scripts/g2_smoke.sh + real Ollama)
- Real Ollama API quirks (timeouts, version-specific shapes,
/api/embed batch endpoint that newer versions support)
Verified:
bash scripts/g2_smoke_fixtures.sh — 4/4 assertions PASS, ~3s wall
just verify — vet + test + 9 smokes still green
Doesn't replace the existing g2_smoke.sh (which still requires real
Ollama and exercises the actual embed semantics). Adds an alternate
mode for portability.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
103 lines · 3.5 KiB · Go
// fake_ollama is a minimal Ollama-API-compatible fake for proof harness
|
|
// fixture-mode smokes (R-006 partial). Implements just enough of the
|
|
// Ollama API surface for embedd to drive end-to-end without a real
|
|
// Ollama installation:
|
|
//
|
|
// GET /api/tags — returns a fixed model list including
|
|
// nomic-embed-text:latest
|
|
// POST /api/embeddings — returns a deterministic dim-D vector
|
|
// derived from sha256(prompt). Same prompt
|
|
// → bit-identical vector across runs.
|
|
//
|
|
// Vectors are NOT semantically meaningful (the value of similarity
|
|
// search against these is undefined). The fake is for proving the
|
|
// EMBED CONTRACT — dimension echo, response shape, status codes —
|
|
// not for proving real semantic ranking. That requires real Ollama.
|
|
//
|
|
// Why this exists: the proof harness's contract tier already runs
|
|
// against real Ollama (when present). For CI / fresh-clone reviewers
|
|
// without Ollama, this fake unblocks the chain.
|
|
//
|
|
// Usage:
|
|
// bin/fake_ollama --bind 127.0.0.1:11435 --dim 768
|
|
package main
|
|
|
|
import (
	"crypto/sha256"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"log/slog"
	"net/http"
	"os"
	"time"
)
|
|
|
|
func main() {
|
|
bind := flag.String("bind", "127.0.0.1:11435", "bind addr")
|
|
dim := flag.Int("dim", 768, "embedding dimension to return")
|
|
model := flag.String("model", "nomic-embed-text", "model name to echo back")
|
|
flag.Parse()
|
|
|
|
mux := http.NewServeMux()
|
|
mux.HandleFunc("/api/tags", func(w http.ResponseWriter, _ *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
|
"models": []map[string]any{
|
|
{
|
|
"name": *model + ":latest",
|
|
"model": *model + ":latest",
|
|
},
|
|
},
|
|
})
|
|
})
|
|
mux.HandleFunc("/api/embeddings", func(w http.ResponseWriter, r *http.Request) {
|
|
var req struct {
|
|
Model string `json:"model"`
|
|
Prompt string `json:"prompt"`
|
|
}
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
http.Error(w, "decode: "+err.Error(), http.StatusBadRequest)
|
|
return
|
|
}
|
|
// Reject unknown models so embedd's bad-model→502 contract
|
|
// path is exercisable. The fake recognizes the configured
|
|
// model name only.
|
|
if req.Model != "" && req.Model != *model && req.Model != *model+":latest" {
|
|
http.Error(w, fmt.Sprintf("model %q not found", req.Model), http.StatusNotFound)
|
|
return
|
|
}
|
|
vec := deterministicVector(req.Prompt, *dim)
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
|
"embedding": vec,
|
|
})
|
|
})
|
|
mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte(`{"status":"ok","service":"fake_ollama"}`))
|
|
})
|
|
|
|
slog.Info("fake_ollama starting", "bind", *bind, "dim", *dim, "model", *model)
|
|
srv := &http.Server{Addr: *bind, Handler: mux}
|
|
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
|
slog.Error("fake_ollama serve", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// deterministicVector maps a prompt to a reproducible dim-length
// float64 vector: the 32 sha256 bytes of the prompt are tiled across
// the output and each byte is rescaled from [0, 255] into [-1, 1).
// The same prompt yields a bit-identical vector on every run and
// machine, which is all the fixture smokes need — the values carry
// no semantic meaning.
func deterministicVector(prompt string, dim int) []float64 {
	digest := sha256.Sum256([]byte(prompt))
	out := make([]float64, dim)
	for i := 0; i < dim; i++ {
		// Tile the digest over dim positions; center on 128 and
		// divide by 128 so cosine distance is well-defined and the
		// result looks vaguely like a real embedding.
		out[i] = (float64(digest[i%len(digest)]) - 128.0) / 128.0
	}
	return out
}
|