golangLAKEHOUSE/cmd/fake_ollama/main.go

// fake_ollama is a minimal Ollama-API-compatible fake for proof harness
// fixture-mode smokes (R-006 partial). Implements just enough of the
// Ollama API surface for embedd to drive end-to-end without a real
// Ollama installation:
//
//   GET  /api/tags         — returns a fixed model list including
//                            nomic-embed-text:latest
//   POST /api/embeddings   — returns a deterministic dim-D vector
//                            derived from sha256(prompt). Same prompt
//                            → bit-identical vector across runs.
//
// Vectors are NOT semantically meaningful (the value of similarity
// search against these is undefined). The fake is for proving the
// EMBED CONTRACT — dimension echo, response shape, status codes —
// not for proving real semantic ranking. That requires real Ollama.
//
// Why this exists: the proof harness's contract tier already runs
// against real Ollama (when present). For CI / fresh-clone reviewers
// without Ollama, this fake unblocks the chain.
//
// Usage:
//   bin/fake_ollama --bind 127.0.0.1:11435 --dim 768
package main

import (
	"crypto/sha256"
	"encoding/json"
	"flag"
	"fmt"
	"log/slog"
	"net/http"
	"os"
)

func main() {
	bind := flag.String("bind", "127.0.0.1:11435", "bind addr")
	dim := flag.Int("dim", 768, "embedding dimension to return")
	model := flag.String("model", "nomic-embed-text", "model name to echo back")
	flag.Parse()

	mux := http.NewServeMux()
	mux.HandleFunc("/api/tags", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(map[string]any{
			"models": []map[string]any{
				{
					"name":  *model + ":latest",
					"model": *model + ":latest",
				},
			},
		})
	})
	mux.HandleFunc("/api/embeddings", func(w http.ResponseWriter, r *http.Request) {
		var req struct {
			Model  string `json:"model"`
			Prompt string `json:"prompt"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			http.Error(w, "decode: "+err.Error(), http.StatusBadRequest)
			return
		}
		// Reject unknown models so embedd's bad-model→502 contract
		// path is exercisable. The fake recognizes the configured
		// model name only.
		if req.Model != "" && req.Model != *model && req.Model != *model+":latest" {
			http.Error(w, fmt.Sprintf("model %q not found", req.Model), http.StatusNotFound)
			return
		}
		vec := deterministicVector(req.Prompt, *dim)
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(map[string]any{
			"embedding": vec,
		})
	})
	mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(`{"status":"ok","service":"fake_ollama"}`))
	})

	slog.Info("fake_ollama starting", "bind", *bind, "dim", *dim, "model", *model)
	srv := &http.Server{Addr: *bind, Handler: mux}
	if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
		slog.Error("fake_ollama serve", "err", err)
		os.Exit(1)
	}
}

// deterministicVector returns a fixed dim-d float64 vector derived
// from sha256(prompt). Same prompt → same vector across runs and
// across machines, so smoke assertions can compare to fixtures.
func deterministicVector(prompt string, dim int) []float64 {
	h := sha256.Sum256([]byte(prompt))
	vec := make([]float64, dim)
	for i := range vec {
		// Spread the 32 hash bytes across `dim` positions; map to
		// [-1, 1] so cosine distance is well-defined and the result
		// looks vaguely like a real embedding.
		b := h[i%len(h)]
		vec[i] = (float64(b) - 128.0) / 128.0
	}
	return vec
}