// catalogd is the metadata authority — registers Parquet datasets, // persists manifests in storaged, rehydrates them on startup, and // answers GET/list queries. ADR-020 idempotency contract enforced // by internal/catalogd/registry.go. package main import ( "context" "encoding/json" "errors" "flag" "log/slog" "net/http" "os" "time" "github.com/go-chi/chi/v5" "git.agentview.dev/profit/golangLAKEHOUSE/internal/catalogd" "git.agentview.dev/profit/golangLAKEHOUSE/internal/shared" ) func main() { configPath := flag.String("config", "lakehouse.toml", "path to TOML config") flag.Parse() cfg, err := shared.LoadConfig(*configPath) if err != nil { slog.Error("config", "err", err) os.Exit(1) } if cfg.Catalogd.StoragedURL == "" { slog.Error("config", "err", "catalogd.storaged_url is required") os.Exit(1) } store := catalogd.NewStoreClient(cfg.Catalogd.StoragedURL) registry := catalogd.NewRegistry(store) rehydrateCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) n, err := registry.Rehydrate(rehydrateCtx) cancel() if err != nil { slog.Error("rehydrate", "err", err) os.Exit(1) } slog.Info("rehydrated", "manifests", n) h := newHandlers(registry) if err := shared.Run("catalogd", cfg.Catalogd.Bind, h.register); err != nil { slog.Error("server", "err", err) os.Exit(1) } } type handlers struct { reg *catalogd.Registry } func newHandlers(r *catalogd.Registry) *handlers { return &handlers{reg: r} } func (h *handlers) register(r chi.Router) { r.Post("/catalog/register", h.handleRegister) r.Get("/catalog/manifest/*", h.handleGetManifest) r.Get("/catalog/list", h.handleList) } // registerRequest mirrors POST body shape. type registerRequest struct { Name string `json:"name"` SchemaFingerprint string `json:"schema_fingerprint"` Objects []catalogd.Object `json:"objects"` RowCount *int64 `json:"row_count,omitempty"` } // registerResponse adds the existing flag so callers can distinguish // fresh registration from idempotent re-register. type registerResponse struct { Manifest *catalogd.Manifest `json:"manifest"` Existing bool `json:"existing"` } func (h *handlers) handleRegister(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() r.Body = http.MaxBytesReader(w, r.Body, 4<<20) // 4 MiB cap on register payloads var req registerRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest) return } m, existing, err := h.reg.Register(r.Context(), req.Name, req.SchemaFingerprint, req.Objects, req.RowCount) if errors.Is(err, catalogd.ErrFingerprintConflict) { http.Error(w, err.Error(), http.StatusConflict) return } if errors.Is(err, catalogd.ErrEmptyName) || errors.Is(err, catalogd.ErrEmptyFingerprint) { // Per scrum S2 (Opus): sentinel-based detection, not substring match. http.Error(w, err.Error(), http.StatusBadRequest) return } if err != nil { slog.Error("register", "name", req.Name, "err", err) http.Error(w, "internal", http.StatusInternalServerError) return } w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(registerResponse{Manifest: m, Existing: existing}) } func (h *handlers) handleGetManifest(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "*") m, err := h.reg.Get(name) if errors.Is(err, catalogd.ErrManifestNotFound) { http.Error(w, "not found", http.StatusNotFound) return } if err != nil { slog.Error("get manifest", "name", name, "err", err) http.Error(w, "internal", http.StatusInternalServerError) return } w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(m) } func (h *handlers) handleList(w http.ResponseWriter, _ *http.Request) { items := h.reg.List() w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(map[string]any{"manifests": items, "count": len(items)}) }