// storaged is the object I/O service. D2 wires GET / PUT / LIST /
// DELETE routes against a single bucket ("primary") in the registry.
// Bind is 127.0.0.1 only (G0 dev — no auth on the wire); body cap is
// 256 MiB; concurrent in-flight PUTs are capped at 4 with non-blocking
// try-acquire (503 + Retry-After when full).
package main

import (
	"context"
	"encoding/json"
	"errors"
	"flag"
	"io"
	"log/slog"
	"net/http"
	"os"
	"strings"

	"github.com/go-chi/chi/v5"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/secrets"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/storaged"
)

const (
	maxPutBytes      = 256 << 20 // 256 MiB per Qwen Q1 fix
	maxConcurrentPut = 4         // 4-slot semaphore on in-flight PUTs
	retryAfterSecs   = "5"       // Retry-After header on 503
	primaryBucket    = "primary"
)

func main() {
	configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
	secretsPath := flag.String("secrets", "/etc/lakehouse/secrets-go.toml",
		"path to secrets TOML (Go-side; Rust uses /etc/lakehouse/secrets.toml)")
	flag.Parse()

	cfg, err := shared.LoadConfig(*configPath)
	if err != nil {
		slog.Error("config", "err", err)
		os.Exit(1)
	}

	registry, err := buildRegistry(cfg, *secretsPath)
	if err != nil {
		slog.Error("bucket registry", "err", err)
		os.Exit(1)
	}

	h := newHandlers(registry)
	if err := shared.Run("storaged", cfg.Storaged.Bind, h.register); err != nil {
		slog.Error("server", "err", err)
		os.Exit(1)
	}
}

// buildRegistry constructs the (single, G0) bucket registry. Multi-bucket
// federation lands in G2; the registry shape is in place so that arrives
// without an HTTP-layer refactor.
func buildRegistry(cfg shared.Config, secretsPath string) (*storaged.BucketRegistry, error) {
	prov, err := secrets.NewFileProvider(secretsPath, secrets.S3Credentials{
		AccessKeyID:     cfg.S3.AccessKeyID,
		SecretAccessKey: cfg.S3.SecretAccessKey,
	})
	if err != nil {
		return nil, err
	}

	// Per Opus C1 review: don't tie the AWS config-load context to a
	// canceller that fires when buildRegistry returns. With static creds
	// it's fine today, but EC2 IMDS / SSO / AssumeRole credential
	// providers (G2+) capture the load ctx for refresh — a cancelled
	// ctx silently fails them at the next refresh. Use Background here;
	// per-request lifetimes flow through r.Context() in handlers.
	bucket, err := storaged.NewBucket(context.Background(), cfg.S3, prov, primaryBucket)
	if err != nil {
		return nil, err
	}

	reg := storaged.NewRegistry()
	if err := reg.Register(bucket); err != nil {
		return nil, err
	}
	return reg, nil
}

// handlers carries the registry + the PUT semaphore. One instance
// per process; chi routes close over it.
type handlers struct {
	reg    *storaged.BucketRegistry
	putSem chan struct{}
}

func newHandlers(reg *storaged.BucketRegistry) *handlers {
	return &handlers{
		reg:    reg,
		putSem: make(chan struct{}, maxConcurrentPut),
	}
}

func (h *handlers) register(r chi.Router) {
	// Verb-paths per PHASE_G0_KICKOFF D2.4 spec. REST-style refactor
	// (GET/PUT/DELETE on a single /storage/{key}) deferred until G2.
	r.Get("/storage/get/*", h.handleGet)
	r.Put("/storage/put/*", h.handlePut)
	r.Get("/storage/list", h.handleList)
	r.Delete("/storage/delete/*", h.handleDelete)
}

// extractKey pulls the wildcard key out of a chi route. Routes are
// registered as `/storage/<verb>/*` so the wildcard captures the
// remainder, including embedded slashes.
func extractKey(r *http.Request) string {
	// chi.URLParam with "*" returns everything after the route prefix.
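	// Illustrative: GET /storage/get/tables/t1/part-0.parquet yields
	// key "tables/t1/part-0.parquet" (the path itself is made up).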
	return chi.URLParam(r, "*")
}

// validateKey rejects keys that would be unsafe to round-trip through
// the system. The exact policy is a G0 design choice — see the focused
// decision request in the D2 PR description (validateKey-policy).
func validateKey(key string) error {
	if key == "" {
		return errors.New("empty key")
	}
	if len(key) > 1024 {
		return errors.New("key too long (>1024 bytes)")
	}
	if strings.ContainsRune(key, 0) {
		return errors.New("key contains NUL byte")
	}
	if strings.HasPrefix(key, "/") {
		return errors.New("key starts with /")
	}
	for _, part := range strings.Split(key, "/") {
		if part == ".." {
			return errors.New("key contains .. component")
		}
	}
	if strings.ContainsAny(key, "\r\n\t") {
		return errors.New("key contains control char")
	}
	return nil
}

func (h *handlers) handleGet(w http.ResponseWriter, r *http.Request) {
	key := extractKey(r)
	if err := validateKey(key); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	bucket, err := h.reg.Resolve(primaryBucket)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	body, info, err := bucket.Get(r.Context(), key)
	if errors.Is(err, storaged.ErrKeyNotFound) {
		http.Error(w, "not found", http.StatusNotFound)
		return
	}
	if err != nil {
		slog.Error("storage get", "key", key, "err", err)
		http.Error(w, "internal", http.StatusInternalServerError)
		return
	}
	defer body.Close()

	if info.ETag != "" {
		w.Header().Set("ETag", info.ETag)
	}
	w.Header().Set("Content-Type", "application/octet-stream")
	if _, err := io.Copy(w, body); err != nil {
		slog.Warn("storage get copy", "key", key, "err", err)
	}
}

func (h *handlers) handlePut(w http.ResponseWriter, r *http.Request) {
	key := extractKey(r)
	if err := validateKey(key); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	// Up-front Content-Length cap. Per Opus C3 review: the
	// manager.Uploader's multipart path runs body reads in goroutines
	// and wraps errors in its own types, so *http.MaxBytesError can be
	// buried by the time it reaches us — meaning bodies just over the
	// 5 MiB multipart threshold could surface as 500 instead of 413.
	// Catching Content-Length up front returns 413 deterministically
	// when the client honestly declares size; MaxBytesReader + the
	// string-match fallback below cover chunked / lying-CL cases.
	if r.ContentLength > maxPutBytes {
		w.Header().Set("Retry-After", retryAfterSecs)
		http.Error(w, "payload too large", http.StatusRequestEntityTooLarge)
		return
	}

	// Non-blocking try-acquire: if the 4-slot semaphore is full, return
	// 503 + Retry-After:5 instantly rather than holding the connection.
	// Per PHASE_G0_KICKOFF D2.4: "PUTs blocked on the semaphore → 503
	// with Retry-After: 5".
	select {
	case h.putSem <- struct{}{}:
		defer func() { <-h.putSem }()
	default:
		w.Header().Set("Retry-After", retryAfterSecs)
		http.Error(w, "storaged: put concurrency cap reached", http.StatusServiceUnavailable)
		return
	}

	// 256 MiB per-request body cap. Reads beyond this surface as
	// *http.MaxBytesError; for chunked-encoding bodies that's the only
	// signal we get. Defer LIFO order: r.Body.Close fires before
	// <-h.putSem, so the body is fully drained before the slot frees.
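	// Stdlib detail: the typed *http.MaxBytesError exists only on Go
	// 1.19+; older toolchains return a bare error whose text is
	// "http: request body too large", which is what the string-match
	// fallback in the Put error path also catches.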
	r.Body = http.MaxBytesReader(w, r.Body, maxPutBytes)
	defer r.Body.Close()

	bucket, err := h.reg.Resolve(primaryBucket)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	if err := bucket.Put(r.Context(), key, r.Body); err != nil {
		// Two-layer detect: errors.As catches the typed error when it
		// survives unwrapping; the string-match check catches cases
		// where manager.Uploader's multipart path wraps the body-read
		// failure in its own aggregate type.
		var maxErr *http.MaxBytesError
		if errors.As(err, &maxErr) || strings.Contains(err.Error(), "http: request body too large") {
			w.Header().Set("Retry-After", retryAfterSecs)
			http.Error(w, "payload too large", http.StatusRequestEntityTooLarge)
			return
		}
		slog.Error("storage put", "key", key, "err", err)
		http.Error(w, "internal", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	_, _ = w.Write([]byte(`{"status":"ok"}`))
}

func (h *handlers) handleList(w http.ResponseWriter, r *http.Request) {
	prefix := r.URL.Query().Get("prefix")

	bucket, err := h.reg.Resolve(primaryBucket)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	objs, err := bucket.List(r.Context(), prefix)
	if err != nil {
		slog.Error("storage list", "prefix", prefix, "err", err)
		http.Error(w, "internal", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	_ = json.NewEncoder(w).Encode(map[string]any{
		"prefix":  prefix,
		"objects": objs,
	})
}

func (h *handlers) handleDelete(w http.ResponseWriter, r *http.Request) {
	key := extractKey(r)
	if err := validateKey(key); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	bucket, err := h.reg.Resolve(primaryBucket)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	if err := bucket.Delete(r.Context(), key); err != nil {
		slog.Error("storage delete", "key", key, "err", err)
		http.Error(w, "internal", http.StatusInternalServerError)
		return
	}
	w.WriteHeader(http.StatusNoContent)
}
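
// Smoke-test sketch for a local dev instance. The bind address comes from
// cfg.Storaged.Bind; 127.0.0.1:9440 below is illustrative only, not a value
// this file defines.
//
//	PUT    http://127.0.0.1:9440/storage/put/tables/t1/part-0.parquet
//	GET    http://127.0.0.1:9440/storage/get/tables/t1/part-0.parquet
//	GET    http://127.0.0.1:9440/storage/list?prefix=tables/t1/
//	DELETE http://127.0.0.1:9440/storage/delete/tables/t1/part-0.parquet
//
// Expected edges: an over-cap PUT answers 413; a fifth concurrent PUT
// answers 503 with Retry-After: 5; a GET on a missing key answers 404.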