package main import ( "bytes" "io" "net/http" "net/http/httptest" "strings" "testing" "github.com/go-chi/chi/v5" "git.agentview.dev/profit/golangLAKEHOUSE/internal/catalogclient" ) // Closes R-005 for ingestd: cmd-level tests for the cmd-shape // of /ingest — name query param, body cap, multipart parsing, // missing form file. CSV→Parquet logic is tested in internal/ingestd. func newTestHandlers(t *testing.T) (*handlers, *httptest.Server) { t.Helper() // Stub catalogd so we can run end-to-end happy paths without the // real catalogd up. The stub returns a 200-shaped registerResponse. stub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { case "/catalog/register": w.Header().Set("Content-Type", "application/json") w.Write([]byte(`{"manifest":{"name":"x","dataset_id":"d","schema_fingerprint":"sha256:x","objects":[]},"existing":false}`)) default: w.WriteHeader(http.StatusNotFound) } })) t.Cleanup(stub.Close) storaged := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) w.Write([]byte(`{"status":"ok"}`)) })) t.Cleanup(storaged.Close) h := &handlers{ storagedURL: strings.TrimRight(storaged.URL, "/"), catalogd: catalogclient.New(stub.URL), hc: &http.Client{}, maxBytes: 256 << 20, } return h, stub } func mountedRouter(h *handlers) chi.Router { r := chi.NewRouter() h.register(r) return r } func TestRoutesMounted(t *testing.T) { h, _ := newTestHandlers(t) r := mountedRouter(h) found := false chi.Walk(r, func(method, route string, _ http.Handler, _ ...func(http.Handler) http.Handler) error { if method == "POST" && route == "/ingest" { found = true } return nil }) if !found { t.Error("POST /ingest not mounted") } } func TestHandleIngest_MissingNameQueryParam(t *testing.T) { h, _ := newTestHandlers(t) r := mountedRouter(h) srv := httptest.NewServer(r) defer srv.Close() resp, err := http.Post(srv.URL+"/ingest", "multipart/form-data; boundary=x", strings.NewReader("")) if err != nil { t.Fatalf("POST: %v", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusBadRequest { t.Errorf("expected 400 on missing name param, got %d", resp.StatusCode) } body, _ := io.ReadAll(resp.Body) if !strings.Contains(string(body), "name") { t.Errorf("error body should mention 'name', got %q", body) } } func TestHandleIngest_MalformedMultipart(t *testing.T) { h, _ := newTestHandlers(t) r := mountedRouter(h) srv := httptest.NewServer(r) defer srv.Close() resp, err := http.Post(srv.URL+"/ingest?name=test", "multipart/form-data; boundary=xyz", strings.NewReader("garbage not multipart")) if err != nil { t.Fatalf("POST: %v", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusBadRequest { t.Errorf("expected 400 on malformed multipart, got %d", resp.StatusCode) } } func TestHandleIngest_MissingFormFile(t *testing.T) { h, _ := newTestHandlers(t) r := mountedRouter(h) srv := httptest.NewServer(r) defer srv.Close() // Valid multipart with no "file" field. body := bytes.NewReader([]byte( "--xyz\r\n" + "Content-Disposition: form-data; name=\"other\"\r\n" + "\r\n" + "value\r\n" + "--xyz--\r\n", )) resp, err := http.Post(srv.URL+"/ingest?name=test", "multipart/form-data; boundary=xyz", body) if err != nil { t.Fatalf("POST: %v", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusBadRequest { t.Errorf("expected 400 on missing form file, got %d", resp.StatusCode) } } func TestHandleIngest_BodyTooLarge(t *testing.T) { h, _ := newTestHandlers(t) h.maxBytes = 1024 // tiny cap so we hit it without huge upload r := mountedRouter(h) srv := httptest.NewServer(r) defer srv.Close() big := bytes.Repeat([]byte("x"), 4096) resp, err := http.Post(srv.URL+"/ingest?name=test", "multipart/form-data; boundary=xyz", bytes.NewReader(big)) if err != nil { t.Fatalf("POST: %v", err) } defer resp.Body.Close() if resp.StatusCode < 400 || resp.StatusCode >= 500 { t.Errorf("expected 4xx on oversize body, got %d", resp.StatusCode) } } func TestEscapeKeyPath(t *testing.T) { cases := []struct { in string want string }{ {"a/b/c.parquet", "a/b/c.parquet"}, {"data sets/x.parquet", "data%20sets/x.parquet"}, {"O'Reilly/key", "O%27Reilly/key"}, {"datasets/proof/abc.parquet", "datasets/proof/abc.parquet"}, {"", ""}, {"/", "/"}, } for _, tc := range cases { t.Run(tc.in, func(t *testing.T) { got := escapeKeyPath(tc.in) if got != tc.want { t.Errorf("escapeKeyPath(%q) = %q, want %q", tc.in, got, tc.want) } }) } } func TestParquetKeyPath_Format(t *testing.T) { // Lock the content-addressed key shape per scrum C-DRIFT. // Failure here means a dataset's parquet would land at an // unexpected key, breaking schema-drift idempotency. if !strings.Contains(parquetKeyPath, "%s") { t.Errorf("parquetKeyPath should be a fmt template, got %q", parquetKeyPath) } if !strings.HasPrefix(parquetKeyPath, "datasets/") { t.Errorf("parquetKeyPath should be under datasets/, got %q", parquetKeyPath) } if !strings.HasSuffix(parquetKeyPath, ".parquet") { t.Errorf("parquetKeyPath should end with .parquet, got %q", parquetKeyPath) } }