golangLAKEHOUSE/cmd/ingestd/main_test.go
root 0f79bce948 Batch 3: cmd/<bin>/main_test.go × 6 — closes R-005
Adds main_test.go for each of the 6 cmd binaries that lacked them
(storaged already had main_test.go; that's where the pattern came
from). Each test file focuses on the cmd-specific surface — route
mounts, body caps, decode/validation paths — without re-testing
internal package logic that's covered elsewhere.

cmd/catalogd/main_test.go — 6 funcs
  TestRoutesMounted: chi.Walk asserts /catalog/{register,manifest/*,list}
  TestHandleRegister_BodyTooLarge: 5 MiB body → 4xx
  TestHandleRegister_MalformedJSON: 400
  TestHandleRegister_EmptyName_400: ErrEmptyName surfaces as 400
  TestHandleGetManifest_404 + TestHandleList_EmptyShape

cmd/embedd/main_test.go — 8 funcs
  stubProvider implements embed.Provider deterministically
  TestRoutesMounted, MalformedJSON_400, EmptyTextRejected_400 (per
    scrum O-W3), UpstreamError_502 (provider error → 502, not 500),
    HappyPath_ProviderEcho, BodyTooLarge (4xx range), TestItoa
    (covers the no-strconv helper)

cmd/gateway/main_test.go — 4 funcs
  TestMustParseUpstream_HappyPaths: 3 valid URLs
  TestMustParseUpstream_FailureExits: re-execs the test binary in a
    subprocess with env flag (standard pattern for testing os.Exit
    callers); subprocess invokes mustParseUpstream("127.0.0.1:3211")
    [missing scheme]; expects exit non-zero. Same pattern for garbage.
  TestUpstreamConfigKeys_DocumentedShape: locks the 6 _url keys

cmd/ingestd/main_test.go — 7 funcs
  Stubs both storaged and catalogd via httptest.Server so the cmd
  layer can be exercised without bringing the full chain up.
  TestRoutesMounted: chi.Walk asserts POST /ingest is mounted
  TestHandleIngest_MissingNameQueryParam: 400 with "name" in body
  TestHandleIngest_MalformedMultipart: 400
  TestHandleIngest_MissingFormFile: 400 (valid multipart, wrong field)
  TestHandleIngest_BodyTooLarge: 4xx
  TestEscapeKeyPath: 6-case URL-escape table (apostrophe, space, etc.)
  TestParquetKeyPath_Format: locks the datasets/<n>/<fp>.parquet shape
    per scrum C-DRIFT (any rename breaks idempotent re-ingest)

cmd/queryd/main_test.go — 6 funcs
  Tests pre-DB paths (decode, body cap, empty SQL); db.QueryContext
  itself needs DuckDB so it's covered by GOLAKE-040 in the proof
  harness, not unit tests. handlers.db = nil here is intentional.
  TestHandleSQL_EmptySQL_400: 3 cases (empty, whitespace, mixed-WS)
  TestMaxSQLBodyBytes_Reasonable: locks the 64 KiB constant in a
    sane range so a refactor can't blow it open
  TestPrimaryBucket_Constant: locks "primary" — secrets lookup uses
    this; rename = silent secret-resolution failure at boot

cmd/vectord/main_test.go — 14 funcs
  All 6 routes verified mounted. handlers.persist = nil = pure
  in-memory mode; persistence is GOLAKE-070 in the proof harness.
  Coverage of every error branch in handleCreate/Add/Search/Delete:
    missing index → 404, dim mismatch → 400, empty items → 400,
    empty id → 400, malformed JSON → 400, body too large → 4xx,
    happy create → 201, happy list → 200.

One real finding caught during writing:
  Body-cap rejection is sometimes 413 (typed MaxBytesError survives
  unwrap) and sometimes 400 (decoder wraps it as a generic decode
  error). Both are valid client-error contracts; the contract isn't
  "exactly 413" but "fails loud as 4xx, never silent 200 or 5xx."
  Tests assert 4xx range. The proof harness's
  proof_assert_status_4xx already had this shape — just bringing
  the unit tests in line with it.

Verified:
  go test -count=1 -short ./cmd/...  — all 7 packages green
  just verify                         — vet + test + 9 smokes 35s

Closes audit risk R-005 (6/7 cmd/main.go untested). Combined with
the proof harness's wiring coverage, every cmd-level handler now
has both unit-test and integration-test coverage of the wiring
layer. R-005 → CLOSED.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 06:18:46 -05:00

189 lines
5.2 KiB
Go

package main
import (
"bytes"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/go-chi/chi/v5"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/catalogclient"
)
// Closes R-005 for ingestd: tests for the cmd-level HTTP surface of
// /ingest — the name query param, the body cap, multipart parsing, and
// a missing form file. The CSV→Parquet logic itself is tested in
// internal/ingestd.

// newTestHandlers builds a *handlers value wired to two in-process stub
// servers so /ingest can be driven without the real catalogd or storaged.
//
// It returns the handlers together with the catalogd stub server. Both
// stub servers are shut down automatically via t.Cleanup.
func newTestHandlers(t *testing.T) (*handlers, *httptest.Server) {
	t.Helper()

	// catalogd stand-in: /catalog/register gets a canned JSON body with
	// an implicit 200; every other path gets a 404.
	catalogHandler := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/catalog/register" {
			w.WriteHeader(http.StatusNotFound)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{"manifest":{"name":"x","dataset_id":"d","schema_fingerprint":"sha256:x","objects":[]},"existing":false}`))
	}
	catalogStub := httptest.NewServer(http.HandlerFunc(catalogHandler))
	t.Cleanup(catalogStub.Close)

	// storaged stand-in: answers every request with 200 and a fixed body.
	storageHandler := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(`{"status":"ok"}`))
	}
	storageStub := httptest.NewServer(http.HandlerFunc(storageHandler))
	t.Cleanup(storageStub.Close)

	underTest := &handlers{
		storagedURL: strings.TrimRight(storageStub.URL, "/"),
		catalogd:    catalogclient.New(catalogStub.URL),
		hc:          &http.Client{},
		maxBytes:    256 << 20,
	}
	return underTest, catalogStub
}
// mountedRouter returns a fresh chi router on which h has registered
// its routes.
func mountedRouter(h *handlers) chi.Router {
	router := chi.NewRouter()
	h.register(router)
	return router
}
// TestRoutesMounted walks the router produced by mountedRouter and
// asserts that a POST /ingest route is registered.
func TestRoutesMounted(t *testing.T) {
	h, _ := newTestHandlers(t)
	r := mountedRouter(h)

	found := false
	visit := func(method, route string, _ http.Handler, _ ...func(http.Handler) http.Handler) error {
		if method == http.MethodPost && route == "/ingest" {
			found = true
		}
		return nil
	}
	// A failed walk must not be mistaken for "route not mounted", so the
	// walk error is reported on its own.
	if err := chi.Walk(r, visit); err != nil {
		t.Fatalf("chi.Walk: %v", err)
	}
	if !found {
		t.Error("POST /ingest not mounted")
	}
}
// TestHandleIngest_MissingNameQueryParam posts to /ingest without the
// name query parameter and expects a 400 whose body mentions "name".
func TestHandleIngest_MissingNameQueryParam(t *testing.T) {
	h, _ := newTestHandlers(t)
	r := mountedRouter(h)
	srv := httptest.NewServer(r)
	defer srv.Close()

	resp, err := http.Post(srv.URL+"/ingest",
		"multipart/form-data; boundary=x", strings.NewReader(""))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusBadRequest {
		t.Errorf("expected 400 on missing name param, got %d", resp.StatusCode)
	}

	// Report a failed read explicitly; otherwise an empty body would be
	// blamed on the handler's error message.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("read response body: %v", err)
	}
	if !strings.Contains(string(body), "name") {
		t.Errorf("error body should mention 'name', got %q", body)
	}
}
// TestHandleIngest_MalformedMultipart sends a body that is not valid
// multipart data under a multipart Content-Type and expects a 400.
func TestHandleIngest_MalformedMultipart(t *testing.T) {
	h, _ := newTestHandlers(t)
	srv := httptest.NewServer(mountedRouter(h))
	defer srv.Close()

	const contentType = "multipart/form-data; boundary=xyz"
	payload := strings.NewReader("garbage not multipart")

	resp, err := http.Post(srv.URL+"/ingest?name=test", contentType, payload)
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer resp.Body.Close()

	if got := resp.StatusCode; got != http.StatusBadRequest {
		t.Errorf("expected 400 on malformed multipart, got %d", got)
	}
}
// TestHandleIngest_MissingFormFile sends a well-formed multipart body
// whose only part is named "other" (no "file" field) and expects a 400.
func TestHandleIngest_MissingFormFile(t *testing.T) {
	h, _ := newTestHandlers(t)
	srv := httptest.NewServer(mountedRouter(h))
	defer srv.Close()

	// One form field named "other", terminated by the closing boundary.
	const payload = "--xyz\r\n" +
		"Content-Disposition: form-data; name=\"other\"\r\n" +
		"\r\n" +
		"value\r\n" +
		"--xyz--\r\n"
	const contentType = "multipart/form-data; boundary=xyz"

	resp, err := http.Post(srv.URL+"/ingest?name=test", contentType, strings.NewReader(payload))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer resp.Body.Close()

	if got := resp.StatusCode; got != http.StatusBadRequest {
		t.Errorf("expected 400 on missing form file, got %d", got)
	}
}
// TestHandleIngest_BodyTooLarge lowers the handlers' maxBytes to 1024,
// posts a 4096-byte body, and expects any 4xx status in response.
func TestHandleIngest_BodyTooLarge(t *testing.T) {
	h, _ := newTestHandlers(t)
	// A tiny cap lets the test exceed it without a large upload.
	h.maxBytes = 1024
	srv := httptest.NewServer(mountedRouter(h))
	defer srv.Close()

	oversized := bytes.Repeat([]byte("x"), 4096)
	resp, err := http.Post(
		srv.URL+"/ingest?name=test",
		"multipart/form-data; boundary=xyz",
		bytes.NewReader(oversized),
	)
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer resp.Body.Close()

	// Any client-error status is accepted; success or 5xx is a failure.
	if code := resp.StatusCode; !(code >= 400 && code < 500) {
		t.Errorf("expected 4xx on oversize body, got %d", code)
	}
}
// TestEscapeKeyPath runs escapeKeyPath over a table of key paths and
// compares each result with the expected escaped form. Each row runs as
// a subtest named after its input.
func TestEscapeKeyPath(t *testing.T) {
	type row struct {
		input    string
		expected string
	}
	table := []row{
		{input: "a/b/c.parquet", expected: "a/b/c.parquet"},
		{input: "data sets/x.parquet", expected: "data%20sets/x.parquet"},
		{input: "O'Reilly/key", expected: "O%27Reilly/key"},
		{input: "datasets/proof/abc.parquet", expected: "datasets/proof/abc.parquet"},
		{input: "", expected: ""},
		{input: "/", expected: "/"},
	}

	for i := range table {
		entry := table[i]
		t.Run(entry.input, func(t *testing.T) {
			if actual := escapeKeyPath(entry.input); actual != entry.expected {
				t.Errorf("escapeKeyPath(%q) = %q, want %q", entry.input, actual, entry.expected)
			}
		})
	}
}
// TestParquetKeyPath_Format pins the shape of the parquetKeyPath
// template: it must contain a %s verb, start with "datasets/", and end
// with ".parquet". Per scrum C-DRIFT, a change here would land a
// dataset's parquet at an unexpected key and break schema-drift
// idempotency.
func TestParquetKeyPath_Format(t *testing.T) {
	hasVerb := strings.Contains(parquetKeyPath, "%s")
	underDatasets := strings.HasPrefix(parquetKeyPath, "datasets/")
	parquetExt := strings.HasSuffix(parquetKeyPath, ".parquet")

	if !hasVerb {
		t.Errorf("parquetKeyPath should be a fmt template, got %q", parquetKeyPath)
	}
	if !underDatasets {
		t.Errorf("parquetKeyPath should be under datasets/, got %q", parquetKeyPath)
	}
	if !parquetExt {
		t.Errorf("parquetKeyPath should end with .parquet, got %q", parquetKeyPath)
	}
}