// golangLAKEHOUSE/internal/catalogd/registry_test.go

package catalogd

import (
	"context"
	"errors"
	"sync"
	"testing"
	"time"
)

// memStore is an in-memory Store fake for unit tests.
//
// Objects live in a plain map keyed by object key. The Put, Get and List
// methods in this file each hold mu for the duration of their map access.
type memStore struct {
	// mu guards data.
	mu   sync.Mutex
	// data maps an object key to the bytes stored under it.
	data map[string][]byte
}

// newMemStore returns an empty memStore with its backing map already
// allocated, so it can be written to immediately.
func newMemStore() *memStore {
	store := new(memStore)
	store.data = make(map[string][]byte)
	return store
}

// Put stores a private copy of body under key, overwriting any value already
// held for that key. The context is ignored and the returned error is always
// nil.
func (m *memStore) Put(_ context.Context, key string, body []byte) error {
	// Detach from the caller's backing array so later writes to body cannot
	// change what the store holds. The result is non-nil even for empty body.
	stored := append(make([]byte, 0, len(body)), body...)

	m.mu.Lock()
	defer m.mu.Unlock()
	m.data[key] = stored
	return nil
}

// Get returns the bytes stored under key, or ErrKeyNotFound when the key is
// absent from the store. The context is ignored.
//
// The result is a fresh copy of the stored value. Put detaches stored values
// from the caller's slice; Get mirrors that so a caller that mutates the
// returned bytes cannot silently rewrite the store's contents.
func (m *memStore) Get(_ context.Context, key string) ([]byte, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	stored, ok := m.data[key]
	if !ok {
		return nil, ErrKeyNotFound
	}
	out := make([]byte, len(stored))
	copy(out, stored)
	return out, nil
}

// List returns every stored key that begins with prefix. The context is
// ignored and the returned error is always nil.
//
// Keys are collected by ranging over a map, so their order is unspecified.
// The returned slice is non-nil even when nothing matches.
func (m *memStore) List(_ context.Context, prefix string) ([]string, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	n := len(prefix)
	matches := make([]string, 0)
	for key := range m.data {
		// Skip keys too short to carry the prefix, or that differ within it.
		if len(key) < n || key[:n] != prefix {
			continue
		}
		matches = append(matches, key)
	}
	return matches, nil
}

// mkRegistry builds a Registry over a fresh memStore and swaps the registry's
// clock for one that always reports the same UTC instant, so values derived
// from the clock are reproducible across runs. The store is returned too so a
// test can inspect it or hand it to another Registry.
func mkRegistry(t *testing.T) (*Registry, *memStore) {
	t.Helper()

	fixed := time.Unix(1777435000, 0).UTC()
	store := newMemStore()
	reg := NewRegistry(store)
	reg.now = func() time.Time { return fixed }
	return reg, store
}

// TestRegister_NewManifest registers a name the registry has not seen before
// and checks that the call succeeds, reports existing=false, and gives the
// manifest the ID that DatasetIDForName yields for that name.
func TestRegister_NewManifest(t *testing.T) {
	reg, _ := mkRegistry(t)
	ctx := context.Background()
	rowCount := int64(100)
	objects := []Object{{Key: "datasets/workers/p1.parquet", Size: 1024}}

	manifest, existing, err := reg.Register(ctx, "workers", "sha256:abc", objects, &rowCount)
	if err != nil {
		t.Fatalf("Register: %v", err)
	}
	if existing {
		t.Error("expected existing=false for new manifest")
	}
	if wantID := DatasetIDForName("workers"); manifest.DatasetID != wantID {
		t.Errorf("DatasetID: got %q, want UUIDv5(workers)", manifest.DatasetID)
	}
}

// TestRegister_SameFingerprint_Idempotent registers the same name and
// fingerprint twice. The second call must succeed, report existing=true, keep
// the original DatasetID, and carry the object list and row count passed to
// the second call.
func TestRegister_SameFingerprint_Idempotent(t *testing.T) {
	r, _ := mkRegistry(t)
	ctx := context.Background()

	rc := int64(100)
	first, _, err := r.Register(ctx, "workers", "sha256:abc",
		[]Object{{Key: "p1.parquet", Size: 1024}}, &rc)
	if err != nil {
		// Stop here: first is compared against below, and a failed setup
		// would otherwise surface as a misleading failure or a panic.
		t.Fatalf("Register (initial): %v", err)
	}

	// Re-register same name + fingerprint with new objects.
	rc2 := int64(200)
	second, existing, err := r.Register(ctx, "workers", "sha256:abc",
		[]Object{{Key: "p1.parquet", Size: 1024}, {Key: "p2.parquet", Size: 2048}}, &rc2)
	if err != nil {
		t.Fatalf("Register (idempotent): %v", err)
	}
	if !existing {
		t.Error("expected existing=true on idempotent re-register")
	}
	if second.DatasetID != first.DatasetID {
		t.Errorf("DatasetID changed: %q → %q", first.DatasetID, second.DatasetID)
	}
	if len(second.Objects) != 2 {
		t.Errorf("Objects not replaced: got %d, want 2", len(second.Objects))
	}
	if second.RowCount == nil || *second.RowCount != 200 {
		t.Errorf("RowCount not bumped: got %v, want 200", second.RowCount)
	}
}

// TestRegister_DifferentFingerprint_Conflict registers a name, then registers
// the same name again under a different fingerprint and expects the second
// call to fail with an error matching ErrFingerprintConflict.
func TestRegister_DifferentFingerprint_Conflict(t *testing.T) {
	r, _ := mkRegistry(t)
	ctx := context.Background()

	// The conflict can only be observed if the first registration landed, so
	// a setup failure is reported as such rather than as a missing conflict.
	if _, _, err := r.Register(ctx, "workers", "sha256:abc",
		[]Object{{Key: "p1.parquet", Size: 1024}}, nil); err != nil {
		t.Fatalf("Register (setup): %v", err)
	}

	_, _, err := r.Register(ctx, "workers", "sha256:DIFFERENT",
		[]Object{{Key: "p1.parquet", Size: 1024}}, nil)
	if !errors.Is(err, ErrFingerprintConflict) {
		t.Fatalf("expected ErrFingerprintConflict, got %v", err)
	}
}

// TestRehydrate_RecoversManifests registers two manifests through one
// Registry, then builds a second Registry over the same store and checks that
// Rehydrate reports two recovered manifests and that both names can be
// fetched with Get afterwards.
func TestRehydrate_RecoversManifests(t *testing.T) {
	ctx := context.Background()

	// Build first registry, register 2 manifests. Setup errors are fatal:
	// without both manifests in the store the count check below is
	// meaningless.
	r1, store := mkRegistry(t)
	if _, _, err := r1.Register(ctx, "workers", "sha256:a", nil, nil); err != nil {
		t.Fatalf("Register(workers): %v", err)
	}
	if _, _, err := r1.Register(ctx, "candidates", "sha256:b", nil, nil); err != nil {
		t.Fatalf("Register(candidates): %v", err)
	}

	// Build a second registry against the same store + rehydrate.
	r2 := NewRegistry(store)
	n, err := r2.Rehydrate(ctx)
	if err != nil {
		t.Fatalf("Rehydrate: %v", err)
	}
	if n != 2 {
		t.Errorf("recovered %d, want 2", n)
	}
	if _, err := r2.Get("workers"); err != nil {
		t.Errorf("Get(workers): %v", err)
	}
	if _, err := r2.Get("candidates"); err != nil {
		t.Errorf("Get(candidates): %v", err)
	}
}

// TestList_Sorted registers three names out of alphabetical order and checks
// that List returns exactly those three manifests ordered by name.
func TestList_Sorted(t *testing.T) {
	r, _ := mkRegistry(t)
	ctx := context.Background()

	if _, _, err := r.Register(ctx, "zoo", "fp", nil, nil); err != nil {
		t.Fatalf("Register(zoo): %v", err)
	}
	if _, _, err := r.Register(ctx, "alpha", "fp", nil, nil); err != nil {
		t.Fatalf("Register(alpha): %v", err)
	}
	if _, _, err := r.Register(ctx, "midway", "fp", nil, nil); err != nil {
		t.Fatalf("Register(midway): %v", err)
	}

	got := r.List()
	want := []string{"alpha", "midway", "zoo"}
	// Compare lengths first: a short result would otherwise pass the loop
	// vacuously, and a long one would index past the end of want.
	if len(got) != len(want) {
		t.Fatalf("List: got %d manifests, want %d", len(got), len(want))
	}
	for i, m := range got {
		if m.Name != want[i] {
			t.Errorf("List[%d]: got %q, want %q", i, m.Name, want[i])
		}
	}
}

// TestRegister_RejectsEmptyInputs checks that Register returns a non-nil
// error when the name is empty and, separately, when the fingerprint is
// empty. Both calls go through the same registry.
func TestRegister_RejectsEmptyInputs(t *testing.T) {
	reg, _ := mkRegistry(t)
	ctx := context.Background()

	// Empty name, valid fingerprint.
	if _, _, err := reg.Register(ctx, "", "fp", nil, nil); err == nil {
		t.Error("expected error on empty name")
	}

	// Valid name, empty fingerprint.
	if _, _, err := reg.Register(ctx, "x", "", nil, nil); err == nil {
		t.Error("expected error on empty fingerprint")
	}
}