// registrar.go — turn catalogd's manifest list into DuckDB views.
//
// Refresh() reads /catalog/list, runs CREATE OR REPLACE VIEW for
// every manifest, drops views for manifests that disappeared, and
// skips any whose updated_at hasn't changed since the prior refresh
// (per kickoff D5.3 spec — "don't re-CREATE on every request,
// Opus review flagged that as the perf cliff").
//
// Identifier safety: all view names are double-quoted in SQL with
// internal " escaped to "" — catalogd accepts user-supplied dataset
// names that wouldn't pass SQL bare-identifier rules (hyphens,
// digit-leading, reserved words). Quoting makes them unambiguous.
package queryd

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"strings"
	"time"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/catalogd"
)

// CatalogLister is what the registrar needs from the catalog client —
// only List, no Register. It is defined as an interface so unit tests
// can inject a fake without spinning up a real catalogd.
type CatalogLister interface {
	// List returns the manifests currently known to the catalog.
	List(ctx context.Context) ([]*catalogd.Manifest, error)
}

// Execer is the subset of *sql.DB that the registrar actually uses.
// Same testability win as CatalogLister — unit tests can record the
// SQL strings without booting DuckDB. *sql.DB satisfies this directly.
type Execer interface {
	// ExecContext runs a statement that returns no rows.
	ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
}

// Registrar holds the running view state and the dependencies needed
// to refresh it.
//
// NOTE(review): no lock guards the known map in this struct — confirm
// that callers never run Refresh concurrently on the same Registrar.
type Registrar struct {
	exec    Execer        // executes the CREATE / DROP VIEW statements
	catalog CatalogLister // source of the manifest list
	bucket  string        // S3 bucket name used when building s3:// URLs

	// known tracks the current view names + the updated_at we last
	// rebuilt them from. Used by Refresh to skip unchanged views and
	// drop ones that disappeared from the catalog.
	known map[string]time.Time
}

// NewRegistrar builds a fresh registrar with an empty known-view set.
// bucket is the S3 bucket name used to construct s3:// URLs for
// read_parquet().
func NewRegistrar(exec Execer, catalog CatalogLister, bucket string) *Registrar { return &Registrar{ exec: exec, catalog: catalog, bucket: bucket, known: make(map[string]time.Time), } } // RefreshStats reports what Refresh did — for logs + tests. type RefreshStats struct { Created int Updated int Dropped int Skipped int } // Refresh syncs DuckDB's view catalog with catalogd's manifest list. // Returns counts of {Created, Updated, Dropped, Skipped} so the // caller can decide whether to log loudly. // // Per scrum C1 (Opus + Kimi convergent): drop pass runs BEFORE create // pass so a failure in one view's CREATE doesn't block another view's // DROP. Per-iteration errors are collected and the refresh continues // — a single poison manifest must not block the whole catalog from // re-syncing. The collected errors are joined into the return value; // callers see "everything that happened to fail this round." func (r *Registrar) Refresh(ctx context.Context) (RefreshStats, error) { var stats RefreshStats manifests, err := r.catalog.List(ctx) if err != nil { return stats, fmt.Errorf("registrar list: %w", err) } wantNames := make(map[string]struct{}, len(manifests)) for _, m := range manifests { wantNames[m.Name] = struct{}{} } var errs []error // 1. Drop views whose manifests disappeared. Run first so the // create pass can't block removals on a poison-create error. for name := range r.known { if _, still := wantNames[name]; still { continue } if err := r.dropView(ctx, name); err != nil { errs = append(errs, err) continue } delete(r.known, name) stats.Dropped++ } // 2. Create / update views for current manifests. for _, m := range manifests { prior, exists := r.known[m.Name] if exists && prior.Equal(m.UpdatedAt) { stats.Skipped++ continue } if err := r.createOrReplaceView(ctx, m); err != nil { errs = append(errs, err) continue } if exists { stats.Updated++ } else { stats.Created++ } r.known[m.Name] = m.UpdatedAt } if len(errs) > 0 { return stats, errors.Join(errs...) 
} return stats, nil } // createOrReplaceView builds the DuckDB SQL for one manifest and // runs it. G0 manifests have exactly one Object per dataset; if more // land later (G2 multi-part), `read_parquet([...])` accepts a list. func (r *Registrar) createOrReplaceView(ctx context.Context, m *catalogd.Manifest) error { if len(m.Objects) == 0 { return fmt.Errorf("registrar: manifest %q has no objects", m.Name) } urls := make([]string, len(m.Objects)) for i, obj := range m.Objects { urls[i] = fmt.Sprintf("'%s'", sqlEscape(buildS3URL(r.bucket, obj.Key))) } var fromExpr string if len(urls) == 1 { fromExpr = "read_parquet(" + urls[0] + ")" } else { fromExpr = "read_parquet([" + strings.Join(urls, ", ") + "])" } sql := fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s", quoteIdent(m.Name), fromExpr) if _, err := r.exec.ExecContext(ctx, sql); err != nil { return fmt.Errorf("create view %q: %w", m.Name, err) } return nil } func (r *Registrar) dropView(ctx context.Context, name string) error { sql := "DROP VIEW IF EXISTS " + quoteIdent(name) if _, err := r.exec.ExecContext(ctx, sql); err != nil { return fmt.Errorf("drop view %q: %w", name, err) } return nil } // quoteIdent wraps a SQL identifier in double quotes and escapes // internal " by doubling. This is the SQL-standard rule — works in // every engine that accepts quoted identifiers. func quoteIdent(name string) string { return `"` + strings.ReplaceAll(name, `"`, `""`) + `"` } // buildS3URL composes the s3://bucket/key URL DuckDB's httpfs // extension consumes. Keys aren't URL-escaped because read_parquet // accepts S3-style literal paths. func buildS3URL(bucket, key string) string { return "s3://" + bucket + "/" + key }