// Package queryd is the SQL execution surface — a DuckDB engine that // reads Parquet directly from S3-compatible storage (MinIO in dev) // via DuckDB's httpfs extension. Views are registered from catalogd // manifests so user SQL just references dataset names. // // db.go owns the *sql.DB lifecycle: a custom Connector with a // bootstrapper that runs INSTALL httpfs / LOAD httpfs / CREATE OR // REPLACE SECRET on every new connection, plus a SetMaxOpenConns(1) // pin so the registrar's CREATE VIEWs and the handler's user SQL // serialize through one connection (avoids cross-connection visibility // edge cases for G0; lift to a pool when concurrency wins matter). package queryd import ( "context" "database/sql" "database/sql/driver" "fmt" "strings" "github.com/duckdb/duckdb-go/v2" "git.agentview.dev/profit/golangLAKEHOUSE/internal/secrets" "git.agentview.dev/profit/golangLAKEHOUSE/internal/shared" ) // OpenDB returns a *sql.DB backed by an in-memory DuckDB whose // connections are pre-loaded with httpfs + an S3 secret derived from // the shared S3 config + secrets provider. The bucket parameter names // the logical bucket whose credentials are pulled. // // Caller is responsible for Close()ing the returned db. func OpenDB(ctx context.Context, s3 shared.S3Config, prov secrets.Provider, bucketLogicalName string) (*sql.DB, error) { creds, err := prov.S3Credentials(bucketLogicalName) if err != nil { return nil, fmt.Errorf("queryd: secrets: %w", err) } bootstrap := buildBootstrap(s3, creds) // Stable labels per bootstrap statement. Per scrum B-LEAK (Kimi): // the prior firstLine(stmt) truncated CREATE OR REPLACE SECRET // to 80 chars, which contained both KEY_ID and the start of // SECRET — a log aggregator would capture credentials. Using // stable labels avoids putting the SQL into the error path at all. labels := []string{"install httpfs", "load httpfs", "create secret"} connector, err := duckdb.NewConnector("", func(execer driver.ExecerContext) error { // Per scrum B-CTX (Opus): use Background() inside the bootstrap // closure rather than capturing the OpenDB-call ctx. The // connector callback runs on EVERY new physical connection, // including reconnects long after OpenDB returned. A captured // short-lived ctx would silently fail every reconnect's // bootstrap. The passed ctx is only for the initial Ping below. for i, stmt := range bootstrap { if _, err := execer.ExecContext(context.Background(), stmt, nil); err != nil { return fmt.Errorf("queryd bootstrap %s: %s", labels[i], redactCreds(err.Error(), creds)) } } return nil }) if err != nil { return nil, fmt.Errorf("queryd: new connector: %w", err) } db := sql.OpenDB(connector) // One connection: the registrar's view CREATEs are visible to the // query handler's SELECTs without worrying about MVCC/timing. // Lift this for G2+ when concurrent reads matter. db.SetMaxOpenConns(1) if err := db.PingContext(ctx); err != nil { _ = db.Close() return nil, fmt.Errorf("queryd: ping: %w", err) } return db, nil } // buildBootstrap returns the SQL statements that initialize a fresh // DuckDB connection: install + load httpfs, then create-or-replace // the unnamed S3 secret. Endpoint is normalized — DuckDB wants // `host:port` without the scheme; the http/https split is controlled // by USE_SSL. func buildBootstrap(s3 shared.S3Config, creds secrets.S3Credentials) []string { endpoint := s3.Endpoint useSSL := true switch { case strings.HasPrefix(endpoint, "http://"): endpoint = strings.TrimPrefix(endpoint, "http://") useSSL = false case strings.HasPrefix(endpoint, "https://"): endpoint = strings.TrimPrefix(endpoint, "https://") useSSL = true } urlStyle := "vhost" if s3.UsePathStyle { urlStyle = "path" } // CREATE OR REPLACE so a reconnect doesn't error on the existing // secret. Single-quoted SQL string literals; we escape ' → '' for // belt-and-braces (creds shouldn't contain ' but a future SSO // token might). createSecret := fmt.Sprintf( "CREATE OR REPLACE SECRET (TYPE S3, KEY_ID '%s', SECRET '%s', REGION '%s', ENDPOINT '%s', URL_STYLE '%s', USE_SSL %t)", sqlEscape(creds.AccessKeyID), sqlEscape(creds.SecretAccessKey), sqlEscape(s3.Region), sqlEscape(endpoint), sqlEscape(urlStyle), useSSL, ) return []string{ "INSTALL httpfs", "LOAD httpfs", createSecret, } } // sqlEscape doubles single quotes in a SQL string literal value. // SQL identifier escaping (doubling " for quoted identifiers) lives // next to its use site in registrar.go. func sqlEscape(v string) string { return strings.ReplaceAll(v, "'", "''") } // redactCreds replaces known credential values inside an error // message with placeholder tokens. DuckDB's parser/exec error may // echo the offending statement; this scrubs the secret values // regardless. Per scrum B-LEAK (Kimi): a 503 from a typo in CREATE // SECRET would otherwise drop both KEY_ID and SECRET into the log // pipeline. func redactCreds(msg string, creds secrets.S3Credentials) string { if creds.AccessKeyID != "" { msg = strings.ReplaceAll(msg, creds.AccessKeyID, "[REDACTED-KEY]") } if creds.SecretAccessKey != "" { msg = strings.ReplaceAll(msg, creds.SecretAccessKey, "[REDACTED-SECRET]") } return msg }