root 01373c0e45 Phase 5: hardening — gRPC, observability, auth, config
- proto: lakehouse.proto with CatalogService, QueryService, StorageService, AiService
- proto crate: tonic-build codegen from proto definitions
- catalogd: gRPC CatalogService implementation
- gateway: dual HTTP (:3100) + gRPC (:3101) servers
- gateway: OpenTelemetry tracing with stdout exporter
- gateway: API key auth middleware (toggleable)
- shared: TOML config system with typed structs and defaults
- lakehouse.toml config file
- ADR-006 and ADR-007 documented

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 06:37:07 -05:00

96 lines
3.1 KiB
Rust

use proto::lakehouse::{
catalog_service_server::CatalogService,
CreateDatasetRequest, DatasetResponse, GetDatasetByNameRequest,
GetDatasetRequest, ListDatasetsRequest, ListDatasetsResponse,
ObjectRef as ProtoObjectRef,
};
use shared::types::{DatasetId, ObjectRef, SchemaFingerprint};
use tonic::{Request, Response, Status};
use uuid::Uuid;
use crate::registry::Registry;
/// gRPC front-end for the dataset catalog.
///
/// Holds the [`Registry`] that the `CatalogService` handlers delegate to.
pub struct CatalogGrpc {
    /// Backing registry consulted by every RPC handler.
    registry: Registry,
}
impl CatalogGrpc {
pub fn new(registry: Registry) -> Self {
Self { registry }
}
}
/// Converts a dataset manifest into the `DatasetResponse` wire message.
///
/// Identifiers are rendered with `to_string`, the schema fingerprint is
/// unwrapped to its inner value, and every timestamp (on the manifest and on
/// each object reference) is formatted as an RFC 3339 string.
fn manifest_to_proto(m: &shared::types::DatasetManifest) -> DatasetResponse {
    // Translate the object references first so the response literal below
    // stays a flat list of fields.
    let objects = m
        .objects
        .iter()
        .map(|object| ProtoObjectRef {
            bucket: object.bucket.clone(),
            key: object.key.clone(),
            size_bytes: object.size_bytes,
            created_at: object.created_at.to_rfc3339(),
        })
        .collect();

    DatasetResponse {
        id: m.id.to_string(),
        name: m.name.clone(),
        schema_fingerprint: m.schema_fingerprint.0.clone(),
        objects,
        created_at: m.created_at.to_rfc3339(),
        updated_at: m.updated_at.to_rfc3339(),
    }
}
#[tonic::async_trait]
impl CatalogService for CatalogGrpc {
async fn create_dataset(
&self,
request: Request<CreateDatasetRequest>,
) -> Result<Response<DatasetResponse>, Status> {
let req = request.into_inner();
let now = chrono::Utc::now();
let objects: Vec<ObjectRef> = req.objects.into_iter().map(|o| ObjectRef {
bucket: o.bucket,
key: o.key,
size_bytes: o.size_bytes,
created_at: now,
}).collect();
let manifest = self.registry
.register(req.name, SchemaFingerprint(req.schema_fingerprint), objects)
.await
.map_err(|e| Status::internal(e))?;
Ok(Response::new(manifest_to_proto(&manifest)))
}
async fn get_dataset(
&self,
request: Request<GetDatasetRequest>,
) -> Result<Response<DatasetResponse>, Status> {
let req = request.into_inner();
let uuid = Uuid::parse_str(&req.id).map_err(|e| Status::invalid_argument(e.to_string()))?;
let id = DatasetId(uuid);
match self.registry.get(&id).await {
Some(m) => Ok(Response::new(manifest_to_proto(&m))),
None => Err(Status::not_found(format!("dataset not found: {}", req.id))),
}
}
async fn get_dataset_by_name(
&self,
request: Request<GetDatasetByNameRequest>,
) -> Result<Response<DatasetResponse>, Status> {
let req = request.into_inner();
match self.registry.get_by_name(&req.name).await {
Some(m) => Ok(Response::new(manifest_to_proto(&m))),
None => Err(Status::not_found(format!("dataset not found: {}", req.name))),
}
}
async fn list_datasets(
&self,
_request: Request<ListDatasetsRequest>,
) -> Result<Response<ListDatasetsResponse>, Status> {
let datasets = self.registry.list().await;
let responses = datasets.iter().map(manifest_to_proto).collect();
Ok(Response::new(ListDatasetsResponse { datasets: responses }))
}
}