From f1fa6e4e61302366d3b551da16fa607c3457f604 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 3 May 2026 04:55:32 -0500 Subject: [PATCH] phase 1.6 Gate 3a: photo upload endpoint with consent gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per docs/PHASE_1_6_BIPA_GATES.md §1 Gate 3 (consent-gate substrate). Deepface classification (Gate 3b) deferred to its own session — needs Python subprocess design conversation after the 2026-05-02 sidecar drop. What ships: shared/types.rs: - new BiometricCollection sub-struct: data_path, template_hash, collected_at, consent_version_hash, classifications (Option) - SubjectManifest gains biometric_collection: Option<BiometricCollection> with #[serde(default)] so existing on-disk manifests parse and re-emit without drift catalogd/biometric_endpoint.rs (NEW, ~600 LOC): POST /biometric/subject/{candidate_id}/photo - Auth: X-Lakehouse-Legal-Token, constant-time-eq compared against same legal token file as /audit. Same 32-byte minimum. - Content-Type: must be image/jpeg or image/png (415 otherwise) - Body: raw image bytes, max 10MB - 401: missing or wrong token - 404: subject not registered - 403: consent.biometric.status != "given" (returns current status) - 403: subject status in {Withdrawn, Erased, RetentionExpired} - 200: writes photo to data/biometric/uploads/<candidate_id>/<timestamp_nanos>.<ext> with mode 0700 dir + 0600 file, updates SubjectManifest with BiometricCollection record, appends audit row (kind="biometric_collection", purpose="photo_upload"), returns UploadResponse with template_hash + audit_row_hmac. Logic split: pure async fn process_upload() takes the headers-as-args so unit tests exercise every branch without HTTP machinery; the axum handler is just glue. 10 tests covering all 4 reject paths + happy path + repeated uploads chaining + structural assertion that the quarantine path is NOT under data/headshots/ (synthetic faces). 
gateway/main.rs: Mounts /biometric on the same condition as /audit — only when the SubjectAuditWriter is present AND the legal token loads. Storage root configurable via LH_BIOMETRIC_STORAGE_ROOT (default ./data/biometric/uploads). Live verification on the running gateway (post-restart): - GET /biometric/health → "biometric endpoint ready" - POST without token → 401 auth_failed - POST with token, no consent → 403 consent_required (status=NeverCollected) - Flipped WORKER-2 to consent=given, POST → 200 with hash + path - File at data/biometric/uploads/WORKER-2/.jpg, mode 0600 - Manifest biometric_collection field reflects the upload - Audit row chain links cleanly off the prior validator_lookup row - GET /audit/subject/WORKER-2 returns chain_verified=true, 2 rows - Cross-runtime parity probe still 6/6 byte-identical post-change Phase 1.6 status table updated: Gate 3a DONE, Gate 3b (deepface) deferred. Calendar bottleneck remains counsel review of items 1/2/5/6. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/catalogd/src/audit_endpoint.rs | 1 + crates/catalogd/src/bin/backfill_subjects.rs | 1 + crates/catalogd/src/biometric_endpoint.rs | 616 +++++++++++++++++++ crates/catalogd/src/lib.rs | 1 + crates/catalogd/src/registry.rs | 1 + crates/gateway/src/main.rs | 24 +- crates/shared/src/types.rs | 35 ++ docs/PHASE_1_6_BIPA_GATES.md | 2 +- 8 files changed, 678 insertions(+), 3 deletions(-) create mode 100644 crates/catalogd/src/biometric_endpoint.rs diff --git a/crates/catalogd/src/audit_endpoint.rs b/crates/catalogd/src/audit_endpoint.rs index 7847434..95945a5 100644 --- a/crates/catalogd/src/audit_endpoint.rs +++ b/crates/catalogd/src/audit_endpoint.rs @@ -354,6 +354,7 @@ mod tests { safe_views: vec![], audit_log_path: String::new(), audit_log_chain_root: String::new(), + biometric_collection: None, } } diff --git a/crates/catalogd/src/bin/backfill_subjects.rs b/crates/catalogd/src/bin/backfill_subjects.rs index f444fc9..a6723ca 100644 --- 
a/crates/catalogd/src/bin/backfill_subjects.rs +++ b/crates/catalogd/src/bin/backfill_subjects.rs @@ -280,6 +280,7 @@ async fn run(args: Args) -> Result<(), String> { safe_views, audit_log_path: String::new(), audit_log_chain_root: String::new(), + biometric_collection: None, }; match reg.put_subject(manifest).await { Ok(_) => { inserted.fetch_add(1, Ordering::Relaxed); } diff --git a/crates/catalogd/src/biometric_endpoint.rs b/crates/catalogd/src/biometric_endpoint.rs new file mode 100644 index 0000000..bd7653a --- /dev/null +++ b/crates/catalogd/src/biometric_endpoint.rs @@ -0,0 +1,616 @@ +//! `POST /biometric/subject/{id}/photo` — Phase 1.6 Gate 3 photo intake. +//! +//! Specification: docs/PHASE_1_6_BIPA_GATES.md §1 Gate 3. +//! +//! This is the consent-gated entry point for REAL candidate photographs. +//! The pre-existing `/headshots/` route in mcp-server serves +//! SYNTHETIC StyleGAN faces from a deterministic hash → pool index; +//! that surface is unrelated to this endpoint and is NOT a place where +//! real candidate photos are ever stored or served. +//! +//! Flow: +//! 1. Caller sends `Content-Type: image/jpeg|image/png` + raw bytes +//! + `X-Lakehouse-Legal-Token` (writes to legal-tier storage are +//! privileged operations). +//! 2. Endpoint loads `SubjectManifest` for the candidate_id. +//! 3. Endpoint refuses 403 if `consent.biometric.status != "given"`. +//! 4. Endpoint hashes the bytes (SHA-256), writes to a quarantined +//! path under the configured biometric storage root with mode 0600. +//! 5. Endpoint updates the `SubjectManifest.biometric_collection` +//! field via Registry.put_subject() (creates audit on the manifest +//! via the registry's normal write path). +//! 6. Endpoint appends a `SubjectAuditRow` with +//! `accessor.kind="biometric_collection"`, `result="success"`, +//! `fields_accessed=["photo"]` to the per-subject HMAC chain. +//! +//! V1 deliberately does NOT: +//! - Run deepface against the photo. 
Classifications are deferred to +//! Gate 3b (offline subprocess design — Python sidecar was dropped +//! and we don't restore it here). +//! - Encrypt the photo at rest with a per-DEK. Filesystem ACL (mode +//! 0700 dir + 0600 file) is the v1 boundary; full at-rest encryption +//! is the storage-tier concern (Phase 1.6 spec §8). +//! - Validate that the bytes are actually a valid JPEG/PNG. The +//! content-type header gates the file extension; we trust the caller +//! to send well-formed images. A malformed image will fail downstream +//! when deepface runs in Gate 3b. + +use crate::registry::Registry; +use crate::subject_audit::SubjectAuditWriter; +use axum::{ + Json, Router, + body::Bytes, + extract::{DefaultBodyLimit, Path, State}, + http::{HeaderMap, StatusCode}, + response::IntoResponse, + routing::{get, post}, +}; +use serde::Serialize; +use sha2::{Digest, Sha256}; +use shared::types::{AuditAccessor, BiometricCollection, ConsentStatus, SubjectAuditRow}; +use std::path::PathBuf; +use std::sync::Arc; + +const LEGAL_TOKEN_HEADER: &str = "x-lakehouse-legal-token"; +const CONSENT_VERSION_HEADER: &str = "x-lakehouse-consent-version-hash"; +const TRACE_ID_HEADER: &str = "x-lakehouse-trace-id"; +const RESPONSE_SCHEMA: &str = "biometric_photo_response.v1"; +const MAX_PHOTO_BYTES: usize = 10 * 1024 * 1024; // 10MB ceiling + +/// State for the biometric endpoint router. Same shape pattern as +/// AuditEndpointState — registry + writer + legal_token, plus the +/// configured storage root for the quarantined uploads directory. +#[derive(Clone)] +pub struct BiometricEndpointState { + pub registry: Registry, + pub writer: Arc, + pub legal_token: Option>, + /// Filesystem path that prefixes every quarantined photo path. + /// Default: `/data/biometric/uploads`. Set per host via + /// LH_BIOMETRIC_STORAGE_ROOT env var. + pub storage_root: PathBuf, +} + +impl BiometricEndpointState { + /// Construct from existing pieces. 
`legal_token_path` is read at + /// construction (mirrors AuditEndpointState behavior so a shared + /// token file works for both endpoints). + pub async fn new( + registry: Registry, + writer: Arc, + legal_token_path: &std::path::Path, + storage_root: PathBuf, + ) -> Self { + let legal_token = match tokio::fs::read_to_string(legal_token_path).await { + Ok(s) => { + let trimmed = s.trim().to_string(); + if trimmed.len() < 32 { + tracing::warn!( + "biometric endpoint: legal token at {} is {} chars (min 32) — endpoint will 503", + legal_token_path.display(), trimmed.len() + ); + None + } else { + Some(Arc::new(trimmed)) + } + } + Err(e) => { + tracing::warn!( + "biometric endpoint: legal token unreadable from {} ({e}) — endpoint will 503", + legal_token_path.display() + ); + None + } + }; + // Ensure the storage root exists with restrictive perms. dir 0700 + // = owner-only (operator running the gateway). + if let Err(e) = tokio::fs::create_dir_all(&storage_root).await { + tracing::error!( + "biometric endpoint: failed to create storage root {}: {e}", + storage_root.display() + ); + } else { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = std::fs::set_permissions(&storage_root, std::fs::Permissions::from_mode(0o700)); + } + } + Self { registry, writer, legal_token, storage_root } + } +} + +pub fn router(state: BiometricEndpointState) -> Router { + Router::new() + .route("/subject/{candidate_id}/photo", post(upload_photo)) + .route("/health", get(biometric_health)) + .layer(DefaultBodyLimit::max(MAX_PHOTO_BYTES)) + .with_state(state) +} + +async fn biometric_health(State(state): State) -> impl IntoResponse { + if state.legal_token.is_some() { + (StatusCode::OK, "biometric endpoint ready").into_response() + } else { + (StatusCode::SERVICE_UNAVAILABLE, "biometric endpoint disabled (legal token missing)").into_response() + } +} + +/// Constant-time string comparison — same util as audit_endpoint. 
+/// Duplicated here rather than cross-imported because audit_endpoint is +/// not a stable public surface. +fn constant_time_eq(a: &[u8], b: &[u8]) -> bool { + if a.len() != b.len() { return false; } + let mut diff: u8 = 0; + for (x, y) in a.iter().zip(b.iter()) { + diff |= x ^ y; + } + diff == 0 +} + +fn require_legal_auth( + state: &BiometricEndpointState, + headers: &HeaderMap, +) -> Result<(), (StatusCode, &'static str)> { + let configured = state.legal_token.as_ref().ok_or(( + StatusCode::SERVICE_UNAVAILABLE, + "biometric endpoint disabled — no legal token configured", + ))?; + let provided = headers.get(LEGAL_TOKEN_HEADER).ok_or(( + StatusCode::UNAUTHORIZED, + "missing X-Lakehouse-Legal-Token header", + ))?; + let provided = provided.to_str().map_err(|_| ( + StatusCode::UNAUTHORIZED, + "X-Lakehouse-Legal-Token contains non-ASCII characters", + ))?; + if !constant_time_eq(provided.as_bytes(), configured.as_bytes()) { + return Err((StatusCode::UNAUTHORIZED, "X-Lakehouse-Legal-Token mismatch")); + } + Ok(()) +} + +/// Map Content-Type header to the file extension used in the +/// quarantined path. Unknown types are rejected (415) so we don't +/// silently store HEIC / WebP / SVG without an explicit decision. +fn extension_from_content_type(ct: Option<&str>) -> Result<&'static str, (StatusCode, &'static str)> { + match ct { + Some("image/jpeg") | Some("image/jpg") => Ok("jpg"), + Some("image/png") => Ok("png"), + _ => Err(( + StatusCode::UNSUPPORTED_MEDIA_TYPE, + "Content-Type must be image/jpeg or image/png", + )), + } +} + +/// Sanitize a candidate_id for filesystem use. Same character set as +/// SubjectAuditWriter::audit_key — keeps the per-subject directory +/// name aligned with the per-subject audit log. +fn sanitize_for_path(s: &str) -> String { + s.chars() + .map(|c| if c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' 
{ c } else { '_' }) + .collect() +} + +#[derive(Serialize, Debug)] +pub struct UploadResponse { + pub schema: &'static str, + pub candidate_id: String, + pub data_path: String, + pub template_hash: String, + pub collected_at: chrono::DateTime, + pub consent_version_hash: String, + pub retention_until: Option>, + pub audit_row_hmac: String, +} + +#[derive(Serialize, Debug)] +pub struct ErrorResponse { + pub error: &'static str, + pub detail: String, + pub consent_status: Option, +} + +async fn upload_photo( + State(state): State, + Path(candidate_id): Path, + headers: HeaderMap, + body: Bytes, +) -> impl IntoResponse { + let auth_token = headers + .get(LEGAL_TOKEN_HEADER) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + let content_type = headers + .get(axum::http::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + let consent_version_hash = headers + .get(CONSENT_VERSION_HEADER) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + let trace_id = headers + .get(TRACE_ID_HEADER) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + match process_upload( + &state, + &candidate_id, + auth_token.as_deref(), + content_type.as_deref(), + &consent_version_hash, + &trace_id, + body.as_ref(), + ).await { + Ok(resp) => (StatusCode::OK, Json(resp)).into_response(), + Err((status, err)) => (status, Json(err)).into_response(), + } +} + +/// Pure logic for the upload — extracted so unit tests can exercise +/// every branch without HTTP machinery. Inputs are the raw header +/// values + body bytes; output is either the success body or +/// (status, error body). Filesystem + registry + audit-writer +/// interactions stay in here so the test surface IS the behavior. +pub async fn process_upload( + state: &BiometricEndpointState, + candidate_id: &str, + legal_token: Option<&str>, + content_type: Option<&str>, + consent_version_hash: &str, + trace_id: &str, + body: &[u8], +) -> Result { + // Auth. 
+ let configured = state.legal_token.as_ref().ok_or(( + StatusCode::SERVICE_UNAVAILABLE, + ErrorResponse { error: "auth_failed", detail: "no legal token configured".into(), consent_status: None }, + ))?; + let provided = legal_token.ok_or(( + StatusCode::UNAUTHORIZED, + ErrorResponse { error: "auth_failed", detail: "missing X-Lakehouse-Legal-Token".into(), consent_status: None }, + ))?; + if !constant_time_eq(provided.as_bytes(), configured.as_bytes()) { + return Err(( + StatusCode::UNAUTHORIZED, + ErrorResponse { error: "auth_failed", detail: "X-Lakehouse-Legal-Token mismatch".into(), consent_status: None }, + )); + } + if candidate_id.is_empty() { + return Err((StatusCode::BAD_REQUEST, ErrorResponse { + error: "bad_request", detail: "candidate_id is empty".into(), consent_status: None, + })); + } + if body.is_empty() { + return Err((StatusCode::BAD_REQUEST, ErrorResponse { + error: "bad_request", detail: "photo body is empty".into(), consent_status: None, + })); + } + let ext = extension_from_content_type(content_type).map_err(|(s, m)| (s, ErrorResponse { + error: "unsupported_media_type", detail: m.into(), consent_status: None, + }))?; + + let mut manifest = state.registry.get_subject(candidate_id).await.ok_or(( + StatusCode::NOT_FOUND, + ErrorResponse { + error: "subject_not_found", + detail: format!("no SubjectManifest registered for candidate_id={candidate_id}"), + consent_status: None, + }, + ))?; + + use shared::types::{BiometricConsentStatus, SubjectStatus}; + if manifest.consent.biometric.status != BiometricConsentStatus::Given { + return Err((StatusCode::FORBIDDEN, ErrorResponse { + error: "consent_required", + detail: "BIPA consent required before biometric processing".into(), + consent_status: Some(format!("{:?}", manifest.consent.biometric.status)), + })); + } + if matches!(manifest.status, SubjectStatus::Withdrawn | SubjectStatus::Erased | SubjectStatus::RetentionExpired) { + return Err((StatusCode::FORBIDDEN, ErrorResponse { + error: 
"subject_inactive", + detail: format!("subject status {:?} — biometric collection not permitted", manifest.status), + consent_status: None, + })); + } + + let template_hash = { + let mut h = Sha256::new(); + h.update(body); + let bytes = h.finalize(); + const HEX: &[u8; 16] = b"0123456789abcdef"; + let mut s = String::with_capacity(64); + for b in bytes { s.push(HEX[(b >> 4) as usize] as char); s.push(HEX[(b & 0x0f) as usize] as char); } + s + }; + let collected_at = chrono::Utc::now(); + let safe_id = sanitize_for_path(candidate_id); + let subject_dir = state.storage_root.join(&safe_id); + tokio::fs::create_dir_all(&subject_dir).await.map_err(|e| ( + StatusCode::INTERNAL_SERVER_ERROR, + ErrorResponse { error: "storage_error", detail: format!("create dir: {e}"), consent_status: None }, + ))?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = std::fs::set_permissions(&subject_dir, std::fs::Permissions::from_mode(0o700)); + } + let filename = format!("{}.{}", collected_at.timestamp_nanos_opt().unwrap_or(0), ext); + let abs_path = subject_dir.join(&filename); + tokio::fs::write(&abs_path, body).await.map_err(|e| ( + StatusCode::INTERNAL_SERVER_ERROR, + ErrorResponse { error: "storage_error", detail: format!("write photo: {e}"), consent_status: None }, + ))?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = std::fs::set_permissions(&abs_path, std::fs::Permissions::from_mode(0o600)); + } + let stored_path = abs_path.to_string_lossy().to_string(); + + manifest.biometric_collection = Some(BiometricCollection { + data_path: stored_path.clone(), + template_hash: template_hash.clone(), + collected_at, + consent_version_hash: consent_version_hash.to_string(), + classifications: None, + }); + manifest.updated_at = collected_at; + state.registry.put_subject(manifest.clone()).await.map_err(|e| ( + StatusCode::INTERNAL_SERVER_ERROR, + ErrorResponse { error: "manifest_update_failed", detail: e, consent_status: None }, + ))?; + + let row = 
SubjectAuditRow { + schema: "subject_audit.v1".into(), + ts: collected_at, + candidate_id: candidate_id.to_string(), + accessor: AuditAccessor { + kind: "biometric_collection".into(), + daemon: "gateway".into(), + purpose: "photo_upload".into(), + trace_id: trace_id.to_string(), + }, + fields_accessed: vec!["photo".into(), "biometric_template_hash".into()], + result: "success".into(), + prev_chain_hash: String::new(), + row_hmac: String::new(), + }; + let audit_row_hmac = match state.writer.append(row).await { + Ok(h) => h, + Err(e) => { + // Photo is on disk + manifest updated. Audit failure is + // real but operator can investigate; return 200 with the + // audit hmac empty rather than 500 (which would imply + // nothing was persisted). + tracing::error!("biometric upload audit row failed for {candidate_id}: {e}"); + String::new() + } + }; + + let _ = ConsentStatus::Given; // Type-shape reference. + Ok(UploadResponse { + schema: RESPONSE_SCHEMA, + candidate_id: candidate_id.to_string(), + data_path: stored_path, + template_hash, + collected_at, + consent_version_hash: consent_version_hash.to_string(), + retention_until: manifest.consent.biometric.retention_until, + audit_row_hmac, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use object_store::memory::InMemory; + use object_store::ObjectStore; + use shared::types::{ + BiometricConsent, BiometricConsentStatus, ConsentStatus, GeneralPiiConsent, + SubjectConsent, SubjectManifest, SubjectRetention, SubjectStatus, SubjectVertical, + }; + + const TEST_TOKEN: &str = "0123456789abcdef0123456789abcdef"; + + /// Each test gets its own filesystem-backed registry + InMemory + /// audit-store (audit writer is decoupled from registry storage). + /// The storage_root is a per-test tmp dir under env temp_dir so we + /// don't pollute a shared location. 
+ async fn fixture_state(test_name: &str) -> BiometricEndpointState { + let tmp_root = std::env::temp_dir().join(format!( + "biometric_test_{test_name}_{}_{}", + std::process::id(), + uuid::Uuid::new_v4().simple(), + )); + let registry_root = tmp_root.join("registry"); + let storage_root = tmp_root.join("biometric").join("uploads"); + std::fs::create_dir_all(®istry_root).unwrap(); + let store = storaged::backend::init_local(registry_root.to_str().unwrap()); + let registry = Registry::new(store); + let audit_store: Arc = Arc::new(InMemory::new()); + let key: Vec = (0u8..32).collect(); + let writer = Arc::new(SubjectAuditWriter::with_inline_key(audit_store, key)); + BiometricEndpointState { + registry, + writer, + legal_token: Some(Arc::new(TEST_TOKEN.into())), + storage_root, + } + } + + fn fixture_manifest(id: &str, biometric_status: BiometricConsentStatus, status: SubjectStatus) -> SubjectManifest { + let now = chrono::Utc::now(); + SubjectManifest { + schema: "subject_manifest.v1".into(), + candidate_id: id.into(), + created_at: now, + updated_at: now, + status, + vertical: SubjectVertical::General, + consent: SubjectConsent { + general_pii: GeneralPiiConsent { + status: ConsentStatus::Given, + version: "v1".into(), + given_at: Some(now), + withdrawn_at: None, + }, + biometric: BiometricConsent { + status: biometric_status, + retention_until: Some(now + chrono::Duration::days(540)), + }, + }, + retention: SubjectRetention { + general_pii_until: now + chrono::Duration::days(1460), + policy: "test".into(), + }, + datasets: vec![], + safe_views: vec![], + audit_log_path: String::new(), + audit_log_chain_root: String::new(), + biometric_collection: None, + } + } + + fn jpeg_bytes() -> Vec { + // Minimal JPEG SOI + EOI markers — enough to satisfy the + // "non-empty body with image content-type" check. 
+ vec![0xff, 0xd8, 0xff, 0xd9] + } + + #[tokio::test] + async fn missing_token_rejected() { + let state = fixture_state("missing_token").await; + let err = process_upload(&state, "WORKER-1", None, Some("image/jpeg"), "", "", &jpeg_bytes()).await.unwrap_err(); + assert_eq!(err.0, StatusCode::UNAUTHORIZED); + assert_eq!(err.1.error, "auth_failed"); + } + + #[tokio::test] + async fn wrong_token_rejected() { + let state = fixture_state("wrong_token").await; + let err = process_upload(&state, "WORKER-1", Some("badtoken"), Some("image/jpeg"), "", "", &jpeg_bytes()).await.unwrap_err(); + assert_eq!(err.0, StatusCode::UNAUTHORIZED); + } + + #[tokio::test] + async fn wrong_content_type_rejected() { + let state = fixture_state("wrong_ct").await; + let _ = state.registry.put_subject(fixture_manifest("WORKER-1", BiometricConsentStatus::Given, SubjectStatus::Active)).await; + let err = process_upload(&state, "WORKER-1", Some(TEST_TOKEN), Some("application/octet-stream"), "", "", &jpeg_bytes()).await.unwrap_err(); + assert_eq!(err.0, StatusCode::UNSUPPORTED_MEDIA_TYPE); + } + + #[tokio::test] + async fn missing_subject_returns_404() { + let state = fixture_state("missing_subject").await; + let err = process_upload(&state, "NEVER-EXISTED", Some(TEST_TOKEN), Some("image/jpeg"), "", "", &jpeg_bytes()).await.unwrap_err(); + assert_eq!(err.0, StatusCode::NOT_FOUND); + assert_eq!(err.1.error, "subject_not_found"); + } + + #[tokio::test] + async fn consent_pending_refuses_403() { + let state = fixture_state("consent_pending").await; + let _ = state.registry.put_subject(fixture_manifest("WORKER-2", BiometricConsentStatus::Pending, SubjectStatus::Active)).await; + let err = process_upload(&state, "WORKER-2", Some(TEST_TOKEN), Some("image/jpeg"), "", "", &jpeg_bytes()).await.unwrap_err(); + assert_eq!(err.0, StatusCode::FORBIDDEN); + assert_eq!(err.1.error, "consent_required"); + assert_eq!(err.1.consent_status.as_deref(), Some("Pending")); + } + + #[tokio::test] + async fn 
consent_withdrawn_refuses_403() { + let state = fixture_state("consent_withdrawn").await; + let _ = state.registry.put_subject(fixture_manifest("WORKER-3", BiometricConsentStatus::Withdrawn, SubjectStatus::Active)).await; + let err = process_upload(&state, "WORKER-3", Some(TEST_TOKEN), Some("image/jpeg"), "", "", &jpeg_bytes()).await.unwrap_err(); + assert_eq!(err.0, StatusCode::FORBIDDEN); + assert_eq!(err.1.consent_status.as_deref(), Some("Withdrawn")); + } + + #[tokio::test] + async fn subject_erased_refuses_even_with_consent_given() { + // Defense-in-depth: if a subject was erased post-consent (BIPA + // RTBF flow), the manifest's biometric.status may still read + // "given" momentarily before the erase flow flips it. The + // subject-status gate refuses uploads on Erased / Withdrawn / + // RetentionExpired regardless of the biometric consent state. + let state = fixture_state("subject_erased").await; + let _ = state.registry.put_subject(fixture_manifest("WORKER-E", BiometricConsentStatus::Given, SubjectStatus::Erased)).await; + let err = process_upload(&state, "WORKER-E", Some(TEST_TOKEN), Some("image/jpeg"), "", "", &jpeg_bytes()).await.unwrap_err(); + assert_eq!(err.0, StatusCode::FORBIDDEN); + assert_eq!(err.1.error, "subject_inactive"); + } + + #[tokio::test] + async fn happy_path_writes_file_manifest_and_audit() { + let state = fixture_state("happy_path").await; + let _ = state.registry.put_subject(fixture_manifest("WORKER-4", BiometricConsentStatus::Given, SubjectStatus::Active)).await; + let writer = state.writer.clone(); + let registry = state.registry.clone(); + let storage_root = state.storage_root.clone(); + + let resp = process_upload(&state, "WORKER-4", Some(TEST_TOKEN), Some("image/jpeg"), "consent-v1-hash", "trace-xyz", &jpeg_bytes()) + .await.unwrap(); + assert_eq!(resp.schema, "biometric_photo_response.v1"); + assert_eq!(resp.candidate_id, "WORKER-4"); + assert!(resp.data_path.starts_with(storage_root.to_str().unwrap()), + "data_path {} should 
be under storage root {:?}", resp.data_path, storage_root); + assert_eq!(resp.template_hash.len(), 64); + assert_eq!(resp.consent_version_hash, "consent-v1-hash"); + assert!(!resp.audit_row_hmac.is_empty()); + + // File on disk at quarantined path with correct bytes. + let on_disk = tokio::fs::read(&resp.data_path).await.unwrap(); + assert_eq!(on_disk, jpeg_bytes()); + + // Manifest reflects the new collection record. + let updated = registry.get_subject("WORKER-4").await.unwrap(); + let bc = updated.biometric_collection.as_ref().expect("collection should be set"); + assert_eq!(bc.data_path, resp.data_path); + assert_eq!(bc.template_hash.len(), 64); + assert_eq!(bc.consent_version_hash, "consent-v1-hash"); + + // Audit log has one row, chain verifies, accessor.kind is right. + assert_eq!(writer.verify_chain("WORKER-4").await.unwrap(), 1); + let rows = writer.read_rows_in_range("WORKER-4", None, None).await.unwrap(); + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].accessor.kind, "biometric_collection"); + assert_eq!(rows[0].accessor.purpose, "photo_upload"); + assert_eq!(rows[0].accessor.trace_id, "trace-xyz"); + } + + #[tokio::test] + async fn repeated_uploads_grow_the_chain() { + let state = fixture_state("repeated").await; + let _ = state.registry.put_subject(fixture_manifest("WORKER-5", BiometricConsentStatus::Given, SubjectStatus::Active)).await; + let writer = state.writer.clone(); + for _ in 0..2 { + let _ = process_upload(&state, "WORKER-5", Some(TEST_TOKEN), Some("image/jpeg"), "", "", &jpeg_bytes()) + .await.unwrap(); + } + assert_eq!(writer.verify_chain("WORKER-5").await.unwrap(), 2); + } + + #[tokio::test] + async fn quarantine_path_is_not_data_headshots() { + // Defensive structural test: make sure the quarantined path + // does NOT live under data/headshots/. 
Any future change that + // accidentally pointed real-photo storage at the synthetic + // headshot dir would conflate the two surfaces — exactly the + // confusion Phase 1.6 Gate 3 exists to prevent. + let state = fixture_state("quarantine_isolation").await; + let _ = state.registry.put_subject(fixture_manifest("WORKER-Q", BiometricConsentStatus::Given, SubjectStatus::Active)).await; + let resp = process_upload(&state, "WORKER-Q", Some(TEST_TOKEN), Some("image/jpeg"), "", "", &jpeg_bytes()) + .await.unwrap(); + assert!(!resp.data_path.contains("/headshots/"), + "quarantine path {} must not be under /headshots/ (synthetic-only surface)", + resp.data_path); + assert!(resp.data_path.contains("/biometric/uploads/"), + "quarantine path {} should be under /biometric/uploads/", resp.data_path); + } +} diff --git a/crates/catalogd/src/lib.rs b/crates/catalogd/src/lib.rs index e78a5f3..c46bd2f 100644 --- a/crates/catalogd/src/lib.rs +++ b/crates/catalogd/src/lib.rs @@ -4,3 +4,4 @@ pub mod grpc; pub mod tombstones; pub mod subject_audit; pub mod audit_endpoint; +pub mod biometric_endpoint; diff --git a/crates/catalogd/src/registry.rs b/crates/catalogd/src/registry.rs index ad07538..efac9ea 100644 --- a/crates/catalogd/src/registry.rs +++ b/crates/catalogd/src/registry.rs @@ -1197,6 +1197,7 @@ mod tests { safe_views: vec![], audit_log_path: String::new(), audit_log_chain_root: String::new(), + biometric_collection: None, } } diff --git a/crates/gateway/src/main.rs b/crates/gateway/src/main.rs index 9b4a8e9..b1ee157 100644 --- a/crates/gateway/src/main.rs +++ b/crates/gateway/src/main.rs @@ -430,13 +430,33 @@ async fn main() { .unwrap_or_else(|_| "/etc/lakehouse/legal_audit.token".into()); let audit_state = catalogd::audit_endpoint::AuditEndpointState::new( registry.clone(), - writer, + writer.clone(), std::path::Path::new(&legal_token_path), ).await; app = app.nest("/audit", catalogd::audit_endpoint::router(audit_state)); tracing::info!("audit endpoint mounted at /audit 
(legal token: {})", legal_token_path); + + // Phase 1.6 Gate 3 — biometric photo upload endpoint. Same + // legal-token gate as /audit (writes to legal-tier storage are + // privileged). Storage root defaults to ./data/biometric/uploads + // unless overridden via LH_BIOMETRIC_STORAGE_ROOT (operators on + // multi-host deployments will want a dedicated mount point). + let biometric_storage_root: std::path::PathBuf = std::env::var("LH_BIOMETRIC_STORAGE_ROOT") + .map(std::path::PathBuf::from) + .unwrap_or_else(|_| std::path::PathBuf::from("./data/biometric/uploads")); + let biometric_state = catalogd::biometric_endpoint::BiometricEndpointState::new( + registry.clone(), + writer, + std::path::Path::new(&legal_token_path), + biometric_storage_root.clone(), + ).await; + app = app.nest("/biometric", catalogd::biometric_endpoint::router(biometric_state)); + tracing::info!( + "biometric endpoint mounted at /biometric (storage_root: {}, legal token: {})", + biometric_storage_root.display(), legal_token_path + ); } else { - tracing::warn!("/audit endpoint NOT mounted — subject_audit writer is None (no signing key)"); + tracing::warn!("/audit + /biometric endpoints NOT mounted — subject_audit writer is None (no signing key)"); } // Auth middleware (if enabled) — P5-001 fix 2026-04-23: diff --git a/crates/shared/src/types.rs b/crates/shared/src/types.rs index 370dccd..c5e5572 100644 --- a/crates/shared/src/types.rs +++ b/crates/shared/src/types.rs @@ -507,6 +507,41 @@ pub struct SubjectManifest { /// Empty until first audit row written. #[serde(default)] pub audit_log_chain_root: String, + /// Biometric collection metadata. None until the first biometric + /// upload via Phase 1.6 Gate 3 photo endpoint. Cleared on erasure. + /// `serde(default)` so existing manifests parse without re-emit. + #[serde(default)] + pub biometric_collection: Option, +} + +/// Per-subject biometric collection record. 
Written by the Gate 3 +/// photo-upload endpoint when a candidate's photo is accepted under +/// `consent.biometric.status == "given"`. +/// +/// Reset to None by the BIPA erasure flow (destruction runbook). +/// Preserved across schema evolutions via the BTreeMap-canonicalized +/// HMAC chain — old audit rows verify regardless of new fields. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct BiometricCollection { + /// Quarantined filesystem path of the original photo bytes, as + /// written by the upload endpoint: the configured biometric storage root joined with the subject directory and file name (the root is included, it is not root-relative). NEVER under + /// `data/headshots/` (which is reserved for synthetic StyleGAN + /// faces). Format: `<storage_root>/<sanitized candidate_id>/<timestamp_nanos>.<ext>`. + pub data_path: String, + /// SHA-256 of the original photo bytes. Used for integrity check + /// (file-on-disk matches what was uploaded), NOT for + /// re-derivation of the original from the hash. + pub template_hash: String, + pub collected_at: chrono::DateTime, + /// SHA-256 of the consent template version that was in force when + /// this photo was uploaded. Anchors the consent record to a + /// specific document version (per Phase 1.6 Gate 2 §6). + #[serde(default)] + pub consent_version_hash: String, + /// deepface classifications, when the offline classifier has run. + /// None until classifier integration ships (Gate 3b). + #[serde(default)] + pub classifications: Option, } fn default_subject_manifest_schema() -> String { "subject_manifest.v1".into() } diff --git a/docs/PHASE_1_6_BIPA_GATES.md b/docs/PHASE_1_6_BIPA_GATES.md index ca5d461..ddd25c5 100644 --- a/docs/PHASE_1_6_BIPA_GATES.md +++ b/docs/PHASE_1_6_BIPA_GATES.md @@ -188,7 +188,7 @@ of 2026-05-03 — scaffolds vs. counsel sign-off vs. 
shipped code: |---|---|---|---|---| | 1 | Public retention schedule | scaffolded at `docs/policies/consent/biometric_retention_schedule_v1.md` | pending | **eng-staged** | | 2 | Consent template | scaffolded at `docs/policies/consent/biometric_consent_template_v1.md` | pending | **eng-staged** | -| 3 | Photo-upload endpoint with consent enforcement | NOT STARTED — depends on identityd photo intake design + deepface integration | n/a until eng | **blocked-on-design** | +| 3 | Photo-upload endpoint with consent enforcement | DONE for the consent-gate substrate (`crates/catalogd/src/biometric_endpoint.rs` mounted at `/biometric/subject/{id}/photo`, 10 unit tests, live-verified end-to-end). Deepface classification deferred to **Gate 3b** (own session — needs Python subprocess design after sidecar drop). | n/a until 3b | **3a DONE, 3b deferred** | | 4 | Name → ethnicity inference removed | DONE — `mcp-server/search.html:3372` removal note + `mcp-server/phase_1_6_gate_4.test.ts` absence test (3/3 green) | none required | **DONE** | | 5 | Destruction runbook | scaffolded at `docs/runbooks/BIPA_DESTRUCTION_RUNBOOK.md`; erasure endpoint + verify/report scripts marked TODO | pending | **eng-staged** |