lakehouse/scripts/staffing/subject_timeline.sh

#!/usr/bin/env bash
# subject_timeline — pretty-print a subject's full BIPA lifecycle.
#
# Specification: docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §6
#                + docs/runbooks/BIPA_DESTRUCTION_RUNBOOK.md §3.
#
# Why this exists: when an operator gets a question like "what
# happened to candidate X's biometric data" — counsel inquiry,
# subject access request, or just routine triage — they need a
# one-shot view of the full lineage. /audit/subject/{id} returns
# the raw JSON; this wraps it in a human-readable timeline.
#
# Output:
#   - Manifest summary (status, biometric status, retention_until)
#   - Audit chain (chronological, kind + result + ts + hmac prefix)
#   - Chain verification status (HMAC chain integrity)
#   - On-disk photo presence + size if applicable
#
# Usage:
#   subject_timeline.sh <candidate_id>
#
# Environment:
#   GATEWAY_URL        — default http://localhost:3100
#   LEGAL_TOKEN_FILE   — default /etc/lakehouse/legal_audit.token
#   UPLOADS_ROOT       — default data/biometric/uploads (relative to repo)
#
# Exit codes:
#   0 — timeline printed and the audit chain verified
#   1 — timeline printed but chain verification failed (a finding about the
#       data, not a script error; the output is flagged accordingly)
#   2 — script error (missing tools, network failure, bad token, subject not found)

# Treat unset variables as errors and make a pipeline fail when any stage
# fails. `-e` is not enabled, so every command whose failure matters has to
# be checked explicitly.
set -uo pipefail

# Run from two levels above this script's directory so relative paths (the
# UPLOADS_ROOT default, for one) resolve the same way from any caller cwd.
# Without `-e` an unchecked cd failure would silently leave us in the
# caller's directory, so stop here instead.
if ! cd "$(dirname "$0")/../.."; then
  echo "[timeline] FAIL: cannot cd to $(dirname "$0")/../.." >&2
  exit 2
fi

# Exactly one thing is required: a non-empty candidate id. An empty string
# is rejected as well, because it would turn the request into
# GET /audit/subject/ and the uploads path into the uploads root itself.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "usage: subject_timeline.sh <candidate_id>" >&2
  exit 2
fi

CANDIDATE_ID="$1"

# Each setting can be overridden from the environment; the value after `:-`
# is used when the variable is unset or empty.
GATEWAY_URL="${GATEWAY_URL:-http://localhost:3100}"
LEGAL_TOKEN_FILE="${LEGAL_TOKEN_FILE:-/etc/lakehouse/legal_audit.token}"
UPLOADS_ROOT="${UPLOADS_ROOT:-data/biometric/uploads}"

# curl and jq must both be on PATH; stop at the first one that is missing.
for required_tool in curl jq; do
  command -v "$required_tool" >/dev/null 2>&1 && continue
  echo "[timeline] FAIL: required tool '$required_tool' not found" >&2
  exit 2
done

# Load the legal-audit token from its file. All whitespace (including the
# trailing newline) is stripped; an unreadable or effectively empty file is
# a script error.
[ -r "$LEGAL_TOKEN_FILE" ] || {
  echo "[timeline] FAIL: cannot read legal token at $LEGAL_TOKEN_FILE" >&2
  exit 2
}
LEGAL_TOKEN=$(tr -d '[:space:]' < "$LEGAL_TOKEN_FILE")
if [ -z "$LEGAL_TOKEN" ]; then
  echo "[timeline] FAIL: legal token file is empty" >&2
  exit 2
fi

#######################################
# Map a candidate id to the directory name used under the uploads root.
# Every character outside A-Z a-z 0-9 _ . - is replaced with "_".
# Intended to mirror catalogd::biometric_endpoint::sanitize_for_path.
# NOTE(review): the earlier pattern `[^A-Za-z0-9_.\-]` also let "\" through,
# because a backslash is an ordinary character inside a bracket expression.
# This version replaces it — confirm the catalogd sanitizer does the same.
# Arguments: $1 - raw candidate id
# Outputs:   sanitized id on stdout
#######################################
sanitize_for_path() {
  # "-" is placed last in the bracket expression so it is taken literally.
  printf '%s' "$1" | sed 's/[^A-Za-z0-9_.-]/_/g'
}

SAFE_ID=$(sanitize_for_path "$CANDIDATE_ID")

# Fetch the subject record from the gateway into a temp file that is removed
# on every exit path.
RESP_FILE=$(mktemp) || {
  echo "[timeline] FAIL: mktemp could not create a response file" >&2
  exit 2
}
trap 'rm -f "$RESP_FILE"' EXIT

# Percent-encode the id before placing it in the URL path, so ids containing
# spaces, "/", "?", "#" or "%" reach the endpoint as a single path segment.
ENCODED_ID=$(jq -rn --arg id "$CANDIDATE_ID" '$id | @uri') || {
  echo "[timeline] FAIL: could not URL-encode candidate id" >&2
  exit 2
}

# -sS: quiet, but still print transport errors. The body goes to RESP_FILE
# and only the HTTP status code is captured on stdout.
# NOTE(review): the token is passed on curl's command line and is therefore
# visible in the process list while curl runs; curl >= 7.55 can read headers
# from a file/stdin (-H @file) if that matters on this host.
CURL_STATUS=0
HTTP_CODE=$(curl -sS -o "$RESP_FILE" -w '%{http_code}' \
  -H "X-Lakehouse-Legal-Token: $LEGAL_TOKEN" \
  -H "Accept: application/json" \
  "$GATEWAY_URL/audit/subject/$ENCODED_ID") || CURL_STATUS=$?

if [ "$CURL_STATUS" -ne 0 ]; then
  echo "[timeline] FAIL: curl exited $CURL_STATUS for GET /audit/subject/$CANDIDATE_ID at $GATEWAY_URL" >&2
  exit 2
fi

if [ "$HTTP_CODE" != "200" ]; then
  echo "[timeline] FAIL: GET /audit/subject/$CANDIDATE_ID returned HTTP $HTTP_CODE" >&2
  echo "[timeline] response:" >&2
  cat "$RESP_FILE" >&2
  echo >&2
  exit 2
fi

# A 200 whose body is not a JSON object cannot be rendered. Stop here rather
# than print a timeline of empty fields that ends up reported as a chain
# verification failure.
if ! jq -e 'type == "object"' < "$RESP_FILE" >/dev/null 2>&1; then
  echo "[timeline] FAIL: GET /audit/subject/$CANDIDATE_ID returned HTTP 200 but the body is not a JSON object" >&2
  exit 2
fi

# ── Header ──────────────────────────────────────────────────────────
# Title banner, with one blank line before it and one after.
printf '\n═══ Subject Timeline — %s ═══\n\n' "$CANDIDATE_ID"

# ── Manifest summary ───────────────────────────────────────────────
# manifest_field FILTER — evaluate a jq filter against the saved response
# and print the result in raw (-r) form.
manifest_field() {
  jq -r "$1" < "$RESP_FILE"
}

printf 'Manifest\n'
printf '  candidate_id      : %s\n' "$(manifest_field '.manifest.candidate_id')"
printf '  subject status    : %s\n' "$(manifest_field '.manifest.status')"
printf '  vertical          : %s\n' "$(manifest_field '.manifest.vertical')"
printf '  general_pii       : %s (until %s)\n' \
  "$(manifest_field '.manifest.consent.general_pii.status')" \
  "$(manifest_field '.manifest.retention.general_pii_until')"
printf '  biometric         : %s\n' "$(manifest_field '.manifest.consent.biometric.status')"
# A missing/null biometric retention date is shown as an em dash.
printf '  biometric retent. : %s\n' \
  "$(manifest_field '.manifest.consent.biometric.retention_until // "—"')"

# The photo details are printed only when the manifest carries a
# biometric_collection object.
if [ "$(manifest_field '.manifest.biometric_collection != null')" = "true" ]; then
  printf '  photo data_path   : %s\n' "$(manifest_field '.manifest.biometric_collection.data_path')"
  printf '  photo template    : %s\n' "$(manifest_field '.manifest.biometric_collection.template_hash')"
  printf '  photo collected   : %s\n' "$(manifest_field '.manifest.biometric_collection.collected_at')"
  printf '  consent_ver_hash  : %s\n' "$(manifest_field '.manifest.biometric_collection.consent_version_hash')"
fi

# ── On-disk photo state ────────────────────────────────────────────
#######################################
# Print the "On disk" section for one uploads directory.
# Arguments: $1 - directory to inspect
# Outputs:   the section heading, then either "<dir> (absent)" or the
#            directory with its regular-file count followed by one
#            "- <path> (<size> bytes)" line per file directly inside it.
#######################################
print_on_disk_state() {
  local photo_dir=$1
  local f size
  local -a files=()

  printf '\nOn disk\n'
  if [ ! -d "$photo_dir" ]; then
    printf '  uploads dir       : %s (absent)\n' "$photo_dir"
    return 0
  fi

  # One NUL-delimited traversal feeds both the count and the listing, so the
  # two cannot disagree and a file name containing a newline counts once.
  while IFS= read -r -d '' f; do
    files+=("$f")
  done < <(find "$photo_dir" -maxdepth 1 -type f -print0 2>/dev/null)

  printf '  uploads dir       : %s (%s file(s))\n' "$photo_dir" "${#files[@]}"

  # The ${arr[@]+...} guard keeps an empty array from tripping `set -u` on
  # bash versions older than 4.4.
  for f in ${files[@]+"${files[@]}"}; do
    # GNU stat first, then the BSD/macOS spelling; '?' if neither works.
    size=$(stat -c '%s' "$f" 2>/dev/null) \
      || size=$(stat -f '%z' "$f" 2>/dev/null) \
      || size='?'
    printf '    - %s (%s bytes)\n' "$f" "$size"
  done
}

PHOTO_DIR="$UPLOADS_ROOT/$SAFE_ID"
print_on_disk_state "$PHOTO_DIR"

# ── Audit chain ────────────────────────────────────────────────────
#######################################
# Print one line per audit row, ordered by .ts.
# A missing/null .audit_log.rows is treated as an empty list, and a row
# without .accessor.kind is shown as UNKNOWN, so one incomplete row no
# longer aborts jq and hides every row after it.
# Arguments: $1 - path to the saved /audit/subject response JSON
# Outputs:   "<ts> | <KIND> | result=… | hmac=<first 16 chars>… | trace=…"
# Returns:   jq's exit status
#######################################
print_audit_events() {
  jq -r '
    (.audit_log.rows // [])
    | sort_by(.ts)
    | .[]
    | "    \(.ts) | \(.accessor.kind // "unknown" | tostring | ascii_upcase) | result=\(.result) | hmac=\(.row_hmac[0:16])… | trace=\(.accessor.trace_id // "—")"
  ' < "$1"
}

printf '\nAudit chain\n'
ROWS_TOTAL=$(jq -r '.audit_log.chain_rows_total' < "$RESP_FILE")
VERIFIED=$(jq -r '.audit_log.chain_verified' < "$RESP_FILE")
# A missing chain root is shown as an em dash; a missing error as nothing.
ROOT=$(jq -r '.audit_log.chain_root // "—"' < "$RESP_FILE")
ERROR=$(jq -r '.audit_log.chain_verification_error // ""' < "$RESP_FILE")
printf '  rows total        : %s\n' "$ROWS_TOTAL"
printf '  verified          : %s\n' "$VERIFIED"
printf '  chain root (last) : %s\n' "$ROOT"
if [ -n "$ERROR" ] && [ "$ERROR" != "null" ]; then
  printf '  verification err  : %s\n' "$ERROR"
fi

if [ "$ROWS_TOTAL" != "0" ]; then
  printf '\n  events (chronological):\n'
  if ! print_audit_events "$RESP_FILE"; then
    # Make a partial listing visible instead of passing it off as complete.
    echo "[timeline] WARN: could not render every audit row; the event list above may be incomplete" >&2
  fi
fi

# ── Footer ─────────────────────────────────────────────────────────
# Turn the chain_verified value into the final status line and exit code:
#   "true"  → verified, exit 0
#   "false" → verification failed, exit 1
#   other   → the response carried no usable boolean (empty, "null", …);
#             that is a script-level error (exit 2), not evidence about the
#             chain, so it is not reported as a verification failure.
printf '\n'
case "$VERIFIED" in
  true)
    printf 'Status: chain verified end-to-end.\n'
    printf '\n'
    exit 0
    ;;
  false)
    printf 'Status: CHAIN VERIFICATION FAILED. Investigate before quoting this timeline\n'
    printf '        in any external response. Likely causes: post-rotation legacy chain\n'
    printf '        (expected) or actual tampering (escalate to engineering + counsel).\n'
    printf '\n'
    exit 1
    ;;
  *)
    printf 'Status: chain verification result unavailable. Do not quote this timeline\n'
    printf '        in any external response until the lookup has been re-run.\n'
    printf '\n'
    echo "[timeline] FAIL: .audit_log.chain_verified was '$VERIFIED' (expected true or false)" >&2
    exit 2
    ;;
esac