#!/usr/bin/env bash
# biometric_destruction_report — monthly destruction event aggregation.
#
# Specification: docs/runbooks/BIPA_DESTRUCTION_RUNBOOK.md §5.
# Spec: docs/PHASE_1_6_BIPA_GATES.md §1 Gate 5.
#
# Why this exists: counsel and operations review need a periodic
# attestation that destructions have happened in a defensible cadence.
# This script produces an anonymized monthly report aggregating
# per-subject audit logs.
#
# Output is anonymized — counts, timings, scope/trigger breakdowns,
# and chain attestations. Candidate IDs are hashed (sha256-prefix) so
# the report can be shared with counsel without exposing identifiers.
#
# Usage:
#   biometric_destruction_report.sh \
#       [--month YYYY-MM] \
#       [--audit-dir data/_catalog/subjects] \
#       [--output reports/biometric/destruction_<YYYY-MM>.md]
#
# Relative --audit-dir / --output paths are resolved against the
# directory two levels above this script (the script cd's there first).
#
# Defaults:
#   --month     — current UTC month (YYYY-MM)
#   --audit-dir — data/_catalog/subjects
#   --output    — reports/biometric/destruction_<YYYY-MM>.md
#
# Exit codes:
#   0 — report written successfully (whether or not events were found)
#   1 — report written but with anomalies that need review
#   2 — script error (bad arguments, missing tools, unreadable audit
#       dir, report or temp files could not be written)

# No `-e` on purpose: several lookups below are allowed to fail and are
# handled explicitly. Anything that must succeed is checked with `|| die`.
set -uo pipefail

# Print a failure message on stderr and exit with the "script error" code.
die() {
  echo "[report] FAIL: $*" >&2
  exit 2
}

# Resolve our own location BEFORE changing directory, so --help can
# still read this file when the script was invoked via a relative path.
SCRIPT_DIR=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd) \
  || die "cannot resolve script directory for '$0'"
SCRIPT_PATH="$SCRIPT_DIR/$(basename -- "$0")"
cd -- "$SCRIPT_DIR/../.." || die "cannot cd to $SCRIPT_DIR/../.."

# Print the header comment block (everything between the shebang and
# the first non-comment line) with the leading "# " stripped.
usage() {
  awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$SCRIPT_PATH"
}

# Abort with exit 2 when a value-taking flag is the last argument.
# Called as `need_value "$@"`, so $1 is the flag and $2 its value.
need_value() {
  [ "$#" -ge 2 ] || die "flag $1 requires a value"
}

# Extract one field from a single JSON row.
#   $1 - raw JSON text of the row
#   $2 - jq path expression (e.g. .accessor.kind)
# Prints the field, or an empty string when the path is null/absent or
# the row is not parseable.
json_field() {
  printf '%s' "$1" | jq -r "$2 // \"\"" 2>/dev/null || echo ""
}

MONTH=""
AUDIT_DIR="data/_catalog/subjects"
OUT=""

while [ "$#" -gt 0 ]; do
  case "$1" in
    --month)
      need_value "$@"
      MONTH="$2"
      shift 2
      ;;
    --audit-dir)
      need_value "$@"
      AUDIT_DIR="$2"
      shift 2
      ;;
    --output)
      need_value "$@"
      OUT="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown flag: $1" >&2
      exit 2
      ;;
  esac
done

# Default month = current UTC YYYY-MM. Validate format defensively
# so a malformed --month value (e.g. "May 2026") doesn't silently
# match nothing in the JSONL filter. The whole string is matched (not
# line by line), so a value with an embedded newline is rejected too.
if [ -z "$MONTH" ]; then
  MONTH=$(date -u +%Y-%m)
fi
if ! [[ "$MONTH" =~ ^[0-9]{4}-(0[1-9]|1[0-2])$ ]]; then
  die "--month must be YYYY-MM, got '$MONTH'"
fi

if [ -z "$OUT" ]; then
  OUT="reports/biometric/destruction_${MONTH}.md"
fi

# Dependency gates.
for cmd in jq sha256sum; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    die "required tool '$cmd' not found in PATH"
  fi
done

if [ ! -d "$AUDIT_DIR" ]; then
  die "audit dir not found at $AUDIT_DIR"
fi

mkdir -p -- "$(dirname -- "$OUT")" \
  || die "cannot create output directory for $OUT"

# Aggregator storage: EVENTS holds one tab-separated record per matching
# erasure row, ANOMALIES holds one human-readable line per suspicious row.
EVENTS=""
ANOMALIES=""
cleanup() {
  if [ -n "$EVENTS" ]; then rm -f -- "$EVENTS"; fi
  if [ -n "$ANOMALIES" ]; then rm -f -- "$ANOMALIES"; fi
}
trap cleanup EXIT
EVENTS=$(mktemp) || die "mktemp failed"
ANOMALIES=$(mktemp) || die "mktemp failed"

# Iterate every per-subject audit log under AUDIT_DIR. Each file is
# JSONL — one row per line. We extract erasure rows in the requested
# month + emit a normalized one-line record per event.
#
# NOTE(review): glob order follows the current locale's collation, and
# the "Events SHA-256" printed in the report hashes events in that
# order — confirm reports are always generated under the same locale.
TOTAL_FILES=0
TOTAL_ROWS_SCANNED=0
SHARDS_WITH_EVENTS=0
EVENT_COUNT=0
COUNT_BIOMETRIC_ONLY=0
COUNT_FULL=0
ANOMALY_COUNT=0

for f in "$AUDIT_DIR"/*.audit.jsonl; do
  # An unmatched glob stays literal; skip it.
  [ -e "$f" ] || continue
  TOTAL_FILES=$((TOTAL_FILES + 1))

  HAD_EVENT=0
  # `|| [ -n "$line" ]` keeps a final row that lacks a trailing newline;
  # without it the last audit row of such a file would be dropped.
  while IFS= read -r line || [ -n "$line" ]; do
    # Every physical line counts as a scanned row (blank ones included).
    TOTAL_ROWS_SCANNED=$((TOTAL_ROWS_SCANNED + 1))
    [ -n "$line" ] || continue

    # Filter rows for erasure kinds + the requested month.
    KIND=$(json_field "$line" '.accessor.kind')
    case "$KIND" in
      biometric_erasure|full_erasure) ;;
      *) continue ;;
    esac

    TS=$(json_field "$line" '.ts')
    case "$TS" in
      "${MONTH}-"*) ;;  # only this month
      *) continue ;;
    esac

    HAD_EVENT=1
    CID=$(json_field "$line" '.candidate_id')
    PURPOSE=$(json_field "$line" '.accessor.purpose')
    RESULT=$(json_field "$line" '.result')

    # accessor.purpose is expected to carry a "trigger=<name>" token
    # (per biometric_endpoint); <name> is matched as [a-z_]+.
    TRIGGER=$(printf '%s' "$PURPOSE" | sed -nE 's/.*trigger=([a-z_]+).*/\1/p')
    [ -n "$TRIGGER" ] || TRIGGER="unknown"

    # Hash candidate_id so the report stays anonymized (first 12 hex
    # characters of the SHA-256 digest).
    CID_HASH=$(printf '%s' "$CID" | sha256sum | awk '{print substr($1,1,12)}')

    # Anomaly: erasure row but result not in {erased, success}.
    case "$RESULT" in
      erased|success) ;;
      *)
        echo " - candidate_hash=$CID_HASH ts=$TS kind=$KIND result=$RESULT trigger=$TRIGGER (unexpected result)" >> "$ANOMALIES"
        ANOMALY_COUNT=$((ANOMALY_COUNT + 1))
        ;;
    esac

    # Tab-separated event line: ts, kind, trigger, result, cid_hash
    printf '%s\t%s\t%s\t%s\t%s\n' "$TS" "$KIND" "$TRIGGER" "$RESULT" "$CID_HASH" >> "$EVENTS"

    # Scope breakdown; KIND can only be one of these two at this point.
    EVENT_COUNT=$((EVENT_COUNT + 1))
    case "$KIND" in
      biometric_erasure) COUNT_BIOMETRIC_ONLY=$((COUNT_BIOMETRIC_ONLY + 1)) ;;
      full_erasure) COUNT_FULL=$((COUNT_FULL + 1)) ;;
    esac
  done < "$f"

  if [ "$HAD_EVENT" = "1" ]; then
    SHARDS_WITH_EVENTS=$((SHARDS_WITH_EVENTS + 1))
  fi
done

# Render the report. The whole group is redirected to $OUT; if the file
# cannot be opened or written we fail with exit 2 instead of claiming
# success below.
GENERATED_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ)
{
  echo "# Biometric Destruction Report — $MONTH"
  echo
  echo "**Generated:** $GENERATED_AT"
  echo "**Audit dir scanned:** \`$AUDIT_DIR\`"
  echo "**Spec:** docs/runbooks/BIPA_DESTRUCTION_RUNBOOK.md §5"
  echo "**Generator:** scripts/staffing/biometric_destruction_report.sh"
  echo
  echo "## Scope"
  echo
  echo "- **Subject audit shards scanned:** $TOTAL_FILES"
  echo "- **Audit rows scanned (all kinds):** $TOTAL_ROWS_SCANNED"
  echo "- **Shards containing $MONTH erasure events:** $SHARDS_WITH_EVENTS"
  echo
  echo "## Destruction events in $MONTH"
  echo
  echo "- **Total events:** $EVENT_COUNT"
  echo "- **By scope:**"
  echo " - \`biometric_erasure\` (BiometricCollection cleared, manifest retained): $COUNT_BIOMETRIC_ONLY"
  echo " - \`full_erasure\` (manifest + biometric data cleared): $COUNT_FULL"
  echo

  if [ "$EVENT_COUNT" = "0" ]; then
    echo "**No destruction events recorded for $MONTH.** This is correct"
    echo "for a month with no retention expiries / withdrawal requests"
    echo "/ RTBF requests / court orders."
    echo
  else
    echo "### By trigger"
    echo
    echo "| Trigger | Count |"
    echo "|---|---|"
    # Column 3 of EVENTS is the trigger; most frequent first.
    awk -F '\t' '{print $3}' "$EVENTS" | sort | uniq -c | \
      sort -rn | awk '{ printf("| %s | %d |\n", $2, $1); }'
    echo
    echo "### Event detail (anonymized)"
    echo
    echo "Candidate IDs are hashed (sha256-12-prefix) so this report can"
    echo "be shared with outside counsel without exposing identifiers."
    echo
    echo "| ts | kind | trigger | result | candidate_hash |"
    echo "|---|---|---|---|---|"
    # Chronological order by the timestamp column.
    sort -k1,1 "$EVENTS" | awk -F '\t' '{ printf("| %s | %s | %s | %s | %s |\n", $1, $2, $3, $4, $5); }'
    echo
  fi

  if [ "$ANOMALY_COUNT" -gt 0 ]; then
    echo "## Anomalies ($ANOMALY_COUNT)"
    echo
    echo "Events whose audit row deviates from expected shape (kind/result"
    echo "mismatch, missing trigger, etc.). These do NOT necessarily mean"
    echo "the destruction failed — the BIPA-load-bearing surface is the"
    echo "audit chain, which still verifies cryptographically. They are"
    echo "logged here so an operator can investigate and confirm."
    echo
    echo '```'
    cat "$ANOMALIES"
    echo '```'
    echo
  fi

  echo "## Cryptographic attestation"
  echo
  echo "This report was produced by aggregating per-subject HMAC-chained"
  echo "audit logs. The chain itself is the BIPA-defensible substrate;"
  echo "this report is a derived view, not the chain of record. To verify"
  echo "any individual event, run:"
  echo
  echo '```bash'
  # NOTE(review): the command below ends with a trailing space — a
  # placeholder argument may have been lost; confirm against the runbook.
  echo "./scripts/staffing/verify_biometric_erasure.sh "
  echo '```'
  echo "(operator must un-hash the candidate ID through their own"
  echo " operator log to perform spot-checks)."
  echo
  echo "**Cross-runtime parity:** the same audit logs are byte-identical"
  echo "under Rust + Go (per scripts/cutover/parity/subject_audit_parity.sh)."
  echo "If counsel needs cross-runtime attestation, that probe provides it."
  echo

  # Digest of the raw (unsorted) event records; falls back to the
  # digest of empty input if the events file cannot be hashed.
  EVIDENCE_HASH=$(sha256sum "$EVENTS" 2>/dev/null | awk '{print $1}')
  : "${EVIDENCE_HASH:=$(printf '' | sha256sum | awk '{print $1}')}"
  echo "**Events SHA-256:** \`$EVIDENCE_HASH\`"
  echo
  echo "---"
  echo
  echo "**Operator (J):** _______________________________ Date: __________"
  echo
} > "$OUT" || die "could not write report to $OUT"

echo "[report] $EVENT_COUNT destruction events in $MONTH ($COUNT_BIOMETRIC_ONLY biometric_only, $COUNT_FULL full)"
echo "[report] anomalies: $ANOMALY_COUNT"
echo "[report] output: $OUT"

# Exit 1 if anomalies present (review needed) but report still written.
if [ "$ANOMALY_COUNT" -gt 0 ]; then
  exit 1
fi
exit 0