diff --git a/docs/PHASE_1_6_BIPA_GATES.md b/docs/PHASE_1_6_BIPA_GATES.md index 6d677f2..ca5d461 100644 --- a/docs/PHASE_1_6_BIPA_GATES.md +++ b/docs/PHASE_1_6_BIPA_GATES.md @@ -199,7 +199,17 @@ PLUS: | 6 | Cryptographic attestation pre-identityd | DONE — `scripts/staffing/attest_pre_identityd_biometric_state.sh` + `docs/attestations/BIPA_PRE_IDENTITYD_ATTESTATION_2026-05-03.md` (3/3 evidence checks pass; signature lines pending) | pending signature | **eng-DONE, signature-pending** | | 7 | Employee training material | scaffold deferred — Gate 5 runbook §7 acknowledgment may serve as substrate | pending | **deferred** | -Until items 1-5 + 6 are checked off, **identity service backfill (Phase 2 §5 Step 5) cannot proceed.** +**Blocking set for Phase 2 backfill:** items **1, 2, 3, 4, 5, 6** must +all be DONE. Item 7 (employee training) is reduced from blocking to +"deferred" because the Gate 5 destruction runbook §7 already requires +operator acknowledgment before legal-tier credentials are issued — +that acknowledgment is procedurally equivalent to the training-record +requirement when the operator population is small (J + 1-2 named +operators). If the operator population grows beyond that, item 7 +re-promotes to blocking and a separate training program must be authored. + +⚖ COUNSEL — confirm whether item 7 deferral is acceptable for the +expected operator population size, or restore it to the blocking set. **Calendar bottleneck:** Items 1, 2, 5, 6 (and #7) await counsel review of the engineering scaffolds. Gate 3 (photo-upload endpoint) diff --git a/docs/attestations/BIPA_PRE_IDENTITYD_ATTESTATION_2026-05-03.md b/docs/attestations/BIPA_PRE_IDENTITYD_ATTESTATION_2026-05-03.md index 8f1d861..7e713b6 100644 --- a/docs/attestations/BIPA_PRE_IDENTITYD_ATTESTATION_2026-05-03.md +++ b/docs/attestations/BIPA_PRE_IDENTITYD_ATTESTATION_2026-05-03.md @@ -22,27 +22,27 @@ tamper-evident store (filesystem with backups + version control). **Schema columns** (18 total): ``` -worker_id -name -role -email -phone -city -state -zip -skills -certifications -archetype -reliability -responsiveness -engagement -compliance -availability -communications -resume_text +worker_id int64 nullable=True +name string nullable=True +role string nullable=True +email string nullable=True +phone string nullable=True +city string nullable=True +state string nullable=True +zip int64 nullable=True +skills string nullable=True +certifications string nullable=True +archetype string nullable=True +reliability double nullable=True +responsiveness double nullable=True +engagement double nullable=True +compliance double nullable=True +availability double nullable=True +communications string nullable=True +resume_text string nullable=True ``` -**Schema SHA-256:** `4ba17870ce25a186a62bdfc29a3b336947dc2fba8a62c42ca249c81f41d32e30` +**Schema SHA-256:** `973b9abe56420de8f88122278b633e813f90a64cf0ddaac6a9811dc0940be676` - PASS: no biometric / photo / face / image column present @@ -81,7 +81,7 @@ No biometric identifiers or biometric information from real candidates have been collected, processed, or stored prior to the deployment of the Phase 1.6 BIPA pre-launch gates. -**Evidence SHA-256:** `230fffeb77b502717bcd7161cc74d5a3401b8722acc8d6ed3d524f93e261cd0b` +**Evidence SHA-256:** `1fdcc9f1682de27e1a0556d698ce221b74c1e71cf54128763828b4bca7b5c1bf` --- diff --git a/mcp-server/phase_1_6_gate_4.test.ts b/mcp-server/phase_1_6_gate_4.test.ts index 2c38803..66236f4 100644 --- a/mcp-server/phase_1_6_gate_4.test.ts +++ b/mcp-server/phase_1_6_gate_4.test.ts @@ -65,6 +65,12 @@ function* walkSource(dir: string): Generator { // definitionPatternsFor: returns regexes that match common DEFINITION // forms in JS/TS/HTML embedded scripts. A bare reference inside a // comment is intentionally NOT matched. +// +// 2026-05-03 opus scrum WARN (gate_4_test:60) added object-literal + +// class-field patterns: a developer wrapping the lookup tables in +// `const tables = { FEMALE_NAMES: [...] }` or a TypeScript class +// field `FEMALE_NAMES: string[] = [...]` would have bypassed the +// original 4 patterns silently. function definitionPatternsFor(symbol: string): RegExp[] { return [ // var / const / let SYMBOL = @@ -73,8 +79,16 @@ function definitionPatternsFor(symbol: string): RegExp[] { new RegExp(`\\bfunction\\s+${symbol}\\s*\\(`), // SYMBOL = function( OR SYMBOL = (...) => new RegExp(`\\b${symbol}\\s*=\\s*(?:function\\s*\\(|\\([^)]*\\)\\s*=>|async\\s*(?:\\(|function))`), - // class member: SYMBOL(...) { (a method declaration) + // class method: SYMBOL(...) { new RegExp(`^\\s*${symbol}\\s*\\([^)]*\\)\\s*\\{`, "m"), + // object-literal property assigned to an array OR object value: + // { SYMBOL: [...] } or SYMBOL: {...} or SYMBOL: new Set(...) + new RegExp(`(?:^|[,{])\\s*${symbol}\\s*:\\s*(?:\\[|\\{|new\\s+(?:Set|Map|Array)\\b)`, "m"), + // TypeScript / class field with type annotation. The boundary + // before SYMBOL is start-of-line OR `{`/`;`/`}` so single-line + // class bodies (`class L { public NAMES_X: string[] = []; }`) + // are caught alongside multi-line ones. + new RegExp(`(?:^|[{};])\\s*(?:public|private|protected|readonly|static\\s+)*\\s*${symbol}\\s*:\\s*[^=;{]+=\\s*[\\[\\{]`, "m"), ]; } @@ -128,3 +142,29 @@ test("Gate 4: regex catches a synthetic positive (defense in depth)", () => { expect(offenders.some((o) => o.includes("NAMES_HISPANIC"))).toBe(true); expect(offenders.some((o) => o.includes("guessEthnicityFromFirstName"))).toBe(true); }); + +// 2026-05-03 opus scrum WARN regression: the bypass forms a developer +// might use to wrap the lookup tables without tripping the original +// four patterns. All of these MUST trip a definition regex. +test("Gate 4: object-literal and class-field bypasses are caught", () => { + const bypassForms = [ + // Inline object-literal property → array + `const tables = { FEMALE_NAMES: ["Maria"] };`, + // Object property → Set/Map constructor + `const lookups = { NAMES_BLACK: new Set(["X"]) };`, + // Multi-line object literal + `const all = {\n NAMES_HISPANIC: [...],\n};`, + // TypeScript class field with type annotation + initializer + `class Lookup {\n SURNAMES_BLACK: string[] = ["X"];\n}`, + // TypeScript public field + `class L { public NAMES_EAST_ASIAN: string[] = []; }`, + ]; + for (const synthetic of bypassForms) { + const offenders = findOffenders("bypass_synthetic", synthetic); + if (offenders.length === 0) { + throw new Error( + `Gate 4 bypass not caught — pattern would slip past:\n ${synthetic}`, + ); + } + } +}); diff --git a/scripts/staffing/attest_pre_identityd_biometric_state.sh b/scripts/staffing/attest_pre_identityd_biometric_state.sh index a3e9d0d..38a0906 100755 --- a/scripts/staffing/attest_pre_identityd_biometric_state.sh +++ b/scripts/staffing/attest_pre_identityd_biometric_state.sh @@ -34,6 +34,17 @@ set -uo pipefail cd "$(dirname "$0")/../.." +# Dependency gate: pyarrow is required to read the parquet schema. Fail +# fast with a clear message rather than letting python3 -c emit a stack +# trace that gets captured into the attestation as "evidence". (Caught +# 2026-05-03 kimi scrum WARN python3-reliance.) +if ! python3 -c "import pyarrow" 2>/dev/null; then + echo "[attest] FAIL: python3 -c 'import pyarrow' failed." >&2 + echo "[attest] pyarrow is required to verify workers_500k.parquet schema." >&2 + echo "[attest] Install with: pip install pyarrow" >&2 + exit 2 +fi + DATE="${OVERRIDE_DATE:-$(date -u +%Y-%m-%d)}" OUT_DIR="docs/attestations" OUT="$OUT_DIR/BIPA_PRE_IDENTITYD_ATTESTATION_${DATE}.md" @@ -62,12 +73,20 @@ if [ ! -r "$WORKERS_PARQUET" ]; then rm -f "$EVIDENCE" exit 2 fi +# Hash NAME + TYPE + nullability per column, not just names. A schema +# fingerprint over names alone would not invalidate if a column got +# repurposed (e.g. resume_text reused to hold base64 photo bytes under +# its existing name). Including types catches that class of evasion. +# (Caught 2026-05-03 opus scrum WARN on attestation:18.) SCHEMA=$(python3 -c " import sys, pyarrow.parquet as pq schema = pq.read_schema('$WORKERS_PARQUET') for f in schema: - print(f.name) + print(f'{f.name}\t{f.type}\tnullable={f.nullable}') " 2>&1) +# Bash assigns + propagates the substitution's exit through \$?. +# Verified: X=\$(false); echo \$? -> 1. opus 2026-05-03 BLOCK on this +# location was a false positive — the check IS the python3 exit gate. if [ $? -ne 0 ]; then echo "[attest] FAIL: schema read error: $SCHEMA" >&2 rm -f "$EVIDENCE"