Per 2026-05-03 phase_1_6_bipa_gates scrum (13 findings, 0 convergent).
1 BLOCK verified false positive, 4 real fixes shipped:
False positive (verified):
- opus BLOCK on attest:55 — claimed that `set -uo pipefail` without `-e`
makes the post-python3 `if [ $? -ne 0 ]` check unreachable. Verified
WRONG: `X=$(false); echo $?` prints 1, because Bash sets $? from the
command substitution's exit status on a plain assignment line. The
check IS the python3 exit gate. An inline comment was added to the
script noting the false positive so future scrums don't re-flag it.
Real fixes:
1. opus WARN attestation:18 — schema fingerprint hashed names ONLY,
missing column-type changes. A column repurposed to hold base64
photo bytes under its existing name would pass undetected. Now
hashes "name<TAB>type<TAB>nullable=bool" per row. Re-run produced
evidence SHA-256 1fdcc9f1... (vs old 230fffeb..., reflecting the
broader fingerprint scope).
2. opus WARN gate_4_test:60 — definition regex didn't catch
object-literal property forms (`const t = { FEMALE_NAMES: [...] }`)
or TypeScript class fields (`class L { public NAMES_X: string[] = [] }`).
Added two new patterns + a regression test
(Gate 4: object-literal and class-field bypasses are caught) that
exercises 5 bypass forms. 4/4 tests green; 1 minor regex tweak
needed mid-fix to handle single-line class bodies.
3. kimi WARN python3-reliance — script assumed pyarrow installed and
would emit a stack trace into the attestation if not. Added
`python3 -c "import pyarrow"` gate at top with clean install
instructions on failure.
4. opus INFO PHASE_1_6:200 — item 7 (training) silently dropped from
blocking set with bare "deferred" rationale. Now explicitly states
the deferral is conditional on small operator population (J + 1-2
named ops); item 7 re-promotes to blocking if population grows.
⚖ COUNSEL marker added.
Skipped (acceptable as ⚖ COUNSEL placeholders by design):
- kimi WARN consent template:30-day-SLA (counsel decides number)
- kimi WARN consent template:email-placeholder (counsel supplies)
- kimi WARN parquet absence (env override exists; redeployment-aware)
- kimi INFO runbook manual-erasure (marked TODO when /erase ships)
- qwen INFO doc path/status nits (already addressed by file moves)
Tests: 4/4 Gate 4 absence test (incl. new bypass-coverage), 3/3
attestation evidence checks pass on live data.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
171 lines
6.9 KiB
TypeScript
171 lines
6.9 KiB
TypeScript
// Phase 1.6 Gate 4 absence test.
|
|
//
|
|
// Spec: docs/PHASE_1_6_BIPA_GATES.md §1 Gate 4 — Engineering acceptance:
|
|
// "Unit test asserts no protected-attribute inference functions exist
|
|
// in search.html or any mcp-server module"
|
|
//
|
|
// What this guards: the FEMALE_NAMES / NAMES_HISPANIC / SURNAMES_* lookup
|
|
// tables and the genderFor() / guessEthnicityFromFirstName() / etc.
|
|
// inference functions removed 2026-05-03. Re-introduction would re-open
|
|
// (1) Title VII / IL Human Rights Act discriminatory-feature risk and
|
|
// (2) BIPA's broad-reading "biometric information derived from a biometric
|
|
// identifier" pattern when combined with deepface output.
|
|
//
|
|
// Strategy: walk every .html / .ts / .tsx / .js / .mjs file under
|
|
// mcp-server/ and grep-assert that none of them DEFINE the forbidden
|
|
// symbols. We deliberately allow the symbol NAMES to appear inside
|
|
// comments — search.html has a removal note that names them so future
|
|
// readers know what was excised — but we forbid actual definition
|
|
// patterns (var / const / let / function / class member / object literal).
|
|
|
|
import { test, expect } from "bun:test";
|
|
import { readdirSync, statSync, readFileSync } from "node:fs";
|
|
import { join } from "node:path";
|
|
|
|
// Identifiers of the name-based lookup tables whose definition anywhere in
// the scanned tree fails the gate. Order is preserved because offender
// reports are emitted in list order.
const FORBIDDEN_DATA_TABLES: string[] = [
  // First-name lookup tables
  "FEMALE_NAMES", "MALE_NAMES",
  "NAMES_HISPANIC", "NAMES_BLACK", "NAMES_SOUTH_ASIAN",
  "NAMES_EAST_ASIAN", "NAMES_MIDDLE_EASTERN",
  // Surname lookup tables
  "SURNAMES_HISPANIC", "SURNAMES_BLACK", "SURNAMES_SOUTH_ASIAN",
  "SURNAMES_EAST_ASIAN", "SURNAMES_MIDDLE_EASTERN",
];
|
|
|
|
// Identifiers of the inference functions whose definition anywhere in the
// scanned tree fails the gate.
const FORBIDDEN_FUNCTIONS: string[] = [
  "guessGenderFromFirstName",
  "guessEthnicityFromName", "guessEthnicityFromFirstName",
  "genderFor",
];
|
|
|
|
// walkSource: depth-first generator over `dir` that yields the path of every
// file whose name ends in .html / .ts / .tsx / .js / .mjs.
//
// Skipped entirely: entries named "node_modules" or "dist", and any entry
// whose name starts with "." (files and directories alike). The gate's own
// test file is also skipped, because it lists the forbidden tokens by name
// as match targets rather than as definitions.
function* walkSource(dir: string): Generator<string> {
  const prunedNames = new Set(["node_modules", "dist"]);
  const scannableExt = /\.(html|ts|tsx|js|mjs)$/;

  for (const name of readdirSync(dir)) {
    if (prunedNames.has(name) || name.startsWith(".")) continue;

    const fullPath = join(dir, name);
    if (statSync(fullPath).isDirectory()) {
      yield* walkSource(fullPath);
      continue;
    }

    if (!scannableExt.test(name)) continue;
    if (fullPath.endsWith("phase_1_6_gate_4.test.ts")) continue;
    yield fullPath;
  }
}
|
|
|
|
// definitionPatternsFor: returns regexes that match common DEFINITION
// forms of `symbol` in JS/TS/HTML embedded scripts. A bare reference to
// the name (e.g. a removal note in a comment, or a call site) matches none
// of them. Comments are not stripped, so a commented-out definition still
// matches.
//
// `symbol` is interpolated verbatim into every pattern, so it must be a
// plain identifier with no regex metacharacters.
//
// 2026-05-03 opus scrum WARN (gate_4_test:60) added object-literal +
// class-field patterns. This revision closes further bypasses that slipped
// past all of those patterns:
//   - typed declarations:          const NAME: string[] = [...]
//   - stacked field modifiers:     private readonly NAME: T = [...]
//     (previously the trailing \s+ bound only to `static`, so any second
//     modifier broke the match)
//   - typed field + constructor:   NAME: Set<string> = new Set(...)
//   - untyped field / member:      static NAME = [...], this.NAME = new Map()
//   - quoted object keys:          { "NAME": [...] }
//   - annotated/prefixed methods:  async NAME(x: string): string {
//   - paren-less arrows:           NAME = x => ...
function definitionPatternsFor(symbol: string): RegExp[] {
  // Zero or more whitespace-terminated member modifiers. The \s+ sits inside
  // the repeated group so every modifier in a stack is consumed.
  const fieldModifiers = `(?:(?:public|private|protected|readonly|static|override)\\s+)*`;
  const methodModifiers = `(?:(?:public|private|protected|static|async|override|get|set)\\s+)*`;
  // Start of an array literal, an object literal, or a Set/Map/Array construction.
  const collectionStart = `(?:\\[|\\{|new\\s+(?:Set|Map|Array)\\b)`;
  // A single JS identifier (used for paren-less arrow parameters).
  const identifier = `[A-Za-z_$][\\w$]*`;

  return [
    // var / const / let SYMBOL =   (optionally with a TS type annotation)
    new RegExp(`\\b(?:var|const|let)\\s+${symbol}\\b\\s*(?::[^=;]+)?=`),
    // function SYMBOL(   /   function* SYMBOL(   /   function SYMBOL<T>(
    new RegExp(`\\bfunction\\b\\s*\\*?\\s*${symbol}\\b\\s*(?:<[^(]*>)?\\s*\\(`),
    // SYMBOL = function…   OR   SYMBOL = (...) =>   OR   SYMBOL = x =>
    // (each optionally async; the arrow may carry a return-type annotation)
    new RegExp(
      `\\b${symbol}\\b\\s*(?::[^=;]+)?=\\s*(?:function\\b|\\([^)]*\\)\\s*(?::[^=]+)?=>|${identifier}\\s*=>|async\\s*(?:\\(|function\\b|${identifier}\\s*=>))`,
    ),
    // class / object method: SYMBOL(...) {   with optional modifiers,
    // generics and return-type annotation. The boundary before it is
    // start-of-line or one of `{ } ; ,` so single-line bodies are caught.
    new RegExp(
      `(?:^|[{};,])\\s*${methodModifiers}\\*?\\s*${symbol}\\s*(?:<[^(]*>)?\\s*\\([^)]*\\)\\s*(?::[^{;=]+)?\\{`,
      "m",
    ),
    // object-literal property assigned to an array OR object value:
    //   { SYMBOL: [...] }  or  SYMBOL: {...}  or  "SYMBOL": new Set(...)
    new RegExp(`(?:^|[,{])\\s*["']?${symbol}["']?\\s*:\\s*${collectionStart}`, "m"),
    // TypeScript / class field with type annotation. The boundary
    // before SYMBOL is start-of-line OR `{`/`;`/`}` so single-line
    // class bodies (`class L { public NAMES_X: string[] = []; }`)
    // are caught alongside multi-line ones.
    new RegExp(
      `(?:^|[{};])\\s*${fieldModifiers}${symbol}\\s*:\\s*[^=;{]+=\\s*${collectionStart}`,
      "m",
    ),
    // Untyped class field or member/global assignment of a collection:
    //   static SYMBOL = [...]   this.SYMBOL = new Set(...)   window.SYMBOL = {...}
    new RegExp(`\\b${symbol}\\b\\s*=\\s*${collectionStart}`),
  ];
}
|
|
|
|
// findOffenders: tests `text` against every definition pattern of every
// forbidden symbol (data tables first, then functions) and returns one
// report line per matching pattern, prefixed with `filePath`. A symbol hit
// by several patterns is reported once per pattern. Returns [] when clean.
function findOffenders(filePath: string, text: string): string[] {
  const symbols = FORBIDDEN_DATA_TABLES.concat(FORBIDDEN_FUNCTIONS);
  return symbols.flatMap((name) =>
    definitionPatternsFor(name)
      .filter((re) => re.test(text))
      .map((re) => `${filePath}: definition of ${name} (matched ${re})`),
  );
}
|
|
|
|
// Main absence assertion: scan every source file reachable from this test's
// directory and fail, listing each offender, if any forbidden symbol is
// defined.
test("Gate 4: no protected-attribute inference DEFINITIONS in mcp-server", () => {
  let violations: string[] = [];
  for (const file of walkSource(import.meta.dir)) {
    violations = violations.concat(findOffenders(file, readFileSync(file, "utf8")));
  }

  if (violations.length !== 0) {
    // Throw with the full listing so the failure output names every file.
    const listing = violations.map((v) => ` - ${v}`).join("\n");
    throw new Error(
      `Phase 1.6 Gate 4 violation — protected-attribute inference symbols defined in mcp-server:\n` +
        listing,
    );
  }
  expect(violations.length).toBe(0);
});
|
|
|
|
// Sanity check against a vacuous pass: if the walker yields (almost) no
// files, the absence assertion proves nothing. Requires more than 5 files
// under this test's directory.
test("Gate 4: walker actually finds source files to scan", () => {
  const scanned = Array.from(walkSource(import.meta.dir)).length;
  expect(scanned).toBeGreaterThan(5); // mcp-server has more than 5 source files
});
|
|
|
|
// Defense in depth: feed findOffenders a known-bad snippet. If the patterns
// ever stop matching real definitions, the absence test above would pass
// silently on an actual reintroduction; this test fails instead.
test("Gate 4: regex catches a synthetic positive (defense in depth)", () => {
  const snippet = [
    `var NAMES_HISPANIC = ["Maria"];`,
    `function guessEthnicityFromFirstName(name) { return "?"; }`,
    "", // trailing newline after the last statement
  ].join("\n");

  const hits = findOffenders("synthetic_test_input", snippet);
  const mentions = (token: string): boolean => hits.some((line) => line.includes(token));

  expect(hits.length).toBeGreaterThanOrEqual(2);
  expect(mentions("NAMES_HISPANIC")).toBe(true);
  expect(mentions("guessEthnicityFromFirstName")).toBe(true);
});
|
|
|
|
// Regression for the 2026-05-03 opus scrum WARN: wrapper forms a developer
// could use to reintroduce the lookup tables without tripping the original
// four patterns. Each snippet must produce at least one offender; the first
// one that does not is reported in the thrown error.
test("Gate 4: object-literal and class-field bypasses are caught", () => {
  const bypassForms = [
    `const tables = { FEMALE_NAMES: ["Maria"] };`, // inline object-literal property → array
    `const lookups = { NAMES_BLACK: new Set(["X"]) };`, // object property → Set/Map constructor
    `const all = {\n NAMES_HISPANIC: [...],\n};`, // multi-line object literal
    `class Lookup {\n SURNAMES_BLACK: string[] = ["X"];\n}`, // typed class field + initializer
    `class L { public NAMES_EAST_ASIAN: string[] = []; }`, // TypeScript public field
  ];

  const missed = bypassForms.find(
    (snippet) => findOffenders("bypass_synthetic", snippet).length === 0,
  );
  if (missed !== undefined) {
    throw new Error(
      `Gate 4 bypass not caught — pattern would slip past:\n ${missed}`,
    );
  }
});
|