// Phase 1.6 Gate 4 absence test. // // Spec: docs/PHASE_1_6_BIPA_GATES.md §1 Gate 4 — Engineering acceptance: // "Unit test asserts no protected-attribute inference functions exist // in search.html or any mcp-server module" // // What this guards: the FEMALE_NAMES / NAMES_HISPANIC / SURNAMES_* lookup // tables and the genderFor() / guessEthnicityFromFirstName() / etc. // inference functions removed 2026-05-03. Re-introduction would re-open // (1) Title VII / IL Human Rights Act discriminatory-feature risk and // (2) BIPA's broad-reading "biometric information derived from a biometric // identifier" pattern when combined with deepface output. // // Strategy: walk every .html / .ts / .tsx / .js / .mjs file under // mcp-server/ and grep-assert that none of them DEFINE the forbidden // symbols. We deliberately allow the symbol NAMES to appear inside // comments — search.html has a removal note that names them so future // readers know what was excised — but we forbid actual definition // patterns (var / const / let / function / class member / object literal). 
import { test, expect } from "bun:test";
import { readdirSync, statSync, readFileSync } from "node:fs";
import { join } from "node:path";

/** Names of the name-based lookup tables that must never be defined again. */
const FORBIDDEN_DATA_TABLES = [
  // First-name lookup tables
  "FEMALE_NAMES",
  "MALE_NAMES",
  "NAMES_HISPANIC",
  "NAMES_BLACK",
  "NAMES_SOUTH_ASIAN",
  "NAMES_EAST_ASIAN",
  "NAMES_MIDDLE_EASTERN",
  // Surname lookup tables
  "SURNAMES_HISPANIC",
  "SURNAMES_BLACK",
  "SURNAMES_SOUTH_ASIAN",
  "SURNAMES_EAST_ASIAN",
  "SURNAMES_MIDDLE_EASTERN",
];

/** Names of the inference functions that must never be defined again. */
const FORBIDDEN_FUNCTIONS = [
  "guessGenderFromFirstName",
  "guessEthnicityFromName",
  "guessEthnicityFromFirstName",
  "genderFor",
];

/**
 * Recursively yields the path of every .html / .ts / .tsx / .js / .mjs
 * file under `dir`.
 *
 * Skips `node_modules`, `dist`, and any entry whose name starts with a
 * dot, and never yields this test file itself.
 *
 * @param dir Directory to walk.
 * @returns A generator of file paths (each is `dir` joined with the
 *   entry names below it).
 */
function* walkSource(dir: string): Generator<string> {
  for (const entry of readdirSync(dir)) {
    if (entry === "node_modules" || entry === "dist" || entry.startsWith(".")) continue;
    const path = join(dir, entry);
    const stat = statSync(path);
    if (stat.isDirectory()) {
      yield* walkSource(path);
    } else if (/\.(html|ts|tsx|js|mjs)$/.test(entry)) {
      // Don't grep this test file itself — it lists the forbidden tokens
      // by name as match targets, not as definitions.
      if (path.endsWith("phase_1_6_gate_4.test.ts")) continue;
      yield path;
    }
  }
}

// definitionPatternsFor: returns regexes that match common DEFINITION
// forms in JS/TS/HTML embedded scripts. A bare mention of the symbol
// name (for example in a removal note) is intentionally NOT matched.
// The match is purely textual, so commented-out code that still uses a
// definition form WILL match.
//
// 2026-05-03 opus scrum WARN (gate_4_test:60) added object-literal +
// class-field patterns: a developer wrapping the lookup tables in
// `const tables = { FEMALE_NAMES: [...] }` or a TypeScript class
// field `FEMALE_NAMES: string[] = [...]` would have bypassed the
// original 4 patterns silently.
//
// Further bypass forms closed here:
//   - typed declarations: `const FEMALE_NAMES: string[] = [...]`
//   - stacked modifiers:  `private static readonly FEMALE_NAMES: ... = [...]`
//   - untyped class fields / bare assignments: `static FEMALE_NAMES = [...]`
//   - class fields initialised with `new Set(...)` / `new Map(...)` /
//     `new Array(...)`
/**
 * Builds the definition-detecting regexes for one forbidden symbol.
 *
 * @param symbol Identifier to look for. It is interpolated into the
 *   patterns unescaped, so it must be a plain identifier.
 * @returns One regex per definition form; none carries the `g` flag, so
 *   `.test()` is stateless.
 */
function definitionPatternsFor(symbol: string): RegExp[] {
  return [
    // var / const / let SYMBOL =   (optionally `SYMBOL: Type =`)
    new RegExp(`\\b(?:var|const|let)\\s+${symbol}\\b\\s*(?::[^=;]+)?=`),
    // function SYMBOL(
    new RegExp(`\\bfunction\\s+${symbol}\\s*\\(`),
    // SYMBOL = function(  OR  SYMBOL = (...) =>
    new RegExp(`\\b${symbol}\\s*=\\s*(?:function\\s*\\(|\\([^)]*\\)\\s*=>|async\\s*(?:\\(|function))`),
    // class method: SYMBOL(...) {
    new RegExp(`^\\s*${symbol}\\s*\\([^)]*\\)\\s*\\{`, "m"),
    // object-literal property assigned to an array OR object value:
    // { SYMBOL: [...] } or SYMBOL: {...} or SYMBOL: new Set(...)
    new RegExp(`(?:^|[,{])\\s*${symbol}\\s*:\\s*(?:\\[|\\{|new\\s+(?:Set|Map|Array)\\b)`, "m"),
    // Class field, with or without a type annotation. The boundary
    // before SYMBOL is start-of-line OR `{`/`;`/`}` so single-line
    // class bodies (`class L { public NAMES_X: string[] = []; }`)
    // are caught alongside multi-line ones. Each modifier carries its
    // own trailing whitespace so any number of them can be stacked.
    new RegExp(
      `(?:^|[{};])\\s*(?:(?:public|private|protected|readonly|static)\\s+)*${symbol}\\s*(?::[^=;{]+)?=\\s*(?:\\[|\\{|new\\s+(?:Set|Map|Array)\\b)`,
      "m",
    ),
  ];
}

/**
 * Checks one file's text against every forbidden symbol.
 *
 * @param filePath Label used in the report lines; it is not opened here.
 * @param text Full text to scan.
 * @returns One message per (symbol, pattern) pair that matched; empty
 *   when nothing matched. A single definition can produce several
 *   messages if more than one pattern matches it.
 */
function findOffenders(filePath: string, text: string): string[] {
  const out: string[] = [];
  for (const sym of [...FORBIDDEN_DATA_TABLES, ...FORBIDDEN_FUNCTIONS]) {
    for (const pattern of definitionPatternsFor(sym)) {
      if (pattern.test(text)) {
        out.push(`${filePath}: definition of ${sym} (matched ${pattern})`);
      }
    }
  }
  return out;
}

test("Gate 4: no protected-attribute inference DEFINITIONS in mcp-server", () => {
  const root = import.meta.dir;
  const offenders: string[] = [];
  for (const path of walkSource(root)) {
    const text = readFileSync(path, "utf8");
    offenders.push(...findOffenders(path, text));
  }
  if (offenders.length > 0) {
    // Throw with the full list so the failure output names every file
    // and symbol, not just a count.
    throw new Error(
      `Phase 1.6 Gate 4 violation — protected-attribute inference symbols defined in mcp-server:\n` +
        offenders.map((o) => ` - ${o}`).join("\n"),
    );
  }
  expect(offenders.length).toBe(0);
});

// Regression: declaration forms that carry a type annotation, stack
// several class-member modifiers, or omit the annotation entirely must
// trip a definition regex just like the plain `var X = [...]` form.
test("Gate 4: typed-declaration and stacked-modifier bypasses are caught", () => {
  const bypassForms = [
    // Typed const → array
    `const FEMALE_NAMES: string[] = ["Maria"];`,
    // Typed exported const → Set constructor
    `export const NAMES_BLACK: Set<string> = new Set(["X"]);`,
    // Several modifiers in front of a typed class field
    `class T {\n  private static readonly SURNAMES_HISPANIC: string[] = ["X"];\n}`,
    // Class field without a type annotation
    `class T {\n  static NAMES_SOUTH_ASIAN = ["X"];\n}`,
    // Typed class field initialised with a constructor call
    `class T {\n  readonly NAMES_EAST_ASIAN: Set<string> = new Set(["X"]);\n}`,
  ];
  for (const synthetic of bypassForms) {
    const offenders = findOffenders("bypass_synthetic", synthetic);
    if (offenders.length === 0) {
      throw new Error(
        `Gate 4 bypass not caught — pattern would slip past:\n ${synthetic}`,
      );
    }
  }
});

// Sanity: confirm the test actually walks files (otherwise the absence
// assertion is vacuously true). If mcp-server ever lost its source
// tree, this would catch it.
test("Gate 4: walker actually finds source files to scan", () => {
  // Materialise the walk so the number of scanned files can be checked;
  // an empty walk would make the absence assertion pass trivially.
  const scannedFiles = [...walkSource(import.meta.dir)];
  expect(scannedFiles.length).toBeGreaterThan(5); // mcp-server has more than 5 source files
});

// Defense in depth: feed the detector input that is known to contain
// forbidden definitions. If the patterns ever stop matching real code,
// this fails instead of the absence test silently passing.
test("Gate 4: regex catches a synthetic positive (defense in depth)", () => {
  const sampleLines = [
    `var NAMES_HISPANIC = ["Maria"];`,
    `function guessEthnicityFromFirstName(name) { return "?"; }`,
  ];
  const synthetic = sampleLines.map((line) => `${line}\n`).join("");

  const hits = findOffenders("synthetic_test_input", synthetic);
  const mentions = (symbol: string): boolean => hits.some((hit) => hit.includes(symbol));

  expect(hits.length).toBeGreaterThanOrEqual(2);
  expect(mentions("NAMES_HISPANIC")).toBe(true);
  expect(mentions("guessEthnicityFromFirstName")).toBe(true);
});

// 2026-05-03 opus scrum WARN regression: wrapper forms a developer
// could use to reintroduce the lookup tables without a plain
// var/const/let/function definition. Every one of them MUST be
// reported by findOffenders.
test("Gate 4: object-literal and class-field bypasses are caught", () => {
  const bypassForms = [
    // Inline object-literal property → array
    `const tables = { FEMALE_NAMES: ["Maria"] };`,
    // Object property → Set/Map constructor
    `const lookups = { NAMES_BLACK: new Set(["X"]) };`,
    // Multi-line object literal
    `const all = {\n NAMES_HISPANIC: [...],\n};`,
    // TypeScript class field with type annotation + initializer
    `class Lookup {\n SURNAMES_BLACK: string[] = ["X"];\n}`,
    // TypeScript public field
    `class L { public NAMES_EAST_ASIAN: string[] = []; }`,
  ];

  // Report the first form that produces no offender, if there is one.
  const uncaught = bypassForms.find(
    (form) => findOffenders("bypass_synthetic", form).length === 0,
  );
  if (uncaught !== undefined) {
    throw new Error(
      `Gate 4 bypass not caught — pattern would slip past:\n ${uncaught}`,
    );
  }
});