// Phase 1.6 Gate 4 absence test. // // Spec: docs/PHASE_1_6_BIPA_GATES.md §1 Gate 4 — Engineering acceptance: // "Unit test asserts no protected-attribute inference functions exist // in search.html or any mcp-server module" // // What this guards: the FEMALE_NAMES / NAMES_HISPANIC / SURNAMES_* lookup // tables and the genderFor() / guessEthnicityFromFirstName() / etc. // inference functions removed 2026-05-03. Re-introduction would re-open // (1) Title VII / IL Human Rights Act discriminatory-feature risk and // (2) BIPA's broad-reading "biometric information derived from a biometric // identifier" pattern when combined with deepface output. // // Strategy: walk every .html / .ts / .tsx / .js / .mjs file under // mcp-server/ and grep-assert that none of them DEFINE the forbidden // symbols. We deliberately allow the symbol NAMES to appear inside // comments — search.html has a removal note that names them so future // readers know what was excised — but we forbid actual definition // patterns (var / const / let / function / class member / object literal). 
import { test, expect } from "bun:test";
import { readdirSync, statSync, readFileSync } from "node:fs";
import { join } from "node:path";

// Names of the protected-attribute lookup tables that must never be
// defined again anywhere under this directory.
const FORBIDDEN_DATA_TABLES = [
  // First-name lookup tables
  "FEMALE_NAMES",
  "MALE_NAMES",
  "NAMES_HISPANIC",
  "NAMES_BLACK",
  "NAMES_SOUTH_ASIAN",
  "NAMES_EAST_ASIAN",
  "NAMES_MIDDLE_EASTERN",
  // Surname lookup tables
  "SURNAMES_HISPANIC",
  "SURNAMES_BLACK",
  "SURNAMES_SOUTH_ASIAN",
  "SURNAMES_EAST_ASIAN",
  "SURNAMES_MIDDLE_EASTERN",
];

// Names of the inference functions that must never be defined again.
const FORBIDDEN_FUNCTIONS = [
  "guessGenderFromFirstName",
  "guessEthnicityFromName",
  "guessEthnicityFromFirstName",
  "genderFor",
];

// walkSource: recursively yields the path of every .html / .ts / .tsx /
// .js / .mjs file under `dir`. Entries named node_modules or dist, and
// any entry whose name starts with ".", are skipped at every level.
// The return type is spelled out so consumers receive `string` paths
// rather than `unknown`.
function* walkSource(dir: string): Generator<string, void, undefined> {
  for (const entry of readdirSync(dir)) {
    if (entry === "node_modules" || entry === "dist" || entry.startsWith(".")) continue;
    const path = join(dir, entry);
    const stat = statSync(path);
    if (stat.isDirectory()) {
      yield* walkSource(path);
    } else if (/\.(html|ts|tsx|js|mjs)$/.test(entry)) {
      // Don't grep this test file itself — it lists the forbidden tokens
      // by name as match targets, not as definitions.
      if (path.endsWith("phase_1_6_gate_4.test.ts")) continue;
      yield path;
    }
  }
}

// definitionPatternsFor: returns regexes that match common DEFINITION
// forms of `symbol` in JS / TS / HTML-embedded scripts. A bare reference
// inside a comment is intentionally NOT matched. The text is scanned
// as-is (comments are not stripped), so a commented-out definition
// still counts as a match.
//
// The patterns tolerate TypeScript syntax — type annotations, generic
// parameter lists, member modifiers, return types — because .ts / .tsx
// files are scanned and an annotated definition must not slip past the
// gate.
function definitionPatternsFor(symbol: string): RegExp[] {
  // Today's symbols are plain identifiers; escaping keeps the patterns
  // well-formed if a future entry contains a regex metacharacter.
  const sym = symbol.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return [
    // var / const / let SYMBOL = ...      (also `SYMBOL: Type = ...`)
    new RegExp(String.raw`\b(?:var|const|let)\s+${sym}\b\s*(?::[^=\n]*)?=`),
    // function SYMBOL(   function* SYMBOL(   function SYMBOL<T>(
    new RegExp(String.raw`\bfunction(?:\s*\*\s*|\s+)${sym}\s*(?:<[^>\n]*>)?\s*\(`),
    // SYMBOL = function ...   SYMBOL = async ...
    // SYMBOL = (...) =>       SYMBOL = (...): T =>       SYMBOL = arg =>
    // (an optional `: Type` before the `=` covers typed class fields)
    new RegExp(
      String.raw`\b${sym}\s*(?::[^=\n]*)?=\s*(?:function\b|async\b|\([^)]*\)\s*(?::[^=\n]*)?=>|[A-Za-z_$][\w$]*\s*=>)`,
    ),
    // class member: [modifiers] SYMBOL(...) [: ReturnType] {
    new RegExp(
      String.raw`^\s*(?:(?:public|private|protected|static|async|override|get|set)\s+)*\*?\s*${sym}\s*(?:<[^>\n]*>)?\s*\([^)]*\)\s*(?::[^{;\n]*)?\{`,
      "m",
    ),
    // object-literal member: SYMBOL: [ ... ] / { ... } / new ... / function / arrow.
    // Only literal, constructor and function initialisers are matched so
    // that plain type annotations (`SYMBOL: string[]`) are left alone.
    new RegExp(
      String.raw`(?:^|[{,])\s*["']?${sym}["']?\s*:\s*(?:\[|\{|\(|new\b|function\b|async\b|[A-Za-z_$][\w$]*\s*=>)`,
      "m",
    ),
  ];
}

// findOffenders: runs every definition pattern for every forbidden
// symbol against `text` and returns one human-readable line per
// (symbol, pattern) hit, prefixed with `filePath`. A single definition
// can be reported more than once when several patterns match it.
function findOffenders(filePath: string, text: string): string[] {
  const out: string[] = [];
  for (const sym of [...FORBIDDEN_DATA_TABLES, ...FORBIDDEN_FUNCTIONS]) {
    for (const pattern of definitionPatternsFor(sym)) {
      if (pattern.test(text)) {
        out.push(`${filePath}: definition of ${sym} (matched ${pattern})`);
      }
    }
  }
  return out;
}

test("Gate 4: no protected-attribute inference DEFINITIONS in mcp-server", () => {
  const root = import.meta.dir;
  const offenders: string[] = [];
  for (const path of walkSource(root)) {
    const text = readFileSync(path, "utf8");
    offenders.push(...findOffenders(path, text));
  }
  // Throw with the full offender list so the failure output names every
  // file and symbol instead of just "expected 0".
  if (offenders.length > 0) {
    throw new Error(
      `Phase 1.6 Gate 4 violation — protected-attribute inference symbols defined in mcp-server:\n` +
        offenders.map((o) => `  - ${o}`).join("\n"),
    );
  }
  expect(offenders.length).toBe(0);
});

// Sanity: confirm the test actually walks files (otherwise the absence
// assertion is vacuously true). If mcp-server ever lost its source
// tree, this would catch it.
test("Gate 4: walker actually finds source files to scan", () => {
  const root = import.meta.dir;
  let count = 0;
  for (const _ of walkSource(root)) count++;
  expect(count).toBeGreaterThan(5); // mcp-server has more than 5 source files
});

// TypeScript-shaped and object-literal definitions must be caught too:
// the walker scans .ts / .tsx files, so an annotated re-introduction
// must not pass the gate.
test("Gate 4: regex catches TypeScript-shaped and object-literal definitions", () => {
  const cases = [
    { symbol: "FEMALE_NAMES", snippet: `const FEMALE_NAMES: Set<string> = new Set(["Ann"]);\n` },
    {
      symbol: "genderFor",
      snippet: `export function genderFor<T extends string>(name: T): string { return "?"; }\n`,
    },
    { symbol: "genderFor", snippet: `async function* genderFor(names) {}\n` },
    { symbol: "guessGenderFromFirstName", snippet: `exports.guessGenderFromFirstName = name => "?";\n` },
    {
      symbol: "guessEthnicityFromName",
      snippet:
        `class Guesser {\n` +
        `  private static guessEthnicityFromName(name: string): string {\n` +
        `    return "?";\n` +
        `  }\n` +
        `}\n`,
    },
    { symbol: "SURNAMES_BLACK", snippet: `const tables = {\n  SURNAMES_BLACK: ["Smith"],\n};\n` },
  ];
  for (const { symbol, snippet } of cases) {
    const offenders = findOffenders("synthetic_ts_input", snippet);
    expect(offenders.some((o) => o.includes(symbol))).toBe(true);
  }
});

// Comment mentions stay allowed: removal notes name the excised symbols
// so future readers know what was taken out.
test("Gate 4: a bare mention inside a comment is not flagged", () => {
  const note = `// FEMALE_NAMES, NAMES_HISPANIC and genderFor() were removed on 2026-05-03.\n`;
  expect(findOffenders("synthetic_comment_input", note)).toEqual([]);
});

// Defense in depth: the regex itself must catch a synthetic positive.
// If the definition pattern ever stops matching real code, the absence
// test would silently pass on actual reintroductions.
test("Gate 4: regex catches a synthetic positive (defense in depth)", () => {
  // Two deliberate violations: one lookup-table definition and one
  // inference-function definition, each on its own line.
  const fixtureLines = [
    `var NAMES_HISPANIC = ["Maria"];`,
    `function guessEthnicityFromFirstName(name) { return "?"; }`,
  ];
  const synthetic = fixtureLines.map((line) => `${line}\n`).join("");

  const hits = findOffenders("synthetic_test_input", synthetic);
  const wasFlagged = (symbol: string): boolean => hits.some((hit) => hit.includes(symbol));

  // At least one report per violation, and each symbol is named in one.
  expect(hits.length).toBeGreaterThanOrEqual(2);
  expect(wasFlagged("NAMES_HISPANIC")).toBe(true);
  expect(wasFlagged("guessEthnicityFromFirstName")).toBe(true);
});