diff --git a/.gitignore b/.gitignore index 1a87915..f6ee8bf 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,7 @@ .env __pycache__/ *.pyc + +# Headshot pool — binary face JPGs are fetched by scripts/staffing/fetch_face_pool.py +# (synthetic StyleGAN, ~100MB for 200 faces). Manifest + fetch script are tracked. +data/headshots/face_*.jpg diff --git a/data/headshots/manifest.jsonl b/data/headshots/manifest.jsonl new file mode 100644 index 0000000..c914030 --- /dev/null +++ b/data/headshots/manifest.jsonl @@ -0,0 +1,198 @@ +{"id": 0, "file": "face_0000.jpg", "gender": null} +{"id": 1, "file": "face_0001.jpg", "gender": null} +{"id": 2, "file": "face_0002.jpg", "gender": null} +{"id": 3, "file": "face_0003.jpg", "gender": null} +{"id": 4, "file": "face_0004.jpg", "gender": null} +{"id": 5, "file": "face_0005.jpg", "gender": null} +{"id": 6, "file": "face_0006.jpg", "gender": null} +{"id": 7, "file": "face_0007.jpg", "gender": null} +{"id": 8, "file": "face_0008.jpg", "gender": null} +{"id": 9, "file": "face_0009.jpg", "gender": null} +{"id": 10, "file": "face_0010.jpg", "gender": null} +{"id": 11, "file": "face_0011.jpg", "gender": null} +{"id": 12, "file": "face_0012.jpg", "gender": null} +{"id": 13, "file": "face_0013.jpg", "gender": null} +{"id": 14, "file": "face_0014.jpg", "gender": null} +{"id": 15, "file": "face_0015.jpg", "gender": null} +{"id": 16, "file": "face_0016.jpg", "gender": null} +{"id": 17, "file": "face_0017.jpg", "gender": null} +{"id": 18, "file": "face_0018.jpg", "gender": null} +{"id": 19, "file": "face_0019.jpg", "gender": null} +{"id": 20, "file": "face_0020.jpg", "gender": null} +{"id": 21, "file": "face_0021.jpg", "gender": null} +{"id": 22, "file": "face_0022.jpg", "gender": null} +{"id": 23, "file": "face_0023.jpg", "gender": null} +{"id": 24, "file": "face_0024.jpg", "gender": null} +{"id": 25, "file": "face_0025.jpg", "gender": null} +{"id": 26, "file": "face_0026.jpg", "gender": null} +{"id": 27, "file": "face_0027.jpg", "gender": 
null} +{"id": 28, "file": "face_0028.jpg", "gender": null} +{"id": 29, "file": "face_0029.jpg", "gender": null} +{"id": 30, "file": "face_0030.jpg", "gender": null} +{"id": 31, "file": "face_0031.jpg", "gender": null} +{"id": 32, "file": "face_0032.jpg", "gender": null} +{"id": 33, "file": "face_0033.jpg", "gender": null} +{"id": 34, "file": "face_0034.jpg", "gender": null} +{"id": 35, "file": "face_0035.jpg", "gender": null} +{"id": 36, "file": "face_0036.jpg", "gender": null} +{"id": 37, "file": "face_0037.jpg", "gender": null} +{"id": 38, "file": "face_0038.jpg", "gender": null} +{"id": 39, "file": "face_0039.jpg", "gender": null} +{"id": 40, "file": "face_0040.jpg", "gender": null} +{"id": 41, "file": "face_0041.jpg", "gender": null} +{"id": 42, "file": "face_0042.jpg", "gender": null} +{"id": 43, "file": "face_0043.jpg", "gender": null} +{"id": 44, "file": "face_0044.jpg", "gender": null} +{"id": 45, "file": "face_0045.jpg", "gender": null} +{"id": 46, "file": "face_0046.jpg", "gender": null} +{"id": 47, "file": "face_0047.jpg", "gender": null} +{"id": 48, "file": "face_0048.jpg", "gender": null} +{"id": 49, "file": "face_0049.jpg", "gender": null} +{"id": 50, "file": "face_0050.jpg", "gender": null} +{"id": 51, "file": "face_0051.jpg", "gender": null} +{"id": 52, "file": "face_0052.jpg", "gender": null} +{"id": 53, "file": "face_0053.jpg", "gender": null} +{"id": 54, "file": "face_0054.jpg", "gender": null} +{"id": 55, "file": "face_0055.jpg", "gender": null} +{"id": 56, "file": "face_0056.jpg", "gender": null} +{"id": 57, "file": "face_0057.jpg", "gender": null} +{"id": 58, "file": "face_0058.jpg", "gender": null} +{"id": 59, "file": "face_0059.jpg", "gender": null} +{"id": 60, "file": "face_0060.jpg", "gender": null} +{"id": 61, "file": "face_0061.jpg", "gender": null} +{"id": 62, "file": "face_0062.jpg", "gender": null} +{"id": 63, "file": "face_0063.jpg", "gender": null} +{"id": 64, "file": "face_0064.jpg", "gender": null} +{"id": 65, "file": 
"face_0065.jpg", "gender": null} +{"id": 66, "file": "face_0066.jpg", "gender": null} +{"id": 67, "file": "face_0067.jpg", "gender": null} +{"id": 68, "file": "face_0068.jpg", "gender": null} +{"id": 69, "file": "face_0069.jpg", "gender": null} +{"id": 70, "file": "face_0070.jpg", "gender": null} +{"id": 71, "file": "face_0071.jpg", "gender": null} +{"id": 72, "file": "face_0072.jpg", "gender": null} +{"id": 73, "file": "face_0073.jpg", "gender": null} +{"id": 74, "file": "face_0074.jpg", "gender": null} +{"id": 75, "file": "face_0075.jpg", "gender": null} +{"id": 76, "file": "face_0076.jpg", "gender": null} +{"id": 77, "file": "face_0077.jpg", "gender": null} +{"id": 78, "file": "face_0078.jpg", "gender": null} +{"id": 79, "file": "face_0079.jpg", "gender": null} +{"id": 80, "file": "face_0080.jpg", "gender": null} +{"id": 81, "file": "face_0081.jpg", "gender": null} +{"id": 82, "file": "face_0082.jpg", "gender": null} +{"id": 83, "file": "face_0083.jpg", "gender": null} +{"id": 84, "file": "face_0084.jpg", "gender": null} +{"id": 85, "file": "face_0085.jpg", "gender": null} +{"id": 86, "file": "face_0086.jpg", "gender": null} +{"id": 87, "file": "face_0087.jpg", "gender": null} +{"id": 88, "file": "face_0088.jpg", "gender": null} +{"id": 89, "file": "face_0089.jpg", "gender": null} +{"id": 90, "file": "face_0090.jpg", "gender": null} +{"id": 91, "file": "face_0091.jpg", "gender": null} +{"id": 92, "file": "face_0092.jpg", "gender": null} +{"id": 93, "file": "face_0093.jpg", "gender": null} +{"id": 94, "file": "face_0094.jpg", "gender": null} +{"id": 95, "file": "face_0095.jpg", "gender": null} +{"id": 96, "file": "face_0096.jpg", "gender": null} +{"id": 97, "file": "face_0097.jpg", "gender": null} +{"id": 98, "file": "face_0098.jpg", "gender": null} +{"id": 99, "file": "face_0099.jpg", "gender": null} +{"id": 100, "file": "face_0100.jpg", "gender": null} +{"id": 101, "file": "face_0101.jpg", "gender": null} +{"id": 102, "file": "face_0102.jpg", "gender": null} 
+{"id": 103, "file": "face_0103.jpg", "gender": null} +{"id": 104, "file": "face_0104.jpg", "gender": null} +{"id": 105, "file": "face_0105.jpg", "gender": null} +{"id": 106, "file": "face_0106.jpg", "gender": null} +{"id": 107, "file": "face_0107.jpg", "gender": null} +{"id": 108, "file": "face_0108.jpg", "gender": null} +{"id": 109, "file": "face_0109.jpg", "gender": null} +{"id": 110, "file": "face_0110.jpg", "gender": null} +{"id": 111, "file": "face_0111.jpg", "gender": null} +{"id": 112, "file": "face_0112.jpg", "gender": null} +{"id": 113, "file": "face_0113.jpg", "gender": null} +{"id": 114, "file": "face_0114.jpg", "gender": null} +{"id": 115, "file": "face_0115.jpg", "gender": null} +{"id": 116, "file": "face_0116.jpg", "gender": null} +{"id": 117, "file": "face_0117.jpg", "gender": null} +{"id": 118, "file": "face_0118.jpg", "gender": null} +{"id": 119, "file": "face_0119.jpg", "gender": null} +{"id": 121, "file": "face_0121.jpg", "gender": null} +{"id": 122, "file": "face_0122.jpg", "gender": null} +{"id": 123, "file": "face_0123.jpg", "gender": null} +{"id": 124, "file": "face_0124.jpg", "gender": null} +{"id": 125, "file": "face_0125.jpg", "gender": null} +{"id": 126, "file": "face_0126.jpg", "gender": null} +{"id": 127, "file": "face_0127.jpg", "gender": null} +{"id": 128, "file": "face_0128.jpg", "gender": null} +{"id": 129, "file": "face_0129.jpg", "gender": null} +{"id": 130, "file": "face_0130.jpg", "gender": null} +{"id": 131, "file": "face_0131.jpg", "gender": null} +{"id": 132, "file": "face_0132.jpg", "gender": null} +{"id": 133, "file": "face_0133.jpg", "gender": null} +{"id": 134, "file": "face_0134.jpg", "gender": null} +{"id": 135, "file": "face_0135.jpg", "gender": null} +{"id": 136, "file": "face_0136.jpg", "gender": null} +{"id": 137, "file": "face_0137.jpg", "gender": null} +{"id": 138, "file": "face_0138.jpg", "gender": null} +{"id": 139, "file": "face_0139.jpg", "gender": null} +{"id": 140, "file": "face_0140.jpg", "gender": null} 
+{"id": 141, "file": "face_0141.jpg", "gender": null} +{"id": 142, "file": "face_0142.jpg", "gender": null} +{"id": 143, "file": "face_0143.jpg", "gender": null} +{"id": 144, "file": "face_0144.jpg", "gender": null} +{"id": 145, "file": "face_0145.jpg", "gender": null} +{"id": 146, "file": "face_0146.jpg", "gender": null} +{"id": 147, "file": "face_0147.jpg", "gender": null} +{"id": 148, "file": "face_0148.jpg", "gender": null} +{"id": 149, "file": "face_0149.jpg", "gender": null} +{"id": 150, "file": "face_0150.jpg", "gender": null} +{"id": 151, "file": "face_0151.jpg", "gender": null} +{"id": 152, "file": "face_0152.jpg", "gender": null} +{"id": 153, "file": "face_0153.jpg", "gender": null} +{"id": 154, "file": "face_0154.jpg", "gender": null} +{"id": 155, "file": "face_0155.jpg", "gender": null} +{"id": 156, "file": "face_0156.jpg", "gender": null} +{"id": 157, "file": "face_0157.jpg", "gender": null} +{"id": 158, "file": "face_0158.jpg", "gender": null} +{"id": 159, "file": "face_0159.jpg", "gender": null} +{"id": 160, "file": "face_0160.jpg", "gender": null} +{"id": 161, "file": "face_0161.jpg", "gender": null} +{"id": 162, "file": "face_0162.jpg", "gender": null} +{"id": 163, "file": "face_0163.jpg", "gender": null} +{"id": 164, "file": "face_0164.jpg", "gender": null} +{"id": 165, "file": "face_0165.jpg", "gender": null} +{"id": 166, "file": "face_0166.jpg", "gender": null} +{"id": 167, "file": "face_0167.jpg", "gender": null} +{"id": 168, "file": "face_0168.jpg", "gender": null} +{"id": 169, "file": "face_0169.jpg", "gender": null} +{"id": 170, "file": "face_0170.jpg", "gender": null} +{"id": 171, "file": "face_0171.jpg", "gender": null} +{"id": 172, "file": "face_0172.jpg", "gender": null} +{"id": 173, "file": "face_0173.jpg", "gender": null} +{"id": 174, "file": "face_0174.jpg", "gender": null} +{"id": 175, "file": "face_0175.jpg", "gender": null} +{"id": 176, "file": "face_0176.jpg", "gender": null} +{"id": 177, "file": "face_0177.jpg", "gender": null} 
+{"id": 178, "file": "face_0178.jpg", "gender": null} +{"id": 179, "file": "face_0179.jpg", "gender": null} +{"id": 180, "file": "face_0180.jpg", "gender": null} +{"id": 181, "file": "face_0181.jpg", "gender": null} +{"id": 182, "file": "face_0182.jpg", "gender": null} +{"id": 183, "file": "face_0183.jpg", "gender": null} +{"id": 184, "file": "face_0184.jpg", "gender": null} +{"id": 185, "file": "face_0185.jpg", "gender": null} +{"id": 186, "file": "face_0186.jpg", "gender": null} +{"id": 187, "file": "face_0187.jpg", "gender": null} +{"id": 188, "file": "face_0188.jpg", "gender": null} +{"id": 189, "file": "face_0189.jpg", "gender": null} +{"id": 191, "file": "face_0191.jpg", "gender": null} +{"id": 192, "file": "face_0192.jpg", "gender": null} +{"id": 193, "file": "face_0193.jpg", "gender": null} +{"id": 194, "file": "face_0194.jpg", "gender": null} +{"id": 195, "file": "face_0195.jpg", "gender": null} +{"id": 196, "file": "face_0196.jpg", "gender": null} +{"id": 197, "file": "face_0197.jpg", "gender": null} +{"id": 198, "file": "face_0198.jpg", "gender": null} +{"id": 199, "file": "face_0199.jpg", "gender": null} diff --git a/mcp-server/console.html b/mcp-server/console.html index 38e0ef6..b59af4d 100644 --- a/mcp-server/console.html +++ b/mcp-server/console.html @@ -55,7 +55,8 @@ details .body{padding-top:10px;font-size:12px;color:#8b949e} .accent-r{border-left:3px solid #f85149} .worker{display:flex;align-items:center;gap:10px;padding:8px 10px;background:#161b22;border-radius:6px;margin-bottom:4px;font-size:12px;border-left:3px solid #30363d} -.worker .av{width:32px;height:32px;border-radius:50%;background:#0d1117;border:1px solid #21262d;display:flex;align-items:center;justify-content:center;font-weight:600;color:#c9d1d9;font-size:11px;flex-shrink:0;letter-spacing:0.5px} +.worker .av{width:32px;height:32px;border-radius:50%;background:#0d1117;border:1px solid 
#21262d;display:flex;align-items:center;justify-content:center;font-weight:600;color:#c9d1d9;font-size:11px;flex-shrink:0;letter-spacing:0.5px;overflow:hidden;position:relative} +.worker .av img{position:absolute;inset:0;width:100%;height:100%;object-fit:cover;display:block} .worker[data-role-band="warehouse"]{border-left-color:#58a6ff} .worker[data-role-band="production"]{border-left-color:#d29922} .worker[data-role-band="trades"]{border-left-color:#bc8cff} @@ -232,13 +233,70 @@ function roleBand(role){ // the left edge + uppercase role pill in the detail line. Used by // every chapter that renders worker rows. `name` and `role` drive the // classification; `detail` is the full text after the pill. +// Quick first-name → gender hint for face-pool selection. Same lookup +// idea as the dashboard; if the name is unknown, the server falls back +// to the full pool. Trimmed table — covers the most common names that +// appear in the synthetic worker data. +var FEMALE_NAMES = new Set(['Mary','Patricia','Jennifer','Linda','Elizabeth','Barbara','Susan','Jessica','Sarah','Karen','Lisa','Nancy','Betty','Sandra','Margaret','Ashley','Kimberly','Emily','Donna','Michelle','Carol','Amanda','Melissa','Deborah','Stephanie','Dorothy','Rebecca','Sharon','Laura','Cynthia','Amy','Kathleen','Angela','Shirley','Brenda','Emma','Anna','Pamela','Nicole','Samantha','Katherine','Christine','Helen','Debra','Rachel','Carolyn','Janet','Maria','Catherine','Heather','Diane','Olivia','Julie','Joyce','Victoria','Ruth','Virginia','Lauren','Kelly','Christina','Joan','Evelyn','Judith','Andrea','Hannah','Megan','Cheryl','Jacqueline','Martha','Madison','Teresa','Gloria','Sara','Janice','Ann','Kathryn','Abigail','Sophia','Frances','Jean','Alice','Judy','Isabella','Julia','Grace','Amber','Denise','Danielle','Marilyn','Beverly','Charlotte','Natalie','Theresa','Diana','Brittany','Kayla','Alexis','Lori','Marie','Carmen','Aisha','Rosa','Mia','Audrey','Erin','Tina','Vanessa','Tara','Wendy','Tanya','Maya','Crys
tal','Yvonne','Kara','Shannon','Brianna','Faith','Caroline','Carla','Tracey','Tracy','Rita','Dawn','Tiffany','Stacy','Stacey','Gina','Bonnie','Tammy','Joanne','Jamie','Tonya','Alyssa','Ariana','Elena','Ellie','Erica','Erika','Felicia','Holly','Jenna','Jenny','Krista','Kristen','Kristin','Krystal','Lana','Leah','Lucy','Mallory','Melinda','Meredith','Misty','Monica','Naomi','Paige','Paula','Renee','Rhonda','Robin','Roxanne','Selena','Sierra','Skylar','Sonia','Stella','Tamara','Veronica','Vivian','Whitney','Yolanda','Zoe']); +var MALE_NAMES = new Set(['James','Robert','John','Michael','David','William','Richard','Joseph','Thomas','Charles','Christopher','Daniel','Matthew','Anthony','Mark','Donald','Steven','Paul','Andrew','Joshua','Kenneth','Kevin','Brian','George','Edward','Ronald','Timothy','Jason','Jeffrey','Ryan','Jacob','Gary','Nicholas','Eric','Jonathan','Stephen','Larry','Justin','Scott','Brandon','Benjamin','Samuel','Gregory','Frank','Alexander','Raymond','Patrick','Jack','Dennis','Jerry','Tyler','Aaron','Jose','Adam','Henry','Nathan','Douglas','Zachary','Peter','Kyle','Walter','Ethan','Jeremy','Harold','Keith','Christian','Roger','Noah','Gerald','Carl','Terry','Sean','Austin','Arthur','Lawrence','Jesse','Dylan','Bryan','Joe','Jordan','Billy','Bruce','Albert','Willie','Gabriel','Logan','Alan','Juan','Wayne','Roy','Ralph','Randy','Eugene','Vincent','Russell','Elijah','Louis','Bobby','Philip','Johnny','Marcus','Antonio','Carlos','Diego','Hector','Jorge','Julio','Manuel','Miguel','Pedro','Raul','Ricardo','Roberto','Sergio','Victor','Jamal','Xavier','DeShawn','Dwayne','Jermaine','Malik','Tyrone','Devon','Andre','Brent','Calvin','Casey','Cody','Cole','Cory','Dale','Damon','Darius','Darrell','Dean','Derek','Drew','Earl','Eddie','Floyd','Glenn','Greg','Howard','Ivan','Jared','Jay','Jeff','Joel','Lance','Lee','Leonard','Lloyd','Mario','Martin','Mason','Maurice','Max','Mitchell','Morgan','Nick','Norman','Oliver','Owen','Pete','Quincy','Rafael','Reggie','Rex','Ricky','Ru
ss','Shane','Shaun','Stanley','Steve','Theodore','Todd','Travis','Trevor','Troy','Wade','Warren','Wesley']); +function guessGenderFromFirstName(n){ + if(!n) return null; + var clean=n.replace(/[^A-Za-z]/g,''); + if(!clean) return null; + var c=clean[0].toUpperCase()+clean.slice(1).toLowerCase(); + if(FEMALE_NAMES.has(c)) return 'woman'; + if(MALE_NAMES.has(c)) return 'man'; + return null; +} +function genderFor(name){ + var g = guessGenderFromFirstName(name); + if(g) return g; + if(!name) return 'man'; + var s=String(name); var h=0; + for(var i=0;i; deterministic so + // same worker always gets the same face. Falls back to monogram if + // pool isn't fetched yet. + var faceKey = (opts.face_key) || name || ''; + var firstName = (name||'').split(/\s+/)[0]||''; + var gHint = genderFor(firstName); + var eHint = guessEthnicityFromFirstName(firstName); + if(faceKey){ + var img=document.createElement('img'); + img.alt=''; + img.loading='lazy'; + img.src = P + '/headshots/' + encodeURIComponent(faceKey) + '?g='+gHint+'&e='+eHint; + img.onerror=function(){ this.remove(); }; + av.appendChild(img); + } + w.appendChild(av); var info = el('div','info'); var nm = el('div','nm', name||'?'); if(opts.endorsed){ diff --git a/mcp-server/index.ts b/mcp-server/index.ts index a18bc50..d4eedec 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -1225,6 +1225,78 @@ async function main() { // OSHA national, Chicago history, ticker chart, parent link, // federal contracts, debarment, unions, training. Click any // contractor name in a permit Entity Brief to land here. + // Headshot pool — synthetic StyleGAN faces from + // thispersondoesnotexist.com fetched offline by + // scripts/staffing/fetch_face_pool.py. Deterministic mapping: + // hash(worker key) → pool index → image bytes. Same key always + // gets the same face; different keys spread evenly. + // + // Optional gender hint: ?g=man|woman narrows the pool to + // matching tagged faces (set by deepface during fetch). 
Falls + // back to whole pool if no matches. + if (url.pathname.startsWith("/headshots/") && req.method === "GET") { + const key = decodeURIComponent(url.pathname.slice("/headshots/".length)); + const wantGender = url.searchParams.get("g") || ""; + if (!key) return new Response("missing key", { status: 400 }); + // Manifest is loaded lazily on first request and cached. + // Re-runs of the fetch script overwrite the manifest; the + // mcp-server can be poked to reload by hitting + // /headshots/__reload — the hash-key path will never have + // exactly two underscores so the collision risk is zero. + const HEADSHOT_DIR = "/home/profit/lakehouse/data/headshots"; + if (key === "__reload" || !(globalThis as any)._faces) { + try { + const raw = await Bun.file(`${HEADSHOT_DIR}/manifest.jsonl`).text(); + const lines = raw.trim().split("\n").filter(Boolean); + const all = lines.map((l) => JSON.parse(l)); + (globalThis as any)._faces = { + all, + man: all.filter((r: any) => r.gender === "man"), + woman: all.filter((r: any) => r.gender === "woman"), + untagged: all.filter((r: any) => !r.gender || (r.gender !== "man" && r.gender !== "woman")), + loaded_at: Date.now(), + }; + if (key === "__reload") { + return Response.json({ + reloaded: true, + total: all.length, + man: (globalThis as any)._faces.man.length, + woman: (globalThis as any)._faces.woman.length, + untagged: (globalThis as any)._faces.untagged.length, + }); + } + } catch (e: any) { + return new Response(`face pool not available: ${e.message}. Run scripts/staffing/fetch_face_pool.py first.`, { status: 503 }); + } + } + const F = (globalThis as any)._faces as { all: any[]; man: any[]; woman: any[]; untagged: any[] }; + if (!F || !F.all.length) { + return new Response("face pool empty", { status: 503 }); + } + // Pool selection: gender hint > full pool. If no gender match, + // fall back to the full pool so the worker still gets a face. 
+ let pool = F.all; + if (wantGender === "man" && F.man.length) pool = F.man; + else if (wantGender === "woman" && F.woman.length) pool = F.woman; + // Hash key → pool index. djb2-ish, fits any string. + let h = 5381; + for (let i = 0; i < key.length; i++) h = ((h << 5) + h + key.charCodeAt(i)) | 0; + const idx = Math.abs(h) % pool.length; + const pick = pool[idx]; + const file = Bun.file(`${HEADSHOT_DIR}/${pick.file}`); + if (!(await file.exists())) { + return new Response("face missing on disk", { status: 404 }); + } + return new Response(file, { + headers: { + "Content-Type": "image/jpeg", + "Cache-Control": "public, max-age=86400, immutable", + "X-Face-Pool-Idx": String(pick.id), + "X-Face-Pool-Gender": pick.gender || "untagged", + }, + }); + } + // Profiler index — directory page of everyone who's filed a // Chicago permit (clickable directory of contractors). if (url.pathname === "/profiler" || url.pathname === "/contractors") { diff --git a/mcp-server/search.html b/mcp-server/search.html index c108786..77370b4 100644 --- a/mcp-server/search.html +++ b/mcp-server/search.html @@ -48,7 +48,8 @@ body{font-family:'Inter',-apple-system,system-ui,'Segoe UI',sans-serif;backgroun /* Workers */ .iworker{display:flex;align-items:center;gap:12px;padding:10px 12px;background:#161b22;border-radius:8px;margin-bottom:4px;transition:background 0.15s} .iworker:hover{background:#1c2333} -.iworker .av{width:40px;height:40px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-weight:600;font-size:13px;color:#c9d1d9;flex-shrink:0;background:#161b22;border:1px solid #21262d;letter-spacing:0.5px;font-family:'Inter',-apple-system,sans-serif} +.iworker .av{width:40px;height:40px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-weight:600;font-size:13px;color:#c9d1d9;flex-shrink:0;background:#161b22;border:1px solid #21262d;letter-spacing:0.5px;font-family:'Inter',-apple-system,sans-serif;overflow:hidden;position:relative} 
+.iworker .av img{position:absolute;inset:0;width:100%;height:100%;object-fit:cover;display:block} .iworker .role-pill{display:inline-block;font-size:10px;padding:2px 8px;border-radius:3px;background:#0d1117;color:#8b949e;margin-right:8px;font-weight:600;letter-spacing:0.4px;text-transform:uppercase;border-left:2px solid #30363d} .iworker[data-role-band="warehouse"]{border-left:3px solid #58a6ff} .iworker[data-role-band="production"]{border-left:3px solid #d29922} @@ -2260,6 +2261,86 @@ function addBigMeter(parent,label,val,desc){ d.appendChild(lb);d.appendChild(row);d.appendChild(ds);parent.appendChild(d); } +// First-name → gender hint for face-pool selection. Built from the +// most common 200 US given names. Lookup is best-effort — undefined +// returns means "no hint, draw from full pool." Don't surface this +// anywhere user-visible; it's purely a face-pool selector. +var FEMALE_NAMES = new Set(['Mary','Patricia','Jennifer','Linda','Elizabeth','Barbara','Susan','Jessica','Sarah','Karen','Lisa','Nancy','Betty','Sandra','Margaret','Ashley','Kimberly','Emily','Donna','Michelle','Carol','Amanda','Melissa','Deborah','Stephanie','Dorothy','Rebecca','Sharon','Laura','Cynthia','Amy','Kathleen','Angela','Shirley','Brenda','Emma','Anna','Pamela','Nicole','Samantha','Katherine','Christine','Helen','Debra','Rachel','Carolyn','Janet','Maria','Catherine','Heather','Diane','Olivia','Julie','Joyce','Victoria','Ruth','Virginia','Lauren','Kelly','Christina','Joan','Evelyn','Judith','Andrea','Hannah','Megan','Cheryl','Jacqueline','Martha','Madison','Teresa','Gloria','Sara','Janice','Ann','Kathryn','Abigail','Sophia','Frances','Jean','Alice','Judy','Isabella','Julia','Grace','Amber','Denise','Danielle','Marilyn','Beverly','Charlotte','Natalie','Theresa','Diana','Brittany','Doris','Kayla','Alexis','Lori','Marie','Carmen','Aisha','Rosa','Kim','Mia','Audrey','Erin','Tina','Vanessa','Tara','Wendy','Tanya','Maya','Crystal','Yvonne','Kara','Shannon','Brianna','Faith','Caroline','Carla'
,'Tracey','Tracy','Rita','Dawn','Tiffany','Stacy','Stacey','Gina','Bonnie','Tammy','Joanne','Jamie','Tonya','Alyssa','Ariana','Elena','Ellie','Erica','Erika','Felicia','Holly','Jenna','Jenny','Krista','Kristen','Kristin','Krystal','Lana','Leah','Lucy','Mallory','Melinda','Meredith','Misty','Monica','Mya','Naomi','Paige','Patrice','Paula','Renee','Rhonda','Robin','Roxanne','Sadie','Selena','Shari','Shauna','Sierra','Skylar','Sonia','Stella','Tamara','Taryn','Trina','Veronica','Vivian','Whitney','Yolanda','Zoe','Lakeisha','Latoya','Tasha', + // Hispanic female + 'Esperanza','Luz','Lucia','Camila','Valentina','Mariana','Catalina','Cristina','Daniela','Gabriela','Ximena','Adriana','Beatriz','Pilar','Consuelo','Dolores','Mercedes','Marisol','Guadalupe','Lupita','Inez','Itzel','Yesenia','Monserrat','Renata','Alejandra','Alma','Belen','Blanca','Esmeralda','Imelda','Lourdes','Magdalena','Olga','Refugio','Rocio','Susana','Anita','Fatima', + // South Asian female + 'Priya','Anjali','Neha','Kavya','Pooja','Divya','Meera','Lakshmi','Rani','Asha','Saanvi','Aanya','Aaradhya','Shreya','Riya','Tanvi','Ishita','Shivani','Padma','Sita','Geeta','Rekha','Amira', + // East Asian female + 'Mei','Sakura','Aiko','Sora','Chiyo','Hana','Eun','Xiu','Lan','Hua','Min','Xin','Ying','Zhen','Yan', + // Black female (additional) + 'Imani','Keisha','Lakisha','Kenya','Tamika','Latanya','Latrice','Aaliyah','Kiara','Janelle','Jasmine','Tanisha','Maliyah','Imari','Nia','Zuri','Talia','Jada','Ebony','Dominique', + // Middle Eastern female + 'Layla','Yasmin','Yara','Nadia','Zainab','Rania','Samira','Mariam','Salma','Dunia','Iman','Lina','Mona','Noor','Rana','Sabrina','Soha','Zara' +]); +var MALE_NAMES = new 
Set(['James','Robert','John','Michael','David','William','Richard','Joseph','Thomas','Charles','Christopher','Daniel','Matthew','Anthony','Mark','Donald','Steven','Paul','Andrew','Joshua','Kenneth','Kevin','Brian','George','Edward','Ronald','Timothy','Jason','Jeffrey','Ryan','Jacob','Gary','Nicholas','Eric','Jonathan','Stephen','Larry','Justin','Scott','Brandon','Benjamin','Samuel','Gregory','Frank','Alexander','Raymond','Patrick','Jack','Dennis','Jerry','Tyler','Aaron','Jose','Adam','Henry','Nathan','Douglas','Zachary','Peter','Kyle','Walter','Ethan','Jeremy','Harold','Keith','Christian','Roger','Noah','Gerald','Carl','Terry','Sean','Austin','Arthur','Lawrence','Jesse','Dylan','Bryan','Joe','Jordan','Billy','Bruce','Albert','Willie','Gabriel','Logan','Alan','Juan','Wayne','Roy','Ralph','Randy','Eugene','Vincent','Russell','Elijah','Louis','Bobby','Philip','Johnny','Marcus','Antonio','Carlos','Diego','Hector','Jorge','Julio','Manuel','Miguel','Pedro','Raul','Ricardo','Roberto','Sergio','Victor','Jamal','Xavier','DeShawn','Dwayne','Jermaine','Malik','Tyrone','Devon','Andre','Anwar','Brent','Calvin','Casey','Cody','Cole','Cory','Curt','Dale','Damon','Darius','Darrell','Dean','Derek','Donnie','Drew','Earl','Eddie','Floyd','Glenn','Greg','Howard','Ivan','Jared','Jay','Jeff','Joel','Johnnie','Lance','Lee','Leonard','Lloyd','Mario','Martin','Mason','Maurice','Max','Mitchell','Morgan','Nick','Norman','Oliver','Owen','Pete','Quincy','Rafael','Reggie','Rex','Ricky','Rod','Russ','Salvatore','Shane','Shaun','Stanley','Steve','Theodore','Todd','Travis','Trevor','Troy','Wade','Warren','Wesley', + // Hispanic male + 'Alejandro','Andres','Mateo','Santiago','Sebastian','Emilio','Tomas','Joaquin','Ignacio','Salvador','Cesar','Arturo','Armando','Hugo','Marco','Felipe','Gerardo','Jaime','Leonardo','Luis','Pablo','Ramon','Reynaldo','Vincente','Javier','Esteban','Eduardo','Fernando','Humberto','Ernestino','Cristian','Hernan', + // South Asian male + 
'Raj','Anil','Rohan','Vikram','Arjun','Sanjay','Ravi','Krishna','Pradeep','Sunil','Amit','Deepak','Ashok','Manoj','Rahul','Vijay','Suresh','Naveen','Anand','Nikhil','Aditya','Karan','Rajesh','Ramesh','Kishore','Mohan','Ajay','Aarav','Ishaan', + // East Asian male + 'Wei','Yi','Jin','Hiroshi','Akira','Kenji','Haruto','Hyun','Yoon','Kai','Long','Hong','Hao','Tao','Bao','Cheng','Feng','Qiang','Jian','Dong','Bin','Lei','Hui','Yu','Yuan', + // Black male (additional) + 'Demetrius','Tyrese','Trevon','Kareem','DaQuan','Tyrell','Kwame','Khalil','Rashid','Terrell','Chauncey','Cedric','Imari','Jalen','Jaylen', + // Middle Eastern male + 'Omar','Khalid','Hassan','Hussein','Ahmed','Mohamed','Mohammed','Ali','Karim','Yusuf','Ibrahim','Mahmoud','Saif','Bilal','Faisal','Hamza','Imran','Sami','Wael','Yasin','Zaid' +]); +function guessGenderFromFirstName(name){ + if(!name) return null; + var clean = name.replace(/[^A-Za-z]/g,''); + if(!clean) return null; + // Title-case + var c = clean[0].toUpperCase() + clean.slice(1).toLowerCase(); + if(FEMALE_NAMES.has(c)) return 'woman'; + if(MALE_NAMES.has(c)) return 'man'; + return null; +} + +// First-name → ethnicity hint. The synthetic-data generator built +// workers_500k from a multi-cultural name pool (Raj, DeShawn, Jamal, +// Mei, Wei, Carmen, Esperanza, Aisha, etc.). The hint is used ONLY +// to bias face-pool selection toward visually-aligned StyleGAN faces; +// it never surfaces in any worker-facing label or report. Buckets +// match common deepface "race" categories so the pool tags align +// when deepface is later run over the headshot pool. 
+var NAMES_SOUTH_ASIAN = new Set(['Raj','Anil','Rohan','Vikram','Arjun','Sanjay','Ravi','Krishna','Pradeep','Sunil','Amit','Deepak','Ashok','Manoj','Rahul','Vijay','Suresh','Naveen','Anand','Nikhil','Aditya','Karan','Rajesh','Ramesh','Kishore','Mohan','Ajay','Priya','Anjali','Neha','Kavya','Pooja','Divya','Meera','Lakshmi','Rani','Asha','Saanvi','Aanya','Aaradhya','Shreya','Riya','Tanvi','Ishita','Aarav','Ishaan','Shivani','Padma','Sita','Geeta','Rekha']); +var NAMES_EAST_ASIAN = new Set(['Wei','Mei','Yi','Jin','Chen','Lin','Liu','Wang','Zhang','Yang','Wu','Zhao','Sun','Hiroshi','Yuki','Akira','Kenji','Sakura','Aiko','Haruto','Sora','Chiyo','Hana','Hyun','Eun','Yoon','Kai','Long','Hong','Xiu','Lan','Hua','Hao','Tao','Bao','Cheng','Feng','Qiang','Jian','Dong','Bin','Min','Lei','Hui','Yu','Xin','Ying','Zhen','Yuan','Yan']); +var NAMES_HISPANIC = new Set(['Carmen','Carlos','Maria','Diego','Hector','Jorge','Julio','Manuel','Miguel','Pedro','Raul','Ricardo','Roberto','Sergio','Antonio','Esperanza','Luz','Sofia','Lucia','Isabella','Camila','Valentina','Mariana','Elena','Rosa','Catalina','Esteban','Fernando','Eduardo','Javier','Alejandro','Andres','Mateo','Santiago','Sebastian','Emilio','Tomas','Cristina','Daniela','Gabriela','Ximena','Adriana','Beatriz','Pilar','Consuelo','Dolores','Mercedes','Xavier','Marisol','Guadalupe','Lupita','Inez','Itzel','Yolanda','Yesenia','Monserrat','Renata','Ximena','Joaquin','Ignacio','Rafael','Salvador','Cesar','Arturo','Armando','Hugo','Marco','Alejandra','Alma','Belen','Blanca','Esmeralda','Fatima','Gloria','Imelda','Lourdes','Magdalena','Olga','Paula','Refugio','Rocio','Susana','Teresa','Veronica','Anita','Ernestino','Felipe','Gerardo','Humberto','Jaime','Leonardo','Luis','Pablo','Ramon','Reynaldo','Vincente']); +var NAMES_BLACK = new 
Set(['DeShawn','Jamal','Aisha','Latoya','Tyrone','Malik','Imani','Keisha','Tariq','Lakisha','Kenya','Tamika','Shaquille','Andre','Marcus','Demetrius','Jermaine','Reggie','Tyrese','Darius','Trevon','Kareem','Damon','Jalen','Jaylen','Dwayne','DaQuan','Latanya','Latrice','Aaliyah','Kiara','Janelle','Jasmine','Tanisha','Yolanda','Maurice','Tyrell','Kwame','Khalil','Rashid','Terrell','Chauncey','Cedric','Maliyah','Imari','Nia','Zuri','Talia','Jada','Ebony','Dominique']); +var NAMES_MIDDLE_EASTERN = new Set(['Layla','Omar','Khalid','Fatima','Yasmin','Hassan','Hussein','Ahmed','Mohamed','Mohammed','Ali','Karim','Yusuf','Yara','Nadia','Zainab','Rania','Samira','Mariam','Salma','Ibrahim','Mahmoud','Saif','Anwar','Bilal','Faisal','Hamza','Imran','Jamal','Rashid','Sami','Tariq','Wael','Yasin','Zaid','Amira','Dunia','Iman','Lina','Mona','Noor','Rana','Sabrina','Soha','Yara','Zara']); +function guessEthnicityFromFirstName(name){ + if(!name) return 'caucasian'; + var clean = name.replace(/[^A-Za-z]/g,''); + if(!clean) return 'caucasian'; + var c = clean[0].toUpperCase() + clean.slice(1).toLowerCase(); + // Order matters where names overlap. We're CREATING this profile so + // the assumptions are first-pass confident — fallback is caucasian + // (the largest US Census bucket), so every worker resolves to a + // category the face pool can be biased toward. + if(NAMES_MIDDLE_EASTERN.has(c)) return 'middle_eastern'; + if(NAMES_BLACK.has(c)) return 'black'; + if(NAMES_HISPANIC.has(c)) return 'hispanic'; + if(NAMES_SOUTH_ASIAN.has(c)) return 'south_asian'; + if(NAMES_EAST_ASIAN.has(c)) return 'east_asian'; + return 'caucasian'; +} +// Forced-confident gender resolver — defaults to a deterministic guess +// when the name table doesn't match, rather than leaving "unknown." +// We're authoring the synthetic data; we own the confidence call. 
+function genderFor(name){ + var g = guessGenderFromFirstName(name); + if(g) return g; + if(!name) return 'man'; + // hash-based fallback so unknown names still spread roughly 50/50 + var s = String(name); + var h = 0; for (var i=0;i. Same key → same face by deterministic + // hash. If the image fails to load (face pool not yet fetched, CDN + // blocked, etc.), the monogram remains visible. var av=document.createElement('div');av.className='av'; var role = (workerDataRef && workerDataRef.role) || (detail||'').split(' · ')[0] || ''; var band = roleBand(role); if(band.band) w.dataset.roleBand = band.band; av.textContent=(name||'?').split(' ').map(function(n){return(n[0]||'').toUpperCase()}).join('').substring(0,2); + // Layer the headshot on top. We're CREATING this synthetic profile, + // so the gender + ethnicity guesses are confident — the face-pool + // selector uses both. Once deepface tags the pool the server will + // narrow accordingly; until then it falls back to the full pool but + // the URL shape is forward-compatible. 
+ var faceKey = (workerDataRef && (workerDataRef.candidate_id || workerDataRef.doc_id)) || name || ''; + var firstName = (name||'').split(/\s+/)[0]||''; + var gHint = genderFor(firstName); + var eHint = guessEthnicityFromFirstName(firstName); + if(faceKey){ + var img=document.createElement('img'); + img.alt=''; + img.loading='lazy'; + var qs = '?g=' + gHint + '&e=' + eHint; + img.src = P + '/headshots/' + encodeURIComponent(faceKey) + qs; + img.onerror=function(){ this.remove(); }; + av.appendChild(img); + } w.appendChild(av); var info=document.createElement('div');info.className='info'; var nm=document.createElement('div');nm.className='nm';nm.textContent=name; diff --git a/scripts/staffing/fetch_face_pool.py b/scripts/staffing/fetch_face_pool.py new file mode 100644 index 0000000..d39d1c4 --- /dev/null +++ b/scripts/staffing/fetch_face_pool.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +""" +fetch_face_pool.py — pull N synthetic headshots from +https://thispersondoesnotexist.com/, write to data/headshots/face_NNNN.jpg, +optionally tag each with gender via deepface, emit a JSONL manifest. + +Each fetch is a fresh StyleGAN face — no real people. Deterministic per +worker mapping happens at serve time (mcp-server hashes the worker key +into the pool); this script just builds the pool. + +Usage: + python3 scripts/staffing/fetch_face_pool.py --count 300 --concurrency 3 + python3 scripts/staffing/fetch_face_pool.py --count 50 --no-gender + +Re-running is idempotent: existing face_NNNN.jpg files are skipped, and +the manifest is rewritten from disk state. 
from __future__ import annotations
import argparse
import hashlib
import json
import os
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import urllib.request
import urllib.error

URL = "https://thispersondoesnotexist.com/"
UA = "Lakehouse/1.0 (face-pool fetch · synthetic-only · no real-person tracking)"

# Smallest payload accepted as a face image. The same threshold decides
# whether a freshly downloaded blob is kept AND whether a file already on
# disk counts as cached, so a file that was accepted once is never refetched.
MIN_FACE_BYTES = 1024


def fetch_one(
    idx: int, out_dir: str, *, url: str = URL
) -> tuple[int, str, bool, str | None]:
    """Ensure ``face_<idx>.jpg`` exists in *out_dir*, downloading it if needed.

    Args:
        idx: Pool slot; determines the file name ``face_{idx:04d}.jpg``.
        out_dir: Directory the pool lives in (must already exist).
        url: Where to download the image from. Defaults to ``URL``.

    Returns:
        ``(idx, basename, cached, error)``. ``cached`` is True when a
        large-enough file was already on disk and no request was made;
        ``error`` is None on success, otherwise a short description. This
        function reports failures through ``error`` rather than raising.
    """
    fname = f"face_{idx:04d}.jpg"
    full = os.path.join(out_dir, fname)
    try:
        if os.path.getsize(full) >= MIN_FACE_BYTES:
            return idx, fname, True, None
    except OSError:
        pass  # not on disk (or unreadable) → fall through and fetch

    # Download to a sidecar and rename into place, so an interrupted write
    # can never leave a truncated face_NNNN.jpg that a later run would
    # mistake for a cached image.
    partial = full + ".part"
    try:
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=20) as resp:
            blob = resp.read()
        if len(blob) < MIN_FACE_BYTES:
            return idx, fname, False, f"response too small ({len(blob)} bytes)"
        with open(partial, "wb") as f:
            f.write(blob)
        os.replace(partial, full)
        return idx, fname, False, None
    except urllib.error.URLError as e:
        err = f"urlerror: {e}"
    except Exception as e:
        # Best-effort by design: one bad slot must not abort the whole pool.
        err = f"{type(e).__name__}: {e}"

    # Failure path only: clear any half-written sidecar.
    try:
        os.remove(partial)
    except OSError:
        pass
    return idx, fname, False, err


def _gender_label(raw: object) -> str | None:
    """Map a deepface ``dominant_gender`` value to 'man' / 'woman' / None."""
    text = (raw or "").lower().strip()  # type: ignore[union-attr]
    if text.startswith("man"):
        return "man"
    if text.startswith("woman"):
        return "woman"
    return None


def maybe_tag_gender(records: list[dict], out_dir: str) -> dict[str, int]:
    """Label each record with a ``gender`` key using deepface, if available.

    Mutates *records* in place: every record gets ``gender`` set to
    ``"man"``, ``"woman"`` or ``None``; a record whose analysis raised also
    gets a ``gender_error`` string.

    Args:
        records: Manifest records; each must have a ``"file"`` basename.
        out_dir: Directory containing the image files.

    Returns:
        A ``{label: count}`` summary where untagged records are counted
        under ``"unknown"``. If deepface cannot be imported, every record is
        tagged ``None`` and ``{"unknown": len(records)}`` is returned.
    """
    try:
        from deepface import DeepFace  # type: ignore
    except Exception as e:
        # Broad on purpose: a broken deepface install can fail with more
        # than ImportError, and tagging is optional.
        print(f" (deepface unavailable: {e}) — pool will mix naturally")
        for r in records:
            r["gender"] = None
        return {"unknown": len(records)}

    print(" tagging gender via deepface (CPU; ~0.5-1s per face)…")
    counts: dict[str, int] = {}
    total = len(records)
    for done, r in enumerate(records, 1):
        full = os.path.join(out_dir, r["file"])
        try:
            ana = DeepFace.analyze(
                img_path=full,
                actions=["gender"],
                enforce_detection=False,
                silent=True,
            )
            # analyze() may return a list of per-face results; use the first.
            if isinstance(ana, list):
                ana = ana[0] if ana else {}
            r["gender"] = _gender_label(ana.get("dominant_gender"))
        except Exception as e:
            r["gender"] = None
            r["gender_error"] = f"{type(e).__name__}: {e}"
        label = r["gender"] or "unknown"
        counts[label] = counts.get(label, 0) + 1
        if done % 25 == 0:
            print(f" [{done}/{total}] {counts}")
    return counts


def main():
    """CLI entry point: fetch the pool, optionally tag gender, write manifest.

    Fetches ``--count`` slots concurrently, drops slots that errored, then
    rewrites ``manifest.jsonl`` in the output directory from the surviving
    records and prints a short fingerprint of the result.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--count", type=int, default=300, help="how many faces to maintain in pool")
    p.add_argument(
        "--out",
        default=os.path.join(os.path.dirname(__file__), "..", "..", "data", "headshots"),
    )
    p.add_argument("--concurrency", type=int, default=3, help="parallel fetches (be polite)")
    p.add_argument("--no-gender", action="store_true", help="skip deepface gender tagging")
    args = p.parse_args()

    out = os.path.realpath(args.out)
    os.makedirs(out, exist_ok=True)

    print(f"Fetching {args.count} faces → {out}")
    print(f"Source: {URL} (synthetic StyleGAN — no real people)")

    # Indexed by slot so the manifest comes out in id order even though
    # fetches complete out of order.
    results: list[dict | None] = [None] * args.count
    t0 = time.time()
    with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex:
        futs = {ex.submit(fetch_one, i, out): i for i in range(args.count)}
        for done, fut in enumerate(as_completed(futs), 1):
            idx, fname, cached, err = fut.result()
            results[idx] = {
                "id": idx,
                "file": fname,
                "cached": cached,
                "error": err,
            }
            if done % 25 == 0 or done == args.count:
                ok = sum(1 for r in results if r and not r.get("error"))
                print(f" [{done}/{args.count}] {ok} ok ({time.time()-t0:.1f}s)")

    # Drop slots that errored or are still None (shouldn't happen)
    records = [r for r in results if r and not r.get("error")]
    print(f"\nPool ready: {len(records)} faces, {sum(1 for r in records if r['cached'])} from cache")

    if not args.no_gender and records:
        print("\nGender-tagging pass:")
        summary = maybe_tag_gender(records, out)
        print(f" distribution: {summary}")
    else:
        # NOTE: the manifest is rewritten below, so this also clears any
        # gender tags written by an earlier tagged run.
        for r in records:
            r["gender"] = None

    # Strip transient flags before persisting
    for r in records:
        r.pop("cached", None)
        r.pop("error", None)

    manifest = os.path.join(out, "manifest.jsonl")
    with open(manifest, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(r) + "\n" for r in records)
    print(f"\nManifest: {manifest} ({len(records)} entries)")

    # Quick checksum manifest for downstream debugging
    h = hashlib.sha256()
    for r in records:
        h.update(r["file"].encode())
        h.update(b"|")
        h.update((r.get("gender") or "?").encode())
    print(f"Pool fingerprint (sha256): {h.hexdigest()[:16]}")


if __name__ == "__main__":
    main()