Compare commits

..

No commits in common. "dcf4c9a8e79fa4e7e5ff136d03175251517cb005" and "528fded11b5722a19aec547a52abe27e5db161a9" have entirely different histories.

10 changed files with 235 additions and 2224 deletions

View File

@ -175,7 +175,7 @@
{"id": 174, "file": "face_0174.jpg", "gender": "woman", "race": "east_asian", "age": 26}
{"id": 175, "file": "face_0175.jpg", "gender": "man", "race": "caucasian", "age": 30}
{"id": 176, "file": "face_0176.jpg", "gender": "man", "race": "middle_eastern", "age": 37}
{"id": 177, "file": "face_0177.jpg", "gender": "woman", "race": "caucasian", "age": 25}
{"id": 177, "file": "face_0177.jpg", "gender": "man", "race": "middle_eastern", "age": 37}
{"id": 178, "file": "face_0178.jpg", "gender": "woman", "race": "caucasian", "age": 28}
{"id": 179, "file": "face_0179.jpg", "gender": "man", "race": "hispanic", "age": 28}
{"id": 180, "file": "face_0180.jpg", "gender": "woman", "race": "caucasian", "age": 38}
@ -588,7 +588,7 @@
{"id": 587, "file": "face_0587.jpg", "gender": "man", "race": "caucasian", "age": 34}
{"id": 588, "file": "face_0588.jpg", "gender": "man", "race": "caucasian", "age": 21, "excluded": "minor"}
{"id": 589, "file": "face_0589.jpg", "gender": "man", "race": "caucasian", "age": 21, "excluded": "minor"}
{"id": 590, "file": "face_0590.jpg", "gender": "woman", "race": "hispanic", "age": 31}
{"id": 590, "file": "face_0590.jpg", "gender": "man", "race": "caucasian", "age": 26}
{"id": 591, "file": "face_0591.jpg", "gender": "woman", "race": "hispanic", "age": 31}
{"id": 592, "file": "face_0592.jpg", "gender": "woman", "race": "caucasian", "age": 28}
{"id": 593, "file": "face_0593.jpg", "gender": "woman", "race": "caucasian", "age": 28}

View File

@ -56,12 +56,7 @@ details .body{padding-top:10px;font-size:12px;color:#8b949e}
.worker{display:flex;align-items:center;gap:10px;padding:8px 10px;background:#161b22;border-radius:6px;margin-bottom:4px;font-size:12px;border-left:3px solid #30363d}
.worker .av{width:32px;height:32px;border-radius:50%;background:#0d1117;border:1px solid #21262d;display:flex;align-items:center;justify-content:center;font-weight:600;color:#c9d1d9;font-size:11px;flex-shrink:0;letter-spacing:0.5px;overflow:hidden;position:relative}
.worker .av img{position:absolute;inset:0;width:100%;height:100%;object-fit:cover;display:block;
/* Softening — mirror of search.html. Pulls saturation + contrast off
the SDXL Turbo over-render so faces feel less "AI-generated".
If you tweak one, tweak the other. */
filter: saturate(0.86) contrast(0.93) brightness(1.02) blur(0.3px);
}
.worker .av img{position:absolute;inset:0;width:100%;height:100%;object-fit:cover;display:block}
.worker[data-role-band="warehouse"]{border-left-color:#58a6ff}
.worker[data-role-band="production"]{border-left-color:#d29922}
.worker[data-role-band="trades"]{border-left-color:#bc8cff}
@ -317,8 +312,26 @@ function workerRow(name, role, detail, opts){
if(band.band) w.dataset.roleBand = band.band;
var initials = (name||'?').split(' ').map(function(s){return (s[0]||'').toUpperCase()}).join('').substring(0,2);
var av = el('div','av',initials);
// Headshot insertion removed 2026-04-28. The .av element stays as
// a monogram-initials avatar.
// Real synthetic headshot via /headshots/<key>; deterministic so
// same worker always gets the same face. Falls back to monogram if
// pool isn't fetched yet.
var faceKey = (opts.face_key) || name || '';
var nameParts = (name||'').trim().split(/\s+/);
var firstName = nameParts[0]||'';
var lastName = nameParts.length > 1 ? nameParts[nameParts.length-1] : '';
var gHint = genderFor(firstName);
var eHint = (typeof guessEthnicityFromName === 'function')
? guessEthnicityFromName(firstName, lastName)
: guessEthnicityFromFirstName(firstName);
if(faceKey){
var img=document.createElement('img');
img.alt='';
// Eager + cache-buster v=2: 11KB thumbs are cheap to load fresh
// and the v= param invalidates browsers holding old photos.
img.src = P + '/headshots/' + encodeURIComponent(faceKey) + '?g='+gHint+'&e='+eHint+'&v=2';
img.onerror=function(){ this.remove(); };
av.appendChild(img);
}
w.appendChild(av);
var info = el('div','info');
var nm = el('div','nm', name||'?');

View File

@ -1,123 +0,0 @@
// Visual filler iconography rendered through ComfyUI. Distinct from
// role_scenes.ts (which renders portraits) — these are object/badge
// style renders that fill dead space on worker cards: cert pills,
// role-prop chips, hazard indicators, empty-state heroes.
//
// Layout on disk:
// data/icons_pool/{category}/{slug}.webp
//
// Cache invalidation:
// ICONS_VERSION mixes into the on-disk filename (slug includes
// version). Bump it after editing a recipe so prior renders are
// ignored on next view.
// Buckets an icon can belong to. The category doubles as the first
// path segment of the on-disk cache layout
// (data/icons_pool/{category}/{slug}.webp) and of the render route key.
export type IconCategory = "cert" | "role_prop" | "status" | "hazard" | "empty";
// One renderable icon: its identity (category + slug), the UI label,
// and the diffusion prompts that produce it.
export interface IconRecipe {
// Stable identifier; combined with category it forms the lookup key.
slug: string;
category: IconCategory;
// Text label that appears next to / under the icon. The front-end
// already renders this text in cert pills; the icon is supplementary.
display: string;
// Full diffusion prompt. Style guidance baked in. SDXL Turbo at 8
// steps reliably produces clean macro photography, so default to
// photographic prop shots over flat-vector illustrations (the model
// hallucinates noise into flat-vector geometry at low step counts).
prompt: string;
// Negative prompt — what NOT to render. Crucial for icons because
// SDXL likes to add hands/text/people unprompted. When omitted,
// DEFAULT_NEGATIVE is used instead.
negative?: string;
}
// Fallback negative prompt applied to every icon render whose recipe
// does not supply its own `negative`. Empirically these terms are the
// most common SDXL Turbo off-style failure modes.
export const DEFAULT_NEGATIVE = [
"people, hands, faces, blurry, low quality, watermark, signature, ",
"logos, copyright, distorted text, garbled letters, multiple objects",
].join("");
// TODO J — review and tune the prompts here. Each one is what diffusion
// sees verbatim. The visual decision: photographic prop shots (macro
// photo of an actual badge / placard / sticker) vs flat-icon vector
// style. Default below is photographic — matches the worker headshot
// aesthetic. Flip a recipe to flat-vector by replacing "macro photograph"
// with "flat icon illustration on solid color background, minimal vector".
//
// Visual cues that work well in SDXL Turbo at 8 steps:
// - "macro photograph", "isolated on plain background", "studio lighting"
// - Concrete colors ("orange and black warning diamond") not adjectives
// - Avoid: small text in the prompt (model garbles it), specific brand
//   names (creates fake logos), detailed scene composition
// One recipe per recognized certification string — `certToSlug` below
// maps the free-form cert text from the data onto these slugs.
const CERT_ICONS: IconRecipe[] = [
{ slug: "osha-10", category: "cert", display: "OSHA-10",
prompt: "macro photograph of a circular yellow safety badge with a black hard hat icon at center, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "osha-30", category: "cert", display: "OSHA-30",
prompt: "macro photograph of a circular orange safety badge with a black hard hat icon at center, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "first-aid-cpr", category: "cert", display: "First Aid/CPR",
prompt: "macro photograph of a small enamel pin badge featuring a bold red cross on a white circular background, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "hazmat", category: "cert", display: "Hazmat",
prompt: "macro photograph of a HAZMAT warning placard, bold orange and black diamond shape with a flame icon, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "forklift", category: "cert", display: "Forklift",
prompt: "macro photograph of a yellow industrial forklift safety badge with a forklift silhouette icon, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "reach-truck", category: "cert", display: "Reach Truck",
prompt: "macro photograph of a navy blue industrial certification badge with a warehouse reach-truck silhouette icon, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "order-picker", category: "cert", display: "Order Picker",
prompt: "macro photograph of a green industrial certification badge with a warehouse order-picker silhouette icon, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "lockout-tagout", category: "cert", display: "Lockout/Tagout",
prompt: "macro photograph of a bright red padlock tag with a danger warning, hanging on a metal industrial valve, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "msds", category: "cert", display: "MSDS",
prompt: "macro photograph of a folded chemical safety data sheet booklet with chemical hazard pictograms visible on cover, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "confined-space", category: "cert", display: "Confined Space",
prompt: "macro photograph of a yellow confined space warning sign featuring a manhole entry icon, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "servsafe", category: "cert", display: "ServSafe",
prompt: "macro photograph of a dark green food safety certification badge featuring a stylized chef hat icon, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "fire-safety", category: "cert", display: "Fire Safety",
prompt: "macro photograph of a red enamel pin badge featuring a flame icon and a fire extinguisher silhouette, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "iso-9001", category: "cert", display: "ISO 9001",
prompt: "macro photograph of a deep blue circular quality-management certification seal with embossed metallic ring, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
];
// Role-band visual chips — small icons that go in the role pill area.
// One per band, optional inline supplement to the existing colored pill.
// Slugs here mirror the role-band names used elsewhere in the app
// (warehouse / production / trades / driver / lead).
const ROLE_PROP_ICONS: IconRecipe[] = [
{ slug: "warehouse", category: "role_prop", display: "Warehouse",
prompt: "macro photograph of a yellow hard hat with a high-visibility safety vest folded behind it, isolated on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "production", category: "role_prop", display: "Production",
prompt: "macro photograph of a navy blue work shirt and protective safety glasses on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "trades", category: "role_prop", display: "Trades",
prompt: "macro photograph of a leather work glove and a small adjustable wrench on a neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "driver", category: "role_prop", display: "Driver",
prompt: "macro photograph of a navy delivery driver baseball cap and a clipboard manifest on a neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
{ slug: "lead", category: "role_prop", display: "Lead",
prompt: "macro photograph of a tablet showing a bar chart and a high-vis vest folded beside it on neutral grey backdrop, photorealistic, sharp focus, studio lighting" },
];
// Flat lookup over every recipe list above, keyed "category/slug" —
// the same key shape the /icons/render/ route extracts from its URL.
export const ICONS: Record<string, IconRecipe> = [...CERT_ICONS, ...ROLE_PROP_ICONS].reduce<
Record<string, IconRecipe>
>((table, recipe) => {
table[`${recipe.category}/${recipe.slug}`] = recipe;
return table;
}, {});
// v2 — 256×256 canvas, intended to be displayed monochrome via CSS
// `filter: grayscale(1)`. Smaller canvas, tighter crops, crisper at
// 14px display size. The version mixes into the on-disk filename, so
// bumping it invalidates previously rendered icons.
export const ICONS_VERSION = "v2";
// Map a free-form cert string from the data ("First Aid/CPR",
// "OSHA-10", "Lockout/Tagout") to the canonical recipe slug.
// Matching is case-insensitive with whitespace collapsed to dashes;
// rules are tried in order and the first hit wins. Returns null when
// no recipe matches.
export function certToSlug(cert: string): string | null {
const key = (cert || "").trim().toLowerCase().replace(/\s+/g, "-");
const rules: [(c: string) => boolean, string][] = [
[(c) => c === "osha-10", "osha-10"],
[(c) => c === "osha-30", "osha-30"],
[(c) => c.startsWith("first") || c.includes("cpr"), "first-aid-cpr"],
[(c) => c === "hazmat" || c.startsWith("hazwoper"), "hazmat"],
[(c) => c === "forklift" || c.startsWith("pit"), "forklift"],
[(c) => c.startsWith("reach"), "reach-truck"],
[(c) => c.startsWith("order"), "order-picker"],
[(c) => c.startsWith("lockout") || c.includes("tagout"), "lockout-tagout"],
[(c) => c === "msds" || c.startsWith("ghs"), "msds"],
[(c) => c.startsWith("confined"), "confined-space"],
[(c) => c === "servsafe", "servsafe"],
[(c) => c.startsWith("fire"), "fire-safety"],
[(c) => c.startsWith("iso"), "iso-9001"],
];
for (const [matches, slug] of rules) {
if (matches(key)) return slug;
}
return null;
}

View File

@ -19,8 +19,6 @@ import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
import { z } from "zod";
import { startTrace, logSpan, logGeneration, scoreTrace, flush as flushTraces } from "./tracing.js";
import { buildPermitBrief } from "./entity.js";
import { roleBand, SCENES, SCENES_VERSION, FACE_RENDER_DIM, type RoleBand } from "./role_scenes.js";
import { ICONS, ICONS_VERSION, DEFAULT_NEGATIVE, certToSlug, type IconRecipe } from "./icon_recipes.js";
const BASE = process.env.LAKEHOUSE_URL || "http://localhost:3100";
const PORT = parseInt(process.env.MCP_PORT || "3700");
@ -1233,110 +1231,6 @@ async function main() {
// contractor / profile modal where one worker gets the
// spotlight. NB: declared BEFORE the pool route so the prefix
// match doesn't intercept it.
// Single source of truth for the pre-render script. Read
// role_scenes.ts SCENES + SCENES_VERSION so a Python pre-render
// job (scripts/staffing/render_role_pool.py) builds the role-
// aware pool with the exact prompts the server will use on the
// ComfyUI hot-path. No drift.
if (url.pathname === "/headshots/_scenes" && req.method === "GET") {
return Response.json({ version: SCENES_VERSION, scenes: SCENES });
}
// Single source of truth for icon_recipes.ts. Used by the
// pre-render script (scripts/staffing/render_icons.py) and any
// tooling that wants to enumerate available icons.
if (url.pathname === "/icons/_recipes" && req.method === "GET") {
return Response.json({
version: ICONS_VERSION,
default_negative: DEFAULT_NEGATIVE,
recipes: ICONS,
});
}
// Free-text cert resolver: front-end passes the raw cert string
// from the data ("First Aid/CPR", "OSHA-10", "Lockout/Tagout")
// and we resolve to a recipe slug + 302 to the cached/rendered
// icon. Returns 404 (not error) when no recipe matches — the
// front-end can hang an `onerror="this.remove()"` to silently
// drop the img tag for unrecognized certs.
if (url.pathname === "/icons/cert" && req.method === "GET") {
const text = url.searchParams.get("text") || "";
const slug = certToSlug(text);
if (!slug) return new Response(`no recipe for cert: ${text}`, { status: 404 });
return new Response(null, {
status: 302,
headers: { "Location": `/icons/render/cert/${slug}` },
});
}
// Cert / role-prop / status / hazard / empty icons. Lookup is
// category/slug; on cache miss the route renders via ComfyUI.
// Filename layout: data/icons_pool/{category}/{slug}_{version}.webp
// — the version suffix means editing a recipe yields a new file
// rather than overwriting in place, so a misfire is recoverable.
if (url.pathname.startsWith("/icons/render/") && req.method === "GET") {
const rest = url.pathname.slice("/icons/render/".length);
const recipe: IconRecipe | undefined = ICONS[rest];
if (!recipe) return new Response(`unknown icon: ${rest}`, { status: 404 });
const ICONS_DIR = "/home/profit/lakehouse/data/icons_pool";
await Bun.$`mkdir -p ${ICONS_DIR}/${recipe.category}`.quiet();
const cachePath = `${ICONS_DIR}/${recipe.category}/${recipe.slug}_${ICONS_VERSION}.webp`;
const cached = Bun.file(cachePath);
if (await cached.exists()) {
return new Response(cached, {
headers: {
"Content-Type": "image/webp",
"Cache-Control": "public, max-age=86400",
"X-Icon-Source": "cached",
"X-Icon-Recipe": recipe.slug,
},
});
}
// Deterministic seed per recipe — same recipe always renders
// the same icon. Mixing the version means SCENES_VERSION-
// style invalidation works for icons too.
const seedStr = `${recipe.category}|${recipe.slug}|${ICONS_VERSION}`;
let seed = 5381;
for (let i = 0; i < seedStr.length; i++) seed = ((seed << 5) + seed + seedStr.charCodeAt(i)) | 0;
seed = Math.abs(seed) % 2147483647;
try {
const genResp = await fetch("http://localhost:3600/generate", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
prompt: recipe.prompt,
negative_prompt: recipe.negative ?? DEFAULT_NEGATIVE,
// 256×256 — smaller canvas = cleaner icon. SDXL Turbo
// at 8 steps adds visible texture/noise into 512² that
// looks "AI" at small display sizes; tightening to 256
// both renders ~3× faster and produces crisper edges
// when the front-end downsamples to 14px.
width: 256,
height: 256,
steps: 8,
seed,
}),
signal: AbortSignal.timeout(30000),
});
if (!genResp.ok) return new Response(`gen failed: ${genResp.status}`, { status: 502 });
const data: any = await genResp.json();
if (!data.image) return new Response("no image returned", { status: 502 });
const bytes = Uint8Array.from(atob(data.image), (c) => c.charCodeAt(0));
await Bun.write(cachePath, bytes);
return new Response(bytes, {
headers: {
"Content-Type": "image/webp",
"Cache-Control": "public, max-age=86400",
"X-Icon-Source": "fresh",
"X-Icon-Recipe": recipe.slug,
"X-Icon-Gen-Ms": String(data.time_ms || 0),
},
});
} catch (e: any) {
return new Response(`gen error: ${e.message}`, { status: 502 });
}
}
if (url.pathname.startsWith("/headshots/generate/") && req.method === "GET") {
const key = decodeURIComponent(url.pathname.slice("/headshots/generate/".length));
if (!key) return new Response("missing key", { status: 400 });
@ -1344,14 +1238,9 @@ async function main() {
const r = (url.searchParams.get("e") || "").toLowerCase();
const role = (url.searchParams.get("role") || "warehouse worker").toLowerCase();
const age = parseInt(url.searchParams.get("age") || "32", 10) || 32;
const band = roleBand(role);
// SCENES_VERSION mixes into the cache key so editing
// role_scenes.ts auto-invalidates prior renders — coordinator
// tweaks the warehouse prompt, every warehouse face refreshes
// on next view.
const cacheKey = await crypto.subtle.digest(
"SHA-256",
new TextEncoder().encode(`${key}|${g}|${r}|${role}|${age}|${SCENES_VERSION}`)
new TextEncoder().encode(`${key}|${g}|${r}|${role}|${age}`)
).then((b) => Array.from(new Uint8Array(b)).map((x) => x.toString(16).padStart(2, "0")).join("").slice(0, 24));
const GEN_DIR = "/home/profit/lakehouse/data/headshots_gen";
await Bun.$`mkdir -p ${GEN_DIR}`.quiet();
@ -1373,12 +1262,7 @@ async function main() {
: r === "middle_eastern" ? "Middle Eastern"
: "";
const genderText = g === "woman" ? "woman" : g === "man" ? "man" : "person";
const scene = SCENES[band].scene;
// Note: dropped "plain studio background" / "dslr" — those
// collapsed every render to interchangeable studio shots.
// The scene clause now carries clothing + backdrop so a
// forklift operator looks like a forklift operator.
const prompt = `professional headshot portrait of a ${age}-year-old ${raceText} ${genderText} ${role}, ${scene}, neutral confident expression, sharp focus, photorealistic`;
const prompt = `professional corporate headshot portrait of a ${age}-year-old ${raceText} ${genderText}, ${role}, neutral expression, plain studio background, soft natural lighting, sharp focus, photorealistic, dslr`;
// Worker-derived seed — same input always picks the same
// pixel layout in StyleGAN2 latent space, so the face is
// deterministic per worker BUT distinct from any other
@ -1391,7 +1275,7 @@ async function main() {
const genResp = await fetch("http://localhost:3600/generate", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ prompt, width: FACE_RENDER_DIM, height: FACE_RENDER_DIM, steps: 8, seed }),
body: JSON.stringify({ prompt, width: 512, height: 512, steps: 8, seed }),
signal: AbortSignal.timeout(30000),
});
if (!genResp.ok) return new Response(`gen failed: ${genResp.status}`, { status: 502 });
@ -1494,34 +1378,35 @@ async function main() {
if (!F || !F.all.length) {
return new Response("face pool empty", { status: 503 });
}
// Pool selection: try gender×race intersection first. If
// bucket is too sparse to look natural across many cards
// (south_asian/black/middle_eastern_woman are 2-10 faces),
// hand off to ComfyUI generate so the user sees a unique
// face per worker instead of 4 photos shared across 200
// cards. Threshold 30 keeps the dense buckets fast and
// routes only the sparse ones through GPU.
const wantRace = url.searchParams.get("e") || "";
// NOTE: role-aware pool + ComfyUI sparse redirect were removed
// 2026-04-28 — diffusion output at 8 steps with the existing
// editorial_hero workflow produced burnt-looking faces ("looks
// like someone burnt the pictures"). Until serve_imagegen.py
// is fixed to honor a portrait-friendly negative prompt and
// run with proper steps/cfg, every face comes from the studio
// pool (StyleGAN photos from thispersondoesnotexist.com) and
// gets B&W via CSS. The role pool files at
// data/headshots_role_pool/{v1,v2}/ stay on disk for when
// we can re-enable them.
// Studio pool only. Try gender×race intersection first, then
// fall back to gender-only or race-only if the intersection
// is sparse. Repeat faces are acceptable — better than
// serving the over-contrasty diffusion output.
const SPARSE_THRESHOLD = 30;
let pool = F.all;
let bucket = "all";
if (wantGender && wantRace) {
const gr = F.byGR[wantGender + "/" + wantRace] || [];
if (gr.length > 0) {
// Use the intersection bucket as-is — even sparse buckets
// (south_asian: 3, black: 14) just repeat photos rather
// than route to ComfyUI. Repetition is fine; burnt faces
// are not.
if (gr.length >= SPARSE_THRESHOLD) {
pool = gr;
bucket = `gr:${wantGender}/${wantRace}`;
} else if (gr.length > 0) {
// Sparse intersection — route to ComfyUI for uniqueness.
const role = url.searchParams.get("role") || "warehouse worker";
const age = url.searchParams.get("age") || "32";
const genUrl = `/headshots/generate/${encodeURIComponent(key)}?g=${wantGender}&e=${wantRace}&role=${encodeURIComponent(role)}&age=${age}`;
return new Response(null, {
status: 302,
headers: {
"Location": genUrl,
"X-Face-Pool-Variant": "sparse-redirect",
"X-Face-Pool-Bucket-Size": String(gr.length),
},
});
} else if (F.byG[wantGender]?.length) {
pool = F.byG[wantGender];
bucket = `g:${wantGender}`;
@ -2054,88 +1939,15 @@ async function main() {
.reduce((s, c) => s + (c.implied_pay_rate - contractBillRate) * hoursPerWeek * weeksAssumed, 0);
// Shift inference from permit work_type + description.
// Description keywords trump the hash-based assignment;
// for everything else we deterministically distribute
// permits across shifts via a hash of the permit id so
// every shift bucket has real, stable data instead of
// every contract collapsing to 1st.
// Construction defaults to 1st-shift (day). Heavy civil or
// facility work sometimes runs 2nd or split-shift. 3rd
// (overnight) is rare in commercial construction but real
// for maintenance / emergency calls.
const descLower = ((p.work_description || "") + " " + (p.work_type || "")).toLowerCase();
// djb2 hash over UTF-16 code units, folded to a non-negative 32-bit
// int. Math.imul(acc, 33) is identical mod 2^32 to the classic
// ((acc << 5) + acc) formulation.
function hashStr(s: string){
  let acc = 5381;
  for (let idx = 0; idx < s.length; idx++) {
    acc = (Math.imul(acc, 33) + s.charCodeAt(idx)) | 0;
  }
  return Math.abs(acc);
}
const permitKey = String(p.id || (p.street_number+p.street_name) || p.work_description || "").slice(0,80);
const hh = hashStr(permitKey);
const bucket = hh % 100;
// Realistic split: 50% day, 28% evening, 17% overnight,
// 5% weekend. Construction skews heavily day-shift.
let primary: string =
bucket < 50 ? "1st"
: bucket < 78 ? "2nd"
: bucket < 95 ? "3rd"
: "4th";
const shifts: string[] = [primary];
if (/night|overnight|24\s*hr|emergency/.test(descLower) && !shifts.includes("3rd")) shifts.push("3rd");
if (/multi.?shift|round.?the.?clock|double.?shift/.test(descLower) && !shifts.includes("2nd")) shifts.push("2nd");
if (/weekend|saturday|sunday/.test(descLower) && !shifts.includes("4th")) shifts.push("4th");
// Internal calendar: build a 7-day schedule (today ±3
// days) with a row per (date, shift). This is what the
// front-end's shift-mix preview filters against — real
// dates, real workers/bill, real status (past/active/
// scheduled) tied to the current clock. As permits get
// ingested with explicit start/end dates the backend
// can replace this with the stored schedule.
// Shift windows as [startHour, endHour) in local hours. "3rd" runs
// past midnight (22 → 30 means 22:00 through 06:00 the next day);
// "4th" is the weekend bucket and spans the whole day.
const SHIFT_HOURS: Record<string, [number, number]> = {
"1st": [6, 14], "2nd": [14, 22], "3rd": [22, 30], "4th": [0, 24], // 4th = weekend
};
// Classify one (date, shift) row against the reference clock: strictly
// earlier days are "past", later days "scheduled", and the same day is
// resolved by comparing the reference hour to the shift's window.
function shiftStatus(d: Date, shift: string, ref: Date): "past" | "active" | "scheduled" {
  const dayOfRef = ref.toISOString().slice(0, 10);
  const dayOfRow = d.toISOString().slice(0, 10);
  if (dayOfRow !== dayOfRef) {
    return dayOfRow < dayOfRef ? "past" : "scheduled";
  }
  // Same day — break by hour vs shift window.
  const clock = ref.getHours() + ref.getMinutes() / 60;
  if (shift === "4th") {
    // Weekend shift: active only while today IS a weekend day.
    const isWeekend = ref.getDay() === 0 || ref.getDay() === 6;
    return isWeekend ? "active" : "scheduled";
  }
  if (shift === "3rd") {
    // 3rd wraps midnight: active 22:00–06:00 local time.
    return (clock >= 22 || clock < 6) ? "active" : "scheduled";
  }
  const [begin, end] = SHIFT_HOURS[shift] || [0, 24];
  if (clock < begin) return "scheduled";
  return clock >= end ? "past" : "active";
}
const refNow = new Date();
const schedule: any[] = [];
for (let off = -3; off <= 3; off++) {
const d = new Date(refNow.getTime() + off * 86400e3);
const isWknd = (d.getDay()===0 || d.getDay()===6);
const dateStr = d.toISOString().slice(0,10);
for (const sh of shifts) {
// Weekend permits use 4th shift only; weekday work
// uses its primary shift(s) and skips 4th.
if (isWknd && sh !== "4th") continue;
if (!isWknd && sh === "4th") continue;
// Workers per shift: full count on primary, half on
// secondary so the bill demand differs visibly.
const isPrimary = (sh === primary);
const wForShift = isPrimary ? count : Math.max(1, Math.floor(count/2));
schedule.push({
date: dateStr,
shift: sh,
workers_needed: wForShift,
bill_rate: contractBillRate,
status: shiftStatus(d, sh, refNow),
});
}
}
const shifts: string[] = ["1st"]; // default day
if (/night|overnight|24\s*hr|emergency/.test(descLower)) shifts.push("3rd");
if (/multi.?shift|round.?the.?clock|double.?shift/.test(descLower)) shifts.push("2nd");
if (/weekend|saturday|sunday/.test(descLower)) shifts.push("4th");
contracts.push({
permit: {
@ -2190,7 +2002,6 @@ async function main() {
over_bill_pool_margin_at_risk: Math.round(overBillPoolMargin),
},
shifts_needed: shifts,
schedule,
});
}

View File

@ -1,92 +0,0 @@
// Server-side mirror of search.html's ROLE_BANDS regex table.
// Each band carries a *visual scene* — clothing + immediate backdrop —
// so ComfyUI produces role-coherent headshots instead of interchangeable
// studio portraits. The front-end sends the raw role string in the
// query (?role=Forklift%20Operator); the server resolves it to a band
// and looks up the scene here.
// The five visual role bands a headshot scene can belong to.
export type RoleBand =
| "warehouse"
| "production"
| "trades"
| "driver"
| "lead";
// A band plus its diffusion-prompt scene clause.
export interface SceneDef {
band: RoleBand;
// Free-form clause inserted into the diffusion prompt AFTER
// "[age]-year-old [race] [gender] [role], ". Should describe what
// they're wearing and what is immediately behind them. Keep under
// ~25 words — SDXL Turbo loses focus on longer prompts and starts
// hallucinating cartoon hands.
scene: string;
}
// Ordered regex → band table; first match wins, so the broad
// warehouse patterns sit first and the generic `lead\b` sits last.
const RE_BANDS: { re: RegExp; band: RoleBand }[] = [
{ re: /forklift|warehouse|associate|material\s*handler|loader|loading|packag|shipping|logistics|inventory|sanitation|janit/i, band: "warehouse" },
{ re: /production|assembl|quality/i, band: "production" },
{ re: /welder|weld|electric|maint(enance)?\s*tech|cnc|machine\s*op|hvac|plumb|carpenter|mason|tool\s*&\s*die/i, band: "trades" },
{ re: /driver|truck|haul|cdl/i, band: "driver" },
{ re: /line\s*lead|supervisor|foreman|coordinator|lead\b/i, band: "lead" },
];
// Resolve a free-form role string to its band. Empty and unmatched
// roles both fall back to "warehouse".
export function roleBand(role: string): RoleBand {
const trimmed = (role || "").trim();
if (!trimmed) return "warehouse";
const hit = RE_BANDS.find((entry) => entry.re.test(trimmed));
return hit ? hit.band : "warehouse";
}
// TODO J — refine these. Each `scene` string lands directly in the
// diffusion prompt. Tone target: a coordinator glances at the card
// and recognizes the role from the photo before reading the role pill.
//
// Things that work well in SDXL Turbo at 8 steps:
// - One concrete clothing item ("high-visibility yellow vest")
// - One concrete prop ("hard hat hanging from belt", "tablet in hand")
// - One blurred background element ("warehouse pallet aisle behind",
//   "factory machinery softly out of focus")
// - Avoid: text/logos (rendered as scribble), specific brands, hands
//   holding tools (often distorts), full-body language ("standing",
//   "leaning") — model is trained on portrait crops.
//
// Each scene now bakes "monochrome black and white photography" into
// the prompt so the model produces native B&W output rather than us
// applying CSS grayscale post-hoc. SDXL Turbo handles B&W natively
// with strong tonal range — better than desaturating a color render.
// Typed Record<RoleBand, SceneDef> so the compiler forces exactly one
// scene per band — adding a band without a scene is a type error.
export const SCENES: Record<RoleBand, SceneDef> = {
warehouse: {
band: "warehouse",
scene: "wearing a high-visibility safety vest over a t-shirt, hard hat visible, blurred warehouse pallet aisle behind, soft natural light, monochrome black and white photography, fine film grain, documentary portrait style",
},
production: {
band: "production",
scene: "wearing a work shirt with safety glasses on forehead, blurred factory machinery softly out of focus behind, fluorescent overhead lighting, monochrome black and white photography, fine film grain, documentary portrait style",
},
trades: {
band: "trades",
scene: "wearing a heavy-duty work shirt with rolled sleeves, blurred workshop tool wall behind, focused tungsten lighting, monochrome black and white photography, fine film grain, documentary portrait style",
},
driver: {
band: "driver",
scene: "wearing a polo shirt, lanyard with ID badge visible, blurred truck cab or loading dock behind, daylight, monochrome black and white photography, fine film grain, documentary portrait style",
},
lead: {
band: "lead",
scene: "wearing a button-down shirt, tablet held casually at chest level, blurred warehouse floor in soft focus behind, professional lighting, monochrome black and white photography, fine film grain, documentary portrait style",
},
};
// v2 — baked B&W + 1024×1024 render canvas (4× pixels of v1). Larger
// source means downsampling to a 40px avatar packs more detail per
// displayed pixel, hiding the diffusion-y micro-textures that read as
// "AI generated" at small sizes. Server route reads pool from
// data/headshots_role_pool/{SCENES_VERSION}/... so v1 stays available
// for rollback / A-B comparison. Bump this whenever a scene prompt
// changes so cached renders invalidate (mirrors ICONS_VERSION).
export const SCENES_VERSION = "v2";
// Default render dimensions used by both the on-demand /headshots/
// generate/:key route and the offline render_role_pool.py script. v1
// used 512²; v2 doubles to 1024² (linear 2× = 4× pixels = ~3× GPU
// time on SDXL Turbo).
export const FACE_RENDER_DIM = 1024;

File diff suppressed because it is too large Load Diff

View File

@ -1,178 +0,0 @@
// TIF (Tax Increment Financing) district point-in-polygon lookup.
// Given a property's lat/long, returns which Chicago TIF district (if
// any) contains it. TIF districts are public-subsidy zones — a property
// inside one is receiving city tax-increment funding for its build.
// Strong "this project has financial backing" signal for the Project Index.
//
// Data: data/_entity_cache/tif_districts.geojson (Chicago Open Data
// dataset eejr-xtfb, 100 active districts, 3.2MB). Refresh by re-running
// `curl ... eejr-xtfb.geojson > tif_districts.geojson` — districts
// change rarely (only when city council approves new ones or repeals).
//
// Algorithm: classic ray-casting. For each MultiPolygon's outer ring,
// count edge crossings of an east-going horizontal ray from the point.
// Odd crossings = inside. Holes (inner rings) flip the parity. Library-
// free; correct for arbitrary polygons including the irregular Chicago
// shapes which often have many small detours.
import { readFile } from "node:fs/promises";
import { existsSync } from "node:fs";
import { join } from "node:path";
// Absolute path to the cached Chicago TIF-district GeoJSON (refresh
// command documented in the file header).
const TIF_GEOJSON = join("/home/profit/lakehouse/data/_entity_cache", "tif_districts.geojson");
type LngLat = [number, number]; // GeoJSON convention: [longitude, latitude]
type Ring = LngLat[];
type Polygon = Ring[]; // outer ring + optional inner rings (holes)
type MultiPolygon = Polygon[];
// One parsed district feature, pre-indexed with its bounding box so
// lookups can cheaply reject districts that can't contain a point.
type TifFeature = {
  name: string;
  trim_name?: string;
  ref?: string;
  approval_date?: string;
  expiration?: string;
  type?: string; // T-1xx etc.
  comm_area?: string;
  wards?: string;
  // Bounding box for quick reject
  bbox: { minLon: number; minLat: number; maxLon: number; maxLat: number };
  geometry: MultiPolygon;
};
// Lazily-built module-level cache; null until ensureLoaded() runs.
let tifIdx: TifFeature[] | null = null;
// Compute the axis-aligned bounding box covering every vertex of every
// ring in the MultiPolygon. Used as a cheap O(1) reject before the full
// point-in-polygon test.
function bboxOfMultiPolygon(mp: MultiPolygon): TifFeature["bbox"] {
  let minLon = Infinity;
  let minLat = Infinity;
  let maxLon = -Infinity;
  let maxLat = -Infinity;
  for (const polygon of mp) {
    for (const ring of polygon) {
      for (const [lon, lat] of ring) {
        minLon = Math.min(minLon, lon);
        maxLon = Math.max(maxLon, lon);
        minLat = Math.min(minLat, lat);
        maxLat = Math.max(maxLat, lat);
      }
    }
  }
  return { minLon, minLat, maxLon, maxLat };
}
// Lazily parse the TIF GeoJSON into the in-memory index. The result is
// cached module-wide after the first call; a missing file or a parse
// failure degrades to an empty index (lookups just return null) rather
// than throwing.
async function ensureLoaded(): Promise<TifFeature[]> {
  if (tifIdx) return tifIdx;
  if (!existsSync(TIF_GEOJSON)) {
    tifIdx = [];
    return tifIdx;
  }
  try {
    const parsed = JSON.parse(await readFile(TIF_GEOJSON, "utf-8"));
    const features: TifFeature[] = [];
    for (const feature of parsed.features || []) {
      const geom = feature.geometry;
      if (!geom) continue;
      // Everything is handled as MultiPolygon; wrap plain Polygons.
      let multi: MultiPolygon;
      switch (geom.type) {
        case "MultiPolygon":
          multi = geom.coordinates;
          break;
        case "Polygon":
          multi = [geom.coordinates];
          break;
        default:
          continue;
      }
      const props = feature.properties || {};
      features.push({
        name: props.name || "Unknown TIF",
        trim_name: props.name_trim,
        ref: props.ref,
        approval_date: props.approval_d,
        expiration: props.expiration,
        type: props.type,
        comm_area: props.comm_area,
        wards: props.wards,
        bbox: bboxOfMultiPolygon(multi),
        geometry: multi,
      });
    }
    tifIdx = features;
    return tifIdx;
  } catch (e) {
    console.warn("[tif] load failed:", (e as Error).message);
    tifIdx = [];
    return tifIdx;
  }
}
// Ray-casting point-in-polygon (single ring). Returns true if (lon, lat)
// is strictly inside the ring. Edge cases (point exactly on a vertex)
// resolve by half-open interval convention; for our use case (Chicago
// boundary precision is ~1m, sites are point queries) this is fine.
function pointInRing(lon: number, lat: number, ring: Ring): boolean {
  let inside = false;
  const n = ring.length;
  // Walk each edge (j = previous vertex, i = current). An east-going
  // horizontal ray from the query point crosses the edge when the edge
  // straddles the point's latitude AND the crossing longitude lies east
  // of the point. Odd total crossings = inside.
  for (let i = 0, j = n - 1; i < n; j = i++) {
    const [xi, yi] = ring[i];
    const [xj, yj] = ring[j];
    // The straddle test (yi > lat !== yj > lat) can only be true when
    // yi !== yj, and && short-circuits before the division, so the
    // denominator can never be zero. The old "+ 0" fudge appended to it
    // was a no-op and has been removed.
    const intersect =
      yi > lat !== yj > lat &&
      lon < ((xj - xi) * (lat - yi)) / (yj - yi) + xi;
    if (intersect) inside = !inside;
  }
  return inside;
}
// A GeoJSON polygon is an outer boundary ring followed by zero or more
// hole rings. A point counts as inside when it falls within the outer
// ring and within none of the holes.
function pointInPolygon(lon: number, lat: number, polygon: Polygon): boolean {
  if (polygon.length === 0) return false;
  const [outer, ...holes] = polygon;
  if (!pointInRing(lon, lat, outer)) return false;
  return !holes.some((hole) => pointInRing(lon, lat, hole));
}
// Caller-facing subset of TifFeature returned by findTifDistrict —
// geometry and bbox are internal lookup machinery and are omitted.
export type TifMatch = {
  name: string;
  ref?: string;
  approval_date?: string;
  expiration?: string;
  comm_area?: string;
  wards?: string;
};
/**
 * Resolve the TIF district (if any) containing the given point.
 *
 * Accepts numbers or numeric strings (DB rows often carry strings).
 * Returns null for missing/unparseable coordinates, an empty/unloaded
 * index, or a point outside every district.
 */
export async function findTifDistrict(
  longitude: number | string | undefined,
  latitude: number | string | undefined,
): Promise<TifMatch | null> {
  const lon = typeof longitude === "string" ? parseFloat(longitude) : longitude;
  const lat = typeof latitude === "string" ? parseFloat(latitude) : latitude;
  // Explicit undefined + finiteness checks. The previous `!lon || !lat`
  // truthiness test incorrectly rejected the valid coordinate 0, and
  // Number.isFinite also filters NaN (failed parseFloat) and ±Infinity.
  if (lon === undefined || lat === undefined) return null;
  if (!Number.isFinite(lon) || !Number.isFinite(lat)) return null;
  const idx = await ensureLoaded();
  if (idx.length === 0) return null;
  for (const f of idx) {
    // Bbox reject — cheap O(1) skip for the 99% of districts that
    // can't possibly contain the point.
    const b = f.bbox;
    if (lon < b.minLon || lon > b.maxLon || lat < b.minLat || lat > b.maxLat) continue;
    // Full point-in-polygon for any polygon in this MultiPolygon
    for (const poly of f.geometry) {
      if (pointInPolygon(lon, lat, poly)) {
        return {
          name: f.name,
          ref: f.ref,
          approval_date: f.approval_date,
          expiration: f.expiration,
          comm_area: f.comm_area,
          wards: f.wards,
        };
      }
    }
  }
  return null;
}
// Diagnostic summary of the TIF index: how many districts are loaded
// and whether the index is non-empty at all.
export async function getTifIndexStats(): Promise<{
  total: number;
  loaded: boolean;
}> {
  const districts = await ensureLoaded();
  const total = districts.length;
  return { total, loaded: total > 0 };
}

View File

@ -46,15 +46,8 @@ def _cache_put(key, img_bytes):
(CACHE_DIR / f"{key}.webp").write_bytes(img_bytes)
def _comfyui_generate(prompt, width=1024, height=512, steps=8, seed=None,
negative_prompt=None, cfg=None, sampler=None, scheduler=None):
"""Submit workflow to ComfyUI and wait for result.
Optional overrides when provided, replace the workflow's defaults.
The workflow template at editorial_hero.json was tuned for product
hero shots with a "no humans" negative prompt; portrait callers MUST
pass `negative_prompt` to avoid the model fighting them on faces.
"""
def _comfyui_generate(prompt, width=1024, height=512, steps=8, seed=None):
"""Submit workflow to ComfyUI and wait for result."""
# Load workflow template
with open(WORKFLOW_PATH) as f:
workflow = json.load(f)
@ -64,21 +57,9 @@ def _comfyui_generate(prompt, width=1024, height=512, steps=8, seed=None,
seed = random.randint(0, 2**32)
workflow["3"]["inputs"]["seed"] = seed
workflow["3"]["inputs"]["steps"] = steps
if cfg is not None:
workflow["3"]["inputs"]["cfg"] = cfg
if sampler:
workflow["3"]["inputs"]["sampler_name"] = sampler
if scheduler:
workflow["3"]["inputs"]["scheduler"] = scheduler
workflow["5"]["inputs"]["width"] = width
workflow["5"]["inputs"]["height"] = height
workflow["6"]["inputs"]["text"] = prompt
# Node 7 is the negative-prompt CLIPTextEncode. The default is tuned
# for product hero shots and contains "human, person, face, hand,
# fingers, realistic photo of people" — actively sabotaging any
# portrait render. Always overwrite when negative_prompt is given.
if negative_prompt is not None:
workflow["7"]["inputs"]["text"] = negative_prompt
# Submit to ComfyUI
payload = json.dumps({"prompt": workflow}).encode()
@ -202,20 +183,10 @@ class ImageHandler(BaseHTTPRequestHandler):
height = min(max(int(body.get("height", 720)), 256), 1080)
steps = min(max(int(body.get("steps", 50)), 1), 80)
seed = body.get("seed")
# Portrait-friendly overrides — None means "use workflow default".
# negative_prompt MUST be passed by portrait callers to avoid
# the workflow's "no humans" baked-in negative.
negative_prompt = body.get("negative_prompt")
cfg = body.get("cfg")
sampler = body.get("sampler")
scheduler = body.get("scheduler")
# Cache check — seed + negative + cfg are part of the key so per-
# worker / per-config requests don't collapse to one cached image.
key = _cache_key(
f"{prompt}||neg={negative_prompt or ''}||cfg={cfg or ''}",
width, height, steps, seed,
)
# Cache check — seed is part of the key so per-worker requests
# don't collapse to a single cached portrait.
key = _cache_key(prompt, width, height, steps, seed)
cached = _cache_get(key)
if cached:
self._json(200, {"image": cached, "format": "webp", "width": width, "height": height,
@ -228,11 +199,7 @@ class ImageHandler(BaseHTTPRequestHandler):
try:
comfy_check = urllib.request.urlopen(f"{COMFYUI_URL}/system_stats", timeout=3)
if comfy_check.status == 200:
img_bytes, seed = _comfyui_generate(
prompt, width, height, steps, seed,
negative_prompt=negative_prompt, cfg=cfg,
sampler=sampler, scheduler=scheduler,
)
img_bytes, seed = _comfyui_generate(prompt, width, height, steps, seed)
backend = "comfyui"
except:
pass

View File

@ -53,30 +53,20 @@ def fetch_one(idx: int, out_dir: str) -> tuple[int, str, bool, str | None]:
def maybe_tag_gender(records: list[dict], out_dir: str) -> dict[str, int]:
"""If deepface is installed, label records that don't already have a
gender. Returns a count summary; mutates records in place.
Preservation contract: never overwrites prior `gender` (or any other
tag race/age/excluded set by tag_face_pool.py). On deepface
import failure, leaves existing tags alone instead of resetting them
to None. The previous behavior wiped 952 hand-classified rows when
fetch_face_pool was re-run from a Python without deepface installed."""
"""If deepface is installed, label each record with gender. Returns
a count summary; mutates records in place. On import error, returns
None and tags every record as unknown."""
try:
from deepface import DeepFace # type: ignore
except Exception as e:
print(f" (deepface unavailable: {e}) — leaving existing tags untouched")
print(f" (deepface unavailable: {e}) — pool will mix naturally")
for r in records:
r.setdefault("gender", None)
already = sum(1 for r in records if r.get("gender") in ("man", "woman"))
return {"preserved_tagged": already, "untagged": len(records) - already}
r["gender"] = None
return {"unknown": len(records)}
todo = [r for r in records if r.get("gender") not in ("man", "woman")]
if not todo:
print(" every record already has gender — nothing to tag.")
return {"preserved_tagged": len(records)}
print(f" tagging gender via deepface ({len(todo)} of {len(records)} records, CPU; ~0.5-1s per face)…")
print(" tagging gender via deepface (CPU; ~0.5-1s per face)…")
counts: dict[str, int] = {}
for i, r in enumerate(todo):
for i, r in enumerate(records):
full = os.path.join(out_dir, r["file"])
try:
ana = DeepFace.analyze(
@ -98,7 +88,7 @@ def maybe_tag_gender(records: list[dict], out_dir: str) -> dict[str, int]:
r["gender_error"] = f"{type(e).__name__}: {e}"
counts[r["gender"] or "unknown"] = counts.get(r["gender"] or "unknown", 0) + 1
if (i + 1) % 25 == 0:
print(f" [{i+1}/{len(todo)}] {counts}")
print(f" [{i+1}/{len(records)}] {counts}")
return counts
@ -111,47 +101,11 @@ def main():
)
p.add_argument("--concurrency", type=int, default=3, help="parallel fetches (be polite)")
p.add_argument("--no-gender", action="store_true", help="skip deepface gender tagging")
p.add_argument("--shrink", action="store_true",
help="allow --count to drop manifest entries with id >= count. Default: preserve them.")
args = p.parse_args()
out = os.path.realpath(args.out)
os.makedirs(out, exist_ok=True)
# Load any existing manifest into a by-id dict so prior tags
# (gender / race / age / excluded) survive the rewrite. Also
# naturally dedupes — if the file accidentally has duplicate
# lines for the same id (this is how we ended up with a 2497-
# row manifest backing a 1000-face pool), the last one wins.
manifest = os.path.join(out, "manifest.jsonl")
existing: dict[int, dict] = {}
if os.path.exists(manifest):
dup_count = 0
with open(manifest) as f:
for line in f:
line = line.strip()
if not line:
continue
try:
row = json.loads(line)
except json.JSONDecodeError:
continue
rid = row.get("id")
if not isinstance(rid, int):
continue
if rid in existing:
dup_count += 1
existing[rid] = row
print(f"Loaded existing manifest: {len(existing)} unique ids ({dup_count} duplicate lines collapsed)")
max_existing = max(existing.keys()) if existing else -1
if max_existing >= args.count and not args.shrink:
print(
f"\nERROR: --count={args.count} would drop {sum(1 for k in existing if k >= args.count)} "
f"manifest entries (max existing id = {max_existing}). Pass --shrink to allow.\n",
file=sys.stderr,
)
sys.exit(2)
print(f"Fetching {args.count} faces → {out}")
print(f"Source: {URL} (synthetic StyleGAN — no real people)")
@ -161,16 +115,12 @@ def main():
futs = {ex.submit(fetch_one, i, out): i for i in range(args.count)}
for done, fut in enumerate(as_completed(futs), 1):
idx, fname, cached, err = fut.result()
# Start from prior manifest row (preserves gender/race/age/excluded)
# and overlay only the fields fetch_one is responsible for.
base = dict(existing.get(idx, {}))
base.update({
results[idx] = {
"id": idx,
"file": fname,
"cached": cached,
"error": err,
})
results[idx] = base
}
if done % 25 == 0 or done == args.count:
ok = sum(1 for r in results if r and not r.get("error"))
print(f" [{done}/{args.count}] {ok} ok ({time.time()-t0:.1f}s)")
@ -178,9 +128,6 @@ def main():
# Drop slots that errored or are still None (shouldn't happen)
records = [r for r in results if r and not r.get("error")]
print(f"\nPool ready: {len(records)} faces, {sum(1 for r in records if r['cached'])} from cache")
preserved_tags = sum(1 for r in records if r.get("gender") in ("man", "woman"))
if preserved_tags:
print(f"Preserved {preserved_tags} prior gender tags (and any race/age/excluded fields).")
if not args.no_gender and records:
print("\nGender-tagging pass:")
@ -188,28 +135,17 @@ def main():
print(f" distribution: {summary}")
else:
for r in records:
r.setdefault("gender", None)
# If --shrink was NOT used and somehow id >= count rows are still in
# `existing` (which can only happen if the early gate was bypassed),
# carry them forward so we don't quietly drop them.
if not args.shrink:
for rid, row in existing.items():
if rid >= args.count and rid not in {r["id"] for r in records}:
records.append(row)
records.sort(key=lambda r: r.get("id", 0))
r["gender"] = None
# Strip transient flags before persisting
for r in records:
r.pop("cached", None)
r.pop("error", None)
# Atomic write — if a re-run is interrupted, manifest stays intact.
tmp = manifest + ".tmp"
with open(tmp, "w") as f:
manifest = os.path.join(out, "manifest.jsonl")
with open(manifest, "w") as f:
for r in records:
f.write(json.dumps(r) + "\n")
os.replace(tmp, manifest)
print(f"\nManifest: {manifest} ({len(records)} entries)")
# Quick checksum manifest for downstream debugging

View File

@ -1,230 +0,0 @@
#!/usr/bin/env python3
"""
render_role_pool.py — pre-renders a role-aware face pool by hitting
serve_imagegen.py (localhost:3600/generate) with prompts pulled from
the bun server's /headshots/_scenes endpoint (single source of truth
for SCENES + SCENES_VERSION).
Layout:
data/headshots_role_pool/
{band}/
{gender}_{race}/
face_00.webp
face_01.webp
...
manifest.jsonl
Each entry in manifest.jsonl:
{"band": "warehouse", "gender": "man", "race": "caucasian",
"file": "warehouse/man_caucasian/face_03.webp",
"seed": 184729338, "scenes_version": "v1"}
Idempotent: a file at the target path is skipped. Re-run with --force
to regenerate. SCENES_VERSION is captured per render so the server's
pool route can refuse stale renders if the version drifts.
"""
from __future__ import annotations
import argparse
import base64
import json
import os
import sys
import time
import urllib.request
import urllib.error
# Cartesian-product axes for the pool: every (band × gender × race)
# bucket receives --per-bucket faces.
DEFAULT_BANDS = ["warehouse", "production", "trades", "driver", "lead"]
DEFAULT_GENDERS = ["man", "woman"]
DEFAULT_RACES = ["caucasian", "east_asian", "south_asian", "middle_eastern", "black", "hispanic"]
def race_text(r: str) -> str:
    """Map a pool race key to the adjective used in the render prompt.

    "caucasian" and any unknown key map to "" so the prompt simply
    omits the clause.
    """
    adjectives = {
        "caucasian": "",
        "east_asian": "East Asian",
        "south_asian": "South Asian",
        "middle_eastern": "Middle Eastern",
        "black": "Black",
        "hispanic": "Hispanic",
    }
    return adjectives.get(r, "")
def fetch_scenes(mcp_url: str) -> tuple[str, dict]:
    """Pull canonical SCENES from the bun server. Single source of truth.

    Returns (scenes_version, scenes_by_band). Any network or JSON error
    propagates — the caller treats failure as fatal.
    """
    request = urllib.request.Request(f"{mcp_url}/headshots/_scenes")
    with urllib.request.urlopen(request, timeout=10) as resp:
        payload = json.loads(resp.read())
    return payload["version"], payload["scenes"]
def render(comfy_url: str, prompt: str, seed: int, steps: int, timeout: int, dim: int) -> bytes | None:
    """POST one square render request to the imagegen service.

    Returns the decoded image bytes, or None on any failure (HTTP
    error, transport error, or a response missing the "image" field).
    Failures are logged to stderr so the caller can just count them.
    """
    body = json.dumps({
        "prompt": prompt,
        "width": dim,
        "height": dim,
        "steps": steps,
        "seed": seed,
    }).encode()
    request = urllib.request.Request(
        f"{comfy_url}/generate",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            reply = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        print(f"  HTTP {e.code} from comfy: {e.read()[:200]}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"  comfy error: {type(e).__name__}: {e}", file=sys.stderr)
        return None
    encoded = reply.get("image")
    if not encoded:
        print(f"  comfy response missing 'image' field: {list(reply.keys())}", file=sys.stderr)
        return None
    return base64.b64decode(encoded)
def main():
    """Batch-render the (band × gender × race × per-bucket) face grid.

    Pulls canonical SCENES from the bun server, renders each slot via
    the local imagegen service with a deterministic per-slot seed, and
    writes manifest.jsonl atomically. Idempotent: existing non-trivial
    files are skipped unless --force. Exits non-zero when the
    mcp-server is unreachable or an unknown band is requested.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--out", default=os.path.join(os.path.dirname(__file__), "..", "..", "data", "headshots_role_pool"))
    p.add_argument("--per-bucket", type=int, default=10, help="how many faces per (band × gender × race)")
    p.add_argument("--mcp", default="http://localhost:3700")
    p.add_argument("--comfy", default="http://localhost:3600")
    p.add_argument("--steps", type=int, default=8)
    p.add_argument("--bands", nargs="*", default=DEFAULT_BANDS)
    p.add_argument("--genders", nargs="*", default=DEFAULT_GENDERS)
    p.add_argument("--races", nargs="*", default=DEFAULT_RACES)
    p.add_argument("--force", action="store_true", help="regenerate existing files")
    p.add_argument("--age", type=int, default=32)
    p.add_argument("--timeout", type=int, default=120, help="per-render timeout (1024² takes ~5s on A4000)")
    p.add_argument("--dim", type=int, default=1024, help="square render dimension (v2 default 1024, v1 was 512)")
    args = p.parse_args()
    out_root = os.path.realpath(args.out)
    os.makedirs(out_root, exist_ok=True)
    # Scenes come from the server — fatal if it's down, since prompts
    # must match the server's builder exactly.
    print(f"Fetching canonical SCENES from {args.mcp}/headshots/_scenes…")
    try:
        version, scenes = fetch_scenes(args.mcp)
    except Exception as e:
        print(f"FATAL: could not fetch scenes ({e}). Is the mcp-server up?", file=sys.stderr)
        sys.exit(1)
    print(f"  SCENES_VERSION={version}, {len(scenes)} bands available: {list(scenes.keys())}")
    # v2+ files live at {out}/{version}/{band}/{g}_{r}/face_NN.webp.
    # v1 lived at {out}/{band}/... — keep that layout intact for
    # rollback; the server route reads both and prefers current.
    out = out_root if version == "v1" else os.path.join(out_root, version)
    os.makedirs(out, exist_ok=True)
    print(f"  writing to: {out}")
    print(f"  render dim: {args.dim}×{args.dim}")
    # Reject any --bands not in the server's SCENES
    unknown = [b for b in args.bands if b not in scenes]
    if unknown:
        print(f"FATAL: unknown bands {unknown}. Server has: {list(scenes.keys())}", file=sys.stderr)
        sys.exit(1)
    manifest_rows = []
    # Full cartesian product of render slots, in stable order.
    todo = [
        (band, g, r, n)
        for band in args.bands
        for g in args.genders
        for r in args.races
        for n in range(args.per_bucket)
    ]
    print(f"\nPlanning: {len(todo)} renders ({len(args.bands)} bands × {len(args.genders)} genders × {len(args.races)} races × {args.per_bucket} faces).")
    print(f"Estimated GPU time at 1.5s/render = {len(todo) * 1.5 / 60:.1f} min.\n")
    t0 = time.time()
    rendered = 0
    skipped = 0
    failed = 0
    for i, (band, g, r, n) in enumerate(todo):
        bucket_dir = os.path.join(out, band, f"{g}_{r}")
        os.makedirs(bucket_dir, exist_ok=True)
        fname = f"face_{n:02d}.webp"
        full = os.path.join(bucket_dir, fname)
        rel = os.path.relpath(full, out)
        # Skip files that already exist and look real (>1KB guards
        # against truncated writes); --force overrides.
        if os.path.exists(full) and os.path.getsize(full) > 1024 and not args.force:
            skipped += 1
            manifest_rows.append({
                "band": band, "gender": g, "race": r, "file": rel,
                "seed": None, "scenes_version": version, "cached": True,
            })
            continue
        scene_def = scenes[band]
        scene_clause = scene_def["scene"]
        race_clause = race_text(r)
        gender_clause = g  # "man" / "woman"
        # Match the bun server's prompt builder exactly. If you tweak
        # one, tweak the other (or factor a /prompt-builder endpoint).
        # The {role} slot is intentionally a band-typical title here
        # — the pre-rendered face is shared across roles in the same
        # band, so we use the band's archetypal role. Specific roles
        # still hit the on-demand /headshots/generate/:key path with
        # their actual title.
        archetype_role = {
            "warehouse": "warehouse worker",
            "production": "production worker",
            "trades": "skilled tradesperson",
            "driver": "delivery driver",
            "lead": "shift supervisor",
        }.get(band, "warehouse worker")
        prompt = (
            f"professional headshot portrait of a {args.age}-year-old "
            f"{race_clause} {gender_clause} {archetype_role}, {scene_clause}, "
            f"neutral confident expression, sharp focus, photorealistic"
        )
        # Deterministic seed per slot — same (band, g, r, n) always
        # gets the same face. Mixing scenes_version means a SCENES
        # tweak shifts every face slightly; that's the right behavior
        # (it's how cache invalidation propagates to the pool too).
        # djb2 hash masked to a positive 31-bit int.
        seed_str = f"{band}|{g}|{r}|{n}|{version}"
        seed_h = 5381
        for ch in seed_str:
            seed_h = ((seed_h << 5) + seed_h + ord(ch)) & 0x7fffffff
        seed = seed_h
        bytes_ = render(args.comfy, prompt, seed, args.steps, args.timeout, args.dim)
        if bytes_ is None:
            failed += 1
            continue
        with open(full, "wb") as f:
            f.write(bytes_)
        rendered += 1
        manifest_rows.append({
            "band": band, "gender": g, "race": r, "file": rel,
            "seed": seed, "scenes_version": version, "cached": False,
        })
        # Progress + ETA every 10 renders and at the end.
        if (i + 1) % 10 == 0 or (i + 1) == len(todo):
            elapsed = time.time() - t0
            done = i + 1
            rate = done / elapsed if elapsed > 0 else 0
            eta = (len(todo) - done) / rate if rate > 0 else 0
            print(f"  [{done}/{len(todo)}] rendered={rendered} skipped={skipped} failed={failed} "
                  f"rate={rate:.2f}/s eta={eta:.0f}s")
    # Atomic manifest write
    manifest_path = os.path.join(out, "manifest.jsonl")
    tmp = manifest_path + ".tmp"
    with open(tmp, "w") as f:
        for row in manifest_rows:
            f.write(json.dumps(row) + "\n")
    os.replace(tmp, manifest_path)
    print(f"\nDone. {rendered} new, {skipped} cached, {failed} failed in {time.time()-t0:.1f}s")
    print(f"Manifest: {manifest_path} ({len(manifest_rows)} entries)")
    print(f"\nNext: poke {args.mcp}/headshots/__reload to pick up the new pool.")
# Script entry point — no work happens at import time.
if __name__ == "__main__":
    main()