Adds two single-source-of-truth recipe files that drive both the
hot-path render server and the offline pre-render scripts:
- role_scenes.ts: per-role-band scene clauses (clothing + backdrop).
Forklift operators look like forklift operators instead of
collapsing to interchangeable studio shots. SCENES_VERSION mixes
into the headshot cache key so a coordinator tweak refreshes every
matching face on next view.
- icon_recipes.ts: cert / role-prop / status / hazard / empty icons
with deterministic per-recipe seeds + fuzzy text resolver.
ICONS_VERSION suffix on the cached file means edits don't
overwrite in place — misfires are recoverable.
Routes (mcp-server/index.ts):
- GET /headshots/_scenes — exposes SCENES + version to the
pre-render script so prompts don't drift between batch and hot-path.
- GET /icons/_recipes — same idea for icons.
- GET /icons/cert?text=... — resolves free-text cert names to a
recipe and 302s to the rendered icon. 404 (not 500) when no recipe
matches so the front-end can hang `onerror="this.remove()"`.
- GET /icons/render/{category}/{slug} — cache-or-render at 256² (8
steps) for crisper edges than 512² when downsampled to 14px.
ComfyUI portrait support (scripts/serve_imagegen.py):
The editorial workflow had `human, person, face` baked into its
negative prompt — actively sabotaging portraits. _comfyui_generate
now accepts negative_prompt/cfg/sampler/scheduler overrides, and
those mix into the cache key so portrait calls don't collapse into
hero-shot cache hits.
scripts/staffing/render_role_pool.py: pre-renders the role-aware
face pool by reading SCENES from /headshots/_scenes — single source
of truth verified at run time.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
231 lines
8.8 KiB
Python
231 lines
8.8 KiB
Python
#!/usr/bin/env python3
"""
render_role_pool.py — pre-render a role-aware face pool by hitting
serve_imagegen.py (localhost:3600/generate) with prompts pulled from
the bun server's /headshots/_scenes endpoint (single source of truth
for SCENES + SCENES_VERSION).

Layout (SCENES_VERSION "v1"; any other version is nested one level
deeper, under data/headshots_role_pool/{version}/):

  data/headshots_role_pool/
    {band}/
      {gender}_{race}/
        face_00.webp
        face_01.webp
        ...
    manifest.jsonl

Each entry in manifest.jsonl:

  {"band": "warehouse", "gender": "man", "race": "caucasian",
   "file": "warehouse/man_caucasian/face_03.webp",
   "seed": 184729338, "scenes_version": "v1", "cached": false}

Entries for files that were skipped because they already existed carry
"cached": true and "seed": null.

Idempotent: a file at the target path (larger than 1024 bytes) is
skipped. Re-run with --force to regenerate. SCENES_VERSION is captured
per render so the server's pool route can refuse stale renders if the
version drifts.
"""
|
||
from __future__ import annotations
|
||
import argparse
|
||
import base64
|
||
import json
|
||
import os
|
||
import sys
|
||
import time
|
||
import urllib.request
|
||
import urllib.error
|
||
|
||
# Defaults for the --bands / --genders / --races command-line options.
DEFAULT_BANDS = [
    "warehouse",
    "production",
    "trades",
    "driver",
    "lead",
]
DEFAULT_GENDERS = [
    "man",
    "woman",
]
DEFAULT_RACES = [
    "caucasian",
    "east_asian",
    "south_asian",
    "middle_eastern",
    "black",
    "hispanic",
]
|
||
|
||
|
||
def race_text(r: str) -> str:
    """Return the prompt wording for the race slug *r*.

    "caucasian" maps to an empty string, as does any slug that is not in
    the table below.
    """
    wording = {
        "caucasian": "",
        "east_asian": "East Asian",
        "south_asian": "South Asian",
        "middle_eastern": "Middle Eastern",
        "black": "Black",
        "hispanic": "Hispanic",
    }
    try:
        return wording[r]
    except KeyError:
        # Unknown slug: contribute nothing to the prompt.
        return ""
|
||
|
||
|
||
def fetch_scenes(mcp_url: str) -> tuple[str, dict]:
    """Fetch the canonical SCENES from the bun server (single source of truth).

    Requests ``{mcp_url}/headshots/_scenes`` with a 10 second timeout and
    parses the body as JSON.

    Returns:
        ``(version, scenes)`` — the response's "version" and "scenes" values.

    Raises:
        Whatever ``urlopen`` or ``json.loads`` raise on failure, and
        ``KeyError`` if either expected key is missing from the response.
    """
    endpoint = f"{mcp_url}/headshots/_scenes"
    request = urllib.request.Request(endpoint)
    with urllib.request.urlopen(request, timeout=10) as response:
        raw = response.read()
    body = json.loads(raw)
    return body["version"], body["scenes"]
|
||
|
||
|
||
def render(comfy_url: str, prompt: str, seed: int, steps: int, timeout: int, dim: int) -> bytes | None:
    """Request one square image from ``{comfy_url}/generate`` and return its bytes.

    The request body is a JSON object with "prompt", "width", "height"
    (both set to *dim*), "steps" and "seed". The response is expected to be
    a JSON object whose "image" field holds the base64-encoded image.

    Args:
        comfy_url: Base URL of the image-generation server.
        prompt: Text prompt to render.
        seed: Seed forwarded to the server.
        steps: Step count forwarded to the server.
        timeout: Per-request timeout in seconds, passed to ``urlopen``.
        dim: Width and height of the requested image.

    Returns:
        The decoded image bytes, or ``None`` on any failure (HTTP error,
        transport/JSON error, response that is not a JSON object, missing
        "image" field, or image data that is not valid base64). Failures
        are reported on stderr; no exception is raised for them, so one bad
        response cannot abort a batch.
    """
    payload = json.dumps({
        "prompt": prompt,
        "width": dim,
        "height": dim,
        "steps": steps,
        "seed": seed,
    }).encode()
    req = urllib.request.Request(
        f"{comfy_url}/generate",
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        print(f" HTTP {e.code} from comfy: {e.read()[:200]}", file=sys.stderr)
        return None
    except Exception as e:
        print(f" comfy error: {type(e).__name__}: {e}", file=sys.stderr)
        return None

    # A valid JSON body is not necessarily an object; guard before .get().
    if not isinstance(data, dict):
        print(f" comfy response is not a JSON object: {type(data).__name__}", file=sys.stderr)
        return None

    img_b64 = data.get("image")
    if not img_b64:
        print(f" comfy response missing 'image' field: {list(data.keys())}", file=sys.stderr)
        return None

    try:
        return base64.b64decode(img_b64)
    except (ValueError, TypeError) as e:
        # binascii.Error (bad padding) is a ValueError; a non-string payload
        # raises TypeError. Either way, treat it as a failed render.
        print(f" comfy returned undecodable image data: {type(e).__name__}: {e}", file=sys.stderr)
        return None
|
||
|
||
|
||
# Band -> archetypal job title used for the prompt's role slot. A
# pre-rendered face is shared across roles in the same band, so the band's
# archetypal role is used; specific roles still hit the on-demand
# /headshots/generate/:key path with their actual title.
_ARCHETYPE_ROLES = {
    "warehouse": "warehouse worker",
    "production": "production worker",
    "trades": "skilled tradesperson",
    "driver": "delivery driver",
    "lead": "shift supervisor",
}


def _slot_seed(band: str, g: str, r: str, n: int, version: str) -> int:
    """Return the deterministic 31-bit seed for one (band, gender, race, n) slot.

    A djb2-style rolling hash over "band|g|r|n|version". Mixing the scenes
    version in means a SCENES tweak shifts every face, which is how cache
    invalidation propagates to the pool.
    """
    h = 5381
    for ch in f"{band}|{g}|{r}|{n}|{version}":
        h = ((h << 5) + h + ord(ch)) & 0x7fffffff
    return h


def _build_prompt(age: int, race_clause: str, gender_clause: str, band: str, scene_clause: str) -> str:
    """Assemble the headshot prompt for one pool slot.

    Must match the bun server's prompt builder exactly. If you tweak one,
    tweak the other (or factor a /prompt-builder endpoint).
    """
    archetype_role = _ARCHETYPE_ROLES.get(band, "warehouse worker")
    return (
        f"professional headshot portrait of a {age}-year-old "
        f"{race_clause} {gender_clause} {archetype_role}, {scene_clause}, "
        f"neutral confident expression, sharp focus, photorealistic"
    )


def main() -> None:
    """Pre-render the role-aware face pool and write its manifest.

    Steps:
      1. Parse command-line options.
      2. Fetch SCENES and its version from the mcp server; exit with
         status 1 if that fails or if any requested band is unknown to it.
      3. For every (band, gender, race, n) slot, skip the file if it already
         exists and is larger than 1024 bytes (unless --force), otherwise
         render it and write it to disk.
      4. Write manifest.jsonl (one JSON object per skipped or rendered
         slot; failed slots are omitted) and print a summary.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--out", default=os.path.join(os.path.dirname(__file__), "..", "..", "data", "headshots_role_pool"))
    p.add_argument("--per-bucket", type=int, default=10, help="how many faces per (band × gender × race)")
    p.add_argument("--mcp", default="http://localhost:3700")
    p.add_argument("--comfy", default="http://localhost:3600")
    p.add_argument("--steps", type=int, default=8)
    p.add_argument("--bands", nargs="*", default=DEFAULT_BANDS)
    p.add_argument("--genders", nargs="*", default=DEFAULT_GENDERS)
    p.add_argument("--races", nargs="*", default=DEFAULT_RACES)
    p.add_argument("--force", action="store_true", help="regenerate existing files")
    p.add_argument("--age", type=int, default=32)
    p.add_argument("--timeout", type=int, default=120, help="per-render timeout (1024² takes ~5s on A4000)")
    p.add_argument("--dim", type=int, default=1024, help="square render dimension (v2 default 1024, v1 was 512)")
    args = p.parse_args()

    out_root = os.path.realpath(args.out)
    os.makedirs(out_root, exist_ok=True)

    print(f"Fetching canonical SCENES from {args.mcp}/headshots/_scenes…")
    try:
        version, scenes = fetch_scenes(args.mcp)
    except Exception as e:
        print(f"FATAL: could not fetch scenes ({e}). Is the mcp-server up?", file=sys.stderr)
        sys.exit(1)
    print(f" SCENES_VERSION={version}, {len(scenes)} bands available: {list(scenes.keys())}")

    # v2+ files live at {out}/{version}/{band}/{g}_{r}/face_NN.webp.
    # v1 lived at {out}/{band}/... — keep that layout intact for
    # rollback; the server route reads both and prefers current.
    out = out_root if version == "v1" else os.path.join(out_root, version)
    os.makedirs(out, exist_ok=True)
    print(f" writing to: {out}")
    print(f" render dim: {args.dim}×{args.dim}")

    # Reject any --bands not in the server's SCENES
    unknown = [b for b in args.bands if b not in scenes]
    if unknown:
        print(f"FATAL: unknown bands {unknown}. Server has: {list(scenes.keys())}", file=sys.stderr)
        sys.exit(1)

    manifest_rows = []
    todo = [
        (band, g, r, n)
        for band in args.bands
        for g in args.genders
        for r in args.races
        for n in range(args.per_bucket)
    ]
    total = len(todo)
    print(f"\nPlanning: {total} renders ({len(args.bands)} bands × {len(args.genders)} genders × {len(args.races)} races × {args.per_bucket} faces).")
    print(f"Estimated GPU time at 1.5s/render = {total * 1.5 / 60:.1f} min.\n")

    t0 = time.time()
    rendered = 0
    skipped = 0
    failed = 0
    for i, (band, g, r, n) in enumerate(todo):
        bucket_dir = os.path.join(out, band, f"{g}_{r}")
        os.makedirs(bucket_dir, exist_ok=True)
        fname = f"face_{n:02d}.webp"
        full = os.path.join(bucket_dir, fname)
        rel = os.path.relpath(full, out)

        if os.path.exists(full) and os.path.getsize(full) > 1024 and not args.force:
            skipped += 1
            manifest_rows.append({
                "band": band, "gender": g, "race": r, "file": rel,
                "seed": None, "scenes_version": version, "cached": True,
            })
        else:
            scene_clause = scenes[band]["scene"]
            prompt = _build_prompt(args.age, race_text(r), g, band, scene_clause)
            # Deterministic seed per slot — same (band, g, r, n) always
            # gets the same face for a given scenes version.
            seed = _slot_seed(band, g, r, n, version)

            image = render(args.comfy, prompt, seed, args.steps, args.timeout, args.dim)
            if image is None:
                failed += 1
            else:
                # Write via a temp file + rename so an interrupted run can
                # never leave a truncated image that a later run would
                # mistake for a finished (cached) one.
                partial = full + ".tmp"
                with open(partial, "wb") as f:
                    f.write(image)
                os.replace(partial, full)
                rendered += 1
                manifest_rows.append({
                    "band": band, "gender": g, "race": r, "file": rel,
                    "seed": seed, "scenes_version": version, "cached": False,
                })

        # Progress is reported for every slot, including skipped and failed
        # ones, so the final [N/N] line is always printed.
        done = i + 1
        if done % 10 == 0 or done == total:
            elapsed = time.time() - t0
            rate = done / elapsed if elapsed > 0 else 0
            eta = (total - done) / rate if rate > 0 else 0
            print(f" [{done}/{total}] rendered={rendered} skipped={skipped} failed={failed} "
                  f"rate={rate:.2f}/s eta={eta:.0f}s")

    # Atomic manifest write
    manifest_path = os.path.join(out, "manifest.jsonl")
    tmp = manifest_path + ".tmp"
    with open(tmp, "w") as f:
        for row in manifest_rows:
            f.write(json.dumps(row) + "\n")
    os.replace(tmp, manifest_path)

    print(f"\nDone. {rendered} new, {skipped} cached, {failed} failed in {time.time()-t0:.1f}s")
    print(f"Manifest: {manifest_path} ({len(manifest_rows)} entries)")
    print(f"\nNext: poke {args.mcp}/headshots/__reload to pick up the new pool.")
|
||
|
||
|
||
# Run the pre-render only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|