lakehouse/scripts/serve_imagegen.py
root 4b92d1da91 demo: icon recipe pipeline + role-aware portraits + ComfyUI negative-prompt override
Adds two single-source-of-truth recipe files that drive both the
hot-path render server and the offline pre-render scripts:

- role_scenes.ts: per-role-band scene clauses (clothing + backdrop).
  Forklift operators look like forklift operators instead of
  collapsing to interchangeable studio shots. SCENES_VERSION mixes
  into the headshot cache key so a coordinator tweak refreshes every
  matching face on next view.
- icon_recipes.ts: cert / role-prop / status / hazard / empty icons
  with deterministic per-recipe seeds + fuzzy text resolver.
  ICONS_VERSION suffix on the cached file means edits don't
  overwrite in place — misfires are recoverable.

Routes (mcp-server/index.ts):
- GET /headshots/_scenes — exposes SCENES + version to the
  pre-render script so prompts don't drift between batch and hot-path.
- GET /icons/_recipes — same idea for icons.
- GET /icons/cert?text=... — resolves free-text cert names to a
  recipe and 302s to the rendered icon. 404 (not 500) when no recipe
  matches so the front-end can hang `onerror="this.remove()"`.
- GET /icons/render/{category}/{slug} — cache-or-render at 256² (8
  steps) for crisper edges than 512² when downsampled to 14px.

ComfyUI portrait support (scripts/serve_imagegen.py):
The editorial workflow had `human, person, face` baked into its
negative prompt — actively sabotaging portraits. _comfyui_generate
now accepts negative_prompt/cfg/sampler/scheduler overrides, and
those mix into the cache key so portrait calls don't collapse into
hero-shot cache hits.

scripts/staffing/render_role_pool.py: pre-renders the role-aware
face pool by reading SCENES from /headshots/_scenes — single source
of truth verified at run time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 06:01:04 -05:00

479 lines
19 KiB
Python

#!/usr/bin/env python3
"""Image generation service — proxies to ComfyUI API on :8188.
Serves on :3600. Submits workflow to ComfyUI, polls for completion, returns image.
Falls back to direct diffusers if ComfyUI is unavailable.
Features:
- Disk cache — same prompt returns cached image instantly
- Negative prompt for quality (no faces, hands, text)
- DreamShaper XL Turbo for high-quality editorial illustrations
"""
import base64
import hashlib
import io
import json
import os
import random
import time
import urllib.request
import urllib.error
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
# Service configuration — every knob is overridable via the environment.
PORT = int(os.environ.get("IMAGEGEN_PORT", "3600"))  # HTTP listen port for this proxy
COMFYUI_URL = os.environ.get("COMFYUI_URL", "http://localhost:8188")  # ComfyUI API base URL
CACHE_DIR = Path(os.environ.get("IMAGEGEN_CACHE", "./data/_imagecache"))  # on-disk image cache
# Create the cache dir eagerly so later cache writes never fail on a missing directory.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
# Workflow template submitted to ComfyUI; tuned for editorial hero shots
# (its baked-in negative prompt is overridable — see _comfyui_generate).
WORKFLOW_PATH = "/opt/ComfyUI/workflows/editorial_hero.json"
def _cache_key(prompt, width, height, steps, seed=None):
# Include seed so callers can vary outputs deterministically without
# the proxy collapsing to a single cached image. None == legacy
# (omitted from the key for backward compatibility).
bits = f"{prompt}|{width}|{height}|{steps}"
if seed is not None:
bits += f"|{seed}"
return hashlib.sha256(bits.encode()).hexdigest()[:24]
def _cache_get(key):
    """Return the cached image for *key* as base64 text, or None on a miss."""
    path = CACHE_DIR / f"{key}.webp"
    if not path.exists():
        return None
    return base64.b64encode(path.read_bytes()).decode()
def _cache_put(key, img_bytes):
    """Persist raw image bytes under *key* in the on-disk cache."""
    target = CACHE_DIR / f"{key}.webp"
    target.write_bytes(img_bytes)
def _fetch_as_webp(img_info):
    """Download one ComfyUI output image and re-encode it as WEBP bytes.

    *img_info* is a ComfyUI history entry: ``{"filename": ..., "subfolder":
    ..., "type": ...}``.  PIL is imported lazily — only the success path
    needs it.
    """
    from PIL import Image
    img_url = (f"{COMFYUI_URL}/view?filename={img_info['filename']}"
               f"&subfolder={img_info.get('subfolder', '')}"
               f"&type={img_info.get('type', 'output')}")
    with urllib.request.urlopen(img_url, timeout=10) as img_resp:
        img_data = img_resp.read()
    img = Image.open(io.BytesIO(img_data))
    buf = io.BytesIO()
    img.save(buf, format="WEBP", quality=90)
    return buf.getvalue()


def _comfyui_generate(prompt, width=1024, height=512, steps=8, seed=None,
                      negative_prompt=None, cfg=None, sampler=None, scheduler=None):
    """Submit the editorial workflow to ComfyUI and wait for the result.

    Returns ``(webp_bytes, seed)`` on success, ``(None, seed)`` when the job
    completed without images or the poll window (~60s) elapsed.

    Optional overrides — when provided, replace the workflow's defaults.
    The workflow template at editorial_hero.json was tuned for product
    hero shots with a "no humans" negative prompt; portrait callers MUST
    pass `negative_prompt` to avoid the model fighting them on faces.
    """
    # Load workflow template.
    with open(WORKFLOW_PATH) as f:
        workflow = json.load(f)

    # Customize the KSampler node (id "3").
    if seed is None:
        # randint is inclusive on both ends — cap at 2**32 - 1 so the seed
        # stays within unsigned 32-bit range (the old 2**32 bound could
        # produce a 33-bit value).
        seed = random.randint(0, 2**32 - 1)
    sampler_inputs = workflow["3"]["inputs"]
    sampler_inputs["seed"] = seed
    sampler_inputs["steps"] = steps
    if cfg is not None:
        sampler_inputs["cfg"] = cfg
    if sampler:
        sampler_inputs["sampler_name"] = sampler
    if scheduler:
        sampler_inputs["scheduler"] = scheduler
    workflow["5"]["inputs"]["width"] = width
    workflow["5"]["inputs"]["height"] = height
    workflow["6"]["inputs"]["text"] = prompt
    # Node 7 is the negative-prompt CLIPTextEncode. The default is tuned
    # for product hero shots and contains "human, person, face, hand,
    # fingers, realistic photo of people" — actively sabotaging any
    # portrait render. Always overwrite when negative_prompt is given.
    if negative_prompt is not None:
        workflow["7"]["inputs"]["text"] = negative_prompt

    # Submit to ComfyUI (use `with` so the response socket is closed).
    payload = json.dumps({"prompt": workflow}).encode()
    req = urllib.request.Request(
        f"{COMFYUI_URL}/prompt",
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=10) as resp:
        prompt_id = json.loads(resp.read())["prompt_id"]

    # Poll for completion: 120 iterations x 0.5s ≈ 60 seconds.
    for _ in range(120):
        time.sleep(0.5)
        try:
            with urllib.request.urlopen(
                    f"{COMFYUI_URL}/history/{prompt_id}", timeout=5) as status_resp:
                history = json.loads(status_resp.read())
        except (urllib.error.URLError, TimeoutError, OSError, ValueError):
            # Transient poll failure — keep waiting.  (The old bare
            # `except Exception` around the whole body also swallowed
            # image-download and PIL errors and silently spun until the
            # timeout; those now propagate to the caller's fallback.)
            continue
        if prompt_id not in history:
            continue
        outputs = history[prompt_id].get("outputs", {})
        # Return the first image any output node (e.g. SaveImage) produced.
        for node_out in outputs.values():
            images = node_out.get("images", [])
            if images:
                return _fetch_as_webp(images[0]), seed
        return None, seed  # completed but produced no images
    return None, seed  # timed out
def _diffusers_fallback(prompt, width, height, steps, seed):
    """Last-resort path: render with raw SDXL Turbo when ComfyUI is down.

    The pipeline is loaded fresh on every call and torn down afterwards so
    VRAM is released again.  Returns ``(webp_bytes, seed)``.
    """
    import torch
    from diffusers import AutoPipelineForText2Image

    pipeline = AutoPipelineForText2Image.from_pretrained(
        "stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16"
    ).to("cuda")
    pipeline.enable_attention_slicing()

    if seed is None:
        seed = random.randint(0, 2**32)
    generator = torch.Generator("cuda").manual_seed(seed)

    # guidance_scale=0.0 is the SDXL Turbo recommendation (no CFG).
    output = pipeline(prompt=prompt, num_inference_steps=steps, guidance_scale=0.0,
                      width=width, height=height, generator=generator)
    buf = io.BytesIO()
    output.images[0].save(buf, format="WEBP", quality=90)

    # Drop the pipeline and flush the CUDA allocator before returning.
    del pipeline
    torch.cuda.empty_cache()
    return buf.getvalue(), seed
class ImageHandler(BaseHTTPRequestHandler):
    """HTTP front-end for the image generation service.

    Routes:
      GET  /health       — liveness, ComfyUI reachability, cache size
      GET  /cache/stats  — cached file count and total size in MB
      POST /generate     — text-to-image via ComfyUI (diffusers fallback)
      POST /blender      — Blender Cycles hero-banner render
      POST /img-to-3d    — AI image -> TripoSR mesh -> Blender render
      POST /scene-glb    — procedural scene exported as GLB for Three.js
    """

    def log_message(self, fmt, *args):
        # Silence BaseHTTPRequestHandler's per-request stderr logging.
        pass

    def _json(self, code, data):
        """Send *data* as a JSON response with a permissive CORS header."""
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.end_headers()
        self.wfile.write(json.dumps(data).encode())

    def _read_json_body(self):
        """Parse the request body as JSON.

        Returns the parsed dict ({} for an empty body), or None after
        having already sent a 400 response on malformed input.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
            return json.loads(self.rfile.read(length)) if length else {}
        except Exception:  # was bare `except:` — don't catch BaseException
            self._json(400, {"error": "invalid JSON"})
            return None

    def do_OPTIONS(self):
        """CORS preflight: allow cross-origin GET/POST with JSON bodies."""
        self.send_response(200)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def do_GET(self):
        if self.path == "/health":
            comfy_ok = False
            try:
                r = urllib.request.urlopen(f"{COMFYUI_URL}/system_stats", timeout=3)
                comfy_ok = r.status == 200
            except Exception:  # best-effort probe; report unreachable as False
                pass
            cached = len(list(CACHE_DIR.glob("*.webp")))
            self._json(200, {"status": "ok", "comfyui": comfy_ok, "cached_images": cached})
        elif self.path == "/cache/stats":
            files = list(CACHE_DIR.glob("*.webp"))
            total_mb = round(sum(f.stat().st_size for f in files) / 1024**2, 1)
            self._json(200, {"count": len(files), "total_mb": total_mb})
        else:
            self._json(404, {"error": "not found"})

    def do_POST(self):
        # Dispatch table instead of an if/elif chain.
        routes = {
            "/generate": self._generate,
            "/blender": self._blender_render,
            "/img-to-3d": self._img_to_3d,
            "/scene-glb": self._scene_glb,
        }
        handler = routes.get(self.path)
        if handler is not None:
            handler()
        else:
            self._json(404, {"error": "not found"})

    def _generate(self):
        """POST /generate — render `prompt`, return base64 WEBP in JSON."""
        body = self._read_json_body()
        if body is None:
            return
        prompt = body.get("prompt", "").strip()
        if not prompt:
            self._json(400, {"error": "prompt required"})
            return
        # Clamp dimensions and step count to sane GPU-friendly ranges.
        width = min(max(int(body.get("width", 1280)), 256), 1920)
        height = min(max(int(body.get("height", 720)), 256), 1080)
        steps = min(max(int(body.get("steps", 50)), 1), 80)
        seed = body.get("seed")
        # Portrait-friendly overrides — None means "use workflow default".
        # negative_prompt MUST be passed by portrait callers to avoid
        # the workflow's "no humans" baked-in negative.
        negative_prompt = body.get("negative_prompt")
        cfg = body.get("cfg")
        sampler = body.get("sampler")
        scheduler = body.get("scheduler")
        # Cache key mixes negative/cfg in so per-config requests don't
        # collapse to one cached image.  BUG FIX: the store further down
        # previously keyed on the bare prompt, so lookups (which use this
        # composite string) could never hit — the cache was write-only.
        cache_prompt = f"{prompt}||neg={negative_prompt or ''}||cfg={cfg or ''}"
        key = _cache_key(cache_prompt, width, height, steps, seed)
        cached = _cache_get(key)
        if cached:
            self._json(200, {"image": cached, "format": "webp", "width": width,
                             "height": height, "cached": True, "prompt": prompt[:200]})
            return
        t0 = time.time()
        img_bytes = None
        backend = None
        # Try ComfyUI first.
        try:
            comfy_check = urllib.request.urlopen(f"{COMFYUI_URL}/system_stats", timeout=3)
            if comfy_check.status == 200:
                img_bytes, seed = _comfyui_generate(
                    prompt, width, height, steps, seed,
                    negative_prompt=negative_prompt, cfg=cfg,
                    sampler=sampler, scheduler=scheduler,
                )
                backend = "comfyui"
        except Exception:
            pass  # ComfyUI down or errored — fall through to diffusers
        # Fallback to raw diffusers.
        if not img_bytes:
            try:
                img_bytes, seed = _diffusers_fallback(prompt, width, height, steps, seed)
                backend = "diffusers"
            except Exception as e:
                self._json(500, {"error": str(e)[:300]})
                return
        if not img_bytes:
            self._json(500, {"error": "generation failed"})
            return
        elapsed_ms = int((time.time() - t0) * 1000)
        img_b64 = base64.b64encode(img_bytes).decode()
        # Re-key with the actual seed used: when the caller passed None the
        # backend picked a random one, and a re-request with the returned
        # seed should hit the disk cache.  Uses the SAME composite prompt
        # string as the lookup above (this is the cache-miss bug fix).
        key = _cache_key(cache_prompt, width, height, steps, seed)
        _cache_put(key, img_bytes)
        self._json(200, {
            "image": img_b64, "format": "webp", "width": width, "height": height,
            "steps": steps, "seed": seed, "time_ms": elapsed_ms,
            "backend": backend, "prompt": prompt[:200], "cached": False,
        })
        print(f"[IMAGEGEN] {backend} {width}x{height} in {elapsed_ms}ms")

    def _blender_render(self):
        """POST /blender — render a 3D hero banner via Blender Cycles GPU."""
        body = self._read_json_body()
        if body is None:
            return
        seed = body.get("seed", random.randint(0, 99999))
        # Cache check — seed fully determines the procedural scene.
        key = f"blender-{seed}"
        cached = _cache_get(key)
        if cached:
            self._json(200, {"image": cached, "format": "webp",
                             "backend": "blender-cached", "cached": True, "seed": seed})
            return
        t0 = time.time()
        output_png = f"/tmp/blender_render_{seed}.png"
        script = "/opt/ComfyUI/blender_scripts/hero_cycles.py"
        try:
            import subprocess
            result = subprocess.run(
                ["blender", "--background", "--python", script, "--", str(seed), output_png],
                capture_output=True, text=True, timeout=300
            )
            if not os.path.exists(output_png):
                # BUG FIX: the old `"..." + x if cond else "no output"` let
                # the conditional swallow the prefix when stderr was empty.
                detail = result.stderr[-300:] if result.stderr else "no output"
                self._json(500, {"error": "Blender render failed: " + detail})
                return
            # Convert PNG output to WEBP for the wire.
            from PIL import Image
            img = Image.open(output_png)
            buf = io.BytesIO()
            img.save(buf, format="WEBP", quality=92)
            img_bytes = buf.getvalue()
            os.remove(output_png)
            elapsed_ms = int((time.time() - t0) * 1000)
            img_b64 = base64.b64encode(img_bytes).decode()
            _cache_put(key, img_bytes)
            self._json(200, {
                "image": img_b64, "format": "webp", "width": 1280, "height": 320,
                "seed": seed, "time_ms": elapsed_ms, "backend": "blender-cycles",
                "cached": False,
            })
            print(f"[BLENDER] Rendered seed={seed} in {elapsed_ms}ms")
        except Exception as e:
            self._json(500, {"error": str(e)[:300]})

    def _img_to_3d(self):
        """POST /img-to-3d — AI image → TripoSR 3D mesh → Blender render.

        Pipeline: (1) generate a source image, (2) stop ComfyUI to free
        VRAM and run TripoSR, (3) render the mesh (or fall back to a
        displacement render), always restarting ComfyUI afterwards.
        """
        body = self._read_json_body()
        if body is None:
            return
        prompt = body.get("prompt", "abstract flowing golden energy, fractal patterns, dark background, sharp detail").strip()
        seed = body.get("seed", random.randint(0, 99999))
        # seed % 4 caps the cache at a handful of variants per prompt.
        key = f"img3d-{hashlib.sha256(prompt.encode()).hexdigest()[:12]}-{seed % 4}"
        cached = _cache_get(key)
        if cached:
            self._json(200, {"image": cached, "format": "webp",
                             "backend": "img3d-cached", "cached": True})
            return
        t0 = time.time()
        try:
            import subprocess
            # Step 1: generate source image via ComfyUI, else diffusers.
            src_path = f"/tmp/img3d_src_{seed}.png"
            try:
                img_bytes_src, _ = _comfyui_generate(prompt, 512, 512, 25, seed)
                if not img_bytes_src:
                    img_bytes_src, _ = _diffusers_fallback(prompt, 512, 512, 8, seed)
                if not img_bytes_src:
                    self._json(500, {"error": "Failed to generate source image"})
                    return
                from PIL import Image
                img_src = Image.open(io.BytesIO(img_bytes_src))
                img_src.save(src_path, "PNG")
            except Exception as e:
                self._json(500, {"error": f"Source image failed: {e}"})
                return
            # Step 2: TripoSR — convert image to 3D mesh.
            mesh_path = f"/tmp/triposr_mesh_{seed}.obj"
            out_path = f"/tmp/img3d_out_{seed}.png"
            try:
                # Free VRAM for TripoSR by stopping ComfyUI; the finally
                # below restarts it no matter what happens in between.
                subprocess.run(["systemctl", "stop", "comfyui"], capture_output=True, timeout=10)
                time.sleep(3)
                triposr_script = f"""
import torch, sys
sys.path.insert(0, '/opt/TripoSR')
from PIL import Image
from tsr.system import TSR
model = TSR.from_pretrained('stabilityai/TripoSR', config_name='config.yaml', weight_name='model.ckpt')
model.to('cuda')
image = Image.open('{src_path}').convert('RGB')
with torch.no_grad():
    scene_codes = model([image], device='cuda')
meshes = model.extract_mesh(scene_codes, has_vertex_color=True, resolution=128)
meshes[0].export('{mesh_path}')
del model; torch.cuda.empty_cache()
print('[TRIPOSR] mesh exported')
"""
                result = subprocess.run(
                    ["python3", "-c", triposr_script],
                    capture_output=True, text=True, timeout=120)
                if not os.path.exists(mesh_path):
                    # Fallback: 2.5D displacement render when TripoSR fails.
                    print(f"[IMG2-3D] TripoSR failed, falling back to displacement: {result.stderr[-200:]}")
                    script = "/opt/ComfyUI/blender_scripts/image_to_3d.py"
                    result = subprocess.run(
                        ["blender", "--background", "--python", script, "--", src_path, out_path, str(seed)],
                        capture_output=True, text=True, timeout=120)
                else:
                    # Step 3: render the TripoSR mesh with gold materials.
                    script = "/opt/ComfyUI/blender_scripts/triposr_render.py"
                    result = subprocess.run(
                        ["blender", "--background", "--python", script, "--", mesh_path, out_path, str(seed)],
                        capture_output=True, text=True, timeout=120)
                    try:
                        os.remove(mesh_path)
                    except Exception:
                        pass
            finally:
                # Always restart ComfyUI, even if TripoSR/Blender blew up.
                subprocess.run(["systemctl", "start", "comfyui"], capture_output=True, timeout=10)
            if not os.path.exists(out_path):
                self._json(500, {"error": "Blender 3D render failed"})
                return
            from PIL import Image
            img = Image.open(out_path)
            buf = io.BytesIO()
            img.save(buf, format="WEBP", quality=92)
            img_bytes = buf.getvalue()
            # Cleanup temp files (best effort).
            for f in [src_path, out_path]:
                try:
                    os.remove(f)
                except Exception:
                    pass
            elapsed = int((time.time() - t0) * 1000)
            img_b64 = base64.b64encode(img_bytes).decode()
            _cache_put(key, img_bytes)
            self._json(200, {
                "image": img_b64, "format": "webp", "width": 1280, "height": 320,
                "seed": seed, "time_ms": elapsed, "backend": "img-to-3d", "cached": False,
            })
            print(f"[IMG2-3D] seed={seed} prompt={prompt[:50]} in {elapsed}ms")
        except Exception as e:
            self._json(500, {"error": str(e)[:300]})

    def _scene_glb(self):
        """POST /scene-glb — generate a 3D scene, export GLB for Three.js."""
        body = self._read_json_body()
        if body is None:
            return
        seed = body.get("seed", random.randint(0, 99999))
        key = f"glb-{seed}"
        # GLB cache lives beside the .webp cache, keyed by seed alone.
        glb_cache = CACHE_DIR / f"{key}.glb"
        if glb_cache.exists():
            glb_b64 = base64.b64encode(glb_cache.read_bytes()).decode()
            self._json(200, {"glb": glb_b64, "seed": seed, "cached": True})
            return
        t0 = time.time()
        glb_path = f"/tmp/scene_{seed}.glb"
        try:
            import subprocess
            result = subprocess.run(
                ["blender", "--background", "--python",
                 "/opt/ComfyUI/blender_scripts/export_glb.py",
                 "--", str(seed), glb_path],
                capture_output=True, text=True, timeout=120)
            if not os.path.exists(glb_path):
                # Parenthesized so the prefix survives an empty stderr
                # (same precedence bug as the blender route).
                detail = result.stderr[-200:] if result.stderr else "no output"
                self._json(500, {"error": "GLB export failed: " + detail})
                return
            # Path.read_bytes closes the file; the old open().read() leaked the fd.
            glb_bytes = Path(glb_path).read_bytes()
            os.remove(glb_path)
            glb_cache.write_bytes(glb_bytes)
            glb_b64 = base64.b64encode(glb_bytes).decode()
            elapsed = int((time.time() - t0) * 1000)
            self._json(200, {"glb": glb_b64, "seed": seed, "time_ms": elapsed, "cached": False})
            print(f"[GLB] seed={seed} in {elapsed}ms size={len(glb_bytes)//1024}KB")
        except Exception as e:
            self._json(500, {"error": str(e)[:300]})
if __name__ == "__main__":
    # Announce configuration, then serve forever on all interfaces.
    print(f"[IMAGEGEN] Starting on port {PORT}")
    print(f"[IMAGEGEN] ComfyUI backend: {COMFYUI_URL}")
    print(f"[IMAGEGEN] Cache: {CACHE_DIR}")
    server = HTTPServer(("0.0.0.0", PORT), ImageHandler)
    server.serve_forever()