From 17e410751c1045cb38c2cb6a47a5e6615e27d688 Mon Sep 17 00:00:00 2001 From: profit Date: Mon, 6 Apr 2026 16:51:55 -0700 Subject: [PATCH] =?UTF-8?q?Phase=202:=20Production=20Bible=20=E2=80=94=20C?= =?UTF-8?q?haracter=20+=20Location=20bibles=20from=20scene=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Layer 3 implementation: - Character Bible: canonical names, aliases, arcs, relationships, wardrobe states, emotional arcs, reference prompts — all grounded in scene evidence - Location Bible: canonical names, variants, descriptions, types, features, mood associations, reference prompts — all grounded in scene evidence - Combined Production Bible output for downstream layers - Bible validator: duplicate detection, scene reference checks, hallucination detection, UNKNOWN field flagging - Prompt contracts: L3_character_bible_v1, L3_location_bible_v1 - Named versioned output: character_bible_v1.json, location_bible_v1.json, production_bible_v1.json - CLI: --phase 2 runs bible only, --phase omitted runs both phases - OutputWriter: added write_named/write_named_raw for non-scene outputs Tested on the_last_backup: 3 characters, 5 locations, 0 hallucinations, 3 warnings (UNKNOWN physical_description — correct behavior) Co-Authored-By: Claude Opus 4.6 (1M context) --- run.py | 76 +++++--- src/bible/__init__.py | 0 src/bible/generator.py | 166 +++++++++++++++++ src/bible/runner.py | 246 +++++++++++++++++++++++++ src/bible/validator.py | 191 +++++++++++++++++++ src/execution/output_writer.py | 60 ++++-- src/prompts/L3_character_bible_v1.json | 20 ++ src/prompts/L3_location_bible_v1.json | 18 ++ src/schemas/production_bible.py | 56 ++---- 9 files changed, 756 insertions(+), 77 deletions(-) create mode 100644 src/bible/__init__.py create mode 100644 src/bible/generator.py create mode 100644 src/bible/runner.py create mode 100644 src/bible/validator.py create mode 100644 src/prompts/L3_character_bible_v1.json create mode 100644 src/prompts/L3_location_bible_v1.json diff --git a/run.py b/run.py index 6d39d01..3acc9c4 100644 --- a/run.py +++ b/run.py @@ -11,13 +11,14 @@ from dotenv import load_dotenv def main(): load_dotenv() - parser = argparse.ArgumentParser(description="AI Movie Production Pipeline — Phase 1") - parser.add_argument("--script", type=str, help="Path to .fountain script file") + parser = argparse.ArgumentParser(description="AI Movie Production Pipeline") + parser.add_argument("--script", type=str, help="Path to .fountain script file (Phase 1)") parser.add_argument("--project", type=str, help="Project name (determines output directory)") + parser.add_argument("--phase", type=int, default=None, choices=[1, 2], help="Run specific phase only (1=ingestion+extraction, 2=bible)") parser.add_argument("--model", type=str, default="qwen3:14b", help="Model ID (default: qwen3:14b)") parser.add_argument("--backend", type=str, default="ollama", choices=["ollama", "anthropic"], help="AI backend (default: ollama)") parser.add_argument("--ollama-url", type=str, default="http://localhost:11434", help="Ollama server URL") - parser.add_argument("--scene", type=int, default=None, help="Process only this scene number") + parser.add_argument("--scene", type=int, default=None, help="Process only this scene number (Phase 1)") parser.add_argument("--dry-run", action="store_true", help="Validate inputs only, no AI calls") parser.add_argument("--force", action="store_true", help="Ignore cache, re-run even if unchanged") parser.add_argument("--test", action="store_true", help="Run test suite against test_scripts/") @@ -29,36 +30,63 @@ def main(): run_tests(args.model, args.backend, args.ollama_url, args.output_dir) return - if not args.script or not args.project: - parser.error("--script and --project are required (unless using --test)") + if not args.project: + parser.error("--project is required (unless using --test)") api_key = os.environ.get("ANTHROPIC_API_KEY", "") if args.backend == "anthropic" and not api_key and not args.dry_run: print("ERROR: ANTHROPIC_API_KEY not set. Set it in .env or environment.") sys.exit(1) - if not os.path.exists(args.script): - print(f"ERROR: Script file not found: {args.script}") - sys.exit(1) + run_phase1_flag = args.phase is None or args.phase == 1 + run_phase2_flag = args.phase is None or args.phase == 2 - from src.execution.runner import run_phase1 + # Phase 1: Script Ingestion + Understanding + if run_phase1_flag: + if not args.script: + if args.phase == 1 or args.phase is None: + parser.error("--script is required for Phase 1") + else: + if not os.path.exists(args.script): + print(f"ERROR: Script file not found: {args.script}") + sys.exit(1) - result = run_phase1( - script_path=args.script, - project_name=args.project, - api_key=api_key, - model=args.model, - backend=args.backend, - ollama_url=args.ollama_url, - output_dir=args.output_dir, - scene_filter=args.scene, - dry_run=args.dry_run, - force=args.force, - ) + from src.execution.runner import run_phase1 - if not result.success: - print(f"\nPIPELINE FAILED: {result.stop_reason}") - sys.exit(1) + result = run_phase1( + script_path=args.script, + project_name=args.project, + api_key=api_key, + model=args.model, + backend=args.backend, + ollama_url=args.ollama_url, + output_dir=args.output_dir, + scene_filter=args.scene, + dry_run=args.dry_run, + force=args.force, + ) + + if not result.success: + print(f"\nPHASE 1 FAILED: {result.stop_reason}") + sys.exit(1) + + # Phase 2: Production Bible + if run_phase2_flag: + from src.bible.runner import run_phase2 + + bible_result = run_phase2( + project_name=args.project, + model=args.model, + backend=args.backend, + ollama_url=args.ollama_url, + api_key=api_key, + output_dir=args.output_dir, + dry_run=args.dry_run, + ) + + if not bible_result.success: + print(f"\nPHASE 2 FAILED: {bible_result.stop_reason}") + sys.exit(1) print("\nPIPELINE COMPLETE") sys.exit(0) diff --git a/src/bible/__init__.py b/src/bible/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/bible/generator.py b/src/bible/generator.py new file mode 100644 index 0000000..f4f593b --- /dev/null +++ b/src/bible/generator.py @@ -0,0 +1,166 @@ +"""Production Bible generator — Layer 3. + +Reads validated per-scene JSON from Layer 2 and synthesizes: +1. Character Bible +2. Location Bible + +Uses AI for synthesis, code for aggregation and validation. +""" + +import json +import requests +from dataclasses import dataclass +from src.schemas.production_bible import Character, Location + + +@dataclass +class BibleResult: + raw_data: dict + token_usage: dict + + +class BibleGenerationError(Exception): + pass + + +def generate_character_bible( + scenes: list[dict], + contract_path: str, + model: str = "qwen3:14b", + backend: str = "ollama", + ollama_url: str = "http://localhost:11434", + api_key: str = "", +) -> BibleResult: + """Generate Character Bible from scene data.""" + return _generate_bible(scenes, contract_path, "characters", model, backend, ollama_url, api_key) + + +def generate_location_bible( + scenes: list[dict], + contract_path: str, + model: str = "qwen3:14b", + backend: str = "ollama", + ollama_url: str = "http://localhost:11434", + api_key: str = "", +) -> BibleResult: + """Generate Location Bible from scene data.""" + return _generate_bible(scenes, contract_path, "locations", model, backend, ollama_url, api_key) + + +def _generate_bible( + scenes: list[dict], + contract_path: str, + expected_key: str, + model: str, + backend: str, + ollama_url: str, + api_key: str, +) -> BibleResult: + """Generic bible generation — sends scenes to AI with a prompt contract.""" + with open(contract_path, "r", encoding="utf-8") as f: + contract = json.load(f) + + scenes_json = json.dumps(scenes, indent=2, ensure_ascii=False) + user_prompt = contract["user_prompt_template"].replace("{{scenes_json}}", scenes_json) + + if backend == "ollama": + response_text, token_usage = _call_ollama( + model, contract["system_prompt"], user_prompt, + contract["max_output_tokens"], ollama_url, + ) + elif backend == "anthropic": + response_text, token_usage = _call_anthropic( + model, contract["system_prompt"], user_prompt, + contract["max_output_tokens"], api_key, + ) + else: + raise BibleGenerationError(f"Unknown backend: {backend}") + + # Parse JSON + try: + parsed = json.loads(response_text) + except json.JSONDecodeError as e: + cleaned = _extract_json(response_text) + if cleaned: + try: + parsed = json.loads(cleaned) + except json.JSONDecodeError: + raise BibleGenerationError( + f"AI response is not valid JSON: {e}\nResponse:\n{response_text[:500]}" + ) from e + else: + raise BibleGenerationError( + f"AI response is not valid JSON: {e}\nResponse:\n{response_text[:500]}" + ) from e + + # Extract the expected key + if isinstance(parsed, dict) and expected_key in parsed: + data = parsed + elif isinstance(parsed, list): + data = {expected_key: parsed} + else: + raise BibleGenerationError( + f"Unexpected structure: expected dict with '{expected_key}' key, got {type(parsed)}" + ) + + return BibleResult(raw_data=data, token_usage=token_usage) + + +def _call_ollama(model, system_prompt, user_prompt, max_tokens, ollama_url): + payload = { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + "stream": False, + "options": { + "temperature": 0, + "num_predict": max_tokens, + "num_ctx": 32768, + }, + "format": "json", + } + try: + resp = requests.post(f"{ollama_url}/api/chat", json=payload, timeout=600) + resp.raise_for_status() + except requests.RequestException as e: + raise BibleGenerationError(f"Ollama API call failed: {e}") from e + + data = resp.json() + text = data.get("message", {}).get("content", "") + usage = { + "input": data.get("prompt_eval_count", 0), + "output": data.get("eval_count", 0), + } + if not text: + raise BibleGenerationError("Ollama returned empty response") + return text, usage + + +def _call_anthropic(model, system_prompt, user_prompt, max_tokens, api_key): + from anthropic import Anthropic + client = Anthropic(api_key=api_key) + response = client.messages.create( + model=model, + max_tokens=max_tokens, + temperature=0, + system=system_prompt, + messages=[{"role": "user", "content": user_prompt}], + ) + return response.content[0].text, { + "input": response.usage.input_tokens, + "output": response.usage.output_tokens, + } + + +def _extract_json(text): + if "```json" in text: + start = text.index("```json") + 7 + end = text.index("```", start) + return text[start:end].strip() + if "```" in text: + start = text.index("```") + 3 + end = text.index("```", start) + return text[start:end].strip() + return None diff --git a/src/bible/runner.py b/src/bible/runner.py new file mode 100644 index 0000000..e6470b3 --- /dev/null +++ b/src/bible/runner.py @@ -0,0 +1,246 @@ +"""Layer 3 runner — reads L2 scene outputs, generates and validates Production Bible.""" + +import hashlib +import json +import os +from dataclasses import dataclass, field + +from src.bible.generator import generate_character_bible, generate_location_bible, BibleGenerationError +from src.bible.validator import ( + validate_character_bible, validate_location_bible, BibleValidationWarning, +) +from src.schemas.production_bible import ( + Character, Location, CharacterBible, LocationBible, ProductionBible, +) +from src.validators.schema_validator import validate, ValidationResult +from src.logging.layer_logger import LayerLogger +from src.execution.output_writer import OutputWriter +from src.execution.retry import execute_with_retry, FailureRecord + + +@dataclass +class BiblePipelineResult: + success: bool + characters_count: int = 0 + locations_count: int = 0 + character_warnings: list[BibleValidationWarning] = field(default_factory=list) + location_warnings: list[BibleValidationWarning] = field(default_factory=list) + stop_reason: str | None = None + + +def run_phase2( + project_name: str, + model: str = "qwen3:14b", + backend: str = "ollama", + ollama_url: str = "http://localhost:11434", + api_key: str = "", + output_dir: str = "output", + dry_run: bool = False, +) -> BiblePipelineResult: + """Run Phase 2: generate Production Bible from L2 scene outputs. + + Args: + project_name: Project name (must have L2 outputs). + model: Model ID. + backend: "ollama" or "anthropic". + ollama_url: Ollama server URL. + api_key: API key (for anthropic backend). + output_dir: Base output directory. + dry_run: Validate L2 inputs only, no AI calls. + + Returns: + BiblePipelineResult with counts and warnings. + """ + logger = LayerLogger(project_name, output_dir) + writer = OutputWriter(project_name, output_dir) + + prompts_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "prompts") + char_contract = os.path.join(prompts_dir, "L3_character_bible_v1.json") + loc_contract = os.path.join(prompts_dir, "L3_location_bible_v1.json") + + # ── READ L2 OUTPUTS ───────────────────────────────────────────────── + print(f"[L3] Reading L2 scene outputs for project: {project_name}") + scenes = _load_l2_scenes(project_name, output_dir) + + if not scenes: + print("[L3] STOP: No L2 scene outputs found. Run Phase 1 first.") + return BiblePipelineResult(success=False, stop_reason="No L2 scene outputs found") + + print(f"[L3] Loaded {len(scenes)} scenes from L2") + + if dry_run: + print(f"[DRY RUN] {len(scenes)} scenes available. Would generate Character + Location bibles.") + return BiblePipelineResult(success=True, characters_count=0, locations_count=0) + + total_token_usage = {"input": 0, "output": 0} + + # ── CHARACTER BIBLE ────────────────────────────────────────────────── + print("[L3] Generating Character Bible...") + run_id = logger.start("L3", scene_id=None) + scenes_json = json.dumps(scenes, ensure_ascii=False) + input_hash = f"sha256:{hashlib.sha256(scenes_json.encode()).hexdigest()}" + + def do_char_bible(data): + return generate_character_bible(data, char_contract, model, backend, ollama_url, api_key) + + char_result = execute_with_retry( + fn=do_char_bible, + input_data=scenes, + layer_id="L3", + scene_id=None, + ) + + if isinstance(char_result, FailureRecord): + logger.finish(run_id, input_hash, None, "failed", + failure_state=char_result.error, retry_count=len(char_result.attempts)) + print(f"[L3] Character Bible: FAILED after {len(char_result.attempts)} attempts") + return BiblePipelineResult(success=False, stop_reason=f"Character Bible generation failed: {char_result.error}") + + total_token_usage["input"] += char_result.token_usage["input"] + total_token_usage["output"] += char_result.token_usage["output"] + + # Validate each character against schema + raw_characters = char_result.raw_data.get("characters", []) + valid_characters: list[Character] = [] + char_schema_errors: list[str] = [] + + for i, raw_char in enumerate(raw_characters): + result = validate(raw_char, Character) + if result.status == "failed": + char_schema_errors.append(f"Character {i}: {result.errors}") + print(f"[L3] Character {raw_char.get('canonical_name', f'#{i}')}: SCHEMA FAILED — {result.errors}") + else: + valid_characters.append(result.data) + status_label = "FLAGGED" if result.status == "flagged" else "valid" + print(f"[L3] Character: {result.data.canonical_name} — {status_label}") + + if not valid_characters: + logger.finish(run_id, input_hash, None, "failed", + failure_state=f"All characters failed schema validation: {char_schema_errors}") + return BiblePipelineResult(success=False, stop_reason="All characters failed schema validation") + + char_bible = CharacterBible(characters=valid_characters) + + # Validate against scene data + char_warnings = validate_character_bible(char_bible, scenes) + for w in char_warnings: + print(f"[L3] CHAR WARNING: [{w.entity_name}] {w.message}") + + # Write Character Bible + char_out = writer.write_named("L3", "character_bible", char_bible) + print(f"[L3] Character Bible written: {char_out['path']}") + + logger.finish(run_id, input_hash, char_out["hash"], "valid", + token_usage=char_result.token_usage) + + # ── LOCATION BIBLE ─────────────────────────────────────────────────── + print("[L3] Generating Location Bible...") + run_id = logger.start("L3_loc", scene_id=None) + + def do_loc_bible(data): + return generate_location_bible(data, loc_contract, model, backend, ollama_url, api_key) + + loc_result = execute_with_retry( + fn=do_loc_bible, + input_data=scenes, + layer_id="L3_loc", + scene_id=None, + ) + + if isinstance(loc_result, FailureRecord): + logger.finish(run_id, input_hash, None, "failed", + failure_state=loc_result.error, retry_count=len(loc_result.attempts)) + print(f"[L3] Location Bible: FAILED after {len(loc_result.attempts)} attempts") + return BiblePipelineResult( + success=False, + characters_count=len(valid_characters), + character_warnings=char_warnings, + stop_reason=f"Location Bible generation failed: {loc_result.error}", + ) + + total_token_usage["input"] += loc_result.token_usage["input"] + total_token_usage["output"] += loc_result.token_usage["output"] + + # Validate each location against schema + raw_locations = loc_result.raw_data.get("locations", []) + valid_locations: list[Location] = [] + + for i, raw_loc in enumerate(raw_locations): + result = validate(raw_loc, Location) + if result.status == "failed": + print(f"[L3] Location {raw_loc.get('canonical_name', f'#{i}')}: SCHEMA FAILED — {result.errors}") + else: + valid_locations.append(result.data) + status_label = "FLAGGED" if result.status == "flagged" else "valid" + print(f"[L3] Location: {result.data.canonical_name} — {status_label}") + + if not valid_locations: + logger.finish(run_id, input_hash, None, "failed", + failure_state="All locations failed schema validation") + return BiblePipelineResult( + success=False, + characters_count=len(valid_characters), + character_warnings=char_warnings, + stop_reason="All locations failed schema validation", + ) + + loc_bible = LocationBible(locations=valid_locations) + + # Validate against scene data + loc_warnings = validate_location_bible(loc_bible, scenes) + for w in loc_warnings: + print(f"[L3] LOC WARNING: [{w.entity_name}] {w.message}") + + # Write Location Bible + loc_out = writer.write_named("L3", "location_bible", loc_bible) + print(f"[L3] Location Bible written: {loc_out['path']}") + + logger.finish(run_id, input_hash, loc_out["hash"], "valid", + token_usage=loc_result.token_usage) + + # ── COMBINED PRODUCTION BIBLE ──────────────────────────────────────── + production_bible = ProductionBible( + characters=valid_characters, + locations=valid_locations, + ) + bible_out = writer.write_named_raw("L3", "production_bible", production_bible.model_dump()) + print(f"[L3] Production Bible written: {bible_out['path']}") + + print(f"\n[DONE] Characters: {len(valid_characters)}, Locations: {len(valid_locations)}") + print(f"[DONE] Warnings: {len(char_warnings)} character, {len(loc_warnings)} location") + print(f"[DONE] Tokens: {total_token_usage['input']} in / {total_token_usage['output']} out") + + return BiblePipelineResult( + success=True, + characters_count=len(valid_characters), + locations_count=len(valid_locations), + character_warnings=char_warnings, + location_warnings=loc_warnings, + ) + + +def _load_l2_scenes(project_name: str, output_dir: str) -> list[dict]: + """Load all latest L2 scene outputs for a project.""" + l2_dir = os.path.join(output_dir, project_name, "L2") + if not os.path.exists(l2_dir): + return [] + + latest_path = os.path.join(l2_dir, "latest.json") + if not os.path.exists(latest_path): + return [] + + with open(latest_path, "r", encoding="utf-8") as f: + manifest = json.load(f) + + scenes = [] + for scene_key, version in sorted(manifest.items(), key=lambda x: int(x[0]) if x[0].isdigit() else 0): + if not scene_key.isdigit(): + continue + scene_id = int(scene_key) + filepath = os.path.join(l2_dir, f"scene_{scene_id:03d}_v{version}.json") + if os.path.exists(filepath): + with open(filepath, "r", encoding="utf-8") as f: + scene_data = json.load(f) + scenes.append(scene_data) + + return scenes diff --git a/src/bible/validator.py b/src/bible/validator.py new file mode 100644 index 0000000..9f56bcd --- /dev/null +++ b/src/bible/validator.py @@ -0,0 +1,191 @@ +"""Production Bible validator — validates Character and Location bibles against scene data.""" + +from dataclasses import dataclass +from typing import Literal +from difflib import SequenceMatcher +from src.schemas.production_bible import Character, Location, CharacterBible, LocationBible + + +@dataclass +class BibleValidationWarning: + type: Literal[ + "duplicate_character", + "duplicate_location", + "missing_required_field", + "scene_reference_broken", + "character_not_in_scenes", + "location_not_in_scenes", + "unsupported_detail", + "unknown_value", + ] + entity_name: str + message: str + + +def validate_character_bible( + bible: CharacterBible, + scenes: list[dict], +) -> list[BibleValidationWarning]: + """Validate Character Bible against source scene data.""" + warnings: list[BibleValidationWarning] = [] + + # Collect all characters and locations mentioned in scenes + scene_characters: dict[int, set[str]] = {} + all_scene_characters: set[str] = set() + for s in scenes: + sn = s["scene_number"] + chars = set(c.upper() for c in s.get("characters_present", [])) + scene_characters[sn] = chars + all_scene_characters.update(chars) + + scene_numbers = {s["scene_number"] for s in scenes} + + # 1. Duplicate character detection (fuzzy) + names = [c.canonical_name for c in bible.characters] + for i, name_a in enumerate(names): + for name_b in names[i + 1:]: + ratio = SequenceMatcher(None, name_a.upper(), name_b.upper()).ratio() + if ratio > 0.8: + warnings.append(BibleValidationWarning( + type="duplicate_character", + entity_name=name_a, + message=f"Possible duplicate: '{name_a}' and '{name_b}' (similarity: {ratio:.0%})", + )) + + for char in bible.characters: + # 2. Broken scene references + for sn in char.scenes_present: + if sn not in scene_numbers: + warnings.append(BibleValidationWarning( + type="scene_reference_broken", + entity_name=char.canonical_name, + message=f"scenes_present references scene {sn} which does not exist in L2 output", + )) + + # 3. Character not found in any scene's characters_present + char_upper = char.canonical_name.upper() + found_in_any = False + for chars_set in scene_characters.values(): + # Check if canonical name or any alias matches + if char_upper in chars_set: + found_in_any = True + break + for alias in char.aliases: + if alias.upper() in chars_set: + found_in_any = True + break + # Also check partial match (e.g. "MARA" in "MARA REYES") + for scene_char in chars_set: + if char_upper in scene_char or scene_char in char_upper: + found_in_any = True + break + if found_in_any: + break + if not found_in_any: + warnings.append(BibleValidationWarning( + type="character_not_in_scenes", + entity_name=char.canonical_name, + message=f"Character '{char.canonical_name}' not found in any scene's characters_present — possible hallucination", + )) + + # 4. UNKNOWN values + for field_name in ["physical_description", "personality_summary", "arc_summary", "reference_prompt"]: + val = getattr(char, field_name) + if val == "UNKNOWN": + warnings.append(BibleValidationWarning( + type="unknown_value", + entity_name=char.canonical_name, + message=f"Field '{field_name}' is UNKNOWN", + )) + + # 5. first_appearance consistency + if char.scenes_present and char.first_appearance != min(char.scenes_present): + warnings.append(BibleValidationWarning( + type="unsupported_detail", + entity_name=char.canonical_name, + message=f"first_appearance ({char.first_appearance}) doesn't match min of scenes_present ({min(char.scenes_present)})", + )) + + # 6. Relationship evidence scenes exist + for rel in char.relationships: + for sn in rel.evidence_scenes: + if sn not in scene_numbers: + warnings.append(BibleValidationWarning( + type="scene_reference_broken", + entity_name=char.canonical_name, + message=f"Relationship with '{rel.character}' references scene {sn} which doesn't exist", + )) + + return warnings + + +def validate_location_bible( + bible: LocationBible, + scenes: list[dict], +) -> list[BibleValidationWarning]: + """Validate Location Bible against source scene data.""" + warnings: list[BibleValidationWarning] = [] + + scene_locations: dict[int, str] = {} + all_scene_locations: set[str] = set() + for s in scenes: + loc = s.get("location", "").upper() + scene_locations[s["scene_number"]] = loc + if loc: + all_scene_locations.add(loc) + + scene_numbers = {s["scene_number"] for s in scenes} + + # 1. Duplicate location detection (fuzzy) + names = [loc.canonical_name for loc in bible.locations] + for i, name_a in enumerate(names): + for name_b in names[i + 1:]: + ratio = SequenceMatcher(None, name_a.upper(), name_b.upper()).ratio() + if ratio > 0.8: + warnings.append(BibleValidationWarning( + type="duplicate_location", + entity_name=name_a, + message=f"Possible duplicate: '{name_a}' and '{name_b}' (similarity: {ratio:.0%})", + )) + + for loc in bible.locations: + # 2. Broken scene references + for sn in loc.scenes_used: + if sn not in scene_numbers: + warnings.append(BibleValidationWarning( + type="scene_reference_broken", + entity_name=loc.canonical_name, + message=f"scenes_used references scene {sn} which does not exist in L2 output", + )) + + # 3. Location not found in any scene + loc_upper = loc.canonical_name.upper() + found = False + for scene_loc in all_scene_locations: + if loc_upper in scene_loc or scene_loc in loc_upper: + found = True + break + for variant in loc.variants: + if variant.upper() in scene_loc or scene_loc in variant.upper(): + found = True + break + if found: + break + if not found: + warnings.append(BibleValidationWarning( + type="location_not_in_scenes", + entity_name=loc.canonical_name, + message=f"Location '{loc.canonical_name}' not found in any scene — possible hallucination", + )) + + # 4. UNKNOWN values + for field_name in ["description", "reference_prompt"]: + val = getattr(loc, field_name) + if val == "UNKNOWN": + warnings.append(BibleValidationWarning( + type="unknown_value", + entity_name=loc.canonical_name, + message=f"Field '{field_name}' is UNKNOWN", + )) + + return warnings diff --git a/src/execution/output_writer.py b/src/execution/output_writer.py index ca2dacb..896399f 100644 --- a/src/execution/output_writer.py +++ b/src/execution/output_writer.py @@ -11,6 +11,52 @@ class OutputWriter: self.project_name = project_name self.output_dir = output_dir + def write_named(self, layer_id: str, name: str, data: BaseModel) -> dict: + """Write a named layer output (e.g. 'character_bible') to a versioned JSON file.""" + layer_dir = os.path.join(self.output_dir, self.project_name, layer_id) + os.makedirs(layer_dir, exist_ok=True) + + version = self._next_version(layer_dir, name) + data_dict = data.model_dump() + data_json = json.dumps(data_dict, indent=2, ensure_ascii=False) + data_hash = hashlib.sha256(data_json.encode()).hexdigest() + + filename = f"{name}_v{version}.json" + filepath = os.path.join(layer_dir, filename) + with open(filepath, "w", encoding="utf-8") as f: + f.write(data_json) + + self._update_latest(layer_dir, name, version) + return {"path": filepath, "version": version, "hash": f"sha256:{data_hash}"} + + def write_named_raw(self, layer_id: str, name: str, data: dict) -> dict: + """Write a named raw dict to a versioned JSON file.""" + layer_dir = os.path.join(self.output_dir, self.project_name, layer_id) + os.makedirs(layer_dir, exist_ok=True) + + version = self._next_version(layer_dir, name) + data_json = json.dumps(data, indent=2, ensure_ascii=False) + data_hash = hashlib.sha256(data_json.encode()).hexdigest() + + filename = f"{name}_v{version}.json" + filepath = os.path.join(layer_dir, filename) + with open(filepath, "w", encoding="utf-8") as f: + f.write(data_json) + + self._update_latest(layer_dir, name, version) + return {"path": filepath, "version": version, "hash": f"sha256:{data_hash}"} + + def _update_latest(self, layer_dir: str, key: str | int | None, version: int): + """Update the latest.json manifest.""" + latest_path = os.path.join(layer_dir, "latest.json") + manifest = {} + if os.path.exists(latest_path): + with open(latest_path, "r", encoding="utf-8") as f: + manifest = json.load(f) + manifest[str(key) if key is not None else "output"] = version + with open(latest_path, "w", encoding="utf-8") as f: + json.dump(manifest, f, indent=2) + def write(self, layer_id: str, scene_id: int | None, data: BaseModel) -> dict: """Write a layer output to a versioned JSON file. @@ -97,16 +143,4 @@ class OutputWriter: version += 1 return version - def _update_latest(self, layer_dir: str, scene_id: int | None, version: int): - """Update the latest.json manifest.""" - latest_path = os.path.join(layer_dir, "latest.json") - manifest = {} - if os.path.exists(latest_path): - with open(latest_path, "r", encoding="utf-8") as f: - manifest = json.load(f) - - key = str(scene_id) if scene_id is not None else "output" - manifest[key] = version - - with open(latest_path, "w", encoding="utf-8") as f: - json.dump(manifest, f, indent=2) + # Note: _update_latest is defined above with the named writer methods diff --git a/src/prompts/L3_character_bible_v1.json b/src/prompts/L3_character_bible_v1.json new file mode 100644 index 0000000..0863a64 --- /dev/null +++ b/src/prompts/L3_character_bible_v1.json @@ -0,0 +1,20 @@ +{ + "contract_id": "L3_character_bible_v1", + "layer": "L3", + "version": 1, + "purpose": "Synthesize a canonical Character Bible from per-scene extraction data", + "required_output_schema": "CharacterBible", + "forbidden_behaviors": [ + "Do not invent physical descriptions not grounded in the scene data", + "Do not invent personality traits not supported by scene evidence", + "Do not invent relationships not demonstrated in the scenes", + "Do not merge characters who are clearly different people", + "Do not split one character into multiple entries", + "Do not fabricate wardrobe details not present in wardrobe_clues", + "Do not fabricate emotional states not supported by emotional_tone or action_summary", + "Do not guess — if information is not available, use UNKNOWN" + ], + "system_prompt": "You are a production bible compiler. Your job is to read structured per-scene extraction data from a screenplay and synthesize a canonical Character Bible.\n\nYou will receive a JSON array of scene objects. Each scene contains: characters_present, new_characters_introduced, wardrobe_clues, emotional_tone, action_summary, dialogue_summary, continuity_notes, and other fields.\n\nFor each unique character across all scenes, produce a canonical entry.\n\nReturn a JSON object with key \"characters\" containing an array of character objects.\n\nEach character object MUST have ALL of these fields:\n- canonical_name (string): The primary name used in the script. Use the most complete form (e.g. \"MARA REYES\" not \"MARA\")\n- aliases (string[]): Any alternate forms, nicknames, or shortened names found in the data\n- first_appearance (int): Scene number where the character first appears\n- scenes_present (int[]): All scene numbers where the character is present\n- physical_description (string): Physical appearance ONLY from scene data. Use \"UNKNOWN\" if not described.\n- personality_summary (string): Personality and role ONLY from scene evidence. Use \"UNKNOWN\" if not clear.\n- arc_summary (string): Character arc derived from scene-by-scene progression. Cite scene numbers.\n- relationships (array of {character: string, nature: string, evidence_scenes: int[]}): Only relationships demonstrated in scenes\n- wardrobe_states (array of {scene_range: int[], description: string}): Only from wardrobe_clues data\n- emotional_arc (array of {scene: int, state: string}): Per-scene emotional state from emotional_tone and action_summary\n- reference_prompt (string): A visual description seed for image generation, using ONLY confirmed physical and wardrobe details\n\nRULES:\n- Do not invent physical descriptions not grounded in the scene data\n- Do not invent personality traits not supported by scene evidence\n- Do not invent relationships not demonstrated in the scenes\n- Do not fabricate wardrobe or emotional details\n- If information is uncertain or absent, use \"UNKNOWN\" — never guess\n- Deduplicate characters carefully: same person appearing as \"MARA\" and \"MARA REYES\" is one entry\n- Return ONLY the JSON object, no additional text", + "user_prompt_template": "Build the Character Bible from these scene extractions:\n\n{{scenes_json}}", + "max_output_tokens": 8000 +} diff --git a/src/prompts/L3_location_bible_v1.json b/src/prompts/L3_location_bible_v1.json new file mode 100644 index 0000000..75dabf6 --- /dev/null +++ b/src/prompts/L3_location_bible_v1.json @@ -0,0 +1,18 @@ +{ + "contract_id": "L3_location_bible_v1", + "layer": "L3", + "version": 1, + "purpose": "Synthesize a canonical Location Bible from per-scene extraction data", + "required_output_schema": "LocationBible", + "forbidden_behaviors": [ + "Do not invent location details not grounded in the scene data", + "Do not invent notable features not described in the scenes", + "Do not merge locations that are clearly different places", + "Do not split one location into multiple entries", + "Do not fabricate mood associations not supported by emotional_tone", + "Do not guess — if information is not available, use UNKNOWN" + ], + "system_prompt": "You are a production bible compiler. Your job is to read structured per-scene extraction data from a screenplay and synthesize a canonical Location Bible.\n\nYou will receive a JSON array of scene objects. Each scene contains: scene_heading, location, time_of_day, int_ext, visual_beats, emotional_tone, action_summary, and other fields.\n\nFor each unique location across all scenes, produce a canonical entry.\n\nReturn a JSON object with key \"locations\" containing an array of location objects.\n\nEach location object MUST have ALL of these fields:\n- canonical_name (string): The primary location name. Normalize to a consistent form.\n- variants (string[]): Any alternate spellings or forms found in scene headings\n- description (string): Visual and spatial description ONLY from scene data (visual_beats, action lines). Use \"UNKNOWN\" if not described.\n- type (string): One of INTERIOR, EXTERIOR, BOTH, or UNKNOWN. Derived from int_ext field across scenes.\n- scenes_used (int[]): All scene numbers where this location appears\n- time_of_day_variants (string[]): All time_of_day values this location appears in\n- notable_features (string[]): Set elements, objects, or spatial features mentioned in scenes. Only from scene data.\n- mood_associations (string[]): Moods associated with this location from emotional_tone. Only from scene data.\n- reference_prompt (string): A visual description seed for image generation, using ONLY confirmed visual details from scenes\n\nRULES:\n- Do not invent location details not grounded in the scene data\n- Do not invent notable features not described in the scenes\n- Deduplicate locations carefully: \"SERVER ROOM\" appearing in multiple scenes is one entry\n- If a location only appears once, still create an entry\n- If information is uncertain or absent, use \"UNKNOWN\" — never guess\n- Return ONLY the JSON object, no additional text", + "user_prompt_template": "Build the Location Bible from these scene extractions:\n\n{{scenes_json}}", + "max_output_tokens": 4000 +} diff --git a/src/schemas/production_bible.py b/src/schemas/production_bible.py index 07eae25..cd3156f 100644 --- a/src/schemas/production_bible.py +++ b/src/schemas/production_bible.py @@ -1,12 +1,12 @@ -"""Production Bible schemas — Layer 3. Built in Phase 2, defined now for contract stability.""" +"""Production Bible schemas — Layer 3. Character Bible + Location Bible.""" -from typing import Optional from pydantic import BaseModel class Relationship(BaseModel): character: str nature: str + evidence_scenes: list[int] # which scenes support this relationship class WardrobeState(BaseModel): @@ -20,63 +20,39 @@ class EmotionalState(BaseModel): class Character(BaseModel): - name: str + canonical_name: str aliases: list[str] - description: str - arc_summary: str first_appearance: int scenes_present: list[int] + physical_description: str # grounded in scene data only; "UNKNOWN" if not available + personality_summary: str # grounded in scene data only; "UNKNOWN" if not available + arc_summary: str # derived from scene-by-scene evidence relationships: list[Relationship] wardrobe_states: list[WardrobeState] emotional_arc: list[EmotionalState] - reference_prompt: str + reference_prompt: str # seed for image gen, grounded in known data only class Location(BaseModel): - name: str - description: str - type: str + canonical_name: str + variants: list[str] # normalized alternate names from scene headings + description: str # grounded in scene data only + type: str # INTERIOR, EXTERIOR, BOTH, UNKNOWN scenes_used: list[int] time_of_day_variants: list[str] notable_features: list[str] mood_associations: list[str] - reference_prompt: str + reference_prompt: str # seed for image gen, grounded in known data only -class Prop(BaseModel): - name: str - description: str - significance: str - scenes_present: list[int] - owner_or_association: str - state_changes: list[EmotionalState] # reuses {scene, state} shape +class CharacterBible(BaseModel): + characters: list[Character] -class WardrobeEntry(BaseModel): - character: str - scene_range: list[int] - description: str - change_trigger: str - - -class EmotionalBeat(BaseModel): - scene: int - dominant_tone: str - tension_level: int - arc_position: str - - -class TimelineEntry(BaseModel): - scene: int - story_time: str - elapsed_since_previous: str - concurrent_with: list[int] +class LocationBible(BaseModel): + locations: list[Location] class ProductionBible(BaseModel): characters: list[Character] locations: list[Location] - props: list[Prop] - wardrobe: list[WardrobeEntry] - emotional_arc: list[EmotionalBeat] - timeline: list[TimelineEntry]