Phase 2: Production Bible — Character + Location bibles from scene data

Layer 3 implementation:
- Character Bible: canonical names, aliases, arcs, relationships, wardrobe
  states, emotional arcs, reference prompts — all grounded in scene evidence
- Location Bible: canonical names, variants, descriptions, types, features,
  mood associations, reference prompts — all grounded in scene evidence
- Combined Production Bible output for downstream layers
- Bible validator: duplicate detection, scene reference checks, hallucination
  detection, UNKNOWN field flagging
- Prompt contracts: L3_character_bible_v1, L3_location_bible_v1
- Named versioned output: character_bible_v1.json, location_bible_v1.json,
  production_bible_v1.json
- CLI: --phase 2 runs bible only, --phase omitted runs both phases
- OutputWriter: added write_named/write_named_raw for non-scene outputs

Tested on the_last_backup: 3 characters, 5 locations, 0 hallucinations,
3 warnings (UNKNOWN physical_description — correct behavior)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
profit 2026-04-06 16:51:55 -07:00
parent 74870f7c0d
commit 17e410751c
9 changed files with 756 additions and 77 deletions

40
run.py
View File

@ -11,13 +11,14 @@ from dotenv import load_dotenv
def main(): def main():
load_dotenv() load_dotenv()
parser = argparse.ArgumentParser(description="AI Movie Production Pipeline — Phase 1") parser = argparse.ArgumentParser(description="AI Movie Production Pipeline")
parser.add_argument("--script", type=str, help="Path to .fountain script file") parser.add_argument("--script", type=str, help="Path to .fountain script file (Phase 1)")
parser.add_argument("--project", type=str, help="Project name (determines output directory)") parser.add_argument("--project", type=str, help="Project name (determines output directory)")
parser.add_argument("--phase", type=int, default=None, choices=[1, 2], help="Run specific phase only (1=ingestion+extraction, 2=bible)")
parser.add_argument("--model", type=str, default="qwen3:14b", help="Model ID (default: qwen3:14b)") parser.add_argument("--model", type=str, default="qwen3:14b", help="Model ID (default: qwen3:14b)")
parser.add_argument("--backend", type=str, default="ollama", choices=["ollama", "anthropic"], help="AI backend (default: ollama)") parser.add_argument("--backend", type=str, default="ollama", choices=["ollama", "anthropic"], help="AI backend (default: ollama)")
parser.add_argument("--ollama-url", type=str, default="http://localhost:11434", help="Ollama server URL") parser.add_argument("--ollama-url", type=str, default="http://localhost:11434", help="Ollama server URL")
parser.add_argument("--scene", type=int, default=None, help="Process only this scene number") parser.add_argument("--scene", type=int, default=None, help="Process only this scene number (Phase 1)")
parser.add_argument("--dry-run", action="store_true", help="Validate inputs only, no AI calls") parser.add_argument("--dry-run", action="store_true", help="Validate inputs only, no AI calls")
parser.add_argument("--force", action="store_true", help="Ignore cache, re-run even if unchanged") parser.add_argument("--force", action="store_true", help="Ignore cache, re-run even if unchanged")
parser.add_argument("--test", action="store_true", help="Run test suite against test_scripts/") parser.add_argument("--test", action="store_true", help="Run test suite against test_scripts/")
@ -29,14 +30,23 @@ def main():
run_tests(args.model, args.backend, args.ollama_url, args.output_dir) run_tests(args.model, args.backend, args.ollama_url, args.output_dir)
return return
if not args.script or not args.project: if not args.project:
parser.error("--script and --project are required (unless using --test)") parser.error("--project is required (unless using --test)")
api_key = os.environ.get("ANTHROPIC_API_KEY", "") api_key = os.environ.get("ANTHROPIC_API_KEY", "")
if args.backend == "anthropic" and not api_key and not args.dry_run: if args.backend == "anthropic" and not api_key and not args.dry_run:
print("ERROR: ANTHROPIC_API_KEY not set. Set it in .env or environment.") print("ERROR: ANTHROPIC_API_KEY not set. Set it in .env or environment.")
sys.exit(1) sys.exit(1)
run_phase1_flag = args.phase is None or args.phase == 1
run_phase2_flag = args.phase is None or args.phase == 2
# Phase 1: Script Ingestion + Understanding
if run_phase1_flag:
if not args.script:
if args.phase == 1 or args.phase is None:
parser.error("--script is required for Phase 1")
else:
if not os.path.exists(args.script): if not os.path.exists(args.script):
print(f"ERROR: Script file not found: {args.script}") print(f"ERROR: Script file not found: {args.script}")
sys.exit(1) sys.exit(1)
@ -57,7 +67,25 @@ def main():
) )
if not result.success: if not result.success:
print(f"\nPIPELINE FAILED: {result.stop_reason}") print(f"\nPHASE 1 FAILED: {result.stop_reason}")
sys.exit(1)
# Phase 2: Production Bible
if run_phase2_flag:
from src.bible.runner import run_phase2
bible_result = run_phase2(
project_name=args.project,
model=args.model,
backend=args.backend,
ollama_url=args.ollama_url,
api_key=api_key,
output_dir=args.output_dir,
dry_run=args.dry_run,
)
if not bible_result.success:
print(f"\nPHASE 2 FAILED: {bible_result.stop_reason}")
sys.exit(1) sys.exit(1)
print("\nPIPELINE COMPLETE") print("\nPIPELINE COMPLETE")

0
src/bible/__init__.py Normal file
View File

166
src/bible/generator.py Normal file
View File

@ -0,0 +1,166 @@
"""Production Bible generator — Layer 3.
Reads validated per-scene JSON from Layer 2 and synthesizes:
1. Character Bible
2. Location Bible
Uses AI for synthesis, code for aggregation and validation.
"""
import json
import requests
from dataclasses import dataclass
from src.schemas.production_bible import Character, Location
@dataclass
class BibleResult:
    """Outcome of one bible-generation call: the parsed AI payload plus token counts."""

    # Parsed JSON object from the model, keyed by "characters" or "locations".
    raw_data: dict
    # Token counts for the call, stored under the "input" and "output" keys.
    token_usage: dict
class BibleGenerationError(Exception):
    """Raised when a bible cannot be produced from the AI backend's response."""
def generate_character_bible(
    scenes: list[dict],
    contract_path: str,
    model: str = "qwen3:14b",
    backend: str = "ollama",
    ollama_url: str = "http://localhost:11434",
    api_key: str = "",
) -> BibleResult:
    """Build the Character Bible from per-scene data.

    Thin wrapper over ``_generate_bible`` that fixes the expected top-level
    key of the AI response to ``"characters"``.

    Args:
        scenes: Per-scene dicts to send to the model.
        contract_path: Path to the prompt-contract JSON file.
        model: Model ID passed to the backend.
        backend: ``"ollama"`` or ``"anthropic"``.
        ollama_url: Base URL of the Ollama server.
        api_key: API key used by the anthropic backend.

    Returns:
        BibleResult holding the parsed payload and token usage.
    """
    return _generate_bible(
        scenes=scenes,
        contract_path=contract_path,
        expected_key="characters",
        model=model,
        backend=backend,
        ollama_url=ollama_url,
        api_key=api_key,
    )
def generate_location_bible(
    scenes: list[dict],
    contract_path: str,
    model: str = "qwen3:14b",
    backend: str = "ollama",
    ollama_url: str = "http://localhost:11434",
    api_key: str = "",
) -> BibleResult:
    """Build the Location Bible from per-scene data.

    Thin wrapper over ``_generate_bible`` that fixes the expected top-level
    key of the AI response to ``"locations"``.

    Args:
        scenes: Per-scene dicts to send to the model.
        contract_path: Path to the prompt-contract JSON file.
        model: Model ID passed to the backend.
        backend: ``"ollama"`` or ``"anthropic"``.
        ollama_url: Base URL of the Ollama server.
        api_key: API key used by the anthropic backend.

    Returns:
        BibleResult holding the parsed payload and token usage.
    """
    return _generate_bible(
        scenes=scenes,
        contract_path=contract_path,
        expected_key="locations",
        model=model,
        backend=backend,
        ollama_url=ollama_url,
        api_key=api_key,
    )
def _generate_bible(
    scenes: list[dict],
    contract_path: str,
    expected_key: str,
    model: str,
    backend: str,
    ollama_url: str,
    api_key: str,
) -> BibleResult:
    """Generic bible generation — sends scenes to AI with a prompt contract.

    Args:
        scenes: Per-scene dicts; serialized to JSON and substituted for the
            ``{{scenes_json}}`` placeholder in the contract's user prompt.
        contract_path: Path to a JSON contract providing ``system_prompt``,
            ``user_prompt_template`` and ``max_output_tokens``.
        expected_key: Top-level key the response must carry
            (``"characters"`` or ``"locations"``).
        model: Model ID passed to the backend.
        backend: ``"ollama"`` or ``"anthropic"``.
        ollama_url: Base URL of the Ollama server.
        api_key: API key used by the anthropic backend.

    Returns:
        BibleResult whose ``raw_data`` is a dict containing ``expected_key``.

    Raises:
        BibleGenerationError: Unknown backend, a response that is not valid
            JSON (even after stripping a Markdown code fence), or a parsed
            value that is neither a dict with ``expected_key`` nor a list.
    """
    with open(contract_path, "r", encoding="utf-8") as f:
        contract = json.load(f)

    scenes_json = json.dumps(scenes, indent=2, ensure_ascii=False)
    user_prompt = contract["user_prompt_template"].replace("{{scenes_json}}", scenes_json)

    if backend == "ollama":
        response_text, token_usage = _call_ollama(
            model, contract["system_prompt"], user_prompt,
            contract["max_output_tokens"], ollama_url,
        )
    elif backend == "anthropic":
        response_text, token_usage = _call_anthropic(
            model, contract["system_prompt"], user_prompt,
            contract["max_output_tokens"], api_key,
        )
    else:
        raise BibleGenerationError(f"Unknown backend: {backend}")

    # Parse JSON; on failure retry once with the contents of a code fence.
    try:
        parsed = json.loads(response_text)
    except json.JSONDecodeError as e:
        invalid = BibleGenerationError(
            f"AI response is not valid JSON: {e}\nResponse:\n{response_text[:500]}"
        )
        try:
            cleaned = _extract_json(response_text)
        except ValueError:
            # An unterminated fence must surface as a generation error,
            # not as a stray ValueError from the fence search.
            cleaned = None
        if not cleaned:
            raise invalid from e
        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            raise invalid from e

    # Extract the expected key; a bare list is wrapped under it.
    if isinstance(parsed, dict) and expected_key in parsed:
        data = parsed
    elif isinstance(parsed, list):
        data = {expected_key: parsed}
    else:
        raise BibleGenerationError(
            f"Unexpected structure: expected dict with '{expected_key}' key, got {type(parsed)}"
        )

    return BibleResult(raw_data=data, token_usage=token_usage)
def _call_ollama(model, system_prompt, user_prompt, max_tokens, ollama_url):
    """Send one non-streaming chat request to Ollama and return its text.

    Args:
        model: Model ID.
        system_prompt: System message content.
        user_prompt: User message content.
        max_tokens: Output-token cap, sent as ``num_predict``.
        ollama_url: Base URL of the Ollama server (trailing slash tolerated).

    Returns:
        Tuple ``(text, usage)`` where ``usage`` has ``"input"`` and
        ``"output"`` token counts (0 when the server omits them).

    Raises:
        BibleGenerationError: The HTTP call failed, the body was not JSON,
            or the response carried no message content.
    """
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "stream": False,
        "options": {
            "temperature": 0,
            "num_predict": max_tokens,
            "num_ctx": 32768,
        },
        "format": "json",
    }

    # Avoid a double slash when the configured base URL ends with "/".
    endpoint = f"{ollama_url.rstrip('/')}/api/chat"
    try:
        resp = requests.post(endpoint, json=payload, timeout=600)
        resp.raise_for_status()
        data = resp.json()
    except requests.RequestException as e:
        raise BibleGenerationError(f"Ollama API call failed: {e}") from e
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        raise BibleGenerationError(f"Ollama returned a non-JSON response: {e}") from e

    # "message" may be missing or null; treat both as an empty reply.
    text = (data.get("message") or {}).get("content", "")
    if not text:
        raise BibleGenerationError("Ollama returned empty response")

    usage = {
        "input": data.get("prompt_eval_count", 0),
        "output": data.get("eval_count", 0),
    }
    return text, usage
def _call_anthropic(model, system_prompt, user_prompt, max_tokens, api_key):
    """Send one message to the Anthropic API and return its text.

    Args:
        model: Model ID.
        system_prompt: System prompt.
        user_prompt: Content of the single user message.
        max_tokens: Output-token cap.
        api_key: Anthropic API key.

    Returns:
        Tuple ``(text, usage)`` where ``text`` is the first content block's
        text and ``usage`` has ``"input"`` and ``"output"`` token counts.

    Raises:
        BibleGenerationError: The response contained no content blocks.
    """
    # Function-scope import: the SDK is only loaded when this backend is used.
    from anthropic import Anthropic

    client = Anthropic(api_key=api_key)
    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        temperature=0,
        system=system_prompt,
        messages=[{"role": "user", "content": user_prompt}],
    )

    # Mirror the Ollama path: an empty reply is a generation error, not an IndexError.
    if not response.content:
        raise BibleGenerationError("Anthropic returned empty response")

    usage = {
        "input": response.usage.input_tokens,
        "output": response.usage.output_tokens,
    }
    return response.content[0].text, usage
def _extract_json(text):
    """Return the contents of the first Markdown code fence in ``text``.

    A fence opened with ```` ```json ```` is preferred over a plain
    ```` ``` ```` fence. If the closing fence is missing (for example the
    model output was truncated), everything after the opening fence is
    returned instead of raising.

    Args:
        text: Raw model output.

    Returns:
        The stripped fence contents, or None when ``text`` has no fence.
    """
    for opener in ("```json", "```"):
        start = text.find(opener)
        if start == -1:
            continue
        start += len(opener)
        end = text.find("```", start)
        if end == -1:
            # Unterminated fence: take the remainder rather than fail.
            end = len(text)
        return text[start:end].strip()
    return None

246
src/bible/runner.py Normal file
View File

@ -0,0 +1,246 @@
"""Layer 3 runner — reads L2 scene outputs, generates and validates Production Bible."""
import hashlib
import json
import os
from dataclasses import dataclass, field
from src.bible.generator import generate_character_bible, generate_location_bible, BibleGenerationError
from src.bible.validator import (
validate_character_bible, validate_location_bible, BibleValidationWarning,
)
from src.schemas.production_bible import (
Character, Location, CharacterBible, LocationBible, ProductionBible,
)
from src.validators.schema_validator import validate, ValidationResult
from src.logging.layer_logger import LayerLogger
from src.execution.output_writer import OutputWriter
from src.execution.retry import execute_with_retry, FailureRecord
@dataclass
class BiblePipelineResult:
    """Summary of a Phase 2 run, as returned by ``run_phase2``."""

    # False when the run stopped early; ``stop_reason`` then says why.
    success: bool
    # Number of characters / locations that passed schema validation.
    characters_count: int = 0
    locations_count: int = 0
    # Warnings produced by validating each bible against the L2 scene data.
    character_warnings: list[BibleValidationWarning] = field(default_factory=list)
    location_warnings: list[BibleValidationWarning] = field(default_factory=list)
    # Human-readable reason for an early stop; left as None otherwise.
    stop_reason: str | None = None
def _validate_entities(raw_items, schema, label):
    """Schema-validate raw AI entries one by one.

    Args:
        raw_items: Raw entries taken from the AI payload.
        schema: Model class handed to ``validate`` (``Character`` or ``Location``).
        label: Entity kind used in console output and error strings.

    Returns:
        Tuple ``(valid, errors)``: the validated model instances and one
        error string per entry that failed validation.
    """
    valid = []
    errors = []
    for i, raw in enumerate(raw_items):
        result = validate(raw, schema)
        if result.status == "failed":
            # A rejected entry is not guaranteed to be a dict, so guard the name lookup.
            name = raw.get("canonical_name", f"#{i}") if isinstance(raw, dict) else f"#{i}"
            errors.append(f"{label} {i}: {result.errors}")
            print(f"[L3] {label} {name}: SCHEMA FAILED — {result.errors}")
        else:
            valid.append(result.data)
            status_label = "FLAGGED" if result.status == "flagged" else "valid"
            print(f"[L3] {label}: {result.data.canonical_name} — {status_label}")
    return valid, errors


def run_phase2(
    project_name: str,
    model: str = "qwen3:14b",
    backend: str = "ollama",
    ollama_url: str = "http://localhost:11434",
    api_key: str = "",
    output_dir: str = "output",
    dry_run: bool = False,
) -> BiblePipelineResult:
    """Run Phase 2: generate Production Bible from L2 scene outputs.

    Generates the Character Bible, then the Location Bible, validating each
    entry against its schema and each bible against the scene data, and
    finally writes the combined Production Bible. The run stops at the
    first bible whose generation fails or whose entries all fail schema
    validation.

    Args:
        project_name: Project name (must have L2 outputs).
        model: Model ID.
        backend: "ollama" or "anthropic".
        ollama_url: Ollama server URL.
        api_key: API key (for anthropic backend).
        output_dir: Base output directory.
        dry_run: Validate L2 inputs only, no AI calls.

    Returns:
        BiblePipelineResult with counts and warnings.
    """
    logger = LayerLogger(project_name, output_dir)
    writer = OutputWriter(project_name, output_dir)

    prompts_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "prompts")
    char_contract = os.path.join(prompts_dir, "L3_character_bible_v1.json")
    loc_contract = os.path.join(prompts_dir, "L3_location_bible_v1.json")

    # ── READ L2 OUTPUTS ─────────────────────────────────────────────────
    print(f"[L3] Reading L2 scene outputs for project: {project_name}")
    scenes = _load_l2_scenes(project_name, output_dir)
    if not scenes:
        print("[L3] STOP: No L2 scene outputs found. Run Phase 1 first.")
        return BiblePipelineResult(success=False, stop_reason="No L2 scene outputs found")
    print(f"[L3] Loaded {len(scenes)} scenes from L2")

    if dry_run:
        print(f"[DRY RUN] {len(scenes)} scenes available. Would generate Character + Location bibles.")
        return BiblePipelineResult(success=True, characters_count=0, locations_count=0)

    total_token_usage = {"input": 0, "output": 0}

    # ── CHARACTER BIBLE ──────────────────────────────────────────────────
    print("[L3] Generating Character Bible...")
    run_id = logger.start("L3", scene_id=None)
    # One hash of the full scene set is logged for both bible runs.
    scenes_json = json.dumps(scenes, ensure_ascii=False)
    input_hash = f"sha256:{hashlib.sha256(scenes_json.encode()).hexdigest()}"

    def do_char_bible(data):
        return generate_character_bible(data, char_contract, model, backend, ollama_url, api_key)

    char_result = execute_with_retry(
        fn=do_char_bible,
        input_data=scenes,
        layer_id="L3",
        scene_id=None,
    )
    if isinstance(char_result, FailureRecord):
        logger.finish(run_id, input_hash, None, "failed",
                      failure_state=char_result.error, retry_count=len(char_result.attempts))
        print(f"[L3] Character Bible: FAILED after {len(char_result.attempts)} attempts")
        return BiblePipelineResult(success=False, stop_reason=f"Character Bible generation failed: {char_result.error}")

    total_token_usage["input"] += char_result.token_usage["input"]
    total_token_usage["output"] += char_result.token_usage["output"]

    # Validate each character against schema
    raw_characters = char_result.raw_data.get("characters", [])
    valid_characters, char_schema_errors = _validate_entities(raw_characters, Character, "Character")
    if not valid_characters:
        logger.finish(run_id, input_hash, None, "failed",
                      failure_state=f"All characters failed schema validation: {char_schema_errors}")
        return BiblePipelineResult(success=False, stop_reason="All characters failed schema validation")

    char_bible = CharacterBible(characters=valid_characters)

    # Validate against scene data
    char_warnings = validate_character_bible(char_bible, scenes)
    for w in char_warnings:
        print(f"[L3] CHAR WARNING: [{w.entity_name}] {w.message}")

    # Write Character Bible
    char_out = writer.write_named("L3", "character_bible", char_bible)
    print(f"[L3] Character Bible written: {char_out['path']}")
    logger.finish(run_id, input_hash, char_out["hash"], "valid",
                  token_usage=char_result.token_usage)

    # ── LOCATION BIBLE ───────────────────────────────────────────────────
    print("[L3] Generating Location Bible...")
    run_id = logger.start("L3_loc", scene_id=None)

    def do_loc_bible(data):
        return generate_location_bible(data, loc_contract, model, backend, ollama_url, api_key)

    loc_result = execute_with_retry(
        fn=do_loc_bible,
        input_data=scenes,
        layer_id="L3_loc",
        scene_id=None,
    )
    if isinstance(loc_result, FailureRecord):
        logger.finish(run_id, input_hash, None, "failed",
                      failure_state=loc_result.error, retry_count=len(loc_result.attempts))
        print(f"[L3] Location Bible: FAILED after {len(loc_result.attempts)} attempts")
        return BiblePipelineResult(
            success=False,
            characters_count=len(valid_characters),
            character_warnings=char_warnings,
            stop_reason=f"Location Bible generation failed: {loc_result.error}",
        )

    total_token_usage["input"] += loc_result.token_usage["input"]
    total_token_usage["output"] += loc_result.token_usage["output"]

    # Validate each location against schema
    raw_locations = loc_result.raw_data.get("locations", [])
    valid_locations, _ = _validate_entities(raw_locations, Location, "Location")
    if not valid_locations:
        logger.finish(run_id, input_hash, None, "failed",
                      failure_state="All locations failed schema validation")
        return BiblePipelineResult(
            success=False,
            characters_count=len(valid_characters),
            character_warnings=char_warnings,
            stop_reason="All locations failed schema validation",
        )

    loc_bible = LocationBible(locations=valid_locations)

    # Validate against scene data
    loc_warnings = validate_location_bible(loc_bible, scenes)
    for w in loc_warnings:
        print(f"[L3] LOC WARNING: [{w.entity_name}] {w.message}")

    # Write Location Bible
    loc_out = writer.write_named("L3", "location_bible", loc_bible)
    print(f"[L3] Location Bible written: {loc_out['path']}")
    logger.finish(run_id, input_hash, loc_out["hash"], "valid",
                  token_usage=loc_result.token_usage)

    # ── COMBINED PRODUCTION BIBLE ────────────────────────────────────────
    production_bible = ProductionBible(
        characters=valid_characters,
        locations=valid_locations,
    )
    bible_out = writer.write_named_raw("L3", "production_bible", production_bible.model_dump())
    print(f"[L3] Production Bible written: {bible_out['path']}")

    print(f"\n[DONE] Characters: {len(valid_characters)}, Locations: {len(valid_locations)}")
    print(f"[DONE] Warnings: {len(char_warnings)} character, {len(loc_warnings)} location")
    print(f"[DONE] Tokens: {total_token_usage['input']} in / {total_token_usage['output']} out")

    return BiblePipelineResult(
        success=True,
        characters_count=len(valid_characters),
        locations_count=len(valid_locations),
        character_warnings=char_warnings,
        location_warnings=loc_warnings,
    )
def _load_l2_scenes(project_name: str, output_dir: str) -> list[dict]:
    """Load the latest L2 output of every scene in a project, in scene order.

    Reads ``<output_dir>/<project_name>/L2/latest.json``, a manifest mapping
    keys to version numbers. Only all-digit keys are treated as scenes;
    each resolves to ``scene_NNN_v<version>.json`` in the same directory.
    Manifest entries whose file is absent are skipped.

    Args:
        project_name: Project whose L2 directory is read.
        output_dir: Base output directory.

    Returns:
        Parsed scene dicts sorted by scene number; an empty list when the
        L2 directory or its manifest does not exist.
    """
    l2_dir = os.path.join(output_dir, project_name, "L2")
    manifest_path = os.path.join(l2_dir, "latest.json")
    if not (os.path.exists(l2_dir) and os.path.exists(manifest_path)):
        return []

    with open(manifest_path, "r", encoding="utf-8") as manifest_file:
        manifest = json.load(manifest_file)

    # Keep only scene entries (numeric keys), then order them numerically.
    numbered = [(int(key), ver) for key, ver in manifest.items() if key.isdigit()]
    numbered.sort(key=lambda entry: entry[0])

    loaded = []
    for scene_id, ver in numbered:
        candidate = os.path.join(l2_dir, f"scene_{scene_id:03d}_v{ver}.json")
        if not os.path.exists(candidate):
            continue
        with open(candidate, "r", encoding="utf-8") as scene_file:
            loaded.append(json.load(scene_file))
    return loaded

191
src/bible/validator.py Normal file
View File

@ -0,0 +1,191 @@
"""Production Bible validator — validates Character and Location bibles against scene data."""
from dataclasses import dataclass
from typing import Literal
from difflib import SequenceMatcher
from src.schemas.production_bible import Character, Location, CharacterBible, LocationBible
@dataclass
class BibleValidationWarning:
    """One finding from validating a bible against the source scene data."""

    # Category of the finding; restricted to the literals listed here.
    type: Literal[
        "duplicate_character",
        "duplicate_location",
        "missing_required_field",
        "scene_reference_broken",
        "character_not_in_scenes",
        "location_not_in_scenes",
        "unsupported_detail",
        "unknown_value",
    ]
    # canonical_name of the character or location the finding is about.
    entity_name: str
    # Human-readable explanation of the finding.
    message: str
def validate_character_bible(
    bible: CharacterBible,
    scenes: list[dict],
) -> list[BibleValidationWarning]:
    """Validate Character Bible against source scene data.

    Checks performed:
      1. Fuzzy duplicate canonical names (similarity ratio above 0.8).
      2. ``scenes_present`` entries that are not L2 scene numbers.
      3. Characters that match no name in any scene's ``characters_present``
         (exact canonical name, exact alias, or substring either way).
      4. Text fields whose value is exactly "UNKNOWN".
      5. ``first_appearance`` differing from ``min(scenes_present)``.
      6. Relationship ``evidence_scenes`` that are not L2 scene numbers.

    Args:
        bible: Character Bible to check.
        scenes: L2 scene dicts; each must carry ``scene_number``.

    Returns:
        Warnings in the order found; empty when nothing is flagged.
    """
    warnings: list[BibleValidationWarning] = []

    scene_numbers = {s["scene_number"] for s in scenes}
    # Upper-cased character names across all scenes. Blank names are dropped:
    # "" is a substring of every string and would make the partial match
    # below succeed for any character.
    scene_names: set[str] = {
        name.upper()
        for s in scenes
        for name in (s.get("characters_present") or [])
        if name and name.strip()
    }

    # 1. Duplicate character detection (fuzzy)
    names = [c.canonical_name for c in bible.characters]
    for i, name_a in enumerate(names):
        for name_b in names[i + 1:]:
            ratio = SequenceMatcher(None, name_a.upper(), name_b.upper()).ratio()
            if ratio > 0.8:
                warnings.append(BibleValidationWarning(
                    type="duplicate_character",
                    entity_name=name_a,
                    message=f"Possible duplicate: '{name_a}' and '{name_b}' (similarity: {ratio:.0%})",
                ))

    for char in bible.characters:
        # 2. Broken scene references
        for sn in char.scenes_present:
            if sn not in scene_numbers:
                warnings.append(BibleValidationWarning(
                    type="scene_reference_broken",
                    entity_name=char.canonical_name,
                    message=f"scenes_present references scene {sn} which does not exist in L2 output",
                ))

        # 3. Character not found in any scene's characters_present
        char_upper = char.canonical_name.upper()
        alias_uppers = {alias.upper() for alias in char.aliases}
        found_in_any = (
            char_upper in scene_names
            or not alias_uppers.isdisjoint(scene_names)
            # Partial match (e.g. "MARA" in "MARA REYES"); skipped for a
            # blank canonical name, which would match everything.
            or (
                bool(char_upper.strip())
                and any(
                    char_upper in scene_char or scene_char in char_upper
                    for scene_char in scene_names
                )
            )
        )
        if not found_in_any:
            warnings.append(BibleValidationWarning(
                type="character_not_in_scenes",
                entity_name=char.canonical_name,
                message=f"Character '{char.canonical_name}' not found in any scene's characters_present — possible hallucination",
            ))

        # 4. UNKNOWN values
        for field_name in ["physical_description", "personality_summary", "arc_summary", "reference_prompt"]:
            val = getattr(char, field_name)
            if val == "UNKNOWN":
                warnings.append(BibleValidationWarning(
                    type="unknown_value",
                    entity_name=char.canonical_name,
                    message=f"Field '{field_name}' is UNKNOWN",
                ))

        # 5. first_appearance consistency
        if char.scenes_present and char.first_appearance != min(char.scenes_present):
            warnings.append(BibleValidationWarning(
                type="unsupported_detail",
                entity_name=char.canonical_name,
                message=f"first_appearance ({char.first_appearance}) doesn't match min of scenes_present ({min(char.scenes_present)})",
            ))

        # 6. Relationship evidence scenes exist
        for rel in char.relationships:
            for sn in rel.evidence_scenes:
                if sn not in scene_numbers:
                    warnings.append(BibleValidationWarning(
                        type="scene_reference_broken",
                        entity_name=char.canonical_name,
                        message=f"Relationship with '{rel.character}' references scene {sn} which doesn't exist",
                    ))

    return warnings
def validate_location_bible(
    bible: LocationBible,
    scenes: list[dict],
) -> list[BibleValidationWarning]:
    """Validate Location Bible against source scene data.

    Checks performed:
      1. Fuzzy duplicate canonical names (similarity ratio above 0.8).
      2. ``scenes_used`` entries that are not L2 scene numbers.
      3. Locations whose canonical name and variants match no scene
         ``location`` (substring either way, case-insensitive).
      4. ``description`` / ``reference_prompt`` exactly equal to "UNKNOWN".

    Args:
        bible: Location Bible to check.
        scenes: L2 scene dicts; each must carry ``scene_number``.

    Returns:
        Warnings in the order found; empty when nothing is flagged.
    """
    warnings: list[BibleValidationWarning] = []

    scene_numbers = {s["scene_number"] for s in scenes}
    # Upper-cased scene locations. A missing, null or empty location is
    # skipped rather than crashing on ``None.upper()``.
    all_scene_locations: set[str] = {
        scene_loc.upper()
        for scene_loc in (s.get("location") for s in scenes)
        if scene_loc
    }

    # 1. Duplicate location detection (fuzzy)
    names = [entry.canonical_name for entry in bible.locations]
    for i, name_a in enumerate(names):
        for name_b in names[i + 1:]:
            ratio = SequenceMatcher(None, name_a.upper(), name_b.upper()).ratio()
            if ratio > 0.8:
                warnings.append(BibleValidationWarning(
                    type="duplicate_location",
                    entity_name=name_a,
                    message=f"Possible duplicate: '{name_a}' and '{name_b}' (similarity: {ratio:.0%})",
                ))

    for entry in bible.locations:
        # 2. Broken scene references
        for sn in entry.scenes_used:
            if sn not in scene_numbers:
                warnings.append(BibleValidationWarning(
                    type="scene_reference_broken",
                    entity_name=entry.canonical_name,
                    message=f"scenes_used references scene {sn} which does not exist in L2 output",
                ))

        # 3. Location not found in any scene
        # Blank names are excluded: "" is a substring of every scene
        # location and would hide a hallucinated entry.
        candidates = [
            name.upper()
            for name in (entry.canonical_name, *entry.variants)
            if name and name.strip()
        ]
        found = any(
            candidate in scene_loc or scene_loc in candidate
            for scene_loc in all_scene_locations
            for candidate in candidates
        )
        if not found:
            warnings.append(BibleValidationWarning(
                type="location_not_in_scenes",
                entity_name=entry.canonical_name,
                message=f"Location '{entry.canonical_name}' not found in any scene — possible hallucination",
            ))

        # 4. UNKNOWN values
        for field_name in ["description", "reference_prompt"]:
            val = getattr(entry, field_name)
            if val == "UNKNOWN":
                warnings.append(BibleValidationWarning(
                    type="unknown_value",
                    entity_name=entry.canonical_name,
                    message=f"Field '{field_name}' is UNKNOWN",
                ))

    return warnings

View File

@ -11,6 +11,52 @@ class OutputWriter:
self.project_name = project_name self.project_name = project_name
self.output_dir = output_dir self.output_dir = output_dir
def write_named(self, layer_id: str, name: str, data: BaseModel) -> dict:
    """Write a named layer output (e.g. 'character_bible') to a versioned JSON file.

    The model is dumped to a plain dict and handed to ``write_named_raw``,
    so both writers share one serialization, hashing and manifest path.

    Args:
        layer_id: Layer directory name under the project output directory.
        name: Output name; used as file-name prefix and manifest key.
        data: Model instance to serialize via ``model_dump()``.

    Returns:
        Dict with ``path``, ``version`` and ``hash`` ("sha256:<hex>") of the
        written file.
    """
    return self.write_named_raw(layer_id, name, data.model_dump())
def write_named_raw(self, layer_id: str, name: str, data: dict) -> dict:
    """Serialize a plain dict to the next versioned JSON file for ``name``.

    The file is written to ``<output_dir>/<project_name>/<layer_id>/`` as
    ``<name>_v<version>.json`` and the layer's latest.json manifest is
    updated to point at that version.

    Args:
        layer_id: Layer directory name under the project output directory.
        name: Output name; used as file-name prefix and manifest key.
        data: JSON-serializable dict to write.

    Returns:
        Dict with ``path``, ``version`` and ``hash`` ("sha256:<hex>" of the
        serialized JSON text).
    """
    target_dir = os.path.join(self.output_dir, self.project_name, layer_id)
    os.makedirs(target_dir, exist_ok=True)

    version = self._next_version(target_dir, name)
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    digest = hashlib.sha256(serialized.encode()).hexdigest()

    filepath = os.path.join(target_dir, f"{name}_v{version}.json")
    with open(filepath, "w", encoding="utf-8") as out:
        out.write(serialized)

    self._update_latest(target_dir, name, version)
    return {
        "path": filepath,
        "version": version,
        "hash": f"sha256:{digest}",
    }
def _update_latest(self, layer_dir: str, key: str | int | None, version: int):
"""Update the latest.json manifest."""
latest_path = os.path.join(layer_dir, "latest.json")
manifest = {}
if os.path.exists(latest_path):
with open(latest_path, "r", encoding="utf-8") as f:
manifest = json.load(f)
manifest[str(key) if key is not None else "output"] = version
with open(latest_path, "w", encoding="utf-8") as f:
json.dump(manifest, f, indent=2)
def write(self, layer_id: str, scene_id: int | None, data: BaseModel) -> dict: def write(self, layer_id: str, scene_id: int | None, data: BaseModel) -> dict:
"""Write a layer output to a versioned JSON file. """Write a layer output to a versioned JSON file.
@ -97,16 +143,4 @@ class OutputWriter:
version += 1 version += 1
return version return version
def _update_latest(self, layer_dir: str, scene_id: int | None, version: int): # Note: _update_latest is defined above with the named writer methods
"""Update the latest.json manifest."""
latest_path = os.path.join(layer_dir, "latest.json")
manifest = {}
if os.path.exists(latest_path):
with open(latest_path, "r", encoding="utf-8") as f:
manifest = json.load(f)
key = str(scene_id) if scene_id is not None else "output"
manifest[key] = version
with open(latest_path, "w", encoding="utf-8") as f:
json.dump(manifest, f, indent=2)

View File

@ -0,0 +1,20 @@
{
"contract_id": "L3_character_bible_v1",
"layer": "L3",
"version": 1,
"purpose": "Synthesize a canonical Character Bible from per-scene extraction data",
"required_output_schema": "CharacterBible",
"forbidden_behaviors": [
"Do not invent physical descriptions not grounded in the scene data",
"Do not invent personality traits not supported by scene evidence",
"Do not invent relationships not demonstrated in the scenes",
"Do not merge characters who are clearly different people",
"Do not split one character into multiple entries",
"Do not fabricate wardrobe details not present in wardrobe_clues",
"Do not fabricate emotional states not supported by emotional_tone or action_summary",
"Do not guess — if information is not available, use UNKNOWN"
],
"system_prompt": "You are a production bible compiler. Your job is to read structured per-scene extraction data from a screenplay and synthesize a canonical Character Bible.\n\nYou will receive a JSON array of scene objects. Each scene contains: characters_present, new_characters_introduced, wardrobe_clues, emotional_tone, action_summary, dialogue_summary, continuity_notes, and other fields.\n\nFor each unique character across all scenes, produce a canonical entry.\n\nReturn a JSON object with key \"characters\" containing an array of character objects.\n\nEach character object MUST have ALL of these fields:\n- canonical_name (string): The primary name used in the script. Use the most complete form (e.g. \"MARA REYES\" not \"MARA\")\n- aliases (string[]): Any alternate forms, nicknames, or shortened names found in the data\n- first_appearance (int): Scene number where the character first appears\n- scenes_present (int[]): All scene numbers where the character is present\n- physical_description (string): Physical appearance ONLY from scene data. Use \"UNKNOWN\" if not described.\n- personality_summary (string): Personality and role ONLY from scene evidence. Use \"UNKNOWN\" if not clear.\n- arc_summary (string): Character arc derived from scene-by-scene progression. Cite scene numbers.\n- relationships (array of {character: string, nature: string, evidence_scenes: int[]}): Only relationships demonstrated in scenes\n- wardrobe_states (array of {scene_range: int[], description: string}): Only from wardrobe_clues data\n- emotional_arc (array of {scene: int, state: string}): Per-scene emotional state from emotional_tone and action_summary\n- reference_prompt (string): A visual description seed for image generation, using ONLY confirmed physical and wardrobe details\n\nRULES:\n- Do not invent physical descriptions not grounded in the scene data\n- Do not invent personality traits not supported by scene evidence\n- Do not invent relationships not demonstrated in the scenes\n- Do not fabricate wardrobe or emotional details\n- If information is uncertain or absent, use \"UNKNOWN\" — never guess\n- Deduplicate characters carefully: same person appearing as \"MARA\" and \"MARA REYES\" is one entry\n- Return ONLY the JSON object, no additional text",
"user_prompt_template": "Build the Character Bible from these scene extractions:\n\n{{scenes_json}}",
"max_output_tokens": 8000
}

View File

@ -0,0 +1,18 @@
{
"contract_id": "L3_location_bible_v1",
"layer": "L3",
"version": 1,
"purpose": "Synthesize a canonical Location Bible from per-scene extraction data",
"required_output_schema": "LocationBible",
"forbidden_behaviors": [
"Do not invent location details not grounded in the scene data",
"Do not invent notable features not described in the scenes",
"Do not merge locations that are clearly different places",
"Do not split one location into multiple entries",
"Do not fabricate mood associations not supported by emotional_tone",
"Do not guess — if information is not available, use UNKNOWN"
],
"system_prompt": "You are a production bible compiler. Your job is to read structured per-scene extraction data from a screenplay and synthesize a canonical Location Bible.\n\nYou will receive a JSON array of scene objects. Each scene contains: scene_heading, location, time_of_day, int_ext, visual_beats, emotional_tone, action_summary, and other fields.\n\nFor each unique location across all scenes, produce a canonical entry.\n\nReturn a JSON object with key \"locations\" containing an array of location objects.\n\nEach location object MUST have ALL of these fields:\n- canonical_name (string): The primary location name. Normalize to a consistent form.\n- variants (string[]): Any alternate spellings or forms found in scene headings\n- description (string): Visual and spatial description ONLY from scene data (visual_beats, action lines). Use \"UNKNOWN\" if not described.\n- type (string): One of INTERIOR, EXTERIOR, BOTH, or UNKNOWN. Derived from int_ext field across scenes.\n- scenes_used (int[]): All scene numbers where this location appears\n- time_of_day_variants (string[]): All time_of_day values this location appears in\n- notable_features (string[]): Set elements, objects, or spatial features mentioned in scenes. Only from scene data.\n- mood_associations (string[]): Moods associated with this location from emotional_tone. Only from scene data.\n- reference_prompt (string): A visual description seed for image generation, using ONLY confirmed visual details from scenes\n\nRULES:\n- Do not invent location details not grounded in the scene data\n- Do not invent notable features not described in the scenes\n- Deduplicate locations carefully: \"SERVER ROOM\" appearing in multiple scenes is one entry\n- If a location only appears once, still create an entry\n- If information is uncertain or absent, use \"UNKNOWN\" — never guess\n- Return ONLY the JSON object, no additional text",
"user_prompt_template": "Build the Location Bible from these scene extractions:\n\n{{scenes_json}}",
"max_output_tokens": 4000
}

View File

@ -1,12 +1,12 @@
"""Production Bible schemas — Layer 3. Built in Phase 2, defined now for contract stability.""" """Production Bible schemas — Layer 3. Character Bible + Location Bible."""
from typing import Optional
from pydantic import BaseModel from pydantic import BaseModel
class Relationship(BaseModel): class Relationship(BaseModel):
character: str character: str
nature: str nature: str
evidence_scenes: list[int] # which scenes support this relationship
class WardrobeState(BaseModel): class WardrobeState(BaseModel):
@ -20,63 +20,39 @@ class EmotionalState(BaseModel):
class Character(BaseModel): class Character(BaseModel):
name: str canonical_name: str
aliases: list[str] aliases: list[str]
description: str
arc_summary: str
first_appearance: int first_appearance: int
scenes_present: list[int] scenes_present: list[int]
physical_description: str # grounded in scene data only; "UNKNOWN" if not available
personality_summary: str # grounded in scene data only; "UNKNOWN" if not available
arc_summary: str # derived from scene-by-scene evidence
relationships: list[Relationship] relationships: list[Relationship]
wardrobe_states: list[WardrobeState] wardrobe_states: list[WardrobeState]
emotional_arc: list[EmotionalState] emotional_arc: list[EmotionalState]
reference_prompt: str reference_prompt: str # seed for image gen, grounded in known data only
class Location(BaseModel): class Location(BaseModel):
name: str canonical_name: str
description: str variants: list[str] # normalized alternate names from scene headings
type: str description: str # grounded in scene data only
type: str # INTERIOR, EXTERIOR, BOTH, UNKNOWN
scenes_used: list[int] scenes_used: list[int]
time_of_day_variants: list[str] time_of_day_variants: list[str]
notable_features: list[str] notable_features: list[str]
mood_associations: list[str] mood_associations: list[str]
reference_prompt: str reference_prompt: str # seed for image gen, grounded in known data only
class Prop(BaseModel): class CharacterBible(BaseModel):
name: str characters: list[Character]
description: str
significance: str
scenes_present: list[int]
owner_or_association: str
state_changes: list[EmotionalState] # reuses {scene, state} shape
class WardrobeEntry(BaseModel): class LocationBible(BaseModel):
character: str locations: list[Location]
scene_range: list[int]
description: str
change_trigger: str
class EmotionalBeat(BaseModel):
scene: int
dominant_tone: str
tension_level: int
arc_position: str
class TimelineEntry(BaseModel):
scene: int
story_time: str
elapsed_since_previous: str
concurrent_with: list[int]
class ProductionBible(BaseModel): class ProductionBible(BaseModel):
characters: list[Character] characters: list[Character]
locations: list[Location] locations: list[Location]
props: list[Prop]
wardrobe: list[WardrobeEntry]
emotional_arc: list[EmotionalBeat]
timeline: list[TimelineEntry]