commit 17e410751c Phase 2: Production Bible — Character + Location bibles from scene data
Layer 3 implementation:
- Character Bible: canonical names, aliases, arcs, relationships, wardrobe
  states, emotional arcs, reference prompts — all grounded in scene evidence
- Location Bible: canonical names, variants, descriptions, types, features,
  mood associations, reference prompts — all grounded in scene evidence
- Combined Production Bible output for downstream layers
- Bible validator: duplicate detection, scene reference checks, hallucination
  detection, UNKNOWN field flagging
- Prompt contracts: L3_character_bible_v1, L3_location_bible_v1
- Named versioned output: character_bible_v1.json, location_bible_v1.json,
  production_bible_v1.json
- CLI: --phase 2 runs bible only, --phase omitted runs both phases
- OutputWriter: added write_named/write_named_raw for non-scene outputs

Tested on the_last_backup: 3 characters, 5 locations, 0 hallucinations,
3 warnings (UNKNOWN physical_description — correct behavior)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 16:51:55 -07:00

225 lines
8.5 KiB
Python

"""CLI entry point for the AI Movie Production Pipeline."""
import argparse
import json
import os
import sys
from dotenv import load_dotenv
def main():
    """Parse CLI arguments and run the requested pipeline phases.

    Phase selection: ``--phase 1`` runs ingestion+extraction only,
    ``--phase 2`` runs the production bible only, and omitting ``--phase``
    runs both in order. Exits with status 1 on validation or phase failure,
    0 on success.
    """
    load_dotenv()
    parser = argparse.ArgumentParser(description="AI Movie Production Pipeline")
    parser.add_argument("--script", type=str, help="Path to .fountain script file (Phase 1)")
    parser.add_argument("--project", type=str, help="Project name (determines output directory)")
    parser.add_argument("--phase", type=int, default=None, choices=[1, 2], help="Run specific phase only (1=ingestion+extraction, 2=bible)")
    parser.add_argument("--model", type=str, default="qwen3:14b", help="Model ID (default: qwen3:14b)")
    parser.add_argument("--backend", type=str, default="ollama", choices=["ollama", "anthropic"], help="AI backend (default: ollama)")
    parser.add_argument("--ollama-url", type=str, default="http://localhost:11434", help="Ollama server URL")
    parser.add_argument("--scene", type=int, default=None, help="Process only this scene number (Phase 1)")
    parser.add_argument("--dry-run", action="store_true", help="Validate inputs only, no AI calls")
    parser.add_argument("--force", action="store_true", help="Ignore cache, re-run even if unchanged")
    parser.add_argument("--test", action="store_true", help="Run test suite against test_scripts/")
    parser.add_argument("--output-dir", type=str, default="output", help="Base output directory")
    args = parser.parse_args()

    # Test mode is self-contained: run the suite and return (it exits itself).
    if args.test:
        run_tests(args.model, args.backend, args.ollama_url, args.output_dir)
        return

    if not args.project:
        parser.error("--project is required (unless using --test)")

    # The anthropic backend needs a key unless this is a dry run (no AI calls).
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if args.backend == "anthropic" and not api_key and not args.dry_run:
        print("ERROR: ANTHROPIC_API_KEY not set. Set it in .env or environment.")
        sys.exit(1)

    run_phase1_flag = args.phase is None or args.phase == 1
    run_phase2_flag = args.phase is None or args.phase == 2

    # Phase 1: Script Ingestion + Understanding
    if run_phase1_flag:
        # run_phase1_flag already implies phase is None or 1, so a missing
        # script is unconditionally an error here (the previous nested
        # phase re-check was redundant).
        if not args.script:
            parser.error("--script is required for Phase 1")
        if not os.path.exists(args.script):
            print(f"ERROR: Script file not found: {args.script}")
            sys.exit(1)
        # Imported lazily so --test / phase-2-only runs don't pay for it.
        from src.execution.runner import run_phase1
        result = run_phase1(
            script_path=args.script,
            project_name=args.project,
            api_key=api_key,
            model=args.model,
            backend=args.backend,
            ollama_url=args.ollama_url,
            output_dir=args.output_dir,
            scene_filter=args.scene,
            dry_run=args.dry_run,
            force=args.force,
        )
        if not result.success:
            print(f"\nPHASE 1 FAILED: {result.stop_reason}")
            sys.exit(1)

    # Phase 2: Production Bible (reads Phase 1 outputs from the project dir)
    if run_phase2_flag:
        from src.bible.runner import run_phase2
        bible_result = run_phase2(
            project_name=args.project,
            model=args.model,
            backend=args.backend,
            ollama_url=args.ollama_url,
            api_key=api_key,
            output_dir=args.output_dir,
            dry_run=args.dry_run,
        )
        if not bible_result.success:
            print(f"\nPHASE 2 FAILED: {bible_result.stop_reason}")
            sys.exit(1)

    print("\nPIPELINE COMPLETE")
    sys.exit(0)
def run_tests(model: str, backend: str, ollama_url: str, output_dir: str):
    """Run the regression suite against every .fountain script in test_scripts/.

    For each script, runs Phase 1 under a ``test_<name>`` project. When a
    matching ``<name>_scenes.json`` exists in test_scripts/expected/, the
    result is checked against it (scene count, characters, locations).
    Exits 0 if every script passes, 1 otherwise.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if backend == "anthropic" and not api_key:
        print("ERROR: ANTHROPIC_API_KEY required for anthropic backend tests")
        sys.exit(1)
    test_dir = os.path.join(os.path.dirname(__file__), "test_scripts")
    expected_dir = os.path.join(test_dir, "expected")
    scripts = [f for f in os.listdir(test_dir) if f.endswith(".fountain")]
    if not scripts:
        print("No test scripts found in test_scripts/")
        sys.exit(1)
    from src.execution.runner import run_phase1
    all_passed = True
    for script_file in sorted(scripts):
        stem = os.path.splitext(script_file)[0]
        script_path = os.path.join(test_dir, script_file)
        project_name = f"test_{stem}"
        expected_file = os.path.join(expected_dir, f"{stem}_scenes.json")
        print(f"\n{'='*60}")
        print(f"TEST: {script_file}")
        print(f"{'='*60}")
        result = run_phase1(
            script_path=script_path,
            project_name=project_name,
            api_key=api_key,
            model=model,
            backend=backend,
            ollama_url=ollama_url,
            output_dir=output_dir,
        )
        # Regression checks only run when an expected-output file exists.
        if os.path.exists(expected_file):
            if not _check_expectations(expected_file, result, output_dir, project_name):
                all_passed = False
        else:
            print(" No expected output file — skipping regression checks")
        if not result.success:
            all_passed = False
    print(f"\n{'='*60}")
    if all_passed:
        print("ALL TESTS PASSED")
        sys.exit(0)
    else:
        print("SOME TESTS FAILED")
        sys.exit(1)


def _check_expectations(expected_file: str, result, output_dir: str, project_name: str) -> bool:
    """Compare one Phase-1 result against its expected-output JSON.

    Prints per-check PASS/FAIL lines and a RESULT line; returns True when
    every check passed. ``result`` is the Phase 1 result object (provides
    ``total_scenes``).
    """
    with open(expected_file, "r", encoding="utf-8") as f:
        expected = json.load(f)
    passed = True
    # Scene count check: actual must be within 20% of expected.
    expected_count = expected.get("expected_scene_count", 0)
    if expected_count > 0:
        deviation = abs(result.total_scenes - expected_count) / expected_count
        if deviation > 0.20:
            print(f" FAIL: Scene count {result.total_scenes} vs expected {expected_count} (deviation {deviation:.0%})")
            passed = False
        else:
            print(f" PASS: Scene count {result.total_scenes} (expected {expected_count})")
    # Character checks are case-insensitive; build the upper-cased set once
    # instead of rebuilding it on every loop iteration as before.
    actual_chars_upper = {c.upper() for c in _collect_characters(output_dir, project_name)}
    for char in expected.get("expected_characters", []):
        if char.upper() not in actual_chars_upper:
            print(f" FAIL: Expected character '{char}' not found")
            passed = False
        else:
            print(f" PASS: Character '{char}' found")
    # Hallucination check: these names must NOT appear in the output.
    for char in expected.get("must_not_contain_characters", []):
        if char.upper() in actual_chars_upper:
            print(f" FAIL: Hallucinated character '{char}' found")
            passed = False
    # Location check, same case-insensitive comparison.
    actual_locs_upper = {name.upper() for name in _collect_locations(output_dir, project_name)}
    for loc in expected.get("expected_locations", []):
        if loc.upper() not in actual_locs_upper:
            print(f" FAIL: Expected location '{loc}' not found")
            passed = False
        else:
            print(f" PASS: Location '{loc}' found")
    print(" RESULT: PASSED" if passed else " RESULT: FAILED")
    return passed
def _collect_characters(output_dir: str, project_name: str) -> set[str]:
    """Gather every character name found in the project's L2 scene outputs.

    Reads each scene_*.json under <output_dir>/<project_name>/L2 and unions
    their "characters_present" lists. Returns an empty set when the L2
    directory does not exist.
    """
    scene_dir = os.path.join(output_dir, project_name, "L2")
    found: set[str] = set()
    if not os.path.exists(scene_dir):
        return found
    for entry in os.listdir(scene_dir):
        if not entry.startswith("scene_") or not entry.endswith(".json"):
            continue
        if entry == "latest.json":  # defensive: excluded by the prefix check anyway
            continue
        with open(os.path.join(scene_dir, entry), "r", encoding="utf-8") as handle:
            found.update(json.load(handle).get("characters_present", []))
    return found
def _collect_locations(output_dir: str, project_name: str) -> set[str]:
    """Gather every non-empty location name from the project's L2 scene outputs.

    Reads each scene_*.json under <output_dir>/<project_name>/L2 and collects
    the "location" field of each scene, skipping blanks. Returns an empty set
    when the L2 directory does not exist.
    """
    scene_dir = os.path.join(output_dir, project_name, "L2")
    found: set[str] = set()
    if not os.path.exists(scene_dir):
        return found
    for entry in os.listdir(scene_dir):
        if not entry.startswith("scene_") or not entry.endswith(".json"):
            continue
        if entry == "latest.json":  # defensive: excluded by the prefix check anyway
            continue
        with open(os.path.join(scene_dir, entry), "r", encoding="utf-8") as handle:
            name = json.load(handle).get("location", "")
        if name:
            found.add(name)
    return found
# Standard script entry guard: run the CLI only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()