commit 17e410751c Phase 2: Production Bible — Character + Location bibles from scene data
Layer 3 implementation:
- Character Bible: canonical names, aliases, arcs, relationships, wardrobe
  states, emotional arcs, reference prompts — all grounded in scene evidence
- Location Bible: canonical names, variants, descriptions, types, features,
  mood associations, reference prompts — all grounded in scene evidence
- Combined Production Bible output for downstream layers
- Bible validator: duplicate detection, scene reference checks, hallucination
  detection, UNKNOWN field flagging
- Prompt contracts: L3_character_bible_v1, L3_location_bible_v1
- Named versioned output: character_bible_v1.json, location_bible_v1.json,
  production_bible_v1.json
- CLI: --phase 2 runs bible only, --phase omitted runs both phases
- OutputWriter: added write_named/write_named_raw for non-scene outputs

Tested on the_last_backup: 3 characters, 5 locations, 0 hallucinations,
3 warnings (UNKNOWN physical_description — correct behavior)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 16:51:55 -07:00

225 lines
8.5 KiB
Python

"""CLI entry point for the AI Movie Production Pipeline."""
import argparse
import json
import os
import sys
from dotenv import load_dotenv
def main():
    """Parse CLI arguments and run the requested pipeline phases.

    Phase selection: ``--phase 1`` runs ingestion+extraction only,
    ``--phase 2`` runs the production bible only, and omitting ``--phase``
    runs both in order. Exits with status 1 on validation or phase failure,
    0 on success.
    """
    load_dotenv()
    parser = argparse.ArgumentParser(description="AI Movie Production Pipeline")
    parser.add_argument("--script", type=str, help="Path to .fountain script file (Phase 1)")
    parser.add_argument("--project", type=str, help="Project name (determines output directory)")
    parser.add_argument("--phase", type=int, default=None, choices=[1, 2], help="Run specific phase only (1=ingestion+extraction, 2=bible)")
    parser.add_argument("--model", type=str, default="qwen3:14b", help="Model ID (default: qwen3:14b)")
    parser.add_argument("--backend", type=str, default="ollama", choices=["ollama", "anthropic"], help="AI backend (default: ollama)")
    parser.add_argument("--ollama-url", type=str, default="http://localhost:11434", help="Ollama server URL")
    parser.add_argument("--scene", type=int, default=None, help="Process only this scene number (Phase 1)")
    parser.add_argument("--dry-run", action="store_true", help="Validate inputs only, no AI calls")
    parser.add_argument("--force", action="store_true", help="Ignore cache, re-run even if unchanged")
    parser.add_argument("--test", action="store_true", help="Run test suite against test_scripts/")
    parser.add_argument("--output-dir", type=str, default="output", help="Base output directory")
    args = parser.parse_args()

    # Test mode is self-contained: run the suite and return (it exits itself).
    if args.test:
        run_tests(args.model, args.backend, args.ollama_url, args.output_dir)
        return

    if not args.project:
        parser.error("--project is required (unless using --test)")

    # The anthropic backend needs a key unless this is a dry run (no AI calls).
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if args.backend == "anthropic" and not api_key and not args.dry_run:
        print("ERROR: ANTHROPIC_API_KEY not set. Set it in .env or environment.")
        sys.exit(1)

    run_phase1_flag = args.phase is None or args.phase == 1
    run_phase2_flag = args.phase is None or args.phase == 2

    # Phase 1: Script Ingestion + Understanding
    if run_phase1_flag:
        # run_phase1_flag already implies phase is None or 1, so a missing
        # script is unconditionally an error here (the previous nested
        # phase re-check was redundant).
        if not args.script:
            parser.error("--script is required for Phase 1")
        if not os.path.exists(args.script):
            print(f"ERROR: Script file not found: {args.script}")
            sys.exit(1)
        # Imported lazily so --test / phase-2-only runs don't pay for it.
        from src.execution.runner import run_phase1
        result = run_phase1(
            script_path=args.script,
            project_name=args.project,
            api_key=api_key,
            model=args.model,
            backend=args.backend,
            ollama_url=args.ollama_url,
            output_dir=args.output_dir,
            scene_filter=args.scene,
            dry_run=args.dry_run,
            force=args.force,
        )
        if not result.success:
            print(f"\nPHASE 1 FAILED: {result.stop_reason}")
            sys.exit(1)

    # Phase 2: Production Bible (reads Phase 1 outputs from the project dir)
    if run_phase2_flag:
        from src.bible.runner import run_phase2
        bible_result = run_phase2(
            project_name=args.project,
            model=args.model,
            backend=args.backend,
            ollama_url=args.ollama_url,
            api_key=api_key,
            output_dir=args.output_dir,
            dry_run=args.dry_run,
        )
        if not bible_result.success:
            print(f"\nPHASE 2 FAILED: {bible_result.stop_reason}")
            sys.exit(1)

    print("\nPIPELINE COMPLETE")
    sys.exit(0)
def run_tests(model: str, backend: str, ollama_url: str, output_dir: str):
    """Run the regression suite against every .fountain script in test_scripts/.

    For each script, runs Phase 1 under a ``test_<name>`` project. When a
    matching ``<name>_scenes.json`` exists in test_scripts/expected/, the
    result is checked against it (scene count, characters, locations).
    Exits 0 if every script passes, 1 otherwise.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if backend == "anthropic" and not api_key:
        print("ERROR: ANTHROPIC_API_KEY required for anthropic backend tests")
        sys.exit(1)
    test_dir = os.path.join(os.path.dirname(__file__), "test_scripts")
    expected_dir = os.path.join(test_dir, "expected")
    scripts = [f for f in os.listdir(test_dir) if f.endswith(".fountain")]
    if not scripts:
        print("No test scripts found in test_scripts/")
        sys.exit(1)
    from src.execution.runner import run_phase1
    all_passed = True
    for script_file in sorted(scripts):
        stem = os.path.splitext(script_file)[0]
        script_path = os.path.join(test_dir, script_file)
        project_name = f"test_{stem}"
        expected_file = os.path.join(expected_dir, f"{stem}_scenes.json")
        print(f"\n{'='*60}")
        print(f"TEST: {script_file}")
        print(f"{'='*60}")
        result = run_phase1(
            script_path=script_path,
            project_name=project_name,
            api_key=api_key,
            model=model,
            backend=backend,
            ollama_url=ollama_url,
            output_dir=output_dir,
        )
        # Regression checks only run when an expected-output file exists.
        if os.path.exists(expected_file):
            if not _check_expectations(expected_file, result, output_dir, project_name):
                all_passed = False
        else:
            print(" No expected output file — skipping regression checks")
        if not result.success:
            all_passed = False
    print(f"\n{'='*60}")
    if all_passed:
        print("ALL TESTS PASSED")
        sys.exit(0)
    else:
        print("SOME TESTS FAILED")
        sys.exit(1)


def _check_expectations(expected_file: str, result, output_dir: str, project_name: str) -> bool:
    """Compare one Phase-1 result against its expected-output JSON.

    Prints per-check PASS/FAIL lines and a RESULT line; returns True when
    every check passed. ``result`` is the Phase 1 result object (provides
    ``total_scenes``).
    """
    with open(expected_file, "r", encoding="utf-8") as f:
        expected = json.load(f)
    passed = True
    # Scene count check: actual must be within 20% of expected.
    expected_count = expected.get("expected_scene_count", 0)
    if expected_count > 0:
        deviation = abs(result.total_scenes - expected_count) / expected_count
        if deviation > 0.20:
            print(f" FAIL: Scene count {result.total_scenes} vs expected {expected_count} (deviation {deviation:.0%})")
            passed = False
        else:
            print(f" PASS: Scene count {result.total_scenes} (expected {expected_count})")
    # Character checks are case-insensitive; build the upper-cased set once
    # instead of rebuilding it on every loop iteration as before.
    actual_chars_upper = {c.upper() for c in _collect_characters(output_dir, project_name)}
    for char in expected.get("expected_characters", []):
        if char.upper() not in actual_chars_upper:
            print(f" FAIL: Expected character '{char}' not found")
            passed = False
        else:
            print(f" PASS: Character '{char}' found")
    # Hallucination check: these names must NOT appear in the output.
    for char in expected.get("must_not_contain_characters", []):
        if char.upper() in actual_chars_upper:
            print(f" FAIL: Hallucinated character '{char}' found")
            passed = False
    # Location check, same case-insensitive comparison.
    actual_locs_upper = {name.upper() for name in _collect_locations(output_dir, project_name)}
    for loc in expected.get("expected_locations", []):
        if loc.upper() not in actual_locs_upper:
            print(f" FAIL: Expected location '{loc}' not found")
            passed = False
        else:
            print(f" PASS: Location '{loc}' found")
    print(" RESULT: PASSED" if passed else " RESULT: FAILED")
    return passed
def _collect_characters(output_dir: str, project_name: str) -> set[str]:
    """Gather every character name found in the project's L2 scene outputs.

    Reads each scene_*.json under <output_dir>/<project_name>/L2 and unions
    their "characters_present" lists. Returns an empty set when the L2
    directory does not exist.
    """
    scene_dir = os.path.join(output_dir, project_name, "L2")
    found: set[str] = set()
    if not os.path.exists(scene_dir):
        return found
    for entry in os.listdir(scene_dir):
        if not entry.startswith("scene_") or not entry.endswith(".json"):
            continue
        if entry == "latest.json":  # defensive: excluded by the prefix check anyway
            continue
        with open(os.path.join(scene_dir, entry), "r", encoding="utf-8") as handle:
            found.update(json.load(handle).get("characters_present", []))
    return found
def _collect_locations(output_dir: str, project_name: str) -> set[str]:
    """Gather every non-empty location name from the project's L2 scene outputs.

    Reads each scene_*.json under <output_dir>/<project_name>/L2 and collects
    the "location" field of each scene, skipping blanks. Returns an empty set
    when the L2 directory does not exist.
    """
    scene_dir = os.path.join(output_dir, project_name, "L2")
    found: set[str] = set()
    if not os.path.exists(scene_dir):
        return found
    for entry in os.listdir(scene_dir):
        if not entry.startswith("scene_") or not entry.endswith(".json"):
            continue
        if entry == "latest.json":  # defensive: excluded by the prefix check anyway
            continue
        with open(os.path.join(scene_dir, entry), "r", encoding="utf-8") as handle:
            name = json.load(handle).get("location", "")
        if name:
            found.add(name)
    return found
# Standard script entry guard: run the CLI only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()