diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..607b4c3 --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +ANTHROPIC_API_KEY=your-api-key-here diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c972363 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +output/ +.env +__pycache__/ +*.pyc +.venv/ +venv/ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..66854ce --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +anthropic>=0.40.0 +pydantic>=2.0.0 +python-dotenv>=1.0.0 diff --git a/run.py b/run.py new file mode 100644 index 0000000..ce9d1a2 --- /dev/null +++ b/run.py @@ -0,0 +1,190 @@ +"""CLI entry point for the AI Movie Production Pipeline.""" + +import argparse +import json +import os +import sys + +from dotenv import load_dotenv + + +def main(): + load_dotenv() + + parser = argparse.ArgumentParser(description="AI Movie Production Pipeline — Phase 1") + parser.add_argument("--script", type=str, help="Path to .fountain script file") + parser.add_argument("--project", type=str, help="Project name (determines output directory)") + parser.add_argument("--model", type=str, default="claude-sonnet-4-20250514", help="Model ID") + parser.add_argument("--scene", type=int, default=None, help="Process only this scene number") + parser.add_argument("--dry-run", action="store_true", help="Validate inputs only, no AI calls") + parser.add_argument("--force", action="store_true", help="Ignore cache, re-run even if unchanged") + parser.add_argument("--test", action="store_true", help="Run test suite against test_scripts/") + parser.add_argument("--output-dir", type=str, default="output", help="Base output directory") + + args = parser.parse_args() + + if args.test: + run_tests(args.model, args.output_dir) + return + + if not args.script or not args.project: + parser.error("--script and --project are required (unless using --test)") + + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key and 
not args.dry_run: + print("ERROR: ANTHROPIC_API_KEY not set. Set it in .env or environment.") + sys.exit(1) + + if not os.path.exists(args.script): + print(f"ERROR: Script file not found: {args.script}") + sys.exit(1) + + from src.execution.runner import run_phase1 + + result = run_phase1( + script_path=args.script, + project_name=args.project, + api_key=api_key or "", + model=args.model, + output_dir=args.output_dir, + scene_filter=args.scene, + dry_run=args.dry_run, + force=args.force, + ) + + if not result.success: + print(f"\nPIPELINE FAILED: {result.stop_reason}") + sys.exit(1) + + print("\nPIPELINE COMPLETE") + sys.exit(0) + + +def run_tests(model: str, output_dir: str): + """Run test suite against all scripts in test_scripts/.""" + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + print("ERROR: ANTHROPIC_API_KEY required for tests") + sys.exit(1) + + test_dir = os.path.join(os.path.dirname(__file__), "test_scripts") + expected_dir = os.path.join(test_dir, "expected") + + scripts = [f for f in os.listdir(test_dir) if f.endswith(".fountain")] + if not scripts: + print("No test scripts found in test_scripts/") + sys.exit(1) + + from src.execution.runner import run_phase1 + + all_passed = True + + for script_file in sorted(scripts): + script_path = os.path.join(test_dir, script_file) + project_name = f"test_{os.path.splitext(script_file)[0]}" + expected_file = os.path.join(expected_dir, f"{os.path.splitext(script_file)[0]}_scenes.json") + + print(f"\n{'='*60}") + print(f"TEST: {script_file}") + print(f"{'='*60}") + + result = run_phase1( + script_path=script_path, + project_name=project_name, + api_key=api_key, + model=model, + output_dir=output_dir, + ) + + # Check against expected + if os.path.exists(expected_file): + with open(expected_file, "r", encoding="utf-8") as f: + expected = json.load(f) + + passed = True + + # Scene count check (within 20%) + expected_count = expected.get("expected_scene_count", 0) + if expected_count > 0: + deviation 
= abs(result.total_scenes - expected_count) / expected_count
+            if deviation > 0.20:
+                print(f" FAIL: Scene count {result.total_scenes} vs expected {expected_count} (deviation {deviation:.0%})")
+                passed = False
+            else:
+                print(f" PASS: Scene count {result.total_scenes} (expected {expected_count})")
+
+            # Character check — read actual output
+            actual_characters = _collect_characters(output_dir, project_name)
+            for char in expected.get("expected_characters", []):
+                if char.upper() not in {c.upper() for c in actual_characters}:
+                    print(f" FAIL: Expected character '{char}' not found")
+                    passed = False
+                else:
+                    print(f" PASS: Character '{char}' found")
+
+            # Hallucination check
+            for char in expected.get("must_not_contain_characters", []):
+                if char.upper() in {c.upper() for c in actual_characters}:
+                    print(f" FAIL: Hallucinated character '{char}' found")
+                    passed = False
+
+            # Location check
+            actual_locations = _collect_locations(output_dir, project_name)
+            for loc in expected.get("expected_locations", []):
+                if loc.upper() not in {l.upper() for l in actual_locations}:
+                    print(f" FAIL: Expected location '{loc}' not found")
+                    passed = False
+                else:
+                    print(f" PASS: Location '{loc}' found")
+
+            if passed:
+                print(f" RESULT: PASSED")
+            else:
+                print(f" RESULT: FAILED")
+                all_passed = False
+        else:
+            print(f" No expected output file — skipping regression checks")
+            if not result.success:
+                all_passed = False
+
+    print(f"\n{'='*60}")
+    if all_passed:
+        print("ALL TESTS PASSED")
+        sys.exit(0)
+    else:
+        print("SOME TESTS FAILED")
+        sys.exit(1)
+
+
+def _latest_scene_paths(l2_dir: str) -> list[str]:
+    """Return paths of the latest version of each scene output in l2_dir.
+
+    Reads the latest.json manifest (written by OutputWriter._update_latest,
+    mapping str(scene_id) -> version) so that stale superseded versions
+    (e.g. scene_001_v1.json when v2 exists) are NOT included. The previous
+    implementation globbed every scene_*_vN.json, which let characters and
+    locations from old versions leak into the regression comparisons.
+    """
+    latest_path = os.path.join(l2_dir, "latest.json")
+    if not os.path.exists(latest_path):
+        return []
+    with open(latest_path, "r", encoding="utf-8") as fh:
+        manifest = json.load(fh)
+    paths: list[str] = []
+    for key, version in manifest.items():
+        try:
+            scene_num = int(key)
+        except (TypeError, ValueError):
+            continue  # skip non-scene manifest entries such as "output"
+        path = os.path.join(l2_dir, f"scene_{scene_num:03d}_v{version}.json")
+        if os.path.exists(path):
+            paths.append(path)
+    return paths
+
+
+def _collect_characters(output_dir: str, project_name: str) -> set[str]:
+    """Collect all character names from the latest L2 scene outputs."""
+    l2_dir = os.path.join(output_dir, project_name, "L2")
+    characters: set[str] = set()
+    for path in _latest_scene_paths(l2_dir):
+        with open(path, "r", encoding="utf-8") as fh:
+            data = json.load(fh)
+        characters.update(data.get("characters_present", []))
+    return characters
+
+
+def _collect_locations(output_dir: str, project_name: str) -> set[str]:
+    """Collect all location names from the latest L2 scene outputs."""
+    l2_dir = os.path.join(output_dir, project_name, "L2")
+    locations: set[str] = set()
+    for path in _latest_scene_paths(l2_dir):
+        with open(path, "r", encoding="utf-8") as fh:
+            data = json.load(fh)
+        loc = data.get("location", "")
+        if loc:
+            locations.add(loc)
+    return locations
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..5db9b57
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,6 @@
+"""Project-level configuration defaults."""
+
+DEFAULT_MODEL = "claude-sonnet-4-20250514"
+DEFAULT_OUTPUT_DIR = "output"
+DEFAULT_MAX_CHUNK_CHARS = 50000
+DEFAULT_MAX_RETRIES_PER_LEVEL = 2
diff --git a/src/execution/__init__.py b/src/execution/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/execution/diff_detector.py b/src/execution/diff_detector.py
new file mode 100644
index 0000000..7409498
--- /dev/null
+++ b/src/execution/diff_detector.py
@@ -0,0 +1,53 @@
+"""Diff and drift detector — compares layer output versions."""
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class DiffReport:
+    changed: bool
+    fields_changed: list[str] = field(default_factory=list)
+    drift_categories: list[str] = field(default_factory=list)
+
+
+# Fields that map to drift categories
+_DRIFT_MAP = {
+    "character_drift": {"characters_present", "new_characters_introduced"},
+    "location_drift": {"location", "int_ext"},
+    "continuity_drift": {"continuity_notes", "dependencies"},
+    "structure_drift": {"scene_number", 
"scene_heading"},
+}
+
+
+def diff_outputs(previous: dict, current: dict) -> DiffReport:
+    """Compare two versions of a layer output.
+
+    Args:
+        previous: The previous version's data dict.
+        current: The new version's data dict.
+
+    Returns:
+        DiffReport with changed fields and drift categories.
+    """
+    fields_changed: list[str] = []
+    drift_categories: set[str] = set()
+
+    all_keys = set(previous.keys()) | set(current.keys())
+
+    # Iterate in sorted order so fields_changed is deterministic across runs;
+    # bare set iteration order is not stable between processes, which made
+    # successive drift reports differ even for identical inputs.
+    for key in sorted(all_keys):
+        prev_val = previous.get(key)
+        curr_val = current.get(key)
+
+        if prev_val != curr_val:
+            fields_changed.append(key)
+
+            # Categorize drift
+            for category, fields in _DRIFT_MAP.items():
+                if key in fields:
+                    drift_categories.add(category)
+
+    return DiffReport(
+        changed=len(fields_changed) > 0,
+        fields_changed=fields_changed,
+        drift_categories=sorted(drift_categories),
+    )
diff --git a/src/execution/output_writer.py b/src/execution/output_writer.py
new file mode 100644
index 0000000..ca2dacb
--- /dev/null
+++ b/src/execution/output_writer.py
@@ -0,0 +1,112 @@
+"""Versioned output writer — writes layer outputs, never overwrites."""
+
+import hashlib
+import json
+import os
+from pydantic import BaseModel
+
+
+class OutputWriter:
+    def __init__(self, project_name: str, output_dir: str = "output"):
+        self.project_name = project_name
+        self.output_dir = output_dir
+
+    def write(self, layer_id: str, scene_id: int | None, data: BaseModel) -> dict:
+        """Write a layer output to a versioned JSON file. 
+ + Returns: + {"path": str, "version": int, "hash": str} + """ + layer_dir = os.path.join(self.output_dir, self.project_name, layer_id) + os.makedirs(layer_dir, exist_ok=True) + + # Determine next version + prefix = f"scene_{scene_id:03d}" if scene_id is not None else "output" + version = self._next_version(layer_dir, prefix) + + # Serialize + data_dict = data.model_dump() + data_json = json.dumps(data_dict, indent=2, ensure_ascii=False) + data_hash = hashlib.sha256(data_json.encode()).hexdigest() + + # Write versioned file + filename = f"{prefix}_v{version}.json" + filepath = os.path.join(layer_dir, filename) + with open(filepath, "w", encoding="utf-8") as f: + f.write(data_json) + + # Update latest manifest + self._update_latest(layer_dir, scene_id, version) + + return {"path": filepath, "version": version, "hash": f"sha256:{data_hash}"} + + def write_raw(self, layer_id: str, scene_id: int | None, data: dict) -> dict: + """Write a raw dict (not a Pydantic model) to a versioned JSON file.""" + layer_dir = os.path.join(self.output_dir, self.project_name, layer_id) + os.makedirs(layer_dir, exist_ok=True) + + prefix = f"scene_{scene_id:03d}" if scene_id is not None else "output" + version = self._next_version(layer_dir, prefix) + + data_json = json.dumps(data, indent=2, ensure_ascii=False) + data_hash = hashlib.sha256(data_json.encode()).hexdigest() + + filename = f"{prefix}_v{version}.json" + filepath = os.path.join(layer_dir, filename) + with open(filepath, "w", encoding="utf-8") as f: + f.write(data_json) + + self._update_latest(layer_dir, scene_id, version) + + return {"path": filepath, "version": version, "hash": f"sha256:{data_hash}"} + + def read_latest(self, layer_id: str, scene_id: int | None) -> dict | None: + """Read the latest version of a layer output.""" + layer_dir = os.path.join(self.output_dir, self.project_name, layer_id) + latest_path = os.path.join(layer_dir, "latest.json") + + if not os.path.exists(latest_path): + return None + + with 
open(latest_path, "r", encoding="utf-8") as f: + manifest = json.load(f) + + key = str(scene_id) if scene_id is not None else "output" + version = manifest.get(key) + if version is None: + return None + + return self.read_version(layer_id, scene_id, version) + + def read_version(self, layer_id: str, scene_id: int | None, version: int) -> dict | None: + """Read a specific version of a layer output.""" + layer_dir = os.path.join(self.output_dir, self.project_name, layer_id) + prefix = f"scene_{scene_id:03d}" if scene_id is not None else "output" + filepath = os.path.join(layer_dir, f"{prefix}_v{version}.json") + + if not os.path.exists(filepath): + return None + + with open(filepath, "r", encoding="utf-8") as f: + return json.load(f) + + def _next_version(self, layer_dir: str, prefix: str) -> int: + """Find the next available version number.""" + version = 1 + while os.path.exists(os.path.join(layer_dir, f"{prefix}_v{version}.json")): + version += 1 + return version + + def _update_latest(self, layer_dir: str, scene_id: int | None, version: int): + """Update the latest.json manifest.""" + latest_path = os.path.join(layer_dir, "latest.json") + manifest = {} + if os.path.exists(latest_path): + with open(latest_path, "r", encoding="utf-8") as f: + manifest = json.load(f) + + key = str(scene_id) if scene_id is not None else "output" + manifest[key] = version + + with open(latest_path, "w", encoding="utf-8") as f: + json.dump(manifest, f, indent=2) diff --git a/src/execution/retry.py b/src/execution/retry.py new file mode 100644 index 0000000..44e8e42 --- /dev/null +++ b/src/execution/retry.py @@ -0,0 +1,92 @@ +"""Retry and escalation engine — 4-level failure escalation.""" + +import time +from dataclasses import dataclass, field +from typing import Callable, Any + + +@dataclass +class FailureRecord: + scene_id: int | None + layer_id: str + escalation_level: int + error: str + attempts: list[dict] = field(default_factory=list) + + +def execute_with_retry( + fn: Callable, 
+ input_data: Any, + layer_id: str, + scene_id: int | None = None, + fallback_fn: Callable | None = None, + max_attempts_per_level: int = 2, +) -> Any | FailureRecord: + """Execute a function with 4-level escalation on failure. + + Level 1: Retry same config (up to max_attempts_per_level) + Level 2: Retry same config again (placeholder for modified prompt in future) + Level 3: Call fallback_fn if provided + Level 4: Return FailureRecord + + Args: + fn: The function to execute. Takes input_data as sole argument. + input_data: Passed to fn. + layer_id: For logging in FailureRecord. + scene_id: For logging in FailureRecord. + fallback_fn: Optional simplified extraction function for Level 3. + max_attempts_per_level: Max attempts at each escalation level. + + Returns: + fn's return value on success, or FailureRecord if all levels exhausted. + """ + attempts: list[dict] = [] + + # Level 1: Retry same config + for attempt in range(max_attempts_per_level): + try: + result = fn(input_data) + return result + except Exception as e: + attempts.append({ + "level": 1, + "attempt": attempt + 1, + "error": str(e), + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()), + }) + + # Level 2: Retry again (future: modified prompt) + for attempt in range(max_attempts_per_level): + try: + result = fn(input_data) + return result + except Exception as e: + attempts.append({ + "level": 2, + "attempt": attempt + 1, + "error": str(e), + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()), + }) + + # Level 3: Fallback + if fallback_fn: + for attempt in range(max_attempts_per_level): + try: + result = fallback_fn(input_data) + return result + except Exception as e: + attempts.append({ + "level": 3, + "attempt": attempt + 1, + "error": str(e), + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()), + }) + + # Level 4: Human intervention + return FailureRecord( + scene_id=scene_id, + layer_id=layer_id, + escalation_level=4, + error="All escalation 
levels exhausted", + attempts=attempts, + ) diff --git a/src/execution/runner.py b/src/execution/runner.py new file mode 100644 index 0000000..dc637a0 --- /dev/null +++ b/src/execution/runner.py @@ -0,0 +1,211 @@ +"""Pipeline runner — orchestrates Layer 1 → Layer 2 end-to-end.""" + +import hashlib +import json +import os +from dataclasses import dataclass, field + +from src.ingestion.fountain_parser import parse_fountain +from src.ingestion.normalizer import normalize, NormalizationError +from src.understanding.extractor import extract_scenes, ExtractionError +from src.understanding.chunker import chunk_script +from src.schemas.scene import Scene +from src.schemas.scene_array import SceneArray +from src.validators.schema_validator import validate, ValidationResult +from src.validators.scene_validator import validate_scenes, ValidationWarning +from src.logging.layer_logger import LayerLogger +from src.execution.output_writer import OutputWriter +from src.execution.retry import execute_with_retry, FailureRecord +from src.execution.stop_conditions import evaluate_stop +from src.execution.diff_detector import diff_outputs + + +@dataclass +class PipelineResult: + success: bool + total_scenes: int = 0 + valid_scenes: int = 0 + flagged_scenes: int = 0 + failed_scenes: int = 0 + warnings: list[ValidationWarning] = field(default_factory=list) + stop_reason: str | None = None + + +def run_phase1( + script_path: str, + project_name: str, + api_key: str, + model: str = "claude-sonnet-4-20250514", + output_dir: str = "output", + scene_filter: int | None = None, + dry_run: bool = False, + force: bool = False, +) -> PipelineResult: + """Run the Phase 1 pipeline: Layer 1 (ingestion) → Layer 2 (understanding). + + Args: + script_path: Path to .fountain file. + project_name: Project name for output directory. + api_key: Anthropic API key. + model: Model ID. + output_dir: Base output directory. + scene_filter: If set, only process this scene number in Layer 2. 
+        dry_run: If True, validate inputs only — no AI calls.
+        force: If True, ignore cache and re-run even if input unchanged.
+
+    Returns:
+        PipelineResult with counts and any stop reason.
+    """
+    logger = LayerLogger(project_name, output_dir)
+    writer = OutputWriter(project_name, output_dir)
+
+    # Resolve prompt contract path
+    prompts_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "prompts")
+    contract_path = os.path.join(prompts_dir, "L2_scene_extraction_v1.json")
+
+    # ── LAYER 1: INGESTION ──────────────────────────────────────────────
+    print(f"[L1] Parsing: {script_path}")
+    run_id = logger.start("L1")
+
+    # Read file
+    with open(script_path, "r", encoding="utf-8") as f:
+        raw_text = f.read()
+
+    input_hash = f"sha256:{hashlib.sha256(raw_text.encode()).hexdigest()}"
+
+    # Parse
+    try:
+        elements = parse_fountain(raw_text)
+        normalized = normalize(elements)
+    # Deliberate top-level boundary: any parse/normalize failure is logged and
+    # converted into a failed PipelineResult. The previous clause caught
+    # `(NormalizationError, Exception)`, which was redundant — Exception
+    # already subsumes NormalizationError.
+    except Exception as e:
+        logger.finish(run_id, input_hash, None, "failed", failure_state=str(e))
+        print(f"[L1] STOP: {e}")
+        return PipelineResult(success=False, stop_reason=f"Layer 1 failed: {e}")
+
+    # Write L1 output
+    l1_result = writer.write("L1", None, normalized)
+    logger.finish(run_id, input_hash, l1_result["hash"], "valid")
+
+    heading_count = sum(1 for e in normalized.elements if e.type == "scene_heading")
+    print(f"[L1] Done: {len(normalized.elements)} elements, {heading_count} scene headings")
+
+    if dry_run:
+        print(f"[DRY RUN] Layer 1 valid. 
{heading_count} scenes would be extracted.") + return PipelineResult(success=True, total_scenes=heading_count) + + # ── LAYER 2: UNDERSTANDING ────────────────────────────────────────── + print(f"[L2] Extracting scenes...") + + # Chunk if needed + chunks = chunk_script(normalized) + print(f"[L2] Processing in {len(chunks)} chunk(s)") + + all_results: list[ValidationResult | FailureRecord] = [] + all_valid_scenes: list[Scene] = [] + all_warnings: list[ValidationWarning] = [] + total_token_usage = {"input": 0, "output": 0} + + for chunk_idx, chunk in enumerate(chunks): + # Extract scenes from this chunk + def do_extract(data): + return extract_scenes(data, contract_path, api_key, model) + + run_id = logger.start("L2", scene_id=chunk_idx) + chunk_input_hash = f"sha256:{hashlib.sha256(json.dumps([e.model_dump() for e in chunk.elements]).encode()).hexdigest()}" + + extraction = execute_with_retry( + fn=do_extract, + input_data=chunk, + layer_id="L2", + scene_id=chunk_idx, + ) + + if isinstance(extraction, FailureRecord): + logger.finish(run_id, chunk_input_hash, None, "failed", + failure_state=extraction.error, retry_count=len(extraction.attempts)) + all_results.append(extraction) + print(f"[L2] Chunk {chunk_idx + 1}: FAILED after {len(extraction.attempts)} attempts") + continue + + # Validate each scene + total_token_usage["input"] += extraction.token_usage["input"] + total_token_usage["output"] += extraction.token_usage["output"] + + for raw_scene in extraction.raw_scenes: + scene_num = raw_scene.get("scene_number", "?") + + # Skip if scene_filter is set and doesn't match + if scene_filter is not None and scene_num != scene_filter: + continue + + scene_run_id = logger.start("L2", scene_id=scene_num if isinstance(scene_num, int) else None) + scene_input_hash = f"sha256:{hashlib.sha256(json.dumps(raw_scene).encode()).hexdigest()}" + + result = validate(raw_scene, Scene) + all_results.append(result) + + if result.status == "failed": + logger.finish(scene_run_id, 
scene_input_hash, None, "failed", + failure_state="; ".join(result.errors)) + print(f"[L2] Scene {scene_num}: FAILED validation — {result.errors}") + else: + # Write scene output + scene_out = writer.write_raw("L2", scene_num if isinstance(scene_num, int) else 0, raw_scene) + + # Check for drift against previous version + if isinstance(scene_num, int): + prev = writer.read_version("L2", scene_num, scene_out["version"] - 1) + if prev: + diff = diff_outputs(prev, raw_scene) + if diff.changed: + print(f"[L2] Scene {scene_num}: DRIFT detected — {diff.drift_categories}") + + logger.finish(scene_run_id, scene_input_hash, scene_out["hash"], result.status, + token_usage=extraction.token_usage) + + if result.status == "flagged": + print(f"[L2] Scene {scene_num}: FLAGGED (contains UNKNOWN values)") + else: + print(f"[L2] Scene {scene_num}: valid") + + all_valid_scenes.append(result.data) + + logger.finish(run_id, chunk_input_hash, "chunk", "valid", + token_usage=extraction.token_usage) + + # Run scene-level semantic validation + if all_valid_scenes: + scene_array = SceneArray(scenes=all_valid_scenes) + all_warnings = validate_scenes(scene_array, heading_count) + for w in all_warnings: + print(f"[L2] WARNING: {w.message}") + + # Evaluate stop conditions + stop = evaluate_stop(all_results, heading_count) + if stop.should_stop: + print(f"[L2] STOP CONDITION: {stop.reason}") + return PipelineResult( + success=False, + total_scenes=heading_count, + valid_scenes=sum(1 for r in all_results if isinstance(r, ValidationResult) and r.status == "valid"), + flagged_scenes=sum(1 for r in all_results if isinstance(r, ValidationResult) and r.status == "flagged"), + failed_scenes=sum(1 for r in all_results if isinstance(r, FailureRecord) or (isinstance(r, ValidationResult) and r.status == "failed")), + warnings=all_warnings, + stop_reason=stop.reason, + ) + + valid = sum(1 for r in all_results if isinstance(r, ValidationResult) and r.status == "valid") + flagged = sum(1 for r in all_results 
if isinstance(r, ValidationResult) and r.status == "flagged") + failed = sum(1 for r in all_results if isinstance(r, FailureRecord) or (isinstance(r, ValidationResult) and r.status == "failed")) + + print(f"\n[DONE] Scenes: {valid} valid, {flagged} flagged, {failed} failed") + print(f"[DONE] Tokens: {total_token_usage['input']} in / {total_token_usage['output']} out") + + return PipelineResult( + success=True, + total_scenes=heading_count, + valid_scenes=valid, + flagged_scenes=flagged, + failed_scenes=failed, + warnings=all_warnings, + ) diff --git a/src/execution/stop_conditions.py b/src/execution/stop_conditions.py new file mode 100644 index 0000000..934481e --- /dev/null +++ b/src/execution/stop_conditions.py @@ -0,0 +1,59 @@ +"""Stop condition evaluator — decides when the pipeline must halt.""" + +from dataclasses import dataclass +from typing import Literal +from src.validators.schema_validator import ValidationResult +from src.execution.retry import FailureRecord + + +@dataclass +class StopDecision: + should_stop: bool + reason: str | None + scope: Literal["unit", "global"] | None + + +def evaluate_stop( + results: list[ValidationResult | FailureRecord], + total_scenes: int, +) -> StopDecision: + """Evaluate whether a stop condition has been triggered. + + Args: + results: List of validation results and/or failure records from a layer run. + total_scenes: Total number of scenes that were expected to be processed. + + Returns: + StopDecision indicating whether to halt and why. 
"""
+    if total_scenes == 0:
+        return StopDecision(
+            should_stop=True,
+            reason="Zero scenes to process",
+            scope="global",
+        )
+
+    # Count hard failures: retry-exhausted FailureRecords plus schema
+    # validations that ended in "failed".
+    failed_count = 0
+    for result in results:
+        if isinstance(result, FailureRecord):
+            failed_count += 1
+        elif isinstance(result, ValidationResult) and result.status == "failed":
+            failed_count += 1
+
+    # Global stop: all scenes failed
+    if failed_count == total_scenes:
+        return StopDecision(
+            should_stop=True,
+            reason=f"All {total_scenes} scenes failed",
+            scope="global",
+        )
+
+    # Global stop: >30% failure rate. The `total_scenes == 0` early return
+    # above guarantees the divisor is non-zero, so the previous
+    # `total_scenes > 0 and` guard was dead code and has been removed.
+    if failed_count / total_scenes > 0.30:
+        return StopDecision(
+            should_stop=True,
+            reason=f"{failed_count}/{total_scenes} scenes failed ({failed_count/total_scenes:.0%} > 30% threshold)",
+            scope="global",
+        )
+
+    return StopDecision(should_stop=False, reason=None, scope=None)
diff --git a/src/ingestion/__init__.py b/src/ingestion/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/ingestion/fountain_parser.py b/src/ingestion/fountain_parser.py
new file mode 100644
index 0000000..3fa9f15
--- /dev/null
+++ b/src/ingestion/fountain_parser.py
@@ -0,0 +1,210 @@
+"""Fountain format parser. Deterministic code — no AI.
+
+Follows the Fountain spec: https://fountain.io/syntax
+Converts raw .fountain text into a list of ScriptElement objects.
+"""
+
+import re
+from src.schemas.script_element import ScriptElement
+
+
+# Fountain scene heading prefixes.
+# NOTE(review): parse_fountain matches against stripped.upper(), so the
+# lowercase entries below are redundant — confirm no external reference
+# before pruning them.
+_SCENE_HEADING_PREFIXES = (
+    "INT ", "INT.", "EXT ", "EXT.", "INT/EXT", "INT./EXT", "I/E",
+    "int ", "int.", "ext ", "ext.", "int/ext", "int./ext", "i/e",
+)
+
+_TRANSITION_PATTERN = re.compile(r"^[A-Z\s]+TO:$")
+_FORCED_SCENE_HEADING = re.compile(r"^\.[A-Z]")
+_FORCED_TRANSITION = re.compile(r"^>(?!<)")
+_CENTERED_TEXT = re.compile(r"^>.*<$")
+_PARENTHETICAL = re.compile(r"^\(.*\)$")
+
+
+def parse_fountain(text: str) -> list[ScriptElement]:
+    """Parse Fountain-formatted text into ScriptElement list. 
+ + Args: + text: Raw contents of a .fountain file. + + Returns: + Ordered list of ScriptElement objects with line numbers. + """ + lines = text.replace("\r\n", "\n").split("\n") + elements: list[ScriptElement] = [] + i = 0 + + while i < len(lines): + line = lines[i] + stripped = line.strip() + line_num = i + 1 # 1-indexed + + # Skip empty lines + if not stripped: + i += 1 + continue + + # Title page (key: value at start of document) — skip + if i < _find_first_content_line(lines) and ":" in stripped: + i += 1 + continue + + # Boneyard (/* ... */) — skip + if stripped.startswith("/*"): + while i < len(lines) and "*/" not in lines[i]: + i += 1 + i += 1 + continue + + # Notes ([[ ... ]]) — skip + if stripped.startswith("[[") and stripped.endswith("]]"): + i += 1 + continue + + # Section headers (# ) — skip (metadata, not story) + if stripped.startswith("#"): + i += 1 + continue + + # Synopsis (= ) — skip (metadata) + if stripped.startswith("=") and not stripped.startswith("=="): + i += 1 + continue + + # Page break (===) — skip + if stripped.startswith("==="): + i += 1 + continue + + # Forced scene heading (.LOCATION) + if _FORCED_SCENE_HEADING.match(stripped): + elements.append(ScriptElement( + type="scene_heading", + text=stripped[1:].strip(), # remove leading dot + line_number=line_num, + )) + i += 1 + continue + + # Standard scene heading + if stripped.upper().startswith(_SCENE_HEADING_PREFIXES): + elements.append(ScriptElement( + type="scene_heading", + text=stripped, + line_number=line_num, + )) + i += 1 + continue + + # Forced transition (> TEXT) + if _FORCED_TRANSITION.match(stripped): + elements.append(ScriptElement( + type="transition", + text=stripped[1:].strip(), + line_number=line_num, + )) + i += 1 + continue + + # Standard transition (SOMETHING TO:) + if _TRANSITION_PATTERN.match(stripped): + elements.append(ScriptElement( + type="transition", + text=stripped, + line_number=line_num, + )) + i += 1 + continue + + # Centered text — treat as action + if 
_CENTERED_TEXT.match(stripped): + elements.append(ScriptElement( + type="action", + text=stripped[1:-1].strip(), + line_number=line_num, + )) + i += 1 + continue + + # Character + Dialogue block + if _is_character_line(stripped, lines, i): + # Character name + char_name = stripped.rstrip("^").strip() # remove dual dialogue caret + if char_name.startswith("@"): + char_name = char_name[1:] # forced character + elements.append(ScriptElement( + type="character", + text=char_name, + line_number=line_num, + )) + i += 1 + + # Consume parentheticals and dialogue that follow + while i < len(lines): + next_line = lines[i].strip() + if not next_line: + break + if _PARENTHETICAL.match(next_line): + elements.append(ScriptElement( + type="parenthetical", + text=next_line, + line_number=i + 1, + )) + else: + elements.append(ScriptElement( + type="dialogue", + text=next_line, + line_number=i + 1, + )) + i += 1 + continue + + # Default: action + elements.append(ScriptElement( + type="action", + text=stripped, + line_number=line_num, + )) + i += 1 + + return elements + + +def _is_character_line(stripped: str, lines: list[str], index: int) -> bool: + """Determine if a line is a character cue. 
+ + Fountain rules: + - All uppercase + - Followed by dialogue (non-empty next line after possible blank) + - May end with ^ (dual dialogue) + - May start with @ (forced character) + """ + if stripped.startswith("@"): + return True + + # Must be uppercase (ignoring parenthetical extensions like (V.O.), (O.S.)) + name_part = re.sub(r"\(.*?\)", "", stripped).strip().rstrip("^").strip() + if not name_part: + return False + if not name_part.replace(" ", "").replace(".", "").replace("'", "").replace("-", "").isalpha(): + return False + if name_part != name_part.upper(): + return False + + # Must have a non-empty line following (the dialogue) + next_i = index + 1 + if next_i < len(lines) and lines[next_i].strip(): + return True + + return False + + +def _find_first_content_line(lines: list[str]) -> int: + """Find the first line that isn't part of the title page. + + Title page ends at the first blank line. + """ + for i, line in enumerate(lines): + if not line.strip(): + return i + 1 + return 0 diff --git a/src/ingestion/normalizer.py b/src/ingestion/normalizer.py new file mode 100644 index 0000000..232729a --- /dev/null +++ b/src/ingestion/normalizer.py @@ -0,0 +1,29 @@ +"""Normalizer — wraps parsed ScriptElements into a validated NormalizedScript.""" + +from src.schemas.script_element import ScriptElement +from src.schemas.normalized_script import NormalizedScript + + +class NormalizationError(Exception): + pass + + +def normalize(elements: list[ScriptElement]) -> NormalizedScript: + """Validate and wrap ScriptElements into NormalizedScript. + + Args: + elements: Output from fountain_parser.parse_fountain() + + Returns: + Validated NormalizedScript. + + Raises: + NormalizationError: If elements are empty or contain no scene headings. 
+ """ + if not elements: + raise NormalizationError("No elements to normalize — input is empty") + + try: + return NormalizedScript(elements=elements) + except ValueError as e: + raise NormalizationError(str(e)) from e diff --git a/src/logging/__init__.py b/src/logging/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/logging/layer_logger.py b/src/logging/layer_logger.py new file mode 100644 index 0000000..ef1d512 --- /dev/null +++ b/src/logging/layer_logger.py @@ -0,0 +1,63 @@ +"""Structured JSON logger for pipeline layer executions.""" + +import json +import os +import time +import uuid +from src.schemas.layer_log import LayerLog, TokenUsage + + +class LayerLogger: + def __init__(self, project_name: str, output_dir: str = "output"): + self.project_name = project_name + self.log_dir = os.path.join(output_dir, project_name, "logs") + os.makedirs(self.log_dir, exist_ok=True) + self._runs: dict[str, dict] = {} + + def start(self, layer_id: str, scene_id: int | None = None) -> str: + """Begin tracking a layer execution. 
Returns run_id.""" + run_id = uuid.uuid4().hex[:8] + self._runs[run_id] = { + "layer_id": layer_id, + "scene_id": scene_id, + "start_time": time.time(), + } + return run_id + + def finish( + self, + run_id: str, + input_hash: str, + output_hash: str | None, + validation_result: str, + failure_state: str | None = None, + retry_count: int = 0, + token_usage: dict | None = None, + ) -> LayerLog: + """Finalize a layer execution and write log to disk.""" + run = self._runs.pop(run_id) + execution_time_ms = int((time.time() - run["start_time"]) * 1000) + + log = LayerLog( + layer_id=run["layer_id"], + scene_id=run["scene_id"], + run_id=run_id, + timestamp=time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()), + input_hash=input_hash, + output_hash=output_hash, + validation_result=validation_result, + execution_time_ms=execution_time_ms, + failure_state=failure_state, + retry_count=retry_count, + token_usage=TokenUsage(**token_usage) if token_usage else None, + ) + + # Write log file + scene_part = f"_scene_{run['scene_id']:03d}" if run["scene_id"] is not None else "" + filename = f"{run['layer_id']}{scene_part}_run_{run_id}.json" + filepath = os.path.join(self.log_dir, filename) + + with open(filepath, "w", encoding="utf-8") as f: + json.dump(log.model_dump(), f, indent=2) + + return log diff --git a/src/prompts/L2_continuity_check_v1.json b/src/prompts/L2_continuity_check_v1.json new file mode 100644 index 0000000..821e785 --- /dev/null +++ b/src/prompts/L2_continuity_check_v1.json @@ -0,0 +1,17 @@ +{ + "contract_id": "L2_continuity_check_v1", + "layer": "L2", + "version": 1, + "purpose": "Verify continuity flags across extracted scenes", + "required_output_schema": "SceneArray", + "forbidden_behaviors": [ + "Do not add new scenes", + "Do not remove existing scenes", + "Do not change scene_number or scene_heading values", + "Do not invent continuity relationships that are not supported by the script", + "Do not modify action_summary or dialogue_summary" + ], + 
"system_prompt": "You are a continuity verification engine. You will receive a JSON array of extracted scenes from a screenplay.\n\nYour job is to review and correct ONLY the following fields:\n- continuity_notes: Ensure these accurately reflect state changes (injuries, wardrobe changes, time jumps, emotional shifts) that affect other scenes\n- dependencies: Ensure these accurately reference scene numbers that this scene continues from or references\n\nDo NOT modify any other fields. Return the full scenes array with corrections applied.\n\nReturn ONLY a JSON object with key \"scenes\" containing the corrected array. No additional text.", + "user_prompt_template": "Review and correct continuity fields in these extracted scenes:\n\n{{scenes_json}}", + "max_output_tokens": 8000 +} diff --git a/src/prompts/L2_scene_extraction_v1.json b/src/prompts/L2_scene_extraction_v1.json new file mode 100644 index 0000000..ccb192a --- /dev/null +++ b/src/prompts/L2_scene_extraction_v1.json @@ -0,0 +1,19 @@ +{ + "contract_id": "L2_scene_extraction_v1", + "layer": "L2", + "version": 1, + "purpose": "Extract structured scene metadata from normalized screenplay text", + "required_output_schema": "SceneArray", + "forbidden_behaviors": [ + "Do not invent characters not present in the script text", + "Do not invent locations not present in the script text", + "Do not invent props not described or implied in the script text", + "Do not merge distinct scenes into one", + "Do not split a single scene into multiple scenes", + "Do not summarize dialogue as action or vice versa", + "Do not leave any field empty — use UNKNOWN if the information cannot be determined" + ], + "system_prompt": "You are a script analysis engine. 
Your job is to extract structured scene metadata from a screenplay.\n\nYou must return a JSON object with a single key \"scenes\" containing an array of scene objects.\n\nEach scene object MUST have ALL of the following fields (no exceptions):\n- scene_number (int): Sequential scene number starting from 1\n- scene_heading (string): The full scene heading line (e.g. \"INT. APARTMENT - NIGHT\")\n- location (string): The location name derived from the heading\n- time_of_day (string): One of: DAWN, MORNING, DAY, AFTERNOON, DUSK, NIGHT, UNKNOWN\n- int_ext (string): One of: INTERIOR, EXTERIOR, BOTH\n- characters_present (string[]): All characters present in the scene\n- new_characters_introduced (string[]): Characters appearing for the first time in the script\n- props (string[]): Significant objects mentioned or implied\n- wardrobe_clues (string[]): Described or implied clothing/appearance details\n- emotional_tone (string): The dominant mood of the scene\n- visual_beats (string[]): Key visual moments or images\n- action_summary (string): 2-3 sentence summary of scene action\n- dialogue_summary (string): 1-2 sentence summary of key dialogue\n- continuity_notes (string[]): State changes relevant to other scenes\n- dependencies (int[]): Scene numbers this scene references or continues from\n\nRULES:\n- Do not invent characters not present in the script text\n- Do not invent locations not present in the script text\n- Do not invent props not described or implied in the script text\n- Do not merge distinct scenes into one\n- Do not split a single scene into multiple scenes\n- Do not summarize dialogue as action or vice versa\n- Do not leave any field empty — use UNKNOWN if the information cannot be determined\n- For list fields where nothing applies, use an empty array []\n- Return ONLY the JSON object, no additional text", + "user_prompt_template": "Extract structured scene metadata from the following screenplay:\n\n{{script_text}}", + "max_output_tokens": 8000 +} diff 
--git a/src/prompts/contract_schema.json b/src/prompts/contract_schema.json new file mode 100644 index 0000000..b67a9e7 --- /dev/null +++ b/src/prompts/contract_schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "required": [ + "contract_id", + "layer", + "version", + "purpose", + "required_output_schema", + "forbidden_behaviors", + "system_prompt", + "user_prompt_template", + "max_output_tokens" + ], + "properties": { + "contract_id": { "type": "string" }, + "layer": { "type": "string" }, + "version": { "type": "integer", "minimum": 1 }, + "purpose": { "type": "string" }, + "required_output_schema": { "type": "string" }, + "forbidden_behaviors": { "type": "array", "items": { "type": "string" } }, + "system_prompt": { "type": "string" }, + "user_prompt_template": { "type": "string" }, + "max_output_tokens": { "type": "integer", "minimum": 1 } + }, + "additionalProperties": false +} diff --git a/src/schemas/__init__.py b/src/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/schemas/department.py b/src/schemas/department.py new file mode 100644 index 0000000..9a6a107 --- /dev/null +++ b/src/schemas/department.py @@ -0,0 +1,61 @@ +"""Department interpretation schemas — Layer 4. 
Built in Phase 3, defined now for contract stability.""" + +from pydantic import BaseModel + + +class DirectingOutput(BaseModel): + scene_objective: str + pacing_notes: str + key_dramatic_beats: list[str] + subtext_notes: str + + +class CinematographyOutput(BaseModel): + camera_style: str + lens_character: str + movement_patterns: list[str] + depth_of_field_intent: str + color_palette_direction: str + + +class LightingOutput(BaseModel): + key_light_direction: str + practical_sources: list[str] + mood_notes: str + time_of_day_requirements: str + contrast_ratio_intent: str + + +class ProductionDesignOutput(BaseModel): + required_set_elements: list[str] + set_dressing_priorities: list[str] + color_texture_palette: str + era_period_notes: str + spatial_blocking_requirements: str + + +class WardrobeOutput(BaseModel): + character_wardrobe: list[dict] + costume_condition: str + thematic_notes: str + changes_from_previous: str + + +class PerformanceOutput(BaseModel): + character_states: list[dict] + key_shifts: list[str] + subtext_notes: str + physical_behavior_cues: list[str] + + +class StoryboardOutput(BaseModel): + character_positions: list[str] + key_composition_frames: list[str] + spatial_relationships: list[str] + entry_exit_patterns: list[str] + + +class SceneDepartmentBreakdown(BaseModel): + scene: int + directing: DirectingOutput + cinematography: CinematographyOutput diff --git a/src/schemas/layer_log.py b/src/schemas/layer_log.py new file mode 100644 index 0000000..ab54284 --- /dev/null +++ b/src/schemas/layer_log.py @@ -0,0 +1,21 @@ +from typing import Literal, Optional +from pydantic import BaseModel + + +class TokenUsage(BaseModel): + input: int + output: int + + +class LayerLog(BaseModel): + layer_id: str + scene_id: Optional[int] + run_id: str + timestamp: str + input_hash: str + output_hash: Optional[str] + validation_result: Literal["valid", "failed", "flagged"] + execution_time_ms: int + failure_state: Optional[str] + retry_count: int + token_usage: 
Optional[TokenUsage] diff --git a/src/schemas/normalized_script.py b/src/schemas/normalized_script.py new file mode 100644 index 0000000..7ce7e4f --- /dev/null +++ b/src/schemas/normalized_script.py @@ -0,0 +1,19 @@ +from pydantic import BaseModel, model_validator +from src.schemas.script_element import ScriptElement + + +class NormalizedScript(BaseModel): + elements: list[ScriptElement] + + @model_validator(mode="after") + def must_have_scene_heading(self): + headings = [e for e in self.elements if e.type == "scene_heading"] + if len(headings) == 0: + raise ValueError("Script contains zero scene headings — cannot proceed") + return self + + @model_validator(mode="after") + def must_not_be_empty(self): + if len(self.elements) == 0: + raise ValueError("Script contains no elements") + return self diff --git a/src/schemas/production_bible.py b/src/schemas/production_bible.py new file mode 100644 index 0000000..07eae25 --- /dev/null +++ b/src/schemas/production_bible.py @@ -0,0 +1,82 @@ +"""Production Bible schemas — Layer 3. 
Built in Phase 2, defined now for contract stability.""" + +from typing import Optional +from pydantic import BaseModel + + +class Relationship(BaseModel): + character: str + nature: str + + +class WardrobeState(BaseModel): + scene_range: list[int] + description: str + + +class EmotionalState(BaseModel): + scene: int + state: str + + +class Character(BaseModel): + name: str + aliases: list[str] + description: str + arc_summary: str + first_appearance: int + scenes_present: list[int] + relationships: list[Relationship] + wardrobe_states: list[WardrobeState] + emotional_arc: list[EmotionalState] + reference_prompt: str + + +class Location(BaseModel): + name: str + description: str + type: str + scenes_used: list[int] + time_of_day_variants: list[str] + notable_features: list[str] + mood_associations: list[str] + reference_prompt: str + + +class Prop(BaseModel): + name: str + description: str + significance: str + scenes_present: list[int] + owner_or_association: str + state_changes: list[EmotionalState] # reuses {scene, state} shape + + +class WardrobeEntry(BaseModel): + character: str + scene_range: list[int] + description: str + change_trigger: str + + +class EmotionalBeat(BaseModel): + scene: int + dominant_tone: str + tension_level: int + arc_position: str + + +class TimelineEntry(BaseModel): + scene: int + story_time: str + elapsed_since_previous: str + concurrent_with: list[int] + + +class ProductionBible(BaseModel): + characters: list[Character] + locations: list[Location] + props: list[Prop] + wardrobe: list[WardrobeEntry] + emotional_arc: list[EmotionalBeat] + timeline: list[TimelineEntry] diff --git a/src/schemas/prompt_package.py b/src/schemas/prompt_package.py new file mode 100644 index 0000000..4f5c994 --- /dev/null +++ b/src/schemas/prompt_package.py @@ -0,0 +1,12 @@ +"""Prompt package schema — Layer 6. 
Built in Phase 5, defined now for contract stability.""" + +from typing import Optional +from pydantic import BaseModel + + +class PromptPackage(BaseModel): + category: str + scene: int + prompt: str + source_refs: list[str] + negative_prompt: Optional[str] = None diff --git a/src/schemas/scene.py b/src/schemas/scene.py new file mode 100644 index 0000000..fb241ff --- /dev/null +++ b/src/schemas/scene.py @@ -0,0 +1,19 @@ +from pydantic import BaseModel + + +class Scene(BaseModel): + scene_number: int + scene_heading: str + location: str + time_of_day: str + int_ext: str + characters_present: list[str] + new_characters_introduced: list[str] + props: list[str] + wardrobe_clues: list[str] + emotional_tone: str + visual_beats: list[str] + action_summary: str + dialogue_summary: str + continuity_notes: list[str] + dependencies: list[int] diff --git a/src/schemas/scene_array.py b/src/schemas/scene_array.py new file mode 100644 index 0000000..9ddb5b8 --- /dev/null +++ b/src/schemas/scene_array.py @@ -0,0 +1,20 @@ +from pydantic import BaseModel, model_validator +from src.schemas.scene import Scene + + +class SceneArray(BaseModel): + scenes: list[Scene] + + @model_validator(mode="after") + def must_have_scenes(self): + if len(self.scenes) == 0: + raise ValueError("SceneArray contains no scenes") + return self + + @model_validator(mode="after") + def unique_scene_numbers(self): + numbers = [s.scene_number for s in self.scenes] + dupes = [n for n in numbers if numbers.count(n) > 1] + if dupes: + raise ValueError(f"Duplicate scene numbers: {set(dupes)}") + return self diff --git a/src/schemas/script_element.py b/src/schemas/script_element.py new file mode 100644 index 0000000..6427f9b --- /dev/null +++ b/src/schemas/script_element.py @@ -0,0 +1,18 @@ +from typing import Literal, Optional +from pydantic import BaseModel + + +ELEMENT_TYPES = Literal[ + "scene_heading", + "action", + "character", + "dialogue", + "parenthetical", + "transition", +] + + +class 
ScriptElement(BaseModel): + type: ELEMENT_TYPES + text: str + line_number: Optional[int] = None diff --git a/src/schemas/shot.py b/src/schemas/shot.py new file mode 100644 index 0000000..3962eae --- /dev/null +++ b/src/schemas/shot.py @@ -0,0 +1,24 @@ +"""Shot schema — Layer 5. Built in Phase 4, defined now for contract stability.""" + +from typing import Optional +from pydantic import BaseModel + + +class Shot(BaseModel): + shot_id: str + scene: int + order: int + type: str + movement: str + subject: str + framing_notes: str + duration_intent: str + emotional_intent: str + lighting_notes: str + lens_notes: str + action_description: str + dialogue: Optional[str] + transition_in: str + transition_out: str + vfx_notes: Optional[str] + audio_notes: Optional[str] diff --git a/src/understanding/__init__.py b/src/understanding/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/understanding/chunker.py b/src/understanding/chunker.py new file mode 100644 index 0000000..18652b2 --- /dev/null +++ b/src/understanding/chunker.py @@ -0,0 +1,63 @@ +"""Script chunker — splits long scripts at scene boundaries.""" + +from src.schemas.normalized_script import NormalizedScript +from src.schemas.script_element import ScriptElement + + +def chunk_script(script: NormalizedScript, max_chars: int = 50000) -> list[NormalizedScript]: + """Split a normalized script into chunks at scene boundaries. + + Args: + script: Full normalized script. + max_chars: Maximum character count per chunk (approximate, based on text content). + + Returns: + List of NormalizedScript chunks. Each contains only complete scenes. 
+ """ + # Find scene boundary indices + scene_starts: list[int] = [] + for i, elem in enumerate(script.elements): + if elem.type == "scene_heading": + scene_starts.append(i) + + if not scene_starts: + return [script] + + # Build scene groups (each group = elements from one scene heading to the next) + scene_groups: list[list[ScriptElement]] = [] + for i, start in enumerate(scene_starts): + end = scene_starts[i + 1] if i + 1 < len(scene_starts) else len(script.elements) + scene_groups.append(script.elements[start:end]) + + # Build chunks by accumulating scenes until max_chars + chunks: list[NormalizedScript] = [] + current_elements: list[ScriptElement] = [] + current_chars = 0 + + for group in scene_groups: + group_chars = sum(len(e.text) for e in group) + + # If a single scene exceeds max_chars, it becomes its own chunk + if group_chars > max_chars: + # Flush current chunk if non-empty + if current_elements: + chunks.append(NormalizedScript(elements=current_elements)) + current_elements = [] + current_chars = 0 + chunks.append(NormalizedScript(elements=list(group))) + continue + + # Would adding this scene exceed the limit? 
+ if current_chars + group_chars > max_chars and current_elements: + chunks.append(NormalizedScript(elements=current_elements)) + current_elements = [] + current_chars = 0 + + current_elements.extend(group) + current_chars += group_chars + + # Flush remaining + if current_elements: + chunks.append(NormalizedScript(elements=current_elements)) + + return chunks diff --git a/src/understanding/extractor.py b/src/understanding/extractor.py new file mode 100644 index 0000000..8c64472 --- /dev/null +++ b/src/understanding/extractor.py @@ -0,0 +1,129 @@ +"""Layer 2 AI extractor — sends normalized script to Claude, receives scene JSON.""" + +import json +from dataclasses import dataclass +from anthropic import Anthropic +from src.schemas.normalized_script import NormalizedScript + + +@dataclass +class ExtractionResult: + raw_scenes: list[dict] + token_usage: dict # {"input": int, "output": int} + + +class ExtractionError(Exception): + pass + + +def extract_scenes( + script: NormalizedScript, + contract_path: str, + api_key: str, + model: str = "claude-sonnet-4-20250514", +) -> ExtractionResult: + """Extract structured scene data from a normalized script using AI. + + Args: + script: Validated NormalizedScript from Layer 1. + contract_path: Path to the prompt contract JSON file. + api_key: Anthropic API key. + model: Model ID to use. + + Returns: + ExtractionResult with raw scene dicts and token usage. + + Raises: + ExtractionError: If AI response cannot be parsed as JSON. 
+ """ + # Load prompt contract + with open(contract_path, "r", encoding="utf-8") as f: + contract = json.load(f) + + # Build script text from elements + script_text = _elements_to_text(script) + + # Render user prompt + user_prompt = contract["user_prompt_template"].replace("{{script_text}}", script_text) + + # Call Claude API + client = Anthropic(api_key=api_key) + response = client.messages.create( + model=model, + max_tokens=contract["max_output_tokens"], + temperature=0, + system=contract["system_prompt"], + messages=[{"role": "user", "content": user_prompt}], + ) + + # Extract text content + response_text = response.content[0].text + + # Parse JSON + try: + parsed = json.loads(response_text) + except json.JSONDecodeError as e: + # Try to extract JSON from response if wrapped in markdown + cleaned = _extract_json(response_text) + if cleaned: + try: + parsed = json.loads(cleaned) + except json.JSONDecodeError: + raise ExtractionError(f"AI response is not valid JSON: {e}") from e + else: + raise ExtractionError(f"AI response is not valid JSON: {e}") from e + + # Extract scenes array + if isinstance(parsed, dict) and "scenes" in parsed: + scenes = parsed["scenes"] + elif isinstance(parsed, list): + scenes = parsed + else: + raise ExtractionError(f"Unexpected response structure: expected dict with 'scenes' key or list, got {type(parsed)}") + + if not isinstance(scenes, list): + raise ExtractionError(f"'scenes' is not a list: {type(scenes)}") + + token_usage = { + "input": response.usage.input_tokens, + "output": response.usage.output_tokens, + } + + return ExtractionResult(raw_scenes=scenes, token_usage=token_usage) + + +def _elements_to_text(script: NormalizedScript) -> str: + """Convert NormalizedScript back to readable text for the AI prompt.""" + lines = [] + for elem in script.elements: + if elem.type == "scene_heading": + lines.append("") + lines.append(elem.text) + lines.append("") + elif elem.type == "character": + lines.append("") + lines.append(f" 
{elem.text}") + elif elem.type == "dialogue": + lines.append(f" {elem.text}") + elif elem.type == "parenthetical": + lines.append(f" {elem.text}") + elif elem.type == "transition": + lines.append("") + lines.append(f" {elem.text}") + lines.append("") + else: # action + lines.append(elem.text) + return "\n".join(lines) + + +def _extract_json(text: str) -> str | None: + """Try to extract JSON from text that may be wrapped in markdown code blocks.""" + if "```json" in text: + start = text.index("```json") + 7 + end = text.index("```", start) + return text[start:end].strip() + if "```" in text: + start = text.index("```") + 3 + end = text.index("```", start) + return text[start:end].strip() + return None diff --git a/src/validators/__init__.py b/src/validators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/validators/scene_validator.py b/src/validators/scene_validator.py new file mode 100644 index 0000000..8ccda17 --- /dev/null +++ b/src/validators/scene_validator.py @@ -0,0 +1,92 @@ +"""Scene-specific validation — post-schema semantic checks for Layer 2 output.""" + +from dataclasses import dataclass +from typing import Literal +from difflib import SequenceMatcher +from src.schemas.scene_array import SceneArray + + +@dataclass +class ValidationWarning: + type: Literal[ + "duplicate_scene_number", + "similar_character_names", + "scene_count_deviation", + "empty_characters", + "unknown_values", + ] + scene_number: int | None + message: str + + +def validate_scenes(scene_array: SceneArray, heading_count: int) -> list[ValidationWarning]: + """Run semantic validation checks on extracted scenes. + + Args: + scene_array: Validated SceneArray from Layer 2. + heading_count: Number of scene_heading elements from Layer 1 output. + + Returns: + List of warnings. Empty list means all checks passed. + """ + warnings: list[ValidationWarning] = [] + + # 1. 
Duplicate scene numbers (already caught by SceneArray validator, but double-check) + seen_numbers: dict[int, int] = {} + for scene in scene_array.scenes: + if scene.scene_number in seen_numbers: + warnings.append(ValidationWarning( + type="duplicate_scene_number", + scene_number=scene.scene_number, + message=f"Scene number {scene.scene_number} appears more than once", + )) + seen_numbers[scene.scene_number] = seen_numbers.get(scene.scene_number, 0) + 1 + + # 2. Similar character names (possible duplicates) + all_characters: set[str] = set() + for scene in scene_array.scenes: + all_characters.update(scene.characters_present) + + char_list = sorted(all_characters) + for i, name_a in enumerate(char_list): + for name_b in char_list[i + 1:]: + ratio = SequenceMatcher(None, name_a.upper(), name_b.upper()).ratio() + if ratio > 0.8 and name_a != name_b: + warnings.append(ValidationWarning( + type="similar_character_names", + scene_number=None, + message=f"Possible duplicate characters: '{name_a}' and '{name_b}' (similarity: {ratio:.0%})", + )) + + # 3. Scene count deviation + extracted_count = len(scene_array.scenes) + if heading_count > 0: + deviation = abs(extracted_count - heading_count) / heading_count + if deviation > 0.20: + warnings.append(ValidationWarning( + type="scene_count_deviation", + scene_number=None, + message=f"Extracted {extracted_count} scenes but Layer 1 found {heading_count} scene headings (deviation: {deviation:.0%})", + )) + + # 4. Empty characters_present + for scene in scene_array.scenes: + if not scene.characters_present: + warnings.append(ValidationWarning( + type="empty_characters", + scene_number=scene.scene_number, + message=f"Scene {scene.scene_number} has no characters listed", + )) + + # 5. 
UNKNOWN values + for scene in scene_array.scenes: + scene_dict = scene.model_dump() + for key, value in scene_dict.items(): + if value == "UNKNOWN" or (isinstance(value, list) and "UNKNOWN" in value): + warnings.append(ValidationWarning( + type="unknown_values", + scene_number=scene.scene_number, + message=f"Scene {scene.scene_number} field '{key}' contains UNKNOWN", + )) + + return warnings diff --git a/src/validators/schema_validator.py b/src/validators/schema_validator.py new file mode 100644 index 0000000..28642b7 --- /dev/null +++ b/src/validators/schema_validator.py @@ -0,0 +1,48 @@ +"""Generic schema validator — validates raw dicts against Pydantic models.""" + +from dataclasses import dataclass, field +from typing import Literal, Optional, Any +from pydantic import BaseModel, ValidationError + + +@dataclass +class ValidationResult: + status: Literal["valid", "failed", "flagged"] + errors: list[str] = field(default_factory=list) + data: Optional[Any] = None # populated with the Pydantic model instance if valid/flagged + + +def validate(raw: dict, schema_class: type[BaseModel]) -> ValidationResult: + """Validate a raw dict against a Pydantic model class. 
+ + Returns: + ValidationResult with status: + - "valid": all fields present, correct types, no UNKNOWN values + - "flagged": structurally valid but contains UNKNOWN values + - "failed": schema validation error (missing fields, wrong types) + """ + try: + instance = schema_class.model_validate(raw) + except ValidationError as e: + error_messages = [] + for err in e.errors(): + loc = " -> ".join(str(x) for x in err["loc"]) + error_messages.append(f"{loc}: {err['msg']}") + return ValidationResult(status="failed", errors=error_messages, data=None) + + # Check for UNKNOWN values — structurally valid but semantically incomplete + if _contains_unknown(raw): + return ValidationResult(status="flagged", errors=["Contains UNKNOWN values"], data=instance) + + return ValidationResult(status="valid", errors=[], data=instance) + + +def _contains_unknown(obj: Any) -> bool: + """Recursively check if any value in the structure is the string 'UNKNOWN'.""" + if isinstance(obj, str): + return obj == "UNKNOWN" + if isinstance(obj, dict): + return any(_contains_unknown(v) for v in obj.values()) + if isinstance(obj, list): + return any(_contains_unknown(item) for item in obj) + return False diff --git a/test_scripts/action_heavy.fountain b/test_scripts/action_heavy.fountain new file mode 100644 index 0000000..6570804 --- /dev/null +++ b/test_scripts/action_heavy.fountain @@ -0,0 +1,238 @@ +Title: The Extraction +Credit: written by +Author: Test Script +Draft date: 2026-04-06 + +==== + +EXT. ROOFTOP - MEXICO CITY - NIGHT + +A sprawling cityscape glitters below. JACK REEVES (40s, ex-military, scar across his jaw) crouches behind an air conditioning unit, binoculars pressed to his eyes. + +Through the binoculars: a fortified compound three blocks away. Armed guards patrol the perimeter. + +Jack touches his earpiece. + +JACK +I count twelve on the outside. How many inside? + +DISPATCH (V.O.) +Thermal says another eight. Plus the package. + +JACK +Twenty hostiles. One extraction target. 
Lovely. + +He slings a tactical bag over his shoulder and moves to the roof's edge. + +EXT. ALLEY - MEXICO CITY - NIGHT + +Jack drops from a fire escape, landing silently. He presses against the wall as a patrol vehicle passes. + +He checks his watch: 11:47 PM. + +He moves through the shadows toward the compound. + +EXT. COMPOUND PERIMETER - NIGHT + +Jack reaches a drainage grate at the compound's east wall. He pulls a small cutting tool from his bag and works the bolts. + +A guard's flashlight sweeps nearby. Jack freezes. The light passes. + +He removes the grate and slides into the drain. + +INT. DRAINAGE TUNNEL - NIGHT + +Cramped, wet, dark. Jack crawls through the tunnel using a red-filtered headlamp. Rats scatter ahead of him. + +He reaches a junction and checks a hand-drawn map. + +JACK +(whispering) +Left fork, thirty meters, then up. + +He crawls left. + +INT. COMPOUND - BASEMENT - NIGHT + +Jack pushes up through a floor grate into a dimly lit basement. Stacked crates everywhere. He draws his suppressed pistol and listens. + +Footsteps above. Two sets, moving away. + +He crosses the basement to a steel door. Locked. He pulls a bypass kit from his bag and works the lock. It clicks open in twelve seconds. + +INT. COMPOUND - GROUND FLOOR CORRIDOR - NIGHT + +Jack moves down a concrete corridor, back to the wall. Security camera ahead — he pulls a small device from his pocket and aims it. The camera's red light blinks off. + +He rounds a corner and comes face to face with GUARD #1. + +A brutal, silent fight. Jack disarms the guard, puts him in a chokehold. The guard goes limp. Jack drags him into a side room. + +INT. COMPOUND - STAIRWELL - NIGHT + +Jack climbs the stairs, pistol leading. He pauses at the second floor landing — voices in Spanish behind the door. + +He waits. The voices move away. He opens the door slowly. + +INT. COMPOUND - SECOND FLOOR - NIGHT + +A hallway lined with doors. At the far end, GUARD #2 and GUARD #3 stand outside a reinforced door. 
+ +Jack checks his options. A ventilation shaft above him. He holsters the pistol, jumps, grabs the edge, and pulls himself up. + +INT. VENTILATION SHAFT - NIGHT + +Jack crawls above the hallway. Through a vent below, he can see the two guards. + +He pulls two smoke canisters from his bag. Drops them through the vent. + +The hallway fills with white smoke. Shouts from the guards. Jack drops through the vent, lands between them. + +Two precise strikes. Both guards are down in four seconds. + +INT. COMPOUND - HOLDING ROOM - NIGHT + +Jack kicks through the reinforced door. Inside: a bare room with a single chair. DR. SOFIA NAVARRO (30s, bruised, defiant) is handcuffed to it. + +SOFIA +About time. + +Jack cuts the cuffs with bolt cutters. + +JACK +Can you run? + +SOFIA +Can you get us out of here? + +An alarm blares. Red lights flood the corridor outside. + +JACK +That answers that. Move. + +He hands her a spare earpiece and they exit into the hallway. + +INT. COMPOUND - SECOND FLOOR - NIGHT + +They run. Jack fires twice behind them as guards pour from a doorway. Both shots hit. + +JACK +Stairs — go left! + +They hit the stairwell. Gunfire chases them, sparking off the metal railing. + +EXT. COMPOUND - COURTYARD - NIGHT + +They burst through a ground floor exit into an open courtyard. Floodlights snap on. Guards converge from two sides. + +Jack pulls a flashbang from his vest. Throws it. BANG — blinding white light. + +In the chaos, Jack grabs Sofia and they sprint for the east wall. + +JACK +Over the wall. I'll boost you. + +He cups his hands. Sofia steps up and scrambles over. Jack follows, bullets pocking the concrete around him. + +EXT. STREET - MEXICO CITY - NIGHT + +They drop onto a narrow street. A black SUV screeches around the corner — RAMOS (30s, driver, cool under fire) behind the wheel. + +RAMOS +Get in! + +They dive into the back. Ramos floors it. + +INT. SUV - MOVING - NIGHT + +Jack reloads. Sofia catches her breath. + +SOFIA +Who sent you? 
+ +JACK +People who want you alive. + +SOFIA +That narrows it down to almost nobody. + +Rear window SHATTERS — pursuit vehicle behind them. + +RAMOS +Hang on! + +Ramos cuts hard left through a market street. Stalls explode as the SUV plows through. The pursuit vehicle follows. + +Jack leans out the window and fires at the pursuit vehicle's tires. Third shot connects — the vehicle swerves and crashes into a storefront. + +EXT. HIGHWAY - NIGHT + +The SUV merges onto a highway. City lights fall behind them. + +Silence in the car. Ramos checks the mirror — no pursuit. + +RAMOS +Airstrip is forty minutes out. + +Jack nods. He looks at Sofia. + +JACK +You're going to have to tell them everything. The lab, the formula, all of it. + +SOFIA +I know. + +JACK +They won't be happy about what you found. + +SOFIA +That's why they tried to kill me. + +EXT. DESERT AIRSTRIP - NIGHT + +A small prop plane waits on a dirt runway, engines running. The SUV skids to a stop beside it. + +Jack, Sofia, and Ramos exit. PILOT waves them toward the plane. + +JACK +(to Ramos) +Burn the car. + +RAMOS +Already planned on it. + +Jack and Sofia board the plane. + +INT. PROP PLANE - NIGHT + +They settle into seats. The plane taxis and lifts off. Through the window, they watch the SUV ignite below — Ramos standing clear, lighter in hand. + +SOFIA +Where are we going? + +JACK +Somewhere they can't reach you. + +SOFIA +Does that place exist? + +Jack doesn't answer. The plane climbs into darkness. + +EXT. COMPOUND - NIGHT + +The compound is in chaos. Guards shout, lights sweep the surrounding blocks. COLONEL VEGA (50s, cold, scarred) stands in the holding room, staring at the empty chair and cut handcuffs. + +An AIDE approaches. + +AIDE +They escaped east. Vehicle lost them on the highway. + +VEGA +Find the pilot's name. Find the airstrip. Find the plane. + +He picks up the cut handcuffs. + +VEGA +And find whoever sent the soldier. + +FADE OUT. 
diff --git a/test_scripts/dialogue_heavy.fountain b/test_scripts/dialogue_heavy.fountain new file mode 100644 index 0000000..6753b56 --- /dev/null +++ b/test_scripts/dialogue_heavy.fountain @@ -0,0 +1,267 @@ +Title: The Weight of Words +Credit: written by +Author: Test Script +Draft date: 2026-04-06 + +==== + +INT. THERAPIST'S OFFICE - DAY + +A small, warm room. Bookshelves line the walls. DR. ELENA VOSS (50s, calm, measured) sits across from MARCUS CHEN (30s, restless, avoiding eye contact). + +DR. VOSS +How have you been sleeping? + +MARCUS +(shifting in his seat) +Fine. Same as always. + +DR. VOSS +Marcus, we've talked about this. "Fine" isn't — + +MARCUS +It's not a feeling, I know. You've said that. + +A long beat. Elena writes something in her notebook. + +DR. VOSS +Your sister called me. + +MARCUS +She had no right to do that. + +DR. VOSS +She's worried about you. She said you haven't left the apartment in two weeks. + +MARCUS +That's an exaggeration. + +DR. VOSS +Is it? + +Marcus finally looks at her. His eyes are red-rimmed. + +INT. MARCUS'S APARTMENT - NIGHT + +A cluttered studio apartment. Takeout containers on every surface. Marcus sits on the floor against the wall, phone in hand. He stares at a text from LILY CHEN: "Please call me back." + +He sets the phone face-down. + +INT. COFFEE SHOP - MORNING + +Bright, busy. LILY CHEN (late 20s, sharp, put-together but tired) waits at a corner table. Marcus enters, looking like he hasn't slept. + +LILY +You look terrible. + +MARCUS +Thanks. Love you too. + +He sits. An awkward silence. + +LILY +Mom's been asking about you. + +MARCUS +Tell her I'm fine. + +LILY +I'm not going to lie to her, Marcus. + +MARCUS +Then don't tell her anything. + +LILY +That's the same thing and you know it. + +MARCUS +(quiet) +How is she? + +LILY +She's scared. Dad's getting worse and you won't even visit. + +Marcus stares at his coffee. + +INT. HOSPITAL ROOM - AFTERNOON + +HENRY CHEN (60s) lies in a hospital bed, frail but alert. 
Marcus stands in the doorway, unable to step inside. + +HENRY +Are you going to stand there all day or come in? + +Marcus enters slowly. He sits in the chair beside the bed. + +HENRY +Your sister tells me you've been hiding. + +MARCUS +I haven't been hiding. + +HENRY +(smiling weakly) +You're a terrible liar. Always were. + +MARCUS +Dad — + +HENRY +I'm dying, Marcus. Not dead yet. You can still talk to me. + +Marcus's composure cracks. He puts his head in his hands. + +INT. THERAPIST'S OFFICE - DAY + +Back with Dr. Voss. Marcus is more open now, leaning forward. + +MARCUS +He just said it. Like it was nothing. "I'm dying." + +DR. VOSS +How did that make you feel? + +MARCUS +Like I've been wasting time. Like every day I spent not going to see him was — + +He stops. + +DR. VOSS +Was what? + +MARCUS +Selfish. It was selfish. + +DR. VOSS +You were protecting yourself. That's not the same as selfishness. + +MARCUS +Isn't it? + +INT. MARCUS'S APARTMENT - NIGHT + +Marcus packs a small bag. He picks up a framed photo from the shelf — a young Marcus and Henry at a baseball game, both laughing. + +He puts the photo in the bag. + +INT. HOSPITAL ROOM - NIGHT + +Marcus enters with the bag. Henry is asleep. NURSE PATRICIA (40s, gentle) is checking his IV. + +NURSE PATRICIA +He's been asking for you. + +MARCUS +I'm here now. + +He sits in the chair and takes his father's hand. Henry stirs. + +HENRY +(half asleep) +Marcus? + +MARCUS +I'm here, Dad. I'm not going anywhere. + +Henry squeezes his hand weakly and drifts back to sleep. Marcus settles in, pulling his jacket around himself like a blanket. + +INT. HOSPITAL CAFETERIA - MORNING + +Marcus and Lily sit across from each other, both holding bad coffee. + +LILY +You stayed all night? + +MARCUS +Yeah. + +LILY +(softening) +Thank you. + +MARCUS +Don't thank me. I should have been here weeks ago. + +LILY +You're here now. That's what matters. + +A beat. + +MARCUS +I'm going to take a leave from work. Stay until... however long. 
+ +LILY +(eyes welling up) +Okay. + +MARCUS +I called Dr. Voss. She's going to do phone sessions. + +LILY +That's good. That's really good, Marcus. + +They sit in silence, something unspoken settling between them. + +INT. HOSPITAL ROOM - AFTERNOON + +Marcus reads aloud from a book. Henry listens, eyes closed but smiling. + +HENRY +You always did have a good voice for reading. + +MARCUS +Mom used to say I should have been an actor. + +HENRY +Your mother was right about most things. + +MARCUS +She was right about you too. + +HENRY +(opening his eyes) +What did she say? + +MARCUS +That you were the most stubborn man alive. + +HENRY +(laughing, then coughing) +Was. Was the most stubborn. + +They share a look — sad, warm, real. + +INT. HOSPITAL HALLWAY - EVENING + +Marcus leans against the wall outside Henry's room, on the phone. + +MARCUS +(into phone) +I know I missed the deadline. I understand... No, I'm not asking for sympathy. I'm asking for two weeks... Thank you. I appreciate it. + +He hangs up. Takes a breath. Goes back inside. + +INT. THERAPIST'S OFFICE - DAY (PHONE SESSION) + +Marcus sits in the hospital cafeteria, phone to his ear. + +DR. VOSS (V.O.) +How does it feel to be there? + +MARCUS +Terrifying. And also... right. Like I'm finally where I'm supposed to be. + +DR. VOSS (V.O.) +That's a significant shift, Marcus. + +MARCUS +I know. I just wish it hadn't taken this long. + +DR. VOSS (V.O.) +The important thing isn't when you arrived. It's that you stayed. + +Marcus looks through the cafeteria window toward the hallway leading to his father's room. + +MARCUS +Yeah. I'm staying. + +FADE OUT. 
diff --git a/test_scripts/expected/action_heavy_scenes.json b/test_scripts/expected/action_heavy_scenes.json new file mode 100644 index 0000000..160bdeb --- /dev/null +++ b/test_scripts/expected/action_heavy_scenes.json @@ -0,0 +1,7 @@ +{ + "expected_scene_count": 18, + "expected_characters": ["JACK", "SOFIA", "RAMOS", "VEGA"], + "expected_locations": ["ROOFTOP", "ALLEY", "COMPOUND", "DRAINAGE TUNNEL", "STAIRWELL", "SUV", "HIGHWAY", "AIRSTRIP", "PROP PLANE"], + "must_not_contain_characters": [], + "must_not_contain_locations": [] +} diff --git a/test_scripts/expected/dialogue_heavy_scenes.json b/test_scripts/expected/dialogue_heavy_scenes.json new file mode 100644 index 0000000..b9eeb80 --- /dev/null +++ b/test_scripts/expected/dialogue_heavy_scenes.json @@ -0,0 +1,7 @@ +{ + "expected_scene_count": 11, + "expected_characters": ["MARCUS", "DR. VOSS", "LILY", "HENRY", "NURSE PATRICIA"], + "expected_locations": ["THERAPIST'S OFFICE", "APARTMENT", "COFFEE SHOP", "HOSPITAL ROOM", "HOSPITAL CAFETERIA", "HOSPITAL HALLWAY"], + "must_not_contain_characters": [], + "must_not_contain_locations": [] +} diff --git a/test_scripts/expected/nonstandard_scenes.json b/test_scripts/expected/nonstandard_scenes.json new file mode 100644 index 0000000..5131f6a --- /dev/null +++ b/test_scripts/expected/nonstandard_scenes.json @@ -0,0 +1,7 @@ +{ + "expected_scene_count": 9, + "expected_characters": ["NADIA", "DEREK", "TOMMY", "MRS. PETROV", "MOTHER"], + "expected_locations": ["APARTMENT", "GROCERY STORE", "PARKING LOT", "BUS STOP", "BUS", "GAS STATION", "FAMILY HOUSE", "MOTEL ROOM"], + "must_not_contain_characters": [], + "must_not_contain_locations": [] +} diff --git a/test_scripts/nonstandard.fountain b/test_scripts/nonstandard.fountain new file mode 100644 index 0000000..2494803 --- /dev/null +++ b/test_scripts/nonstandard.fountain @@ -0,0 +1,186 @@ +Title: Loose Threads +Author: Test Script + +==== + +apartment - morning + +NADIA wakes up on the couch. Still in yesterday's clothes. 
Her phone buzzes on the coffee table — 14 missed calls. + +She ignores it and walks to the kitchen. + +NADIA +(to herself) +Not today. + +She opens the fridge. Empty except for a bottle of hot sauce. + +INT GROCERY STORE DAY + +Nadia pushes a cart through fluorescent aisles. She looks half-asleep. Her phone rings again. She silences it. + +at the checkout, TOMMY (20s, works here, knows everybody) scans her items. + +TOMMY +You look rough. + +NADIA +Didn't sleep. + +TOMMY +Your brother was in here earlier looking for you. + +Nadia freezes. + +NADIA +What did you tell him? + +TOMMY +Nothing. I don't get involved. + +EXT. PARKING LOT - DAY + +Nadia loads groceries into her car. A beat-up sedan is parked three spaces away. DEREK (30s, her brother, intense) leans against it. + +DEREK +We need to talk. + +NADIA +No we don't. + +DEREK +It's about the house. + +NADIA +I said no. + +She gets in her car and drives away. Derek watches her go. + +nadia's apartment - night + +Nadia sits at her kitchen table eating cereal. A KNOCK at the door. She doesn't move. Another KNOCK. + +MRS. PETROV (O.S.) +Nadia, I know you're in there. I can hear the television. + +Nadia mutes the TV and stays quiet. + +MRS. PETROV (O.S.) +Your rent is late again, dear. + +NADIA +(calling out) +I'll have it Friday! + +MRS. PETROV (O.S.) +That's what you said last Friday. + +Silence. Footsteps recede. + +EXT. BUS STOP - EARLY MORNING + +Nadia waits at a bus stop, duffel bag at her feet. The sun is barely up. Nobody else around. + +Her phone rings. She looks at the screen — "DEREK." She answers. + +NADIA +What. + +DEREK (V.O.) +The lawyer says we have to both sign or we lose everything. + +NADIA +Maybe we should lose everything. + +DEREK (V.O.) +You don't mean that. + +NADIA +Don't tell me what I mean. + +She hangs up. The bus arrives. She boards. + +INT - BUS - MORNING + +Nadia sits in the back. PASSENGER (elderly woman) across the aisle knits something blue. + +PASSENGER +Going far? 
+ +NADIA +Hopefully. + +The bus pulls away. + +EXT GAS STATION - AFTERNOON + +The bus has stopped. Passengers mill around. Nadia buys a sandwich from a vending machine and sits on the curb. + +Her phone has 3 new texts from Derek. She reads them: + +"Please just call me" +"This isn't about us this is about mom's house" +"Nadia" + +She types: "I need a few days." Sends it. + +.FLASHBACK - INT. FAMILY HOUSE - KITCHEN - YEARS AGO + +Young NADIA (12) and young DEREK (16) sit at the kitchen table. Their MOTHER sets plates in front of them. + +MOTHER +Eat your dinner before it gets cold. + +YOUNG DEREK +Mom, can I go to Tyler's after? + +MOTHER +After you clean your room. + +YOUNG NADIA +His room is disgusting. + +YOUNG DEREK +Shut up. + +MOTHER +Both of you. Eat. + +A normal evening. Nothing special. Everything special. + +INT. MOTEL ROOM - NIGHT + +Cheap. Clean enough. Nadia drops her duffel on the bed and lies down without undressing. + +She stares at the ceiling for a long time. + +Then she calls Derek. + +DEREK (V.O.) +Nadia? + +NADIA +Tell the lawyer I'll sign. + +DEREK (V.O.) +Really? + +NADIA +But I want to go through the house first. Before they clear it. + +DEREK (V.O.) +...okay. Yeah. We can do that. + +NADIA +I'll be back Thursday. + +DEREK (V.O.) +Okay. Drive safe. Or... bus safe. Whatever. + +NADIA +(almost smiling) +Goodnight, Derek. + +She hangs up. Closes her eyes. + +> FADE OUT.