Complete working pipeline from Fountain script to validated scene JSON: - Schemas (Pydantic): all 7 layers defined upfront - Fountain parser + normalizer (Layer 1) - AI scene extractor with prompt contracts (Layer 2) - Schema validator + scene-specific semantic validator - Structured JSON logging per layer/scene execution - Versioned output writer (never overwrites) - Retry engine with 4-level failure escalation - Stop condition evaluator (per-unit + global halts) - Diff/drift detector for re-run comparison - CLI entry point with --dry-run, --scene, --test, --force - 3 test scripts (dialogue-heavy, action-heavy, nonstandard) - Expected output files for regression testing Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
212 lines · 8.7 KiB · Python
"""Pipeline runner — orchestrates Layer 1 → Layer 2 end-to-end."""
|
|
|
|
import hashlib
|
|
import json
|
|
import os
|
|
from dataclasses import dataclass, field
|
|
|
|
from src.ingestion.fountain_parser import parse_fountain
|
|
from src.ingestion.normalizer import normalize, NormalizationError
|
|
from src.understanding.extractor import extract_scenes, ExtractionError
|
|
from src.understanding.chunker import chunk_script
|
|
from src.schemas.scene import Scene
|
|
from src.schemas.scene_array import SceneArray
|
|
from src.validators.schema_validator import validate, ValidationResult
|
|
from src.validators.scene_validator import validate_scenes, ValidationWarning
|
|
from src.logging.layer_logger import LayerLogger
|
|
from src.execution.output_writer import OutputWriter
|
|
from src.execution.retry import execute_with_retry, FailureRecord
|
|
from src.execution.stop_conditions import evaluate_stop
|
|
from src.execution.diff_detector import diff_outputs
|
|
|
|
|
|
@dataclass
class PipelineResult:
    """Outcome summary returned by `run_phase1`.

    Aggregates per-scene validation tallies for a single pipeline run,
    plus the stop reason when a halt condition fired.
    """

    # True when the run completed without a Layer 1 failure or a stop condition.
    success: bool
    # Scene headings counted in the normalized Layer 1 output.
    total_scenes: int = 0
    # Scenes that passed schema validation cleanly.
    valid_scenes: int = 0
    # Scenes that validated but were flagged (e.g. contain UNKNOWN values).
    flagged_scenes: int = 0
    # Scenes that failed validation or whose extraction exhausted retries.
    failed_scenes: int = 0
    # Semantic (scene-level) warnings collected after schema validation.
    warnings: list[ValidationWarning] = field(default_factory=list)
    # Human-readable halt reason; None when the run was not stopped.
    stop_reason: str | None = None
|
|
|
|
|
|
def _count_results(results: list) -> tuple[int, int, int]:
    """Tally (valid, flagged, failed) over mixed ValidationResult/FailureRecord lists.

    A FailureRecord (extraction exhausted retries) counts as failed, as does a
    ValidationResult with status "failed".
    """
    valid = sum(1 for r in results if isinstance(r, ValidationResult) and r.status == "valid")
    flagged = sum(1 for r in results if isinstance(r, ValidationResult) and r.status == "flagged")
    failed = sum(
        1
        for r in results
        if isinstance(r, FailureRecord)
        or (isinstance(r, ValidationResult) and r.status == "failed")
    )
    return valid, flagged, failed


def run_phase1(
    script_path: str,
    project_name: str,
    api_key: str,
    model: str = "claude-sonnet-4-20250514",
    output_dir: str = "output",
    scene_filter: int | None = None,
    dry_run: bool = False,
    force: bool = False,
) -> PipelineResult:
    """Run the Phase 1 pipeline: Layer 1 (ingestion) → Layer 2 (understanding).

    Args:
        script_path: Path to .fountain file.
        project_name: Project name for output directory.
        api_key: Anthropic API key.
        model: Model ID.
        output_dir: Base output directory.
        scene_filter: If set, only process this scene number in Layer 2.
        dry_run: If True, validate inputs only — no AI calls.
        force: If True, ignore cache and re-run even if input unchanged.
            NOTE(review): not referenced in this function body — presumably
            consumed downstream; confirm where cache bypass happens.

    Returns:
        PipelineResult with counts and any stop reason.
    """
    logger = LayerLogger(project_name, output_dir)
    writer = OutputWriter(project_name, output_dir)

    # Resolve prompt contract path: three dirname() hops from this file up to
    # the repository root, then into src/prompts.
    prompts_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "prompts"
    )
    contract_path = os.path.join(prompts_dir, "L2_scene_extraction_v1.json")

    # ── LAYER 1: INGESTION ──────────────────────────────────────────────
    print(f"[L1] Parsing: {script_path}")

    # Fix: an unreadable script previously escaped as a raw OSError; report it
    # through the function's established failure path instead.
    try:
        with open(script_path, "r", encoding="utf-8") as f:
            raw_text = f.read()
    except OSError as e:
        print(f"[L1] STOP: cannot read script — {e}")
        return PipelineResult(success=False, stop_reason=f"Layer 1 failed: {e}")

    run_id = logger.start("L1")
    input_hash = f"sha256:{hashlib.sha256(raw_text.encode()).hexdigest()}"

    # Parse + normalize. Any failure here halts the whole pipeline.
    # Fix: the original caught `(NormalizationError, Exception)` — the tuple is
    # redundant because Exception already subsumes NormalizationError.
    try:
        elements = parse_fountain(raw_text)
        normalized = normalize(elements)
    except Exception as e:
        logger.finish(run_id, input_hash, None, "failed", failure_state=str(e))
        print(f"[L1] STOP: {e}")
        return PipelineResult(success=False, stop_reason=f"Layer 1 failed: {e}")

    # Write L1 output (versioned — never overwrites).
    l1_result = writer.write("L1", None, normalized)
    logger.finish(run_id, input_hash, l1_result["hash"], "valid")

    heading_count = sum(1 for e in normalized.elements if e.type == "scene_heading")
    print(f"[L1] Done: {len(normalized.elements)} elements, {heading_count} scene headings")

    if dry_run:
        print(f"[DRY RUN] Layer 1 valid. {heading_count} scenes would be extracted.")
        return PipelineResult(success=True, total_scenes=heading_count)

    # ── LAYER 2: UNDERSTANDING ──────────────────────────────────────────
    print("[L2] Extracting scenes...")

    # Chunk if the script exceeds a single-call budget.
    chunks = chunk_script(normalized)
    print(f"[L2] Processing in {len(chunks)} chunk(s)")

    all_results: list[ValidationResult | FailureRecord] = []
    all_valid_scenes: list[Scene] = []
    all_warnings: list[ValidationWarning] = []
    total_token_usage = {"input": 0, "output": 0}

    # Hoisted out of the loop: all captured names (contract_path, api_key,
    # model) are loop-invariant, so one closure serves every chunk.
    def do_extract(data):
        """Single extraction attempt; wrapped by execute_with_retry."""
        return extract_scenes(data, contract_path, api_key, model)

    for chunk_idx, chunk in enumerate(chunks):
        run_id = logger.start("L2", scene_id=chunk_idx)
        chunk_input_hash = f"sha256:{hashlib.sha256(json.dumps([e.model_dump() for e in chunk.elements]).encode()).hexdigest()}"

        extraction = execute_with_retry(
            fn=do_extract,
            input_data=chunk,
            layer_id="L2",
            scene_id=chunk_idx,
        )

        if isinstance(extraction, FailureRecord):
            # Retry engine gave up on this chunk — record and move on; the
            # stop-condition evaluator decides later whether to halt globally.
            logger.finish(run_id, chunk_input_hash, None, "failed",
                          failure_state=extraction.error, retry_count=len(extraction.attempts))
            all_results.append(extraction)
            print(f"[L2] Chunk {chunk_idx + 1}: FAILED after {len(extraction.attempts)} attempts")
            continue

        total_token_usage["input"] += extraction.token_usage["input"]
        total_token_usage["output"] += extraction.token_usage["output"]

        # Validate each extracted scene against the Scene schema.
        for raw_scene in extraction.raw_scenes:
            scene_num = raw_scene.get("scene_number", "?")

            # Skip scenes outside the requested filter, if one is set.
            if scene_filter is not None and scene_num != scene_filter:
                continue

            scene_run_id = logger.start("L2", scene_id=scene_num if isinstance(scene_num, int) else None)
            scene_input_hash = f"sha256:{hashlib.sha256(json.dumps(raw_scene).encode()).hexdigest()}"

            result = validate(raw_scene, Scene)
            all_results.append(result)

            if result.status == "failed":
                logger.finish(scene_run_id, scene_input_hash, None, "failed",
                              failure_state="; ".join(result.errors))
                print(f"[L2] Scene {scene_num}: FAILED validation — {result.errors}")
            else:
                # Write versioned scene output. NOTE(review): scenes without an
                # int scene_number all land on id 0 — confirm this is intended.
                scene_out = writer.write_raw("L2", scene_num if isinstance(scene_num, int) else 0, raw_scene)

                # Compare against the previous version to surface drift.
                if isinstance(scene_num, int):
                    prev = writer.read_version("L2", scene_num, scene_out["version"] - 1)
                    if prev:
                        diff = diff_outputs(prev, raw_scene)
                        if diff.changed:
                            print(f"[L2] Scene {scene_num}: DRIFT detected — {diff.drift_categories}")

                logger.finish(scene_run_id, scene_input_hash, scene_out["hash"], result.status,
                              token_usage=extraction.token_usage)

                if result.status == "flagged":
                    print(f"[L2] Scene {scene_num}: FLAGGED (contains UNKNOWN values)")
                else:
                    print(f"[L2] Scene {scene_num}: valid")

                all_valid_scenes.append(result.data)

        logger.finish(run_id, chunk_input_hash, "chunk", "valid",
                      token_usage=extraction.token_usage)

    # Scene-level semantic validation across the whole extracted set.
    if all_valid_scenes:
        scene_array = SceneArray(scenes=all_valid_scenes)
        all_warnings = validate_scenes(scene_array, heading_count)
        for w in all_warnings:
            print(f"[L2] WARNING: {w.message}")

    # Single tally feeds both the stop branch and the success path
    # (previously duplicated verbatim in both places).
    valid, flagged, failed = _count_results(all_results)

    # Evaluate global stop conditions.
    stop = evaluate_stop(all_results, heading_count)
    if stop.should_stop:
        print(f"[L2] STOP CONDITION: {stop.reason}")
        return PipelineResult(
            success=False,
            total_scenes=heading_count,
            valid_scenes=valid,
            flagged_scenes=flagged,
            failed_scenes=failed,
            warnings=all_warnings,
            stop_reason=stop.reason,
        )

    print(f"\n[DONE] Scenes: {valid} valid, {flagged} flagged, {failed} failed")
    print(f"[DONE] Tokens: {total_token_usage['input']} in / {total_token_usage['output']} out")

    return PipelineResult(
        success=True,
        total_scenes=heading_count,
        valid_scenes=valid,
        flagged_scenes=flagged,
        failed_scenes=failed,
        warnings=all_warnings,
    )
|