commit 87d0af0748 Phase 1 implementation: script ingestion + AI extraction pipeline
Complete working pipeline from Fountain script to validated scene JSON:
- Schemas (Pydantic): all 7 layers defined upfront
- Fountain parser + normalizer (Layer 1)
- AI scene extractor with prompt contracts (Layer 2)
- Schema validator + scene-specific semantic validator
- Structured JSON logging per layer/scene execution
- Versioned output writer (never overwrites)
- Retry engine with 4-level failure escalation
- Stop condition evaluator (per-unit + global halts)
- Diff/drift detector for re-run comparison
- CLI entry point with --dry-run, --scene, --test, --force
- 3 test scripts (dialogue-heavy, action-heavy, nonstandard)
- Expected output files for regression testing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 15:49:43 -07:00

212 lines
8.7 KiB
Python

"""Pipeline runner — orchestrates Layer 1 → Layer 2 end-to-end."""
import hashlib
import json
import os
from dataclasses import dataclass, field
from src.ingestion.fountain_parser import parse_fountain
from src.ingestion.normalizer import normalize, NormalizationError
from src.understanding.extractor import extract_scenes, ExtractionError
from src.understanding.chunker import chunk_script
from src.schemas.scene import Scene
from src.schemas.scene_array import SceneArray
from src.validators.schema_validator import validate, ValidationResult
from src.validators.scene_validator import validate_scenes, ValidationWarning
from src.logging.layer_logger import LayerLogger
from src.execution.output_writer import OutputWriter
from src.execution.retry import execute_with_retry, FailureRecord
from src.execution.stop_conditions import evaluate_stop
from src.execution.diff_detector import diff_outputs
@dataclass
class PipelineResult:
    """Outcome summary for one Phase 1 pipeline run (Layer 1 + Layer 2)."""
    # True when the run completed without a Layer 1 failure or a global stop condition.
    success: bool
    # Number of scene headings detected in the normalized script (Layer 1 count).
    total_scenes: int = 0
    # Scenes that passed schema validation cleanly.
    valid_scenes: int = 0
    # Scenes that validated but contain UNKNOWN values needing human review.
    flagged_scenes: int = 0
    # Scenes (or whole chunks) that failed validation or extraction after retries.
    failed_scenes: int = 0
    # Semantic warnings from the scene-level validator (continuity, numbering, etc.).
    warnings: list[ValidationWarning] = field(default_factory=list)
    # Human-readable reason when a stop condition halted the run; None on clean runs.
    stop_reason: str | None = None
def _tally(results: list[ValidationResult | FailureRecord]) -> tuple[int, int, int]:
    """Return (valid, flagged, failed) counts over per-scene/chunk results.

    A FailureRecord always counts as failed; a ValidationResult counts
    according to its status ("valid" / "flagged" / "failed").
    """
    valid = sum(1 for r in results if isinstance(r, ValidationResult) and r.status == "valid")
    flagged = sum(1 for r in results if isinstance(r, ValidationResult) and r.status == "flagged")
    failed = sum(
        1
        for r in results
        if isinstance(r, FailureRecord)
        or (isinstance(r, ValidationResult) and r.status == "failed")
    )
    return valid, flagged, failed


def run_phase1(
    script_path: str,
    project_name: str,
    api_key: str,
    model: str = "claude-sonnet-4-20250514",
    output_dir: str = "output",
    scene_filter: int | None = None,
    dry_run: bool = False,
    force: bool = False,
) -> PipelineResult:
    """Run the Phase 1 pipeline: Layer 1 (ingestion) → Layer 2 (understanding).

    Args:
        script_path: Path to .fountain file.
        project_name: Project name for output directory.
        api_key: Anthropic API key.
        model: Model ID.
        output_dir: Base output directory.
        scene_filter: If set, only process this scene number in Layer 2.
        dry_run: If True, validate inputs only — no AI calls.
        force: If True, ignore cache and re-run even if input unchanged.
            NOTE(review): `force` is accepted but never consulted in this
            function — caching/skip logic presumably lives elsewhere; confirm.

    Returns:
        PipelineResult with counts and any stop reason.
    """
    logger = LayerLogger(project_name, output_dir)
    writer = OutputWriter(project_name, output_dir)
    # Resolve prompt contract path relative to this file (repo_root/src/prompts).
    prompts_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "prompts"
    )
    contract_path = os.path.join(prompts_dir, "L2_scene_extraction_v1.json")

    # ── LAYER 1: INGESTION ──────────────────────────────────────────────
    print(f"[L1] Parsing: {script_path}")
    run_id = logger.start("L1")
    # Read the raw script; hash it so reruns can be compared against this input.
    with open(script_path, "r", encoding="utf-8") as f:
        raw_text = f.read()
    input_hash = f"sha256:{hashlib.sha256(raw_text.encode()).hexdigest()}"
    # Parse + normalize. Any exception here (NormalizationError included —
    # the old `(NormalizationError, Exception)` tuple was redundant) halts
    # the pipeline: Layer 2 cannot run without normalized elements.
    try:
        elements = parse_fountain(raw_text)
        normalized = normalize(elements)
    except Exception as e:
        logger.finish(run_id, input_hash, None, "failed", failure_state=str(e))
        print(f"[L1] STOP: {e}")
        return PipelineResult(success=False, stop_reason=f"Layer 1 failed: {e}")
    # Persist the normalized Layer 1 output (versioned; never overwrites).
    l1_result = writer.write("L1", None, normalized)
    logger.finish(run_id, input_hash, l1_result["hash"], "valid")
    heading_count = sum(1 for e in normalized.elements if e.type == "scene_heading")
    print(f"[L1] Done: {len(normalized.elements)} elements, {heading_count} scene headings")
    if dry_run:
        print(f"[DRY RUN] Layer 1 valid. {heading_count} scenes would be extracted.")
        return PipelineResult(success=True, total_scenes=heading_count)

    # ── LAYER 2: UNDERSTANDING ──────────────────────────────────────────
    print("[L2] Extracting scenes...")
    # Chunk if needed (long scripts are extracted chunk-by-chunk).
    chunks = chunk_script(normalized)
    print(f"[L2] Processing in {len(chunks)} chunk(s)")
    all_results: list[ValidationResult | FailureRecord] = []
    all_valid_scenes: list[Scene] = []
    all_warnings: list[ValidationWarning] = []
    total_token_usage = {"input": 0, "output": 0}

    # The extraction closure captures only loop-invariant state, so define
    # it once rather than rebuilding it per chunk.
    def do_extract(data):
        return extract_scenes(data, contract_path, api_key, model)

    for chunk_idx, chunk in enumerate(chunks):
        run_id = logger.start("L2", scene_id=chunk_idx)
        chunk_input_hash = (
            f"sha256:{hashlib.sha256(json.dumps([e.model_dump() for e in chunk.elements]).encode()).hexdigest()}"
        )
        extraction = execute_with_retry(
            fn=do_extract,
            input_data=chunk,
            layer_id="L2",
            scene_id=chunk_idx,
        )
        if isinstance(extraction, FailureRecord):
            # Whole-chunk failure after retry escalation: record and move on.
            logger.finish(run_id, chunk_input_hash, None, "failed",
                          failure_state=extraction.error, retry_count=len(extraction.attempts))
            all_results.append(extraction)
            print(f"[L2] Chunk {chunk_idx + 1}: FAILED after {len(extraction.attempts)} attempts")
            continue
        # Accumulate token usage for the end-of-run summary.
        total_token_usage["input"] += extraction.token_usage["input"]
        total_token_usage["output"] += extraction.token_usage["output"]
        # Validate each extracted scene against the Scene schema.
        for raw_scene in extraction.raw_scenes:
            scene_num = raw_scene.get("scene_number", "?")
            # Skip if scene_filter is set and doesn't match.
            if scene_filter is not None and scene_num != scene_filter:
                continue
            scene_run_id = logger.start("L2", scene_id=scene_num if isinstance(scene_num, int) else None)
            scene_input_hash = f"sha256:{hashlib.sha256(json.dumps(raw_scene).encode()).hexdigest()}"
            result = validate(raw_scene, Scene)
            all_results.append(result)
            if result.status == "failed":
                logger.finish(scene_run_id, scene_input_hash, None, "failed",
                              failure_state="; ".join(result.errors))
                print(f"[L2] Scene {scene_num}: FAILED validation — {result.errors}")
            else:
                # Write scene output (versioned; non-int scene numbers go to slot 0).
                scene_out = writer.write_raw("L2", scene_num if isinstance(scene_num, int) else 0, raw_scene)
                # Compare against the previous version, if any, to surface drift.
                if isinstance(scene_num, int):
                    prev = writer.read_version("L2", scene_num, scene_out["version"] - 1)
                    if prev:
                        diff = diff_outputs(prev, raw_scene)
                        if diff.changed:
                            print(f"[L2] Scene {scene_num}: DRIFT detected — {diff.drift_categories}")
                # NOTE(review): token_usage logged here is the chunk total, not
                # per-scene — upstream reporting may double-count; confirm intent.
                logger.finish(scene_run_id, scene_input_hash, scene_out["hash"], result.status,
                              token_usage=extraction.token_usage)
                if result.status == "flagged":
                    print(f"[L2] Scene {scene_num}: FLAGGED (contains UNKNOWN values)")
                else:
                    print(f"[L2] Scene {scene_num}: valid")
                all_valid_scenes.append(result.data)
        logger.finish(run_id, chunk_input_hash, "chunk", "valid",
                      token_usage=extraction.token_usage)

    # Run scene-level semantic validation across everything that validated.
    if all_valid_scenes:
        scene_array = SceneArray(scenes=all_valid_scenes)
        all_warnings = validate_scenes(scene_array, heading_count)
        for w in all_warnings:
            print(f"[L2] WARNING: {w.message}")

    # Tally once; both the stop path and the success path report the same counts.
    valid, flagged, failed = _tally(all_results)

    # Evaluate stop conditions (per-unit and global halts).
    stop = evaluate_stop(all_results, heading_count)
    if stop.should_stop:
        print(f"[L2] STOP CONDITION: {stop.reason}")
        return PipelineResult(
            success=False,
            total_scenes=heading_count,
            valid_scenes=valid,
            flagged_scenes=flagged,
            failed_scenes=failed,
            warnings=all_warnings,
            stop_reason=stop.reason,
        )

    print(f"\n[DONE] Scenes: {valid} valid, {flagged} flagged, {failed} failed")
    print(f"[DONE] Tokens: {total_token_usage['input']} in / {total_token_usage['output']} out")
    return PipelineResult(
        success=True,
        total_scenes=heading_count,
        valid_scenes=valid,
        flagged_scenes=flagged,
        failed_scenes=failed,
        warnings=all_warnings,
    )