Session infrastructure: OpenRouter + tree-split reducer + observer→LLM Team + scrum_applier #11

Merged
profit merged 118 commits from scrum/auto-apply-19814 into main 2026-04-27 15:55:24 +00:00
Showing only changes of commit 940737daa7 - Show all commits

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python3
"""
fixup_phone_type.py — Decision D from the synthetic-data gap report.
Converts workers_500k.parquet `phone` column from int64 to string. Phones
in this dataset are 11-digit US numbers (1 + area + 7), e.g. 13122277740.
Stored as int64, the column compares fine numerically but breaks join
keys with string-typed phone columns elsewhere (formatted "+1...", or
loaded from a CSV).
Backs up the original to workers_500k.parquet.bak-<date> before write.
Idempotent: detects when the fix has already been applied and exits 0.
Usage:
python3 scripts/staffing/fixup_phone_type.py
"""
import datetime as dt
import shutil
import sys
from pathlib import Path
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.parquet as pq
# Repository root. The usage line in the module docstring places this script
# at scripts/staffing/, so parents[2] of the resolved file path is the root.
REPO = Path(__file__).resolve().parents[2]
# Parquet dataset whose `phone` column this script converts.
TARGET = REPO / "data" / "datasets" / "workers_500k.parquet"
def main() -> int:
    """Convert the ``phone`` column of ``TARGET`` to a string column.

    The script is idempotent: if ``phone`` is already ``pa.string()`` nothing
    is written. Otherwise the original file is copied to a dated
    ``<name>.bak-<YYYY-MM-DD>`` sibling (an existing same-day backup is kept
    as-is so the true original is never overwritten), the column is cast,
    and the result is written to a temporary sibling file. Only after the
    temporary file has been read back and verified is it moved over
    ``TARGET``, so a failed or interrupted write cannot leave a truncated
    dataset in place.

    Returns:
        0 on success or when no change is needed; 1 when ``TARGET`` is
        missing, has no unique ``phone`` column, or the written file fails
        verification.
    """
    if not TARGET.exists():
        print(f"missing: {TARGET}", file=sys.stderr)
        return 1

    table = pq.read_table(TARGET)

    # get_field_index returns -1 when the name is absent or duplicated;
    # report that instead of letting a KeyError escape.
    phone_idx = table.schema.get_field_index("phone")
    if phone_idx < 0:
        print(f"no unique `phone` column in: {TARGET}", file=sys.stderr)
        return 1

    if table.schema.field(phone_idx).type == pa.string():
        print("phone is already string — no-op")
        return 0

    today = dt.date.today().isoformat()
    # Append to the full file name rather than relying on suffix replacement.
    backup = TARGET.with_name(f"{TARGET.name}.bak-{today}")
    if not backup.exists():
        shutil.copy2(TARGET, backup)
    print(f"backup: {backup.relative_to(REPO)}")

    phone_str = pc.cast(table["phone"], pa.string())
    new_table = table.set_column(
        phone_idx,
        pa.field("phone", pa.string()),
        phone_str,
    )

    # Write next to TARGET so the final rename stays on one filesystem.
    tmp = TARGET.with_name(f"{TARGET.name}.tmp")
    try:
        pq.write_table(new_table, tmp, compression="snappy")

        # Verify what actually landed on disk before touching TARGET.
        round_trip = pq.read_table(tmp, columns=["phone"])
        written_type = round_trip.schema.field("phone").type
        if written_type != pa.string() or round_trip.num_rows != table.num_rows:
            print(
                f"verification failed: phone type {written_type}, "
                f"{round_trip.num_rows} rows (expected {table.num_rows})",
                file=sys.stderr,
            )
            return 1

        sample = round_trip["phone"].slice(0, 3).to_pylist()
        tmp.replace(TARGET)
    finally:
        # After a successful replace the temp path no longer exists; this
        # only cleans up after a failed write or failed verification.
        if tmp.exists():
            tmp.unlink()

    print(f"wrote: {TARGET.relative_to(REPO)}")
    print(f"phone type: {written_type}")
    print(f"sample: {sample}")
    return 0
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())