#!/usr/bin/env python3
"""Staffing agency day simulation — multi-agent stress test.

Simulates a real staffing day: contracts arrive, agents match workers,
draft communications, and a verifier catches every hallucination.

Agents:
  1. CONTRACT MANAGER — generates realistic daily job orders
  2. MATCHER — finds qualified workers via SQL + vector hybrid
  3. COMMUNICATOR — drafts outreach SMS/email to matched workers
  4. VERIFIER — checks every claim against the golden data (zero tolerance)
  5. DISPATCHER — assigns workers, tracks the day's outcome

The golden rule: the synthetic data IS ground truth. Every name, skill,
certification, city, and score the agents cite MUST exist in the actual
dataset. The verifier queries SQL to confirm. Any mismatch = hallucination.
"""

import json
import random
import re
import sys
import time
from datetime import datetime
from urllib.error import HTTPError
from urllib.request import Request, urlopen

BASE = "http://localhost:3100"

# Fields the verifier compares numerically rather than as text.
NUMERIC_FIELDS = ("reliability", "responsiveness", "availability", "compliance")
# Maximum absolute difference tolerated between a claimed and an actual score.
NUMERIC_TOLERANCE = 0.05

random.seed(42)


def post(path, body=None, timeout=120):
    """Send *body* as JSON to ``BASE + path`` and return the decoded reply.

    Args:
        path: URL path appended to ``BASE``.
        body: JSON-serialisable payload. When ``None`` the request carries
            no data at all.
        timeout: Socket timeout in seconds, forwarded to ``urlopen``.

    Returns:
        The decoded JSON response, ``{}`` when the response body is blank,
        or ``{"error": <message>}`` when the request or decoding fails.
        This function never raises for transport or decoding errors.
    """
    # `is not None` (not truthiness) so an explicit empty payload is still sent.
    data = json.dumps(body).encode() if body is not None else None
    req = Request(
        f"{BASE}{path}",
        data=data,
        headers={"Content-Type": "application/json"},
    )
    try:
        # Context manager guarantees the connection is released.
        with urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
        return json.loads(raw) if raw.strip() else {}
    except HTTPError as e:
        # errors="replace": a non-UTF-8 error page must not raise from here.
        return {"error": e.read().decode(errors="replace")[:300]}
    except Exception as e:
        # Deliberate boundary: callers only ever inspect the "error" key.
        return {"error": str(e)}


def sql(query):
    """Run *query* through the ``/query/sql`` endpoint and return its reply."""
    return post("/query/sql", {"sql": query})


def sql_quote(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so values such as ``O'Fallon`` do not
    terminate the literal early.
    """
    return "'" + str(value).replace("'", "''") + "'"


def parse_cert_set(value):
    """Normalise a certification field into a set of lower-cased names.

    Accepts ``None``, a comma-separated string, or a list/tuple/set of names.
    Blank entries are dropped.
    """
    if value is None:
        return set()
    if isinstance(value, (list, tuple, set, frozenset)):
        items = value
    else:
        items = str(value).split(",")
    return {str(item).strip().lower() for item in items} - {""}


def has_required_certs(worker, required_certs):
    """Return True when *worker* holds every certification in *required_certs*.

    The comparison is case-insensitive; a worker whose ``certifications``
    field is missing or ``None`` holds no certifications.
    """
    required = parse_cert_set(required_certs)
    return required.issubset(parse_cert_set(worker.get("certifications")))


def find_discrepancies(actual, claims):
    """Compare *claims* with the golden row *actual*.

    Args:
        actual: Mapping of field name to the value stored in the golden data.
        claims: Mapping of field name to the value an agent asserted.

    Returns:
        A list of human-readable discrepancy strings; empty when every claim
        holds. Fields whose golden value is missing or ``None`` are skipped.
    """
    discrepancies = []
    for field, claimed in (claims or {}).items():
        actual_val = actual.get(field)
        if actual_val is None:
            continue
        if field in NUMERIC_FIELDS:
            try:
                delta = abs(float(actual_val) - float(claimed))
            except (ValueError, TypeError):
                # A score that cannot be compared is itself a mismatch, not
                # something to ignore silently.
                discrepancies.append(
                    f"{field}: claimed={claimed} actual={actual_val} (not comparable)"
                )
            else:
                if delta > NUMERIC_TOLERANCE:
                    discrepancies.append(f"{field}: claimed={claimed} actual={actual_val}")
        elif field == "certifications":
            actual_certs = parse_cert_set(actual_val)
            missing = parse_cert_set(claimed) - actual_certs
            if missing:
                discrepancies.append(
                    f"certifications: claimed {missing} not in actual {actual_certs}"
                )
        elif str(actual_val).lower().strip() != str(claimed).lower().strip():
            discrepancies.append(f"{field}: claimed='{claimed}' actual='{actual_val}'")
    return discrepancies


def count_mentioned(answer, count):
    """Return True when *answer* states the number *count* as a whole number.

    Digits that are merely part of a longer number (``12`` inside ``120`` or
    ``1,012``) do not count. A thousands-separated rendering (``1,234``) is
    accepted as well. Non-integer counts fall back to a substring test.
    """
    try:
        number = int(count)
    except (TypeError, ValueError):
        return str(count) in answer
    for text in {str(number), f"{number:,}"}:
        pattern = rf"(?<![\d.,]){re.escape(text)}(?![.,]?\d)"
        if re.search(pattern, answer):
            return True
    return False


def compute_accuracy(checked, failed):
    """Return the percentage of checked workers that passed verification.

    The result is always within 0..100; it is 0.0 when nothing was checked.
    """
    if checked <= 0:
        return 0.0
    passed = max(checked - failed, 0)
    return 100.0 * passed / checked


# ══════════════════════════════════════════════════════
# DAILY CONTRACTS — realistic job orders for the day
# ══════════════════════════════════════════════════════
CONTRACTS = [
    {
        "id": "JO-2026-001",
        "client": "Midwest Logistics Inc",
        "role": "Forklift Operator",
        "state": "IL",
        "city": "Chicago",
        "required_certs": ["OSHA-10"],
        "min_reliability": 0.8,
        "headcount": 3,
        "urgency": "high",
        "notes": "Warehouse expansion, need certified forklift ops immediately",
    },
    {
        "id": "JO-2026-002",
        "client": "Precision Manufacturing",
        "role": "Machine Operator",
        "state": "IN",
        "min_reliability": 0.7,
        "required_certs": [],
        "headcount": 5,
        "urgency": "medium",
        "notes": "2nd shift, CNC experience preferred",
    },
    {
        "id": "JO-2026-003",
        "client": "CleanSpace Facilities",
        "role": "Sanitation Worker",
        "state": "OH",
        "required_certs": ["Hazmat"],
        "min_reliability": 0.6,
        "headcount": 2,
        "urgency": "low",
        "notes": "Chemical plant, hazmat cert mandatory",
    },
    {
        "id": "JO-2026-004",
        "client": "Amazon DSP Partner",
        "role": "Loader",
        "state": "IL",
        "city": "Springfield",
        "required_certs": [],
        "min_reliability": 0.75,
        "headcount": 4,
        "urgency": "high",
        "notes": "Peak season, need physically fit workers",
    },
    {
        "id": "JO-2026-005",
        "client": "AutoParts Direct",
        "role": "Quality Tech",
        "state": "MO",
        "required_certs": ["OSHA-30"],
        "min_reliability": 0.85,
        "headcount": 2,
        "urgency": "medium",
        "notes": "Inspection station, attention to detail critical",
    },
]


# ══════════════════════════════════════════════════════
# AGENT 1: MATCHER — SQL + vector hybrid
# ══════════════════════════════════════════════════════
def match_workers(contract):
    """Find qualified workers via SQL (structured) + vector (semantic).

    Args:
        contract: A job-order dict shaped like the entries of ``CONTRACTS``.

    Returns:
        ``(sql_matches, vec_matches)``: two lists. ``sql_matches`` holds at
        most ``headcount * 2`` rows (primary picks plus backups) that satisfy
        role, state, optional city, minimum reliability and required
        certifications. Both lists are empty when the SQL query fails; the
        error is reported on stderr.
    """
    # SQL path: exact role, state, reliability, certs
    where = [
        f"role = {sql_quote(contract['role'])}",
        f"state = {sql_quote(contract['state'])}",
        f"reliability >= {contract['min_reliability']}",
    ]
    if contract.get("city"):
        where.append(f"city = {sql_quote(contract['city'])}")

    sql_query = (
        " SELECT worker_id, name, role, city, state, skills, certifications,"
        " ROUND(reliability,2) rel, ROUND(availability,2) avail, archetype"
        " FROM ethereal_workers"
        f" WHERE {' AND '.join(where)}"
        " ORDER BY reliability DESC, availability DESC"
        " LIMIT 20 "
    )
    sql_result = sql(sql_query)
    if "error" in sql_result:
        # Keep the return shape (list, list); callers take len() of both.
        print(f"SQL error: {str(sql_result['error'])[:80]}", file=sys.stderr)
        return [], []

    sql_matches = sql_result.get("rows") or []

    # Filter by required certs
    required = contract.get("required_certs")
    if required:
        sql_matches = [w for w in sql_matches if has_required_certs(w, required)]

    # Vector path: semantic search for nuanced matching
    vector_query = f"{contract['role']} in {contract['state']} {contract.get('notes', '')}"
    vec_result = post("/vectors/hnsw/search", {
        "index_name": "ethereal_workers_v1",
        "query": vector_query,
        "top_k": 10,
    })
    vec_matches = vec_result.get("results", []) if "error" not in vec_result else []

    return sql_matches[:contract["headcount"] * 2], vec_matches


# ══════════════════════════════════════════════════════
# AGENT 2: COMMUNICATOR — drafts outreach
# ══════════════════════════════════════════════════════
def draft_communication(contract, worker):
    """Ask the LLM to draft an outreach SMS for a matched worker.

    Returns:
        ``(message, None)`` on success or ``(None, error)`` on failure.
        ``message`` may be an empty string when the service returns no text.
    """
    prompt = (
        "Draft a short professional SMS (under 160 chars) to a staffing worker"
        " about a job opportunity."
        f" Worker: {worker['name']}, {worker['role']} in {worker['city']}, {worker['state']}"
        f" Job: {contract['role']} for {contract['client']}"
        f" in {contract.get('city', contract['state'])}"
        f" Urgency: {contract['urgency']}"
        " Include their name. Be direct. SMS only — no subject line, no greeting."
    )
    r = post("/ai/generate", {
        "prompt": prompt,
        "model": "qwen2.5",
        "max_tokens": 80,
        "temperature": 0.3,
    })
    if "error" in r:
        return None, r["error"]
    # `or ""` tolerates an explicit null "text" in the reply.
    return (r.get("text") or "").strip(), None


# ══════════════════════════════════════════════════════
# AGENT 3: VERIFIER — catches hallucinations
# ══════════════════════════════════════════════════════
def verify_worker(worker_id, claims):
    """Check every claim about a worker against the golden data.

    Args:
        worker_id: Numeric worker id (a number or a string of digits).
        claims: Dict of ``{field: claimed_value}`` to verify. Pass an empty
            dict to check only that the worker exists.

    Returns:
        ``(verified_ok, discrepancies)``.
    """
    not_found = [f"worker_id {worker_id} not found in golden data"]

    # Only numeric ids reach the query text; anything else cannot match.
    if isinstance(worker_id, str):
        if not worker_id.strip().isdecimal():
            return False, not_found
        id_literal = str(int(worker_id))
    elif isinstance(worker_id, (int, float)):
        id_literal = f"{worker_id}"
    else:
        return False, not_found

    result = sql(f"SELECT * FROM ethereal_workers WHERE worker_id = {id_literal}")
    if "error" in result:
        # A failed lookup is not the same thing as a missing worker.
        reason = str(result["error"])[:80]
        return False, [
            f"worker_id {worker_id} could not be checked against golden data: {reason}"
        ]
    if not result.get("rows"):
        return False, not_found

    discrepancies = find_discrepancies(result["rows"][0], claims)
    return len(discrepancies) == 0, discrepancies


# ══════════════════════════════════════════════════════
# AGENT 4: LLM ANALYZER — answers staffing questions
# ══════════════════════════════════════════════════════
def ask_staffing_question(question, verify=True):
    """Ask a question, get an answer, verify facts against golden data.

    Returns:
        ``(answer, sources, hallucinations)``. ``answer`` is ``None`` when
        the RAG request fails (the error is reported on stderr);
        ``hallucinations`` is always a list of discrepancy strings.
    """
    # RAG search
    r = post("/vectors/rag", {
        "index_name": "ethereal_workers_v1",
        "question": question,
        "top_k": 5,
    }, timeout=180)

    if "error" in r:
        print(f"RAG error: {str(r['error'])[:200]}", file=sys.stderr)
        return None, [], []

    answer = r.get("answer", "")
    sources = r.get("sources") or []

    hallucinations = []
    if verify:
        # Check each source worker exists
        for s in sources:
            wid = str(s.get("doc_id", "")).replace("W-", "")
            if wid.isdecimal():
                # Empty claims: existence check only. Claiming name="" would
                # mismatch every real name and flag every source.
                ok, issues = verify_worker(int(wid), {})
                if not ok:
                    hallucinations.extend(issues)

    return answer, sources, hallucinations


# ══════════════════════════════════════════════════════
# MAIN SIMULATION
# ══════════════════════════════════════════════════════
def _first_count(result):
    """Return the ``cnt`` column of the first row of *result*, or None."""
    rows = result.get("rows") or []
    return rows[0].get("cnt") if rows else None


def _process_contracts(stats):
    """Run the morning phase: match, verify and message workers per contract.

    Mutates *stats* and returns one fill summary dict per contract.
    """
    print("\n╔══ MORNING: CONTRACT PROCESSING ══════════════════════")
    all_assignments = []

    for contract in CONTRACTS:
        headcount = contract["headcount"]
        print(f"\n║ Contract {contract['id']}: {contract['role']} × {headcount}")
        print(f"║ Client: {contract['client']} | {contract.get('city', contract['state'])}, {contract['state']}")
        print(f"║ Certs: {contract.get('required_certs', [])} | Min reliability: {contract['min_reliability']}")

        t0 = time.time()
        sql_matches, vec_matches = match_workers(contract)
        ms = (time.time() - t0) * 1000
        print(f"║ SQL matches: {len(sql_matches)} | Vector hits: {len(vec_matches)} ({ms:.0f}ms)")

        # Verify candidates in ranking order. The matcher returns backups, so
        # keep going past a failed candidate until the headcount is filled.
        verified = []
        for w in sql_matches:
            if len(verified) >= headcount:
                break
            claims = {
                "name": w["name"],
                "role": w["role"],
                "city": w["city"],
                "state": w["state"],
                "reliability": w["rel"],
            }
            if contract.get("required_certs"):
                claims["certifications"] = w.get("certifications", "")

            ok, issues = verify_worker(w["worker_id"], claims)
            stats["workers_verified"] += 1
            if ok:
                verified.append(w)
                icon = "✓"
            else:
                stats["workers_failed"] += 1
                stats["hallucinations_caught"] += len(issues)
                icon = "✗ HALLUCINATION"
                print(f"║ {icon}: {issues}")
            print(f"║ {icon} W-{w['worker_id']}: {w['name']} ({w['role']}) rel={w['rel']} avail={w['avail']}")

        stats["workers_matched"] += len(verified)
        stats["contracts_processed"] += 1

        # Draft comms for verified matches
        for w in verified:
            msg, err = draft_communication(contract, w)
            if msg:
                stats["messages_drafted"] += 1
                # Verify the message mentions the correct name
                name_parts = str(w["name"]).split()
                first_name = name_parts[0].lower() if name_parts else ""
                if first_name and first_name in msg.lower():
                    print(f"║ 📱 → {w['name']}: {msg[:120]}")
                else:
                    stats["hallucinations_caught"] += 1
                    print(f"║ ⚠ SMS doesn't mention worker name: {msg[:80]}")
            elif err:
                print(f"║ ✗ SMS draft failed: {err[:60]}")

        all_assignments.append({
            "contract": contract["id"],
            "filled": len(verified),
            "needed": headcount,
        })

    print("╚══════════════════════════════════════════════════════")
    return all_assignments


def _answer_questions(stats):
    """Run the afternoon phase: ask staffing questions and fact-check them.

    Mutates *stats*.
    """
    print("\n╔══ AFTERNOON: STAFFING INTELLIGENCE ══════════════════")

    questions = [
        ("Who are the most reliable forklift operators in Illinois?",
         {"check": "state", "expected": "IL"}),
        ("Which workers have hazmat certification in Ohio?",
         {"check": "state_and_cert", "expected_state": "OH", "expected_cert": "hazmat"}),
        ("Find machine operators with CNC experience",
         {"check": "skill", "expected": "cnc"}),
        ("Who are the 'erratic' archetype workers and should we flag them?",
         {"check": "archetype", "expected": "erratic"}),
        ("Which leaders in Indiana have the highest availability?",
         {"check": "archetype_state", "expected_arch": "leader", "expected_state": "IN"}),
    ]

    for question, verification in questions:
        print(f"\n║ Q: {question}")
        t0 = time.time()
        answer, sources, hallucinations = ask_staffing_question(question)
        ms = (time.time() - t0) * 1000

        if answer:
            # Only a question that produced an answer counts as answered.
            stats["questions_answered"] += 1
            print(f"║ A ({ms:.0f}ms, {len(answer)} chars): {answer[:200]}...")

            # Verify against SQL ground truth
            check = verification.get("check")
            if check == "state":
                truth = sql(
                    "SELECT name, reliability FROM ethereal_workers"
                    f" WHERE state = {sql_quote(verification['expected'])}"
                    " AND role LIKE '%Forklift%' ORDER BY reliability DESC LIMIT 5"
                )
                if "error" not in truth:
                    names = [r["name"] for r in truth.get("rows", [])]
                    lowered = answer.lower()
                    found_in_answer = sum(1 for n in names if n.lower() in lowered)
                    stats["questions_verified"] += 1
                    if found_in_answer == 0:
                        stats["verification_failures"] += 1
                        print(f"║ ⚠ VERIFY: top workers {names[:3]} NOT mentioned in answer")
                    else:
                        print(f"║ ✓ VERIFY: {found_in_answer}/{len(names)} top workers mentioned")

            elif check == "archetype":
                truth = sql(
                    "SELECT COUNT(*) cnt FROM ethereal_workers"
                    f" WHERE archetype = {sql_quote(verification['expected'])}"
                )
                actual_count = _first_count(truth) if "error" not in truth else None
                if actual_count is not None:
                    stats["questions_verified"] += 1
                    if count_mentioned(answer, actual_count):
                        print(f"║ ✓ VERIFY: correct count ({actual_count}) in answer")
                    else:
                        print(f"║ ⚠ VERIFY: actual count is {actual_count}, not found in answer")
                        stats["verification_failures"] += 1

            elif check == "skill":
                truth = sql("SELECT COUNT(*) cnt FROM ethereal_workers WHERE skills LIKE '%CNC%' AND role LIKE '%Machine%'")
                cnc_count = _first_count(truth) if "error" not in truth else None
                if cnc_count is not None:
                    stats["questions_verified"] += 1
                    print(f"║ ✓ VERIFY: {cnc_count} machine operators with CNC in system")

        if hallucinations:
            stats["hallucinations_caught"] += len(hallucinations)
            print(f"║ ✗ HALLUCINATIONS: {hallucinations}")

    print("╚══════════════════════════════════════════════════════")


def _print_scorecard(stats, all_assignments):
    """Print the end-of-day scorecard and return the data-accuracy percentage."""
    print("\n" + "=" * 70)
    print("END OF DAY SCORECARD")
    print("=" * 70)

    total_filled = sum(a["filled"] for a in all_assignments)
    total_needed = sum(a["needed"] for a in all_assignments)
    fill_rate = total_filled / max(total_needed, 1) * 100

    print(f"\n Contracts processed: {stats['contracts_processed']}/{len(CONTRACTS)}")
    print(f" Positions filled: {total_filled}/{total_needed} ({fill_rate:.0f}%)")
    print(f" Workers verified: {stats['workers_verified']}")
    print(f" Messages drafted: {stats['messages_drafted']}")
    print(f" Questions answered: {stats['questions_answered']}")
    print(f" Questions fact-checked: {stats['questions_verified']}")
    print("\n ┌─ TRUST METRICS ─────────────────────────")
    print(f" │ Hallucinations caught: {stats['hallucinations_caught']}")
    print(f" │ Verification failures: {stats['verification_failures']}")
    # Accuracy is per worker: one worker with several bad fields, or an SMS
    # or question issue, must not push the percentage below zero.
    accuracy = compute_accuracy(stats["workers_verified"], stats["workers_failed"])
    print(f" │ Data accuracy: {accuracy:.1f}%")
    print(" └──────────────────────────────────────────")

    print("\n Contract breakdown:")
    for a in all_assignments:
        if a["filled"] >= a["needed"]:
            icon = "✓"
        elif a["filled"] > 0:
            icon = "△"
        else:
            icon = "✗"
        print(f" {icon} {a['contract']}: {a['filled']}/{a['needed']} filled")

    if stats["hallucinations_caught"] == 0 and stats["verification_failures"] == 0:
        print("\n ★ ZERO HALLUCINATIONS — all agent outputs verified against golden data")
    else:
        print(f"\n ⚠ {stats['hallucinations_caught']} hallucination(s) + {stats['verification_failures']} verification gap(s)")
        print(" → these are the gaps to close before production")

    return accuracy


def main():
    """Run the full simulated day and return a process exit code.

    Returns:
        0 when at least 95% of the verified workers matched the golden data,
        otherwise 1.
    """
    print("=" * 70)
    print("STAFFING AGENCY DAY SIMULATION")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d')}")
    print(f"Contracts: {len(CONTRACTS)} | Workers: 10,000 | Golden data: ethereal_workers")
    print("=" * 70)

    stats = {
        "contracts_processed": 0,
        "workers_matched": 0,
        "workers_verified": 0,
        "workers_failed": 0,
        "hallucinations_caught": 0,
        "messages_drafted": 0,
        "questions_answered": 0,
        "questions_verified": 0,
        "verification_failures": 0,
    }

    # ── Morning: Process contracts ──
    all_assignments = _process_contracts(stats)

    # ── Afternoon: Staffing questions ──
    _answer_questions(stats)

    # ── End of day: Scorecard ──
    accuracy = _print_scorecard(stats, all_assignments)

    return 0 if accuracy >= 95 else 1


if __name__ == "__main__":
    sys.exit(main())