Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.
WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) is excluded — see REPLICATION.md for
how to regenerate it. The full lakehouse history lives at
git.agentview.dev/profit/lakehouse.
WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
  * UPSERT: ADD on a new workflow; UPDATE bumps replay_count on an identical one
  * REVISE: chains versions; parent.superseded_at + superseded_by are stamped
  * RETIRE: marks a specific trace retired with a reason; it is excluded from retrieval
  * HISTORY: walks the chain root→tip, cycle-safe
KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces
Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
302 lines
16 KiB
Python
302 lines
16 KiB
Python
#!/usr/bin/env python3
|
||
"""Staffing Co-Pilot — the anticipation layer.

This isn't a tool you query. It's a system that watches your contracts,
your workers, your patterns — and tells you what you need before you
ask. It runs before the staffer starts their day.

Output: a structured briefing that any UI can render.

Layers:
  1. CONTRACT SCAN — what needs filling today
  2. PRE-MATCH — workers already identified per contract
  3. ALERTS — cert expirations, reliability drops, unfilled positions
  4. SUGGESTIONS — proactive opportunities the staffer wouldn't see
  5. BRIEFING — natural language summary for the staffer's morning

Each layer feeds the next. The briefing is the human-facing output;
the structured data behind it feeds the agent gateway so any action
the staffer takes is one click away.
"""
|
||
|
||
import json, time, sys
|
||
from datetime import datetime, timedelta
|
||
from urllib.request import Request, urlopen
|
||
from urllib.error import HTTPError
|
||
|
||
GW = "http://localhost:3700"
|
||
LH = "http://localhost:3100"
|
||
|
||
def gw(path, body=None, timeout=180):
    """Send one JSON request to the gateway at ``GW`` and return the parsed reply.

    Args:
        path: URL path (and optional query string) appended to ``GW``.
        body: JSON-serialisable payload. ``None`` issues a GET; any other
            value — including an empty dict or list — is POSTed as JSON.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        The decoded JSON response on success. On any failure a dict of the
        form ``{"error": <message>}`` is returned instead of raising; for
        HTTP errors the message is the first 200 characters of the error body.
    """
    # Test against None explicitly: a falsy-but-present payload such as {}
    # must still be sent as a POST body rather than silently becoming a GET.
    has_body = body is not None
    req = Request(
        f"{GW}{path}",
        data=json.dumps(body).encode() if has_body else None,
        method="POST" if has_body else "GET",
        headers={"Content-Type": "application/json"} if has_body else {},
    )
    try:
        # The context manager closes the connection even if parsing fails.
        with urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read())
    except HTTPError as e:
        # errors="replace" keeps a non-UTF-8 error body from raising inside
        # this handler, where the generic except below could not catch it.
        return {"error": e.read().decode(errors="replace")[:200]}
    except Exception as e:
        # Deliberate best-effort boundary: callers branch on the "error" key.
        return {"error": str(e)}
|
||
|
||
def sql(query):
    """Run *query* through the gateway's ``/sql`` endpoint.

    Returns the response's ``rows`` list, or an empty list when the gateway
    reply carries an ``error`` key or has no ``rows`` entry.
    """
    reply = gw("/sql", {"sql": query})
    if "error" in reply:
        return []
    return reply.get("rows", [])
|
||
|
||
def gen(prompt, model="qwen3", max_tokens=400):
    """Generate text for *prompt* via the gateway's ``/api/ai/generate`` endpoint.

    Args:
        prompt: Prompt text sent to the model.
        model: Model name forwarded in the request.
        max_tokens: Generation cap forwarded in the request.

    Returns:
        The response's ``text`` field with any ``<think>`` reasoning removed.
        An empty string is returned when the reply has no usable text
        (for example when the gateway call reported an error).
    """
    r = gw("/api/ai/generate", {"prompt": prompt, "model": model,
                                "max_tokens": max_tokens, "temperature": 0.3})
    # `or ""` also covers a present-but-null "text" field.
    text = r.get("text") or ""
    if "</think>" in text:
        # Keep only what follows the last closing tag; this also handles
        # replies where the opening tag is absent.
        return text.split("</think>")[-1].strip()
    if "<think>" in text:
        # Reasoning block was never closed (e.g. cut off by max_tokens):
        # drop the unfinished reasoning instead of returning it as the answer.
        return text.split("<think>")[0].strip()
    return text
|
||
|
||
# ═══════════════════════════════════════════════════
# TODAY'S CONTRACTS — simulated but structured like real ops
# ═══════════════════════════════════════════════════

# One dict per open contract. "city" is optional; every other key is present
# on each entry. "min_rel" is the reliability threshold used when searching
# for workers, and "priority" is one of urgent / high / medium / low.
TODAYS_CONTRACTS = [
    {
        "id": "C-4401",
        "client": "Midwest Logistics",
        "role": "Forklift Operator",
        "state": "IL",
        "city": "Chicago",
        "headcount": 4,
        "min_rel": 0.8,
        "certs": ["OSHA-10"],
        "priority": "high",
        "start": "7:00 AM",
        "notes": "Warehouse expansion — client wants workers who've been there before",
    },
    {
        "id": "C-4402",
        "client": "Precision Manufacturing",
        "role": "Machine Operator",
        "state": "IN",
        "headcount": 6,
        "min_rel": 0.75,
        "certs": [],
        "priority": "medium",
        "start": "6:00 AM",
        "notes": "2nd shift CNC line, prefer experienced operators",
    },
    {
        "id": "C-4403",
        "client": "CleanSpace Facilities",
        "role": "Sanitation Worker",
        "state": "OH",
        "headcount": 2,
        "min_rel": 0.6,
        "certs": ["Hazmat"],
        "priority": "medium",
        "start": "8:00 AM",
        "notes": "Chemical plant — hazmat certification MANDATORY",
    },
    {
        "id": "C-4404",
        "client": "Amazon DSP (Springfield)",
        "role": "Loader",
        "state": "IL",
        "city": "Springfield",
        "headcount": 8,
        "min_rel": 0.7,
        "certs": [],
        "priority": "urgent",
        "start": "5:00 AM",
        "notes": "Peak season surge — client called last night, needs bodies NOW",
    },
    {
        "id": "C-4405",
        "client": "AutoParts Direct",
        "role": "Quality Tech",
        "state": "MO",
        "headcount": 2,
        "min_rel": 0.85,
        "certs": ["OSHA-30"],
        "priority": "low",
        "start": "8:00 AM",
        "notes": "ISO audit next week — need detail-oriented, compliant workers",
    },
    {
        "id": "C-4406",
        "client": "Great Lakes Steel",
        "role": "Welder",
        "state": "OH",
        "city": "Cleveland",
        "headcount": 3,
        "min_rel": 0.8,
        "certs": [],
        "priority": "high",
        "start": "6:30 AM",
        "notes": "Structural welding — experienced only, no trainees",
    },
]
|
||
|
||
# ═══════════════════════════════════════════════════
# Console banner, then the empty structured briefing that later sections fill.
_banner_rule = "═" * 63
_banner_date = datetime.now().strftime("%A, %B %d, %Y")
for _banner_line in (
    f"╔{_banner_rule}╗",
    "║ STAFFING CO-PILOT — Morning Briefing ║",
    f"║ {_banner_date} ║",
    f"╚{_banner_rule}╝",
):
    print(_banner_line)

# Accumulator for the machine-readable briefing; key order is kept as-is
# because it determines the order of keys in the JSON written at the end.
briefing = dict(contracts=[], alerts=[], suggestions=[], stats={})
|
||
|
||
# ═══════════════════════════════════════════════════
# LAYER 1: CONTRACT SCAN + PRE-MATCH
# ═══════════════════════════════════════════════════
print("\n┌─ TODAY'S CONTRACTS ────────────────────────────────")

# Lookup tables built once instead of per contract.
_PRIORITY_RANK = {"urgent": 0, "high": 1, "medium": 2, "low": 3}
_PRIORITY_ICON = {"urgent": "🔴", "high": "🟠", "medium": "🟡", "low": "🟢"}

total_needed = 0
total_prematched = 0


def _by_priority(contract):
    """Sort key: most urgent contracts first."""
    return _PRIORITY_RANK[contract["priority"]]


for contract in sorted(TODAYS_CONTRACTS, key=_by_priority):
    headcount = contract["headcount"]
    total_needed += headcount
    icon = _PRIORITY_ICON[contract["priority"]]

    # Build the SQL pre-filter for the hybrid search; city is optional.
    clauses = [
        f"role = '{contract['role']}'",
        f"state = '{contract['state']}'",
        f"reliability >= {contract['min_rel']}",
    ]
    if contract.get("city"):
        clauses.append(f"city = '{contract['city']}'")
    sql_filter = " AND ".join(clauses)

    search_request = {
        "question": f"Best {contract['role']} workers for {contract['notes']}",
        "sql_filter": sql_filter,
        "top_k": headcount + 2,  # two extra results serve as backups
        "generate": False,
    }
    response = gw("/search", search_request)

    candidates = response.get("sources", [])
    shortlist = candidates[:headcount]
    filled = min(len(candidates), headcount)
    backups = len(candidates) - filled
    total_prematched += filled

    if filled >= headcount:
        status = "✓ READY"
    else:
        status = f"⚠ {headcount-filled} UNFILLED"

    location = contract.get("city", contract["state"])
    print("│")
    print(f"│ {icon} {contract['id']} — {contract['client']}")
    print(f"│ {contract['role']} × {headcount} | {location}, {contract['state']} | Start: {contract['start']}")
    print(f"│ Status: {status} ({filled} matched, {backups} backups)")

    # List the workers that would fill the positions.
    for rank, hit in enumerate(shortlist, start=1):
        chunk = hit.get("chunk_text", "")
        # The part of the chunk before the first em dash is shown as the
        # worker's label; without a dash the document id is shown instead.
        if "—" in chunk:
            label = chunk.split("—")[0].strip()
        else:
            label = hit["doc_id"]
        print(f"│ {rank}. {label} (score: {hit['score']:.2f})")

    if contract.get("certs"):
        print(f"│ ⚠ Cert required: {', '.join(contract['certs'])}")
    print(f"│ 📝 {contract['notes']}")

    briefing["contracts"].append({
        "id": contract["id"],
        "client": contract["client"],
        "role": contract["role"],
        "filled": filled,
        "needed": headcount,
        "priority": contract["priority"],
        "matches": [{"doc_id": hit["doc_id"], "score": hit["score"]} for hit in shortlist],
    })

# max(..., 1) guards the division when there are no positions at all.
fill_pct = total_prematched / max(total_needed, 1) * 100
print("│")
print(f"│ 📊 Pre-match: {total_prematched}/{total_needed} ({fill_pct:.0f}%)")
print("└──────────────────────────────────────────────────────")
|
||
|
||
# ═══════════════════════════════════════════════════
# LAYER 2: ALERTS
# ═══════════════════════════════════════════════════
print("\n┌─ ALERTS ──────────────────────────────────────────")

# Alert: lowest-reliability workers tagged with the 'erratic' archetype.
erratic_rows = sql("SELECT name, role, city, state, ROUND(reliability,2) rel FROM ethereal_workers WHERE archetype = 'erratic' AND reliability < 0.4 ORDER BY reliability LIMIT 5")
if erratic_rows:
    print(f"│ ⚠ {len(erratic_rows)} erratic workers with low reliability — flag for review:")
    # Only the first three are listed; the alert records the full count.
    for worker in erratic_rows[:3]:
        print(f"│ {worker['name']} ({worker['role']}, {worker['city']}) — rel: {worker['rel']}")
    briefing["alerts"].append({"type": "erratic_workers", "count": len(erratic_rows)})

# Alert: 'silent' archetype workers with low responsiveness.
silent_rows = sql("SELECT COUNT(*) cnt FROM ethereal_workers WHERE archetype = 'silent' AND responsiveness < 0.3")
silent_count = silent_rows[0].get("cnt", 0) if silent_rows else 0
if silent_count > 0:
    print(f"│ 📵 {silent_count} silent workers with low responsiveness — may need different outreach channel")
    briefing["alerts"].append({"type": "silent_workers", "count": silent_count})

# Alert: per state, roles with fewer than five workers at reliability >= 0.8.
for state_code in ("IL", "IN", "OH", "MO"):
    thin_rows = sql(f"SELECT role, COUNT(*) cnt FROM ethereal_workers WHERE state = '{state_code}' AND reliability >= 0.8 GROUP BY role HAVING COUNT(*) < 5 ORDER BY cnt")
    if not thin_rows:
        continue
    thin_labels = [f"{row['role']}({row['cnt']})" for row in thin_rows[:3]]
    print(f"│ 📉 {state_code}: thin bench on {', '.join(thin_labels)}")
    briefing["alerts"].append({"type": "thin_bench", "state": state_code, "roles": thin_labels})

print("└──────────────────────────────────────────────────────")
|
||
|
||
# ═══════════════════════════════════════════════════
# LAYER 3: PROACTIVE SUGGESTIONS
# ═══════════════════════════════════════════════════
print("\n┌─ SUGGESTIONS ─────────────────────────────────────")

# Suggestion: top five workers with reliability and availability both >= 0.9.
# NOTE(review): the query does not exclude workers already matched in Layer 1,
# although the printed heading says "not yet assigned" — confirm intent.
top_available = sql("""
SELECT name, role, city, state, ROUND(reliability,2) rel, ROUND(availability,2) avail
FROM ethereal_workers
WHERE reliability >= 0.9 AND availability >= 0.9 AND archetype IN ('reliable', 'leader')
ORDER BY reliability DESC, availability DESC
LIMIT 5
""")
if top_available:
    print("│ 💎 Top available workers not yet assigned today:")
    for worker in top_available:
        print(f"│ {worker['name']} — {worker['role']} in {worker['city']}, {worker['state']} (rel: {worker['rel']}, avail: {worker['avail']})")
    briefing["suggestions"].append({"type": "top_available", "count": len(top_available)})

# Suggestion: role/state pairs with more than 20 workers at reliability >= 0.8.
deep_bench = sql("""
SELECT role, state, COUNT(*) workers, ROUND(AVG(reliability),2) avg_rel
FROM ethereal_workers
WHERE reliability >= 0.8
GROUP BY role, state
HAVING COUNT(*) > 20
ORDER BY workers DESC
LIMIT 3
""")
if deep_bench:
    print("│ 📈 Deep bench — could fill larger orders:")
    for bench in deep_bench:
        print(f"│ {bench['role']} in {bench['state']}: {bench['workers']} workers (avg rel: {bench['avg_rel']})")
    briefing["suggestions"].append({"type": "deep_bench", "roles": [bench["role"] for bench in deep_bench]})

# Suggestion: surface the first stored playbook matching the keyword "fill".
playbook_reply = gw("/playbooks?keyword=fill&limit=3")
if isinstance(playbook_reply, dict):
    playbook_hits = playbook_reply.get("playbooks", [])
else:
    playbook_hits = []
if playbook_hits:
    # Only the first 70 characters of the playbook result are shown.
    print(f"│ 📚 From playbook: {playbook_hits[0].get('result', '?')[:70]}")

print("└──────────────────────────────────────────────────────")
|
||
|
||
# ═══════════════════════════════════════════════════
# LAYER 4: MORNING BRIEFING (qwen3 generates)
# ═══════════════════════════════════════════════════
print("\n┌─ BRIEFING ────────────────────────────────────────")


def _contract_line(contract):
    """Describe one contract as a short clause for the model prompt."""
    place = contract.get("city", contract["state"])
    return (f"{contract['id']} {contract['client']} in {place} needs "
            f"{contract['headcount']} {contract['role']}(s) by {contract['start']}")


# Derive the priority call-outs from TODAYS_CONTRACTS rather than hard-coding
# contract ids and headcounts, so the prompt cannot drift from the data.
urgent_contracts = [c for c in TODAYS_CONTRACTS if c["priority"] == "urgent"]
high_contracts = [c for c in TODAYS_CONTRACTS if c["priority"] == "high"]

briefing_data = f"""Today's summary:
- {len(TODAYS_CONTRACTS)} contracts, {total_needed} positions total
- Pre-matched: {total_prematched}/{total_needed} ({fill_pct:.0f}%)
- Urgent: {len(urgent_contracts)} contracts need immediate attention
- High priority: {len(high_contracts)} contracts
- Alerts: {len(briefing['alerts'])} items flagged
- Top available workers identified for proactive placement"""

priority_lines = []
if urgent_contracts:
    priority_lines.append(
        "Urgent: " + "; ".join(_contract_line(c) for c in urgent_contracts)
        + " — this is your #1 priority."
    )
if high_contracts:
    priority_lines.append(
        "High priority: " + "; ".join(_contract_line(c) for c in high_contracts) + "."
    )
priority_text = "\n".join(priority_lines)

morning_brief = gen(f"""You are a staffing co-pilot. Write a concise morning briefing for a staffing coordinator.
Be direct, actionable, no fluff. Tell them what to focus on first.

Data:
{briefing_data}

{priority_text}

Write the briefing in 6 lines max. Start with the most urgent action.""", model="qwen3", max_tokens=300)

print("│")
brief_lines = [line.strip() for line in morning_brief.strip().split("\n")[:8] if line.strip()]
if brief_lines:
    for line in brief_lines:
        print(f"│ {line}")
else:
    # gen() yields an empty string when the gateway call fails; say so
    # instead of printing an empty box.
    print("│ (briefing unavailable — text generation returned no content)")
print("│")
print("└──────────────────────────────────────────────────────")
|
||
|
||
# ═══════════════════════════════════════════════════
# LAYER 5: ACTION QUEUE — ready for one-click execution
# ═══════════════════════════════════════════════════
print("\n┌─ ACTION QUEUE (ready for staffer) ─────────────────")

# One action per contract: either chase the shortfall or confirm the matches.
actions = []
for c_data in briefing["contracts"]:
    if c_data["filled"] < c_data["needed"]:
        actions.append(f"⚠ FILL: {c_data['id']} needs {c_data['needed']-c_data['filled']} more {c_data['role']}(s)")
    elif c_data["matches"]:
        actions.append(f"📱 CONFIRM: {c_data['id']} — {c_data['filled']} workers pre-matched, send outreach")

# At most eight actions are displayed; the total below counts all of them.
for a in actions[:8]:
    print(f"│ {a}")

if not actions:
    print("│ ✓ All contracts pre-matched — confirm and send outreach")

print("│")
print(f"│ Total actions: {len(actions)}")
print("└──────────────────────────────────────────────────────")

# Count priorities from the contract list so the logged figures stay correct
# when TODAYS_CONTRACTS changes (they were previously fixed at 1 and 2).
urgent_count = sum(1 for c in TODAYS_CONTRACTS if c["priority"] == "urgent")
high_count = sum(1 for c in TODAYS_CONTRACTS if c["priority"] == "high")

# Log the briefing as a playbook entry
gw("/log", {
    "operation": f"copilot_briefing: {total_prematched}/{total_needed} pre-matched, {len(briefing['alerts'])} alerts",
    "approach": "5-layer anticipation: scan → match → alert → suggest → brief",
    "result": f"fill_rate={fill_pct:.0f}%, actions={len(actions)}, urgent={urgent_count}, high={high_count}",
    "context": "morning briefing for staffing coordinator",
})
|
||
|
||
# ═══════════════════════════════════════════════════
# OUTPUT: structured JSON for any UI to render
# ═══════════════════════════════════════════════════

# Roll the run's headline numbers into the briefing before it is persisted.
run_stats = {}
run_stats["total_contracts"] = len(TODAYS_CONTRACTS)
run_stats["total_needed"] = total_needed
run_stats["total_prematched"] = total_prematched
run_stats["fill_pct"] = fill_pct
run_stats["actions"] = len(actions)
run_stats["alerts"] = len(briefing["alerts"])
briefing["stats"] = run_stats

# Persist the structured briefing where the UI layer picks it up.
briefing_path = "/tmp/copilot_briefing.json"
with open(briefing_path, "w") as out_file:
    json.dump(briefing, out_file, indent=2)

print(f"\n📋 Structured briefing saved to {briefing_path}")
print(" Any UI can render this — the data is ready.")
|