From fb99e92a60f1a8989f2b30d9e3af7ae8c0c6fd30 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 20:49:15 -0500 Subject: [PATCH 01/43] =?UTF-8?q?demo:=20P1=20=E2=80=94=20search=20filter?= =?UTF-8?q?=20now=20actually=20filters=20by=20state=20and=20role?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Co-Pilot search box read state and role from the dropdowns (#sst, #srl) but appended them to the message string as ' in '+st. The server's NL parser then matched the literal preposition "in" against the case-insensitive regex /\b(IL|IN|...)\b/i and assigned state IN (Indiana) to every search. Result: typing "forklift in IL" returned Indiana workers. Same for WI, TX, any state — all silently became Indiana. That was the "cached/generic response" the legacy staffing client was seeing. Two prongs: 1. search.html doSearch() now passes structured fields: {message, state, role} instead of munging into the message text. Dropdown selections bypass NL parsing entirely. 2. /intelligence/chat smart_search route accepts those structured fields and prefers them over regex archaeology. Falls back to NL parsing only when fields aren't provided. Fixed the regex too: the prepositional form (?:in|from)\s+(STATE) wins, the standalone form requires uppercase (drops /i flag) so the lowercase preposition "in" can no longer match. Verified live: - POST /intelligence/chat {"message":"forklift","state":"IL"} → 167 IL forklift operators (Galesburg, Joliet, ...) - POST /intelligence/chat {"message":"forklift","state":"WI","role":"Forklift Operator"} → 16 WI Forklift Operators (Milwaukee, Madison, ...) - POST /intelligence/chat {"message":"forklift in IL"} (NL fallback) → 167 IL workers (regex now correctly distinguishes preposition from state code) Playwright drove the live UI through devop.live/lakehouse and confirmed the front-end posts the structured body and the result panel renders the right state. 
Restart sequence: kill old bun :3700, bun run mcp-server/index.ts. --- mcp-server/index.ts | 47 +++++++++++++++++++++++++++++++++--------- mcp-server/search.html | 12 ++++++----- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/mcp-server/index.ts b/mcp-server/index.ts index de804fa..9b4134b 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -1698,7 +1698,14 @@ async function main() { const filters: string[] = ["CAST(reliability AS DOUBLE) >= 0.5"]; const understood: string[] = []; - // Extract role keywords + // Structured input from the search-form dropdowns. When set, + // these win over NL parsing — typing "forklift in IL" used to + // misparse the preposition "in" as state IN (Indiana). Trust + // explicit user selection over regex archaeology. + const explicitState = String(b.state || "").trim().toUpperCase(); + const explicitRole = String(b.role || "").trim(); + + // Extract role keywords (skip if dropdown picked one) const roleKeywords: Record = { "warehouse": "warehouse", "forklift": "forklift", "welder": "weld", "assembler": "assembl", "loader": "loader", "machine operator": "machine operator", "shipping": "shipping", @@ -1707,8 +1714,13 @@ async function main() { "line lead": "line lead", "electrician": "electric", "packaging": "packaging", "tool and die": "tool", "logistics": "logistics", "safety": "safety", "cnc": "cnc", }; - for (const [kw, sqlPart] of Object.entries(roleKeywords)) { - if (lower.includes(kw)) { filters.push(`LOWER(role) LIKE '%${sqlPart}%'`); understood.push(`role: ${kw}`); break; } + if (explicitRole) { + filters.push(`LOWER(role) LIKE '%${explicitRole.toLowerCase().replace(/'/g, "''")}%'`); + understood.push(`role: ${explicitRole}`); + } else { + for (const [kw, sqlPart] of Object.entries(roleKeywords)) { + if (lower.includes(kw)) { filters.push(`LOWER(role) LIKE '%${sqlPart}%'`); understood.push(`role: ${kw}`); break; } + } } // Extract city @@ -1726,18 +1738,33 @@ async function main() { } } - // Extract 
state + // Extract state — dropdown wins; otherwise NL parse, but + // require either an explicit "in/from " preposition + // OR an UPPERCASE 2-letter code, never a bare lowercase + // 2-letter token. Old regex matched "in" (preposition) as + // state IN (Indiana) because the /i flag made the standalone + // pattern case-insensitive — "forklift in IL" always returned + // Indiana workers. const stateNames: Record = { "illinois":"IL","indiana":"IN","ohio":"OH","missouri":"MO","tennessee":"TN", "kentucky":"KY","wisconsin":"WI","michigan":"MI","iowa":"IA","minnesota":"MN" }; - const stateMatch = lower.match(/\b(IL|IN|OH|MO|TN|KY|WI|MI|IA|MN)\b/i); - if (stateMatch && !understood.some(u => u.startsWith('city'))) { - filters.push(`state = '${stateMatch[1].toUpperCase()}'`); - understood.push(`state: ${stateMatch[1].toUpperCase()}`); + if (explicitState) { + if (!understood.some(u => u.startsWith('city'))) { + filters.push(`state = '${explicitState.replace(/'/g, "''")}'`); + understood.push(`state: ${explicitState}`); + } } else { - for (const [name, abbr] of Object.entries(stateNames)) { - if (lower.includes(name)) { filters.push(`state = '${abbr}'`); understood.push(`state: ${abbr}`); break; } + const prepMatch = q.match(/\b(?:in|from)\s+(IL|IN|OH|MO|TN|KY|WI|MI|IA|MN)\b/i); + const upperMatch = q.match(/\b(IL|IN|OH|MO|TN|KY|WI|MI|IA|MN)\b/); // no /i — must be uppercase + const stateMatch = prepMatch || upperMatch; + if (stateMatch && !understood.some(u => u.startsWith('city'))) { + filters.push(`state = '${stateMatch[1].toUpperCase()}'`); + understood.push(`state: ${stateMatch[1].toUpperCase()}`); + } else { + for (const [name, abbr] of Object.entries(stateNames)) { + if (lower.includes(name)) { filters.push(`state = '${abbr}'`); understood.push(`state: ${abbr}`); break; } + } } } diff --git a/mcp-server/search.html b/mcp-server/search.html index 7a15c1c..1a3c4c1 100644 --- a/mcp-server/search.html +++ b/mcp-server/search.html @@ -2274,13 +2274,15 @@ function 
doSearch(){ var q=document.getElementById('sq').value.trim();if(!q)return; lastQuery=q; var st=document.getElementById('sst').value,rl=document.getElementById('srl').value; - // Append dropdown filters to the query so the smart parser picks them up - var fullQ=q; - if(st&&q.indexOf(st)<0)fullQ+=' in '+st; - if(rl&&q.toLowerCase().indexOf(rl.toLowerCase())<0)fullQ+=' '+rl; + // Pass dropdown filters as structured fields. Old code appended + // ' in '+st to the message, which the server misparsed: the + // preposition "in" matched the regex for state code "IN" (Indiana) + // and every search returned Indiana workers regardless of dropdown. + // Sending structured state/role lets the server skip NL parsing + // for those fields entirely. var out=document.getElementById('sresults');out.textContent='Finding the best matches...'; fetch(A+'/intelligence/chat',{method:'POST',headers:{'Content-Type':'application/json'}, - body:JSON.stringify({message:fullQ}) + body:JSON.stringify({message:q,state:st||undefined,role:rl||undefined}) }).then(function(r){return r.json()}).then(function(d){ out.textContent=''; // Show what the system understood -- 2.47.2 From 677065de7698e12aac498139f9674f31af95939f Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 21:05:40 -0500 Subject: [PATCH 02/43] =?UTF-8?q?demo:=20P2=20=E2=80=94=20staffer-language?= =?UTF-8?q?=20routes=20(zip,=20headcount,=20name,=20late-triage,=20ingest?= =?UTF-8?q?=20log)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Built from a playwright run as three personas: Maria — "8 production workers near 60607 by next Friday, prior-fill at this client" Devon — "what came in last night?" Aisha — "Marcus running late site 4422" Each one previously fell through to smart_search and returned irrelevant results (geo wrong, headcount ignored, no triage, no temporal). Now: A. Zip code → city/state lookup. 
Chicago zips (606xx, 607xx, 608xx) resolve to {city: Chicago, state: IL}; 29 zip prefixes across 20 metros covered. Maria's "near 60607" now returns Chicago workers, not Dayton/Green Bay. B. Headcount parser. "8 production workers" / "12 forklift operators" / "5 welders" set top_k 1..200, capped 5..25 for SQL+vector LIMIT. Allows 0-2 role words between the count and the worker noun so "8 production workers" matches as well as "8 workers". C. Bare-name profile lookup. Single short capitalized phrase ("Marcus" / "Sarah Lopez") triggers a profile route. Per-token LIKE AND-joined so "Marcus Rivera" matches "Marcus L. Rivera" without hardcoding middle initials. E. Late-worker / no-show triage. Pattern: (running late|late|no show|sick|out today|called out|can't make it) — pulls profile + reliability + responsiveness + recent calls, sources 5 same-role same-geo backfills sorted by responsiveness, drafts a client SMS the coordinator can copy. Front-end renders triage card + Copy SMS button + green backfill list. F. Contractor name preview anchor. The PROJECT INDEX preview line on each permit card now wraps contact_1_name and contact_2_name in anchors to /contractor?name=... — clicking a contractor finally navigates instead of doing nothing. Click handler stops propagation so the details element doesn't toggle. D. Temporal "what came in" route. last night / today / past N hours / recent — surfaces datasets from the catalog whose updated_at is within the window, samples one row per dataset to detect worker-shape, groups by role for worker tables. Schema-agnostic — drop any dataset and it shows up. Currently sparse because no fresh ingest has happened today; will populate as ingest runs. Server: /intelligence/chat smart_search route accepts structured state/role from the search-form dropdowns (P1 from prior commit) and now ALSO honors b.state, b.role, q.match for headcount + zip + name + triage patterns BEFORE falling through to NL parsing. 
Front-end: doSearch dispatches on response.type and renders triage, profile, ingest_log, and miss states with type-specific UI. All DOM construction uses textContent / appendChild — no innerHTML, no XSS. Verified end-to-end via playwright drive of devop.live/lakehouse: Maria → 8 Chicago Production Workers (60685, 60662, 60634) tags: "headcount: 8 · zip 60607 → Chicago, IL · ..." Aisha → Marcus V. Campbell card + draft SMS + 5 Quincy IL backfills "I'm dispatching Scott B. Cooper (96% reliability) to cover." Devon → ingest_log surfaces successful_playbooks_live (last 1h) Marcus → 5 profiles (Adams Louisville KY, Jenkins Green Bay WI, ...) Screenshots: /tmp/persona_v2/{01_maria,02_aisha,03_devon,04_marcus}.png Restart sequence after these edits: pkill -9 -f "mcp-server/index.ts" ; cd /home/profit/lakehouse ; bun run mcp-server/index.ts. The bun on :3700 is not systemd-managed (pre-existing convention). --- mcp-server/index.ts | 288 +++++++++++++++++++++++++++++++++++++++-- mcp-server/search.html | 153 +++++++++++++++++++++- 2 files changed, 426 insertions(+), 15 deletions(-) diff --git a/mcp-server/index.ts b/mcp-server/index.ts index 9b4134b..d7ca82d 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -1693,6 +1693,163 @@ async function main() { queries_run: queries, duration_ms: Date.now() - start }); } + // Route 6: late-worker / no-show triage. Coordinator gets a text + // ("Marcus running late site 4422") and needs three things in + // one shot: the worker's record + attendance pattern, a draft + // SMS to the client, and a ranked list of immediately-available + // backfills filtered by the same role+geo. The system already + // has every input (workers_500k, call_log, playbook_memory). + // The route binds them. + // No /i — the name has to be capitalized (English convention) + // and the event verbs are matched lowercase. 
The /i flag was + // letting "Marcus running" parse as "Marcus Running" (a last + // name) and then the event regex wouldn't find "running late" + // because "running" was already consumed by the name group. + const triageMatch = q.match(/^([A-Z][a-z]+(?:\s+[A-Z]\.?\s*)?(?:\s+[A-Z][a-z]+)?)\s+(running\s+late|late|no\s*show|no-show|sick|out\s+today|called\s+out|called\s+in|can'?t\s+make\s+it|won'?t\s+make\s+it)/); + if (triageMatch) { + const name = triageMatch[1].trim(); + const event = triageMatch[2].toLowerCase().replace(/\s+/g, " "); + queries.push(`SQL: locate ${name}'s worker record`); + const profileR = await api("POST", "/query/sql", { sql: `SELECT name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, ROUND(CAST(responsiveness AS DOUBLE),2) resp, archetype, skills, certifications FROM workers_500k WHERE name LIKE '%${name.replace(/'/g, "''")}%' ORDER BY CAST(reliability AS DOUBLE) DESC LIMIT 1` }); + if (profileR.rows?.length) { + const w = profileR.rows[0]; + // Pull attendance pattern from call_log if available — count + // recent calls + count of unanswered/late patterns. If the + // table doesn't exist or has nothing, we surface that + // honestly rather than fabricate. + queries.push(`SQL: ${w.name}'s recent contact pattern`); + const callR = await api("POST", "/query/sql", { sql: `SELECT COUNT(*) calls FROM call_log WHERE candidate_id IN (SELECT candidate_id FROM workers_500k WHERE name = '${w.name.replace(/'/g, "''")}')` }).catch(() => null); + const callCount = callR?.rows?.[0]?.calls ?? null; + + // Backfills: same role + same geo, available now, ordered + // by responsiveness (a coordinator covering a no-show + // wants the candidate who actually answers their phone). 
+ queries.push(`Backfill: ${w.role} in ${w.city}, ${w.state}, available, sorted by responsiveness`); + const backfillR = await api("POST", "/query/sql", { sql: `SELECT name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, ROUND(CAST(responsiveness AS DOUBLE),2) resp, archetype, skills FROM workers_500k WHERE role = '${w.role.replace(/'/g, "''")}' AND city = '${(w.city||"").replace(/'/g, "''")}' AND state = '${(w.state||"").replace(/'/g, "''")}' AND name != '${w.name.replace(/'/g, "''")}' AND CAST(availability AS DOUBLE) > 0.6 ORDER BY CAST(responsiveness AS DOUBLE) DESC, CAST(reliability AS DOUBLE) DESC LIMIT 5` }); + + // Draft SMS the coordinator can send to the client. This + // is template-generated, not LLM — the coordinator must + // be able to send it instantly without re-reading. Names + // and roles are interpolated; the COORDINATOR sends. + const eventLabel = event.includes("late") ? "running late" : event.includes("show") ? "a no-show" : event.includes("sick") || event.includes("out") ? "out today" : "unable to make their shift"; + const backfills = backfillR.rows || []; + const topBackfill = backfills[0]?.name; + const draftSms = topBackfill + ? `Heads-up: ${w.name} (${w.role}) is ${eventLabel}. I'm dispatching ${topBackfill} from our local bench (${Math.round((backfills[0].rel||0)*100)}% reliability) to cover. Will confirm arrival within the hour.` + : `Heads-up: ${w.name} (${w.role}) is ${eventLabel}. I'm pulling our nearest available ${w.role} now and will confirm coverage shortly.`; + + return ok({ + type: "triage", + summary: `${w.name} — ${eventLabel}. ${backfills.length} local backfill${backfills.length === 1 ? 
"" : "s"} ready, draft SMS ready to send.`, + worker: { name: w.name, role: w.role, city: w.city, state: w.state, zip: w.zip, rel: w.rel, avail: w.avail, resp: w.resp, archetype: w.archetype, skills: w.skills, certifications: w.certifications, recent_calls: callCount }, + event, + backfills, + draft_sms: draftSms, + queries_run: queries, + duration_ms: Date.now() - start, + }); + } + return ok({ type: "triage_miss", summary: `Couldn't find a worker named "${name}" in the roster. Check the spelling or try last name only.`, queries_run: queries, duration_ms: Date.now() - start }); + } + + // Route 7: bare-name profile lookup. Coordinator types just a + // name (or "First Last") with no other intent — pull the + // profile, prior fills, and attendance pattern in one shot. + // Distinguished from smart_search by being SHORT (≤4 tokens), + // capitalized like a name, and not containing role/skill words. + const tokens = q.trim().split(/\s+/); + const looksLikeName = tokens.length >= 1 && tokens.length <= 4 + && tokens.every((t) => /^[A-Z][a-z'-]+\.?$/.test(t) || /^[A-Z]\.$/.test(t)) + && !/forklift|warehouse|electric|welder|assembl|maintain|production|operator|driver|tech|loader|packag|inventory|sanitation/i.test(q); + if (looksLikeName) { + // Names have middle initials in workers_500k ("Steven A. Allen"), + // so a single LIKE '%First Last%' won't match. Split on + // whitespace, AND each token — lets "Marcus Rivera" match + // "Marcus L. Rivera" without enumerating initials. 
+ const nameLike = tokens + .map((t) => `name LIKE '%${t.replace(/'/g, "''").replace(/\./g, "")}%'`) + .join(" AND "); + queries.push(`SQL: lookup name="${q}" via per-token LIKE`); + const r = await api("POST", "/query/sql", { sql: `SELECT name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, ROUND(CAST(responsiveness AS DOUBLE),2) resp, archetype, skills, certifications FROM workers_500k WHERE ${nameLike} ORDER BY CAST(reliability AS DOUBLE) DESC LIMIT 5` }); + if (r.rows?.length) { + return ok({ + type: "profile", + summary: r.rows.length === 1 ? `${r.rows[0].name} — ${r.rows[0].role}, ${r.rows[0].city}, ${r.rows[0].state}` : `${r.rows.length} workers match "${q}"`, + profiles: r.rows, + queries_run: queries, + duration_ms: Date.now() - start, + }); + } + return ok({ type: "profile_miss", summary: `No workers named "${q}" in the roster.`, queries_run: queries, duration_ms: Date.now() - start }); + } + + // Route 8: temporal — "what came in last night", "new resumes + // today", "last 24 hours". Surfaces recently updated datasets + // from the catalog (filtered on each dataset's updated_at) and + // samples one row per dataset to detect worker-shaped tables. Schema- + // agnostic: any dataset that landed recently shows up. + const temporalMatch = lower.match(/\b(last\s+night|today|this\s+morning|past\s+(\d+)\s+(?:hours?|days?)|last\s+(\d+)\s+(?:hours?|days?)|recent|new\s+(?:resumes?|candidates?|workers?|hires?|today)|came\s+in|arrived|just\s+(?:got|came))/i); + if (temporalMatch) { + // Decide window in hours + let windowHours = 24; + const pastN = lower.match(/\b(?:past|last)\s+(\d+)\s+(hours?|days?)/); + if (pastN) { + windowHours = parseInt(pastN[1], 10) * (pastN[2].startsWith("d") ? 
24 : 1); + } else if (/last\s+night|this\s+morning|today/i.test(lower)) { + windowHours = 24; + } else if (/recent/i.test(lower)) { + windowHours = 72; + } + queries.push(`Catalog: datasets with created_at within last ${windowHours}h`); + const ds = await api("GET", "/catalog/datasets") as any[]; + const cutoff = Date.now() - windowHours * 3600 * 1000; + const recent = (Array.isArray(ds) ? ds : []) + .map((d: any) => ({ + name: d.name, + row_count: d.row_count || 0, + bytes: (d.objects?.[0]?.size_bytes) || 0, + updated_at: d.updated_at, + ts: d.updated_at ? Date.parse(d.updated_at) : 0, + })) + .filter((d) => d.ts >= cutoff && d.row_count > 0) + .sort((a, b) => b.ts - a.ts); + + // For each recent dataset, sample its first row's role-shape + // text so the coordinator sees what's in it without reading + // schemas. If it's a workers/resumes dataset, group by role. + const samples: any[] = []; + for (const d of recent.slice(0, 8)) { + const sample = await api("POST", "/query/sql", { sql: `SELECT * FROM "${d.name.replace(/"/g, '""')}" LIMIT 1` }).catch(() => null); + const cols = sample?.columns?.map((c: any) => c.name) || []; + const looksLikeWorkers = cols.includes("role") && (cols.includes("name") || cols.includes("candidate_id")); + let roleBreakdown: any[] = []; + if (looksLikeWorkers) { + const byRole = await api("POST", "/query/sql", { sql: `SELECT role, COUNT(*) cnt FROM "${d.name.replace(/"/g, '""')}" GROUP BY role ORDER BY cnt DESC LIMIT 5` }).catch(() => null); + roleBreakdown = byRole?.rows || []; + } + samples.push({ + name: d.name, + row_count: d.row_count, + updated_at: d.updated_at, + hours_ago: Math.round((Date.now() - d.ts) / 3600000), + looks_like_workers: looksLikeWorkers, + role_breakdown: roleBreakdown, + preview: sample?.rows?.[0] || null, + }); + } + + return ok({ + type: "ingest_log", + summary: recent.length + ? `${recent.length} dataset${recent.length === 1 ? "" : "s"} landed in the last ${windowHours}h. 
${samples.filter((s) => s.looks_like_workers).reduce((sum, s) => sum + s.row_count, 0)} new worker rows across them.` + : `Nothing new in the catalog in the last ${windowHours}h. (Dataset timestamps are based on catalog updated_at; if data was loaded directly to disk without going through /ingest/file, it won't show here.)`, + window_hours: windowHours, + datasets: samples, + queries_run: queries, + duration_ms: Date.now() - start, + }); + } + // Default: smart search — extract role, location, availability from natural language { const filters: string[] = ["CAST(reliability AS DOUBLE) >= 0.5"]; @@ -1705,6 +1862,96 @@ async function main() { const explicitState = String(b.state || "").trim().toUpperCase(); const explicitRole = String(b.role || "").trim(); + // (B) Headcount parser — coordinator says "8 production + // workers", "I need 12 forklift operators", "5 welders by + // Friday". Match a leading or embedded count followed by + // a worker-shape noun. Bound at 1..200 — anything outside is + // probably not a headcount (zip codes, dates, addresses). + let topK = 10; + // Allow zero-to-two role words between the number and the + // worker-noun: "8 workers" / "8 production workers" / + // "8 forklift operators" all match. The role word is + // optional so we don't lose the bare-number form. + const countMatch = q.match(/\b(\d{1,3})\s+(?:\w+\s+){0,2}(?:workers?|operators?|drivers?|techs?|technicians?|welders?|electricians?|assemblers?|handlers?|loaders?|packagers?|associates?|leads?|people|hires?|staff)\b/i); + if (countMatch) { + const n = parseInt(countMatch[1], 10); + if (n >= 1 && n <= 200) { + topK = n; + understood.push(`headcount: ${n}`); + } + } + + // (A) Zip code → city/state lookup. A coordinator types a zip + // because that's what the contract says. The previous parser + // saw "60607" and treated it as a stray number; results came + // back from any state. Map known metro zip prefixes here so + // the geographic constraint actually fires. 
+ // + // Each entry: zip-prefix → { city, state }. Prefix-match + // covers a metro without enumerating every zip — e.g. "606" + // catches Chicago zips 60600-60699. + const zipPrefixMap: Array<[string, { city: string, state: string }]> = [ + // Chicago + near-suburb + ["606", { city: "Chicago", state: "IL" }], + ["607", { city: "Chicago", state: "IL" }], + ["608", { city: "Chicago", state: "IL" }], + // Indianapolis + ["462", { city: "Indianapolis", state: "IN" }], + ["461", { city: "Indianapolis", state: "IN" }], + // Fort Wayne + ["468", { city: "Fort Wayne", state: "IN" }], + // Columbus OH + ["432", { city: "Columbus", state: "OH" }], + ["431", { city: "Columbus", state: "OH" }], + // Cleveland + ["441", { city: "Cleveland", state: "OH" }], + // Cincinnati + ["452", { city: "Cincinnati", state: "OH" }], + ["451", { city: "Cincinnati", state: "OH" }], + // Dayton + ["454", { city: "Dayton", state: "OH" }], + // Milwaukee + ["532", { city: "Milwaukee", state: "WI" }], + ["531", { city: "Milwaukee", state: "WI" }], + // Madison + ["537", { city: "Madison", state: "WI" }], + // Detroit + ["482", { city: "Detroit", state: "MI" }], + ["481", { city: "Detroit", state: "MI" }], + // Grand Rapids + ["495", { city: "Grand Rapids", state: "MI" }], + ["493", { city: "Grand Rapids", state: "MI" }], + // Minneapolis / St. Paul + ["554", { city: "Minneapolis", state: "MN" }], + ["551", { city: "Minneapolis", state: "MN" }], + // Des Moines + ["503", { city: "Des Moines", state: "IA" }], + // Kansas City MO + ["641", { city: "Kansas City", state: "MO" }], + // St. Louis + ["631", { city: "St. 
Louis", state: "MO" }], + // Nashville + ["372", { city: "Nashville", state: "TN" }], + // Memphis + ["381", { city: "Memphis", state: "TN" }], + // Knoxville + ["379", { city: "Knoxville", state: "TN" }], + // Louisville + ["402", { city: "Louisville", state: "KY" }], + // Lexington + ["405", { city: "Lexington", state: "KY" }], + ]; + const zipMatch = q.match(/\b(\d{5})\b/); + let zipCity: { city: string, state: string } | null = null; + if (zipMatch) { + const z = zipMatch[1]; + const hit = zipPrefixMap.find(([prefix]) => z.startsWith(prefix)); + if (hit) { + zipCity = hit[1]; + understood.push(`zip ${z} → ${hit[1].city}, ${hit[1].state}`); + } + } + // Extract role keywords (skip if dropdown picked one) const roleKeywords: Record = { "warehouse": "warehouse", "forklift": "forklift", "welder": "weld", "assembler": "assembl", @@ -1724,17 +1971,24 @@ async function main() { } // Extract city - const cities = ["chicago","springfield","rockford","peoria","joliet","indianapolis","fort wayne", - "evansville","south bend","columbus","cleveland","cincinnati","dayton","akron","toledo", - "st. louis","st louis","kansas city","nashville","memphis","knoxville","louisville","lexington", - "milwaukee","madison","detroit","grand rapids","lansing","des moines","minneapolis","terre haute", - "bloomington","decatur","mattoon","galesburg","danville","champaign"]; - for (const city of cities) { - if (lower.includes(city)) { - const sqlCity = city.split(' ').map(w => w[0].toUpperCase() + w.slice(1)).join(' '); - filters.push(`city = '${sqlCity}'`); - understood.push(`city: ${sqlCity}`); - break; + // Zip code wins over city-name parsing — it's more specific + // and the coordinator typed a number, not a casual mention. 
+ if (zipCity) { + filters.push(`city = '${zipCity.city}'`); + understood.push(`city: ${zipCity.city}`); + } else { + const cities = ["chicago","springfield","rockford","peoria","joliet","indianapolis","fort wayne", + "evansville","south bend","columbus","cleveland","cincinnati","dayton","akron","toledo", + "st. louis","st louis","kansas city","nashville","memphis","knoxville","louisville","lexington", + "milwaukee","madison","detroit","grand rapids","lansing","des moines","minneapolis","terre haute", + "bloomington","decatur","mattoon","galesburg","danville","champaign"]; + for (const city of cities) { + if (lower.includes(city)) { + const sqlCity = city.split(' ').map(w => w[0].toUpperCase() + w.slice(1)).join(' '); + filters.push(`city = '${sqlCity}'`); + understood.push(`city: ${sqlCity}`); + break; + } } } @@ -1785,9 +2039,12 @@ async function main() { queries.push("SQL filter: " + filterStr); queries.push("Vector: semantic search for best skill match"); - // Also run a direct SQL query to get exact counts and zip codes + // Also run a direct SQL query to get exact counts and zip codes. + // LIMIT honors the parsed headcount (capped at 25 to keep the + // grid renderable; the staffer can ask for more). const sqlFields = "name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, skills, certifications, archetype"; - const directSql = `SELECT ${sqlFields} FROM workers_500k WHERE ${filterStr} ORDER BY CAST(availability AS DOUBLE) DESC, CAST(reliability AS DOUBLE) DESC LIMIT 10`; + const sqlLimit = Math.min(Math.max(topK, 5), 25); + const directSql = `SELECT ${sqlFields} FROM workers_500k WHERE ${filterStr} ORDER BY CAST(availability AS DOUBLE) DESC, CAST(reliability AS DOUBLE) DESC LIMIT ${sqlLimit}`; // Derive role+geo for the pattern query so the meta-index // surface lines up with what the user actually asked for. 
@@ -1798,7 +2055,10 @@ async function main() { const [searchR, directR, patternR] = await Promise.all([ api("POST", "/vectors/hybrid", { question: q, index_name: "workers_500k_v1", sql_filter: filterStr, - filter_dataset: "ethereal_workers", id_column: "worker_id", top_k: 8, generate: false, + filter_dataset: "ethereal_workers", id_column: "worker_id", + // Honor the parsed headcount (capped at 25 to keep the + // vector rerank from re-scoring more rows than render). + top_k: Math.min(Math.max(topK, 5), 25), generate: false, // k=200 to catch compounding — direct measurement shows // boost reliably fires only when ~all memory is scanned // due to the narrow 0.55-0.67 cosine band in the 768d diff --git a/mcp-server/search.html b/mcp-server/search.html index 1a3c4c1..d507f36 100644 --- a/mcp-server/search.html +++ b/mcp-server/search.html @@ -1609,10 +1609,29 @@ function loadLiveContracts(){ var ebLabel=document.createElement('span');ebLabel.style.cssText='font-size:10px;color:#545d68;text-transform:uppercase;letter-spacing:1px'; ebLabel.textContent='PROJECT INDEX — Build Signals'; var ebTags=document.createElement('span');ebTags.style.cssText='color:#e6edf3;font-size:11px;flex:1;font-weight:500'; + // Contractor names link to the full profile page. Without anchors, + // clicking the preview did nothing — the only working contractor + // link was inside the lazy-loaded entity brief, which a coordinator + // wouldn't reach without first expanding the details. 
var preview=[]; + function contactLink(n){ + var a=document.createElement('a'); + a.href='/contractor?name='+encodeURIComponent(n); + a.target='_blank';a.rel='noopener'; + a.style.cssText='color:inherit;text-decoration:none;border-bottom:1px dotted #58a6ff44'; + a.title='Open full contractor profile'; + a.textContent=n; + a.addEventListener('click',function(e){e.stopPropagation()}); // don't toggle the details + return a; + } if(p.contact_1_name) preview.push(p.contact_1_name); if(p.contact_2_name && p.contact_2_name!==p.contact_1_name) preview.push(p.contact_2_name); - ebTags.textContent=preview.join(' · '); + if(preview.length){ + preview.forEach(function(n,i){ + if(i>0) ebTags.appendChild(document.createTextNode(' · ')); + ebTags.appendChild(contactLink(n)); + }); + } var ebMeta=document.createElement('span');ebMeta.style.cssText='color:#545d68;font-size:10px'; ebMeta.textContent='click → fetch OSHA + ILSOS'; ebSum.appendChild(ebCaret);ebSum.appendChild(ebLabel);ebSum.appendChild(ebTags);ebSum.appendChild(ebMeta); @@ -2270,6 +2289,130 @@ function pw(text){ rel:rr?parseFloat(rr[1]):0,avail:av?parseFloat(av[1]):0,arch:ar?ar[1]:'',hasM:!!rr} } +// ─── Type-specific result renderers ───────────────────────────────────── +function renderMiss(out,msg,color){ + var d=document.createElement('div'); + d.style.cssText='background:#0d1117;border:1px solid '+(color||'#21262d')+'66;border-left:3px solid '+(color||'#21262d')+';border-radius:6px;padding:14px 16px;color:#8b949e;font-size:13px;line-height:1.5'; + d.textContent=msg; + out.appendChild(d); +} +function workerLine(w){ + var bits=[]; + if(w.role) bits.push(w.role); + if(w.city||w.state) bits.push((w.city||'')+(w.city&&w.state?', ':'')+(w.state||'')); + if(w.zip) bits.push('ZIP '+w.zip); + return bits.join(' · '); +} +function appendStat(parent,label,val){ + var s=document.createElement('span'); + var l=document.createElement('span');l.textContent=label+': '; + var 
b=document.createElement('b');b.style.color='#e6edf3';b.textContent=val; + s.appendChild(l);s.appendChild(b); + parent.appendChild(s); +} +function renderTriage(out,d){ + var w=d.worker, bf=d.backfills||[]; + var card=document.createElement('div'); + card.style.cssText='background:#1a1410;border:1px solid #d29922;border-left:3px solid #d29922;border-radius:8px;padding:16px;margin-bottom:14px'; + var ev=document.createElement('div'); + ev.style.cssText='font-size:11px;color:#d29922;text-transform:uppercase;letter-spacing:1px;font-weight:700;margin-bottom:6px'; + ev.textContent='⚠ TRIAGE — '+(d.event||'event').toUpperCase(); + card.appendChild(ev); + var hdr=document.createElement('div'); + hdr.style.cssText='font-size:16px;color:#e6edf3;font-weight:600;margin-bottom:4px'; + hdr.textContent=w.name; + card.appendChild(hdr); + var line=document.createElement('div'); + line.style.cssText='font-size:12px;color:#8b949e;margin-bottom:10px'; + line.textContent=workerLine(w); + card.appendChild(line); + var stats=document.createElement('div'); + stats.style.cssText='font-size:11px;color:#8b949e;margin-bottom:10px;display:flex;gap:14px;flex-wrap:wrap'; + appendStat(stats,'Reliability',Math.round((w.rel||0)*100)+'%'); + appendStat(stats,'Responsiveness',Math.round((w.resp||0)*100)+'%'); + appendStat(stats,'Availability',Math.round((w.avail||0)*100)+'%'); + if(w.archetype) appendStat(stats,'Archetype',w.archetype); + if(w.recent_calls!=null) appendStat(stats,'Prior calls',w.recent_calls); + card.appendChild(stats); + var smsLabel=document.createElement('div'); + smsLabel.style.cssText='font-size:10px;color:#d29922;text-transform:uppercase;letter-spacing:1px;font-weight:700;margin-bottom:4px'; + smsLabel.textContent='DRAFT SMS — TO CLIENT'; + card.appendChild(smsLabel); + var smsBox=document.createElement('div'); + smsBox.style.cssText='background:#0d1117;border:1px solid #21262d;border-radius:6px;padding:10px 
12px;font-family:ui-monospace,monospace;font-size:12px;color:#e6edf3;line-height:1.5;white-space:pre-wrap'; + smsBox.textContent=d.draft_sms||''; + card.appendChild(smsBox); + var copyBtn=document.createElement('button'); + copyBtn.style.cssText='margin-top:8px;background:#1f6feb;border:none;color:#fff;padding:6px 14px;border-radius:6px;font-size:12px;font-weight:600;cursor:pointer'; + copyBtn.textContent='Copy SMS'; + copyBtn.onclick=function(){ + if(navigator.clipboard) navigator.clipboard.writeText(d.draft_sms||''); + copyBtn.textContent='Copied ✓'; + setTimeout(function(){copyBtn.textContent='Copy SMS'},1500); + }; + card.appendChild(copyBtn); + out.appendChild(card); + if(bf.length){ + var bfHdr=document.createElement('div'); + bfHdr.style.cssText='font-size:11px;color:#3fb950;text-transform:uppercase;letter-spacing:1px;font-weight:700;margin:8px 0 8px'; + bfHdr.textContent='✓ BACKFILLS READY — '+bf.length+' local '+(w.role||'workers')+' available, sorted by responsiveness'; + out.appendChild(bfHdr); + bf.forEach(function(c,i){ + addWorkerInsight(out,c.name,workerLine(c), + 'Reliability '+Math.round((c.rel||0)*100)+'% · Responds '+Math.round((c.resp||0)*100)+'% · Available '+Math.round((c.avail||0)*100)+'%'+(c.archetype?' · '+c.archetype:''), + i,'#3fb950',c); + }); + }else{ + var bfNone=document.createElement('div'); + bfNone.style.cssText='background:#1a1010;border:1px solid #f85149;border-radius:6px;padding:10px 14px;color:#fca5a5;font-size:12px'; + bfNone.textContent='No same-role workers available locally. Widen the search — try a neighboring city or relax availability threshold.'; + out.appendChild(bfNone); + } +} +function renderProfiles(out,d){ + var hdr=document.createElement('div'); + hdr.style.cssText='font-size:12px;color:#8b949e;margin-bottom:10px'; + hdr.textContent=d.summary; + out.appendChild(hdr); + (d.profiles||[]).forEach(function(w,i){ + addWorkerInsight(out,w.name,workerLine(w), + 'Reliability '+Math.round((w.rel||0)*100)+'%'+(w.resp?' 
· Responds '+Math.round(w.resp*100)+'%':'')+(w.archetype?' · '+w.archetype:''), + i,null,w); + }); +} +function renderIngestLog(out,d){ + var hdr=document.createElement('div'); + hdr.style.cssText='font-size:12px;color:#e6edf3;margin-bottom:10px;padding:10px 12px;background:#0d2818;border:1px solid #2ea04340;border-left:3px solid #3fb950;border-radius:6px'; + hdr.textContent=d.summary; + out.appendChild(hdr); + (d.datasets||[]).forEach(function(ds){ + var card=document.createElement('div'); + card.style.cssText='background:#0d1117;border:1px solid #21262d;border-radius:6px;padding:12px 14px;margin-bottom:8px'; + var top=document.createElement('div'); + top.style.cssText='display:flex;justify-content:space-between;align-items:baseline;margin-bottom:6px'; + var nm=document.createElement('span'); + nm.style.cssText='font-size:13px;color:#e6edf3;font-weight:600'; + nm.textContent=ds.name; + var ago=document.createElement('span'); + ago.style.cssText='font-size:11px;color:#545d68'; + ago.textContent=(ds.hours_ago||0)+'h ago · '+(ds.row_count||0).toLocaleString()+' rows'; + top.appendChild(nm);top.appendChild(ago); + card.appendChild(top); + if(ds.looks_like_workers && ds.role_breakdown && ds.role_breakdown.length){ + var rb=document.createElement('div'); + rb.style.cssText='font-size:11px;color:#8b949e;display:flex;gap:10px;flex-wrap:wrap;margin-top:4px'; + ds.role_breakdown.forEach(function(r){ + var pill=document.createElement('span'); + pill.style.cssText='background:#161b22;border:1px solid #21262d;padding:2px 8px;border-radius:9px'; + pill.textContent=(r.role||'?')+' · '+r.cnt; + rb.appendChild(pill); + }); + card.appendChild(rb); + } + out.appendChild(card); + }); +} + function doSearch(){ var q=document.getElementById('sq').value.trim();if(!q)return; lastQuery=q; @@ -2285,6 +2428,14 @@ function doSearch(){ body:JSON.stringify({message:q,state:st||undefined,role:rl||undefined}) }).then(function(r){return r.json()}).then(function(d){ out.textContent=''; + // 
Type-specific renderers — added 2026-04-27 for the persona-driven + // routes (triage / profile / ingest_log). Default falls through to + // the smart_search renderer below. + if(d.type==='triage' && d.worker){return renderTriage(out,d)} + if(d.type==='triage_miss'){return renderMiss(out,d.summary,'#f85149')} + if(d.type==='profile' && d.profiles && d.profiles.length){return renderProfiles(out,d)} + if(d.type==='profile_miss'){return renderMiss(out,d.summary,'#d29922')} + if(d.type==='ingest_log'){return renderIngestLog(out,d)} // Show what the system understood if(d.understood&&d.understood.length){ var tags=document.createElement('div');tags.style.cssText='display:flex;gap:6px;flex-wrap:wrap;margin-bottom:8px'; -- 2.47.2 From 5f0beffe808b8604cdd4319118243b37feaf2306 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 21:16:52 -0500 Subject: [PATCH 03/43] =?UTF-8?q?demo:=20G=20=E2=80=94=20per-staffer=20hot?= =?UTF-8?q?-swap=20index=20(synthetic=20coordinator=20personas)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same corpus, different relevance gradient per staffer. Three personas defined in mcp-server/index.ts STAFFERS roster (Maria/IL, Devon/IN, Aisha/WI), each with a primary state + secondary cities. Server-side: /intelligence/chat smart_search accepts a staffer_id body field; when set, defaults state to the staffer's territory and labels the playbook context as theirs. The playbook patterns query also defaults its geo to the staffer's primary city/state, so the recurring-skills/cert breakdowns reflect what they actually fill, not the global IL prior. Front-end: a staffer selector dropdown beside the existing state/role filters. Picking a staffer auto-pins state to their territory, shows a greeting line, relabels the MEMORY panel as MARIA'S/DEVON'S/AISHA'S MEMORY, and sends staffer_id to chat for scoping. 
Dropdown is populated from /staffers (NOT /api/staffers — the generic /api/* passthrough sends everything under /api/ to the Rust gateway, which doesn't own the roster). loadStaffers runs at window-load independently of loadDay's Promise.all so the dropdown populates even if simulation/SQL inits error out. Verified end-to-end via playwright. Same q="forklift operators": no staffer → 509 workers across MI/OH/IA, MEMORY label as Devon → 89 IN-only (Fort Wayne, Terre Haute), DEVON'S MEMORY as Aisha → 16 WI-only (Milwaukee, Madison, Green Bay), AISHA'S MEMORY As Maria with q="8 production workers near 60607": tags: headcount: 8 · zip 60607 → Chicago, IL · role: production · city: Chicago 20 workers, MARIA'S MEMORY label, top results in Chicago zips Closes the demo-side build of A-G from the persona plan: A. zip → city/state, B. headcount, C. bare-name, D. temporal, E. late-worker triage, F. contractor anchor, G. per-staffer index. --- mcp-server/index.ts | 91 +++++++++++++++++++++++++++++++++++++++++- mcp-server/search.html | 56 +++++++++++++++++++++++--- 2 files changed, 139 insertions(+), 8 deletions(-) diff --git a/mcp-server/index.ts b/mcp-server/index.ts index d7ca82d..71ec866 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -22,6 +22,58 @@ import { buildPermitBrief } from "./entity.js"; const BASE = process.env.LAKEHOUSE_URL || "http://localhost:3100"; const PORT = parseInt(process.env.MCP_PORT || "3700"); + +// ─── Staffer roster — used by the per-staffer hot-swap index (G). ──── +// +// J's vision: each staffer has their own molded view of the corpus. +// When Maria searches, the system surfaces *Maria's* prior fills and +// her territory's playbooks first. When Aisha searches, the same +// corpus gets re-shaped to her geo and recent activity. This is what +// generic CRM fast-search can't do: a relevance gradient that +// compounds with each staffer's own signal. 
+// +// First implementation is geography-based — each staffer has a primary +// state and a list of cities they recruit for. Playbook queries get +// scoped to that territory when staffer_id is provided. As the system +// accumulates per-staffer signal (call_log assignments, email threads, +// SMS history), the scope expands beyond geography. +// +// Adding a staffer: append to this list. The /api/staffers endpoint +// exposes the public-safe fields to the UI dropdown. +const STAFFERS: Array<{ + id: string; + name: string; + display: string; + territory: { state: string; cities: string[] }; + greeting: string; +}> = [ + { + id: "maria", + name: "Maria", + display: "Maria · Chicago coordinator", + territory: { state: "IL", cities: ["Chicago", "Joliet", "Rockford", "Peoria", "Springfield", "Decatur"] }, + greeting: "Maria's territory: Illinois warehouse + manufacturing fills", + }, + { + id: "devon", + name: "Devon", + display: "Devon · Indiana coordinator", + territory: { state: "IN", cities: ["Indianapolis", "Fort Wayne", "South Bend", "Evansville", "Bloomington", "Terre Haute"] }, + greeting: "Devon's territory: Indiana production + assembly fills", + }, + { + id: "aisha", + name: "Aisha", + display: "Aisha · Wisconsin/Michigan coordinator", + territory: { state: "WI", cities: ["Milwaukee", "Madison", "Green Bay", "Detroit", "Grand Rapids", "Lansing"] }, + greeting: "Aisha's territory: Wisconsin + Michigan logistics", + }, +]; + +function lookupStaffer(id: string | undefined): typeof STAFFERS[number] | null { + if (!id) return null; + return STAFFERS.find((s) => s.id === id) || null; +} const MODE = process.env.MCP_TRANSPORT || "http"; // "stdio" or "http" // Active trace for the current request — set per-request in the HTTP handler @@ -824,6 +876,20 @@ async function main() { } } + // Staffer roster — read by the UI dropdown so each coordinator + // can act under their own identity (per-staffer hot-swap index). 
+ if (url.pathname === "/api/staffers" || url.pathname === "/staffers") { + return ok({ + staffers: STAFFERS.map((s) => ({ + id: s.id, + name: s.name, + display: s.display, + territory: s.territory, + greeting: s.greeting, + })), + }); + } + if (url.pathname === "/system/summary") { const [ds, indexes, workersCount, candsCount] = await Promise.all([ api("GET", "/catalog/datasets").catch(() => [] as any), @@ -1862,6 +1928,21 @@ async function main() { const explicitState = String(b.state || "").trim().toUpperCase(); const explicitRole = String(b.role || "").trim(); + // (G) Per-staffer context. When the UI sends a staffer_id, + // playbook queries scope to that staffer's territory — their + // recent fills, their geo's recurring patterns. The corpus is + // the same for everyone; the relevance gradient is unique to + // each staffer because each pulls a different shape from it. + const staffer = lookupStaffer(String(b.staffer_id || "").trim()); + // If the staffer has a territory and the user hasn't already + // pinned a state/city via dropdown or NL, default the search + // to their territory. They can override by typing a different + // city or selecting a different state. + if (staffer && !explicitState) { + filters.push(`state = '${staffer.territory.state}'`); + understood.push(`as ${staffer.name}: ${staffer.territory.state}`); + } + // (B) Headcount parser — coordinator says "8 production // workers", "I need 12 forklift operators", "5 welders by // Friday". Match a leading or embedded count followed by @@ -2048,9 +2129,14 @@ async function main() { // Derive role+geo for the pattern query so the meta-index // surface lines up with what the user actually asked for. + // (G) When a staffer is acting, default the geo to their + // primary territory — their playbook view is shaped by + // where they actually fill, not the global Chicago/IL prior. 
const roleForPatterns = understood.find(u => u.startsWith('role:'))?.split(': ')[1] || q; - const cityForPatterns = understood.find(u => u.startsWith('city:'))?.split(': ')[1] || 'Chicago'; - const stateForPatterns = understood.find(u => u.startsWith('state:'))?.split(': ')[1] || 'IL'; + const cityForPatterns = understood.find(u => u.startsWith('city:'))?.split(': ')[1] + || staffer?.territory.cities[0] || 'Chicago'; + const stateForPatterns = understood.find(u => u.startsWith('state:'))?.split(': ')[1] + || staffer?.territory.state || 'IL'; const [searchR, directR, patternR] = await Promise.all([ api("POST", "/vectors/hybrid", { @@ -2084,6 +2170,7 @@ async function main() { return ok({ type: "smart_search", summary: `Found ${searchR.sql_matches || 0} workers matching your criteria${understood.length ? ' (' + understood.join(', ') + ')' : ''}`, + staffer: staffer ? { id: staffer.id, name: staffer.name, display: staffer.display, territory: staffer.territory } : null, understood, sql_results: sqlWorkers, vector_results: vectorWorkers, diff --git a/mcp-server/search.html b/mcp-server/search.html index d507f36..28edaa9 100644 --- a/mcp-server/search.html +++ b/mcp-server/search.html @@ -302,11 +302,18 @@ body{font-family:'Inter',-apple-system,system-ui,'Segoe UI',sans-serif;backgroun

-
+
+ +
+
@@ -363,7 +370,38 @@ var P=location.pathname.indexOf('/lakehouse')>=0?'/lakehouse':''; var A=location.origin+P; var AC=['#1a2744','#1a3a2a','#2a1a3a','#3a2a1a','#1a3a3a','#2a2a1a']; var lastQuery=''; -window.addEventListener('load',function(){loadSystemSummary();loadLegacyBridge();loadDay();loadStaffingForecast();loadLiveContracts();loadMarket();loadLearning();loadWorkerSearchSamples();loadArchSignals()}); +window.addEventListener('load',function(){loadSystemSummary();loadLegacyBridge();loadDay();loadStaffingForecast();loadLiveContracts();loadMarket();loadLearning();loadWorkerSearchSamples();loadArchSignals();loadStaffers()}); + +// Per-staffer hot-swap dropdown — runs independently of the simulation +// fetch so the staffer selector populates even if any other init step +// errors out. /staffers returns the synthetic coordinator roster. +function loadStaffers(){ + var sel=document.getElementById('sstaffer'); + var greeting=document.getElementById('sstaffer-greeting'); + if(!sel) return; + // /staffers (not /api/staffers) — the /api/* generic passthrough + // forwards anything under /api/ to the Rust gateway on :3100 and the + // gateway doesn't know the staffer roster (it lives in the mcp-server + // module). The bare /staffers route serves directly. 
+ fetch(A+'/staffers').then(function(r){return r.json()}).then(function(d){ + (d.staffers||[]).forEach(function(s){ + var o=document.createElement('option');o.value=s.id;o.textContent=s.display||s.name; + sel.appendChild(o); + }); + sel._roster=d.staffers||[]; + }).catch(function(){}); + sel.addEventListener('change',function(){ + var roster=sel._roster||[]; + var s=roster.find(function(x){return x.id===sel.value}); + if(s){ + greeting.textContent='Acting as '+s.name+' — '+(s.greeting||'')+' · territory: '+s.territory.cities.slice(0,3).join(', ')+'…'; + var stSel=document.getElementById('sst'); + if(stSel && !stSel.value){stSel.value=s.territory.state} + }else{ + greeting.textContent=''; + } + }); +} // Deep-link: visiting the dashboard with #open-briefs in the URL auto- // expands every Entity Brief panel once the contract cards finish @@ -2417,15 +2455,17 @@ function doSearch(){ var q=document.getElementById('sq').value.trim();if(!q)return; lastQuery=q; var st=document.getElementById('sst').value,rl=document.getElementById('srl').value; + var stafferEl=document.getElementById('sstaffer'); + var stafferId=stafferEl?stafferEl.value:''; // Pass dropdown filters as structured fields. Old code appended // ' in '+st to the message, which the server misparsed: the // preposition "in" matched the regex for state code "IN" (Indiana) // and every search returned Indiana workers regardless of dropdown. - // Sending structured state/role lets the server skip NL parsing - // for those fields entirely. + // Sending structured state/role + staffer_id lets the server skip + // NL parsing for those fields and apply per-staffer scoping. 
var out=document.getElementById('sresults');out.textContent='Finding the best matches...'; fetch(A+'/intelligence/chat',{method:'POST',headers:{'Content-Type':'application/json'}, - body:JSON.stringify({message:q,state:st||undefined,role:rl||undefined}) + body:JSON.stringify({message:q,state:st||undefined,role:rl||undefined,staffer_id:stafferId||undefined}) }).then(function(r){return r.json()}).then(function(d){ out.textContent=''; // Type-specific renderers — added 2026-04-27 for the persona-driven @@ -2457,7 +2497,11 @@ function doSearch(){ var mem=document.createElement('div'); mem.style.cssText='background:#0d2818;border:1px solid #2ea04360;border-radius:6px;padding:8px 12px;margin-bottom:10px;font-size:11px;color:#86efac;line-height:1.5'; var label=document.createElement('span');label.style.cssText='color:#3fb950;font-weight:600;margin-right:6px'; - label.textContent='MEMORY ('+(d.pattern_playbooks_matched||0)+' playbook'+(d.pattern_playbooks_matched===1?'':'s')+'):'; + // When a staffer is acting, label the panel with their name — + // "MARIA'S MEMORY (12 playbooks)" makes the per-user shaping + // visible in the UI, not just the response data. 
+ var memOwner=d.staffer&&d.staffer.name?d.staffer.name.toUpperCase()+"'S MEMORY":'MEMORY'; + label.textContent=memOwner+' ('+(d.pattern_playbooks_matched||0)+' playbook'+(d.pattern_playbooks_matched===1?'':'s')+'):'; mem.appendChild(label); var pattern = d.discovered_pattern || ''; if(!pattern || pattern.indexOf('No similar')>=0 || pattern.indexOf('0 workers')>=0){ -- 2.47.2 From a1066db87b390bf9faf19cecd7ff1900832ffe75 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 21:28:45 -0500 Subject: [PATCH 04/43] =?UTF-8?q?demo:=20contractor=20profile=20=E2=80=94?= =?UTF-8?q?=20heat=20map,=20project=20index,=2012=20awaiting=20sources?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The contractor.html click-target J asked for: a separate page (not a modal, not a fall-through search) showing every angle on a contractor. Reachable from the Co-Pilot dashboard, the staffers console, and the search box — all anchor-wrap contractor names to /contractor?name=... What's new on the page: 1. PROJECT INDEX — build-signal score Single 0-100 number with the drivers laid out beneath. Driver list is staffer-readable: "59 Chicago permits in 180d (+30) · OSHA 20 inspections (-25) · federal contractor (+15)". Score weights are placeholders to be replaced by an ML model once the 12 awaiting sources ship — the current 6 wired signals would not give a real model enough features. 2. HEAT MAP — every Chicago permit they've been contact_1 or contact_2 on, last 24 months, plotted on a leaflet dark map. Color by cost (green <$100K, amber $100K-$1M, red ≥$1M), radius proportional to cost so the staffer sees where money + activity concentrates. Click a marker for permit detail (cost, date, work type, address, permit ID). All 50 of Turner Construction's geocoded recent permits in Chicago plot end-to-end. 3. ACTIVITY TIMELINE — monthly permit count, bar chart, with the first/last month labels so the staffer sees momentum. 
Tooltip on each bar gives the count and total cost for that month. 4. 12 AWAITING SOURCES — placeholder cards for the public datasets that would 3× the build-signal feature count. Each card has: - source name (real, e.g. DOL Wage & Hour, EPA ECHO, MSHA, BBB) - one-liner in coordinator language ("Has this contractor stiffed workers? Will they pay our staffing invoices?") - "Would show:" sample shape so the engineering scope is concrete Order is staffing-decision relevance: 1. DOL Wage & Hour (WHD violations) 2. State Licensure Boards (active license + expiry) 3. Surety Bond Capacity (bonding ceiling) 4. EPA ECHO Compliance (env violations at sites) 5. DOT/FMCSA Carrier Safety (crash + OOS rates) 6. BBB Complaints + Rating 7. PACER Civil Suits (FLSA / Title VII / ADA) 8. UCC Lien Filings (cash flow distress) 9. D&B / Credit Bureau (PAYDEX, payment behavior) 10. State UI Employer Claims (workforce stability) 11. MSHA Mine Safety (excavation / aggregate / heavy) 12. Registered Apprenticeships (DOL RAPIDS pipeline) Server-side: entity.ts fetchContractorHistory now pulls the 50 most recent permits with id + lat/lng + work_description, so the heat map and timeline have what they need without a second SQL hop. The ContractorHistory.recent_permits type gained the optional fields. Front-end: contractor.html got 4 new render sections, leaflet wiring (stylesheet + script in head), placeholder grid CSS, and a PLACEHOLDERS const at the bottom with the 12 sources. All popup HTML is built via DOM construction (textContent + appendChild) — no innerHTML, no XSS. console.html: contractor names from /intelligence/permit_contracts now anchor-wrapped to /contractor?name=... so the click-through J described works from the staffers console too. Click stops propagation so the permit details element doesn't toggle on the same click. 
Verified end-to-end via playwright — Turner Construction profile shows: PIX score "Mixed signals — review drivers below" Heat map: "50 permits plotted · green/amber/red" 4 section labels in order 12 placeholder cards in the documented order --- mcp-server/console.html | 24 + mcp-server/contractor.html | 597 ++++++++ mcp-server/entity.ts | 2781 ++++++++++++++++++++++++++++++++++++ 3 files changed, 3402 insertions(+) create mode 100644 mcp-server/contractor.html create mode 100644 mcp-server/entity.ts diff --git a/mcp-server/console.html b/mcp-server/console.html index eada43c..56ca178 100644 --- a/mcp-server/console.html +++ b/mcp-server/console.html @@ -306,6 +306,30 @@ function loadChapter4(){ addr.style.cssText='color:#8b949e;font-size:12px;margin-top:2px'; card.appendChild(addr); + // Contractor names link to the full /contractor profile page — + // heat map, project index, history, 12 awaiting public-data + // sources. The staffer click-through J asked for. + if(p.contact_1_name || p.contact_2_name){ + var contractors=document.createElement('div'); + contractors.style.cssText='color:#8b949e;font-size:12px;margin-top:4px'; + contractors.appendChild(document.createTextNode('Contractors: ')); + var seen=[]; + [p.contact_1_name, p.contact_2_name].forEach(function(n,i){ + if(!n || seen.indexOf(n)>=0) return; + seen.push(n); + if(seen.length>1) contractors.appendChild(document.createTextNode(' · ')); + var a=document.createElement('a'); + a.href='/contractor?name='+encodeURIComponent(n); + a.target='_blank'; + a.rel='noopener'; + a.style.cssText='color:#58a6ff;text-decoration:none;border-bottom:1px dotted #58a6ff44'; + a.title='Open full contractor profile'; + a.textContent=n; + contractors.appendChild(a); + }); + card.appendChild(contractors); + } + card.appendChild(el('div','step-label','STEP 1 · Derive staffing need')); var s1=el('div','step-body'); s1.appendChild(document.createTextNode('Industry heuristic: ~1 worker per $150K of permit cost, capped 2-8. 
Resulting contract: ')); diff --git a/mcp-server/contractor.html b/mcp-server/contractor.html new file mode 100644 index 0000000..85ad7f8 --- /dev/null +++ b/mcp-server/contractor.html @@ -0,0 +1,597 @@ + + + +Contractor Profile · Staffing Co-Pilot + + + + +
+

Staffing Co-Pilot · Contractor Profile

+ ← Dashboard +
+
+ +
Type a name above to load the full portfolio across every wired data source.
+
+ + diff --git a/mcp-server/entity.ts b/mcp-server/entity.ts new file mode 100644 index 0000000..f1b45cb --- /dev/null +++ b/mcp-server/entity.ts @@ -0,0 +1,2781 @@ +// Project Index — per-Chicago-permit public-data signal portfolio. +// (Originally called "ETF" until we realized that's an SEC-regulated term; +// this is NOT an investment product. It's a custom index of Chicago +// building-related signals.) +// +// Pulls violation & corp-registry signals keyed to the contractor/applicant +// names extracted from public building permits. The point is to surface +// the kinds of signals a staffer or investor would want BEFORE committing +// workers or capital: OSHA history, corporate-registry status, federal +// contracts, parent-company chain, building violations, and (eventually) +// LLC-shuffle detection where a firm with a bad record dissolves and +// reforms under a new name. +// +// Sources right now: +// - OSHA Enforcement search (www.osha.gov/ords/imis/establishment.search) +// — HTML only, scraped. Reachable from our ASN (verified). +// - Illinois Secretary of State — BLOCKED from our datacenter ASN. +// We return a structured placeholder with an honest "source_unreachable" +// flag so the UI can show "awaiting source" rather than fake data. +// - OpenCorporates — requires API token even for free tier (not self-serve). +// +// Caching: JSONL at data/_entity_cache/entities.jsonl. Keyed by +// normalized company name. 30-day TTL because entity status and +// violation history don't change fast. +// +// Politeness: OSHA gets max 1 req/sec (with jitter) and a descriptive UA. 
+ +import { readFile, writeFile, mkdir } from "node:fs/promises"; +import { existsSync } from "node:fs"; +import { join } from "node:path"; +import { findTifDistrict } from "./tif_polygons.js"; + +const CACHE_DIR = "/home/profit/lakehouse/data/_entity_cache"; +const CACHE_FILE = join(CACHE_DIR, "entities.jsonl"); +const DEFAULT_TTL_MS = 30 * 24 * 60 * 60 * 1000; +const UA = "lakehouse-staffing-copilot/1.0 (contact: ops@devop.live; public-permit-enrichment)"; + +const OSHA_MIN_GAP_MS = 1200; +let lastOshaAt = 0; +async function oshaGate() { + const gap = Date.now() - lastOshaAt; + if (gap < OSHA_MIN_GAP_MS) { + await new Promise((r) => setTimeout(r, OSHA_MIN_GAP_MS - gap + Math.random() * 200)); + } + lastOshaAt = Date.now(); +} + +// ─── Types ──────────────────────────────────────────────────────── + +export type OshaInspection = { + id: string; + date: string; + state: string; + type: string; + scope: string; + naics: string; + establishment: string; + detail_url: string; +}; + +export type OshaBrief = { + source: "osha"; + fetched_at: string; + searched_name: string; + normalized_name: string; + source_url: string; + status: "ok" | "no_match" | "error"; + inspection_count: number; + most_recent_date: string | null; + recent_inspections: OshaInspection[]; + states_seen: string[]; + error?: string; +}; + +export type IlsosBrief = { + source: "ilsos"; + fetched_at: string; + searched_name: string; + status: "source_unreachable" | "ok" | "no_match" | "error"; + reason?: string; + entity_name?: string; + file_number?: string; + status_text?: string; + formation_date?: string; + registered_agent?: string; + principal_address?: string; + officers?: string[]; + error?: string; +}; + +// Ticker brief — SEC EDGAR (name → CIK → ticker) + Stooq (live price). +// Both are free & no-auth. SEC is the authoritative name→ticker source +// (company_tickers.json covers 10K+ US-listed issuers); Stooq layers +// current price on top. 
We intentionally skip Yahoo Finance — its free +// endpoint has been auth-walled since 2024. +// +// "Market cap proxy" is price × volume rather than shares_outstanding × +// price because shares_outstanding requires scraping an SEC filing for +// each issuer. Good enough for ranking "most profitable related company" +// until we wire XBRL facts in pass 2. +export type TickerBrief = { + source: "sec+stooq"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "error"; + ticker?: string; + company_name?: string; + cik?: string; + exchange?: string; + sic?: string; + sic_description?: string; + // Live quote fields (may be absent on holidays/weekends) + price?: number; + price_date?: string; + volume?: number; + open?: number; + high?: number; + low?: number; + day_change_pct?: number; + // Ranking hint — price × volume, used to sort tickers in the portfolio. + cap_proxy?: number; + sec_url?: string; + stooq_url?: string; + error?: string; +}; + +// Property owner brief — Cook County Assessor parcels dataset (c49d-89sn). +// Lookup by property address → PIN → owner mailing address. The mailing +// address is the key signal: it tells you who the assessor mails the +// tax bill to. PO Box in Minneapolis MN ~= TARGET CORP HQ (verified +// 1101 W Jackson Blvd → PO Box 9456 Minneapolis 55440 = Target). +// Free, no auth, Socrata. +export type PropertyOwnerBrief = { + source: "cook_county_assessor"; + fetched_at: string; + searched_address: string; + status: "ok" | "no_match" | "error"; + pin?: string; + property_address?: string; + property_zip?: string; + // Mailing addr is the smoking gun for ownership inference. + mailing_address?: string; + mailing_city?: string; + mailing_state?: string; + mailing_zip?: string; + township?: string; + ward?: string; + longitude?: string; + latitude?: string; + source_url?: string; + error?: string; +}; + +// Building violations on the property. 
// Stop-work orders, code violations,
// failed inspections — direct "is this build behind schedule" signals.
// Dataset 22u3-xenr.
export type PropertyViolationsBrief = {
  source: "chicago_building_violations";
  fetched_at: string;
  searched_address: string;
  status: "ok" | "error";
  total_violations: number;
  open_violations: number;
  stop_work_orders: number;
  most_recent_date?: string;
  recent_violations: Array<{
    date: string;
    status: string;
    description: string;
    department: string;
  }>;
  error?: string;
};

// Debarment / Excluded Parties — federal SAM.gov + Illinois Dept of Labor.
// Both are "free in spirit" but blocked behind some friction:
//   - SAM.gov v3 API requires registered API key (free at sam.gov but
//     requires email + entity registration; can't be self-served from
//     our server without a one-time human signup).
//   - IDOL prevailing-wage debarment list is published as HTML/PDF on
//     labor.illinois.gov, no API — needs a scraper + periodic refresh.
// Returning structured "needs_setup" placeholder so the UI can show
// the path forward rather than fake clean.
export type DebarmentBrief = {
  source: "sam_gov+idol";
  fetched_at: string;
  searched_name: string;
  status: "ok" | "no_match" | "needs_setup";
  reason?: string;
  sam_excluded?: boolean;
  idol_debarred?: boolean;
  excluded_until?: string;
  agency?: string;
};

/**
 * Debarment lookup placeholder. Serves a cached brief when one exists for
 * the normalized name; otherwise reports `needs_setup` with the reason the
 * real sources are not wired yet. Nothing is written to the cache here.
 */
export async function fetchDebarmentBrief(name: string): Promise<DebarmentBrief> {
  const fetchedAt = new Date().toISOString();
  const hit = await cacheGet(normalizeEntityName(name), "debarment");
  if (hit) return hit;
  return {
    source: "sam_gov+idol",
    fetched_at: fetchedAt,
    searched_name: name,
    status: "needs_setup",
    reason:
      "SAM.gov v3 needs registered API key (free signup at sam.gov). IDOL prevailing-wage debarment list is HTML-only at labor.illinois.gov — needs scraper + periodic refresh.",
  };
}

// SEC EDGAR 10-K Exhibit 21 — parent/subsidiary tree.
The "private GC → +// public parent ticker" chain. Each public company files Exhibit 21 of +// their 10-K listing all subsidiaries. We resolve by: +// 1. Fetch SEC company submissions for a known parent ticker +// 2. Find the most recent 10-K filing +// 3. Fetch the Exhibit 21 attachment HTML +// 4. Parse subsidiary names +// +// Reverse direction (subsidiary name → parent) requires either: +// (a) Pre-build an index of "subsidiary → parent" by walking every +// 10-K Exhibit 21 in our SEC index (~10K filings, large) +// (b) Use a known mapping for high-volume parents (Turner→ACS, +// Skanska→SKA-B.ST, Balfour Beatty→BBY.L, etc) +// (b) is pragmatic for v1; (a) is the proper long-term answer. +export type ParentLinkBrief = { + source: "sec_exhibit21+wikidata"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_link" | "needs_index"; + reason?: string; + parent_name?: string; + parent_ticker?: string; + parent_exchange?: string; + parent_country?: string; + link_source?: string; +}; + +// Hand-curated map of well-known private contractor → public parent. +// The cleanest seed of the index — covers the dominant Chicago GCs. +// Each entry includes the source citation so a human can verify. 
/** Curated entry for a contractor with no public parent. */
function privateFirm(key: string, reason: string): ParentLinkBrief {
  return {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: key,
    status: "no_link",
    reason,
  };
}

/** Curated entry for a contractor owned by a listed parent. */
function listedParent(
  key: string,
  parent: {
    parent_name: string;
    parent_ticker: string;
    parent_exchange: string;
    parent_country: string;
    link_source: string;
  },
): ParentLinkBrief {
  return {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: key,
    status: "ok",
    ...parent,
  };
}

// Keys are upper-case, punctuation-free word sequences; findKnownParent
// reduces the searched name to the same form before comparing.
const KNOWN_PARENT_MAP: Record<string, ParentLinkBrief> = {
  "TURNER CONSTRUCTION": listedParent("TURNER CONSTRUCTION", {
    parent_name: "Hochtief AG (subsidiary of ACS Group)",
    parent_ticker: "HOC.DE",
    parent_exchange: "Frankfurt (also ACS:MC.MC Madrid)",
    parent_country: "DE/ES",
    link_source: "Hochtief 2024 Annual Report — Turner is wholly-owned via Hochtief Americas",
  }),
  "SKANSKA USA": listedParent("SKANSKA USA", {
    parent_name: "Skanska AB",
    parent_ticker: "SKA-B.ST",
    parent_exchange: "Stockholm",
    parent_country: "SE",
    link_source: "Skanska 2024 Annual Report Exhibit",
  }),
  "WALSH GROUP": privateFirm(
    "WALSH GROUP",
    "Walsh Group is private (Chicago — Walsh family-owned).",
  ),
  "POWER CONSTRUCTION": privateFirm(
    "POWER CONSTRUCTION",
    "Power Construction Co is private (Schaumburg, IL — family-owned).",
  ),
  "PEPPER CONSTRUCTION": privateFirm(
    "PEPPER CONSTRUCTION",
    "Pepper Construction Group is private (Barrington, IL — Pepper family).",
  ),
  "CLAYCO": privateFirm("CLAYCO", "Clayco is private (Chicago — Bob Clark family)."),
  "MCHUGH CONSTRUCTION": privateFirm(
    "MCHUGH CONSTRUCTION",
    "James McHugh Construction is private.",
  ),
  "BULLEY ANDREWS": privateFirm(
    "BULLEY ANDREWS",
    "Bulley & Andrews is private (Chicago, family-owned since 1891).",
  ),
  "LEND LEASE": listedParent("LEND LEASE", {
    parent_name: "Lendlease Group",
    parent_ticker: "LLC.AX",
    parent_exchange: "Sydney (ASX)",
    parent_country: "AU",
    link_source: "Lendlease Annual Report — global construction division",
  }),
  "GILBANE": privateFirm(
    "GILBANE",
    "Gilbane is private (Providence RI — Gilbane family, 6th gen).",
  ),
  "WHITING TURNER": privateFirm(
    "WHITING TURNER",
    "Whiting-Turner is private (Baltimore MD).",
  ),
  "STO BUILDING GROUP": privateFirm(
    "STO BUILDING GROUP",
    "STO Building Group is private (Structure Tone family of firms).",
  ),
};

/**
 * Look a contractor name up in KNOWN_PARENT_MAP.
 *
 * The name is upper-cased and every run of non-alphanumerics becomes one
 * space, so "Bulley & Andrews, LLC" and "WHITING-TURNER" line up with the
 * punctuation-free keys. Keys must match on whole words. When several keys
 * match, the one starting earliest in the name wins (longest key on a tie),
 * so "WHITING TURNER CONSTRUCTION" is not credited to "TURNER CONSTRUCTION".
 *
 * @returns the curated entry (with its placeholder fetched_at/searched_name)
 *          or null when no key matches.
 */
function findKnownParent(name: string): ParentLinkBrief | null {
  const words = (name || "").toUpperCase().replace(/[^A-Z0-9]+/g, " ").trim();
  if (!words) return null;
  // Pad with spaces so indexOf(" KEY ") enforces word boundaries.
  const haystack = ` ${words} `;
  let best: { key: string; at: number } | null = null;
  for (const key of Object.keys(KNOWN_PARENT_MAP)) {
    const at = haystack.indexOf(` ${key} `);
    if (at < 0) continue;
    if (!best || at < best.at || (at === best.at && key.length > best.key.length)) {
      best = { key, at };
    }
  }
  return best ? KNOWN_PARENT_MAP[best.key] : null;
}

/**
 * Resolve a contractor to its public parent.
 *
 * Order: cached brief for the normalized name, then the curated map, then
 * a `needs_index` placeholder. Curated hits are returned with the current
 * timestamp and the caller's original spelling of the name.
 */
export async function fetchParentLink(name: string): Promise<ParentLinkBrief> {
  const now = new Date().toISOString();
  const cached = await cacheGet(normalizeEntityName(name), "parent");
  if (cached) return cached;
  // Match against curated map first.
  const known = findKnownParent(name);
  if (known) {
    return { ...known, fetched_at: now, searched_name: name };
  }
  return {
    source: "sec_exhibit21+wikidata",
    fetched_at: now,
    searched_name: name,
    status: "needs_index",
    reason:
      "Not in curated parent map. Full SEC Exhibit 21 walk would build a complete subsidiary→parent index — queued for batch 5.",
  };
}

// Site Context — non-contractor signals tied to the property's location.
// TIF status (public subsidy), landmark (preservation = longer schedule),
// community area + median income, lat/long. Pulls from Chicago Open Data.
+export type SiteContextBrief = { + source: "chicago_socrata"; + fetched_at: string; + searched_address: string; + status: "ok" | "no_match" | "error"; + in_tif_district?: boolean; + tif_district_name?: string; + is_landmark?: boolean; + landmark_name?: string; + community_area?: string; + community_area_number?: string; + ward?: string; + longitude?: string; + latitude?: string; + // Transit access: distance to nearest CTA L station (meters). + // Affects worker access — sub-1500m means crews can ride transit + // instead of needing parking/shuttle. + nearest_cta_station?: string; + nearest_cta_lines?: string; + nearest_cta_distance_m?: number; + // Permits within 800m issued in last 90d. High count = active labor + // competition for the same crew pool. + nearby_permits_90d?: number; + nearby_permits_value_90d?: number; + error?: string; +}; + +// MBE/WBE/DBE diversity certification — Chicago Dept of Procurement. +// Note: as of 2026-04, the catalog-listed datasets (2iq3-bugw, 69yt-tb5j, +// ci93-uc8s, etc.) all return 404 on the resource endpoint despite +// appearing in the catalog search. Likely archived or auth-gated. +// Returning needs_setup until alternate path identified. +export type DiversityCertBrief = { + source: "chicago_dps_economic_inclusion"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "error" | "needs_setup"; + reason?: string; + certifications: Array<{ category: string; expiration?: string; type?: string }>; + source_url?: string; + error?: string; +}; + +// News mentions — Google News RSS (free, no auth). Returns recent +// headlines mentioning the contractor name. Quick reputation signal. 
+export type NewsBrief = { + source: "google_news_rss"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "error"; + total_mentions: number; + recent_headlines: Array<{ title: string; source: string; date: string; url: string }>; + error?: string; +}; + +// BLS construction employment — macro context for ALL Chicago permits. +// Series SMU17169802000000001 = Construction, all employees, Chicago- +// Naperville-Elgin MSA, NSA, monthly. Free public API, no auth. +// Cached process-wide (single request per minutes-window) since this +// is macroeconomic context — same number applies to every permit shown. +export type BlsTrendBrief = { + source: "bls.gov"; + fetched_at: string; + series_id: string; + status: "ok" | "error"; + latest: { period: string; value: number } | null; + yoy_change_pct: number | null; + mom_change_pct: number | null; + recent: Array<{ period: string; value: number }>; // last 6 months + trend: "growing" | "stable" | "declining" | "unknown"; + error?: string; +}; + +// News sentiment — basic positive/negative keyword scoring layered over +// the existing Google News RSS results. Not LLM-grade but catches the +// obvious "lawsuit / fraud / fired / OSHA / debt" signals vs "awarded / +// expansion / contract win" signals. Output is a -1..+1 score with +// counts of positive/negative-flagged headlines. +export type NewsSentiment = { + score: number; // -1 to +1 + positive: number; + negative: number; + neutral: number; + flagged_headlines: Array<{ title: string; polarity: "pos" | "neg"; reasons: string[] }>; +}; + +// OSHA Severe Violator Enforcement Program (SVEP) — explicit named +// list of contractors placed in heightened federal/state OSHA +// enforcement after a willful or repeat-violation event. Highest- +// signal "this firm has been formally flagged as a worst actor" check. +// +// Data refreshed quarterly from osha.gov/enforcement/svep XLSX +// (Federal + State logs). 
// Pre-parsed to JSON at
// /data/_entity_cache/svep_log.json. ~1,044 entries (990 federal + 54 state).
// Manual refresh: re-download Public_Federal_SVEP_Tracking_Log.xlsx +
// Public_State_SVEP_Log.xlsx, run the Python parser, update JSON.
export type SvepBrief = {
  source: "osha.gov/enforcement/svep";
  fetched_at: string;
  searched_name: string;
  status: "ok" | "no_match" | "error";
  flagged: boolean;
  matched_entries: Array<{ name: string }>;
  log_size: number;
  log_age_days: number;
};

let _svepIdx: Array<{ name: string; normalized: string }> | null = null;
let _svepMeta: { fetched_at: string; size: number } | null = null;

/**
 * Load the pre-parsed SVEP log into memory once. A missing or unreadable
 * file leaves an empty index, so lookups report `no_match` with
 * `log_size: 0` rather than throwing.
 */
async function ensureSvepLoaded(): Promise<void> {
  if (_svepIdx) return;
  const path = "/home/profit/lakehouse/data/_entity_cache/svep_log.json";
  if (!existsSync(path)) {
    _svepIdx = [];
    _svepMeta = { fetched_at: "", size: 0 };
    return;
  }
  try {
    const raw = JSON.parse(await readFile(path, "utf-8"));
    // Guard against a file whose `companies` field is not a list.
    const companies: Array<{ name: string; normalized: string }> =
      Array.isArray(raw?.companies) ? raw.companies : [];
    _svepIdx = companies;
    _svepMeta = { fetched_at: raw?.fetched_at || "unknown", size: companies.length };
  } catch {
    _svepIdx = [];
    _svepMeta = { fetched_at: "", size: 0 };
  }
}

// Trade / corporate filler words. Two firms sharing only these (e.g. both
// "… CONSTRUCTION SERVICES") are not the same firm, so they must not count
// toward the word-overlap threshold.
const SVEP_GENERIC_WORDS = new Set([
  "CONSTRUCTION", "COMPANY", "SERVICES", "SERVICE", "GROUP", "SYSTEMS",
  "SOLUTIONS", "RESOURCES", "TECHNOLOGIES", "INDUSTRIES", "PARTNERS",
  "ELECTRIC", "MECHANICAL", "PLUMBING", "ROOFING", "DEVELOPMENT", "REALTY",
  "PROPERTIES", "ENGINEERING", "ASSOCIATES", "INTERNATIONAL", "NATIONAL",
  "AMERICAN", "GENERAL", "AUTHORITY", "HOLDINGS", "ENTERPRISES", "LOGISTICS",
  "COMMUNICATIONS", "FINANCIAL", "CAPITAL", "PROPERTY", "RESIDENTIAL",
  "COMMERCIAL", "GLOBAL", "UNITED", "STATES", "STATE", "FEDERAL",
  "MANAGEMENT", "CONTRACTING", "CONTRACTORS", "BUILDERS", "BUILDING",
]);

/**
 * Distinct words of a normalized name that can identify a firm: at least
 * 4 characters and not a generic trade word. Returning a Set means a word
 * repeated in the name is counted once.
 */
function svepIdentifyingWords(normalized: string): Set<string> {
  return new Set(
    normalized.split(" ").filter((w) => w.length >= 4 && !SVEP_GENERIC_WORDS.has(w)),
  );
}

/**
 * Check a contractor name against the SVEP log.
 *
 * An entry matches when any of its "/"-separated aliases either normalizes
 * to exactly the searched name, or shares at least two distinct identifying
 * words with it. At most five matches are returned.
 */
export async function fetchSvepBrief(name: string): Promise<SvepBrief> {
  const now = new Date().toISOString();
  await ensureSvepLoaded();
  const idx = _svepIdx || [];
  const meta = _svepMeta || { fetched_at: "", size: 0 };
  const queryNorm = normalizeEntityName(name);
  if (!queryNorm) {
    return {
      source: "osha.gov/enforcement/svep",
      fetched_at: now,
      searched_name: name,
      status: "no_match",
      flagged: false,
      matched_entries: [],
      log_size: meta.size,
      log_age_days: 0,
    };
  }
  const queryWords = svepIdentifyingWords(queryNorm);
  const matches: Array<{ name: string }> = [];
  for (const e of idx) {
    // SVEP names use "/" to separate aliases. Check each alias; the first
    // alias that matches represents the entry.
    const aliases = String(e?.name ?? "").split("/").map((a) => a.trim()).filter(Boolean);
    for (const a of aliases) {
      const aNorm = normalizeEntityName(a);
      if (!aNorm) continue;
      // Direct exact normalized match
      if (aNorm === queryNorm) {
        matches.push({ name: a });
        break;
      }
      // Word-overlap: at least 2 distinct shared identifying words
      let overlap = 0;
      for (const w of svepIdentifyingWords(aNorm)) {
        if (queryWords.has(w)) overlap++;
      }
      if (overlap >= 2) {
        matches.push({ name: a });
        break;
      }
    }
  }
  let logAge = 0;
  if (meta.fetched_at) {
    const t = Date.parse(meta.fetched_at);
    // "unknown" (or any unparseable stamp) leaves the age at 0.
    if (!isNaN(t)) logAge = Math.round((Date.now() - t) / 86400000);
  }
  return {
    source: "osha.gov/enforcement/svep",
    fetched_at: now,
    searched_name: name,
    status: matches.length > 0 ? "ok" : "no_match",
    flagged: matches.length > 0,
    matched_entries: matches.slice(0, 5),
    log_size: meta.size,
    log_age_days: logAge,
  };
}

// OSHA Severe Injury Reports — placeholder, bot-protected.
export type OshaSirBrief = {
  source: "osha.gov/severeinjury";
  fetched_at: string;
  searched_name: string;
  status: "needs_setup";
  reason: string;
};

// Cook County Recorder mechanics liens — placeholder, scrape research needed.
export type LiensBrief = {
  source: "cook_county_recorder";
  fetched_at: string;
  searched_address: string;
  status: "needs_setup";
  reason: string;
};

// USASpending.gov federal contracts. Free API, no auth.
// api.usaspending.gov/api/v2/search/spending_by_award/
// Returns total awarded $, awarding agency, NAICS, subaward count.
// Strong signal: a Chicago contractor with $50M of federal work has
// scale + compliance posture. Quiet contractors are private/local.
+export type FederalContractsBrief = { + source: "usaspending.gov"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "error"; + total_awards_count: number; + total_awards_value: number; + most_recent_award_date?: string; + top_agencies: Array<{ agency: string; value: number }>; + source_url?: string; + error?: string; +}; + +// NLRB cases — union actions / unfair labor practice charges. +// Free public search at apps.nlrb.gov/case but the JSON endpoint +// is undocumented. Need short scraper. For now placeholder. +export type NlrbBrief = { + source: "nlrb.gov"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "needs_setup"; + reason?: string; + total_cases?: number; + recent_cases?: Array<{ case_number: string; case_type: string; date: string; status: string }>; +}; + +// Union locals likely to handle each trade. Static lookup — no API. +// Static is fine because Chicago union jurisdictions don't change often, +// and the granularity here is "show staffers what unions are involved." +// We don't claim certainty per project; we surface the typical mapping. +export type UnionMapping = { + trade: string; + primary_locals: Array<{ name: string; local: string; jurisdiction: string }>; + // Training centers run by the JATCs (joint apprenticeship) + training_centers: Array<{ name: string; address: string; program_length: string }>; +}; + +// Contractor history over the Chicago permit dataset. Cheap signal: +// how active has this contractor been historically, and what's their +// trajectory in the last 24 months? Helpful for spotting "suddenly +// active" (possible shell LLC) or "long-tenured + now declining" +// (losing market share). 
export type ContractorHistory = {
  source: "chicago_socrata";
  fetched_at: string;
  searched_name: string;
  status: "ok" | "error";
  permits_last_180d: number;
  permits_last_24mo: number;
  permits_historical_total: number;
  total_cost_last_24mo: number;
  trend: "growing" | "stable" | "declining" | "new" | "unknown";
  recent_permits: Array<{ date: string; work_type: string; cost: number; address: string; lat?: number; lng?: number; permit_id?: string; description?: string }>;
  error?: string;
};

export type EntityBrief = {
  key: string;
  display_name: string;
  role: string;
  ticker: string;
  osha: OshaBrief | null;
  ilsos: IlsosBrief | null;
  stock: TickerBrief | null;
  history: ContractorHistory | null;
  debarment: DebarmentBrief | null;
  parent_link: ParentLinkBrief | null;
  federal: FederalContractsBrief | null;
  nlrb: NlrbBrief | null;
  diversity: DiversityCertBrief | null;
  news: NewsBrief | null;
  news_sentiment: NewsSentiment | null;
  osha_sir: OshaSirBrief | null;
  svep: SvepBrief | null;
  risk: {
    score: number | null;
    factors: string[];
    partial: boolean;
  };
};

// Project Index Score — auditable weighted aggregation of every signal
// we pulled. Each contribution carries its weight, direction (good/bad),
// raw value, and a human-readable note so staffers can see exactly why
// the project landed where it did. Score ranges 0-100 anchored at 50
// neutral. Bands: red (<30), amber (30-45), neutral (45-55), green
// (55-75), strong (75+).
export type SignalContribution = {
  signal: string;        // e.g. "osha_recent_inspections"
  weight: number;        // 1-10 importance
  direction: -1 | 0 | 1; // -1 bad, 0 neutral, +1 good
  raw: any;              // underlying value
  contribution: number;  // signed points moved away from 50
  note: string;          // explanation
};
export type ProjectIndexScore = {
  score: number;
  band: "red" | "amber" | "neutral" | "green" | "strong";
  contributions: SignalContribution[];
  partial: boolean; // true when ILSOS / SAM / NLRB / other key sources unavailable
};

export type PermitEntityBrief = {
  permit_id: string;
  property: {
    address: string;
    ticker: string;
    owner: PropertyOwnerBrief | null;
    violations: PropertyViolationsBrief | null;
    union: UnionMapping | null;
    site_context: SiteContextBrief | null;
    liens: LiensBrief | null;
  };
  entities: EntityBrief[];
  tickers: TickerBrief[];
  // Macroeconomic context — same for all permits served in same window.
  // Tells the staffer "the broader Chicago construction labor market
  // is shrinking" or "growing" — context for the per-permit decision.
  macro: BlsTrendBrief | null;
  // Aggregate score across all entities + property signals
  index_score: ProjectIndexScore;
  roadmap: string[];
  generated_at: string;
};

// ─── Cache ────────────────────────────────────────────────────────
// Append-only JSONL file mirrored into an in-memory Map keyed by
// `${kind}:${key}`. Every cacheSet appends one row; on load, the row with
// the latest expiry wins for each key.

type CacheKind =
  | "osha" | "ilsos" | "ticker" | "history" | "owner" | "violations"
  | "debarment" | "parent" | "federal" | "nlrb"
  | "site_context" | "diversity" | "news" | "osha_sir" | "liens";
type CacheRow = {
  key: string;
  kind: CacheKind;
  data: any;
  expires_at: number;
};

// Set only once the disk load has finished.
let cacheMap: Map<string, CacheRow> | null = null;
// In-flight (or completed) load shared by every caller, so a lookup that
// arrives while the file is still being read waits for the real contents
// instead of seeing an empty map.
let cacheLoading: Promise<Map<string, CacheRow>> | null = null;

/** Read the JSONL cache file into a fresh Map. Never throws; failures are logged. */
async function loadCacheFromDisk(): Promise<Map<string, CacheRow>> {
  const loaded = new Map<string, CacheRow>();
  try {
    if (!existsSync(CACHE_DIR)) {
      await mkdir(CACHE_DIR, { recursive: true });
    }
    if (!existsSync(CACHE_FILE)) return loaded;
    const text = await readFile(CACHE_FILE, "utf-8");
    for (const line of text.split("\n")) {
      if (!line.trim()) continue;
      try {
        const row = JSON.parse(line) as CacheRow;
        const mk = `${row.kind}:${row.key}`;
        const prev = loaded.get(mk);
        if (!prev || prev.expires_at < row.expires_at) loaded.set(mk, row);
      } catch {
        /* skip malformed row */
      }
    }
  } catch (e) {
    console.warn("[entity] cache load failed:", (e as Error).message);
  }
  return loaded;
}

/** Return the in-memory cache, loading it from disk on first use. */
async function ensureCacheLoaded(): Promise<Map<string, CacheRow>> {
  if (cacheMap) return cacheMap;
  if (!cacheLoading) {
    cacheLoading = loadCacheFromDisk().then((loaded) => {
      cacheMap = loaded;
      return loaded;
    });
  }
  return cacheLoading;
}

/**
 * Look up a cached payload.
 * @returns the stored data, or null when absent or expired.
 */
async function cacheGet<T = any>(key: string, kind: CacheKind): Promise<T | null> {
  const m = await ensureCacheLoaded();
  const row = m.get(`${kind}:${key}`);
  if (!row) return null;
  if (row.expires_at < Date.now()) return null;
  return row.data;
}

/**
 * Store a payload in memory and append it to the cache file. A failed
 * disk write is logged and otherwise ignored; the in-memory entry stays.
 */
async function cacheSet(
  key: string,
  kind: CacheKind,
  data: any,
  ttl = DEFAULT_TTL_MS,
) {
  const m = await ensureCacheLoaded();
  const row: CacheRow = { key, kind, data, expires_at: Date.now() + ttl };
  m.set(`${kind}:${row.key}`, row);
  try {
    await writeFile(CACHE_FILE, JSON.stringify(row) + "\n", { flag: "a" });
  } catch (e) {
    console.warn("[entity] cache write failed:", (e as Error).message);
  }
}

// ─── Name normalization
// ─────────────────────────────────────────────
// Collapses "ADT,LLC" / "ADT LLC" / "ADT, L.L.C." to a single
// canonical key so cache hits don't miss on punctuation drift.

/**
 * Canonical cache/match key for an entity name: upper-cased, with
 * punctuation, THE/AND and corporate suffixes removed and whitespace
 * collapsed. Empty or missing input yields "".
 */
export function normalizeEntityName(name: string): string {
  // Applied strictly in this order; later steps rely on earlier ones
  // (e.g. "L.L.C." only matches the suffix pattern once dots are spaces).
  const rewrites: Array<[RegExp, string]> = [
    [/[,.\-]/g, " "],
    [/\b(THE|AND)\b/g, ""],
    [/\b(L\s*L\s*C|L\s*L\s*P|L\s*P|INC|CO|CORP|COMPANY|CORPORATION|LTD|LIMITED|GROUP|HOLDINGS|ENTERPRISES)\b/g, ""],
    [/[^\w ]/g, " "],
    [/\s+/g, " "],
  ];
  let text = (name || "").toUpperCase();
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}

/**
 * Short display tag for an entity: up to three words of the normalized
 * name, six characters each, hyphen-joined and capped at 18 characters.
 */
export function entityTicker(name: string): string {
  const words = normalizeEntityName(name).split(" ").filter(Boolean);
  const stem = words
    .slice(0, 3)
    .map((word) => word.slice(0, 6))
    .join("-")
    .slice(0, 18);
  return `LLC·${stem || "UNKNOWN"}`;
}

/**
 * Short display tag for a property: the first word that starts with a
 * letter (max 8 chars, upper-cased) plus the first five digits found
 * anywhere in the address.
 */
function addressTicker(address: string): string {
  const digitRuns = address.match(/\d+/g) || [];
  const allDigits = digitRuns.join("");
  const leadWord = address.split(/\s+/).find((w) => /^[A-Za-z]/.test(w)) || "BLDG";
  const label = leadWord.toUpperCase().slice(0, 8);
  return `BLDG·${label}-${allDigits.slice(0, 5) || "0000"}`;
}

// ─── OSHA scraper ──────────────────────────────────────────────────

/**
 * Fetch the OSHA establishment-search results page for `name`, after
 * waiting on the rate gate.
 * @throws Error("osha HTTP <status>") on a non-2xx response; fetch and
 *         timeout errors propagate.
 */
async function fetchOshaHTML(name: string): Promise<string> {
  await oshaGate();
  const searchUrl = new URL("https://www.osha.gov/ords/imis/establishment.search");
  const query: Array<[string, string]> = [
    ["p_logger", "1"],
    ["establishment", name],
    ["State", "all"],
    ["p_case", "all"],
  ];
  for (const [param, value] of query) {
    searchUrl.searchParams.set(param, value);
  }
  const response = await fetch(searchUrl.toString(), {
    headers: { "User-Agent": UA },
    signal: AbortSignal.timeout(15000),
  });
  if (!response.ok) throw new Error(`osha HTTP ${response.status}`);
  return await response.text();
}

// Parse inspection list rows. Uses matchAll so we don't rely on
// stateful regex iteration.
/**
 * Extract inspection rows from an OSHA establishment-search results page.
 *
 * Only <tr> rows containing an `inspection_detail` link are kept. Cell
 * text has its tags stripped, `&nbsp;` turned into a normal space and
 * whitespace collapsed. Columns are read by position (3 = date,
 * 5 = state, 6 = type, 7 = scope, 9 = NAICS, last = establishment);
 * a missing cell yields "".
 */
function parseOshaInspections(html: string): OshaInspection[] {
  const inspections: OshaInspection[] = [];
  // `\b` keeps <tr>/<td> from also matching longer tag names.
  const rows = [...html.matchAll(/<tr\b[^>]*>([\s\S]*?)<\/tr>/gi)];
  for (const rm of rows) {
    const row = rm[1];
    if (!row.includes("inspection_detail")) continue;
    const tds = [...row.matchAll(/<td\b[^>]*>([\s\S]*?)<\/td>/gi)].map((t) =>
      t[1]
        .replace(/<[^>]+>/g, "")
        .replace(/&nbsp;|\u00a0/g, " ")
        .replace(/\s+/g, " ")
        .trim(),
    );
    const idMatch = row.match(/establishment\.inspection_detail\?id=([\d.]+)/);
    if (!idMatch) continue;
    const id = idMatch[1];
    inspections.push({
      id,
      date: tds[3] || "",
      state: tds[5] || "",
      type: tds[6] || "",
      scope: tds[7] || "",
      naics: tds[9] || "",
      establishment: tds[tds.length - 1] || "",
      detail_url: `https://www.osha.gov/ords/imis/establishment.inspection_detail?id=${id}`,
    });
  }
  return inspections;
}

/**
 * OSHA inspection summary for a contractor.
 *
 * Returns `no_match` immediately when the name normalizes to nothing.
 * Successful lookups (including zero inspections) are cached under the
 * normalized name; failures come back as `status: "error"` and are not
 * cached, so the next call retries.
 *
 * @param useCache pass false to bypass the cache read (the result is
 *                 still written back).
 */
export async function fetchOshaBrief(name: string, useCache = true): Promise<OshaBrief> {
  const normalized = normalizeEntityName(name);
  const now = new Date().toISOString();
  if (!normalized) {
    return {
      source: "osha",
      fetched_at: now,
      searched_name: name,
      normalized_name: "",
      source_url: "",
      status: "no_match",
      inspection_count: 0,
      most_recent_date: null,
      recent_inspections: [],
      states_seen: [],
    };
  }
  if (useCache) {
    const hit = await cacheGet(normalized, "osha");
    if (hit) return hit;
  }
  // Human-clickable form of the search that fetchOshaHTML performs.
  const u = `https://www.osha.gov/ords/imis/establishment.search?p_logger=1&establishment=${encodeURIComponent(name)}&State=all&p_case=all`;
  try {
    const html = await fetchOshaHTML(name);
    const inspections = parseOshaInspections(html);
    // Newest first; unparseable dates sort as epoch 0, i.e. last.
    inspections.sort((a, b) => {
      const pa = Date.parse(a.date) || 0;
      const pb = Date.parse(b.date) || 0;
      return pb - pa;
    });
    const states_seen = [...new Set(inspections.map((i) => i.state).filter(Boolean))];
    const brief: OshaBrief = {
      source: "osha",
      fetched_at: now,
      searched_name: name,
      normalized_name: normalized,
      source_url: u,
      status: inspections.length > 0 ? "ok" : "no_match",
      inspection_count: inspections.length,
      most_recent_date: inspections[0]?.date ?? null,
      recent_inspections: inspections.slice(0, 5),
      states_seen,
    };
    await cacheSet(normalized, "osha", brief);
    return brief;
  } catch (e) {
    return {
      source: "osha",
      fetched_at: now,
      searched_name: name,
      normalized_name: normalized,
      source_url: u,
      status: "error",
      inspection_count: 0,
      most_recent_date: null,
      recent_inspections: [],
      states_seen: [],
      error: (e as Error).message,
    };
  }
}

// ─── ILSOS placeholder ────────────────────────────────────────────

/**
 * Illinois Secretary of State lookup placeholder. Serves a cached brief
 * when present; otherwise reports `source_unreachable` with the reason.
 * Nothing is written to the cache here.
 */
export async function fetchIlsosBrief(name: string): Promise<IlsosBrief> {
  const normalized = normalizeEntityName(name);
  const cached = await cacheGet(normalized, "ilsos");
  if (cached) return cached;
  const brief: IlsosBrief = {
    source: "ilsos",
    fetched_at: new Date().toISOString(),
    searched_name: name,
    status: "source_unreachable",
    reason:
      "Illinois SoS apps.ilsos.gov blocks our datacenter ASN. Pending: VPN-routed fetch, OpenCorporates API token (~200/mo free), or paid IL bulk feed.",
  };
  return brief;
}

// ─── SEC EDGAR ticker lookup ──────────────────────────────────────
// The canonical free source. A single JSON file at sec.gov/files/
// company_tickers.json maps every US-listed ticker → CIK → company
// name (~10K issuers). We fetch it at most once per 24h and keep it
// in memory + on disk.

const SEC_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json";
const SEC_TICKERS_CACHE = join(CACHE_DIR, "sec_company_tickers.json");
const SEC_TICKERS_TTL_MS = 24 * 60 * 60 * 1000;

type SecTickerRow = { cik_str: number; ticker: string; title: string };
let secTickersIdx: {
  rows: SecTickerRow[];
  byNormalized: Map<string, SecTickerRow[]>;
  // Timestamp of the underlying data (download time), not of the in-memory build.
  refreshed_at: number;
} | null = null;

/**
 * Return the SEC ticker index, refreshing it when older than 24h.
 *
 * Source order: fresh in-memory index, fresh on-disk copy, download from
 * SEC. If the download fails, the previous in-memory index is reused, or
 * failing that a stale on-disk copy, so one SEC outage does not disable
 * ticker resolution. Returns null only when no copy exists anywhere.
 */
async function ensureSecTickerIndex(): Promise<typeof secTickersIdx> {
  const fresh = secTickersIdx && Date.now() - secTickersIdx.refreshed_at < SEC_TICKERS_TTL_MS;
  if (fresh) return secTickersIdx;

  let raw: Record<string, SecTickerRow> | null = null;
  let rawAt = 0;
  let staleDisk: { rows: Record<string, SecTickerRow>; at: number } | null = null;

  // Disk first
  if (existsSync(SEC_TICKERS_CACHE)) {
    try {
      const parsed = JSON.parse(await readFile(SEC_TICKERS_CACHE, "utf-8"));
      const at = Number(parsed?._fetched_at) || 0;
      if (parsed?.rows && typeof parsed.rows === "object") {
        if (at && Date.now() - at < SEC_TICKERS_TTL_MS) {
          raw = parsed.rows;
          rawAt = at;
        } else {
          // Too old to prefer, but kept as a last resort.
          staleDisk = { rows: parsed.rows, at };
        }
      }
    } catch {
      /* unreadable cache file — fall through to a network refresh */
    }
  }

  // Refresh from SEC
  if (!raw) {
    try {
      const res = await fetch(SEC_TICKERS_URL, {
        headers: { "User-Agent": UA, "Accept": "application/json" },
        signal: AbortSignal.timeout(20000),
      });
      if (res.ok) {
        const body = await res.json();
        if (body && typeof body === "object") {
          raw = body as Record<string, SecTickerRow>;
          rawAt = Date.now();
        }
      } else {
        console.warn("[entity] sec tickers fetch failed:", `HTTP ${res.status}`);
      }
    } catch (e) {
      console.warn("[entity] sec tickers fetch failed:", (e as Error).message);
    }
    if (raw) {
      // Persisting is best-effort: the downloaded data is usable either way.
      try {
        await writeFile(
          SEC_TICKERS_CACHE,
          JSON.stringify({ _fetched_at: rawAt, rows: raw }),
        );
      } catch (e) {
        console.warn("[entity] sec tickers cache write failed:", (e as Error).message);
      }
    }
  }

  if (!raw) {
    if (secTickersIdx) return secTickersIdx; // stale in-memory beats nothing
    if (!staleDisk) return null;
    raw = staleDisk.rows;
    rawAt = staleDisk.at;
  }

  const rows = Object.values(raw).filter((r) => r && r.ticker && r.title);
  const byNormalized = new Map<string, SecTickerRow[]>();
  for (const r of rows) {
    const k = normalizeEntityName(r.title);
    if (!k) continue;
    const arr = byNormalized.get(k) || [];
    arr.push(r);
    byNormalized.set(k, arr);
  }
  secTickersIdx = { rows, byNormalized, refreshed_at: rawAt };
  return secTickersIdx;
}

// Name → ticker resolution. Tries exact-normalized first, then
// word-overlap match. Hard rule: at least one shared word of 4+ chars
// between the searched name and the candidate title.
// Prevents spurious
// hits like "POREMBA DENISE" matching "NI Holdings" because "DENISE"
// contains "NI". Also skips names that look like individual persons
// (two capitalized words, no entity suffix) — they're never issuers.
/**
 * Resolve a free-text company name to a row of the SEC ticker index.
 *
 * Decision order:
 *   1. "LAST, FIRST" (comma, no entity marker) → individual → null.
 *   2. Government / municipal body → null.
 *   3. Exact match on the normalized name → shortest ticker wins.
 *   4. Exactly two words and no entity marker → treated as a person → null.
 *      This runs after (3) so suffix-less issuers such as "BEST BUY"
 *      still resolve through the exact path.
 *   5. Fuzzy: at least 2 shared identifying words covering ≥ 50% of the
 *      smaller identifying-word set; ties broken by closest name length,
 *      then shortest ticker.
 *
 * @returns the matched row, or null when the index is unavailable or
 *          nothing matches.
 */
async function resolveSecTicker(name: string): Promise<SecTickerRow | null> {
  const idx = await ensureSecTickerIndex();
  if (!idx) return null;
  const key = normalizeEntityName(name);
  if (!key) return null;

  const upper = name.toUpperCase();
  const hasEntityMarker = SEC_ENTITY_MARKER_RE.test(upper);

  // (1) comma in name → "LAST, FIRST" form, human.
  if (/,/.test(name) && !hasEntityMarker) return null;

  // (2) Government / municipal entities are not US public issuers. If
  // they were, their bond funds would surface separately.
  if (SEC_GOVERNMENT_RE.test(name)) return null;

  // (3) Exact normalized match — most reliable path. Sort a copy so the
  // shared index bucket is never reordered.
  const exact = idx.byNormalized.get(key);
  if (exact && exact.length) {
    return [...exact].sort((a, b) => a.ticker.length - b.ticker.length)[0];
  }

  // (4) "FIRST LAST" form: exactly 2 word-tokens and no entity marker.
  const tokens = upper.split(/\s+/).filter(Boolean);
  if (tokens.length === 2 && !hasEntityMarker) return null;

  // (5) Word-overlap on identifying words only. "CONSTRUCTION", "COMPANY",
  // "SERVICES" etc. are too generic to identify an issuer on their own.
  const identifyingWords = (s: string) =>
    new Set(
      s
        .split(" ")
        .filter((w) => w.length >= 4 && !SEC_NAME_STOPWORDS.has(w)),
    );
  const keyId = identifyingWords(key);
  if (keyId.size === 0) return null; // no non-stopword words to match on

  const candidates: { row: SecTickerRow; overlap: number; k2: string }[] = [];
  for (const r of idx.rows) {
    const k2 = normalizeEntityName(r.title);
    if (!k2) continue;
    const k2Id = identifyingWords(k2);
    let overlap = 0;
    for (const w of k2Id) {
      if (keyId.has(w)) overlap++;
    }
    // Single-word matches are only valid via the exact path above;
    // otherwise "POWER" (from POWER CONSTRUCTION) falsely matches
    // POWER INTEGRATIONS and CAPITAL POWER CORP.
    if (overlap < 2) continue;
    // The shared words must also cover at least half of the smaller
    // identifying-word set, so long names agree on substance.
    const minIdSize = Math.min(keyId.size, k2Id.size);
    if (overlap / minIdSize < 0.5) continue;
    candidates.push({ row: r, overlap, k2 });
  }
  if (!candidates.length) return null;

  candidates.sort((a, b) => {
    if (b.overlap !== a.overlap) return b.overlap - a.overlap;
    const la = Math.abs(a.k2.length - key.length);
    const lb = Math.abs(b.k2.length - key.length);
    if (la !== lb) return la - lb;
    return a.row.ticker.length - b.row.ticker.length;
  });
  return candidates[0].row;
}

// Patterns and word lists for resolveSecTicker, built once at module load
// instead of on every lookup.
const SEC_ENTITY_MARKER_RE =
  /\b(LLC|L\s*L\s*C|LLP|INC|INC\.|CORP|CORPORATION|COMPANY|CO|CO\.|LTD|LIMITED|GROUP|HOLDINGS|ENTERPRISES|AUTHORITY|ASSOCIATION|TRUST|BANK|PARTNERS|INDUSTRIES|CONSTRUCTION|ELECTRIC|MECHANICAL|CONTRACTING|PROPERTIES|REALTY|DEVELOPMENT|ENGINEERING|PLUMBING|ROOFING|SERVICES|SOLUTIONS|SYSTEMS|TECHNOLOGIES|LOGISTICS|RESOURCES)\b/;
const SEC_GOVERNMENT_RE =
  /\b(AUTHORITY|MUNICIPAL|CITY OF|COUNTY|DISTRICT|BOARD OF EDUCATION|PUBLIC SCHOOLS|TRANSIT|HOUSING AUTHORITY|DEPARTMENT OF|BUREAU OF|PARK DISTRICT|WATER RECLAMATION)\b/i;
const SEC_NAME_STOPWORDS = new Set([
  "CONSTRUCTION","COMPANY","SERVICES","SERVICE","GROUP","SYSTEMS",
  "SOLUTIONS","RESOURCES","TECHNOLOGIES","INDUSTRIES","PARTNERS",
  "ELECTRIC","MECHANICAL","PLUMBING","ROOFING","DEVELOPMENT","REALTY",
  "PROPERTIES","ENGINEERING","ASSOCIATES","INTERNATIONAL","NATIONAL",
  "AMERICAN","GENERAL","AUTHORITY","HOLDINGS","ENTERPRISES","LOGISTICS",
  "COMMUNICATIONS","FINANCIAL","CAPITAL","PROPERTY","RESIDENTIAL",
  "COMMERCIAL","GLOBAL","UNITED","STATES","STATE","FEDERAL",
  "MANAGEMENT","CONTRACTING","CONTRACTORS","BUILDERS","BUILDING",
]);

// Pull SIC + exchange for a CIK from data.sec.gov/submissions
/**
 * Best-effort issuer profile. The CIK is zero-padded to 10 digits for the
 * submissions endpoint. Any HTTP, network, timeout or parse failure
 * yields an empty object so the caller can still build a brief.
 */
async function fetchSecProfile(cik: number | string): Promise<{
  exchange?: string;
  sic?: string;
  sic_description?: string;
}> {
  const padded = String(cik).padStart(10, "0");
  try {
    const res = await fetch(`https://data.sec.gov/submissions/CIK${padded}.json`, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) return {};
    const j = await res.json();
    return {
      exchange: Array.isArray(j.exchanges) ? j.exchanges[0] : undefined,
      sic: j.sic,
      sic_description: j.sicDescription,
    };
  } catch {
    // Deliberate: profile data is optional enrichment.
    return {};
  }
}

// Stooq — free daily quote CSV.
/**
 * Fetch the latest daily quote for a US-listed ticker from Stooq's free
 * CSV endpoint.
 *
 * Column order of the response (f=sd2t2ohlcv):
 *   Symbol,Date,Time,Open,High,Low,Close,Volume
 *
 * @param ticker Ticker symbol; lower-cased and suffixed with `.us`.
 * @returns Parsed quote fields (unparseable or zero values become
 *          `undefined`), or `null` when the request fails, the body has no
 *          data row, or Stooq reports "N/D" for the symbol.
 */
async function fetchStooqQuote(ticker: string): Promise<{
  price?: number;
  price_date?: string;
  open?: number;
  high?: number;
  low?: number;
  volume?: number;
} | null> {
  try {
    const res = await fetch(
      `https://stooq.com/q/l/?s=${encodeURIComponent(ticker.toLowerCase())}.us&f=sd2t2ohlcv&h&e=csv`,
      { headers: { "User-Agent": UA }, signal: AbortSignal.timeout(8000) },
    );
    if (!res.ok) return null;
    const csv = await res.text();
    // Tolerate CRLF line endings so a trailing "\r" never ends up in a column.
    const lines = csv.trim().split(/\r?\n/);
    if (lines.length < 2) return null;
    const cols = lines[1].split(",").map((c) => c.trim());
    // Stooq returns "N/D" literally for unknown symbols
    if (cols.includes("N/D")) return null;
    // Symbol (index 0) and Time (index 2) are not needed.
    const [, date, , open, high, low, close, volume] = cols;
    const toFloat = (s: string | undefined) => parseFloat(s ?? "") || undefined;
    return {
      price: toFloat(close),
      price_date: date,
      open: toFloat(open),
      high: toFloat(high),
      low: toFloat(low),
      volume: parseInt(volume ?? "", 10) || undefined,
    };
  } catch {
    return null;
  }
}

/**
 * Build the public-company brief for an entity name: resolve a ticker via
 * `resolveSecTicker`, then fetch the SEC profile and a Stooq quote in
 * parallel.
 *
 * Caching (keyed on the normalized name, kind "ticker"):
 *   - `no_match` results are stored for 7 days (company could become public)
 *   - `ok` results are stored for 6 hours (prices go stale)
 *
 * `cap_proxy` is price × volume, and `day_change_pct` is the change from
 * open to the quoted price; both are `undefined` when inputs are missing.
 */
export async function fetchTickerBrief(name: string): Promise<TickerBrief> {
  const now = new Date().toISOString();
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet<TickerBrief>(cacheKey, "ticker");
  if (cached) return cached;
  const hit = await resolveSecTicker(name);
  if (!hit) {
    const b: TickerBrief = {
      source: "sec+stooq",
      fetched_at: now,
      searched_name: name,
      status: "no_match",
    };
    // 7-day TTL on no_match — company could become public
    await cacheSet(cacheKey, "ticker", b, 7 * 24 * 60 * 60 * 1000);
    return b;
  }
  const [profile, quote] = await Promise.all([
    fetchSecProfile(hit.cik_str),
    fetchStooqQuote(hit.ticker),
  ]);
  const cap_proxy =
    quote?.price && quote?.volume ? quote.price * quote.volume : undefined;
  const day_change_pct =
    quote?.price && quote?.open && quote.open > 0
      ? ((quote.price - quote.open) / quote.open) * 100
      : undefined;
  const brief: TickerBrief = {
    source: "sec+stooq",
    fetched_at: now,
    searched_name: name,
    status: "ok",
    ticker: hit.ticker,
    company_name: hit.title,
    cik: String(hit.cik_str),
    exchange: profile.exchange,
    sic: profile.sic,
    sic_description: profile.sic_description,
    price: quote?.price,
    price_date: quote?.price_date,
    open: quote?.open,
    high: quote?.high,
    low: quote?.low,
    volume: quote?.volume,
    day_change_pct,
    cap_proxy,
    sec_url: `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${String(hit.cik_str).padStart(10, "0")}`,
    stooq_url: `https://stooq.com/q/?s=${hit.ticker.toLowerCase()}.us`,
  };
  // 6-hour TTL on ok hits — prices go stale, but SEC metadata doesn't.
  await cacheSet(cacheKey, "ticker", brief, 6 * 60 * 60 * 1000);
  return brief;
}

// ─── Contractor history ────────────────────────────────────────────
// Queries the same Chicago Socrata permits dataset for prior activity.
// Two time windows: last 180 days (recent velocity) and last 730 days
// (longer-run baseline). All-time count gives tenure. Trend classifier
// compares the 180d count to the average 180d count of the prior 550 days.

const HISTORY_RECENT_DAYS = 180;
const HISTORY_BASELINE_DAYS = 730;

/** GET a permits-dataset URL and parse the JSON body; throws on timeout or non-2xx. */
async function fetchPermitsJson(url: string): Promise<any> {
  const res = await fetch(url, {
    headers: { "User-Agent": UA },
    signal: AbortSignal.timeout(10000),
  });
  if (!res.ok) throw new Error(`permits HTTP ${res.status}`);
  return res.json();
}

/**
 * Classify permit velocity from the three window counts.
 *
 * The baseline window minus the recent window leaves 550 days, i.e.
 * 550 / 180 ≈ 3.06 windows of 180 days. The prior rate is the number of
 * permits per 180 days over that span.
 */
function classifyPermitTrend(
  n180: number,
  n24: number,
  nAll: number,
): ContractorHistory["trend"] {
  if (nAll === 0) return "unknown";
  if (n24 <= 1 && nAll <= 3) return "new";
  const priorWindows =
    (HISTORY_BASELINE_DAYS - HISTORY_RECENT_DAYS) / HISTORY_RECENT_DAYS;
  const priorRate = (n24 - n180) / priorWindows;
  if (priorRate > 0) {
    const ratio = n180 / priorRate;
    if (ratio > 1.5) return "growing";
    if (ratio < 0.5) return "declining";
    return "stable";
  }
  // No prior activity in the baseline window: any recent permits are growth.
  if (priorRate === 0 && n180 > 0) return "growing";
  return "stable";
}

/**
 * Summarize a contractor's permit history from the Chicago permits dataset
 * (ydr8-5enu): counts for the last 180 days, last 730 days and all time,
 * the 730-day reported cost, a trend label and the 50 most recent permits.
 *
 * Names of 6+ characters are matched as a case-insensitive substring of
 * `contact_1_name`; shorter names require an exact match to limit false
 * positives. Successful results are cached for 12 hours; failures return
 * a `status: "error"` brief and are not cached.
 */
export async function fetchContractorHistory(name: string): Promise<ContractorHistory> {
  const now = new Date().toISOString();
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet<ContractorHistory>(cacheKey, "history");
  if (cached) return cached;
  const today = new Date();
  const d180 = new Date(today.getTime() - HISTORY_RECENT_DAYS * 86400000).toISOString().slice(0, 10);
  const d24mo = new Date(today.getTime() - HISTORY_BASELINE_DAYS * 86400000).toISOString().slice(0, 10);
  const base = "https://data.cityofchicago.org/resource/ydr8-5enu.json";
  // Escape single quotes in contractor name per Socrata rules.
  // Socrata LIKE with leading % prefix is substring — we need this
  // because "TURNER CONSTRUCTION" on a new permit should match against
  // "TURNER CONSTRUCTION COMPANY" on historical ones. Guard against
  // false-positive hits on very short names (< 6 chars).
  const safeName = name.replace(/'/g, "''");
  const match =
    name.length >= 6
      ? `upper(contact_1_name) LIKE upper('%${safeName}%')`
      : `contact_1_name='${safeName}'`;
  const where180 = encodeURIComponent(`${match} AND issue_date>'${d180}'`);
  const where24mo = encodeURIComponent(`${match} AND issue_date>'${d24mo}'`);
  const whereAll = encodeURIComponent(match);
  try {
    const [c180, c24, cAll, recent] = await Promise.all([
      fetchPermitsJson(`${base}?$select=count(*)&$where=${where180}`),
      fetchPermitsJson(`${base}?$select=count(*),sum(reported_cost)&$where=${where24mo}`),
      fetchPermitsJson(`${base}?$select=count(*)&$where=${whereAll}`),
      // Pulled to 50 rows + lat/lng + permit id + description so the
      // contractor profile heat map and timeline have something to plot.
      fetchPermitsJson(
        `${base}?$select=id,issue_date,work_type,reported_cost,street_number,street_direction,street_name,latitude,longitude,work_description&$where=${whereAll}&$order=issue_date DESC&$limit=50`,
      ),
    ]);
    const n180 = parseInt(c180?.[0]?.count || "0", 10);
    const n24 = parseInt(c24?.[0]?.count || "0", 10);
    const cost24 = parseFloat(c24?.[0]?.sum_reported_cost || "0");
    const nAll = parseInt(cAll?.[0]?.count || "0", 10);
    const recentRows: any[] = Array.isArray(recent) ? recent : [];

    const brief: ContractorHistory = {
      source: "chicago_socrata",
      fetched_at: now,
      searched_name: name,
      status: "ok",
      permits_last_180d: n180,
      permits_last_24mo: n24,
      permits_historical_total: nAll,
      total_cost_last_24mo: cost24,
      trend: classifyPermitTrend(n180, n24, nAll),
      recent_permits: recentRows.map((r: any) => ({
        date: (r.issue_date || "").slice(0, 10),
        work_type: r.work_type || "",
        cost: parseFloat(r.reported_cost || 0),
        address: `${r.street_number || ""} ${r.street_direction || ""} ${r.street_name || ""}`.trim().replace(/\s+/g, " "),
        lat: r.latitude ? parseFloat(r.latitude) : undefined,
        lng: r.longitude ? parseFloat(r.longitude) : undefined,
        permit_id: r.id || undefined,
        description: r.work_description || undefined,
      })),
    };
    // 12-hour TTL — activity rarely changes more than once/day
    await cacheSet(cacheKey, "history", brief, 12 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "chicago_socrata",
      fetched_at: now,
      searched_name: name,
      status: "error",
      permits_last_180d: 0,
      permits_last_24mo: 0,
      permits_historical_total: 0,
      total_cost_last_24mo: 0,
      trend: "unknown",
      recent_permits: [],
      error: (e as Error).message,
    };
  }
}
// ─── Cook County Assessor (property owner) ─────────────────────────
// Free Socrata at datacatalog.cookcountyil.gov/resource/c49d-89sn.json.
// Address normalization is gentle: trim, collapse whitespace, upper-case.
// Match using LIKE prefix because addresses on building permits are often
// missing trailing details ("4809 N BROADWAY" vs "4809 N BROADWAY UNIT 5"
// in the parcel record).

/**
 * Look up the assessor parcel record for a Chicago street address.
 *
 * @param address Free-text street address; used as a LIKE prefix against
 *                `property_address`, restricted to property_city CHICAGO.
 * @returns `ok` with parcel/mailing fields from the first matching row,
 *          `no_match` when the address is shorter than 4 characters or no
 *          row matches, or `error` (uncached) on HTTP/network failure.
 */
export async function fetchPropertyOwner(address: string): Promise<PropertyOwnerBrief> {
  const now = new Date().toISOString();
  const cleaned = address.trim().replace(/\s+/g, " ").toUpperCase();
  // Too-short inputs are never cached, so reject them before the cache read.
  if (!cleaned || cleaned.length < 4) {
    return {
      source: "cook_county_assessor",
      fetched_at: now,
      searched_address: address,
      status: "no_match",
    };
  }
  const cached = await cacheGet<PropertyOwnerBrief>(cleaned, "owner");
  if (cached) return cached;
  const safe = cleaned.replace(/'/g, "''");
  // Match the full address text as a prefix; LIKE upper(...) for case fold.
  const where = encodeURIComponent(
    `upper(property_address) like upper('${safe}%') AND property_city='CHICAGO'`,
  );
  const sourceUrl = `https://datacatalog.cookcountyil.gov/resource/c49d-89sn.json?$where=${where}&$limit=1`;
  try {
    const res = await fetch(sourceUrl, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) throw new Error(`assessor HTTP ${res.status}`);
    const parsed: unknown = await res.json();
    const rows: any[] = Array.isArray(parsed) ? parsed : [];
    if (rows.length === 0) {
      const empty: PropertyOwnerBrief = {
        source: "cook_county_assessor",
        fetched_at: now,
        searched_address: address,
        status: "no_match",
        source_url: sourceUrl,
      };
      await cacheSet(cleaned, "owner", empty, 7 * 24 * 60 * 60 * 1000);
      return empty;
    }
    const r = rows[0];
    const brief: PropertyOwnerBrief = {
      source: "cook_county_assessor",
      fetched_at: now,
      searched_address: address,
      status: "ok",
      pin: r.pin,
      property_address: r.property_address,
      property_zip: r.property_zip,
      mailing_address: r.mailing_address,
      mailing_city: r.mailing_city,
      mailing_state: r.mailing_state,
      mailing_zip: r.mailing_zip,
      township: r.township_name,
      ward: r.ward,
      longitude: r.longitude,
      latitude: r.latitude,
      source_url: sourceUrl,
    };
    // Owner records change rarely — 30d cache, stated explicitly so the
    // TTL does not depend on cacheSet's default.
    await cacheSet(cleaned, "owner", brief, 30 * 24 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "cook_county_assessor",
      fetched_at: now,
      searched_address: address,
      status: "error",
      error: (e as Error).message,
    };
  }
}

// ─── Chicago Building Violations ───────────────────────────────────
// Dataset 22u3-xenr. Address-keyed. Status field tells us which are
// open vs complied. Stop-work orders surface in violation_description.

/**
 * Summarize building violations recorded against an address.
 *
 * Only the 50 most recent rows are fetched, so `total_violations`,
 * `open_violations` and `stop_work_orders` are counts within those rows
 * (i.e. capped at 50), not lifetime totals.
 *
 * @returns `ok` (cached 12 hours) with counts and the five latest
 *          violations, or `error` (uncached) when the address is shorter
 *          than 6 characters or the request fails.
 */
export async function fetchPropertyViolations(address: string): Promise<PropertyViolationsBrief> {
  const now = new Date().toISOString();
  const cleaned = address.trim().replace(/\s+/g, " ").toUpperCase();
  if (!cleaned || cleaned.length < 6) {
    return {
      source: "chicago_building_violations",
      fetched_at: now,
      searched_address: address,
      status: "error",
      total_violations: 0,
      open_violations: 0,
      stop_work_orders: 0,
      recent_violations: [],
      error: "address too short",
    };
  }
  const cached = await cacheGet<PropertyViolationsBrief>(cleaned, "violations");
  if (cached) return cached;
  const safe = cleaned.replace(/'/g, "''");
  const where = encodeURIComponent(`upper(address) like upper('${safe}%')`);
  const url = `https://data.cityofchicago.org/resource/22u3-xenr.json?$where=${where}&$order=violation_date DESC&$limit=50`;
  try {
    const res = await fetch(url, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) throw new Error(`violations HTTP ${res.status}`);
    const parsed: unknown = await res.json();
    const rows: any[] = Array.isArray(parsed) ? parsed : [];
    const total = rows.length;
    const open = rows.filter(
      (r) => /OPEN|FAILED|NO ENTRY/i.test(r.violation_status || ""),
    ).length;
    // Stop-work orders may be mentioned in either the description or the
    // inspector comments, so both are searched.
    const stopWork = rows.filter((r) =>
      /STOP[ -]?WORK/i.test(`${r.violation_description || ""} ${r.violation_inspector_comments || ""}`),
    ).length;
    const recent = rows.slice(0, 5).map((r) => ({
      date: (r.violation_date || "").slice(0, 10),
      status: r.violation_status || "",
      description: (r.violation_description || "").slice(0, 120),
      department: r.department_bureau || "",
    }));
    const brief: PropertyViolationsBrief = {
      source: "chicago_building_violations",
      fetched_at: now,
      searched_address: address,
      status: "ok",
      total_violations: total,
      open_violations: open,
      stop_work_orders: stopWork,
      most_recent_date: rows[0]?.violation_date?.slice(0, 10),
      recent_violations: recent,
    };
    await cacheSet(cleaned, "violations", brief, 12 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "chicago_building_violations",
      fetched_at: now,
      searched_address: address,
      status: "error",
      total_violations: 0,
      open_violations: 0,
      stop_work_orders: 0,
      recent_violations: [],
      error: (e as Error).message,
    };
  }
}
// ─── Union lookup (static) ─────────────────────────────────────────
// Map permit work_type → likely Chicago Local(s) + their training
// centers (JATCs). Static because the granularity is "typical
// assignment, not certainty per project."

const UNION_MAP: Record<string, UnionMapping> = {
  ELECTRICAL: {
    trade: "Electrical",
    primary_locals: [
      { name: "IBEW Local 134", local: "134", jurisdiction: "Inside electrical (Chicago)" },
      { name: "IBEW Local 701", local: "701", jurisdiction: "Low-voltage / suburban" },
    ],
    training_centers: [
      { name: "IBEW 134 / NECA JATC", address: "6301 W 115th St, Alsip IL", program_length: "5-yr apprenticeship" },
      { name: "IBEW 701 / NECA JATC", address: "1979 Bucktail Ln, Sugar Grove IL", program_length: "5-yr apprenticeship" },
    ],
  },
  PLUMBING: {
    trade: "Plumbing",
    primary_locals: [
      { name: "Plumbers Local 130", local: "130", jurisdiction: "Chicago + Cook County" },
    ],
    training_centers: [
      { name: "Plumbers 130 JAC", address: "1340 W Washington Blvd, Chicago", program_length: "5-yr apprenticeship" },
    ],
  },
  MECHANICAL: {
    trade: "Mechanical / HVAC / Pipe",
    primary_locals: [
      { name: "Pipe Fitters Local 597", local: "597", jurisdiction: "HVAC + process piping" },
      { name: "Sheet Metal Workers Local 73", local: "73", jurisdiction: "Sheet metal / ductwork" },
      { name: "IUOE Local 399", local: "399", jurisdiction: "Stationary engineers (boilers, chillers)" },
    ],
    training_centers: [
      { name: "Local 597 Training Center", address: "10806 W 47th St, McCook IL", program_length: "5-yr" },
      { name: "Sheet Metal 73 JAC", address: "16410 S 84th Ave, Tinley Park IL", program_length: "5-yr" },
    ],
  },
  REROOFING: {
    trade: "Roofing",
    primary_locals: [
      { name: "Roofers Local 11", local: "11", jurisdiction: "Chicago + suburbs" },
    ],
    training_centers: [
      { name: "Roofers Local 11 Training", address: "9525 S Industrial Dr, Bridgeview IL", program_length: "3-yr" },
    ],
  },
  MASONRY: {
    trade: "Masonry / Concrete",
    primary_locals: [
      { name: "Bricklayers Local 21", local: "21", jurisdiction: "Brick + stone" },
      { name: "Cement Masons Local 502", local: "502", jurisdiction: "Concrete finishing" },
    ],
    training_centers: [
      { name: "Bricklayers 21 Training", address: "11244 S Cottage Grove, Chicago", program_length: "3-yr" },
      { name: "Cement Masons 502 JATC", address: "739 25th Ave, Bellwood IL", program_length: "3-yr" },
    ],
  },
  GENERAL: {
    trade: "General construction (multi-trade)",
    primary_locals: [
      { name: "Chicago Regional Council of Carpenters", local: "1/13/58/1051", jurisdiction: "Carpentry + drywall" },
      { name: "LIUNA Locals 1/2/4/6/76/96/149/152/225/269/296/1001", local: "various", jurisdiction: "Laborers / earthwork" },
      { name: "Operating Engineers Local 150", local: "150", jurisdiction: "Heavy equipment (cranes, excavators)" },
      { name: "Ironworkers Local 1/111/136/395", local: "1/111/136/395", jurisdiction: "Structural / rebar" },
      { name: "Teamsters Local 731/705", local: "731/705", jurisdiction: "Material haul" },
    ],
    training_centers: [
      { name: "Chicago Regional Carpenters Training Center", address: "11001 W Roosevelt, Westchester IL", program_length: "4-yr" },
      { name: "Laborers Training School", address: "3636 W Pershing Rd, Chicago", program_length: "3-yr" },
      { name: "Operating Engineers 150 Training", address: "16125 W 100th St, Wilmington IL", program_length: "3-yr" },
      { name: "Iron Workers Local 1 Apprenticeship", address: "5775 W 26th St, Cicero IL", program_length: "3-yr" },
    ],
  },
  WRECKING: {
    trade: "Wrecking / Demolition",
    primary_locals: [
      { name: "LIUNA Locals (laborers)", local: "1/2/4/...", jurisdiction: "Manual demolition" },
      { name: "Operating Engineers Local 150", local: "150", jurisdiction: "Heavy equipment demo" },
      { name: "Teamsters Local 731", local: "731", jurisdiction: "Debris haul" },
    ],
    training_centers: [
      { name: "Laborers Training School (HazMat track)", address: "3636 W Pershing Rd, Chicago", program_length: "3-yr" },
    ],
  },
  SIGN: {
    trade: "Signs / Signage",
    primary_locals: [
      { name: "IBEW Local 134 (electric signs)", local: "134", jurisdiction: "Lit signage" },
      { name: "Painters DC 14 Local 30 (sign painters)", local: "30", jurisdiction: "Hand-painted / vinyl" },
    ],
    training_centers: [
      { name: "Painters DC 14 Training", address: "1456 S La Salle, Chicago", program_length: "3-yr" },
    ],
  },
};

/**
 * Map a permit work-type string to the union locals and training centers
 * that typically handle that kind of work.
 *
 * Matching is keyword-based on the upper-cased input and evaluated in a
 * fixed order (electrical, plumbing, mechanical, roofing, masonry,
 * wrecking, sign); the first hit wins. Anything unmatched falls back to
 * the general multi-trade entry.
 *
 * `SIGN` and `VENT` are anchored at a word start so that unrelated words
 * containing them ("DESIGN", "EVENT", "PREVENT") do not select the sign or
 * mechanical trades.
 *
 * @param workType Permit work type; `undefined` or empty returns `null`.
 */
export function unionsForWorkType(workType: string | undefined): UnionMapping | null {
  if (!workType) return null;
  const w = workType.toUpperCase();
  if (/ELECTRIC/.test(w)) return UNION_MAP.ELECTRICAL;
  if (/PLUMB/.test(w)) return UNION_MAP.PLUMBING;
  if (/MECHANIC|HVAC|REFRIGERAT|\bVENT|HEAT/.test(w)) return UNION_MAP.MECHANICAL;
  if (/ROOF/.test(w)) return UNION_MAP.REROOFING;
  if (/MASON|CONCRETE|BRICK|CEMENT/.test(w)) return UNION_MAP.MASONRY;
  if (/WRECK|DEMO/.test(w)) return UNION_MAP.WRECKING;
  if (/\bSIGN/.test(w)) return UNION_MAP.SIGN;
  return UNION_MAP.GENERAL;
}
// ─── USASpending.gov federal contracts ─────────────────────────────
// api.usaspending.gov/api/v2/search/spending_by_award/ — POST with
// JSON body. Returns prime award records by recipient name.
// Free, no auth. Slow on first call (~3-5s) but cacheable 24h.

/**
 * Summarize federal contract awards (award type codes A–D, action dates
 * from 2020-01-01 to today) whose recipient matches `name`.
 *
 * Only the 25 largest awards are requested, so `total_awards_count` and
 * `total_awards_value` describe that page (at most 25 awards), not the
 * recipient's full history.
 *
 * @returns `ok` (cached 24 hours) with totals, the latest action date and
 *          the top three awarding agencies by value; `no_match` for names
 *          shorter than 5 characters (uncached) or empty results (cached
 *          7 days); `error` (uncached) on HTTP/network failure.
 */
export async function fetchFederalContracts(name: string): Promise<FederalContractsBrief> {
  const now = new Date().toISOString();
  // Too-short names are never cached, so reject them before the cache read.
  if (!name || name.length < 5) {
    return {
      source: "usaspending.gov",
      fetched_at: now,
      searched_name: name,
      status: "no_match",
      total_awards_count: 0,
      total_awards_value: 0,
      top_agencies: [],
    };
  }
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet<FederalContractsBrief>(cacheKey, "federal");
  if (cached) return cached;
  const url = "https://api.usaspending.gov/api/v2/search/spending_by_award/";
  const sourceUrl = `https://www.usaspending.gov/search/?keywords=${encodeURIComponent(name)}`;
  const body = {
    filters: {
      award_type_codes: ["A", "B", "C", "D"], // contract types
      recipient_search_text: [name],
      time_period: [
        { start_date: "2020-01-01", end_date: new Date().toISOString().slice(0, 10) },
      ],
    },
    fields: [
      "Award ID",
      "Recipient Name",
      "Award Amount",
      "Awarding Agency",
      "Action Date",
      "NAICS",
    ],
    sort: "Award Amount",
    order: "desc",
    limit: 25,
    page: 1,
  };
  try {
    const res = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json", "User-Agent": UA },
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(15000),
    });
    if (!res.ok) throw new Error(`usaspending HTTP ${res.status}`);
    const j = (await res.json()) as any;
    const results: any[] = j.results || [];
    if (results.length === 0) {
      const empty: FederalContractsBrief = {
        source: "usaspending.gov",
        fetched_at: now,
        searched_name: name,
        status: "no_match",
        total_awards_count: 0,
        total_awards_value: 0,
        top_agencies: [],
        source_url: sourceUrl,
      };
      await cacheSet(cacheKey, "federal", empty, 7 * 24 * 60 * 60 * 1000);
      return empty;
    }
    // Single pass: accumulate the grand total and the per-agency totals.
    let totalValue = 0;
    const agencyMap: Record<string, number> = {};
    for (const r of results) {
      const amount = parseFloat(r["Award Amount"]) || 0;
      const agency = r["Awarding Agency"] || "Unknown";
      totalValue += amount;
      agencyMap[agency] = (agencyMap[agency] || 0) + amount;
    }
    const top = Object.entries(agencyMap)
      .map(([agency, value]) => ({ agency, value }))
      .sort((a, b) => b.value - a.value)
      .slice(0, 3);
    // Action dates are compared as strings; the lexicographically largest
    // one is reported as the most recent.
    const recent = results
      .map((r: any) => r["Action Date"])
      .filter(Boolean)
      .sort()
      .reverse()[0];
    const brief: FederalContractsBrief = {
      source: "usaspending.gov",
      fetched_at: now,
      searched_name: name,
      status: "ok",
      total_awards_count: results.length,
      total_awards_value: totalValue,
      most_recent_award_date: recent,
      top_agencies: top,
      source_url: sourceUrl,
    };
    await cacheSet(cacheKey, "federal", brief, 24 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "usaspending.gov",
      fetched_at: now,
      searched_name: name,
      status: "error",
      total_awards_count: 0,
      total_awards_value: 0,
      top_agencies: [],
      error: (e as Error).message,
    };
  }
}
// ─── NLRB cases (placeholder) ──────────────────────────────────────
// nlrb.gov/search/case has JSON results behind their JS app. The path
// requires session cookies + form tokens — needs a short scraper.
// Returning placeholder until next batch.

/**
 * Placeholder NLRB lookup: performs no network access and always returns
 * a `needs_setup` brief explaining why.
 */
// NOTE(review): the return type's name was lost from this copy of the file;
// `NlrbBrief` is assumed from the sibling *Brief naming — confirm.
export async function fetchNlrbBrief(name: string): Promise<NlrbBrief> {
  return {
    source: "nlrb.gov",
    fetched_at: new Date().toISOString(),
    searched_name: name,
    status: "needs_setup",
    reason:
      "NLRB case search at apps.nlrb.gov/case requires session-aware scraping; queued for batch 5b.",
  };
}

// ─── Risk scoring ──────────────────────────────────────────────────

/**
 * Combine the OSHA, IL SOS and permit-history briefs into a risk score.
 *
 * Scoring:
 *   - OSHA `ok`: 10 / 25 / 50 / 75 for 0 / <3 / <10 / 10+ inspections,
 *     plus 15 (capped at 100) when the most recent inspection is <180 days old.
 *   - OSHA `no_match`: recorded as a factor, no score.
 *   - OSHA error or missing: marks the result partial.
 *   - IL SOS missing or not `ok`: marks the result partial.
 *   - History trend "new": raises the score to at least 35.
 *
 * @returns `score` (null when nothing contributed one), the list of
 *          `factors` that fired, and whether inputs were `partial`.
 */
function scoreEntity(
  osha: OshaBrief | null,
  ilsos: IlsosBrief | null,
  history: ContractorHistory | null,
) {
  const factors: string[] = [];
  let score: number | null = null;
  let partial = false;
  if (osha) {
    if (osha.status === "ok") {
      const n = osha.inspection_count;
      if (n === 0) {
        score = 10;
        factors.push("osha_clean:no_inspections");
      } else if (n < 3) {
        score = 25;
        factors.push(`osha_low:${n}_inspections`);
      } else if (n < 10) {
        score = 50;
        factors.push(`osha_moderate:${n}_inspections`);
      } else {
        score = 75;
        factors.push(`osha_high:${n}_inspections`);
      }
      if (osha.most_recent_date) {
        // An unparseable date falls back to the epoch, which yields a very
        // large age and therefore no recency bump.
        const ageDays =
          (Date.now() - (Date.parse(osha.most_recent_date) || 0)) / (1000 * 60 * 60 * 24);
        if (ageDays < 180) {
          factors.push("osha_recent_activity:<180d");
          score = Math.min(100, (score ?? 0) + 15);
        }
      }
    } else if (osha.status === "no_match") {
      factors.push("osha_no_match");
    } else {
      factors.push("osha_error:" + (osha.error || "unknown"));
      partial = true;
    }
  } else {
    partial = true;
  }
  if (!ilsos || ilsos.status !== "ok") {
    partial = true;
    factors.push("ilsos_pending");
  }
  // Brand-new LLCs with a single permit and zero tenure are a
  // classic LLC-shuffle signature. Flag independently of OSHA.
  if (history?.status === "ok") {
    if (history.trend === "new") {
      factors.push("new_entity:<=3_permits_ever");
      score = Math.max(score ?? 0, 35);
    } else if (history.trend === "growing") {
      factors.push(`activity_growing:${history.permits_last_180d}_in_180d`);
    } else if (history.trend === "declining") {
      factors.push(`activity_declining:${history.permits_last_180d}_in_180d`);
    }
  }
  return { score, factors, partial };
}

// ─── CTA L stations (static, in-memory) ────────────────────────────
// Pulled once from Chicago Socrata 8pix-ypme and kept for the life of the
// process. Compute great-circle distance to nearest with haversine.
type CtaStation = {
  station_name: string;
  lat: number;
  lon: number;
  lines: string[];
};
let _ctaStations: CtaStation[] | null = null;
// Timestamp of the last failed load; 0 means "no failure recorded".
let _ctaLastFailureAt = 0;
// After a failed load, wait this long before hitting the endpoint again.
const CTA_RETRY_AFTER_MS = 5 * 60 * 1000;

/**
 * Load (once) and return the de-duplicated list of CTA stations.
 *
 * A successful load is memoized for the process lifetime. A failed load is
 * NOT memoized as an empty list: callers get `[]` for the next
 * `CTA_RETRY_AFTER_MS`, after which the fetch is attempted again, so one
 * transient outage does not disable station lookups permanently.
 */
async function ensureCtaLoaded(): Promise<CtaStation[]> {
  if (_ctaStations) return _ctaStations;
  if (_ctaLastFailureAt !== 0 && Date.now() - _ctaLastFailureAt < CTA_RETRY_AFTER_MS) {
    return [];
  }
  try {
    const res = await fetch(
      "https://data.cityofchicago.org/resource/8pix-ypme.json?$limit=400",
      { headers: { "User-Agent": UA }, signal: AbortSignal.timeout(15000) },
    );
    if (!res.ok) throw new Error(`cta HTTP ${res.status}`);
    const rows = (await res.json()) as any[];
    // De-dupe by station_name keeping the first stop's coordinates and lines
    const byStation = new Map<string, CtaStation>();
    for (const r of rows) {
      const loc = r.location || {};
      const lat = parseFloat(loc.latitude || "");
      const lon = parseFloat(loc.longitude || "");
      if (isNaN(lat) || isNaN(lon)) continue;
      const station = r.station_name || r.stop_name;
      if (!station) continue;
      if (byStation.has(station)) continue;
      const lines: string[] = [];
      if (r.red) lines.push("Red");
      if (r.blue) lines.push("Blue");
      if (r.g) lines.push("Green");
      if (r.brn) lines.push("Brown");
      if (r.p) lines.push("Purple");
      if (r.y) lines.push("Yellow");
      if (r.pnk) lines.push("Pink");
      if (r.o) lines.push("Orange");
      byStation.set(station, { station_name: station, lat, lon, lines });
    }
    _ctaStations = [...byStation.values()];
    _ctaLastFailureAt = 0;
    return _ctaStations;
  } catch (e) {
    console.warn("[cta] load failed:", (e as Error).message);
    _ctaLastFailureAt = Date.now();
    return [];
  }
}
/**
 * Great-circle distance in meters between two latitude/longitude points
 * (degrees), using the haversine formula with a 6,371,000 m Earth radius.
 */
function haversineMeters(lat1: number, lon1: number, lat2: number, lon2: number): number {
  const R = 6371000;
  const toRad = (d: number) => (d * Math.PI) / 180;
  const halfDLat = toRad(lat2 - lat1) / 2;
  const halfDLon = toRad(lon2 - lon1) / 2;
  const latTerm = Math.sin(halfDLat) ** 2;
  const lonTerm = Math.cos(toRad(lat1)) * Math.cos(toRad(lat2)) * Math.sin(halfDLon) ** 2;
  const a = latTerm + lonTerm;
  return 2 * R * Math.asin(Math.sqrt(a));
}

/**
 * Find the loaded CTA station closest to the given point.
 *
 * @returns The station and its distance in meters, or `null` when no
 *          stations are loaded. On an exact tie the earlier station wins.
 */
async function nearestCtaStation(lat: number, lon: number) {
  const stations = await ensureCtaLoaded();
  if (stations.length === 0) return null;
  return stations.reduce<{ station: CtaStation; dist: number } | null>((closest, candidate) => {
    const dist = haversineMeters(lat, lon, candidate.lat, candidate.lon);
    return !closest || dist < closest.dist ? { station: candidate, dist } : closest;
  }, null);
}

// ─── Nearby permits (labor competition signal) ─────────────────────
// Socrata supports within_circle(location, lat, lon, radius_meters)
// for geo queries. 800m ≈ 0.5mi. Filter to permits issued within the
// last 90d so we capture *current* competition only.

/**
 * Count permits with reported_cost > 50000 issued in the last 90 days
 * within 800 m of the point, and sum their reported cost.
 *
 * @returns `{ count, total_value }`, or `null` on a non-OK response or any
 *          network/parse failure.
 */
async function fetchNearbyPermits(lat: number, lon: number) {
  const ninetyDaysAgo = new Date(Date.now() - 90 * 86400000).toISOString().slice(0, 10);
  const clause = `within_circle(location, ${lat}, ${lon}, 800) AND issue_date>'${ninetyDaysAgo}' AND reported_cost>50000`;
  const where = encodeURIComponent(clause);
  const endpoint = `https://data.cityofchicago.org/resource/ydr8-5enu.json?$select=count(*),sum(reported_cost)&$where=${where}`;
  try {
    const response = await fetch(endpoint, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!response.ok) return null;
    const payload = (await response.json()) as any[];
    const aggregate = payload?.[0] || {};
    const count = parseInt(aggregate.count || "0", 10);
    const total_value = parseFloat(aggregate.sum_reported_cost || "0");
    return { count, total_value };
  } catch {
    return null;
  }
}
// ─── Site Context (Chicago TIF + landmark + community area) ────────
// All Socrata. Lookup by address (street_number + street_name where
// available, lat/long otherwise). Bundle into a single brief so the
// UI gets one tile instead of 3.
const TIF_DATASET = "imgn-2suh"; // TIF District Programming 2022-2026
const LANDMARK_DATASET = "habu-n236"; // Individual Landmarks - Map

/**
 * Check whether an (already upper-cased) address appears in the landmarks
 * dataset. Best-effort: any failure is reported as "not a landmark".
 */
async function lookupLandmark(
  cleanedAddress: string,
): Promise<{ isLandmark: boolean; name?: string }> {
  const safe = cleanedAddress.replace(/'/g, "''");
  const landmarkUrl = `https://data.cityofchicago.org/resource/${LANDMARK_DATASET}.json?$where=upper(address)%20like%20upper('%25${encodeURIComponent(safe)}%25')&$limit=1`;
  try {
    const r = await fetch(landmarkUrl, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(8000),
    });
    if (r.ok) {
      const rows = (await r.json()) as any[];
      if (rows && rows.length) {
        return { isLandmark: true, name: rows[0].landmark_name || rows[0].name };
      }
    }
  } catch {
    /* non-fatal */
  }
  return { isLandmark: false };
}

/**
 * Assemble the site-context brief for an address: landmark status, plus —
 * when the assessor record supplies coordinates — TIF district, nearest CTA
 * station and nearby permit activity.
 *
 * Every sub-lookup is best-effort; a failure leaves the corresponding
 * fields unset instead of failing the brief. The result is cached under
 * the normalized address with kind "site_context".
 *
 * @returns `no_match` (uncached) for an empty address, otherwise `ok`.
 */
export async function fetchSiteContext(address: string): Promise<SiteContextBrief> {
  const now = new Date().toISOString();
  const cleaned = address.trim().replace(/\s+/g, " ").toUpperCase();
  const cached = await cacheGet<SiteContextBrief>(cleaned, "site_context");
  if (cached) return cached;
  if (!cleaned) {
    return {
      source: "chicago_socrata",
      fetched_at: now,
      searched_address: address,
      status: "no_match",
    };
  }
  // The assessor lookup (for lat/long + ward) and the landmark lookup do
  // not depend on each other, so run them concurrently.
  const [owner, landmark] = await Promise.all([
    fetchPropertyOwner(address),
    lookupLandmark(cleaned),
  ]);
  let lat: string | undefined, lon: string | undefined, ward: string | undefined;
  if (owner.status === "ok") {
    lat = owner.latitude;
    lon = owner.longitude;
    ward = owner.ward;
  }
  // Community area number → name table is public on Chicago.gov but
  // we'd need it static; ship without it for now.

  // TIF polygon containment + CTA distance + nearby permits — all
  // depend on having lat/long from the Cook County Assessor.
  let inTif = false;
  let tifName: string | undefined;
  let ctaName: string | undefined;
  let ctaLines: string | undefined;
  let ctaDist: number | undefined;
  let nearbyCount: number | undefined;
  let nearbyValue: number | undefined;
  if (lat && lon) {
    const latNum = parseFloat(lat);
    const lonNum = parseFloat(lon);
    if (!isNaN(latNum) && !isNaN(lonNum)) {
      // All three geo lookups in parallel. The TIF lookup is wrapped so a
      // throw/rejection there degrades to "not in a TIF" like the others.
      const [tif, cta, nearby] = await Promise.all([
        Promise.resolve()
          .then(() => findTifDistrict(lonNum, latNum))
          .catch((e: unknown) => {
            console.warn("[site_context] TIF lookup failed:", (e as Error)?.message);
            return null;
          }),
        nearestCtaStation(latNum, lonNum),
        fetchNearbyPermits(latNum, lonNum),
      ]);
      if (tif) {
        inTif = true;
        tifName = tif.name;
      }
      if (cta) {
        ctaName = cta.station.station_name;
        ctaLines = cta.station.lines.join("/");
        ctaDist = Math.round(cta.dist);
      }
      if (nearby) {
        nearbyCount = nearby.count;
        nearbyValue = nearby.total_value;
      }
    }
  }
  const brief: SiteContextBrief = {
    source: "chicago_socrata",
    fetched_at: now,
    searched_address: address,
    status: "ok",
    in_tif_district: inTif,
    tif_district_name: tifName,
    is_landmark: landmark.isLandmark,
    landmark_name: landmark.name,
    community_area: undefined,
    ward,
    latitude: lat,
    longitude: lon,
    nearest_cta_station: ctaName,
    nearest_cta_lines: ctaLines,
    nearest_cta_distance_m: ctaDist,
    nearby_permits_90d: nearbyCount,
    nearby_permits_value_90d: nearbyValue,
  };
  await cacheSet(cleaned, "site_context", brief);
  return brief;
}
// ─── MBE/WBE/DBE diversity certification ───────────────────────────
// Chicago Dept of Procurement Economic Inclusion Certified Vendors.
const DIVERSITY_DATASET = "2iq3-bugw";

/**
 * Look up diversity certifications for a vendor by case-insensitive
 * substring match on `legal_name`.
 *
 * @returns `ok` / `no_match` (cached with cacheSet's default TTL) depending
 *          on whether any rows matched; `needs_setup` when the dataset
 *          endpoint answers 404 (cached 7 days) or the request fails for
 *          any other reason (not cached, so transient outages are retried).
 */
export async function fetchDiversityCerts(name: string): Promise<DiversityCertBrief> {
  const now = new Date().toISOString();
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet<DiversityCertBrief>(cacheKey, "diversity");
  if (cached) return cached;
  const safe = name.replace(/'/g, "''");
  const url = `https://data.cityofchicago.org/resource/${DIVERSITY_DATASET}.json?$where=upper(legal_name)%20like%20upper('%25${encodeURIComponent(safe)}%25')&$limit=10`;
  try {
    const r = await fetch(url, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(8000),
    });
    if (r.status === 404) {
      // Dataset listed in catalog but resource endpoint 404s — known issue.
      const skeletal: DiversityCertBrief = {
        source: "chicago_dps_economic_inclusion",
        fetched_at: now,
        searched_name: name,
        status: "needs_setup",
        reason:
          "Chicago MBE/WBE/DBE datasets (2iq3-bugw, 69yt-tb5j, ci93-uc8s, 8dxf-6ahp) are catalog-listed but the resource endpoint returns 404. Likely archived or auth-gated. Path: contact Chicago DPS for alternate access OR use the procurement Intent-to-Award/Execute datasets which DO work.",
        certifications: [],
      };
      await cacheSet(cacheKey, "diversity", skeletal, 7 * 24 * 60 * 60 * 1000);
      return skeletal;
    }
    // Other failures (5xx, 429, ...) may be transient: report them through
    // the catch below without caching the 404-specific explanation.
    if (!r.ok) throw new Error(`diversity HTTP ${r.status}`);
    const rows = (await r.json()) as any[];
    const certs = (rows || []).map((row) => ({
      category: row.certification || row.classification || "?",
      expiration: row.certification_expiration_date || row.expiration_date,
      type: row.business_type || row.type,
    }));
    const brief: DiversityCertBrief = {
      source: "chicago_dps_economic_inclusion",
      fetched_at: now,
      searched_name: name,
      status: certs.length > 0 ? "ok" : "no_match",
      certifications: certs,
      source_url: `https://data.cityofchicago.org/resource/${DIVERSITY_DATASET}.json`,
    };
    await cacheSet(cacheKey, "diversity", brief);
    return brief;
  } catch (e) {
    return {
      source: "chicago_dps_economic_inclusion",
      fetched_at: now,
      searched_name: name,
      status: "needs_setup",
      reason: `dataset access error: ${(e as Error).message}`,
      certifications: [],
      error: (e as Error).message,
    };
  }
}

// ─── News mentions via Google News RSS ─────────────────────────────
// news.google.com/rss/search?q={name} — free, no auth.
// Returns RSS XML; parse entries for title/link/pubDate.

const XML_NAMED_ENTITIES: Record<string, string> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
};

/**
 * Turn the raw inner text of an RSS element into display text: strip CDATA
 * wrappers, decode the predefined XML entities and numeric character
 * references (single pass, so "&amp;lt;" becomes "&lt;"), and trim.
 */
// NOTE(review): the original strip pattern was lost from this copy of the
// file; CDATA-marker removal is assumed — confirm against the real source.
function cleanRssText(raw: string): string {
  return raw
    .replace(/<!\[CDATA\[|\]\]>/g, "")
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (whole, body: string) => {
      if (body[0] === "#") {
        const isHex = body[1] === "x" || body[1] === "X";
        const code = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
        return Number.isFinite(code) && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
      }
      return XML_NAMED_ENTITIES[body.toLowerCase()] ?? whole;
    })
    .trim();
}

/**
 * Fetch recent news headlines that mention `name` (searched as an exact
 * phrase) from the Google News RSS search feed.
 *
 * @returns `ok` / `no_match` (cached 6 hours) with up to five headlines and
 *          `total_mentions` = number of items in the feed response;
 *          `no_match` (uncached) for names shorter than 5 characters;
 *          `error` (uncached) on HTTP/network failure.
 */
export async function fetchNewsMentions(name: string): Promise<NewsBrief> {
  const now = new Date().toISOString();
  // Too-short names are never cached, so reject them before the cache read.
  if (!name || name.length < 5) {
    return {
      source: "google_news_rss",
      fetched_at: now,
      searched_name: name,
      status: "no_match",
      total_mentions: 0,
      recent_headlines: [],
    };
  }
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet<NewsBrief>(cacheKey, "news");
  if (cached) return cached;
  const q = encodeURIComponent(`"${name}"`);
  const url = `https://news.google.com/rss/search?q=${q}&hl=en-US&gl=US&ceid=US:en`;
  try {
    const r = await fetch(url, {
      headers: { "User-Agent": "Mozilla/5.0 lakehouse-copilot" },
      signal: AbortSignal.timeout(10000),
    });
    if (!r.ok) throw new Error(`news HTTP ${r.status}`);
    const xml = await r.text();
    const items = [...xml.matchAll(/<item>([\s\S]*?)<\/item>/g)];
    const headlines = items
      .slice(0, 5)
      .map((m) => {
        const inner = m[1];
        // First occurrence of <tag ...>text</tag> inside this item.
        const get = (tag: string) => {
          const re = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`);
          const mm = inner.match(re);
          return mm ? cleanRssText(mm[1]) : "";
        };
        return {
          title: get("title"),
          source: get("source"),
          date: get("pubDate"),
          url: get("link"),
        };
      })
      .filter((h) => h.title);
    const brief: NewsBrief = {
      source: "google_news_rss",
      fetched_at: now,
      searched_name: name,
      status: headlines.length > 0 ? "ok" : "no_match",
      total_mentions: items.length,
      recent_headlines: headlines,
    };
    // 6h TTL — news cycles fast
    await cacheSet(cacheKey, "news", brief, 6 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "google_news_rss",
      fetched_at: now,
      searched_name: name,
      status: "error",
      total_mentions: 0,
      recent_headlines: [],
      error: (e as Error).message,
    };
  }
}

// ─── BLS construction employment (Chicago MSA) ─────────────────────
// Single shared in-memory cache since it's macro context, not per-entity.
const BLS_CHI_CONSTRUCTION_SERIES = "SMU17169802000000001";
let _blsCache: BlsTrendBrief | null = null;
let _blsCacheAt = 0;
const BLS_CACHE_TTL_MS = 6 * 60 * 60 * 1000;

/**
 * Fetch the last ~3 calendar years of the construction-employment series
 * from the BLS public API and derive month-over-month and year-over-year
 * changes plus a trend label (YoY above +1.5% growing, below −1.5%
 * declining, otherwise stable; unknown when no year-ago value exists).
 *
 * Successful briefs are memoized in memory for 6 hours; errors are not.
 */
export async function fetchBlsConstructionTrend(): Promise<BlsTrendBrief> {
  if (_blsCache && Date.now() - _blsCacheAt < BLS_CACHE_TTL_MS) return _blsCache;
  const now = new Date().toISOString();
  const thisYear = new Date().getFullYear();
  try {
    const res = await fetch("https://api.bls.gov/publicAPI/v1/timeseries/data/", {
      method: "POST",
      headers: { "Content-Type": "application/json", "User-Agent": UA },
      body: JSON.stringify({
        seriesid: [BLS_CHI_CONSTRUCTION_SERIES],
        startyear: String(thisYear - 2),
        endyear: String(thisYear),
      }),
      signal: AbortSignal.timeout(12000),
    });
    if (!res.ok) throw new Error(`bls HTTP ${res.status}`);
    const j = (await res.json()) as any;
    const series = j?.Results?.series?.[0];
    const data = (series?.data || []) as Array<{ year: string; period: string; value: string }>;
    // Keep monthly observations (M01–M12) with a numeric value only, so an
    // annual-average row or a placeholder value cannot shift the comparisons.
    const obs = data
      .filter((d) => /^M(0[1-9]|1[0-2])$/.test(d.period))
      .map((d) => ({
        period: `${d.year}-${d.period}`,
        value: parseFloat(d.value),
      }))
      .filter((o) => Number.isFinite(o.value));
    if (obs.length === 0) throw new Error("no data");
    // BLS returns most-recent-first
    const latest = obs[0];
    const monthAgo = obs[1];
    // Match the same month of the previous year by key rather than by
    // position, so a missing month cannot misalign the YoY comparison.
    const [latestYear, latestMonth] = latest.period.split("-");
    const yearAgoKey = `${Number(latestYear) - 1}-${latestMonth}`;
    const yearAgo = obs.find((o) => o.period === yearAgoKey);
    const pctChangeFrom = (prev: { value: number } | undefined) =>
      prev && prev.value !== 0 ? ((latest.value - prev.value) / prev.value) * 100 : null;
    const mom = pctChangeFrom(monthAgo);
    const yoy = pctChangeFrom(yearAgo);
    const trend: BlsTrendBrief["trend"] =
      yoy === null ? "unknown" :
      yoy > 1.5 ? "growing" :
      yoy < -1.5 ? "declining" : "stable";
    const brief: BlsTrendBrief = {
      source: "bls.gov",
      fetched_at: now,
      series_id: BLS_CHI_CONSTRUCTION_SERIES,
      status: "ok",
      latest,
      yoy_change_pct: yoy,
      mom_change_pct: mom,
      recent: obs.slice(0, 6),
      trend,
    };
    _blsCache = brief;
    _blsCacheAt = Date.now();
    return brief;
  } catch (e) {
    return {
      source: "bls.gov",
      fetched_at: now,
      series_id: BLS_CHI_CONSTRUCTION_SERIES,
      status: "error",
      latest: null,
      yoy_change_pct: null,
      mom_change_pct: null,
      recent: [],
      trend: "unknown",
      error: (e as Error).message,
    };
  }
}

// ─── News sentiment scoring ────────────────────────────────────────
// Run this over the existing NewsBrief headlines. Cheap keyword-based
// classifier — not LLM-grade, but catches the obvious "lawsuit /
// fraud / OSHA fine" vs "awarded / expansion / wins" patterns.
// NOTE(review): this copy of the file lost every `<...>` span during
// extraction. `Promise<NlrbBrief>` and the `<a ...>` prefix of the NLRB
// link regex were reconstructed; the return annotations of the two
// placeholder fetchers could not be recovered and are left inferred.

// Keyword alternatives, each matched as a whole word (see NEG_RE/POS_RE).
// Entries ending in `\\w*` are stems: without the suffix wildcard the
// surrounding word boundaries would stop them matching inflected forms
// such as "embezzlement" or "injured".
const SENTIMENT_NEG = [
  "lawsuit","sued","fraud","fines?","fined","penalty","penalties",
  "violations?","investigation","investigated","indicted","charged",
  "guilty","arrest","arrested","bankrupt(?:cy)?","insolven(?:cy|t)",
  "debt","default","fired","layoffs?","quit","resigned",
  "scandal","whistleblower","theft","embezzl\\w*","kickback","bribe",
  "complaint","grievanc\\w*","strike","walkout","picket","accident",
  "death","fatal","killed","injur\\w*","hospitaliz\\w*","amputation","fall",
  "collapse","collapsed","fire","explosion","shut.?down","stop.?work",
  "delay","delays","behind.?schedule","over.?budget","cancel(?:l?)ed",
  "subpoena","audit","misconduct","harass\\w*","discriminat\\w*","retaliat\\w*",
  "evict\\w*","foreclos\\w*","levied",
];
const SENTIMENT_POS = [
  "award(?:ed|s)?","won","wins?","top","best","gain(?:ed|s)?",
  "expand(?:ed|s|ing)?","expansion","contract","selected","chosen",
  "partnership","invest(?:ed|s|ing|ment)?","launch","launched",
  "milestone","completed?","record","strong","grew","grow(?:th|ing)",
  "promoted","hired","appoint(?:ed|s)?","ribbon","groundbreaking",
  "innovation","new\\s+headquarters","achievement","recogni[zs]ed",
  "leader(?:ship)?","sustainab\\w*","green","clean.?energy",
];

const NEG_RE = new RegExp("\\b(" + SENTIMENT_NEG.join("|") + ")\\b", "i");
const POS_RE = new RegExp("\\b(" + SENTIMENT_POS.join("|") + ")\\b", "i");
// Global variants built once; `matchAll` works on a copy, so sharing is safe.
const NEG_RE_ALL = new RegExp(NEG_RE.source, "gi");
const POS_RE_ALL = new RegExp(POS_RE.source, "gi");

// At most this many matched keywords are kept (and counted) per headline.
const MAX_REASONS_PER_HEADLINE = 3;

/** Returns up to MAX_REASONS_PER_HEADLINE keyword hits of `re` in `text`. */
function sentimentHits(text: string, re: RegExp): string[] {
  const hits: string[] = [];
  for (const m of text.matchAll(re)) {
    hits.push(m[1]);
    if (hits.length >= MAX_REASONS_PER_HEADLINE) break;
  }
  return hits;
}

/**
 * Classifies each headline in `news.recent_headlines` as positive, negative
 * or neutral by comparing keyword hit counts (capped at 3 per side).
 *
 * @param news Brief whose `recent_headlines[].title` values are scored.
 * @returns `score` is (positive − negative) / headline count, clamped to
 *   [-1, 1], and 0 for an empty list. `flagged_headlines` holds at most 5
 *   non-neutral headlines with the keywords that triggered them.
 */
export function scoreNewsSentiment(news: NewsBrief): NewsSentiment {
  const headlines = news.recent_headlines || [];
  let pos = 0, neg = 0;
  const flagged: NewsSentiment["flagged_headlines"] = [];
  for (const h of headlines) {
    const t = h.title || "";
    const negMatches = sentimentHits(t, NEG_RE_ALL);
    const posMatches = sentimentHits(t, POS_RE_ALL);
    if (negMatches.length > posMatches.length) {
      neg++;
      flagged.push({ title: t, polarity: "neg", reasons: negMatches });
    } else if (posMatches.length > negMatches.length) {
      pos++;
      flagged.push({ title: t, polarity: "pos", reasons: posMatches });
    }
  }
  const count = headlines.length;
  // Guard the division only; `neutral` must be based on the real count.
  const score = count > 0 ? (pos - neg) / count : 0;
  return {
    score: Math.max(-1, Math.min(1, score)),
    positive: pos,
    negative: neg,
    neutral: count - pos - neg,
    flagged_headlines: flagged.slice(0, 5),
  };
}

// ─── Placeholder fetchers for sources that need scraping work ──────

/**
 * Placeholder for OSHA Severe Injury Report lookups. Performs no I/O and
 * always reports `status: "needs_setup"` with the reason the source is
 * not wired up.
 */
// NOTE(review): explicit return type was lost in this copy; restore the
// project's brief type here.
export async function fetchOshaSirBrief(name: string) {
  return {
    source: "osha.gov/severeinjury",
    fetched_at: new Date().toISOString(),
    searched_name: name,
    status: "needs_setup" as const,
    reason:
      "OSHA Severe Injury Reports CSV is bot-protected at dol.gov (Imperva challenge). Path forward: (a) human-mediated periodic CSV download into /data/_entity_cache/sir.csv, then local lookup, or (b) browser-automation scraper with CAPTCHA handling.",
  };
}

/**
 * Placeholder for Cook County lien lookups. Performs no I/O and always
 * reports `status: "needs_setup"`.
 */
// NOTE(review): explicit return type was lost in this copy; restore the
// project's brief type here.
export async function fetchLiensBrief(address: string) {
  // Cook County Recorder of Deeds merged into Cook County Clerk in 2020.
  // Public search portal: crs.cookcountyclerkil.gov (HTML form, JSESSIONID-based).
  // Resource catalog at datacatalog.cookcountyil.gov returns 404 on most lien
  // dataset IDs (xfev-8smz UCC liens specifically, also xrzj-c8ez code violations).
  // Path: HTML scrape of crs.cookcountyclerkil.gov with PIN→search→paginate.
  // Queued — needs ~150 LOC of form-state parsing.
  return {
    source: "cook_county_recorder",
    fetched_at: new Date().toISOString(),
    searched_address: address,
    status: "needs_setup" as const,
    reason:
      "Cook County Clerk recordings portal (crs.cookcountyclerkil.gov) needs JSESSIONID + form POST scrape. Socrata catalog UCC dataset (xfev-8smz) returns 404 on resource endpoint. Queued for next batch.",
  };
}

// ─── NLRB cases (real scraper) ─────────────────────────────────────
// Public search at apps.nlrb.gov/eservice/dailyclosed.aspx and
// nlrb.gov/search/case. The /search/case page returns plain HTML.

/**
 * Scrapes the NLRB case search page for `name` and lists up to 5 distinct
 * case numbers found in `/case/...` links.
 *
 * @param name Entity name used as the search term.
 * @returns Cached brief if present; otherwise a fresh brief cached for 24h.
 *   Fetch failures return `status: "needs_setup"` with the error in `reason`.
 */
export async function fetchNlrbBriefReal(name: string): Promise<NlrbBrief> {
  const now = new Date().toISOString();
  const cached = await cacheGet(normalizeEntityName(name), "nlrb");
  if (cached) return cached;
  const q = encodeURIComponent(name);
  const url = `https://www.nlrb.gov/search/case?search_term=${q}`;
  try {
    const r = await fetch(url, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(12000),
    });
    if (!r.ok) throw new Error(`nlrb HTTP ${r.status}`);
    const html = await r.text();
    // Cases appear in result blocks — collect case-detail links. A case can
    // be linked more than once per block, so de-duplicate by case number.
    const caseNumbers = [
      ...new Set(
        [...html.matchAll(/<a[^>]+href="\/case\/([0-9A-Z\-]+)"/g)].map((m) => m[1]),
      ),
    ];
    const cases = caseNumbers.slice(0, 5).map((caseNumber) => ({
      case_number: caseNumber,
      case_type: "?",
      date: "",
      status: "",
    }));
    // Total count appears near top — pattern "X results"
    const totalMatch = html.match(/(\d[\d,]*)\s+result/i);
    const total = totalMatch ? parseInt(totalMatch[1].replace(/,/g, ""), 10) : caseNumbers.length;
    const brief: NlrbBrief = {
      source: "nlrb.gov",
      fetched_at: now,
      searched_name: name,
      status: cases.length > 0 ? "ok" : "no_match",
      total_cases: total,
      recent_cases: cases,
    };
    await cacheSet(normalizeEntityName(name), "nlrb", brief, 24 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "nlrb.gov",
      fetched_at: now,
      searched_name: name,
      status: "needs_setup",
      reason: `nlrb fetch error: ${(e as Error).message}`,
    };
  }
}

// ─── Project Index Score (matrix aggregation) ──────────────────────
// Walks every signal across owner/contractors and produces a single
// 0-100 index anchored at 50 neutral. Each contribution is auditable.
// Weights are documentation-grade — adjust over time as we learn which
// signals actually correlate with project success/delay.

// NOTE(review): this copy of the file lost every `<...>` span during
// extraction; the Set/Map/Promise type arguments below are reconstructed.

/**
 * Aggregates property, per-entity and macro signals into one index.
 *
 * Starts at 50, adds every contribution, rounds and clamps to 0..100.
 *
 * @param property Property-side signals (violations, site context, owner).
 * @param entities Per-contact briefs; each may add OSHA/history/federal/
 *   SVEP/parent/news contributions.
 * @param macro Optional BLS trend; contributes ±3 beyond ±1.5% YoY.
 * @returns Score, colour band, contributions sorted by absolute
 *   contribution (largest first), and `partial` = true when any entity
 *   source reported an error / needs-setup / unreachable status.
 */
export function scoreProject(
  property: PermitEntityBrief["property"],
  entities: EntityBrief[],
  macro: BlsTrendBrief | null = null,
): ProjectIndexScore {
  const cs: SignalContribution[] = [];
  let partial = false;

  // Macro: Chicago construction employment YoY. Affects every permit
  // proportionally — labor-shrinking market raises filling difficulty,
  // labor-growing market signals confidence.
  if (macro?.status === "ok" && macro.yoy_change_pct !== null) {
    const yoy = macro.yoy_change_pct;
    const dir = yoy > 1.5 ? 1 : yoy < -1.5 ? -1 : 0;
    if (dir !== 0) {
      cs.push({
        signal: "macro_chi_construction_employment", weight: 3, direction: dir, raw: yoy,
        contribution: dir * 3,
        note: `Chicago MSA construction employment ${yoy >= 0 ? "+" : ""}${yoy.toFixed(1)}% YoY (BLS) — labor market ${macro.trend}`,
      });
    }
  }

  // ─── Property-level signals ───
  if (property.violations?.status === "ok") {
    const v = property.violations;
    if (v.stop_work_orders > 0) {
      cs.push({
        signal: "stop_work_orders", weight: 10, direction: -1, raw: v.stop_work_orders,
        contribution: -10 * Math.min(v.stop_work_orders, 3), // capped at 3 orders
        note: `${v.stop_work_orders} stop-work order${v.stop_work_orders === 1 ? "" : "s"} on this property`,
      });
    }
    if (v.open_violations > 0) {
      cs.push({
        signal: "open_property_violations", weight: 6, direction: -1, raw: v.open_violations,
        contribution: -6 * Math.min(v.open_violations, 4) / 2, // capped at 4 violations
        note: `${v.open_violations} open building violation${v.open_violations === 1 ? "" : "s"} at the address`,
      });
    } else if (v.total_violations === 0) {
      cs.push({
        signal: "property_clean", weight: 3, direction: 1, raw: 0,
        contribution: 3,
        note: "No building violations on record",
      });
    }
  }

  const site = property.site_context;

  // TIF district = public-subsidy zone. Project has financial backing
  // from the city's tax-increment financing pot. Mild positive signal
  // because public oversight + structured funding usually means the
  // project survives funding gaps. Not a blanket good — TIFs do fail —
  // but worth noting.
  if (site?.in_tif_district) {
    cs.push({
      signal: "in_tif_district", weight: 4, direction: 1, raw: site.tif_district_name,
      contribution: 4,
      note: `Site is inside TIF "${site.tif_district_name}" — public tax-increment financing backing`,
    });
  }

  // Transit access — workers can commute via L without parking.
  // <800m is "right at a station," <1500m is "walkable," beyond that
  // crews are car/shuttle dependent.
  if (site?.nearest_cta_distance_m !== undefined) {
    const m = site.nearest_cta_distance_m;
    if (m <= 800) {
      cs.push({
        signal: "transit_access_strong", weight: 2, direction: 1, raw: m,
        contribution: 2,
        note: `Site is ${m}m from CTA ${site.nearest_cta_station} (${site.nearest_cta_lines}) — workers can commute by L`,
      });
    } else if (m > 1500) {
      // 800–1500m is neutral — walkable but not adjacent.
      cs.push({
        signal: "transit_access_weak", weight: 1, direction: -1, raw: m,
        contribution: -1,
        note: `Site is ${(m / 1000).toFixed(1)}km from nearest CTA stop — crew transport burden`,
      });
    }
  }

  // Labor competition — many active permits within 0.5mi means crews
  // are already booked elsewhere. >5 permits in 90d is "saturated."
  if (site?.nearby_permits_90d !== undefined && site.nearby_permits_90d > 5) {
    const n = site.nearby_permits_90d;
    cs.push({
      signal: "labor_competition_high", weight: 3, direction: -1, raw: n,
      contribution: -Math.min(3, n / 5),
      note: `${n} other permits active within 0.5mi (last 90d) — crew competition`,
    });
  }

  if (site?.is_landmark) {
    cs.push({
      signal: "landmark_status", weight: 3, direction: -1, raw: site.landmark_name,
      contribution: -3,
      note: `Landmark designation${site.landmark_name ? ` (${site.landmark_name})` : ""} — preservation review extends timeline`,
    });
  }

  if (property.owner?.status === "ok") {
    const rawState = property.owner.mailing_state;
    const state = typeof rawState === "string" ? rawState.trim().toUpperCase() : "";
    if (state && state !== "IL") {
      cs.push({
        signal: "owner_out_of_state", weight: 2, direction: -1, raw: rawState,
        contribution: -2,
        note: `Owner mails out of state (${rawState}) — slower local accountability`,
      });
    } else if (state === "IL") {
      // Only credit a local owner when the state is actually known; a
      // missing mailing state is "unknown", not evidence of being local.
      cs.push({
        signal: "owner_local", weight: 2, direction: 1, raw: "IL",
        contribution: 2,
        note: "Owner mails locally — faster accountability",
      });
    }
  }

  // ─── Per-entity signals ───
  for (const e of entities) {
    const tag = `[${e.display_name}]`;

    if (e.osha?.status === "ok") {
      const o = e.osha;
      if (o.inspection_count > 10) {
        cs.push({
          signal: "osha_high_inspection_count", weight: 6, direction: -1, raw: o.inspection_count,
          contribution: -6,
          note: `${tag} ${o.inspection_count} OSHA inspections nationally`,
        });
      } else if (o.inspection_count === 0) {
        cs.push({
          signal: "osha_clean", weight: 3, direction: 1, raw: 0,
          contribution: 3,
          note: `${tag} no OSHA inspections on record`,
        });
      }
      if (o.most_recent_date) {
        const ageDays = (Date.now() - Date.parse(o.most_recent_date)) / 86400000;
        // NaN (unparseable date) fails both comparisons; negative ages
        // (future-dated records) are not treated as "recent".
        if (ageDays >= 0 && ageDays < 90) {
          cs.push({
            signal: "osha_recent_action", weight: 7, direction: -1, raw: o.most_recent_date,
            contribution: -7,
            note: `${tag} OSHA inspection ${Math.round(ageDays)}d ago`,
          });
        }
      }
    } else if (e.osha?.status === "error") {
      partial = true;
    }

    if (e.history?.status === "ok") {
      if (e.history.trend === "new") {
        cs.push({
          signal: "new_entity", weight: 8, direction: -1, raw: e.history.permits_historical_total,
          contribution: -8,
          note: `${tag} only ${e.history.permits_historical_total} permits ever — possible LLC-shuffle signature`,
        });
      } else if (e.history.trend === "growing") {
        cs.push({
          signal: "activity_growing", weight: 3, direction: 1, raw: e.history.permits_last_180d,
          contribution: 3,
          note: `${tag} growing Chicago footprint (${e.history.permits_last_180d} in 180d)`,
        });
      } else if (e.history.trend === "declining") {
        cs.push({
          signal: "activity_declining", weight: 4, direction: -1, raw: e.history.permits_last_180d,
          contribution: -4,
          note: `${tag} declining Chicago footprint`,
        });
      }
    }

    if (e.federal?.status === "ok" && e.federal.total_awards_value > 0) {
      const fedM = e.federal.total_awards_value / 1e6;
      cs.push({
        signal: "federal_contract_history", weight: 5, direction: 1, raw: e.federal.total_awards_value,
        contribution: Math.min(5, Math.log10(fedM + 1) * 2),
        note: `${tag} $${fedM.toFixed(0)}M federal contracts — vetted compliance`,
      });
    }

    // SVEP — the heaviest-weight negative signal we have. OSHA placed
    // this contractor in the Severe Violator Enforcement Program after
    // willful or repeat violations. Real money + worker safety risk.
    if (e.svep?.flagged) {
      cs.push({
        signal: "osha_svep_listed", weight: 10, direction: -1,
        raw: (e.svep.matched_entries ?? []).map((m) => m.name).join(" | "),
        contribution: -10 * 2,
        note: `${tag} OSHA SVEP-listed (Severe Violator Enforcement Program) — formally flagged worst-actor`,
      });
    }

    if (e.parent_link?.status === "ok" && e.parent_link.parent_ticker) {
      cs.push({
        signal: "public_parent_chain", weight: 3, direction: 1, raw: e.parent_link.parent_ticker,
        contribution: 3,
        note: `${tag} traceable to public parent ${e.parent_link.parent_ticker}`,
      });
    }

    // News sentiment over the recent headlines. Negative bias means
    // the press cycle around this firm is dominated by lawsuits /
    // accidents / fines.
    if (e.news_sentiment) {
      const ns = e.news_sentiment;
      if (ns.negative > ns.positive && ns.negative > 0) {
        cs.push({
          signal: "news_sentiment_negative", weight: 5, direction: -1, raw: ns.score,
          contribution: -Math.min(5, ns.negative * 1.5),
          note: `${tag} negative news cycle (${ns.negative} flagged: ${ns.flagged_headlines.filter((h) => h.polarity === "neg").slice(0, 2).map((h) => h.reasons.join("/")).join(", ")})`,
        });
      } else if (ns.positive > ns.negative && ns.positive >= 2) {
        cs.push({
          signal: "news_sentiment_positive", weight: 2, direction: 1, raw: ns.score,
          contribution: 2,
          note: `${tag} positive news cycle (${ns.positive} flagged on awards/expansion)`,
        });
      }
    }

    if (e.debarment?.status === "needs_setup") partial = true;
    if (e.nlrb?.status === "needs_setup") partial = true;
    if (e.ilsos?.status === "source_unreachable") partial = true;
  }

  // Net score from 50 baseline. Clamp 0..100.
  const net = cs.reduce((a, c) => a + c.contribution, 0);
  const score = Math.max(0, Math.min(100, Math.round(50 + net)));
  const band: ProjectIndexScore["band"] =
    score < 30 ? "red" :
    score < 45 ? "amber" :
    score <= 55 ? "neutral" :
    score <= 75 ? "green" : "strong";

  // Sort by absolute contribution, biggest movers first
  cs.sort((a, b) => Math.abs(b.contribution) - Math.abs(a.contribution));

  return { score, band, contributions: cs, partial };
}

// ─── Public: build a brief for a single permit ─────────────────────

type PermitInput = {
  permit_id?: string;
  address?: string;
  work_type?: string;
  contact_1_name?: string;
  contact_1_type?: string;
  contact_2_name?: string;
  contact_2_type?: string;
};

/**
 * Builds the full brief for one permit: per-contact entity briefs, the
 * de-duplicated ticker list, property-side signals, the BLS macro trend
 * and the resulting project index score.
 *
 * Entity, property and macro lookups are started together and awaited as
 * one group. If any of them rejects, this function rejects.
 *
 * @param permit Permit fields; contacts with blank names are ignored and
 *   contacts that normalise to the same name are merged (first one wins).
 * @param opts Set a flag to `false` to skip that optional per-entity source.
 */
// NOTE(review): return type reconstructed as PermitEntityBrief from the
// `property` parameter of scoreProject — confirm against the type file.
export async function buildPermitBrief(
  permit: PermitInput,
  opts: {
    fetchIlsos?: boolean;
    fetchOsha?: boolean;
    fetchTicker?: boolean;
    fetchHistory?: boolean;
  } = {},
): Promise<PermitEntityBrief> {
  const wantOsha = opts.fetchOsha !== false;
  const wantIlsos = opts.fetchIlsos !== false;
  const wantTicker = opts.fetchTicker !== false;
  const wantHistory = opts.fetchHistory !== false;

  const rawContacts: { name: string; role: string }[] = [];
  if (permit.contact_1_name?.trim()) {
    rawContacts.push({
      name: permit.contact_1_name.trim(),
      role: permit.contact_1_type || "CONTACT_1",
    });
  }
  if (permit.contact_2_name?.trim()) {
    rawContacts.push({
      name: permit.contact_2_name.trim(),
      role: permit.contact_2_type || "CONTACT_2",
    });
  }
  // Dedupe by normalized name — same firm often appears as both contacts.
  const seen = new Set<string>();
  const contacts: { name: string; role: string }[] = [];
  for (const c of rawContacts) {
    const key = normalizeEntityName(c.name);
    if (!key || seen.has(key)) continue;
    seen.add(key);
    contacts.push(c);
  }

  // 12 parallel calls per entity. OSHA throttles internally (per the
  // original notes), the rest fan straight out.
  const briefEntity = async (c: { name: string; role: string }): Promise<EntityBrief> => {
    const [osha, ilsos, stock, history, debarment, parent_link, federal, nlrb, diversity, news, osha_sir, svep] =
      await Promise.all([
        wantOsha ? fetchOshaBrief(c.name) : Promise.resolve(null),
        wantIlsos ? fetchIlsosBrief(c.name) : Promise.resolve(null),
        wantTicker ? fetchTickerBrief(c.name) : Promise.resolve(null),
        wantHistory ? fetchContractorHistory(c.name) : Promise.resolve(null),
        fetchDebarmentBrief(c.name),
        fetchParentLink(c.name),
        fetchFederalContracts(c.name),
        fetchNlrbBriefReal(c.name),
        fetchDiversityCerts(c.name),
        fetchNewsMentions(c.name),
        fetchOshaSirBrief(c.name),
        fetchSvepBrief(c.name),
      ]);
    const news_sentiment = news ? scoreNewsSentiment(news) : null;
    return {
      key: normalizeEntityName(c.name),
      display_name: c.name,
      role: c.role,
      ticker: entityTicker(c.name),
      osha,
      ilsos,
      stock,
      history,
      debarment,
      parent_link,
      federal,
      nlrb,
      diversity,
      news,
      news_sentiment,
      osha_sir,
      svep,
      risk: scoreEntity(osha, ilsos, history),
    };
  };

  // Entities (in contact order), property-side signals and the macro
  // trend are independent, so run them concurrently. A single outer
  // Promise.all also means a failure in one group cannot leave another
  // group's rejection unhandled.
  const address = permit.address;
  const [entities, [owner, violations, site_context, liens], macro] = await Promise.all([
    Promise.all(contacts.map(briefEntity)),
    // Property-side signals: owner, violations, site context, liens placeholder.
    Promise.all([
      address ? fetchPropertyOwner(address) : Promise.resolve(null),
      address ? fetchPropertyViolations(address) : Promise.resolve(null),
      address ? fetchSiteContext(address) : Promise.resolve(null),
      address ? fetchLiensBrief(address) : Promise.resolve(null),
    ]),
    // Macro context (BLS Chicago construction employment).
    fetchBlsConstructionTrend(),
  ]);

  // Flatten the ticker portfolio across entities, dedupe by ticker
  // symbol, sort by cap_proxy descending (most-profitable related
  // company first — Project Index "most-profitable beneficiary first").
  const tickersByKey = new Map<string, NonNullable<EntityBrief["stock"]>>();
  for (const e of entities) {
    if (e.stock?.status === "ok" && e.stock.ticker) {
      tickersByKey.set(e.stock.ticker, e.stock);
    }
  }
  const tickers = [...tickersByKey.values()].sort(
    (a, b) => (b.cap_proxy ?? 0) - (a.cap_proxy ?? 0),
  );

  const union = unionsForWorkType(permit.work_type);

  const property = {
    address: permit.address || "",
    ticker: addressTicker(permit.address || ""),
    owner,
    violations,
    union,
    site_context,
    liens,
  };
  const index_score = scoreProject(property, entities, macro);

  return {
    permit_id: permit.permit_id || "",
    property,
    entities,
    tickers,
    macro,
    index_score,
    // Roadmap: now-shorter list as more sources go live. Items pending
    // for batch 4-5: SEC Exhibit 21, debarment lists, USASpending,
    // mechanics liens, NLRB+OSHA-Severe-Injury, contractor drill-down.
    roadmap: [
      "SEC Exhibit 21 full index — walk every 10-K Exhibit 21 to build a complete subsidiary→parent tree (curated map covers top 12 GCs today)",
      "SAM.gov v3 API — register at sam.gov for free key; OR human-mediated CSV download into local cache",
      "IDOL prevailing-wage debarment list — HTML/PDF scrape from labor.illinois.gov",
      "OSHA Severe Injury Reports — bot-protected at dol.gov; needs human-mediated CSV refresh OR browser-automation",
      "Cook County Recorder mechanics liens — HTML scrape with PIN-based pagination",
      "TIF polygon containment check — needs lat/long-in-polygon (turf.js) to confirm in_tif_district",
      "Community area name table — static lookup from Chicago number → name",
      "BLS QCEW + FRED commodity prices — macroeconomic context",
      "PACER bankruptcy + civil litigation — federal court filings (free for opinion search)",
      "Cook County Treasurer — property tax delinquency",
      "NYC DOB + LA LADBS — cross-city contractor footprint",
      "LLC-shuffle detector — principal+agent fingerprint matching (blocked on ILSOS access)",
    ],
    generated_at: new Date().toISOString(),
  };
}
// Patch trailer preserved from the flattened export: -- 2.47.2 From
31d8ef918c3eb912ecac64b093841d2b81185d17 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 21:44:59 -0500 Subject: [PATCH 05/43] =?UTF-8?q?demo:=20contractor=20links=20=E2=80=94=20?= =?UTF-8?q?respect=20the=20/lakehouse=20path=20prefix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit J reported https://devop.live/contractor?name=3115%20W%20POLK%20ST.%20LLC returned 404. Cause: the anchor href was a bare /contractor, which on devop.live routes to the LLM Team UI (port 5000) at the main site root, not the lakehouse mcp-server (which lives under /lakehouse/*). Every page that renders a contractor link now uses the same prefix detector the dashboard already had: var P = location.pathname.indexOf('/lakehouse') >= 0 ? '/lakehouse' : ''; Files updated: - search.html: entity-brief anchor + preview anchor → P+/contractor - console.html: permit-card contractor list → P+/contractor - contractor.html: history.replaceState + back-link + the /intelligence/contractor_profile fetch all use P prefix. The page is reachable at /lakehouse/contractor on the public URL and bare /contractor on localhost; both work without further config. Verified: https://devop.live/lakehouse/contractor?name=3115%20W%20POLK%20ST.%20LLC → 200, 29.9 KB, full profile renders. Contractor has 1 permit on file (a small LLC), 1 geocoded so the heat map plots one marker. 
--- mcp-server/console.html | 2 +- mcp-server/contractor.html | 13 +++++++++++-- mcp-server/search.html | 4 ++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/mcp-server/console.html b/mcp-server/console.html index 56ca178..d2316fd 100644 --- a/mcp-server/console.html +++ b/mcp-server/console.html @@ -319,7 +319,7 @@ function loadChapter4(){ seen.push(n); if(seen.length>1) contractors.appendChild(document.createTextNode(' · ')); var a=document.createElement('a'); - a.href='/contractor?name='+encodeURIComponent(n); + a.href=P+'/contractor?name='+encodeURIComponent(n); a.target='_blank'; a.rel='noopener'; a.style.cssText='color:#58a6ff;text-decoration:none;border-bottom:1px dotted #58a6ff44'; diff --git a/mcp-server/contractor.html b/mcp-server/contractor.html index 85ad7f8..fbf6a15 100644 --- a/mcp-server/contractor.html +++ b/mcp-server/contractor.html @@ -66,6 +66,12 @@ body{font-family:'Inter',-apple-system,system-ui,sans-serif;background:#090c10;c + diff --git a/mcp-server/search.html b/mcp-server/search.html index 4170189..70ba766 100644 --- a/mcp-server/search.html +++ b/mcp-server/search.html @@ -202,6 +202,7 @@ body{font-family:'Inter',-apple-system,system-ui,'Segoe UI',sans-serif;backgroun
+
+ +
@@ -192,6 +223,9 @@ window.addEventListener('load',function(){ loadChapter3(); loadChapter4(); loadChapter5(); + loadChapter6(); + loadChapter7(); + loadChapter8(); }); // ─── Chapter 1 ──────────────────────────────────────────── @@ -432,7 +466,186 @@ function loadChapter5(){ }); } -// ─── Chapter 6 ──────────────────────────────────────────── +// ─── Chapter 6 — per-staffer hot-swap ───────────────────── +function loadChapter6(){ + apiGet('/staffers').then(function(r){ + var host=document.getElementById('ch6-staffers');host.textContent=''; + var staffers=(r&&r.staffers)||[]; + if(!staffers.length){ + host.appendChild(el('div','err','No staffer roster — /staffers returned empty.')); + return; + } + var grid=document.createElement('div'); grid.className='grid'; grid.style.gridTemplateColumns='repeat(auto-fit,minmax(280px,1fr))'; + staffers.forEach(function(s){ + var card=el('div','card accent-b'); + var name=el('div',null,s.name); + name.style.cssText='font-size:18px;font-weight:700;color:#e6edf3;letter-spacing:-0.3px'; + card.appendChild(name); + var role=el('div',null,s.display||''); + role.style.cssText='font-size:11px;color:#545d68;text-transform:uppercase;letter-spacing:1.2px;margin-top:2px'; + card.appendChild(role); + var ter=el('div',null,'Territory: '+s.territory.state+' · '+s.territory.cities.slice(0,3).join(', ')+'…'); + ter.style.cssText='color:#8b949e;font-size:12px;margin-top:8px'; + card.appendChild(ter); + var greet=el('div',null,s.greeting||''); + greet.style.cssText='color:#c9d1d9;font-size:11px;margin-top:6px;line-height:1.5;border-top:1px dashed #1f2631;padding-top:6px'; + card.appendChild(greet); + grid.appendChild(card); + }); + host.appendChild(grid); + var narr=el('div','narr'); + narr.appendChild(el('strong',null,'What this means for a staffer. 
')); + narr.appendChild(document.createTextNode('Same query — "forklift operators" — returns 89 Indiana workers when Devon is acting, 16 Wisconsin workers when Aisha is acting, 167 Illinois workers when Maria is acting. The MEMORY panel relabels itself with whoever\'s viewing. The corpus stays intact; the relevance gradient is per coordinator. As they each accumulate fills, their slice of the playbook compounds independently.')); + host.appendChild(narr); + }).catch(function(e){ + var h=document.getElementById('ch6-staffers');h.textContent='';h.appendChild(el('div','err','Staffer roster unavailable: '+(e.message||e))); + }); +} + +// ─── Chapter 7 — Construction Activity Signal Engine ────── +function loadChapter7(){ + Promise.all([ + api('/intelligence/profiler_index',{limit:200}), + ]).then(function(rs){ + var prof=rs[0]||{}; + var rows=prof.contractors||[]; + var host=document.getElementById('ch7-signal');host.textContent=''; + // Aggregate basket + var byTicker={}; + rows.forEach(function(r){ + var ts=(r.tickers&&r.tickers.direct?r.tickers.direct:[]).concat(r.tickers&&r.tickers.associated?r.tickers.associated:[]); + ts.forEach(function(t){ + if(!t||!t.ticker) return; + if(!byTicker[t.ticker]) byTicker[t.ticker]={ticker:t.ticker,count:0,kinds:new Set()}; + byTicker[t.ticker].count++; + byTicker[t.ticker].kinds.add(t.via); + }); + }); + var basket=Object.values(byTicker).sort(function(a,b){return b.count-a.count}); + var attribCost=0; + rows.forEach(function(r){ + var ts=(r.tickers&&r.tickers.direct?r.tickers.direct:[]).concat(r.tickers&&r.tickers.associated?r.tickers.associated:[]); + if(ts.length>0) attribCost += (r.total_cost||0); + }); + var totalAttrib = basket.reduce(function(s,b){return s+b.count},0); + if(!basket.length){ + host.appendChild(el('div','loading','No public-issuer attributions in this view yet.')); + return; + } + // Top-line metric strip + var grid=document.createElement('div');grid.className='grid'; + var c1=el('div','card accent-g'); + var 
b1=el('div',null,basket.length); b1.style.cssText='font-size:30px;font-weight:800;color:#3fb950;line-height:1'; + c1.appendChild(b1); + var l1=el('div',null,'Public issuers in scope'); l1.style.cssText='font-size:10px;color:#545d68;text-transform:uppercase;letter-spacing:1.2px;margin-top:8px;font-weight:600'; + c1.appendChild(l1); + var s1=el('div',null,totalAttrib+' attribution edges across the contractor graph'); s1.style.cssText='font-size:12px;color:#8b949e;margin-top:4px'; + c1.appendChild(s1); + grid.appendChild(c1); + var c2=el('div','card accent-b'); + var bav = attribCost>=1e9?'$'+(attribCost/1e9).toFixed(2)+'B':attribCost>=1e6?'$'+(attribCost/1e6).toFixed(0)+'M':'$'+Math.round(attribCost/1e3)+'K'; + var b2=el('div',null,bav); b2.style.cssText='font-size:30px;font-weight:800;color:#58a6ff;line-height:1'; + c2.appendChild(b2); + var l2=el('div',null,'Attributed build value'); l2.style.cssText='font-size:10px;color:#545d68;text-transform:uppercase;letter-spacing:1.2px;margin-top:8px;font-weight:600'; + c2.appendChild(l2); + var s2=el('div',null,'Permits with at least one wired public-issuer thread'); s2.style.cssText='font-size:12px;color:#8b949e;margin-top:4px'; + c2.appendChild(s2); + grid.appendChild(c2); + var c3=el('div','card accent-l'); + var b3=el('div',null,rows.length); b3.style.cssText='font-size:30px;font-weight:800;color:#bc8cff;line-height:1'; + c3.appendChild(b3); + var l3=el('div',null,'Contractors indexed'); l3.style.cssText='font-size:10px;color:#545d68;text-transform:uppercase;letter-spacing:1.2px;margin-top:8px;font-weight:600'; + c3.appendChild(l3); + var s3=el('div',null,'Each is also a heat map of where they work'); s3.style.cssText='font-size:12px;color:#8b949e;margin-top:4px'; + c3.appendChild(s3); + grid.appendChild(c3); + host.appendChild(grid); + // Top issuer table + var tHdr=document.createElement('div');tHdr.style.cssText='color:#545d68;font-size:11px;text-transform:uppercase;letter-spacing:1.4px;font-weight:600;margin:14px 0 
8px'; + tHdr.textContent='Top public issuers attributable in this view'; + host.appendChild(tHdr); + basket.slice(0,8).forEach(function(b){ + var row=el('div','row'); + var left=document.createElement('div');left.style.flex='1';left.style.minWidth='0'; + var tk=el('div','title',b.ticker); + tk.style.cssText+='font-family:ui-monospace,monospace;color:#3fb950'; + left.appendChild(tk); + var kinds=Array.from(b.kinds); + var meta=el('div','meta',b.count+' attribution'+(b.count===1?'':'s')+' · '+kinds.join('+')); + left.appendChild(meta); + row.appendChild(left); + var right=document.createElement('div');right.style.cssText='font-size:11px;color:#58a6ff'; + var a=document.createElement('a');a.href=P+'/profiler';a.target='_blank';a.style.color='#58a6ff';a.style.textDecoration='none'; + a.textContent='see in profiler →'; + right.appendChild(a); + row.appendChild(right); + host.appendChild(row); + }); + var narr=el('div','narr'); + narr.appendChild(el('strong',null,'What this means for the business. ')); + narr.appendChild(document.createTextNode('The data corpus is also a market-signal engine. When a contractor co-files permits with a public company, that contractor inherits the ticker as an associated indicator. Permit volume changes precede earnings calls by months. As we add cities (NYC DOB next, then LA / Houston / Boston) the network compounds — and we own a piece of the signal that nobody else has.')); + host.appendChild(narr); + }).catch(function(e){ + var h=document.getElementById('ch7-signal');h.textContent='';h.appendChild(el('div','err','Signal engine unavailable: '+(e.message||e))); + }); +} + +// ─── Chapter 8 — Triage in one shot ─────────────────────── +function loadChapter8(){ + api('/intelligence/chat',{message:'Marcus running late site 4422'}).then(function(d){ + var host=document.getElementById('ch8-triage');host.textContent=''; + if(d.type!=='triage'){ + host.appendChild(el('div','err','Triage route did not fire. 
Got type=' + (d.type||'?'))); + return; + } + // Worker card + var wc=el('div','card accent-r'); + var lbl=el('div',null,'⚠ TRIAGE EVENT'); lbl.style.cssText='font-size:10px;color:#f85149;text-transform:uppercase;letter-spacing:1.2px;font-weight:700;margin-bottom:8px'; + wc.appendChild(lbl); + var nm=el('div',null,d.worker.name); nm.style.cssText='font-size:18px;font-weight:700;color:#e6edf3'; + wc.appendChild(nm); + var loc=el('div',null,(d.worker.role||'?')+' · '+(d.worker.city||'')+', '+(d.worker.state||'')); + loc.style.cssText='font-size:12px;color:#8b949e;margin-top:2px'; + wc.appendChild(loc); + var stats=document.createElement('div');stats.style.cssText='display:flex;gap:14px;font-size:11px;color:#8b949e;margin-top:8px;flex-wrap:wrap'; + [['Reliability',Math.round((d.worker.rel||0)*100)+'%'],['Responsiveness',Math.round((d.worker.resp||0)*100)+'%'],['Availability',Math.round((d.worker.avail||0)*100)+'%']].forEach(function(p){ + var s=document.createElement('span'); + var l=document.createElement('span');l.textContent=p[0]+': '; + var b=document.createElement('b');b.style.color='#e6edf3';b.textContent=p[1]; + s.appendChild(l);s.appendChild(b);stats.appendChild(s); + }); + wc.appendChild(stats); + host.appendChild(wc); + // Draft SMS + var smsLabel=el('div',null,'DRAFT SMS — TO CLIENT'); smsLabel.style.cssText='font-size:10px;color:#d29922;text-transform:uppercase;letter-spacing:1.2px;font-weight:700;margin:14px 0 4px'; + host.appendChild(smsLabel); + var smsBox=el('div',null,d.draft_sms||''); + smsBox.style.cssText='background:#0d1117;border:1px solid #21262d;border-radius:6px;padding:10px 12px;font-family:ui-monospace,monospace;font-size:12px;color:#e6edf3;line-height:1.5;white-space:pre-wrap'; + host.appendChild(smsBox); + // Backfills + if((d.backfills||[]).length){ + var bfHdr=document.createElement('div');bfHdr.style.cssText='font-size:11px;color:#3fb950;text-transform:uppercase;letter-spacing:1.2px;font-weight:600;margin:14px 0 8px'; + 
bfHdr.textContent='✓ '+d.backfills.length+' local '+(d.worker.role||'workers')+' available — sorted by responsiveness'; + host.appendChild(bfHdr); + d.backfills.slice(0,5).forEach(function(c){ + var row=el('div','row'); + var left=document.createElement('div');left.style.flex='1';left.style.minWidth='0'; + left.appendChild(el('div','title',c.name)); + left.appendChild(el('div','meta',(c.role||'?')+' · '+(c.city||'')+', '+(c.state||'')+' · rel '+Math.round((c.rel||0)*100)+'% · resp '+Math.round((c.resp||0)*100)+'%')); + row.appendChild(left); + host.appendChild(row); + }); + } + var narr=el('div','narr'); + narr.appendChild(el('strong',null,'What this means for a coordinator. ')); + narr.appendChild(document.createTextNode('A normal afternoon: text rolls in, coordinator opens 3 tabs to look up the worker, checks the bench by hand, drafts a message. 20 minutes. Here: the system pulled the profile, scored attendance, surfaced 5 same-role same-geo backfills sorted by who actually answers their phone, and pre-wrote the client-facing SMS. The coordinator clicks send. ' + d.duration_ms + 'ms.')); + host.appendChild(narr); + }).catch(function(e){ + var h=document.getElementById('ch8-triage');h.textContent='';h.appendChild(el('div','err','Triage demo unavailable: '+(e.message||e))); + }); +} + +// ─── Chapter 9 (was 6) — Try it yourself ────────────────── function runTry(){ var q=document.getElementById('try-q').value.trim();if(!q)return; var btn=document.getElementById('try-btn'),out=document.getElementById('try-out'); -- 2.47.2 From 631b0329b19b1b6335a174754e686eca7290c585 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 23:13:46 -0500 Subject: [PATCH 13/43] =?UTF-8?q?demo:=20proof=20=E2=80=94=20full=20archit?= =?UTF-8?q?ecture-page=20rewrite=20for=20current=20state?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit J: "needs a rewrite." 
Old version was anchored on a dual-agent mistral+qwen2.5 loop that hasn't been the model story for weeks, called the system 13 crates (it's 15), referenced "Local 7B models" in the honest-limits section, and had no mention of: - the 40-model OpenCode fleet via one sk-* key - the 9-rung cloud-first ladder - N=3 consensus + cross-architecture tie-breaker - auditor cross-lineage (Kimi K2.6 ↔ Haiku 4.5, Opus auto-promote) - distillation v1.0.0 frozen substrate (e7636f2) - pathway memory (88 traces, 11/11 replays, ADR-021) - per-staffer hot-swap index - Construction Activity Signal Engine + BAI + ticker network - the gateway as OpenAI-compat drop-in middleware Rewrote into 10 chapters: 1. Receipts — live tests + new live tile showing the Signal Engine view for THIS load (issuer count, attributed build value, contractor count, attribution edges) 2. Architecture — corrected to 15 crates with current responsibilities; ASCII diagram showing OpenAI consumers + MCP + Browser all hitting gateway /v1/*; provider fleet table with all 5 (ollama, ollama_cloud, openrouter, opencode 40-model, kimi); validator + truth + auditor crates added 3. Model fleet — REPLACED the dual-agent mistral story. Now: the 9-rung ladder (kimi-k2:1t through openrouter:free → ollama local), N=3 consensus + tie-breaker math, auditor Kimi↔Haiku alternation with Opus auto-promote on big diffs, distillation v1.0.0 freeze tag e7636f2 (145 tests · 22/22 · 16/16 · bit-identical) 4. Two memory layers — kept playbook content (Phase 19 boost math still load-bearing), added pathway memory (ADR-021) section with live counters in the page (88 / 11-11 / 100% reuse rate) 5. Per-staffer hot-swap — NEW. Pseudocode showing how staffer_id scopes state filter + playbook geo + UI relabel to MARIA'S MEMORY 6. Construction Activity Signal Engine — NEW. Three attribution flavors (direct, parent, associated), BAI math, cross-metro replication framing (NYC DOB next, then LA / Houston / Boston) 7. 
Architectural choices — added ADR-021 row + distillation freeze row 8. Measured at scale — kept (uses /proof.json scale data) 9. Verify or dispute — REFRESHED with current endpoints. Removed the stale "bun run tests/multi-agent/scenario.ts" recipe; added curl examples for /v1/health, pathway/stats, per-staffer scoping (3-loop bash script), late-worker triage, profiler_index, ticker_quotes, auditor verdicts, distillation acceptance gate 10. What we are NOT claiming — REFRESHED. Removed "Local 7B models" caveat; added: 12 awaiting public-data sources are placeholders, SEC name-fuzzy has rare false positives, BAI is a thesis not a backtest yet, single-metro today Live data probes added: loadPathwayLive — fills pwm-traces / pwm-replays / pwm-rate spans loadSignalLive — renders the LIVE Signal Engine tile under Ch1 Nav also gained a Profiler link to match search.html and console.html. Verified end-to-end on devop.live/lakehouse/proof: 10 chapters render, 5/5 live tests pass, pathway shows 88 traces + 100% reuse rate, live signal tile shows 11 issuers + $347M attributed + 200 contractors + 14 attribution edges. Architecture diagram + crate table accurate as of HEAD. --- mcp-server/proof.html | 425 +++++++++++++++++++++++++++++------------- 1 file changed, 297 insertions(+), 128 deletions(-) diff --git a/mcp-server/proof.html b/mcp-server/proof.html index ceb428a..deeca7f 100644 --- a/mcp-server/proof.html +++ b/mcp-server/proof.html @@ -81,6 +81,7 @@ pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 1