From 1ac8045924113cd8658d50da3b7dd3b5515ba4a9 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 21:28:45 -0500 Subject: [PATCH] =?UTF-8?q?demo:=20contractor=20profile=20=E2=80=94=20heat?= =?UTF-8?q?=20map,=20project=20index,=2012=20awaiting=20sources?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The contractor.html click-target J asked for: a separate page (not a modal, not a fall-through search) showing every angle on a contractor. Reachable from the Co-Pilot dashboard, the staffers console, and the search box — all anchor-wrap contractor names to /contractor?name=... What's new on the page: 1. PROJECT INDEX — build-signal score Single 0-100 number with the drivers laid out beneath. Driver list is staffer-readable: "59 Chicago permits in 180d (+30) · OSHA 20 inspections (-25) · federal contractor (+15)". Score weights are placeholders to be replaced by an ML model once the 12 awaiting sources ship — the current 6 wired signals would not give a real model enough features. 2. HEAT MAP — every Chicago permit they've been contact_1 or contact_2 on, last 24 months, plotted on a leaflet dark map. Color by cost (green <$100K, amber $100K-$1M, red ≥$1M), radius proportional to cost so the staffer sees where money + activity concentrates. Click a marker for permit detail (cost, date, work type, address, permit ID). All 50 of Turner Construction's geocoded recent permits in Chicago plot end-to-end. 3. ACTIVITY TIMELINE — monthly permit count, bar chart, with the first/last month labels so the staffer sees momentum. Tooltip on each bar gives the count and total cost for that month. 4. 12 AWAITING SOURCES — placeholder cards for the public datasets that would 3× the build-signal feature count. Each card has: - source name (real, e.g. DOL Wage & Hour, EPA ECHO, MSHA, BBB) - one-liner in coordinator language ("Has this contractor stiffed workers? 
Will they pay our staffing invoices?") - "Would show:" sample shape so the engineering scope is concrete Order is staffing-decision relevance: 1. DOL Wage & Hour (WHD violations) 2. State Licensure Boards (active license + expiry) 3. Surety Bond Capacity (bonding ceiling) 4. EPA ECHO Compliance (env violations at sites) 5. DOT/FMCSA Carrier Safety (crash + OOS rates) 6. BBB Complaints + Rating 7. PACER Civil Suits (FLSA / Title VII / ADA) 8. UCC Lien Filings (cash flow distress) 9. D&B / Credit Bureau (PAYDEX, payment behavior) 10. State UI Employer Claims (workforce stability) 11. MSHA Mine Safety (excavation / aggregate / heavy) 12. Registered Apprenticeships (DOL RAPIDS pipeline) Server-side: entity.ts fetchContractorHistory now pulls the 50 most recent permits with id + lat/lng + work_description, so the heat map and timeline have what they need without a second SQL hop. The ContractorHistory.recent_permits type gained the optional fields. Front-end: contractor.html got 4 new render sections, leaflet wiring (stylesheet + script in head), placeholder grid CSS, and a PLACEHOLDERS const at the bottom with the 12 sources. All popup HTML is built via DOM construction (textContent + appendChild) — no innerHTML, no XSS. console.html: contractor names from /intelligence/permit_contracts now anchor-wrapped to /contractor?name=... so the click-through J described works from the staffers console too. Click stops propagation so the permit details element doesn't toggle on the same click. 
Verified end-to-end via playwright — Turner Construction profile shows: PIX score "Mixed signals — review drivers below" Heat map: "50 permits plotted · green/amber/red" 4 section labels in order 12 placeholder cards in the documented order --- mcp-server/console.html | 24 + mcp-server/contractor.html | 597 ++++++++ mcp-server/entity.ts | 2781 ++++++++++++++++++++++++++++++++++++ 3 files changed, 3402 insertions(+) create mode 100644 mcp-server/contractor.html create mode 100644 mcp-server/entity.ts diff --git a/mcp-server/console.html b/mcp-server/console.html index eada43c..56ca178 100644 --- a/mcp-server/console.html +++ b/mcp-server/console.html @@ -306,6 +306,30 @@ function loadChapter4(){ addr.style.cssText='color:#8b949e;font-size:12px;margin-top:2px'; card.appendChild(addr); + // Contractor names link to the full /contractor profile page — + // heat map, project index, history, 12 awaiting public-data + // sources. The staffer click-through J asked for. + if(p.contact_1_name || p.contact_2_name){ + var contractors=document.createElement('div'); + contractors.style.cssText='color:#8b949e;font-size:12px;margin-top:4px'; + contractors.appendChild(document.createTextNode('Contractors: ')); + var seen=[]; + [p.contact_1_name, p.contact_2_name].forEach(function(n,i){ + if(!n || seen.indexOf(n)>=0) return; + seen.push(n); + if(seen.length>1) contractors.appendChild(document.createTextNode(' · ')); + var a=document.createElement('a'); + a.href='/contractor?name='+encodeURIComponent(n); + a.target='_blank'; + a.rel='noopener'; + a.style.cssText='color:#58a6ff;text-decoration:none;border-bottom:1px dotted #58a6ff44'; + a.title='Open full contractor profile'; + a.textContent=n; + contractors.appendChild(a); + }); + card.appendChild(contractors); + } + card.appendChild(el('div','step-label','STEP 1 · Derive staffing need')); var s1=el('div','step-body'); s1.appendChild(document.createTextNode('Industry heuristic: ~1 worker per $150K of permit cost, capped 2-8. 
Resulting contract: ')); diff --git a/mcp-server/contractor.html b/mcp-server/contractor.html new file mode 100644 index 0000000..85ad7f8 --- /dev/null +++ b/mcp-server/contractor.html @@ -0,0 +1,597 @@ + + + +Contractor Profile · Staffing Co-Pilot + + + + +
+

Staffing Co-Pilot · Contractor Profile

+ ← Dashboard +
+
+ +
Type a name above to load the full portfolio across every wired data source.
+
+ + diff --git a/mcp-server/entity.ts b/mcp-server/entity.ts new file mode 100644 index 0000000..f1b45cb --- /dev/null +++ b/mcp-server/entity.ts @@ -0,0 +1,2781 @@ +// Project Index — per-Chicago-permit public-data signal portfolio. +// (Originally called "ETF" until we realized that's an SEC-regulated term; +// this is NOT an investment product. It's a custom index of Chicago +// building-related signals.) +// +// Pulls violation & corp-registry signals keyed to the contractor/applicant +// names extracted from public building permits. The point is to surface +// the kinds of signals a staffer or investor would want BEFORE committing +// workers or capital: OSHA history, corporate-registry status, federal +// contracts, parent-company chain, building violations, and (eventually) +// LLC-shuffle detection where a firm with a bad record dissolves and +// reforms under a new name. +// +// Sources right now: +// - OSHA Enforcement search (www.osha.gov/ords/imis/establishment.search) +// — HTML only, scraped. Reachable from our ASN (verified). +// - Illinois Secretary of State — BLOCKED from our datacenter ASN. +// We return a structured placeholder with an honest "source_unreachable" +// flag so the UI can show "awaiting source" rather than fake data. +// - OpenCorporates — requires API token even for free tier (not self-serve). +// +// Caching: JSONL at data/_entity_cache/entities.jsonl. Keyed by +// normalized company name. 30-day TTL because entity status and +// violation history don't change fast. +// +// Politeness: OSHA gets max 1 req/sec (with jitter) and a descriptive UA. 
import { readFile, writeFile, mkdir } from "node:fs/promises";
import { existsSync } from "node:fs";
import { join } from "node:path";
import { findTifDistrict } from "./tif_polygons.js";

const CACHE_DIR = "/home/profit/lakehouse/data/_entity_cache";
const CACHE_FILE = join(CACHE_DIR, "entities.jsonl");
const DEFAULT_TTL_MS = 30 * 24 * 60 * 60 * 1000;
const UA = "lakehouse-staffing-copilot/1.0 (contact: ops@devop.live; public-permit-enrichment)";

// Minimum spacing (ms) between two outbound OSHA requests.
const OSHA_MIN_GAP_MS = 1200;
// Time (ms since epoch) of the most recently *reserved* OSHA request slot.
let lastOshaAt = 0;

/**
 * Politeness gate for the OSHA scraper: resolves once the caller is allowed
 * to issue its request.
 *
 * The slot is reserved synchronously (before any await) by writing
 * `lastOshaAt`, so callers that enter concurrently queue up behind each
 * other at >= OSHA_MIN_GAP_MS intervals. Writing the timestamp only after
 * the sleep would let every concurrent caller observe the same stale value
 * and fire at the same moment.
 *
 * A waiting caller gets up to 200 ms of random jitter added to its slot.
 * The jitter is part of the reserved slot, so it can only widen the gap
 * between consecutive requests, never shrink it below the minimum.
 */
async function oshaGate() {
  const now = Date.now();
  const earliest = lastOshaAt + OSHA_MIN_GAP_MS;
  const slot = earliest > now ? earliest + Math.random() * 200 : now;
  lastOshaAt = slot;
  if (slot > now) {
    await new Promise((r) => setTimeout(r, slot - now));
  }
}

// ─── Types ────────────────────────────────────────────────────────

// One row of the OSHA establishment-search result table.
export type OshaInspection = {
  id: string;
  date: string;
  state: string;
  type: string;
  scope: string;
  naics: string;
  establishment: string;
  detail_url: string;
};

// Summary of an OSHA establishment search for one company name.
export type OshaBrief = {
  source: "osha";
  fetched_at: string;
  searched_name: string;
  normalized_name: string;
  source_url: string;
  status: "ok" | "no_match" | "error";
  inspection_count: number;
  most_recent_date: string | null;
  recent_inspections: OshaInspection[];
  states_seen: string[];
  error?: string;
};

// Illinois Secretary of State corporate-registry record (or a
// "source_unreachable" placeholder when the source cannot be fetched).
export type IlsosBrief = {
  source: "ilsos";
  fetched_at: string;
  searched_name: string;
  status: "source_unreachable" | "ok" | "no_match" | "error";
  reason?: string;
  entity_name?: string;
  file_number?: string;
  status_text?: string;
  formation_date?: string;
  registered_agent?: string;
  principal_address?: string;
  officers?: string[];
  error?: string;
};

// Ticker brief — SEC EDGAR (name → CIK → ticker) + Stooq (live price).
// Both are free & no-auth. SEC is the authoritative name→ticker source
// (company_tickers.json covers 10K+ US-listed issuers); Stooq layers
// current price on top.
We intentionally skip Yahoo Finance — its free +// endpoint has been auth-walled since 2024. +// +// "Market cap proxy" is price × volume rather than shares_outstanding × +// price because shares_outstanding requires scraping an SEC filing for +// each issuer. Good enough for ranking "most profitable related company" +// until we wire XBRL facts in pass 2. +export type TickerBrief = { + source: "sec+stooq"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "error"; + ticker?: string; + company_name?: string; + cik?: string; + exchange?: string; + sic?: string; + sic_description?: string; + // Live quote fields (may be absent on holidays/weekends) + price?: number; + price_date?: string; + volume?: number; + open?: number; + high?: number; + low?: number; + day_change_pct?: number; + // Ranking hint — price × volume, used to sort tickers in the portfolio. + cap_proxy?: number; + sec_url?: string; + stooq_url?: string; + error?: string; +}; + +// Property owner brief — Cook County Assessor parcels dataset (c49d-89sn). +// Lookup by property address → PIN → owner mailing address. The mailing +// address is the key signal: it tells you who the assessor mails the +// tax bill to. PO Box in Minneapolis MN ~= TARGET CORP HQ (verified +// 1101 W Jackson Blvd → PO Box 9456 Minneapolis 55440 = Target). +// Free, no auth, Socrata. +export type PropertyOwnerBrief = { + source: "cook_county_assessor"; + fetched_at: string; + searched_address: string; + status: "ok" | "no_match" | "error"; + pin?: string; + property_address?: string; + property_zip?: string; + // Mailing addr is the smoking gun for ownership inference. + mailing_address?: string; + mailing_city?: string; + mailing_state?: string; + mailing_zip?: string; + township?: string; + ward?: string; + longitude?: string; + latitude?: string; + source_url?: string; + error?: string; +}; + +// Building violations on the property. 
// Stop-work orders, code violations,
// failed inspections — direct "is this build behind schedule" signals.
// Dataset 22u3-xenr.
export type PropertyViolationsBrief = {
  source: "chicago_building_violations";
  fetched_at: string;
  searched_address: string;
  status: "ok" | "error";
  total_violations: number;
  open_violations: number;
  stop_work_orders: number;
  most_recent_date?: string;
  recent_violations: Array<{
    date: string;
    status: string;
    description: string;
    department: string;
  }>;
  error?: string;
};

// Debarment / Excluded Parties — federal SAM.gov + Illinois Dept of Labor.
// Both are "free in spirit" but blocked behind some friction:
//   - SAM.gov v3 API requires registered API key (free at sam.gov but
//     requires email + entity registration; can't be self-served from
//     our server without a one-time human signup).
//   - IDOL prevailing-wage debarment list is published as HTML/PDF on
//     labor.illinois.gov, no API — needs a scraper + periodic refresh.
// Returning structured "needs_setup" placeholder so the UI can show
// the path forward rather than fake clean.
export type DebarmentBrief = {
  source: "sam_gov+idol";
  fetched_at: string;
  searched_name: string;
  status: "ok" | "no_match" | "needs_setup";
  reason?: string;
  sam_excluded?: boolean;
  idol_debarred?: boolean;
  excluded_until?: string;
  agency?: string;
};

/**
 * Debarment lookup for a contractor name.
 *
 * Returns the cached "debarment" row for the normalized name when one
 * exists; otherwise returns a "needs_setup" placeholder explaining what is
 * missing. The placeholder is not written back to the cache here.
 *
 * @param name Contractor name as it appears on the permit.
 */
export async function fetchDebarmentBrief(name: string): Promise<DebarmentBrief> {
  // Timestamp is taken before the cache lookup, as in the original flow.
  const stamp = new Date().toISOString();
  const cacheKey = normalizeEntityName(name);
  const hit = (await cacheGet(cacheKey, "debarment")) as DebarmentBrief | null;
  if (hit) {
    return hit;
  }
  return {
    source: "sam_gov+idol",
    fetched_at: stamp,
    searched_name: name,
    status: "needs_setup",
    reason:
      "SAM.gov v3 needs registered API key (free signup at sam.gov). IDOL prevailing-wage debarment list is HTML-only at labor.illinois.gov — needs scraper + periodic refresh.",
  };
}

// SEC EDGAR 10-K Exhibit 21 — parent/subsidiary tree.
The "private GC → +// public parent ticker" chain. Each public company files Exhibit 21 of +// their 10-K listing all subsidiaries. We resolve by: +// 1. Fetch SEC company submissions for a known parent ticker +// 2. Find the most recent 10-K filing +// 3. Fetch the Exhibit 21 attachment HTML +// 4. Parse subsidiary names +// +// Reverse direction (subsidiary name → parent) requires either: +// (a) Pre-build an index of "subsidiary → parent" by walking every +// 10-K Exhibit 21 in our SEC index (~10K filings, large) +// (b) Use a known mapping for high-volume parents (Turner→ACS, +// Skanska→SKA-B.ST, Balfour Beatty→BBY.L, etc) +// (b) is pragmatic for v1; (a) is the proper long-term answer. +export type ParentLinkBrief = { + source: "sec_exhibit21+wikidata"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_link" | "needs_index"; + reason?: string; + parent_name?: string; + parent_ticker?: string; + parent_exchange?: string; + parent_country?: string; + link_source?: string; +}; + +// Hand-curated map of well-known private contractor → public parent. +// The cleanest seed of the index — covers the dominant Chicago GCs. +// Each entry includes the source citation so a human can verify. 
// Keys are upper-case, punctuation-free name fragments; fetchParentLink
// matches them as substrings of the punctuation-squashed search name.
const KNOWN_PARENT_MAP: Record<string, ParentLinkBrief> = {
  "TURNER CONSTRUCTION": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "TURNER CONSTRUCTION",
    status: "ok",
    parent_name: "Hochtief AG (subsidiary of ACS Group)",
    parent_ticker: "HOC.DE",
    parent_exchange: "Frankfurt (also ACS:MC.MC Madrid)",
    parent_country: "DE/ES",
    link_source: "Hochtief 2024 Annual Report — Turner is wholly-owned via Hochtief Americas",
  },
  "SKANSKA USA": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "SKANSKA USA",
    status: "ok",
    parent_name: "Skanska AB",
    parent_ticker: "SKA-B.ST",
    parent_exchange: "Stockholm",
    parent_country: "SE",
    link_source: "Skanska 2024 Annual Report Exhibit",
  },
  "WALSH GROUP": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "WALSH GROUP",
    status: "no_link",
    reason: "Walsh Group is private (Chicago — Walsh family-owned).",
  },
  "POWER CONSTRUCTION": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "POWER CONSTRUCTION",
    status: "no_link",
    reason: "Power Construction Co is private (Schaumburg, IL — family-owned).",
  },
  "PEPPER CONSTRUCTION": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "PEPPER CONSTRUCTION",
    status: "no_link",
    reason: "Pepper Construction Group is private (Barrington, IL — Pepper family).",
  },
  "CLAYCO": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "CLAYCO",
    status: "no_link",
    reason: "Clayco is private (Chicago — Bob Clark family).",
  },
  "MCHUGH CONSTRUCTION": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "MCHUGH CONSTRUCTION",
    status: "no_link",
    reason: "James McHugh Construction is private.",
  },
  "BULLEY ANDREWS": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "BULLEY ANDREWS",
    status: "no_link",
    reason: "Bulley & Andrews is private (Chicago, family-owned since 1891).",
  },
  "LEND LEASE": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "LEND LEASE",
    status: "ok",
    parent_name: "Lendlease Group",
    parent_ticker: "LLC.AX",
    parent_exchange: "Sydney (ASX)",
    parent_country: "AU",
    link_source: "Lendlease Annual Report — global construction division",
  },
  "GILBANE": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "GILBANE",
    status: "no_link",
    reason: "Gilbane is private (Providence RI — Gilbane family, 6th gen).",
  },
  "WHITING TURNER": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "WHITING TURNER",
    status: "no_link",
    reason: "Whiting-Turner is private (Baltimore MD).",
  },
  "STO BUILDING GROUP": {
    source: "sec_exhibit21+wikidata",
    fetched_at: "",
    searched_name: "STO BUILDING GROUP",
    status: "no_link",
    reason: "STO Building Group is private (Structure Tone family of firms).",
  },
};

/**
 * Resolve a contractor name to its public parent company, if known.
 *
 * Order of resolution:
 *   1. A cached "parent" row for the normalized name.
 *   2. The curated KNOWN_PARENT_MAP, matched by substring.
 *   3. A "needs_index" placeholder.
 *
 * Curated keys contain only letters and single spaces, so the search name
 * is upper-cased and every run of non-alphanumerics is collapsed to one
 * space before the substring test. Matching the raw upper-cased name would
 * make entries such as "BULLEY ANDREWS" and "WHITING TURNER" unreachable
 * for the names they exist for ("Bulley & Andrews", "Whiting-Turner").
 * Every name the raw comparison matched still matches after squashing.
 *
 * @param name Contractor name as it appears on the permit.
 * @returns The matched curated entry (stamped with the current time and the
 *          caller's search string), a cached brief, or a placeholder.
 */
export async function fetchParentLink(name: string): Promise<ParentLinkBrief> {
  const now = new Date().toISOString();
  const cached = (await cacheGet(normalizeEntityName(name), "parent")) as ParentLinkBrief | null;
  if (cached) return cached;
  // Match against curated map first, on a punctuation-insensitive form.
  const squashed = (name || "")
    .toUpperCase()
    .replace(/[^A-Z0-9]+/g, " ")
    .trim();
  for (const [key, val] of Object.entries(KNOWN_PARENT_MAP)) {
    if (squashed.includes(key)) {
      return { ...val, fetched_at: now, searched_name: name };
    }
  }
  return {
    source: "sec_exhibit21+wikidata",
    fetched_at: now,
    searched_name: name,
    status: "needs_index",
    reason:
      "Not in curated parent map. Full SEC Exhibit 21 walk would build a complete subsidiary→parent index — queued for batch 5.",
  };
}

// Site Context — non-contractor signals tied to the property's location.
// TIF status (public subsidy), landmark (preservation = longer schedule),
// community area + median income, lat/long. Pulls from Chicago Open Data.
+export type SiteContextBrief = { + source: "chicago_socrata"; + fetched_at: string; + searched_address: string; + status: "ok" | "no_match" | "error"; + in_tif_district?: boolean; + tif_district_name?: string; + is_landmark?: boolean; + landmark_name?: string; + community_area?: string; + community_area_number?: string; + ward?: string; + longitude?: string; + latitude?: string; + // Transit access: distance to nearest CTA L station (meters). + // Affects worker access — sub-1500m means crews can ride transit + // instead of needing parking/shuttle. + nearest_cta_station?: string; + nearest_cta_lines?: string; + nearest_cta_distance_m?: number; + // Permits within 800m issued in last 90d. High count = active labor + // competition for the same crew pool. + nearby_permits_90d?: number; + nearby_permits_value_90d?: number; + error?: string; +}; + +// MBE/WBE/DBE diversity certification — Chicago Dept of Procurement. +// Note: as of 2026-04, the catalog-listed datasets (2iq3-bugw, 69yt-tb5j, +// ci93-uc8s, etc.) all return 404 on the resource endpoint despite +// appearing in the catalog search. Likely archived or auth-gated. +// Returning needs_setup until alternate path identified. +export type DiversityCertBrief = { + source: "chicago_dps_economic_inclusion"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "error" | "needs_setup"; + reason?: string; + certifications: Array<{ category: string; expiration?: string; type?: string }>; + source_url?: string; + error?: string; +}; + +// News mentions — Google News RSS (free, no auth). Returns recent +// headlines mentioning the contractor name. Quick reputation signal. 
+export type NewsBrief = { + source: "google_news_rss"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "error"; + total_mentions: number; + recent_headlines: Array<{ title: string; source: string; date: string; url: string }>; + error?: string; +}; + +// BLS construction employment — macro context for ALL Chicago permits. +// Series SMU17169802000000001 = Construction, all employees, Chicago- +// Naperville-Elgin MSA, NSA, monthly. Free public API, no auth. +// Cached process-wide (single request per minutes-window) since this +// is macroeconomic context — same number applies to every permit shown. +export type BlsTrendBrief = { + source: "bls.gov"; + fetched_at: string; + series_id: string; + status: "ok" | "error"; + latest: { period: string; value: number } | null; + yoy_change_pct: number | null; + mom_change_pct: number | null; + recent: Array<{ period: string; value: number }>; // last 6 months + trend: "growing" | "stable" | "declining" | "unknown"; + error?: string; +}; + +// News sentiment — basic positive/negative keyword scoring layered over +// the existing Google News RSS results. Not LLM-grade but catches the +// obvious "lawsuit / fraud / fired / OSHA / debt" signals vs "awarded / +// expansion / contract win" signals. Output is a -1..+1 score with +// counts of positive/negative-flagged headlines. +export type NewsSentiment = { + score: number; // -1 to +1 + positive: number; + negative: number; + neutral: number; + flagged_headlines: Array<{ title: string; polarity: "pos" | "neg"; reasons: string[] }>; +}; + +// OSHA Severe Violator Enforcement Program (SVEP) — explicit named +// list of contractors placed in heightened federal/state OSHA +// enforcement after a willful or repeat-violation event. Highest- +// signal "this firm has been formally flagged as a worst actor" check. +// +// Data refreshed quarterly from osha.gov/enforcement/svep XLSX +// (Federal + State logs). 
// Pre-parsed to JSON at
// /data/_entity_cache/svep_log.json. ~1,044 entries (990 federal + 54 state).
// Manual refresh: re-download Public_Federal_SVEP_Tracking_Log.xlsx +
// Public_State_SVEP_Log.xlsx, run the Python parser, update JSON.
export type SvepBrief = {
  source: "osha.gov/enforcement/svep";
  fetched_at: string;
  searched_name: string;
  status: "ok" | "no_match" | "error";
  flagged: boolean;
  matched_entries: Array<{ name: string }>;
  log_size: number;
  log_age_days: number;
};

// In-memory copy of the SVEP log, loaded on first use.
let _svepIdx: Array<{ name: string; normalized: string }> | null = null;
let _svepMeta: { fetched_at: string; size: number } | null = null;

/**
 * Load the pre-parsed SVEP log into memory once per process.
 * A missing or unreadable file yields an empty index (so lookups report
 * "no_match") rather than throwing; read/parse failures are logged.
 */
async function ensureSvepLoaded() {
  if (_svepIdx) return;
  const path = "/home/profit/lakehouse/data/_entity_cache/svep_log.json";
  if (!existsSync(path)) {
    _svepIdx = [];
    _svepMeta = { fetched_at: "", size: 0 };
    return;
  }
  try {
    const raw = JSON.parse(await readFile(path, "utf-8"));
    // Tolerate a log whose "companies" field is absent or not a list.
    const companies = Array.isArray(raw?.companies) ? raw.companies : [];
    _svepIdx = companies;
    _svepMeta = { fetched_at: raw?.fetched_at || "unknown", size: companies.length };
  } catch (e) {
    console.warn("[entity] svep log load failed:", (e as Error).message);
    _svepIdx = [];
    _svepMeta = { fetched_at: "", size: 0 };
  }
}

// Age of the SVEP log in whole days; 0 when the timestamp is missing or
// not parseable as a date.
function svepLogAgeDays(fetchedAt: string): number {
  if (!fetchedAt) return 0;
  const t = Date.parse(fetchedAt);
  return Number.isNaN(t) ? 0 : Math.round((Date.now() - t) / 86400000);
}

/**
 * Check a contractor name against the SVEP log.
 *
 * An entry matches when one of its "/"-separated aliases either normalizes
 * to exactly the query's normalized name, or shares at least two words of
 * four or more characters with it. At most one alias is recorded per log
 * entry and at most five matches are returned.
 *
 * @param name Contractor name as it appears on the permit.
 * @returns A brief whose `flagged` is true when any entry matched.
 *          `log_age_days` reflects the loaded log on every path, including
 *          the empty-query path.
 */
export async function fetchSvepBrief(name: string): Promise<SvepBrief> {
  const now = new Date().toISOString();
  await ensureSvepLoaded();
  const idx = _svepIdx || [];
  const meta = _svepMeta || { fetched_at: "", size: 0 };
  const logAge = svepLogAgeDays(meta.fetched_at);
  // SVEP entries can have multiple aliases joined by "/". Match any token
  // overlap with our query's identifying-words set.
  const queryNorm = normalizeEntityName(name);
  if (!queryNorm) {
    return {
      source: "osha.gov/enforcement/svep",
      fetched_at: now,
      searched_name: name,
      status: "no_match",
      flagged: false,
      matched_entries: [],
      log_size: meta.size,
      log_age_days: logAge,
    };
  }
  const queryWords = new Set(
    queryNorm.split(" ").filter((w) => w.length >= 4),
  );
  const matches: Array<{ name: string }> = [];
  for (const e of idx) {
    // Skip malformed rows instead of crashing the whole lookup.
    if (typeof e?.name !== "string") continue;
    // SVEP names use "/" to separate aliases. Check each alias.
    const aliases = e.name.split("/").map((a) => a.trim()).filter(Boolean);
    for (const a of aliases) {
      const aNorm = normalizeEntityName(a);
      if (!aNorm) continue;
      // Direct exact normalized match
      if (aNorm === queryNorm) {
        matches.push({ name: a });
        break;
      }
      // Word-overlap: at least 2 shared words ≥ 4 chars
      const aWords = aNorm.split(" ").filter((w) => w.length >= 4);
      const overlap = aWords.filter((w) => queryWords.has(w)).length;
      if (overlap >= 2) {
        matches.push({ name: a });
        break;
      }
    }
  }
  return {
    source: "osha.gov/enforcement/svep",
    fetched_at: now,
    searched_name: name,
    status: matches.length > 0 ? "ok" : "no_match",
    flagged: matches.length > 0,
    matched_entries: matches.slice(0, 5),
    log_size: meta.size,
    log_age_days: logAge,
  };
}

// OSHA Severe Injury Reports — placeholder, bot-protected.
export type OshaSirBrief = {
  source: "osha.gov/severeinjury";
  fetched_at: string;
  searched_name: string;
  status: "needs_setup";
  reason: string;
};

// Cook County Recorder mechanics liens — placeholder, scrape research needed.
export type LiensBrief = {
  source: "cook_county_recorder";
  fetched_at: string;
  searched_address: string;
  status: "needs_setup";
  reason: string;
};

// USASpending.gov federal contracts. Free API, no auth.
// api.usaspending.gov/api/v2/search/spending_by_award/
// Returns total awarded $, awarding agency, NAICS, subaward count.
// Strong signal: a Chicago contractor with $50M of federal work has
// scale + compliance posture. Quiet contractors are private/local.
+export type FederalContractsBrief = { + source: "usaspending.gov"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "error"; + total_awards_count: number; + total_awards_value: number; + most_recent_award_date?: string; + top_agencies: Array<{ agency: string; value: number }>; + source_url?: string; + error?: string; +}; + +// NLRB cases — union actions / unfair labor practice charges. +// Free public search at apps.nlrb.gov/case but the JSON endpoint +// is undocumented. Need short scraper. For now placeholder. +export type NlrbBrief = { + source: "nlrb.gov"; + fetched_at: string; + searched_name: string; + status: "ok" | "no_match" | "needs_setup"; + reason?: string; + total_cases?: number; + recent_cases?: Array<{ case_number: string; case_type: string; date: string; status: string }>; +}; + +// Union locals likely to handle each trade. Static lookup — no API. +// Static is fine because Chicago union jurisdictions don't change often, +// and the granularity here is "show staffers what unions are involved." +// We don't claim certainty per project; we surface the typical mapping. +export type UnionMapping = { + trade: string; + primary_locals: Array<{ name: string; local: string; jurisdiction: string }>; + // Training centers run by the JATCs (joint apprenticeship) + training_centers: Array<{ name: string; address: string; program_length: string }>; +}; + +// Contractor history over the Chicago permit dataset. Cheap signal: +// how active has this contractor been historically, and what's their +// trajectory in the last 24 months? Helpful for spotting "suddenly +// active" (possible shell LLC) or "long-tenured + now declining" +// (losing market share). 
export type ContractorHistory = {
  source: "chicago_socrata";
  fetched_at: string;
  searched_name: string;
  status: "ok" | "error";
  permits_last_180d: number;
  permits_last_24mo: number;
  permits_historical_total: number;
  total_cost_last_24mo: number;
  trend: "growing" | "stable" | "declining" | "new" | "unknown";
  recent_permits: Array<{ date: string; work_type: string; cost: number; address: string; lat?: number; lng?: number; permit_id?: string; description?: string }>;
  error?: string;
};

// Everything gathered for one contractor/applicant named on a permit.
export type EntityBrief = {
  key: string;
  display_name: string;
  role: string;
  ticker: string;
  osha: OshaBrief | null;
  ilsos: IlsosBrief | null;
  stock: TickerBrief | null;
  history: ContractorHistory | null;
  debarment: DebarmentBrief | null;
  parent_link: ParentLinkBrief | null;
  federal: FederalContractsBrief | null;
  nlrb: NlrbBrief | null;
  diversity: DiversityCertBrief | null;
  news: NewsBrief | null;
  news_sentiment: NewsSentiment | null;
  osha_sir: OshaSirBrief | null;
  svep: SvepBrief | null;
  risk: {
    score: number | null;
    factors: string[];
    partial: boolean;
  };
};

// Project Index Score — auditable weighted aggregation of every signal
// we pulled. Each contribution carries its weight, direction (good/bad),
// raw value, and a human-readable note so staffers can see exactly why
// the project landed where it did. Score ranges 0-100 anchored at 50
// neutral. Bands: red (<30), amber (30-45), neutral (45-55), green
// (55-75), strong (75+).
export type SignalContribution = {
  signal: string; // e.g. "osha_recent_inspections"
  weight: number; // 1-10 importance
  direction: -1 | 0 | 1; // -1 bad, 0 neutral, +1 good
  raw: any; // underlying value
  contribution: number; // signed points moved away from 50
  note: string; // explanation
};
export type ProjectIndexScore = {
  score: number;
  band: "red" | "amber" | "neutral" | "green" | "strong";
  contributions: SignalContribution[];
  partial: boolean; // true when ILSOS / SAM / NLRB / other key sources unavailable
};

export type PermitEntityBrief = {
  permit_id: string;
  property: {
    address: string;
    ticker: string;
    owner: PropertyOwnerBrief | null;
    violations: PropertyViolationsBrief | null;
    union: UnionMapping | null;
    site_context: SiteContextBrief | null;
    liens: LiensBrief | null;
  };
  entities: EntityBrief[];
  tickers: TickerBrief[];
  // Macroeconomic context — same for all permits served in same window.
  // Tells the staffer "the broader Chicago construction labor market
  // is shrinking" or "growing" — context for the per-permit decision.
  macro: BlsTrendBrief | null;
  // Aggregate score across all entities + property signals
  index_score: ProjectIndexScore;
  roadmap: string[];
  generated_at: string;
};

// ─── Cache ────────────────────────────────────────────────────────

type CacheKind =
  | "osha" | "ilsos" | "ticker" | "history" | "owner" | "violations"
  | "debarment" | "parent" | "federal" | "nlrb"
  | "site_context" | "diversity" | "news" | "osha_sir" | "liens";
type CacheRow = {
  key: string;
  kind: CacheKind;
  data: any;
  expires_at: number;
};

// Fully loaded cache, keyed `${kind}:${key}`. Null until the first load
// has completed.
let cacheMap: Map<string, CacheRow> | null = null;
// In-flight (or completed) disk load, shared by every concurrent caller.
let cacheLoadPromise: Promise<Map<string, CacheRow>> | null = null;

// Read the JSONL cache file into a fresh map. The file is append-only, so
// for a repeated `${kind}:${key}` the row with the latest expiry wins.
// Best-effort: directory-creation and read failures are logged and leave a
// partial or empty map; malformed lines are skipped.
async function loadCacheFromDisk(): Promise<Map<string, CacheRow>> {
  const loaded = new Map<string, CacheRow>();
  try {
    if (!existsSync(CACHE_DIR)) {
      await mkdir(CACHE_DIR, { recursive: true });
    }
    if (!existsSync(CACHE_FILE)) return loaded;
    const text = await readFile(CACHE_FILE, "utf-8");
    for (const line of text.split("\n")) {
      if (!line.trim()) continue;
      try {
        const row = JSON.parse(line) as CacheRow;
        const mk = `${row.kind}:${row.key}`;
        const prev = loaded.get(mk);
        if (!prev || prev.expires_at < row.expires_at) loaded.set(mk, row);
      } catch {
        /* skip malformed row */
      }
    }
  } catch (e) {
    console.warn("[entity] cache load failed:", (e as Error).message);
  }
  return loaded;
}

/**
 * Return the in-memory cache, loading it from disk on first use.
 *
 * The load is memoized as a promise so that callers arriving while the
 * file is still being read wait for the same load. Publishing an empty map
 * before the read finished would hand those callers an empty cache and
 * cause spurious misses on a cold start.
 */
async function ensureCacheLoaded(): Promise<Map<string, CacheRow>> {
  if (cacheMap) return cacheMap;
  if (!cacheLoadPromise) {
    cacheLoadPromise = loadCacheFromDisk().then((loaded) => {
      cacheMap = loaded;
      return loaded;
    });
  }
  return cacheLoadPromise;
}

/**
 * Look up an unexpired cache row.
 *
 * @param key  Cache key (callers pass a normalized name or address).
 * @param kind Which source the row belongs to.
 * @returns The stored payload, or null when absent or expired.
 */
async function cacheGet<T = any>(key: string, kind: CacheKind): Promise<T | null> {
  const m = await ensureCacheLoaded();
  const row = m.get(`${kind}:${key}`);
  if (!row) return null;
  if (row.expires_at < Date.now()) return null;
  return row.data;
}

/**
 * Store a payload in memory and append it to the JSONL cache file.
 * A failed disk write is logged and otherwise ignored; the in-memory entry
 * is still updated.
 *
 * @param ttl Lifetime in ms; defaults to DEFAULT_TTL_MS.
 */
async function cacheSet(
  key: string,
  kind: CacheKind,
  data: any,
  ttl = DEFAULT_TTL_MS,
) {
  const m = await ensureCacheLoaded();
  const row: CacheRow = { key, kind, data, expires_at: Date.now() + ttl };
  m.set(`${kind}:${row.key}`, row);
  try {
    await writeFile(CACHE_FILE, JSON.stringify(row) + "\n", { flag: "a" });
  } catch (e) {
    console.warn("[entity] cache write failed:", (e as Error).message);
  }
}

// ─── Name normalization
// ─────────────────────────────────────────────
// Collapses "ADT,LLC" / "ADT LLC" / "ADT, L.L.C." to a single
// canonical key so cache hits don't miss on punctuation drift.

/**
 * Canonical upper-case key for a company name: punctuation becomes
 * spaces, THE/AND and common entity suffixes are dropped, and runs of
 * whitespace collapse. Falsy input yields "".
 */
export function normalizeEntityName(name: string): string {
  let key = (name || "").toUpperCase();
  key = key.replace(/[,.\-]/g, " ");
  key = key.replace(/\b(THE|AND)\b/g, "");
  key = key.replace(
    /\b(L\s*L\s*C|L\s*L\s*P|L\s*P|INC|CO|CORP|COMPANY|CORPORATION|LTD|LIMITED|GROUP|HOLDINGS|ENTERPRISES)\b/g,
    "",
  );
  key = key.replace(/[^\w ]/g, " ");
  key = key.replace(/\s+/g, " ");
  return key.trim();
}

/**
 * Short display handle for an entity: up to three normalized words,
 * each cut to 6 characters, dash-joined and capped at 18 characters.
 */
export function entityTicker(name: string): string {
  const words = normalizeEntityName(name).split(" ").filter(Boolean);
  const stem = words
    .slice(0, 3)
    .map((word) => word.slice(0, 6))
    .join("-")
    .slice(0, 18);
  return `LLC·${stem || "UNKNOWN"}`;
}

/**
 * Short display handle for an address: the first alphabetic-leading
 * word (upper-cased, max 8 chars) plus the first 5 digits found.
 */
function addressTicker(address: string): string {
  const allDigits = address.match(/\d+/g)?.join("") ?? "";
  const leadingWord = address.split(/\s+/).find((w) => /^[A-Za-z]/.test(w));
  const label = (leadingWord || "BLDG").toUpperCase().slice(0, 8);
  return `BLDG·${label}-${allDigits.slice(0, 5) || "0000"}`;
}

// ─── OSHA scraper ──────────────────────────────────────────────────

/**
 * Downloads the OSHA establishment-search result page for `name`.
 * Waits on oshaGate() before issuing the request.
 * @throws Error("osha HTTP <status>") on a non-2xx response.
 */
async function fetchOshaHTML(name: string): Promise<string> {
  await oshaGate();
  const searchUrl = new URL("https://www.osha.gov/ords/imis/establishment.search");
  const query: [string, string][] = [
    ["p_logger", "1"],
    ["establishment", name],
    ["State", "all"],
    ["p_case", "all"],
  ];
  for (const [param, value] of query) {
    searchUrl.searchParams.set(param, value);
  }
  const res = await fetch(searchUrl.toString(), {
    headers: { "User-Agent": UA },
    signal: AbortSignal.timeout(15000),
  });
  if (!res.ok) throw new Error(`osha HTTP ${res.status}`);
  return await res.text();
}

// Parse inspection list rows. Uses matchAll so we don't rely on
// stateful regex iteration.
/**
 * Extracts inspection rows from an OSHA establishment-search page.
 * Only table rows that link to `establishment.inspection_detail` are
 * kept; cell text has tags and non-breaking spaces removed.
 *
 * NOTE(review): the fixed column positions (3 = date, 5 = state,
 * 6 = type, 7 = scope, 9 = NAICS, last = establishment) are taken from
 * this code, not verified against the live page layout.
 */
function parseOshaInspections(html: string): OshaInspection[] {
  const inspections: OshaInspection[] = [];
  // The opening-tag halves of these patterns must tolerate attributes.
  const rows = [...html.matchAll(/<tr[^>]*>([\s\S]*?)<\/tr>/gi)];
  for (const rm of rows) {
    const row = rm[1];
    if (!row.includes("inspection_detail")) continue;
    const tds = [...row.matchAll(/<td[^>]*>([\s\S]*?)<\/td>/gi)].map((t) =>
      t[1]
        .replace(/<[^>]+>/g, "")
        // Drop both the entity and an already-decoded U+00A0.
        .replace(/&nbsp;|\u00a0/g, "")
        .trim(),
    );
    const idMatch = row.match(/establishment\.inspection_detail\?id=([\d.]+)/);
    if (!idMatch) continue;
    const id = idMatch[1];
    inspections.push({
      id,
      date: tds[3] || "",
      state: tds[5] || "",
      type: tds[6] || "",
      scope: tds[7] || "",
      naics: tds[9] || "",
      establishment: tds[tds.length - 1] || "",
      detail_url: `https://www.osha.gov/ords/imis/establishment.inspection_detail?id=${id}`,
    });
  }
  return inspections;
}

/**
 * OSHA inspection summary for a contractor name.
 *
 * @param name     Name as it appears on the permit (sent to OSHA as-is).
 * @param useCache When false, skips the cache read; a successful result
 *                 is still written back to the cache.
 * @returns A brief with status "ok" (at least one inspection),
 *          "no_match" (none found, or the name normalizes to nothing)
 *          or "error" (fetch/parse failed). Error briefs are not cached.
 */
export async function fetchOshaBrief(name: string, useCache = true): Promise<OshaBrief> {
  const normalized = normalizeEntityName(name);
  const now = new Date().toISOString();
  if (!normalized) {
    // Nothing left to search on once suffixes/punctuation are removed.
    return {
      source: "osha",
      fetched_at: now,
      searched_name: name,
      normalized_name: "",
      source_url: "",
      status: "no_match",
      inspection_count: 0,
      most_recent_date: null,
      recent_inspections: [],
      states_seen: [],
    };
  }
  if (useCache) {
    const hit = await cacheGet(normalized, "osha");
    if (hit) return hit;
  }
  // Human-clickable equivalent of the request fetchOshaHTML() issues.
  const u = `https://www.osha.gov/ords/imis/establishment.search?p_logger=1&establishment=${encodeURIComponent(name)}&State=all&p_case=all`;
  try {
    const html = await fetchOshaHTML(name);
    const inspections = parseOshaInspections(html);
    // Newest first; unparseable dates sort as epoch 0 (i.e. last).
    inspections.sort((a, b) => {
      const pa = Date.parse(a.date) || 0;
      const pb = Date.parse(b.date) || 0;
      return pb - pa;
    });
    const states_seen = [...new Set(inspections.map((i) => i.state).filter(Boolean))];
    const brief: OshaBrief = {
      source: "osha",
      fetched_at: now,
      searched_name: name,
      normalized_name: normalized,
      source_url: u,
      status: inspections.length > 0 ? "ok" : "no_match",
      inspection_count: inspections.length,
      most_recent_date: inspections[0]?.date ?? null,
      recent_inspections: inspections.slice(0, 5),
      states_seen,
    };
    await cacheSet(normalized, "osha", brief);
    return brief;
  } catch (e) {
    return {
      source: "osha",
      fetched_at: now,
      searched_name: name,
      normalized_name: normalized,
      source_url: u,
      status: "error",
      inspection_count: 0,
      most_recent_date: null,
      recent_inspections: [],
      states_seen: [],
      error: (e as Error).message,
    };
  }
}

// ─── ILSOS placeholder ────────────────────────────────────────────

/**
 * Illinois Secretary of State lookup — not wired yet. Returns a cached
 * brief if one exists under the "ilsos" kind, otherwise a
 * "source_unreachable" placeholder (which is not written to the cache).
 */
export async function fetchIlsosBrief(name: string): Promise<IlsosBrief> {
  const normalized = normalizeEntityName(name);
  const cached = await cacheGet(normalized, "ilsos");
  if (cached) return cached;
  const brief: IlsosBrief = {
    source: "ilsos",
    fetched_at: new Date().toISOString(),
    searched_name: name,
    status: "source_unreachable",
    reason:
      "Illinois SoS apps.ilsos.gov blocks our datacenter ASN. Pending: VPN-routed fetch, OpenCorporates API token (~200/mo free), or paid IL bulk feed.",
  };
  return brief;
}

// ─── SEC EDGAR ticker lookup ──────────────────────────────────────
// The canonical free source. A single JSON file at sec.gov/files/
// company_tickers.json maps every US-listed ticker → CIK → company
// name (~10K issuers). We fetch it at most once per 24h and keep it
// in memory + on disk.

const SEC_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json";
const SEC_TICKERS_CACHE = join(CACHE_DIR, "sec_company_tickers.json");
const SEC_TICKERS_TTL_MS = 24 * 60 * 60 * 1000;

/** One issuer row from SEC's company_tickers.json. */
type SecTickerRow = { cik_str: number; ticker: string; title: string };

/** In-memory issuer index; `null` until the first successful load. */
let secTickersIdx: {
  rows: SecTickerRow[];
  /** Rows grouped by normalizeEntityName(title). */
  byNormalized: Map<string, SecTickerRow[]>;
  /** Epoch ms at which the underlying data was fetched from SEC. */
  refreshed_at: number;
} | null = null;

/**
 * Returns the issuer index, refreshing it when older than 24h.
 * Lookup order: fresh in-memory copy → fresh on-disk copy → SEC fetch.
 * If every source fails, the previous (possibly stale) index is
 * returned, which is `null` when nothing has ever loaded.
 */
async function ensureSecTickerIndex(): Promise<typeof secTickersIdx> {
  const fresh = secTickersIdx && Date.now() - secTickersIdx.refreshed_at < SEC_TICKERS_TTL_MS;
  if (fresh) return secTickersIdx;

  let raw: Record<string, SecTickerRow> | null = null;
  // When `raw` was fetched — carried into the index so a disk copy that
  // is already 23h old is not treated as fresh for another 24h.
  let fetchedAt = Date.now();

  // Disk first
  if (existsSync(SEC_TICKERS_CACHE)) {
    try {
      const st = await readFile(SEC_TICKERS_CACHE, "utf-8");
      const parsed = JSON.parse(st);
      if (parsed?._fetched_at && Date.now() - parsed._fetched_at < SEC_TICKERS_TTL_MS) {
        raw = parsed.rows;
        fetchedAt = parsed._fetched_at;
      }
    } catch {
      /* unreadable or corrupt disk copy — fall through to a network refresh */
    }
  }

  // Refresh from SEC
  if (!raw) {
    try {
      const res = await fetch(SEC_TICKERS_URL, {
        headers: { "User-Agent": UA, "Accept": "application/json" },
        signal: AbortSignal.timeout(20000),
      });
      if (res.ok) {
        raw = await res.json();
        fetchedAt = Date.now();
      } else {
        console.warn("[entity] sec tickers fetch failed:", `HTTP ${res.status}`);
      }
    } catch (e) {
      console.warn("[entity] sec tickers fetch failed:", (e as Error).message);
    }
    if (raw) {
      // Persisting is best-effort and reported separately, so a disk
      // problem is not mistaken for a download problem.
      try {
        await mkdir(CACHE_DIR, { recursive: true });
        await writeFile(
          SEC_TICKERS_CACHE,
          JSON.stringify({ _fetched_at: fetchedAt, rows: raw }),
        );
      } catch (e) {
        console.warn("[entity] sec tickers cache write failed:", (e as Error).message);
      }
    }
  }
  if (!raw) return secTickersIdx; // may still be null

  const rows = Object.values(raw).filter((r) => r && r.ticker && r.title);
  const byNormalized = new Map<string, SecTickerRow[]>();
  for (const r of rows) {
    const k = normalizeEntityName(r.title);
    if (!k) continue;
    const arr = byNormalized.get(k) || [];
    arr.push(r);
    byNormalized.set(k, arr);
  }
  secTickersIdx = { rows, byNormalized, refreshed_at: fetchedAt };
  return secTickersIdx;
}

// Name → ticker resolution. Tries exact-normalized first, then
// word-overlap match. Hard rule: the fuzzy path needs at least two
// shared identifying words of 4+ chars between the searched name and
// the candidate title.
// Prevents spurious hits like "POREMBA DENISE" matching "NI Holdings"
// because "DENISE" contains "NI". Also skips names that look like
// individual persons (two capitalized words, no entity suffix) —
// they're never issuers.

/**
 * Resolves a permit/contractor name to an SEC issuer row.
 *
 * @returns The best-matching row, or `null` when the index is
 *          unavailable, the name normalizes to nothing, the name looks
 *          like a person or a government body, or no candidate clears
 *          the match thresholds.
 */
async function resolveSecTicker(name: string): Promise<SecTickerRow | null> {
  const idx = await ensureSecTickerIndex();
  if (!idx) return null;
  const key = normalizeEntityName(name);
  if (!key) return null;

  // Skip obvious individual names — "LAST, FIRST M" or "FIRST LAST".
  // Two signals:
  //   (1) comma in name → "LAST, FIRST" form, human.
  //   (2) exactly 2 word-tokens AND no entity marker → "FIRST LAST"
  //       form. A company almost always has ≥3 tokens (X Y Inc) or
  //       an entity marker; two-word entities like "APPLE INC" get
  //       caught by the marker, not by looksIndividual.
  const upper = name.toUpperCase();
  const hasEntityMarker =
    /\b(LLC|L\s*L\s*C|LLP|INC|INC\.|CORP|CORPORATION|COMPANY|CO|CO\.|LTD|LIMITED|GROUP|HOLDINGS|ENTERPRISES|AUTHORITY|ASSOCIATION|TRUST|BANK|PARTNERS|INDUSTRIES|CONSTRUCTION|ELECTRIC|MECHANICAL|CONTRACTING|PROPERTIES|REALTY|DEVELOPMENT|ENGINEERING|PLUMBING|ROOFING|SERVICES|SOLUTIONS|SYSTEMS|TECHNOLOGIES|LOGISTICS|RESOURCES)\b/.test(
      upper,
    );
  const tokens = upper.split(/\s+/).filter(Boolean);
  const hasComma = /,/.test(name);
  const looksIndividual = (hasComma && !hasEntityMarker) || (tokens.length === 2 && !hasEntityMarker);
  if (looksIndividual) return null;

  // Skip obvious government / municipal entities — they're not US public
  // issuers. If they were, their bond funds would surface separately.
  const isGovernment =
    /\b(AUTHORITY|MUNICIPAL|CITY OF|COUNTY|DISTRICT|BOARD OF EDUCATION|PUBLIC SCHOOLS|TRANSIT|HOUSING AUTHORITY|DEPARTMENT OF|BUREAU OF|PARK DISTRICT|WATER RECLAMATION)\b/i.test(
      name,
    );
  if (isGovernment) return null;

  // Exact normalized match — most reliable path. Several share classes
  // can normalize to the same key; prefer the shortest ticker. Sort a
  // copy: `exact` is the array stored in the shared index.
  const exact = idx.byNormalized.get(key);
  if (exact && exact.length) {
    return [...exact].sort((a, b) => a.ticker.length - b.ticker.length)[0];
  }

  // Word-overlap with stopwords filtered. "CONSTRUCTION", "COMPANY",
  // "SERVICES" etc. are too generic to identify an issuer on their own
  // — shared presence is meaningless. Require overlap on identifying
  // words only.
  const STOPWORDS = new Set<string>([
    "CONSTRUCTION","COMPANY","SERVICES","SERVICE","GROUP","SYSTEMS",
    "SOLUTIONS","RESOURCES","TECHNOLOGIES","INDUSTRIES","PARTNERS",
    "ELECTRIC","MECHANICAL","PLUMBING","ROOFING","DEVELOPMENT","REALTY",
    "PROPERTIES","ENGINEERING","ASSOCIATES","INTERNATIONAL","NATIONAL",
    "AMERICAN","GENERAL","AUTHORITY","HOLDINGS","ENTERPRISES","LOGISTICS",
    "COMMUNICATIONS","FINANCIAL","CAPITAL","PROPERTY","RESIDENTIAL",
    "COMMERCIAL","GLOBAL","UNITED","STATES","STATE","FEDERAL",
    "MANAGEMENT","CONTRACTING","CONTRACTORS","BUILDERS","BUILDING",
  ]);
  const identifyingWords = (s: string) =>
    new Set(
      s
        .split(" ")
        .filter((w) => w.length >= 4 && !STOPWORDS.has(w)),
    );
  const keyId = identifyingWords(key);
  if (keyId.size === 0) return null; // no non-stopword words to match on

  const candidates: { row: SecTickerRow; overlap: number; k2: string }[] = [];
  for (const r of idx.rows) {
    const k2 = normalizeEntityName(r.title);
    if (!k2) continue;
    const k2Id = identifyingWords(k2);
    let overlap = 0;
    for (const w of k2Id) {
      if (keyId.has(w)) overlap++;
    }
    // Fuzzy path requires ≥ 2 shared identifying words. Single-word
    // matches are only valid via the exact-normalized path above;
    // otherwise "POWER" (from POWER CONSTRUCTION) falsely matches
    // POWER INTEGRATIONS and CAPITAL POWER CORP, and "TARGET
    // HOSPITALITY" would match "TARGET CORP" on one shared word.
    if (overlap < 2) continue;
    // The shared words must also cover ≥ 50% of the smaller
    // identifying-word set, so the two names agree on substance.
    const minIdSize = Math.min(keyId.size, k2Id.size);
    if (overlap / minIdSize < 0.5) continue;
    candidates.push({ row: r, overlap, k2 });
  }
  if (!candidates.length) return null;

  // Rank: most shared words, then closest normalized length, then
  // shortest ticker.
  candidates.sort((a, b) => {
    if (b.overlap !== a.overlap) return b.overlap - a.overlap;
    const la = Math.abs(a.k2.length - key.length);
    const lb = Math.abs(b.k2.length - key.length);
    if (la !== lb) return la - lb;
    return a.row.ticker.length - b.row.ticker.length;
  });
  return candidates[0].row;
}

/**
 * Pull SIC + exchange for a CIK from data.sec.gov/submissions.
 * Best-effort: any HTTP or network failure yields an empty object.
 * @param cik Numeric or string CIK; zero-padded to 10 digits for the URL.
 */
async function fetchSecProfile(cik: number | string): Promise<{
  exchange?: string;
  sic?: string;
  sic_description?: string;
}> {
  const padded = String(cik).padStart(10, "0");
  try {
    const res = await fetch(`https://data.sec.gov/submissions/CIK${padded}.json`, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) return {};
    const j = await res.json();
    return {
      exchange: Array.isArray(j.exchanges) ? j.exchanges[0] : undefined,
      sic: j.sic,
      sic_description: j.sicDescription,
    };
  } catch {
    return {};
  }
}

// Stooq — free daily quote CSV.
// Format (column order from the response):
//   Symbol,Date,Time,Open,High,Low,Close,Volume

/**
 * Latest daily quote for a US ticker from Stooq.
 * @returns Parsed fields (each `undefined` when missing, zero or
 *          unparseable), or `null` on HTTP/network failure, an empty
 *          body, or an unknown symbol.
 */
async function fetchStooqQuote(ticker: string): Promise<{
  price?: number;
  price_date?: string;
  open?: number;
  high?: number;
  low?: number;
  volume?: number;
} | null> {
  try {
    const res = await fetch(
      `https://stooq.com/q/l/?s=${encodeURIComponent(ticker.toLowerCase())}.us&f=sd2t2ohlcv&h&e=csv`,
      { headers: { "User-Agent": UA }, signal: AbortSignal.timeout(8000) },
    );
    if (!res.ok) return null;
    const csv = await res.text();
    const lines = csv.trim().split("\n");
    if (lines.length < 2) return null; // header only
    const cols = lines[1].split(",");
    // Stooq returns "N/D" literally for unknown symbols
    if (cols.includes("N/D")) return null;
    // Symbol and Time columns are not used.
    const [, date, , open, high, low, close, volume] = cols;
    return {
      price: parseFloat(close) || undefined,
      price_date: date,
      open: parseFloat(open) || undefined,
      high: parseFloat(high) || undefined,
      low: parseFloat(low) || undefined,
      volume: parseInt(volume, 10) || undefined,
    };
  } catch {
    return null;
  }
}

/**
 * Public-company brief for a name: SEC issuer match plus SEC profile
 * and Stooq quote when an issuer is found. Results are cached under
 * the normalized name — 7 days for "no_match", 6 hours for "ok".
 *
 * `cap_proxy` is price × volume of the quoted day and `day_change_pct`
 * is close-vs-open of that day; both are `undefined` without a quote.
 */
export async function fetchTickerBrief(name: string): Promise<TickerBrief> {
  const now = new Date().toISOString();
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet(cacheKey, "ticker");
  if (cached) return cached;
  const hit = await resolveSecTicker(name);
  if (!hit) {
    const b: TickerBrief = {
      source: "sec+stooq",
      fetched_at: now,
      searched_name: name,
      status: "no_match",
    };
    // 7-day TTL on no_match — company could become public
    await cacheSet(cacheKey, "ticker", b, 7 * 24 * 60 * 60 * 1000);
    return b;
  }
  const [profile, quote] = await Promise.all([
    fetchSecProfile(hit.cik_str),
    fetchStooqQuote(hit.ticker),
  ]);
  const cap_proxy =
    quote?.price && quote?.volume ? quote.price * quote.volume : undefined;
  const day_change_pct =
    quote?.price && quote?.open && quote.open > 0
      ? ((quote.price - quote.open) / quote.open) * 100
      : undefined;
  const brief: TickerBrief = {
    source: "sec+stooq",
    fetched_at: now,
    searched_name: name,
    status: "ok",
    ticker: hit.ticker,
    company_name: hit.title,
    cik: String(hit.cik_str),
    exchange: profile.exchange,
    sic: profile.sic,
    sic_description: profile.sic_description,
    price: quote?.price,
    price_date: quote?.price_date,
    open: quote?.open,
    high: quote?.high,
    low: quote?.low,
    volume: quote?.volume,
    day_change_pct,
    cap_proxy,
    sec_url: `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${String(hit.cik_str).padStart(10, "0")}`,
    stooq_url: `https://stooq.com/q/?s=${hit.ticker.toLowerCase()}.us`,
  };
  // 6-hour TTL on ok hits — prices go stale, but SEC metadata doesn't.
  await cacheSet(cacheKey, "ticker", brief, 6 * 60 * 60 * 1000);
  return brief;
}

// ─── Contractor history ────────────────────────────────────────────
// Queries the same Chicago Socrata permits dataset for prior activity.
// Two time windows: last 180 days (recent velocity) and last 24 months
// (longer-run baseline). All-time count gives tenure. Trend classifier
// compares 180d rate to 24mo rate.

const CONTRACTOR_RECENT_DAYS = 180;
const CONTRACTOR_BASELINE_DAYS = 730;

/**
 * Classifies permit momentum from the three window counts.
 *
 * The baseline window minus the recent window is 550 days, i.e. about
 * three 180-day periods, so the prior per-180d rate is
 * (n24 - n180) / ((730 - 180) / 180). The recent count is then compared
 * against that rate: > 1.5× is "growing", < 0.5× is "declining".
 */
function classifyPermitTrend(
  n180: number,
  n24: number,
  nAll: number,
): ContractorHistory["trend"] {
  if (nAll === 0) return "unknown";
  if (n24 <= 1 && nAll <= 3) return "new";
  const priorWindows =
    (CONTRACTOR_BASELINE_DAYS - CONTRACTOR_RECENT_DAYS) / CONTRACTOR_RECENT_DAYS;
  const priorRate = (n24 - n180) / priorWindows; // permits per 180d before the recent window
  if (priorRate === 0 && n180 > 0) return "growing";
  if (priorRate > 0) {
    const ratio = n180 / priorRate;
    if (ratio > 1.5) return "growing";
    if (ratio < 0.5) return "declining";
    return "stable";
  }
  return "stable";
}

/**
 * GETs a Socrata resource URL and returns its row array. Throws on a
 * non-2xx status or a non-array body so an error payload is never
 * mistaken for "zero permits".
 */
async function fetchPermitRows(url: string): Promise<any[]> {
  const res = await fetch(url, {
    headers: { "User-Agent": UA },
    signal: AbortSignal.timeout(15000),
  });
  if (!res.ok) throw new Error(`socrata HTTP ${res.status}`);
  const rows = await res.json();
  if (!Array.isArray(rows)) throw new Error("socrata: unexpected response shape");
  return rows;
}

/**
 * Chicago permit history for a contractor: counts for the last 180
 * days, last 24 months and all time, 24-month reported cost, a trend
 * label, and the 50 most recent permits (with coordinates when the
 * dataset has them). Matches on `contact_1_name` only.
 *
 * Successful briefs are cached for 12 hours; "error" briefs are not.
 */
export async function fetchContractorHistory(name: string): Promise<ContractorHistory> {
  const now = new Date().toISOString();
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet(cacheKey, "history");
  if (cached) return cached;
  const today = new Date();
  const d180 = new Date(today.getTime() - CONTRACTOR_RECENT_DAYS * 86400000).toISOString().slice(0, 10);
  const d24mo = new Date(today.getTime() - CONTRACTOR_BASELINE_DAYS * 86400000).toISOString().slice(0, 10);
  const base = "https://data.cityofchicago.org/resource/ydr8-5enu.json";
  // Escape single quotes in contractor name per Socrata rules.
  // Socrata LIKE with leading % prefix is substring — we need this
  // because "TURNER CONSTRUCTION" on a new permit should match against
  // "TURNER CONSTRUCTION COMPANY" on historical ones. Guard against
  // false-positive hits on very short names (< 6 chars).
  const safeName = name.replace(/'/g, "''");
  const match =
    name.length >= 6
      ? `upper(contact_1_name) LIKE upper('%${safeName}%')`
      : `contact_1_name='${safeName}'`;
  const where180 = encodeURIComponent(`${match} AND issue_date>'${d180}'`);
  const where24mo = encodeURIComponent(`${match} AND issue_date>'${d24mo}'`);
  const whereAll = encodeURIComponent(match);
  try {
    const [c180, c24, cAll, recent] = await Promise.all([
      fetchPermitRows(`${base}?$select=count(*)&$where=${where180}`),
      fetchPermitRows(`${base}?$select=count(*),sum(reported_cost)&$where=${where24mo}`),
      fetchPermitRows(`${base}?$select=count(*)&$where=${whereAll}`),
      // Pulled to 50 rows + lat/lng + permit id + description so the
      // contractor profile heat map and timeline have something to plot.
      fetchPermitRows(
        `${base}?$select=id,issue_date,work_type,reported_cost,street_number,street_direction,street_name,latitude,longitude,work_description&$where=${whereAll}&$order=issue_date DESC&$limit=50`,
      ),
    ]);
    const n180 = parseInt(c180?.[0]?.count || "0", 10);
    const n24 = parseInt(c24?.[0]?.count || "0", 10);
    const cost24 = parseFloat(c24?.[0]?.sum_reported_cost || "0");
    const nAll = parseInt(cAll?.[0]?.count || "0", 10);

    const brief: ContractorHistory = {
      source: "chicago_socrata",
      fetched_at: now,
      searched_name: name,
      status: "ok",
      permits_last_180d: n180,
      permits_last_24mo: n24,
      permits_historical_total: nAll,
      total_cost_last_24mo: cost24,
      trend: classifyPermitTrend(n180, n24, nAll),
      recent_permits: recent.map((r: any) => ({
        date: (r.issue_date || "").slice(0, 10),
        work_type: r.work_type || "",
        cost: parseFloat(r.reported_cost || 0),
        address: `${r.street_number || ""} ${r.street_direction || ""} ${r.street_name || ""}`.trim().replace(/\s+/g, " "),
        lat: r.latitude ? parseFloat(r.latitude) : undefined,
        lng: r.longitude ? parseFloat(r.longitude) : undefined,
        permit_id: r.id || undefined,
        description: r.work_description || undefined,
      })),
    };
    // 12-hour TTL — activity rarely changes more than once/day
    await cacheSet(cacheKey, "history", brief, 12 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "chicago_socrata",
      fetched_at: now,
      searched_name: name,
      status: "error",
      permits_last_180d: 0,
      permits_last_24mo: 0,
      permits_historical_total: 0,
      total_cost_last_24mo: 0,
      trend: "unknown",
      recent_permits: [],
      error: (e as Error).message,
    };
  }
}

// ─── Cook County Assessor (property owner) ─────────────────────────
// Free Socrata at datacatalog.cookcountyil.gov/resource/c49d-89sn.json.
// Address normalization is gentle: keep number + direction + name; drop
// suite/unit.
// Match using LIKE prefix because addresses on building
// permits are often missing trailing details ("4809 N BROADWAY" vs
// "4809 N BROADWAY UNIT 5" in the parcel record).

/**
 * Cook County Assessor parcel lookup by street address (Chicago only,
 * first prefix match).
 *
 * The cache is consulted before the address is validated. "no_match"
 * results from the API are cached for 7 days and hits with cacheSet's
 * default TTL; too-short addresses and errors are returned uncached.
 */
export async function fetchPropertyOwner(address: string): Promise<PropertyOwnerBrief> {
  const stamp = {
    source: "cook_county_assessor" as const,
    fetched_at: new Date().toISOString(),
    searched_address: address,
  };
  const cacheKey = address.trim().replace(/\s+/g, " ").toUpperCase();

  const cached = await cacheGet(cacheKey, "owner");
  if (cached) return cached;
  if (!cacheKey || cacheKey.length < 4) {
    return { ...stamp, status: "no_match" };
  }

  // Match the full address text as a prefix; LIKE upper(...) for case fold.
  const quoted = cacheKey.replace(/'/g, "''");
  const where = encodeURIComponent(
    `upper(property_address) like upper('${quoted}%') AND property_city='CHICAGO'`,
  );
  const sourceUrl = `https://datacatalog.cookcountyil.gov/resource/c49d-89sn.json?$where=${where}&$limit=1`;

  try {
    const res = await fetch(sourceUrl, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) throw new Error(`assessor HTTP ${res.status}`);
    const rows = (await res.json()) as any[];

    if (!rows || rows.length === 0) {
      const empty: PropertyOwnerBrief = {
        ...stamp,
        status: "no_match",
        source_url: sourceUrl,
      };
      await cacheSet(cacheKey, "owner", empty, 7 * 24 * 60 * 60 * 1000);
      return empty;
    }

    const [parcel] = rows;
    const brief: PropertyOwnerBrief = {
      ...stamp,
      status: "ok",
      pin: parcel.pin,
      property_address: parcel.property_address,
      property_zip: parcel.property_zip,
      mailing_address: parcel.mailing_address,
      mailing_city: parcel.mailing_city,
      mailing_state: parcel.mailing_state,
      mailing_zip: parcel.mailing_zip,
      township: parcel.township_name,
      ward: parcel.ward,
      longitude: parcel.longitude,
      latitude: parcel.latitude,
      source_url: sourceUrl,
    };
    // Owner records change rarely — 30d cache.
    // NOTE(review): no TTL is passed, so this relies on DEFAULT_TTL_MS
    // being 30 days — confirm where that constant is defined.
    await cacheSet(cacheKey, "owner", brief);
    return brief;
  } catch (e) {
    return { ...stamp, status: "error", error: (e as Error).message };
  }
}

// ─── Chicago Building Violations ───────────────────────────────────
// Dataset 22u3-xenr. Address-keyed. Status field tells us which are
// open vs complied. Stop-work orders surface in violation_description.

/**
 * Building-violation summary for an address (prefix match).
 *
 * Counts are computed over the rows returned by the query, which asks
 * for the 50 most recent violations — so `total_violations`,
 * `open_violations` and `stop_work_orders` are capped at 50.
 * Successful briefs are cached for 12 hours; errors are not cached.
 */
export async function fetchPropertyViolations(address: string): Promise<PropertyViolationsBrief> {
  const fetchedAt = new Date().toISOString();
  const cacheKey = address.trim().replace(/\s+/g, " ").toUpperCase();

  // Shared shape for both failure paths below.
  const failed = (message: string): PropertyViolationsBrief => ({
    source: "chicago_building_violations",
    fetched_at: fetchedAt,
    searched_address: address,
    status: "error",
    total_violations: 0,
    open_violations: 0,
    stop_work_orders: 0,
    recent_violations: [],
    error: message,
  });

  const cached = await cacheGet(cacheKey, "violations");
  if (cached) return cached;
  if (!cacheKey || cacheKey.length < 6) return failed("address too short");

  const quoted = cacheKey.replace(/'/g, "''");
  const where = encodeURIComponent(`upper(address) like upper('${quoted}%')`);
  const url = `https://data.cityofchicago.org/resource/22u3-xenr.json?$where=${where}&$order=violation_date DESC&$limit=50`;

  try {
    const res = await fetch(url, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) throw new Error(`violations HTTP ${res.status}`);
    const rows = (await res.json()) as any[];

    const isOpen = (r: any) => /OPEN|FAILED|NO ENTRY/i.test(r.violation_status || "");
    const mentionsStopWork = (r: any) =>
      /STOP[ -]?WORK/i.test(`${r.violation_description || ""} ${r.violation_inspector_comments || ""}`);

    const total = rows.length;
    const open = rows.filter(isOpen).length;
    const stopWork = rows.filter(mentionsStopWork).length;
    const newestFive = rows.slice(0, 5).map((r) => ({
      date: (r.violation_date || "").slice(0, 10),
      status: r.violation_status || "",
      description: (r.violation_description || "").slice(0, 120),
      department: r.department_bureau || "",
    }));

    const brief: PropertyViolationsBrief = {
      source: "chicago_building_violations",
      fetched_at: fetchedAt,
      searched_address: address,
      status: "ok",
      total_violations: total,
      open_violations: open,
      stop_work_orders: stopWork,
      most_recent_date: rows[0]?.violation_date?.slice(0, 10),
      recent_violations: newestFive,
    };
    await cacheSet(cacheKey, "violations", brief, 12 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return failed((e as Error).message);
  }
}

// ─── Union lookup (static) ─────────────────────────────────────────
// Map permit work_type → likely Chicago Local(s) + their training
// centers (JATCs). Surfaces "every union that handles this contract"
// per J's request. Static because Chicago union jurisdictions are
// stable and the granularity is "typical assignment, not certainty
// per project."

/** Builds one `primary_locals` entry. */
const unionLocal = (name: string, local: string, jurisdiction: string) => ({
  name,
  local,
  jurisdiction,
});

/** Builds one `training_centers` entry. */
const trainingCenter = (name: string, address: string, program_length: string) => ({
  name,
  address,
  program_length,
});

/** Trade bucket → typical Chicago locals and their training centers. */
const UNION_MAP: Record<string, UnionMapping> = {
  ELECTRICAL: {
    trade: "Electrical",
    primary_locals: [
      unionLocal("IBEW Local 134", "134", "Inside electrical (Chicago)"),
      unionLocal("IBEW Local 701", "701", "Low-voltage / suburban"),
    ],
    training_centers: [
      trainingCenter("IBEW 134 / NECA JATC", "6301 W 115th St, Alsip IL", "5-yr apprenticeship"),
      trainingCenter("IBEW 701 / NECA JATC", "1979 Bucktail Ln, Sugar Grove IL", "5-yr apprenticeship"),
    ],
  },
  PLUMBING: {
    trade: "Plumbing",
    primary_locals: [
      unionLocal("Plumbers Local 130", "130", "Chicago + Cook County"),
    ],
    training_centers: [
      trainingCenter("Plumbers 130 JAC", "1340 W Washington Blvd, Chicago", "5-yr apprenticeship"),
    ],
  },
  MECHANICAL: {
    trade: "Mechanical / HVAC / Pipe",
    primary_locals: [
      unionLocal("Pipe Fitters Local 597", "597", "HVAC + process piping"),
      unionLocal("Sheet Metal Workers Local 73", "73", "Sheet metal / ductwork"),
      unionLocal("IUOE Local 399", "399", "Stationary engineers (boilers, chillers)"),
    ],
    training_centers: [
      trainingCenter("Local 597 Training Center", "10806 W 47th St, McCook IL", "5-yr"),
      trainingCenter("Sheet Metal 73 JAC", "16410 S 84th Ave, Tinley Park IL", "5-yr"),
    ],
  },
  REROOFING: {
    trade: "Roofing",
    primary_locals: [
      unionLocal("Roofers Local 11", "11", "Chicago + suburbs"),
    ],
    training_centers: [
      trainingCenter("Roofers Local 11 Training", "9525 S Industrial Dr, Bridgeview IL", "3-yr"),
    ],
  },
  MASONRY: {
    trade: "Masonry / Concrete",
    primary_locals: [
      unionLocal("Bricklayers Local 21", "21", "Brick + stone"),
      unionLocal("Cement Masons Local 502", "502", "Concrete finishing"),
    ],
    training_centers: [
      trainingCenter("Bricklayers 21 Training", "11244 S Cottage Grove, Chicago", "3-yr"),
      trainingCenter("Cement Masons 502 JATC", "739 25th Ave, Bellwood IL", "3-yr"),
    ],
  },
  GENERAL: {
    trade: "General construction (multi-trade)",
    primary_locals: [
      unionLocal("Chicago Regional Council of Carpenters", "1/13/58/1051", "Carpentry + drywall"),
      unionLocal("LIUNA Locals 1/2/4/6/76/96/149/152/225/269/296/1001", "various", "Laborers / earthwork"),
      unionLocal("Operating Engineers Local 150", "150", "Heavy equipment (cranes, excavators)"),
      unionLocal("Ironworkers Local 1/111/136/395", "1/111/136/395", "Structural / rebar"),
      unionLocal("Teamsters Local 731/705", "731/705", "Material haul"),
    ],
    training_centers: [
      trainingCenter("Chicago Regional Carpenters Training Center", "11001 W Roosevelt, Westchester IL", "4-yr"),
      trainingCenter("Laborers Training School", "3636 W Pershing Rd, Chicago", "3-yr"),
      trainingCenter("Operating Engineers 150 Training", "16125 W 100th St, Wilmington IL", "3-yr"),
      trainingCenter("Iron Workers Local 1 Apprenticeship", "5775 W 26th St, Cicero IL", "3-yr"),
    ],
  },
  WRECKING: {
    trade: "Wrecking / Demolition",
    primary_locals: [
      unionLocal("LIUNA Locals (laborers)", "1/2/4/...", "Manual demolition"),
      unionLocal("Operating Engineers Local 150", "150", "Heavy equipment demo"),
      unionLocal("Teamsters Local 731", "731", "Debris haul"),
    ],
    training_centers: [
      trainingCenter("Laborers Training School (HazMat track)", "3636 W Pershing Rd, Chicago", "3-yr"),
    ],
  },
  SIGN: {
    trade: "Signs / Signage",
    primary_locals: [
      unionLocal("IBEW Local 134 (electric signs)", "134", "Lit signage"),
      unionLocal("Painters DC 14 Local 30 (sign painters)", "30", "Hand-painted / vinyl"),
    ],
    training_centers: [
      trainingCenter("Painters DC 14 Training", "1456 S La Salle, Chicago", "3-yr"),
    ],
  },
};

/**
 * Picks the union mapping for a permit work type.
 *
 * The upper-cased work type is tested against the patterns below in
 * order and the first hit wins; anything unmatched falls back to the
 * GENERAL bucket. Patterns are unanchored substring tests.
 *
 * @returns `null` for an empty/undefined work type, otherwise a mapping.
 */
export function unionsForWorkType(workType: string | undefined): UnionMapping | null {
  if (!workType) return null;
  const upper = workType.toUpperCase();
  const rules: [RegExp, string][] = [
    [/ELECTRIC/, "ELECTRICAL"],
    [/PLUMB/, "PLUMBING"],
    [/MECHANIC|HVAC|REFRIGERAT|VENT|HEAT/, "MECHANICAL"],
    [/ROOF/, "REROOFING"],
    [/MASON|CONCRETE|BRICK|CEMENT/, "MASONRY"],
    [/WRECK|DEMO/, "WRECKING"],
    [/SIGN/, "SIGN"],
  ];
  for (const [pattern, bucket] of rules) {
    if (pattern.test(upper)) return UNION_MAP[bucket];
  }
  return UNION_MAP.GENERAL;
}

// ─── USASpending.gov federal contracts ─────────────────────────────
// api.usaspending.gov/api/v2/search/spending_by_award/ — POST with
// JSON body. Returns prime award records by recipient name.
// Free, no auth. Slow on first call (~3-5s) but cacheable 24h.
/**
 * Federal prime-contract summary for a recipient name, from
 * USAspending's spending_by_award search (contract award types A–D,
 * action dates from 2020-01-01 to today).
 *
 * The request asks for the 25 largest awards, so the count, the value
 * total, the agency ranking and `most_recent_award_date` all describe
 * that top-25 slice rather than the recipient's full history.
 *
 * Caching: "no_match" from the API for 7 days, "ok" for 24 hours;
 * too-short names and errors are returned uncached.
 */
export async function fetchFederalContracts(name: string): Promise<FederalContractsBrief> {
  const stamp = {
    source: "usaspending.gov" as const,
    fetched_at: new Date().toISOString(),
    searched_name: name,
  };
  const nothing = {
    total_awards_count: 0,
    total_awards_value: 0,
    top_agencies: [],
  };

  const cached = await cacheGet(normalizeEntityName(name), "federal");
  if (cached) return cached;
  if (!name || name.length < 5) {
    return { ...stamp, status: "no_match", ...nothing };
  }

  const url = "https://api.usaspending.gov/api/v2/search/spending_by_award/";
  const today = new Date().toISOString().slice(0, 10);
  const body = {
    filters: {
      award_type_codes: ["A", "B", "C", "D"], // contract types
      recipient_search_text: [name],
      time_period: [{ start_date: "2020-01-01", end_date: today }],
    },
    fields: [
      "Award ID",
      "Recipient Name",
      "Award Amount",
      "Awarding Agency",
      "Action Date",
      "NAICS",
    ],
    sort: "Award Amount",
    order: "desc",
    limit: 25,
    page: 1,
  };

  try {
    const res = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json", "User-Agent": UA },
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(15000),
    });
    if (!res.ok) throw new Error(`usaspending HTTP ${res.status}`);
    const payload = (await res.json()) as any;
    const awards = payload.results || [];

    if (awards.length === 0) {
      const empty: FederalContractsBrief = {
        ...stamp,
        status: "no_match",
        ...nothing,
        source_url: `https://www.usaspending.gov/search/?keywords=${encodeURIComponent(name)}`,
      };
      await cacheSet(normalizeEntityName(name), "federal", empty, 7 * 24 * 60 * 60 * 1000);
      return empty;
    }

    // One pass: grand total plus per-agency totals.
    let totalValue = 0;
    const valueByAgency: Record<string, number> = {};
    for (const award of awards) {
      const amount = parseFloat(award["Award Amount"]) || 0;
      const agency = award["Awarding Agency"] || "Unknown";
      totalValue += amount;
      valueByAgency[agency] = (valueByAgency[agency] || 0) + amount;
    }
    const topAgencies = Object.entries(valueByAgency)
      .map(([agency, value]) => ({ agency, value }))
      .sort((a, b) => b.value - a.value)
      .slice(0, 3);

    // Latest action date by string order (undefined when none present).
    const actionDates = awards.map((award: any) => award["Action Date"]).filter(Boolean);
    actionDates.sort();
    const latestAction = actionDates[actionDates.length - 1];

    const brief: FederalContractsBrief = {
      ...stamp,
      status: "ok",
      total_awards_count: awards.length,
      total_awards_value: totalValue,
      most_recent_award_date: latestAction,
      top_agencies: topAgencies,
      source_url: `https://www.usaspending.gov/search/?keywords=${encodeURIComponent(name)}`,
    };
    await cacheSet(normalizeEntityName(name), "federal", brief, 24 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      ...stamp,
      status: "error",
      ...nothing,
      error: (e as Error).message,
    };
  }
}

// ─── NLRB cases (placeholder) ──────────────────────────────────────
// nlrb.gov/search/case has JSON results behind their JS app. The path
// requires session cookies + form tokens — needs a short scraper.
// Returning placeholder until next batch.
/**
 * Placeholder NLRB lookup: performs no network access and always reports
 * "needs_setup" for the given name.
 */
export async function fetchNlrbBrief(name: string): Promise<NlrbBrief> {
  return {
    source: "nlrb.gov",
    fetched_at: new Date().toISOString(),
    searched_name: name,
    status: "needs_setup",
    reason:
      "NLRB case search at apps.nlrb.gov/case requires session-aware scraping; queued for batch 5b.",
  };
}

// ─── Risk scoring ──────────────────────────────────────────────────

/**
 * Derive a coarse per-entity risk score from OSHA, ILSOS and permit history.
 *
 * @returns `score` (null when no signal produced one, otherwise 10-100),
 *          `factors` (machine-readable reasons, in evaluation order) and
 *          `partial` (true when any input source was missing or errored).
 */
function scoreEntity(
  osha: OshaBrief | null,
  ilsos: IlsosBrief | null,
  history: ContractorHistory | null,
) {
  const factors: string[] = [];
  let score: number | null = null;
  let partial = false;

  if (osha) {
    if (osha.status === "ok") {
      // Bucket by inspection count: 0 / 1-2 / 3-9 / 10+.
      const n = osha.inspection_count;
      if (n === 0) {
        score = 10;
        factors.push("osha_clean:no_inspections");
      } else if (n < 3) {
        score = 25;
        factors.push(`osha_low:${n}_inspections`);
      } else if (n < 10) {
        score = 50;
        factors.push(`osha_moderate:${n}_inspections`);
      } else {
        score = 75;
        factors.push(`osha_high:${n}_inspections`);
      }
      if (osha.most_recent_date) {
        // An unparseable date falls back to epoch 0, i.e. "very old".
        const ageDays =
          (Date.now() - (Date.parse(osha.most_recent_date) || 0)) / (1000 * 60 * 60 * 24);
        if (ageDays < 180) {
          factors.push("osha_recent_activity:<180d");
          score = Math.min(100, (score ?? 0) + 15);
        }
      }
    } else if (osha.status === "no_match") {
      factors.push("osha_no_match");
    } else {
      factors.push("osha_error:" + (osha.error || "unknown"));
      partial = true;
    }
  } else {
    partial = true;
  }

  if (!ilsos || ilsos.status !== "ok") {
    partial = true;
    factors.push("ilsos_pending");
  }

  // Brand-new LLCs with a single permit and zero tenure are a
  // classic LLC-shuffle signature. Flag independently of OSHA.
  if (history?.status === "ok") {
    if (history.trend === "new") {
      factors.push("new_entity:<=3_permits_ever");
      score = Math.max(score ?? 0, 35);
    } else if (history.trend === "growing") {
      factors.push(`activity_growing:${history.permits_last_180d}_in_180d`);
    } else if (history.trend === "declining") {
      factors.push(`activity_declining:${history.permits_last_180d}_in_180d`);
    }
  }
  return { score, factors, partial };
}

// ─── CTA L stations (static, in-memory) ────────────────────────────
// Pulled once from Chicago Socrata 8pix-ypme. ~150 stations, rarely
// changes. Compute great-circle distance to nearest with haversine.
type CtaStation = {
  station_name: string;
  lat: number;
  lon: number;
  lines: string[];
};
let _ctaStations: CtaStation[] | null = null;

/**
 * Load (once per process) the CTA station list from the Chicago data portal.
 *
 * Only a successful, non-empty load is memoized. A failed load returns `[]`
 * for this call and is retried on the next one, so a transient network error
 * no longer disables CTA lookups for the lifetime of the process.
 */
async function ensureCtaLoaded(): Promise<CtaStation[]> {
  if (_ctaStations) return _ctaStations;
  // Treat the literal string "false" as off in case flags arrive as text;
  // every other value keeps plain truthiness.
  const isOn = (flag: unknown) => Boolean(flag) && flag !== "false";
  try {
    const res = await fetch(
      "https://data.cityofchicago.org/resource/8pix-ypme.json?$limit=400",
      { headers: { "User-Agent": UA }, signal: AbortSignal.timeout(15000) },
    );
    if (!res.ok) throw new Error(`cta HTTP ${res.status}`);
    const rows = (await res.json()) as any[];

    // Rows are per-stop; collapse them to one entry per station.
    const byStation = new Map<string, CtaStation>();
    for (const r of rows) {
      const loc = r.location || {};
      const lat = parseFloat(loc.latitude || "");
      const lon = parseFloat(loc.longitude || "");
      if (isNaN(lat) || isNaN(lon)) continue;
      const station = r.station_name || r.stop_name;
      if (!station) continue;

      const lines: string[] = [];
      if (isOn(r.red)) lines.push("Red");
      if (isOn(r.blue)) lines.push("Blue");
      if (isOn(r.g)) lines.push("Green");
      if (isOn(r.brn)) lines.push("Brown");
      if (isOn(r.p)) lines.push("Purple");
      if (isOn(r.y)) lines.push("Yellow");
      if (isOn(r.pnk)) lines.push("Pink");
      if (isOn(r.o)) lines.push("Orange");

      // Station names are not unique across lines, so keying on the name alone
      // drops distinct stations. NOTE(review): map_id is presumed to be the
      // parent-station id in this dataset — confirm; when it is absent this
      // falls back to the name, which is the previous behavior.
      const key = String(r.map_id ?? station);
      const existing = byStation.get(key);
      if (existing) {
        // Keep the first stop's coordinates but merge the lines served.
        for (const line of lines) {
          if (!existing.lines.includes(line)) existing.lines.push(line);
        }
        continue;
      }
      byStation.set(key, { station_name: station, lat, lon, lines });
    }

    const stations = [...byStation.values()];
    if (stations.length > 0) _ctaStations = stations;
    return stations;
  } catch (e) {
    console.warn("[cta] load failed:", (e as Error).message);
    return [];
  }
}

/**
 * Great-circle distance between two lat/lon points, in meters, using the
 * haversine formula on a sphere of radius 6,371,000 m.
 */
function haversineMeters(lat1: number, lon1: number, lat2: number, lon2: number): number {
  const R = 6371000;
  const toRad = (deg: number) => (deg * Math.PI) / 180;
  const dLat = toRad(lat2 - lat1);
  const dLon = toRad(lon2 - lon1);
  const a =
    Math.sin(dLat / 2) ** 2 +
    Math.cos(toRad(lat1)) * Math.cos(toRad(lat2)) * Math.sin(dLon / 2) ** 2;
  // Floating-point rounding can push `a` a hair above 1 for near-antipodal
  // points, which would make asin() return NaN — clamp to the valid domain.
  return 2 * R * Math.asin(Math.sqrt(Math.min(1, Math.max(0, a))));
}

/**
 * Find the CTA station closest to the given point.
 *
 * @returns The station and its distance in meters, or null when the station
 *          list could not be loaded.
 */
async function nearestCtaStation(lat: number, lon: number) {
  const stations = await ensureCtaLoaded();
  if (stations.length === 0) return null;
  let best: { station: CtaStation; dist: number } | null = null;
  for (const s of stations) {
    const d = haversineMeters(lat, lon, s.lat, s.lon);
    if (!best || d < best.dist) best = { station: s, dist: d };
  }
  return best;
}

// ─── Nearby permits (labor competition signal) ─────────────────────
// Socrata supports within_circle(location, lat, lon, radius_meters)
// for geo queries. 800m ≈ 0.5mi. Filter to permits issued within the
// last 90d so we capture *current* competition only.

/**
 * Count permits (reported_cost > 50000) issued in the last 90 days within
 * 800m of the point, and sum their reported cost.
 *
 * @returns `{ count, total_value }`, or null on any HTTP/network failure
 *          (best-effort signal — callers treat null as "unknown").
 */
async function fetchNearbyPermits(lat: number, lon: number) {
  const since = new Date(Date.now() - 90 * 86400000).toISOString().slice(0, 10);
  const where = encodeURIComponent(
    `within_circle(location, ${lat}, ${lon}, 800) AND issue_date>'${since}' AND reported_cost>50000`,
  );
  try {
    const res = await fetch(
      `https://data.cityofchicago.org/resource/ydr8-5enu.json?$select=count(*),sum(reported_cost)&$where=${where}`,
      { headers: { "User-Agent": UA }, signal: AbortSignal.timeout(10000) },
    );
    if (!res.ok) return null;
    const j = (await res.json()) as any[];
    const row = j?.[0] || {};
    const count = parseInt(row.count || "0", 10);
    const totalValue = parseFloat(row.sum_reported_cost || "0");
    return {
      // Guard against non-numeric aggregates so NaN never reaches scoring.
      count: Number.isFinite(count) ? count : 0,
      total_value: Number.isFinite(totalValue) ? totalValue : 0,
    };
  } catch (e) {
    console.warn("[nearby_permits] lookup failed:", (e as Error).message);
    return null;
  }
}

// ─── Site Context (Chicago TIF + landmark + community area) ────────
// All Socrata. Lookup by address (street_number + street_name where
// available, lat/long otherwise).
// Bundle into a single brief so the UI gets one tile instead of 3.
const TIF_DATASET = "imgn-2suh"; // TIF District Programming 2022-2026
const LANDMARK_DATASET = "habu-n236"; // Individual Landmarks - Map

/**
 * Check whether an (upper-cased, whitespace-collapsed) address appears in the
 * Chicago landmarks dataset. Never throws: a failed lookup is logged and
 * reported as "not a landmark".
 */
async function lookupLandmark(cleaned: string): Promise<{ name: string | undefined } | null> {
  const safe = cleaned.replace(/'/g, "''");
  const landmarkUrl = `https://data.cityofchicago.org/resource/${LANDMARK_DATASET}.json?$where=upper(address)%20like%20upper('%25${encodeURIComponent(safe)}%25')&$limit=1`;
  try {
    const r = await fetch(landmarkUrl, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(8000),
    });
    if (!r.ok) return null;
    const rows = (await r.json()) as any[];
    if (rows && rows.length) {
      return { name: rows[0].landmark_name || rows[0].name };
    }
    return null;
  } catch (e) {
    // Non-fatal: the brief is still useful without the landmark flag.
    console.warn("[landmark] lookup failed:", (e as Error).message);
    return null;
  }
}

/**
 * Build the site-context brief for a permit address: landmark status, TIF
 * district membership, nearest CTA station and nearby-permit activity.
 *
 * Coordinates and ward come from `fetchPropertyOwner`; when that lookup is
 * not "ok" the geo-dependent fields are left undefined but the brief is
 * still returned (and cached) with status "ok".
 *
 * @param address Free-form street address.
 * @returns A cached or freshly built brief; "no_match" for a blank address.
 */
export async function fetchSiteContext(address: string): Promise<SiteContextBrief> {
  const now = new Date().toISOString();
  const cleaned = address.trim().replace(/\s+/g, " ").toUpperCase();
  // Guard first: a blank address is never cached, so there is nothing to look up.
  if (!cleaned) {
    return {
      source: "chicago_socrata",
      fetched_at: now,
      searched_address: address,
      status: "no_match",
    };
  }
  const cached = await cacheGet(cleaned, "site_context");
  if (cached) return cached;

  // Pull lat/long from the assessor record (we already query it). The
  // landmark lookup does not depend on it, so run both concurrently.
  const [owner, landmark] = await Promise.all([
    fetchPropertyOwner(address),
    lookupLandmark(cleaned),
  ]);
  let lat: string | undefined, lon: string | undefined, ward: string | undefined;
  if (owner.status === "ok") {
    lat = owner.latitude;
    lon = owner.longitude;
    ward = owner.ward;
  }
  // Community area number → name table is public on Chicago.gov but
  // we'd need it static; ship without name for now (number suffices).
  const isLandmark = landmark !== null;
  const landmarkName = landmark?.name;

  // TIF polygon containment + CTA distance + nearby permits — all
  // depend on having lat/long from the Cook County Assessor.
  let inTif = false;
  let tifName: string | undefined;
  let ctaName: string | undefined;
  let ctaLines: string | undefined;
  let ctaDist: number | undefined;
  let nearbyCount: number | undefined;
  let nearbyValue: number | undefined;
  if (lat && lon) {
    const latNum = parseFloat(lat);
    const lonNum = parseFloat(lon);
    if (!isNaN(latNum) && !isNaN(lonNum)) {
      // All three geo lookups in parallel. Note findTifDistrict takes
      // (lon, lat) while the other two take (lat, lon).
      const [tif, cta, nearby] = await Promise.all([
        findTifDistrict(lonNum, latNum),
        nearestCtaStation(latNum, lonNum),
        fetchNearbyPermits(latNum, lonNum),
      ]);
      if (tif) {
        inTif = true;
        tifName = tif.name;
      }
      if (cta) {
        ctaName = cta.station.station_name;
        ctaLines = cta.station.lines.join("/");
        ctaDist = Math.round(cta.dist);
      }
      if (nearby) {
        nearbyCount = nearby.count;
        nearbyValue = nearby.total_value;
      }
    }
  }

  const brief: SiteContextBrief = {
    source: "chicago_socrata",
    fetched_at: now,
    searched_address: address,
    status: "ok",
    in_tif_district: inTif,
    tif_district_name: tifName,
    is_landmark: isLandmark,
    landmark_name: landmarkName,
    community_area: undefined,
    ward,
    latitude: lat,
    longitude: lon,
    nearest_cta_station: ctaName,
    nearest_cta_lines: ctaLines,
    nearest_cta_distance_m: ctaDist,
    nearby_permits_90d: nearbyCount,
    nearby_permits_value_90d: nearbyValue,
  };
  // NOTE(review): this also caches briefs built while the owner lookup was
  // failing (no coordinates) — confirm the default TTL makes that acceptable.
  await cacheSet(cleaned, "site_context", brief);
  return brief;
}

// ─── MBE/WBE/DBE diversity certification ───────────────────────────
// Chicago Dept of Procurement Economic Inclusion Certified Vendors.
const DIVERSITY_DATASET = "2iq3-bugw";

/**
 * Look up MBE/WBE/DBE certifications for a vendor in the Chicago
 * Economic Inclusion dataset.
 *
 * A 404 from the resource endpoint is the known "dataset unavailable" case
 * and is cached for 7 days as "needs_setup". Any other HTTP or network
 * failure is reported as "needs_setup" with the underlying error and is NOT
 * cached, so a transient outage does not stick for a week.
 *
 * @param name Vendor legal name; a blank name returns "no_match" without
 *             querying (it would otherwise match every row via `like '%%'`).
 */
export async function fetchDiversityCerts(name: string): Promise<DiversityCertBrief> {
  const now = new Date().toISOString();
  if (!name || !name.trim()) {
    return {
      source: "chicago_dps_economic_inclusion",
      fetched_at: now,
      searched_name: name,
      status: "no_match",
      certifications: [],
    };
  }
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet(cacheKey, "diversity");
  if (cached) return cached;
  const safe = name.replace(/'/g, "''");
  const url = `https://data.cityofchicago.org/resource/${DIVERSITY_DATASET}.json?$where=upper(legal_name)%20like%20upper('%25${encodeURIComponent(safe)}%25')&$limit=10`;
  try {
    const r = await fetch(url, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(8000),
    });
    if (r.status === 404) {
      // Dataset listed in catalog but resource endpoint 404s — known issue.
      const skeletal: DiversityCertBrief = {
        source: "chicago_dps_economic_inclusion",
        fetched_at: now,
        searched_name: name,
        status: "needs_setup",
        reason:
          "Chicago MBE/WBE/DBE datasets (2iq3-bugw, 69yt-tb5j, ci93-uc8s, 8dxf-6ahp) are catalog-listed but the resource endpoint returns 404. Likely archived or auth-gated. Path: contact Chicago DPS for alternate access OR use the procurement Intent-to-Award/Execute datasets which DO work.",
        certifications: [],
      };
      await cacheSet(cacheKey, "diversity", skeletal, 7 * 24 * 60 * 60 * 1000);
      return skeletal;
    }
    if (!r.ok) throw new Error(`diversity HTTP ${r.status}`);
    const rows = (await r.json()) as any[];
    const certs = (rows || []).map((row) => ({
      category: row.certification || row.classification || "?",
      expiration: row.certification_expiration_date || row.expiration_date,
      type: row.business_type || row.type,
    }));
    const brief: DiversityCertBrief = {
      source: "chicago_dps_economic_inclusion",
      fetched_at: now,
      searched_name: name,
      status: certs.length > 0 ? "ok" : "no_match",
      certifications: certs,
      source_url: `https://data.cityofchicago.org/resource/${DIVERSITY_DATASET}.json`,
    };
    await cacheSet(cacheKey, "diversity", brief);
    return brief;
  } catch (e) {
    return {
      source: "chicago_dps_economic_inclusion",
      fetched_at: now,
      searched_name: name,
      status: "needs_setup",
      reason: `dataset access error: ${(e as Error).message}`,
      certifications: [],
      error: (e as Error).message,
    };
  }
}

// ─── News mentions via Google News RSS ─────────────────────────────
// news.google.com/rss/search?q={name} — free, no auth.
// Returns RSS XML; parse entries for title/link/pubDate.

/** Decode the XML entities that appear in RSS text nodes (&amp;, &#39;, …). */
function decodeXmlEntities(text: string): string {
  const named: Record<string, string> = { lt: "<", gt: ">", quot: '"', apos: "'" };
  const fromCodePoint = (whole: string, cp: number) =>
    Number.isInteger(cp) && cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : whole;
  return text
    .replace(/&#(\d+);/g, (whole, dec) => fromCodePoint(whole, Number(dec)))
    .replace(/&#x([0-9a-f]+);/gi, (whole, hex) => fromCodePoint(whole, parseInt(hex, 16)))
    .replace(/&(lt|gt|quot|apos);/g, (_whole, key) => named[key])
    // &amp; last so "&amp;lt;" decodes to "&lt;" rather than "<".
    .replace(/&amp;/g, "&");
}

/**
 * Fetch recent news headlines mentioning a contractor from Google News RSS.
 *
 * The name is searched as an exact phrase. Up to 5 headlines are returned;
 * `total_mentions` is the number of <item> elements in the feed. Results are
 * cached for 6 hours; errors are not cached.
 */
export async function fetchNewsMentions(name: string): Promise<NewsBrief> {
  const now = new Date().toISOString();
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet(cacheKey, "news");
  if (cached) return cached;
  if (!name || name.length < 5) {
    return {
      source: "google_news_rss",
      fetched_at: now,
      searched_name: name,
      status: "no_match",
      total_mentions: 0,
      recent_headlines: [],
    };
  }
  const q = encodeURIComponent(`"${name}"`);
  const url = `https://news.google.com/rss/search?q=${q}&hl=en-US&gl=US&ceid=US:en`;
  try {
    const r = await fetch(url, {
      headers: { "User-Agent": "Mozilla/5.0 lakehouse-copilot" },
      signal: AbortSignal.timeout(10000),
    });
    if (!r.ok) throw new Error(`news HTTP ${r.status}`);
    const xml = await r.text();
    const items = [...xml.matchAll(/<item>([\s\S]*?)<\/item>/g)];
    // Strip CDATA wrappers, trim, then decode entities so titles read
    // "A & B" rather than "A &amp; B".
    const clean = (raw: string) =>
      decodeXmlEntities(raw.replace(/<!\[CDATA\[|\]\]>/g, "").trim());
    const headlines = items
      .slice(0, 5)
      .map((m) => {
        const inner = m[1];
        const get = (tag: string) => {
          const mm = inner.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`));
          return mm ? clean(mm[1]) : "";
        };
        return {
          title: get("title"),
          source: get("source"),
          date: get("pubDate"),
          url: get("link"),
        };
      })
      .filter((h) => h.title);
    const brief: NewsBrief = {
      source: "google_news_rss",
      fetched_at: now,
      searched_name: name,
      status: headlines.length > 0 ? "ok" : "no_match",
      total_mentions: items.length,
      recent_headlines: headlines,
    };
    // 6h TTL — news cycles fast
    await cacheSet(cacheKey, "news", brief, 6 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "google_news_rss",
      fetched_at: now,
      searched_name: name,
      status: "error",
      total_mentions: 0,
      recent_headlines: [],
      error: (e as Error).message,
    };
  }
}

// ─── BLS construction employment (Chicago MSA) ─────────────────────
// Single shared cache keyed on the series ID since it's macro context.
const BLS_CHI_CONSTRUCTION_SERIES = "SMU17169802000000001";
let _blsCache: BlsTrendBrief | null = null;
let _blsCacheAt = 0;
const BLS_CACHE_TTL_MS = 6 * 60 * 60 * 1000;
// After a failed refresh, do not hit the API again for this long.
let _blsLastFailure: { at: number; message: string } | null = null;
const BLS_FAILURE_BACKOFF_MS = 5 * 60 * 1000;

/**
 * Fetch the Chicago-MSA construction employment series from the BLS public
 * API and summarize month-over-month and year-over-year change.
 *
 * Successful results are held in a process-level cache for 6 hours. When a
 * refresh fails, the previous (stale) brief is returned if one exists,
 * otherwise an "error" brief; further attempts are suppressed for 5 minutes.
 */
export async function fetchBlsConstructionTrend(): Promise<BlsTrendBrief> {
  if (_blsCache && Date.now() - _blsCacheAt < BLS_CACHE_TTL_MS) return _blsCache;
  const now = new Date().toISOString();
  const errorBrief = (message: string): BlsTrendBrief => ({
    source: "bls.gov",
    fetched_at: now,
    series_id: BLS_CHI_CONSTRUCTION_SERIES,
    status: "error",
    latest: null,
    yoy_change_pct: null,
    mom_change_pct: null,
    recent: [],
    trend: "unknown",
    error: message,
  });
  if (_blsLastFailure && Date.now() - _blsLastFailure.at < BLS_FAILURE_BACKOFF_MS) {
    return _blsCache ?? errorBrief(_blsLastFailure.message);
  }

  const thisYear = new Date().getFullYear();
  try {
    const res = await fetch("https://api.bls.gov/publicAPI/v1/timeseries/data/", {
      method: "POST",
      headers: { "Content-Type": "application/json", "User-Agent": UA },
      body: JSON.stringify({
        seriesid: [BLS_CHI_CONSTRUCTION_SERIES],
        startyear: String(thisYear - 2),
        endyear: String(thisYear),
      }),
      signal: AbortSignal.timeout(12000),
    });
    if (!res.ok) throw new Error(`bls HTTP ${res.status}`);
    const j = (await res.json()) as any;
    const series = j?.Results?.series?.[0];
    const data = (series?.data || []) as Array<{ year: string; period: string; value: string }>;
    // Keep only monthly periods (M01..M12) with numeric values, so annual
    // averages or missing-value placeholders cannot skew the comparisons.
    const obs = data
      .filter((d) => /^M(0[1-9]|1[0-2])$/.test(d.period))
      .map((d) => ({ year: d.year, month: d.period, value: parseFloat(d.value) }))
      .filter((o) => Number.isFinite(o.value));
    if (obs.length === 0) throw new Error("no data");

    const toPoint = (o: { year: string; month: string; value: number }) => ({
      period: `${o.year}-${o.month}`,
      value: o.value,
    });
    const pctChange = (current: number, base: number | undefined) =>
      base === undefined || base === 0 ? null : ((current - base) / base) * 100;

    // BLS returns most-recent-first
    const latest = obs[0];
    const monthAgo = obs[1];
    // Match the same month one year earlier instead of assuming it sits
    // exactly 12 rows back.
    const priorYear = String(Number(latest.year) - 1);
    const yearAgo = obs.find((o) => o.year === priorYear && o.month === latest.month);
    const mom = pctChange(latest.value, monthAgo?.value);
    const yoy = pctChange(latest.value, yearAgo?.value);
    const trend: BlsTrendBrief["trend"] =
      yoy === null ? "unknown" :
      yoy > 1.5 ? "growing" :
      yoy < -1.5 ? "declining" : "stable";
    const brief: BlsTrendBrief = {
      source: "bls.gov",
      fetched_at: now,
      series_id: BLS_CHI_CONSTRUCTION_SERIES,
      status: "ok",
      latest: toPoint(latest),
      yoy_change_pct: yoy,
      mom_change_pct: mom,
      recent: obs.slice(0, 6).map(toPoint),
      trend,
    };
    _blsCache = brief;
    _blsCacheAt = Date.now();
    _blsLastFailure = null;
    return brief;
  } catch (e) {
    const message = (e as Error).message;
    _blsLastFailure = { at: Date.now(), message };
    // Prefer a stale-but-real reading over an error tile.
    return _blsCache ?? errorBrief(message);
  }
}

// ─── News sentiment scoring ────────────────────────────────────────
// Run this over the existing NewsBrief headlines. Cheap keyword-based
// classifier — not LLM-grade, but catches the obvious "lawsuit /
// fraud / OSHA fine" vs "awarded / expansion / wins" patterns.
//
// Every entry is wrapped in \b(...)\b below, so a bare stem such as
// "embezzl" can never match "embezzlement" — stems must carry \w*.
const SENTIMENT_NEG = [
  "lawsuits?","sued","fraud","fines?","fined","penalty","penalties",
  "violations?","investigation","investigated","indicted","charged",
  "guilty","arrest","arrested","bankrupt(?:cy)?","insolvenc?y",
  "debt","default","fired","layoffs?","quit","resigned",
  "scandal","whistleblower","theft","embezzl\\w*","kickbacks?","brib(?:e|es|ed|ery)",
  "complaints?","grievanc\\w*","strike","walkout","picket","accident",
  "death","fatal","killed","injur\\w*","hospitaliz\\w*","amputation","fall",
  "collapse","collapsed","fire","explosion","shut.?down","stop.?work",
  "delay","delays","behind.?schedule","over.?budget","cancel(?:l?)ed",
  "subpoena","audit","misconduct","harass\\w*","discriminat\\w*","retaliat\\w*",
  "evict\\w*","foreclos\\w*","levied",
];
const SENTIMENT_POS = [
  "award(?:ed|s)","won","wins?","top","best","gain(?:ed|s)?",
  "expand(?:ed|s|ing)?","expansion","contract","selected","chosen",
  "partnership","invest(?:ed|s|ing|ment)?","launch","launched",
  "milestone","completed?","record","strong","grew","grow(?:th|ing)",
  "promoted","hired","appoint(?:ed|s)?","ribbon","groundbreaking",
  "innovation","new\\s+headquarters","achievement","recogni[zs]ed",
  "leader(?:ship)?","sustainab\\w*","green","clean.?energy",
];

const NEG_RE = new RegExp("\\b(" + SENTIMENT_NEG.join("|") + ")\\b", "i");
const POS_RE = new RegExp("\\b(" + SENTIMENT_POS.join("|") + ")\\b", "i");

/**
 * Classify each headline as negative, positive or neutral by keyword hits.
 *
 * A headline is negative when it has more negative than positive keyword
 * matches, positive in the opposite case, and neutral on a tie (including
 * zero matches). Up to three matched keywords are kept as `reasons`.
 *
 * @returns `score` in [-1, 1] ((positive − negative) / headlines, 0 when
 *          there are no headlines), the three counts, and up to 5 flagged
 *          headlines.
 */
export function scoreNewsSentiment(news: NewsBrief): NewsSentiment {
  const headlines = news.recent_headlines || [];
  // Fresh global copies: matchAll needs the g flag, and the shared
  // NEG_RE/POS_RE stay stateless for any other user.
  const negAll = new RegExp(NEG_RE.source, "gi");
  const posAll = new RegExp(POS_RE.source, "gi");
  let pos = 0, neg = 0;
  const flagged: NewsSentiment["flagged_headlines"] = [];
  for (const h of headlines) {
    const t = h.title || "";
    const negMatches = [...t.matchAll(negAll)].map((m) => m[1]);
    const posMatches = [...t.matchAll(posAll)].map((m) => m[1]);
    if (negMatches.length > posMatches.length) {
      neg++;
      flagged.push({ title: t, polarity: "neg", reasons: negMatches.slice(0, 3) });
    } else if (posMatches.length > negMatches.length) {
      pos++;
      flagged.push({ title: t, polarity: "pos", reasons: posMatches.slice(0, 3) });
    }
  }
  const total = headlines.length;
  const score = total === 0 ? 0 : (pos - neg) / total;
  return {
    score: Math.max(-1, Math.min(1, score)),
    positive: pos,
    negative: neg,
    neutral: total - pos - neg,
    flagged_headlines: flagged.slice(0, 5),
  };
}

// ─── Placeholder fetchers for sources that need scraping work ──────

/**
 * Placeholder for OSHA Severe Injury Reports: no network access, always
 * "needs_setup".
 */
// NOTE(review): the return type was lost in extraction; OshaSirBrief is
// presumed from the file's *Brief naming — confirm.
export async function fetchOshaSirBrief(name: string): Promise<OshaSirBrief> {
  return {
    source: "osha.gov/severeinjury",
    fetched_at: new Date().toISOString(),
    searched_name: name,
    status: "needs_setup",
    reason:
      "OSHA Severe Injury Reports CSV is bot-protected at dol.gov (Imperva challenge). Path forward: (a) human-mediated periodic CSV download into /data/_entity_cache/sir.csv, then local lookup, or (b) browser-automation scraper with CAPTCHA handling.",
  };
}

/**
 * Placeholder for Cook County lien lookups: no network access, always
 * "needs_setup".
 */
// NOTE(review): the return type was lost in extraction; LiensBrief is
// presumed from the file's *Brief naming — confirm.
export async function fetchLiensBrief(address: string): Promise<LiensBrief> {
  // Cook County Recorder of Deeds merged into Cook County Clerk in 2020.
  // Public search portal: crs.cookcountyclerkil.gov (HTML form, JSESSIONID-based).
  // Resource catalog at datacatalog.cookcountyil.gov returns 404 on most lien
  // dataset IDs (xfev-8smz UCC liens specifically, also xrzj-c8ez code violations).
  // Path: HTML scrape of crs.cookcountyclerkil.gov with PIN→search→paginate.
  // Queued — needs ~150 LOC of form-state parsing.
  return {
    source: "cook_county_recorder",
    fetched_at: new Date().toISOString(),
    searched_address: address,
    status: "needs_setup",
    reason:
      "Cook County Clerk recordings portal (crs.cookcountyclerkil.gov) needs JSESSIONID + form POST scrape. Socrata catalog UCC dataset (xfev-8smz) returns 404 on resource endpoint. Queued for next batch.",
  };
}

// ─── NLRB cases (real scraper) ─────────────────────────────────────
// Public search at apps.nlrb.gov/eservice/dailyclosed.aspx and
// nlrb.gov/search/case. The /search/case page returns plain HTML.

/**
 * Scrape the NLRB case-search results page for a name.
 *
 * Extracts case numbers from `/case/...` links (de-duplicated, first 5 kept)
 * and a total from the first "N result" text on the page. Cached for 24h;
 * fetch failures return "needs_setup" and are not cached.
 */
export async function fetchNlrbBriefReal(name: string): Promise<NlrbBrief> {
  const now = new Date().toISOString();
  const cacheKey = normalizeEntityName(name);
  const cached = await cacheGet(cacheKey, "nlrb");
  if (cached) return cached;
  const q = encodeURIComponent(name);
  const url = `https://www.nlrb.gov/search/case?search_term=${q}`;
  try {
    const r = await fetch(url, {
      headers: { "User-Agent": UA },
      signal: AbortSignal.timeout(12000),
    });
    if (!r.ok) throw new Error(`nlrb HTTP ${r.status}`);
    const html = await r.text();
    // Cases appear in result blocks — collect case-detail links. The same
    // case can be linked more than once, so de-duplicate by case number.
    const caseNumbers = [
      ...new Set(
        [...html.matchAll(/<a[^>]+href="\/case\/([0-9A-Z\-]+)"/g)].map((m) => m[1]),
      ),
    ];
    const cases = caseNumbers.slice(0, 5).map((caseNumber) => ({
      case_number: caseNumber,
      case_type: "?",
      date: "",
      status: "",
    }));
    // Total count appears near top — pattern "X results"
    const totalMatch = html.match(/(\d[\d,]*)\s+result/i);
    const total = totalMatch
      ? parseInt(totalMatch[1].replace(/,/g, ""), 10)
      : caseNumbers.length;
    const brief: NlrbBrief = {
      source: "nlrb.gov",
      fetched_at: now,
      searched_name: name,
      status: cases.length > 0 ? "ok" : "no_match",
      total_cases: total,
      recent_cases: cases,
    };
    await cacheSet(cacheKey, "nlrb", brief, 24 * 60 * 60 * 1000);
    return brief;
  } catch (e) {
    return {
      source: "nlrb.gov",
      fetched_at: now,
      searched_name: name,
      status: "needs_setup",
      reason: `nlrb fetch error: ${(e as Error).message}`,
    };
  }
}

// ─── Project Index Score (matrix aggregation) ──────────────────────
// Walks every signal across owner/contractors and produces a single
// 0-100 index anchored at 50 neutral. Each contribution is auditable.
// Weights are documentation-grade — adjust over time as we learn which
// signals actually correlate with project success/delay.

/**
 * Aggregate property-, entity- and macro-level signals into one 0-100 index.
 *
 * @param property Property-side briefs (violations, site context, owner).
 * @param entities Per-contact briefs for the permit's contractors/owners.
 * @param macro    Optional BLS employment trend; ignored unless status "ok".
 * @returns The clamped score (50 + sum of contributions), its color band,
 *          the contributions sorted by absolute size, and `partial` = true
 *          when any consulted source was unavailable.
 */
export function scoreProject(
  property: PermitEntityBrief["property"],
  entities: EntityBrief[],
  macro: BlsTrendBrief | null = null,
): ProjectIndexScore {
  const cs: SignalContribution[] = [];
  let partial = false;

  // Macro: Chicago construction employment YoY. Affects every permit
  // proportionally — labor-shrinking market raises filling difficulty,
  // labor-growing market signals confidence.
  if (macro?.status === "ok" && macro.yoy_change_pct !== null) {
    const w = 3;
    const yoy = macro.yoy_change_pct;
    const dir = yoy > 1.5 ? 1 : yoy < -1.5 ? -1 : 0;
    if (dir !== 0) {
      cs.push({
        signal: "macro_chi_construction_employment",
        weight: w,
        direction: dir,
        raw: yoy,
        contribution: dir * w,
        note: `Chicago MSA construction employment ${yoy >= 0 ? "+" : ""}${yoy.toFixed(1)}% YoY (BLS) — labor market ${macro.trend}`,
      });
    }
  }

  // ─── Property-level signals ───
  if (property.violations?.status === "ok") {
    const v = property.violations;
    if (v.stop_work_orders > 0) {
      const w = 10;
      cs.push({
        signal: "stop_work_orders",
        weight: w,
        direction: -1,
        raw: v.stop_work_orders,
        // Capped at three orders' worth (-30).
        contribution: -w * Math.min(v.stop_work_orders, 3),
        note: `${v.stop_work_orders} stop-work order${v.stop_work_orders === 1 ? "" : "s"} on this property`,
      });
    }
    if (v.open_violations > 0) {
      const w = 6;
      cs.push({
        signal: "open_property_violations",
        weight: w,
        direction: -1,
        raw: v.open_violations,
        // Half weight per violation, capped at four (-12).
        contribution: -w * Math.min(v.open_violations, 4) / 2,
        note: `${v.open_violations} open building violation${v.open_violations === 1 ? "" : "s"} at the address`,
      });
    } else if (v.total_violations === 0) {
      const w = 3;
      cs.push({
        signal: "property_clean",
        weight: w,
        direction: 1,
        raw: 0,
        contribution: w,
        note: "No building violations on record",
      });
    }
  }

  // TIF district = public-subsidy zone. Project has financial backing
  // from the city's tax-increment financing pot. Mild positive signal
  // because public oversight + structured funding usually means the
  // project survives funding gaps. Not a blanket good — TIFs do fail —
  // but worth noting.
  if (property.site_context?.in_tif_district) {
    const w = 4;
    const tifName = property.site_context.tif_district_name;
    cs.push({
      signal: "in_tif_district",
      weight: w,
      direction: 1,
      raw: tifName,
      contribution: w,
      // Avoid printing TIF "undefined" when the district name is missing.
      note: tifName
        ? `Site is inside TIF "${tifName}" — public tax-increment financing backing`
        : "Site is inside a TIF district — public tax-increment financing backing",
    });
  }
  // Transit access — workers can commute via L without parking.
  // <800m is "right at a station," <1500m is "walkable," beyond that
  // crews are car/shuttle dependent.
  if (property.site_context?.nearest_cta_distance_m !== undefined) {
    const m = property.site_context.nearest_cta_distance_m;
    if (m <= 800) {
      const w = 2;
      const station = property.site_context.nearest_cta_station;
      const lines = property.site_context.nearest_cta_lines;
      const stationLabel = `${station ? ` ${station}` : ""}${lines ? ` (${lines})` : ""}`;
      cs.push({
        signal: "transit_access_strong",
        weight: w,
        direction: 1,
        raw: m,
        contribution: w,
        note: `Site is ${m}m from CTA${stationLabel} — workers can commute by L`,
      });
    } else if (m <= 1500) {
      // Neutral — walkable but not adjacent
    } else {
      const w = 1;
      cs.push({
        signal: "transit_access_weak",
        weight: w,
        direction: -1,
        raw: m,
        contribution: -w,
        note: `Site is ${(m / 1000).toFixed(1)}km from nearest CTA stop — crew transport burden`,
      });
    }
  }

  // Labor competition — many active permits within 0.5mi means crews
  // are already booked elsewhere. >5 permits in 90d is "saturated."
  if (
    property.site_context?.nearby_permits_90d !== undefined &&
    property.site_context.nearby_permits_90d > 5
  ) {
    const n = property.site_context.nearby_permits_90d;
    const w = 3;
    cs.push({
      signal: "labor_competition_high",
      weight: w,
      direction: -1,
      raw: n,
      contribution: -Math.min(w, n / 5),
      note: `${n} other permits active within 0.5mi (last 90d) — crew competition`,
    });
  }

  if (property.site_context?.is_landmark) {
    const w = 3;
    cs.push({
      signal: "landmark_status",
      weight: w,
      direction: -1,
      raw: property.site_context.landmark_name,
      contribution: -w,
      note: `Landmark designation${property.site_context.landmark_name ? ` (${property.site_context.landmark_name})` : ""} — preservation review extends timeline`,
    });
  }

  if (property.owner?.status === "ok") {
    const mailingState = property.owner.mailing_state;
    if (mailingState && mailingState !== "IL") {
      const w = 2;
      cs.push({
        signal: "owner_out_of_state",
        weight: w,
        direction: -1,
        raw: mailingState,
        contribution: -w,
        note: `Owner mails out of state (${mailingState}) — slower local accountability`,
      });
    } else if (mailingState === "IL") {
      // Only credit "local" when the state is actually known to be IL; a
      // missing mailing state is unknown, not local.
      const w = 2;
      cs.push({
        signal: "owner_local",
        weight: w,
        direction: 1,
        raw: "IL",
        contribution: w,
        note: "Owner mails locally — faster accountability",
      });
    }
  }

  // ─── Per-entity signals ───
  for (const e of entities) {
    const tag = `[${e.display_name}]`;

    if (e.osha?.status === "ok") {
      const o = e.osha;
      if (o.inspection_count > 10) {
        const w = 6;
        cs.push({
          signal: "osha_high_inspection_count",
          weight: w,
          direction: -1,
          raw: o.inspection_count,
          contribution: -w,
          note: `${tag} ${o.inspection_count} OSHA inspections nationally`,
        });
      } else if (o.inspection_count === 0) {
        const w = 3;
        cs.push({
          signal: "osha_clean",
          weight: w,
          direction: 1,
          raw: 0,
          contribution: w,
          note: `${tag} no OSHA inspections on record`,
        });
      }
      if (o.most_recent_date) {
        // An unparseable date yields NaN, which fails the < 90 test below.
        const ageDays = (Date.now() - Date.parse(o.most_recent_date)) / 86400000;
        if (ageDays < 90) {
          const w = 7;
          cs.push({
            signal: "osha_recent_action",
            weight: w,
            direction: -1,
            raw: o.most_recent_date,
            contribution: -w,
            note: `${tag} OSHA inspection ${Math.round(ageDays)}d ago`,
          });
        }
      }
    } else if (e.osha?.status === "error") {
      partial = true;
    }

    if (e.history?.status === "ok") {
      if (e.history.trend === "new") {
        const w = 8;
        cs.push({
          signal: "new_entity",
          weight: w,
          direction: -1,
          raw: e.history.permits_historical_total,
          contribution: -w,
          note: `${tag} only ${e.history.permits_historical_total} permits ever — possible LLC-shuffle signature`,
        });
      } else if (e.history.trend === "growing") {
        const w = 3;
        cs.push({
          signal: "activity_growing",
          weight: w,
          direction: 1,
          raw: e.history.permits_last_180d,
          contribution: w,
          note: `${tag} growing Chicago footprint (${e.history.permits_last_180d} in 180d)`,
        });
      } else if (e.history.trend === "declining") {
        const w = 4;
        cs.push({
          signal: "activity_declining",
          weight: w,
          direction: -1,
          raw: e.history.permits_last_180d,
          contribution: -w,
          note: `${tag} declining Chicago footprint`,
        });
      }
    }

    if (e.federal?.status === "ok" && e.federal.total_awards_value > 0) {
      const fedM = e.federal.total_awards_value / 1e6;
      const w = 5;
      // Keep small totals readable instead of rounding them to "$0M".
      const fedLabel =
        fedM >= 10 ? fedM.toFixed(0) : fedM >= 0.1 ? fedM.toFixed(1) : "<0.1";
      cs.push({
        signal: "federal_contract_history",
        weight: w,
        direction: 1,
        raw: e.federal.total_awards_value,
        // Log-scaled so very large award totals saturate at the weight.
        contribution: Math.min(w, Math.log10(fedM + 1) * 2),
        note: `${tag} $${fedLabel}M federal contracts — vetted compliance`,
      });
    }

    // SVEP — the heaviest-weight negative signal we have. OSHA placed
    // this contractor in the Severe Violator Enforcement Program after
    // willful or repeat violations. Real money + worker safety risk.
    if (e.svep?.flagged) {
      const w = 10;
      cs.push({
        signal: "osha_svep_listed",
        weight: w,
        direction: -1,
        raw: e.svep.matched_entries.map((m) => m.name).join(" | "),
        contribution: -w * 2,
        note: `${tag} OSHA SVEP-listed (Severe Violator Enforcement Program) — formally flagged worst-actor`,
      });
    }

    if (e.parent_link?.status === "ok" && e.parent_link.parent_ticker) {
      const w = 3;
      cs.push({
        signal: "public_parent_chain",
        weight: w,
        direction: 1,
        raw: e.parent_link.parent_ticker,
        contribution: w,
        note: `${tag} traceable to public parent ${e.parent_link.parent_ticker}`,
      });
    }

    // News sentiment over the recent headlines. Negative bias means
    // the press cycle around this firm is dominated by lawsuits /
    // accidents / fines.
    if (e.news_sentiment) {
      const ns = e.news_sentiment;
      if (ns.negative > ns.positive && ns.negative > 0) {
        const w = 5;
        cs.push({
          signal: "news_sentiment_negative",
          weight: w,
          direction: -1,
          raw: ns.score,
          contribution: -Math.min(w, ns.negative * 1.5),
          note: `${tag} negative news cycle (${ns.negative} flagged: ${ns.flagged_headlines.filter((h) => h.polarity === "neg").slice(0, 2).map((h) => h.reasons.join("/")).join(", ")})`,
        });
      } else if (ns.positive > ns.negative && ns.positive >= 2) {
        const w = 2;
        cs.push({
          signal: "news_sentiment_positive",
          weight: w,
          direction: 1,
          raw: ns.score,
          contribution: w,
          note: `${tag} positive news cycle (${ns.positive} flagged on awards/expansion)`,
        });
      }
    }

    if (e.debarment?.status === "needs_setup") partial = true;
    if (e.nlrb?.status === "needs_setup") partial = true;
    if (e.ilsos?.status === "source_unreachable") partial = true;
  }

  // Net score from 50 baseline. Clamp 0..100.
  const net = cs.reduce((a, c) => a + c.contribution, 0);
  const score = Math.max(0, Math.min(100, Math.round(50 + net)));
  const band: ProjectIndexScore["band"] =
    score < 30 ? "red" :
    score < 45 ? "amber" :
    score <= 55 ? "neutral" :
    score <= 75 ? "green" : "strong";

  // Sort by absolute contribution, biggest movers first
  cs.sort((a, b) => Math.abs(b.contribution) - Math.abs(a.contribution));

  return { score, band, contributions: cs, partial };
}

// ─── Public: build a brief for a single permit ─────────────────────

type PermitInput = {
  permit_id?: string;
  address?: string;
  work_type?: string;
  contact_1_name?: string;
  contact_1_type?: string;
  contact_2_name?: string;
  contact_2_type?: string;
};

/**
 * Build the full brief for one permit: per-contact entity briefs, the
 * property-side briefs, the macro trend, the ticker portfolio and the
 * aggregated project index score.
 *
 * @param permit Permit fields; contacts are de-duplicated by normalized name.
 * @param opts   Each flag defaults to true; set to false to skip that
 *               per-entity source (its field is then null).
 */
// NOTE(review): the return type was lost in extraction; PermitEntityBrief is
// presumed because scoreProject takes PermitEntityBrief["property"] — confirm.
export async function buildPermitBrief(
  permit: PermitInput,
  opts: {
    fetchIlsos?: boolean;
    fetchOsha?: boolean;
    fetchTicker?: boolean;
    fetchHistory?: boolean;
  } = {},
): Promise<PermitEntityBrief> {
  const wantOsha = opts.fetchOsha !== false;
  const wantIlsos = opts.fetchIlsos !== false;
  const wantTicker = opts.fetchTicker !== false;
  const wantHistory = opts.fetchHistory !== false;

  const rawContacts: { name: string; role: string }[] = [];
  if (permit.contact_1_name?.trim()) {
    rawContacts.push({
      name: permit.contact_1_name.trim(),
      role: permit.contact_1_type || "CONTACT_1",
    });
  }
  if (permit.contact_2_name?.trim()) {
    rawContacts.push({
      name: permit.contact_2_name.trim(),
      role: permit.contact_2_type || "CONTACT_2",
    });
  }
  // Dedupe by normalized name — same firm often appears as both contacts.
  const seen = new Set<string>();
  const contacts: { name: string; role: string }[] = [];
  for (const c of rawContacts) {
    const key = normalizeEntityName(c.name);
    if (!key || seen.has(key)) continue;
    seen.add(key);
    contacts.push(c);
  }

  // 12 parallel calls per entity. OSHA throttles internally, the rest
  // fan straight out. ~3-5s end-to-end per entity on cold cache.
  const buildEntity = async (c: { name: string; role: string }): Promise<EntityBrief> => {
    const [osha, ilsos, stock, history, debarment, parent_link, federal, nlrb, diversity, news, osha_sir, svep] =
      await Promise.all([
        wantOsha ? fetchOshaBrief(c.name) : Promise.resolve(null),
        wantIlsos ? fetchIlsosBrief(c.name) : Promise.resolve(null),
        wantTicker ? fetchTickerBrief(c.name) : Promise.resolve(null),
        wantHistory ? fetchContractorHistory(c.name) : Promise.resolve(null),
        fetchDebarmentBrief(c.name),
        fetchParentLink(c.name),
        fetchFederalContracts(c.name),
        fetchNlrbBriefReal(c.name),
        fetchDiversityCerts(c.name),
        fetchNewsMentions(c.name),
        fetchOshaSirBrief(c.name),
        fetchSvepBrief(c.name),
      ]);
    const news_sentiment = news ? scoreNewsSentiment(news) : null;
    return {
      key: normalizeEntityName(c.name),
      display_name: c.name,
      role: c.role,
      ticker: entityTicker(c.name),
      osha,
      ilsos,
      stock,
      history,
      debarment,
      parent_link,
      federal,
      nlrb,
      diversity,
      news,
      news_sentiment,
      osha_sir,
      svep,
      risk: scoreEntity(osha, ilsos, history),
    };
  };

  // Contacts are still processed one after another (keeps the outbound
  // fan-out per permit bounded), but the property-side and macro lookups do
  // not depend on them, so all three groups run concurrently under a single
  // Promise.all instead of being awaited back-to-back.
  const address = permit.address;
  const [entities, [owner, violations, site_context, liens], macro] = await Promise.all([
    (async () => {
      const built: EntityBrief[] = [];
      for (const c of contacts) built.push(await buildEntity(c));
      return built;
    })(),
    // Property-side signals: owner, violations, site context, liens placeholder.
    // NOTE(review): fetchSiteContext also calls fetchPropertyOwner for the
    // same address, so the owner lookup can run twice on a cold cache.
    Promise.all([
      address ? fetchPropertyOwner(address) : Promise.resolve(null),
      address ? fetchPropertyViolations(address) : Promise.resolve(null),
      address ? fetchSiteContext(address) : Promise.resolve(null),
      address ? fetchLiensBrief(address) : Promise.resolve(null),
    ]),
    // Macro context (BLS Chicago construction employment) — single fetch
    // per process-cache window.
    fetchBlsConstructionTrend(),
  ]);

  // Flatten the ticker portfolio across entities, dedupe by ticker
  // symbol, sort by cap_proxy descending (most-profitable related
  // company first — Project Index "most-profitable beneficiary first").
  const tickersByKey = new Map<string, NonNullable<EntityBrief["stock"]>>();
  for (const e of entities) {
    if (e.stock?.status === "ok" && e.stock.ticker) {
      tickersByKey.set(e.stock.ticker, e.stock);
    }
  }
  const tickers = [...tickersByKey.values()].sort(
    (a, b) => (b.cap_proxy ?? 0) - (a.cap_proxy ?? 0),
  );

  const union = unionsForWorkType(permit.work_type);

  const property = {
    address: permit.address || "",
    ticker: addressTicker(permit.address || ""),
    owner,
    violations,
    union,
    site_context,
    liens,
  };
  const index_score = scoreProject(property, entities, macro);

  return {
    permit_id: permit.permit_id || "",
    property,
    entities,
    tickers,
    macro,
    index_score,
    // Roadmap: now-shorter list as more sources go live. Items pending
    // for batch 4-5: SEC Exhibit 21, debarment lists, USASpending,
    // mechanics liens, NLRB+OSHA-Severe-Injury, contractor drill-down.
    roadmap: [
      "SEC Exhibit 21 full index — walk every 10-K Exhibit 21 to build a complete subsidiary→parent tree (curated map covers top 12 GCs today)",
      "SAM.gov v3 API — register at sam.gov for free key; OR human-mediated CSV download into local cache",
      "IDOL prevailing-wage debarment list — HTML/PDF scrape from labor.illinois.gov",
      "OSHA Severe Injury Reports — bot-protected at dol.gov; needs human-mediated CSV refresh OR browser-automation",
      "Cook County Recorder mechanics liens — HTML scrape with PIN-based pagination",
      "TIF polygon containment check — needs lat/long-in-polygon (turf.js) to confirm in_tif_district",
      "Community area name table — static lookup from Chicago number → name",
      "BLS QCEW + FRED commodity prices — macroeconomic context",
      "PACER bankruptcy + civil litigation — federal court filings (free for opinion search)",
      "Cook County Treasurer — property tax delinquency",
      "NYC DOB + LA LADBS — cross-city contractor footprint",
      "LLC-shuffle detector — principal+agent fingerprint matching (blocked on ILSOS access)",
    ],
    generated_at: new Date().toISOString(),
  };
}