]*>([\s\S]*?)<\/tr>/gi)];
+ for (const rm of rows) {
+ const row = rm[1];
+ if (!row.includes("inspection_detail")) continue;
+ const tds = [...row.matchAll(/]*>([\s\S]*?)<\/td>/gi)].map((t) =>
+ t[1].replace(/<[^>]+>/g, "").replace(/ /g, "").trim(),
+ );
+ const idMatch = row.match(/establishment\.inspection_detail\?id=([\d.]+)/);
+ if (!idMatch) continue;
+ const id = idMatch[1];
+ inspections.push({
+ id,
+ date: tds[3] || "",
+ state: tds[5] || "",
+ type: tds[6] || "",
+ scope: tds[7] || "",
+ naics: tds[9] || "",
+ establishment: tds[tds.length - 1] || "",
+ detail_url: `https://www.osha.gov/ords/imis/establishment.inspection_detail?id=${id}`,
+ });
+ }
+ return inspections;
+}
+
+ /**
+  * Build an OSHA inspection brief for an establishment name.
+  *
+  * Flow: normalize the name -> optionally serve a cached brief -> fetch and
+  * parse the OSHA establishment search -> sort newest-first -> cache -> return.
+  * Fetch/parse failures never throw; they come back as status "error".
+  *
+  * @param name     Establishment name to search for.
+  * @param useCache When false, skip the cache read (a fresh result is still written).
+  * @returns "ok" with up to 5 most recent inspections, "no_match" when the name
+  *          normalizes to empty or nothing was parsed, or "error" with a message.
+  */
+ export async function fetchOshaBrief(name: string, useCache = true): Promise<OshaBrief> {
+   const normalized = normalizeEntityName(name);
+   const now = new Date().toISOString();
+
+   // Zero-inspection brief shared by the "no_match" and "error" exits. The
+   // `error` key is only present when a message is supplied.
+   const blank = (
+     status: OshaBrief["status"],
+     source_url: string,
+     error?: string,
+   ): OshaBrief => ({
+     source: "osha",
+     fetched_at: now,
+     searched_name: name,
+     normalized_name: normalized || "",
+     source_url,
+     status,
+     inspection_count: 0,
+     most_recent_date: null,
+     recent_inspections: [],
+     states_seen: [],
+     ...(error === undefined ? {} : { error }),
+   });
+
+   if (!normalized) return blank("no_match", "");
+
+   if (useCache) {
+     const hit = await cacheGet(normalized, "osha");
+     if (hit) return hit;
+   }
+
+   const u = `https://www.osha.gov/ords/imis/establishment.search?p_logger=1&establishment=${encodeURIComponent(name)}&State=all&p_case=all`;
+   let brief: OshaBrief;
+   try {
+     const html = await fetchOshaHTML(name);
+     const inspections = parseOshaInspections(html);
+     // Newest first; unparseable dates count as epoch 0 and sink to the end.
+     inspections.sort((a, b) => (Date.parse(b.date) || 0) - (Date.parse(a.date) || 0));
+     const states_seen = [...new Set(inspections.map((i) => i.state).filter(Boolean))];
+     brief = {
+       source: "osha",
+       fetched_at: now,
+       searched_name: name,
+       normalized_name: normalized,
+       source_url: u,
+       status: inspections.length > 0 ? "ok" : "no_match",
+       inspection_count: inspections.length,
+       // `||` rather than `??`: a row parsed with an empty date string must
+       // surface as null, not "".
+       most_recent_date: inspections[0]?.date || null,
+       recent_inspections: inspections.slice(0, 5),
+       states_seen,
+     };
+   } catch (e) {
+     return blank("error", u, (e as Error).message);
+   }
+
+   // The cache is an optimization: a failed write must not turn a brief that
+   // was fetched successfully into an "error" result.
+   try {
+     await cacheSet(normalized, "osha", brief);
+   } catch (e) {
+     console.warn("[entity] osha cache write failed:", (e as Error).message);
+   }
+   return brief;
+ }
+
+// ─── ILSOS placeholder ────────────────────────────────────────────
+
+ /**
+  * Placeholder Illinois Secretary of State lookup.
+  *
+  * Returns a cached "ilsos" entry for the normalized name when one exists;
+  * otherwise reports the source as unreachable. This function performs no
+  * network request and never writes to the cache itself.
+  *
+  * @param name Entity name to look up.
+  * @returns The cached brief, or a "source_unreachable" brief explaining why.
+  */
+ export async function fetchIlsosBrief(name: string): Promise<IlsosBrief> {
+   const normalized = normalizeEntityName(name);
+   const cached = await cacheGet(normalized, "ilsos");
+   if (cached) return cached;
+   return {
+     source: "ilsos",
+     fetched_at: new Date().toISOString(),
+     searched_name: name,
+     status: "source_unreachable",
+     reason:
+       "Illinois SoS apps.ilsos.gov blocks our datacenter ASN. Pending: VPN-routed fetch, OpenCorporates API token (~200/mo free), or paid IL bulk feed.",
+   };
+ }
+
+// ─── SEC EDGAR ticker lookup ──────────────────────────────────────
+// The canonical free source. A single JSON file at sec.gov/files/
+// company_tickers.json maps every US-listed ticker → CIK → company
+// name (~10K issuers). We fetch it at most once per 24h and keep it
+// in memory + on disk.
+
+ const SEC_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json";
+ const SEC_TICKERS_CACHE = join(CACHE_DIR, "sec_company_tickers.json");
+ const SEC_TICKERS_TTL_MS = 24 * 60 * 60 * 1000;
+
+ // One issuer row as it appears in company_tickers.json.
+ type SecTickerRow = { cik_str: number; ticker: string; title: string };
+ // In-memory ticker index; null until a load succeeds.
+ //   rows          every usable row from the SEC file
+ //   byNormalized  normalizeEntityName(title) -> rows sharing that key
+ //   refreshed_at  epoch ms used for the TTL check
+ let secTickersIdx: {
+   rows: SecTickerRow[];
+   byNormalized: Map<string, SecTickerRow[]>;
+   refreshed_at: number;
+ } | null = null;
+
+ /**
+  * Return the SEC ticker index, (re)building it when the in-memory copy is
+  * missing or older than SEC_TICKERS_TTL_MS.
+  *
+  * Load order: fresh in-memory index -> fresh on-disk cache -> SEC download.
+  * When neither disk nor network yields data, the previous in-memory index is
+  * returned (possibly stale, possibly null). Never throws.
+  */
+ async function ensureSecTickerIndex(): Promise<typeof secTickersIdx> {
+   if (secTickersIdx && Date.now() - secTickersIdx.refreshed_at < SEC_TICKERS_TTL_MS) {
+     return secTickersIdx;
+   }
+
+   let raw: Record<string, SecTickerRow> | null = null;
+   // Epoch ms at which `raw` was downloaded. Carried into refreshed_at so the
+   // TTL tracks the age of the data, not the moment it was loaded from disk.
+   let fetchedAt = 0;
+
+   // Disk first
+   if (existsSync(SEC_TICKERS_CACHE)) {
+     try {
+       const parsed = JSON.parse(await readFile(SEC_TICKERS_CACHE, "utf-8"));
+       const stamp = Number(parsed?._fetched_at);
+       if (stamp > 0 && Date.now() - stamp < SEC_TICKERS_TTL_MS && parsed.rows) {
+         raw = parsed.rows;
+         fetchedAt = stamp;
+       }
+     } catch {/* unreadable or corrupt cache file: fall through to the network */}
+   }
+
+   // Refresh from SEC
+   if (!raw) {
+     try {
+       const res = await fetch(SEC_TICKERS_URL, {
+         headers: { "User-Agent": UA, "Accept": "application/json" },
+         signal: AbortSignal.timeout(20000),
+       });
+       if (res.ok) {
+         raw = await res.json();
+         fetchedAt = Date.now();
+       } else {
+         console.warn("[entity] sec tickers fetch failed:", `HTTP ${res.status}`);
+       }
+     } catch (e) {
+       console.warn("[entity] sec tickers fetch failed:", (e as Error).message);
+     }
+     if (raw) {
+       // Persisting is best-effort; the freshly downloaded data is still used.
+       try {
+         await writeFile(
+           SEC_TICKERS_CACHE,
+           JSON.stringify({ _fetched_at: fetchedAt, rows: raw }),
+         );
+       } catch (e) {
+         console.warn("[entity] sec tickers cache write failed:", (e as Error).message);
+       }
+     }
+   }
+   if (!raw) return secTickersIdx; // may still be null
+
+   const rows = Object.values(raw).filter((r) => r && r.ticker && r.title);
+   const byNormalized = new Map<string, SecTickerRow[]>();
+   for (const r of rows) {
+     const k = normalizeEntityName(r.title);
+     if (!k) continue;
+     const bucket = byNormalized.get(k);
+     if (bucket) bucket.push(r);
+     else byNormalized.set(k, [r]);
+   }
+   secTickersIdx = { rows, byNormalized, refreshed_at: fetchedAt };
+   return secTickersIdx;
+ }
+
+ // Name → ticker resolution. Tries exact-normalized first, then a
+ // word-overlap (fuzzy) match. Hard rule for the fuzzy path: at least TWO
+ // shared identifying words (4+ chars, not a stopword), covering at least
+ // half of the smaller identifying-word set. Whole-word comparison prevents
+ // spurious hits like "POREMBA DENISE" matching "NI Holdings" because
+ // "DENISE" contains "NI". Also skips names that look like individual
+ // persons or government bodies — they're never issuers.
+ //
+ // Returns the best-matching SEC row, or null when the index is unavailable,
+ // the name is filtered out, or nothing matches.
+ async function resolveSecTicker(name: string): Promise<SecTickerRow | null> {
+   const idx = await ensureSecTickerIndex();
+   if (!idx) return null;
+   const key = normalizeEntityName(name);
+   if (!key) return null;
+
+   // Skip obvious individual names — "LAST, FIRST M" or "FIRST LAST".
+   // Two signals:
+   //   (1) comma in name → "LAST, FIRST" form, human.
+   //   (2) exactly 2 word-tokens AND no entity marker → "FIRST LAST" form.
+   // Either signal is overridden by an entity marker, so "APPLE INC" and
+   // "SMITH, JONES LLC" are still treated as companies.
+   const upper = name.toUpperCase();
+   const hasEntityMarker =
+     /\b(LLC|L\s*L\s*C|LLP|INC|INC\.|CORP|CORPORATION|COMPANY|CO|CO\.|LTD|LIMITED|GROUP|HOLDINGS|ENTERPRISES|AUTHORITY|ASSOCIATION|TRUST|BANK|PARTNERS|INDUSTRIES|CONSTRUCTION|ELECTRIC|MECHANICAL|CONTRACTING|PROPERTIES|REALTY|DEVELOPMENT|ENGINEERING|PLUMBING|ROOFING|SERVICES|SOLUTIONS|SYSTEMS|TECHNOLOGIES|LOGISTICS|RESOURCES)\b/.test(
+       upper,
+     );
+   const tokens = upper.split(/\s+/).filter(Boolean);
+   const hasComma = /,/.test(name);
+   const looksIndividual = !hasEntityMarker && (hasComma || tokens.length === 2);
+   if (looksIndividual) return null;
+
+   // Skip obvious government / municipal entities — they're not US public
+   // issuers. If they were, their bond funds would surface separately.
+   const isGovernment =
+     /\b(AUTHORITY|MUNICIPAL|CITY OF|COUNTY|DISTRICT|BOARD OF EDUCATION|PUBLIC SCHOOLS|TRANSIT|HOUSING AUTHORITY|DEPARTMENT OF|BUREAU OF|PARK DISTRICT|WATER RECLAMATION)\b/i.test(
+       name,
+     );
+   if (isGovernment) return null;
+
+   // Exact normalized match — most reliable path. Prefer the shortest ticker.
+   // Sort a copy: the bucket belongs to the shared index and must not be
+   // reordered as a side effect of a lookup.
+   const exact = idx.byNormalized.get(key);
+   if (exact && exact.length) {
+     return [...exact].sort((a, b) => a.ticker.length - b.ticker.length)[0];
+   }
+
+   // Word-overlap with stopwords filtered. "CONSTRUCTION", "COMPANY",
+   // "SERVICES" etc. are too generic to identify an issuer on their own
+   // — shared presence is meaningless. Require overlap on identifying
+   // words only.
+   const STOPWORDS = new Set([
+     "CONSTRUCTION","COMPANY","SERVICES","SERVICE","GROUP","SYSTEMS",
+     "SOLUTIONS","RESOURCES","TECHNOLOGIES","INDUSTRIES","PARTNERS",
+     "ELECTRIC","MECHANICAL","PLUMBING","ROOFING","DEVELOPMENT","REALTY",
+     "PROPERTIES","ENGINEERING","ASSOCIATES","INTERNATIONAL","NATIONAL",
+     "AMERICAN","GENERAL","AUTHORITY","HOLDINGS","ENTERPRISES","LOGISTICS",
+     "COMMUNICATIONS","FINANCIAL","CAPITAL","PROPERTY","RESIDENTIAL",
+     "COMMERCIAL","GLOBAL","UNITED","STATES","STATE","FEDERAL",
+     "MANAGEMENT","CONTRACTING","CONTRACTORS","BUILDERS","BUILDING",
+   ]);
+   // NOTE(review): assumes normalizeEntityName returns upper-case,
+   // single-space-separated words (the stopword list is upper-case) — confirm.
+   const identifyingWords = (s: string) =>
+     new Set(
+       s
+         .split(" ")
+         .filter((w) => w.length >= 4 && !STOPWORDS.has(w)),
+     );
+   const keyId = identifyingWords(key);
+   if (keyId.size === 0) return null; // no non-stopword words to match on
+
+   const candidates: { row: SecTickerRow; overlap: number; k2: string }[] = [];
+   for (const r of idx.rows) {
+     const k2 = normalizeEntityName(r.title);
+     if (!k2) continue;
+     const k2Id = identifyingWords(k2);
+     let overlap = 0;
+     for (const w of k2Id) {
+       if (keyId.has(w)) overlap++;
+     }
+     // Fuzzy path requires overlap ≥ 2 identifying words. Single-word
+     // matches are only valid via the exact-normalized path above;
+     // otherwise "POWER" (from POWER CONSTRUCTION) falsely matches
+     // POWER INTEGRATIONS and CAPITAL POWER CORP, and "TARGET HOSPITALITY"
+     // would match "TARGET CORP" on the single shared word "TARGET".
+     if (overlap < 2) continue;
+     // The overlap must also cover ≥ 50% of the smaller identifying-word
+     // set, so long names have to agree on substance.
+     const minIdSize = Math.min(keyId.size, k2Id.size);
+     if (overlap / minIdSize < 0.5) continue;
+     candidates.push({ row: r, overlap, k2 });
+   }
+   if (!candidates.length) return null;
+
+   // Rank: most shared words, then closest normalized length, then the
+   // shortest ticker.
+   candidates.sort((a, b) => {
+     if (b.overlap !== a.overlap) return b.overlap - a.overlap;
+     const la = Math.abs(a.k2.length - key.length);
+     const lb = Math.abs(b.k2.length - key.length);
+     if (la !== lb) return la - lb;
+     return a.row.ticker.length - b.row.ticker.length;
+   });
+   return candidates[0].row;
+ }
+
+ // Look up the exchange and SIC classification for a CIK via
+ // data.sec.gov/submissions. Best-effort: a non-OK response, a network or
+ // timeout failure, or an unreadable body all yield an empty object.
+ async function fetchSecProfile(cik: number | string): Promise<{
+   exchange?: string;
+   sic?: string;
+   sic_description?: string;
+ }> {
+   // The submissions endpoint expects the CIK zero-padded to 10 digits.
+   const paddedCik = String(cik).padStart(10, "0");
+   try {
+     const response = await fetch(`https://data.sec.gov/submissions/CIK${paddedCik}.json`, {
+       headers: { "User-Agent": UA },
+       signal: AbortSignal.timeout(10000),
+     });
+     if (!response.ok) {
+       return {};
+     }
+     const { exchanges, sic, sicDescription } = await response.json();
+     // Only the first listed exchange is reported.
+     const exchange = Array.isArray(exchanges) ? exchanges[0] : undefined;
+     return { exchange, sic, sic_description: sicDescription };
+   } catch {
+     return {};
+   }
+ }
+
+ // Stooq daily quote (free CSV endpoint). The response is a header line
+ // followed by one data row whose columns are, in order:
+ //   Symbol,Date,Time,Open,High,Low,Close,Volume
+ // Returns null on a non-OK response, a missing data row, an unknown symbol,
+ // or any thrown error.
+ async function fetchStooqQuote(ticker: string): Promise<{
+   price?: number;
+   price_date?: string;
+   open?: number;
+   high?: number;
+   low?: number;
+   volume?: number;
+ } | null> {
+   // Values that parse to NaN or 0 are reported as undefined.
+   const toFloat = (text: string) => parseFloat(text) || undefined;
+   try {
+     const symbol = encodeURIComponent(ticker.toLowerCase());
+     const response = await fetch(
+       `https://stooq.com/q/l/?s=${symbol}.us&f=sd2t2ohlcv&h&e=csv`,
+       { headers: { "User-Agent": UA }, signal: AbortSignal.timeout(8000) },
+     );
+     if (!response.ok) {
+       return null;
+     }
+     const body = await response.text();
+     // Index 0 is the header; index 1 is the quote row.
+     const dataRow = body.trim().split("\n")[1];
+     if (dataRow === undefined) {
+       return null;
+     }
+     const fields = dataRow.split(",");
+     // Stooq returns "N/D" literally for unknown symbols
+     if (fields.some((field) => field === "N/D")) {
+       return null;
+     }
+     return {
+       price: toFloat(fields[6]),
+       price_date: fields[1],
+       open: toFloat(fields[3]),
+       high: toFloat(fields[4]),
+       low: toFloat(fields[5]),
+       volume: parseInt(fields[7], 10) || undefined,
+     };
+   } catch {
+     return null;
+   }
+ }
+
+ /**
+  * Resolve a company name to a US-listed ticker and attach SEC profile data
+  * plus the latest Stooq daily quote.
+  *
+  * @param name Company name to resolve.
+  * @returns A cached brief when present; otherwise "no_match" when the name
+  *          cannot be resolved, or "ok" with ticker, SEC metadata and quote
+  *          fields (quote fields stay undefined when Stooq has no data).
+  */
+ export async function fetchTickerBrief(name: string): Promise<TickerBrief> {
+   const now = new Date().toISOString();
+   const key = normalizeEntityName(name);
+   // An empty key would make every unnormalizable name share one cache entry
+   // (and return another caller's searched_name), so bypass the cache for it.
+   if (key) {
+     const cached = await cacheGet(key, "ticker");
+     if (cached) return cached;
+   }
+   const hit = await resolveSecTicker(name);
+   if (!hit) {
+     const b: TickerBrief = {
+       source: "sec+stooq",
+       fetched_at: now,
+       searched_name: name,
+       status: "no_match",
+     };
+     // 7-day TTL on no_match — company could become public.
+     // Only cache when the SEC index was actually available: a null index
+     // means resolution never ran (e.g. SEC outage), and caching that would
+     // pin a false "no_match" for a week.
+     if (key && secTickersIdx) {
+       await cacheSet(key, "ticker", b, 7 * 24 * 60 * 60 * 1000);
+     }
+     return b;
+   }
+   const [profile, quote] = await Promise.all([
+     fetchSecProfile(hit.cik_str),
+     fetchStooqQuote(hit.ticker),
+   ]);
+   // price * volume is the day's traded dollar volume, used as a rough size
+   // proxy; it is not market capitalization.
+   const cap_proxy =
+     quote?.price && quote?.volume ? quote.price * quote.volume : undefined;
+   // Intraday move: close (or last) relative to the same day's open.
+   const day_change_pct =
+     quote?.price && quote?.open && quote.open > 0
+       ? ((quote.price - quote.open) / quote.open) * 100
+       : undefined;
+   const brief: TickerBrief = {
+     source: "sec+stooq",
+     fetched_at: now,
+     searched_name: name,
+     status: "ok",
+     ticker: hit.ticker,
+     company_name: hit.title,
+     cik: String(hit.cik_str),
+     exchange: profile.exchange,
+     sic: profile.sic,
+     sic_description: profile.sic_description,
+     price: quote?.price,
+     price_date: quote?.price_date,
+     open: quote?.open,
+     high: quote?.high,
+     low: quote?.low,
+     volume: quote?.volume,
+     day_change_pct,
+     cap_proxy,
+     sec_url: `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${String(hit.cik_str).padStart(10, "0")}`,
+     stooq_url: `https://stooq.com/q/?s=${hit.ticker.toLowerCase()}.us`,
+   };
+   // 6-hour TTL on ok hits — prices go stale, but SEC metadata doesn't.
+   // (A hit implies a non-empty key: resolveSecTicker returns null otherwise.)
+   await cacheSet(key, "ticker", brief, 6 * 60 * 60 * 1000);
+   return brief;
+ }
+
+// ─── Contractor history ────────────────────────────────────────────
+ // Queries the same Chicago Socrata permits dataset for prior activity.
+ // Two time windows: last 180 days (recent velocity) and last 24 months
+ // (longer-run baseline). All-time count gives tenure. Trend classifier
+ // compares the 180d count to the average per-180d count over the
+ // preceding ~18 months (the 24mo window minus the most recent 180d).
+
+ /**
+  * Summarize a contractor's Chicago permit history.
+  *
+  * @param name Contractor name as it appears on the permit.
+  * @returns A cached brief when present; otherwise "ok" with counts, 24-month
+  *          cost, trend and up to 50 recent permits, or "error" (all counts
+  *          zero) when any of the four Socrata queries fails.
+  */
+ export async function fetchContractorHistory(name: string): Promise<ContractorHistory> {
+   const now = new Date().toISOString();
+   const key = normalizeEntityName(name);
+   const cached = await cacheGet(key, "history");
+   if (cached) return cached;
+
+   const DAY_MS = 86400000;
+   const RECENT_DAYS = 180;
+   const BASELINE_DAYS = 730;
+   const startMs = Date.now();
+   const daysAgo = (days: number) =>
+     new Date(startMs - days * DAY_MS).toISOString().slice(0, 10);
+   const d180 = daysAgo(RECENT_DAYS);
+   const d24mo = daysAgo(BASELINE_DAYS);
+   const base = "https://data.cityofchicago.org/resource/ydr8-5enu.json";
+   // Escape single quotes in contractor name per Socrata rules.
+   // Socrata LIKE with leading % prefix is substring — we need this
+   // because "TURNER CONSTRUCTION" on a new permit should match against
+   // "TURNER CONSTRUCTION COMPANY" on historical ones. Guard against
+   // false-positive hits on very short names (< 6 chars).
+   const safeName = name.replace(/'/g, "''");
+   const match =
+     name.length >= 6
+       ? `upper(contact_1_name) LIKE upper('%${safeName}%')`
+       : `contact_1_name='${safeName}'`;
+   const where180 = encodeURIComponent(`${match} AND issue_date>'${d180}'`);
+   const where24mo = encodeURIComponent(`${match} AND issue_date>'${d24mo}'`);
+   const whereAll = encodeURIComponent(match);
+
+   // One Socrata GET. Non-2xx responses throw instead of being parsed: an
+   // error payload would otherwise be read as "0 permits".
+   const getJson = async (query: string): Promise<any> => {
+     const res = await fetch(`${base}?${query}`, {
+       headers: { "User-Agent": UA },
+       signal: AbortSignal.timeout(15000),
+     });
+     if (!res.ok) throw new Error(`socrata HTTP ${res.status}`);
+     return res.json();
+   };
+
+   try {
+     const [c180, c24, cAll, recent] = await Promise.all([
+       getJson(`$select=count(*)&$where=${where180}`),
+       getJson(`$select=count(*),sum(reported_cost)&$where=${where24mo}`),
+       getJson(`$select=count(*)&$where=${whereAll}`),
+       // Pulled to 50 rows + lat/lng + permit id + description so the
+       // contractor profile heat map and timeline have something to plot.
+       getJson(
+         `$select=id,issue_date,work_type,reported_cost,street_number,street_direction,street_name,latitude,longitude,work_description&$where=${whereAll}&$order=issue_date DESC&$limit=50`,
+       ),
+     ]);
+     const n180 = parseInt(c180?.[0]?.count || "0", 10);
+     const n24 = parseInt(c24?.[0]?.count || "0", 10);
+     const cost24 = parseFloat(c24?.[0]?.sum_reported_cost || "0");
+     const nAll = parseInt(cAll?.[0]?.count || "0", 10);
+
+     // Trend: compare the 180d count to the per-180d rate of the prior period.
+     // The prior period is the baseline minus the recent window:
+     // 730 - 180 = 550 days ≈ 3.06 windows of 180 days.
+     let trend: ContractorHistory["trend"] = "unknown";
+     if (nAll === 0) trend = "unknown";
+     else if (n24 <= 1 && nAll <= 3) trend = "new";
+     else {
+       const priorWindows = (BASELINE_DAYS - RECENT_DAYS) / RECENT_DAYS;
+       const priorRate = (n24 - n180) / priorWindows; // permits per 180d
+       if (priorRate === 0 && n180 > 0) trend = "growing";
+       else if (priorRate > 0) {
+         const ratio = n180 / priorRate;
+         if (ratio > 1.5) trend = "growing";
+         else if (ratio < 0.5) trend = "declining";
+         else trend = "stable";
+       } else trend = "stable";
+     }
+
+     const brief: ContractorHistory = {
+       source: "chicago_socrata",
+       fetched_at: now,
+       searched_name: name,
+       status: "ok",
+       permits_last_180d: n180,
+       permits_last_24mo: n24,
+       permits_historical_total: nAll,
+       total_cost_last_24mo: cost24,
+       trend,
+       recent_permits: (Array.isArray(recent) ? recent : []).map((r: any) => ({
+         date: (r.issue_date || "").slice(0, 10),
+         work_type: r.work_type || "",
+         cost: parseFloat(r.reported_cost || "0"),
+         address: `${r.street_number || ""} ${r.street_direction || ""} ${r.street_name || ""}`.trim().replace(/\s+/g, " "),
+         lat: r.latitude ? parseFloat(r.latitude) : undefined,
+         lng: r.longitude ? parseFloat(r.longitude) : undefined,
+         permit_id: r.id || undefined,
+         description: r.work_description || undefined,
+       })),
+     };
+     // 12-hour TTL — activity rarely changes more than once/day
+     await cacheSet(key, "history", brief, 12 * 60 * 60 * 1000);
+     return brief;
+   } catch (e) {
+     // Errors are returned, not cached, so the next call retries.
+     return {
+       source: "chicago_socrata",
+       fetched_at: now,
+       searched_name: name,
+       status: "error",
+       permits_last_180d: 0,
+       permits_last_24mo: 0,
+       permits_historical_total: 0,
+       total_cost_last_24mo: 0,
+       trend: "unknown",
+       recent_permits: [],
+       error: (e as Error).message,
+     };
+   }
+ }
+
+// ─── Cook County Assessor (property owner) ─────────────────────────
+ // Free Socrata at datacatalog.cookcountyil.gov/resource/c49d-89sn.json.
+ // Address normalization is gentle: trim, collapse whitespace, uppercase.
+ // Nothing here strips suite/unit text, so pass the base street address.
+ // Match using LIKE prefix because addresses on building permits are
+ // often missing trailing details ("4809 N BROADWAY" vs
+ // "4809 N BROADWAY UNIT 5" in the parcel record).
+
+ /**
+  * Look up the Cook County Assessor parcel record for a Chicago address.
+  *
+  * @param address Street address, e.g. "4809 N BROADWAY".
+  * @returns "no_match" for addresses under 4 characters or with no parcel,
+  *          "ok" with parcel and mailing fields for the first match, or
+  *          "error" with a message on HTTP/network failure (not cached).
+  */
+ export async function fetchPropertyOwner(address: string): Promise<PropertyOwnerBrief> {
+   const now = new Date().toISOString();
+   const cleaned = address.trim().replace(/\s+/g, " ").toUpperCase();
+   // Too short to be a usable address. Nothing is ever cached under such a
+   // key, so answer before touching the cache.
+   if (!cleaned || cleaned.length < 4) {
+     return {
+       source: "cook_county_assessor",
+       fetched_at: now,
+       searched_address: address,
+       status: "no_match",
+     };
+   }
+   const cached = await cacheGet(cleaned, "owner");
+   if (cached) return cached;
+
+   const safe = cleaned.replace(/'/g, "''");
+   // Match the full address text as a prefix; LIKE upper(...) for case fold.
+   const where = encodeURIComponent(
+     `upper(property_address) like upper('${safe}%') AND property_city='CHICAGO'`,
+   );
+   // Order before limiting so the pick is deterministic. Ascending order puts
+   // an exact address ahead of longer variants such as "... UNIT 5".
+   const sourceUrl = `https://datacatalog.cookcountyil.gov/resource/c49d-89sn.json?$where=${where}&$order=property_address&$limit=1`;
+   try {
+     const res = await fetch(sourceUrl, {
+       headers: { "User-Agent": UA },
+       signal: AbortSignal.timeout(10000),
+     });
+     if (!res.ok) throw new Error(`assessor HTTP ${res.status}`);
+     const rows = (await res.json()) as any[];
+     if (!rows || rows.length === 0) {
+       const empty: PropertyOwnerBrief = {
+         source: "cook_county_assessor",
+         fetched_at: now,
+         searched_address: address,
+         status: "no_match",
+         source_url: sourceUrl,
+       };
+       // 7-day TTL on misses.
+       await cacheSet(cleaned, "owner", empty, 7 * 24 * 60 * 60 * 1000);
+       return empty;
+     }
+     const r = rows[0];
+     const brief: PropertyOwnerBrief = {
+       source: "cook_county_assessor",
+       fetched_at: now,
+       searched_address: address,
+       status: "ok",
+       pin: r.pin,
+       property_address: r.property_address,
+       property_zip: r.property_zip,
+       mailing_address: r.mailing_address,
+       mailing_city: r.mailing_city,
+       mailing_state: r.mailing_state,
+       mailing_zip: r.mailing_zip,
+       township: r.township_name,
+       ward: r.ward,
+       longitude: r.longitude,
+       latitude: r.latitude,
+       source_url: sourceUrl,
+     };
+     // Owner records change rarely. No TTL is passed, so cacheSet's default
+     // applies. NOTE(review): the original comment says "30d" — confirm that
+     // is cacheSet's default.
+     await cacheSet(cleaned, "owner", brief);
+     return brief;
+   } catch (e) {
+     return {
+       source: "cook_county_assessor",
+       fetched_at: now,
+       searched_address: address,
+       status: "error",
+       error: (e as Error).message,
+     };
+   }
+ }
+
+// ─── Chicago Building Violations ───────────────────────────────────
+ // Dataset 22u3-xenr. Address-keyed. Status field tells us which are
+ // open vs complied. Stop-work orders surface in violation_description
+ // (and in the inspector comments, which are scanned as well).
+ /**
+  * Summarize Chicago building violations recorded at an address.
+  *
+  * @param address Street address; matched as an upper-cased prefix.
+  * @returns "ok" with total/open/stop-work counts and the 5 newest
+  *          violations, or "error" (all counts zero) for addresses under 6
+  *          characters and for HTTP/network failures. Errors are not cached.
+  */
+ export async function fetchPropertyViolations(address: string): Promise<PropertyViolationsBrief> {
+   const now = new Date().toISOString();
+   const cleaned = address.trim().replace(/\s+/g, " ").toUpperCase();
+   // Nothing is ever cached under a too-short key, so reject before the
+   // cache lookup.
+   if (!cleaned || cleaned.length < 6) {
+     return {
+       source: "chicago_building_violations",
+       fetched_at: now,
+       searched_address: address,
+       status: "error",
+       total_violations: 0,
+       open_violations: 0,
+       stop_work_orders: 0,
+       recent_violations: [],
+       error: "address too short",
+     };
+   }
+   const cached = await cacheGet(cleaned, "violations");
+   if (cached) return cached;
+
+   // The counts below are computed from the returned rows, so the row limit
+   // caps every total. 50 rows undercounted busy addresses; 1000 keeps the
+   // payload bounded while covering all but extreme cases.
+   const FETCH_LIMIT = 1000;
+   const safe = cleaned.replace(/'/g, "''");
+   const where = encodeURIComponent(`upper(address) like upper('${safe}%')`);
+   const url = `https://data.cityofchicago.org/resource/22u3-xenr.json?$where=${where}&$order=violation_date DESC&$limit=${FETCH_LIMIT}`;
+   try {
+     const res = await fetch(url, {
+       headers: { "User-Agent": UA },
+       signal: AbortSignal.timeout(10000),
+     });
+     if (!res.ok) throw new Error(`violations HTTP ${res.status}`);
+     const rows = (await res.json()) as any[];
+     const total = rows.length;
+     const open = rows.filter(
+       (r) => /OPEN|FAILED|NO ENTRY/i.test(r.violation_status || ""),
+     ).length;
+     const stopWork = rows.filter((r) =>
+       /STOP[ -]?WORK/i.test(`${r.violation_description || ""} ${r.violation_inspector_comments || ""}`),
+     ).length;
+     // Rows arrive newest-first, so the head of the list is the most recent.
+     const recent = rows.slice(0, 5).map((r) => ({
+       date: (r.violation_date || "").slice(0, 10),
+       status: r.violation_status || "",
+       description: (r.violation_description || "").slice(0, 120),
+       department: r.department_bureau || "",
+     }));
+     const brief: PropertyViolationsBrief = {
+       source: "chicago_building_violations",
+       fetched_at: now,
+       searched_address: address,
+       status: "ok",
+       total_violations: total,
+       open_violations: open,
+       stop_work_orders: stopWork,
+       most_recent_date: rows[0]?.violation_date?.slice(0, 10),
+       recent_violations: recent,
+     };
+     // 12-hour TTL.
+     await cacheSet(cleaned, "violations", brief, 12 * 60 * 60 * 1000);
+     return brief;
+   } catch (e) {
+     return {
+       source: "chicago_building_violations",
+       fetched_at: now,
+       searched_address: address,
+       status: "error",
+       total_violations: 0,
+       open_violations: 0,
+       stop_work_orders: 0,
+       recent_violations: [],
+       error: (e as Error).message,
+     };
+   }
+ }
+
+// ─── Union lookup (static) ─────────────────────────────────────────
+ // Map permit work_type → likely Chicago Local(s) + their training
+ // centers (JATCs). Surfaces "every union that handles this contract"
+ // per J's request. Static because Chicago union jurisdictions are
+ // stable and the granularity is "typical assignment, not certainty
+ // per project."
+ //
+ // Keys are trade buckets chosen by unionsForWorkType; GENERAL is the
+ // fallback bucket for work types that match no specific trade.
+
+ const UNION_MAP: Record<string, UnionMapping> = {
+   ELECTRICAL: {
+     trade: "Electrical",
+     primary_locals: [
+       { name: "IBEW Local 134", local: "134", jurisdiction: "Inside electrical (Chicago)" },
+       { name: "IBEW Local 701", local: "701", jurisdiction: "Low-voltage / suburban" },
+     ],
+     training_centers: [
+       { name: "IBEW 134 / NECA JATC", address: "6301 W 115th St, Alsip IL", program_length: "5-yr apprenticeship" },
+       { name: "IBEW 701 / NECA JATC", address: "1979 Bucktail Ln, Sugar Grove IL", program_length: "5-yr apprenticeship" },
+     ],
+   },
+   PLUMBING: {
+     trade: "Plumbing",
+     primary_locals: [
+       { name: "Plumbers Local 130", local: "130", jurisdiction: "Chicago + Cook County" },
+     ],
+     training_centers: [
+       { name: "Plumbers 130 JAC", address: "1340 W Washington Blvd, Chicago", program_length: "5-yr apprenticeship" },
+     ],
+   },
+   MECHANICAL: {
+     trade: "Mechanical / HVAC / Pipe",
+     primary_locals: [
+       { name: "Pipe Fitters Local 597", local: "597", jurisdiction: "HVAC + process piping" },
+       { name: "Sheet Metal Workers Local 73", local: "73", jurisdiction: "Sheet metal / ductwork" },
+       { name: "IUOE Local 399", local: "399", jurisdiction: "Stationary engineers (boilers, chillers)" },
+     ],
+     training_centers: [
+       { name: "Local 597 Training Center", address: "10806 W 47th St, McCook IL", program_length: "5-yr" },
+       { name: "Sheet Metal 73 JAC", address: "16410 S 84th Ave, Tinley Park IL", program_length: "5-yr" },
+     ],
+   },
+   REROOFING: {
+     trade: "Roofing",
+     primary_locals: [
+       { name: "Roofers Local 11", local: "11", jurisdiction: "Chicago + suburbs" },
+     ],
+     training_centers: [
+       { name: "Roofers Local 11 Training", address: "9525 S Industrial Dr, Bridgeview IL", program_length: "3-yr" },
+     ],
+   },
+   MASONRY: {
+     trade: "Masonry / Concrete",
+     primary_locals: [
+       { name: "Bricklayers Local 21", local: "21", jurisdiction: "Brick + stone" },
+       { name: "Cement Masons Local 502", local: "502", jurisdiction: "Concrete finishing" },
+     ],
+     training_centers: [
+       { name: "Bricklayers 21 Training", address: "11244 S Cottage Grove, Chicago", program_length: "3-yr" },
+       { name: "Cement Masons 502 JATC", address: "739 25th Ave, Bellwood IL", program_length: "3-yr" },
+     ],
+   },
+   GENERAL: {
+     trade: "General construction (multi-trade)",
+     primary_locals: [
+       { name: "Chicago Regional Council of Carpenters", local: "1/13/58/1051", jurisdiction: "Carpentry + drywall" },
+       { name: "LIUNA Locals 1/2/4/6/76/96/149/152/225/269/296/1001", local: "various", jurisdiction: "Laborers / earthwork" },
+       { name: "Operating Engineers Local 150", local: "150", jurisdiction: "Heavy equipment (cranes, excavators)" },
+       { name: "Ironworkers Local 1/111/136/395", local: "1/111/136/395", jurisdiction: "Structural / rebar" },
+       { name: "Teamsters Local 731/705", local: "731/705", jurisdiction: "Material haul" },
+     ],
+     training_centers: [
+       { name: "Chicago Regional Carpenters Training Center", address: "11001 W Roosevelt, Westchester IL", program_length: "4-yr" },
+       { name: "Laborers Training School", address: "3636 W Pershing Rd, Chicago", program_length: "3-yr" },
+       { name: "Operating Engineers 150 Training", address: "16125 W 100th St, Wilmington IL", program_length: "3-yr" },
+       { name: "Iron Workers Local 1 Apprenticeship", address: "5775 W 26th St, Cicero IL", program_length: "3-yr" },
+     ],
+   },
+   WRECKING: {
+     trade: "Wrecking / Demolition",
+     primary_locals: [
+       { name: "LIUNA Locals (laborers)", local: "1/2/4/...", jurisdiction: "Manual demolition" },
+       { name: "Operating Engineers Local 150", local: "150", jurisdiction: "Heavy equipment demo" },
+       { name: "Teamsters Local 731", local: "731", jurisdiction: "Debris haul" },
+     ],
+     training_centers: [
+       { name: "Laborers Training School (HazMat track)", address: "3636 W Pershing Rd, Chicago", program_length: "3-yr" },
+     ],
+   },
+   SIGN: {
+     trade: "Signs / Signage",
+     primary_locals: [
+       { name: "IBEW Local 134 (electric signs)", local: "134", jurisdiction: "Lit signage" },
+       { name: "Painters DC 14 Local 30 (sign painters)", local: "30", jurisdiction: "Hand-painted / vinyl" },
+     ],
+     training_centers: [
+       { name: "Painters DC 14 Training", address: "1456 S La Salle, Chicago", program_length: "3-yr" },
+     ],
+   },
+ };
+
+ /**
+  * Pick the union mapping that typically covers a permit work type.
+  *
+  * Matching is case-insensitive and first-match-wins in the order listed
+  * below, so "ELECTRIC SIGN" resolves to ELECTRICAL rather than SIGN.
+  *
+  * @param workType Permit work_type text; may be undefined.
+  * @returns null for a missing/empty work type, the matching trade entry, or
+  *          UNION_MAP.GENERAL when no trade keyword matches.
+  */
+ export function unionsForWorkType(workType: string | undefined): UnionMapping | null {
+   if (!workType) return null;
+   const w = workType.toUpperCase();
+   // Short keywords are anchored at a word start so they cannot fire inside
+   // unrelated words: VENT in "EVENT", HEAT in "THEATER"/"SHEATHING", CEMENT
+   // in "REPLACEMENT", SIGN in "DESIGN". ROOF also accepts the "RE" prefix
+   // ("REROOFING") while rejecting "WATERPROOFING".
+   const rules: [RegExp, UnionMapping][] = [
+     [/ELECTRIC/, UNION_MAP.ELECTRICAL],
+     [/PLUMB/, UNION_MAP.PLUMBING],
+     [/MECHANIC|HVAC|REFRIGERAT|\bVENT|\bHEAT/, UNION_MAP.MECHANICAL],
+     [/\b(?:RE)?ROOF/, UNION_MAP.REROOFING],
+     [/MASON|CONCRETE|BRICK|\bCEMENT/, UNION_MAP.MASONRY],
+     [/WRECK|DEMO/, UNION_MAP.WRECKING],
+     [/\bSIGN/, UNION_MAP.SIGN],
+   ];
+   for (const [pattern, mapping] of rules) {
+     if (pattern.test(w)) return mapping;
+   }
+   return UNION_MAP.GENERAL;
+ }
+
+// ─── USASpending.gov federal contracts ─────────────────────────────
+ // api.usaspending.gov/api/v2/search/spending_by_award/ — POST with
+ // JSON body. Returns prime award records by recipient name.
+ // Free, no auth. Slow on first call (~3-5s) but cacheable 24h.
+ /**
+  * Summarize federal contract awards (since 2020-01-01) for a recipient name.
+  *
+  * Only the first page is requested (the 25 largest awards by amount), so
+  * total_awards_count and total_awards_value describe that page: the count
+  * never exceeds 25 and the value is a lower bound for larger recipients.
+  *
+  * @param name Recipient name to search for.
+  * @returns A cached brief when present; "no_match" for names under 5
+  *          characters or with no results; "ok" with totals, the top 3
+  *          awarding agencies and the latest action date; or "error" with a
+  *          message on HTTP/network failure (not cached).
+  */
+ export async function fetchFederalContracts(name: string): Promise<FederalContractsBrief> {
+   const now = new Date().toISOString();
+   const key = normalizeEntityName(name);
+   const cached = await cacheGet(key, "federal");
+   if (cached) return cached;
+   if (!name || name.length < 5) {
+     return {
+       source: "usaspending.gov",
+       fetched_at: now,
+       searched_name: name,
+       status: "no_match",
+       total_awards_count: 0,
+       total_awards_value: 0,
+       top_agencies: [],
+     };
+   }
+   const url = "https://api.usaspending.gov/api/v2/search/spending_by_award/";
+   // Human-facing search page reported back as source_url.
+   const sourceUrl = `https://www.usaspending.gov/search/?keywords=${encodeURIComponent(name)}`;
+   const body = {
+     filters: {
+       award_type_codes: ["A", "B", "C", "D"], // contract types
+       recipient_search_text: [name],
+       time_period: [
+         { start_date: "2020-01-01", end_date: new Date().toISOString().slice(0, 10) },
+       ],
+     },
+     fields: [
+       "Award ID",
+       "Recipient Name",
+       "Award Amount",
+       "Awarding Agency",
+       "Action Date",
+       "NAICS",
+     ],
+     sort: "Award Amount",
+     order: "desc",
+     limit: 25,
+     page: 1,
+   };
+   try {
+     const res = await fetch(url, {
+       method: "POST",
+       headers: { "Content-Type": "application/json", "User-Agent": UA },
+       body: JSON.stringify(body),
+       signal: AbortSignal.timeout(15000),
+     });
+     if (!res.ok) throw new Error(`usaspending HTTP ${res.status}`);
+     const j = (await res.json()) as any;
+     const results = j.results || [];
+     if (results.length === 0) {
+       const empty: FederalContractsBrief = {
+         source: "usaspending.gov",
+         fetched_at: now,
+         searched_name: name,
+         status: "no_match",
+         total_awards_count: 0,
+         total_awards_value: 0,
+         top_agencies: [],
+         source_url: sourceUrl,
+       };
+       // 7-day TTL on misses.
+       await cacheSet(key, "federal", empty, 7 * 24 * 60 * 60 * 1000);
+       return empty;
+     }
+     // Unparseable or missing amounts count as 0.
+     const amountOf = (r: any) => parseFloat(r["Award Amount"]) || 0;
+     let totalValue = 0;
+     const agencyMap: Record<string, number> = {};
+     for (const r of results) {
+       const amount = amountOf(r);
+       totalValue += amount;
+       const a = r["Awarding Agency"] || "Unknown";
+       agencyMap[a] = (agencyMap[a] || 0) + amount;
+     }
+     const top = Object.entries(agencyMap)
+       .map(([agency, value]) => ({ agency, value }))
+       .sort((a, b) => b.value - a.value)
+       .slice(0, 3);
+     // Latest action date by plain string order (expects ISO-style dates);
+     // undefined when no result carries one.
+     const recent = results
+       .map((r: any) => r["Action Date"])
+       .filter(Boolean)
+       .sort()
+       .reverse()[0];
+     const brief: FederalContractsBrief = {
+       source: "usaspending.gov",
+       fetched_at: now,
+       searched_name: name,
+       status: "ok",
+       total_awards_count: results.length,
+       total_awards_value: totalValue,
+       most_recent_award_date: recent,
+       top_agencies: top,
+       source_url: sourceUrl,
+     };
+     // 24-hour TTL on hits.
+     await cacheSet(key, "federal", brief, 24 * 60 * 60 * 1000);
+     return brief;
+   } catch (e) {
+     return {
+       source: "usaspending.gov",
+       fetched_at: now,
+       searched_name: name,
+       status: "error",
+       total_awards_count: 0,
+       total_awards_value: 0,
+       top_agencies: [],
+       error: (e as Error).message,
+     };
+   }
+ }
+
+// ─── NLRB cases (placeholder) ──────────────────────────────────────
+ // NLRB case lookup is not wired up yet: the case search sits behind a JS
+ // app that needs session cookies + form tokens, so a short scraper is
+ // required. Until then this returns a fixed "needs_setup" placeholder
+ // and performs no network request.
+ // NOTE(review): the return type's type argument appears to be missing
+ // (bare `Promise`); the brief's type name is not visible here — confirm.
+ export async function fetchNlrbBrief(name: string): Promise {
+   const fetched_at = new Date().toISOString();
+   const reason =
+     "NLRB case search at apps.nlrb.gov/case requires session-aware scraping; queued for batch 5b.";
+   return {
+     source: "nlrb.gov",
+     fetched_at,
+     searched_name: name,
+     status: "needs_setup",
+     reason,
+   };
+ }
+
+// ─── Risk scoring ──────────────────────────────────────────────────
+
+ // Combine the OSHA, ILSOS and permit-history briefs into a risk score.
+ //   score   — null until some signal sets it; otherwise 0-100
+ //   factors — machine-readable reasons, in the order they were evaluated
+ //   partial — true when OSHA is missing/errored or ILSOS is not "ok"
+ function scoreEntity(
+   osha: OshaBrief | null,
+   ilsos: IlsosBrief | null,
+   history: ContractorHistory | null,
+ ) {
+   const MS_PER_DAY = 1000 * 60 * 60 * 24;
+   const factors: string[] = [];
+   let score: number | null = null;
+   let partial = false;
+
+   if (!osha) {
+     partial = true;
+   } else if (osha.status === "ok") {
+     // Base score is tiered by inspection count.
+     const n = osha.inspection_count;
+     const [tierScore, tierFactor]: [number, string] =
+       n === 0
+         ? [10, "osha_clean:no_inspections"]
+         : n < 3
+           ? [25, `osha_low:${n}_inspections`]
+           : n < 10
+             ? [50, `osha_moderate:${n}_inspections`]
+             : [75, `osha_high:${n}_inspections`];
+     score = tierScore;
+     factors.push(tierFactor);
+     // An inspection within the last 180 days adds 15 points, capped at 100.
+     if (osha.most_recent_date) {
+       const parsedMs = Date.parse(osha.most_recent_date) || 0;
+       const ageDays = (Date.now() - parsedMs) / MS_PER_DAY;
+       if (ageDays < 180) {
+         factors.push("osha_recent_activity:<180d");
+         score = Math.min(100, tierScore + 15);
+       }
+     }
+   } else if (osha.status === "no_match") {
+     factors.push("osha_no_match");
+   } else {
+     factors.push("osha_error:" + (osha.error || "unknown"));
+     partial = true;
+   }
+
+   const ilsosOk = ilsos !== null && ilsos !== undefined && ilsos.status === "ok";
+   if (!ilsosOk) {
+     partial = true;
+     factors.push("ilsos_pending");
+   }
+
+   // A brand-new entity with almost no permit tenure is a classic
+   // LLC-shuffle signature; it sets a floor of 35 regardless of OSHA.
+   if (history?.status === "ok") {
+     switch (history.trend) {
+       case "new":
+         factors.push("new_entity:<=3_permits_ever");
+         score = Math.max(score ?? 0, 35);
+         break;
+       case "growing":
+         factors.push(`activity_growing:${history.permits_last_180d}_in_180d`);
+         break;
+       case "declining":
+         factors.push(`activity_declining:${history.permits_last_180d}_in_180d`);
+         break;
+     }
+   }
+
+   return { score, factors, partial };
+ }
+
+// ─── CTA L stations (static, in-memory) ────────────────────────────
+// Pulled once from Chicago Socrata 8pix-ypme. ~150 stations, rarely
+// changes. Compute great-circle distance to nearest with haversine.
+ /** One CTA 'L' station, collapsed from the per-stop rows of the dataset. */
+ type CtaStation = {
+   station_name: string;
+   lat: number;
+   lon: number;
+   lines: string[];
+ };
+ // Result of the last successful load. Stays null after a failed load so
+ // the next caller retries instead of seeing an empty list forever.
+ let _ctaStations: CtaStation[] | null = null;
+ // In-flight load, shared so concurrent cold callers issue one request.
+ let _ctaLoading: Promise<CtaStation[]> | null = null;
+
+ // Row flag → display name, in the order names are emitted in `lines`.
+ const CTA_LINE_FLAGS: ReadonlyArray<readonly [string, string]> = [
+   ["red", "Red"],
+   ["blue", "Blue"],
+   ["g", "Green"],
+   ["brn", "Brown"],
+   ["p", "Purple"],
+   ["y", "Yellow"],
+   ["pnk", "Pink"],
+   ["o", "Orange"],
+ ];
+
+ // Fetches the stop list and collapses it to one entry per station.
+ // Throws on HTTP or parse failure; the caller decides how to degrade.
+ async function loadCtaStations(): Promise<CtaStation[]> {
+   const res = await fetch(
+     "https://data.cityofchicago.org/resource/8pix-ypme.json?$limit=400",
+     { headers: { "User-Agent": UA }, signal: AbortSignal.timeout(15000) },
+   );
+   if (!res.ok) throw new Error(`cta HTTP ${res.status}`);
+   const rows = (await res.json()) as any[];
+   const byStation = new Map<string, { station: CtaStation; served: Set<string> }>();
+   for (const r of rows) {
+     const loc = r.location || {};
+     const lat = parseFloat(loc.latitude || "");
+     const lon = parseFloat(loc.longitude || "");
+     if (Number.isNaN(lat) || Number.isNaN(lon)) continue;
+     const station = r.station_name || r.stop_name;
+     if (!station) continue;
+     // Several distinct stations share a station_name, so the name alone
+     // is not a safe key. NOTE(review): assumes the dataset's `map_id`
+     // column is the per-station id — confirm against the 8pix-ypme schema.
+     // Falls back to the name when the column is absent.
+     const key = String(r.map_id ?? station);
+     let entry = byStation.get(key);
+     if (!entry) {
+       // Coordinates come from whichever stop of the station is seen first.
+       entry = { station: { station_name: station, lat, lon, lines: [] }, served: new Set() };
+       byStation.set(key, entry);
+     }
+     // Union the line flags across every stop of the station.
+     for (const [flag, label] of CTA_LINE_FLAGS) {
+       if (r[flag]) entry.served.add(label);
+     }
+   }
+   return [...byStation.values()].map(({ station, served }) => ({
+     ...station,
+     lines: CTA_LINE_FLAGS.map(([, label]) => label).filter((label) => served.has(label)),
+   }));
+ }
+
+ /**
+  * Returns the in-memory station list, loading it on first use.
+  *
+  * A successful load is kept for the life of the process. A failed load is
+  * logged and yields `[]` for that call only, so the next call retries.
+  *
+  * @returns The station list; never rejects.
+  */
+ async function ensureCtaLoaded(): Promise<CtaStation[]> {
+   if (_ctaStations) return _ctaStations;
+   if (!_ctaLoading) {
+     _ctaLoading = loadCtaStations()
+       .then((stations) => {
+         _ctaStations = stations;
+         return stations;
+       })
+       .catch((e) => {
+         console.warn("[cta] load failed:", (e as Error).message);
+         return [] as CtaStation[];
+       })
+       .finally(() => {
+         _ctaLoading = null;
+       });
+   }
+   return _ctaLoading;
+ }
+
+ /**
+  * Great-circle distance in metres between two points given in decimal
+  * degrees, via the haversine formula on a sphere of radius 6,371,000 m.
+  */
+ function haversineMeters(lat1: number, lon1: number, lat2: number, lon2: number): number {
+   const earthRadiusM = 6371000;
+   const radians = (deg: number) => (deg * Math.PI) / 180;
+   const halfLatSin = Math.sin(radians(lat2 - lat1) / 2);
+   const halfLonSin = Math.sin(radians(lon2 - lon1) / 2);
+   const h =
+     halfLatSin ** 2 +
+     Math.cos(radians(lat1)) * Math.cos(radians(lat2)) * halfLonSin ** 2;
+   return 2 * earthRadiusM * Math.asin(Math.sqrt(h));
+ }
+
+ /**
+  * Finds the loaded CTA station closest to the given point.
+  *
+  * @returns `{ station, dist }` with `dist` in metres, or null when no
+  *   stations are loaded. The earliest station wins a distance tie.
+  */
+ async function nearestCtaStation(lat: number, lon: number) {
+   type Candidate = { station: CtaStation; dist: number };
+   const stations = await ensureCtaLoaded();
+   if (stations.length === 0) return null;
+   return stations.reduce<Candidate | null>((closest, station) => {
+     const dist = haversineMeters(lat, lon, station.lat, station.lon);
+     return !closest || dist < closest.dist ? { station, dist } : closest;
+   }, null);
+ }
+
+// ─── Nearby permits (labor competition signal) ─────────────────────
+// Socrata supports within_circle(location, lat, lon, radius_meters)
+// for geo queries. 800m ≈ 0.5mi. Filter to permits issued within the
+// last 90d so we capture *current* competition only.
+ /**
+  * Counts permits issued in the last 90 days within 800 m of a point,
+  * restricted to reported cost above 50,000, and sums their reported cost.
+  *
+  * @returns `{ count, total_value }`, or null on a non-OK response or any
+  *   fetch/parse failure (best-effort signal).
+  */
+ async function fetchNearbyPermits(lat: number, lon: number) {
+   const windowMs = 90 * 86400000;
+   const cutoff = new Date(Date.now() - windowMs).toISOString().slice(0, 10);
+   const filter = `within_circle(location, ${lat}, ${lon}, 800) AND issue_date>'${cutoff}' AND reported_cost>50000`;
+   const endpoint = `https://data.cityofchicago.org/resource/ydr8-5enu.json?$select=count(*),sum(reported_cost)&$where=${encodeURIComponent(filter)}`;
+   try {
+     const response = await fetch(endpoint, {
+       headers: { "User-Agent": UA },
+       signal: AbortSignal.timeout(10000),
+     });
+     if (!response.ok) return null;
+     const payload = (await response.json()) as any[];
+     // The aggregate query yields a single row; tolerate it being absent.
+     const first = payload?.[0] || {};
+     const count = parseInt(first.count || "0", 10);
+     const total_value = parseFloat(first.sum_reported_cost || "0");
+     return { count, total_value };
+   } catch {
+     return null;
+   }
+ }
+
+// ─── Site Context (Chicago TIF + landmark + community area) ────────
+// All Socrata. Lookup by address (street_number + street_name where
+// available, lat/long otherwise). Bundle into a single brief so the
+// UI gets one tile instead of 3.
+ const TIF_DATASET = "imgn-2suh"; // TIF District Programming 2022-2026
+ const LANDMARK_DATASET = "habu-n236"; // Individual Landmarks - Map
+
+ // Best-effort landmark lookup by address substring. Any failure is logged
+ // and reported as "not a landmark" so it never blocks the brief.
+ async function lookupLandmark(
+   cleanedAddress: string,
+ ): Promise<{ isLandmark: boolean; name?: string }> {
+   // Double single quotes so the address cannot break out of the literal.
+   const safe = cleanedAddress.replace(/'/g, "''");
+   const url = `https://data.cityofchicago.org/resource/${LANDMARK_DATASET}.json?$where=upper(address)%20like%20upper('%25${encodeURIComponent(safe)}%25')&$limit=1`;
+   try {
+     const r = await fetch(url, {
+       headers: { "User-Agent": UA },
+       signal: AbortSignal.timeout(8000),
+     });
+     if (r.ok) {
+       const rows = (await r.json()) as any[];
+       if (rows && rows.length) {
+         return { isLandmark: true, name: rows[0].landmark_name || rows[0].name };
+       }
+     }
+   } catch (e) {
+     // Non-fatal: the rest of the brief is still useful.
+     console.warn("[site_context] landmark lookup failed:", (e as Error).message);
+   }
+   return { isLandmark: false };
+ }
+
+ /**
+  * Builds the bundled site-context brief for an address: landmark status,
+  * TIF district, nearest CTA station and nearby permit activity.
+  *
+  * The address is trimmed, whitespace-collapsed and upper-cased to form the
+  * cache key. An empty address returns `no_match` without touching the
+  * cache or the network. The geo lookups run only when the owner record
+  * supplies a parseable latitude/longitude.
+  *
+  * @param address Street address as entered.
+  * @returns The cached or freshly built brief.
+  */
+ export async function fetchSiteContext(address: string): Promise<SiteContextBrief> {
+   const now = new Date().toISOString();
+   const cleaned = address.trim().replace(/\s+/g, " ").toUpperCase();
+   // Guard before the cache read: an empty key is never written, so
+   // looking it up is wasted work.
+   if (!cleaned) {
+     return {
+       source: "chicago_socrata",
+       fetched_at: now,
+       searched_address: address,
+       status: "no_match",
+     };
+   }
+   const cached = await cacheGet(cleaned, "site_context");
+   if (cached) return cached;
+   // The owner record (source of lat/long) and the landmark lookup are
+   // independent, so run them concurrently.
+   const [owner, landmark] = await Promise.all([
+     fetchPropertyOwner(address),
+     lookupLandmark(cleaned),
+   ]);
+   let lat: string | undefined, lon: string | undefined, ward: string | undefined;
+   if (owner.status === "ok") {
+     lat = owner.latitude;
+     lon = owner.longitude;
+     ward = owner.ward;
+   }
+   // Community area number → name table is public on Chicago.gov but
+   // we'd need it static; ship without name for now (number suffices).
+   // TIF polygon containment + CTA distance + nearby permits — all
+   // depend on having lat/long from the owner record.
+   let inTif = false;
+   let tifName: string | undefined;
+   let ctaName: string | undefined;
+   let ctaLines: string | undefined;
+   let ctaDist: number | undefined;
+   let nearbyCount: number | undefined;
+   let nearbyValue: number | undefined;
+   if (lat && lon) {
+     const latNum = parseFloat(lat);
+     const lonNum = parseFloat(lon);
+     if (!Number.isNaN(latNum) && !Number.isNaN(lonNum)) {
+       // All three geo lookups in parallel. findTifDistrict takes
+       // (lon, lat), the others (lat, lon).
+       const [tif, cta, nearby] = await Promise.all([
+         findTifDistrict(lonNum, latNum),
+         nearestCtaStation(latNum, lonNum),
+         fetchNearbyPermits(latNum, lonNum),
+       ]);
+       if (tif) {
+         inTif = true;
+         tifName = tif.name;
+       }
+       if (cta) {
+         ctaName = cta.station.station_name;
+         ctaLines = cta.station.lines.join("/");
+         ctaDist = Math.round(cta.dist);
+       }
+       if (nearby) {
+         nearbyCount = nearby.count;
+         nearbyValue = nearby.total_value;
+       }
+     }
+   }
+   const brief: SiteContextBrief = {
+     source: "chicago_socrata",
+     fetched_at: now,
+     searched_address: address,
+     status: "ok",
+     in_tif_district: inTif,
+     tif_district_name: tifName,
+     is_landmark: landmark.isLandmark,
+     landmark_name: landmark.name,
+     community_area: undefined,
+     ward,
+     latitude: lat,
+     longitude: lon,
+     nearest_cta_station: ctaName,
+     nearest_cta_lines: ctaLines,
+     nearest_cta_distance_m: ctaDist,
+     nearby_permits_90d: nearbyCount,
+     nearby_permits_value_90d: nearbyValue,
+   };
+   await cacheSet(cleaned, "site_context", brief);
+   return brief;
+ }
+
+// ─── MBE/WBE/DBE diversity certification ───────────────────────────
+// Chicago Dept of Procurement Economic Inclusion Certified Vendors.
+ const DIVERSITY_DATASET = "2iq3-bugw";
+
+ /**
+  * Looks up certification rows whose `legal_name` contains the given name.
+  *
+  * Only a 404 is treated as the known "dataset unavailable" case and cached
+  * for 7 days. Any other non-OK status is treated as transient: it is
+  * reported as `needs_setup` with the HTTP status but is NOT cached, so the
+  * next call retries.
+  *
+  * @param name Entity name as it appears on the permit.
+  * @returns The cached or freshly built certification brief; never rejects
+  *   on fetch failure.
+  */
+ export async function fetchDiversityCerts(name: string): Promise<DiversityCertBrief> {
+   const now = new Date().toISOString();
+   const cacheKey = normalizeEntityName(name);
+   const cached = await cacheGet(cacheKey, "diversity");
+   if (cached) return cached;
+   // Double single quotes so the name cannot break out of the literal.
+   const safe = name.replace(/'/g, "''");
+   const url = `https://data.cityofchicago.org/resource/${DIVERSITY_DATASET}.json?$where=upper(legal_name)%20like%20upper('%25${encodeURIComponent(safe)}%25')&$limit=10`;
+   try {
+     const r = await fetch(url, {
+       headers: { "User-Agent": UA },
+       signal: AbortSignal.timeout(8000),
+     });
+     if (r.status === 404) {
+       // Dataset listed in catalog but resource endpoint 404s — known issue.
+       const skeletal: DiversityCertBrief = {
+         source: "chicago_dps_economic_inclusion",
+         fetched_at: now,
+         searched_name: name,
+         status: "needs_setup",
+         reason:
+           "Chicago MBE/WBE/DBE datasets (2iq3-bugw, 69yt-tb5j, ci93-uc8s, 8dxf-6ahp) are catalog-listed but the resource endpoint returns 404. Likely archived or auth-gated. Path: contact Chicago DPS for alternate access OR use the procurement Intent-to-Award/Execute datasets which DO work.",
+         certifications: [],
+       };
+       await cacheSet(cacheKey, "diversity", skeletal, 7 * 24 * 60 * 60 * 1000);
+       return skeletal;
+     }
+     // 429/5xx and similar are transient; surface them through the catch
+     // below instead of caching a week-long "needs_setup".
+     if (!r.ok) throw new Error(`diversity HTTP ${r.status}`);
+     const rows = (await r.json()) as any[];
+     const certs = (Array.isArray(rows) ? rows : []).map((row) => ({
+       category: row.certification || row.classification || "?",
+       expiration: row.certification_expiration_date || row.expiration_date,
+       type: row.business_type || row.type,
+     }));
+     const brief: DiversityCertBrief = {
+       source: "chicago_dps_economic_inclusion",
+       fetched_at: now,
+       searched_name: name,
+       status: certs.length > 0 ? "ok" : "no_match",
+       certifications: certs,
+       source_url: `https://data.cityofchicago.org/resource/${DIVERSITY_DATASET}.json`,
+     };
+     await cacheSet(cacheKey, "diversity", brief);
+     return brief;
+   } catch (e) {
+     return {
+       source: "chicago_dps_economic_inclusion",
+       fetched_at: now,
+       searched_name: name,
+       status: "needs_setup",
+       reason: `dataset access error: ${(e as Error).message}`,
+       certifications: [],
+       error: (e as Error).message,
+     };
+   }
+ }
+
+// ─── News mentions via Google News RSS ─────────────────────────────
+// news.google.com/rss/search?q={name} — free, no auth.
+ // Returns RSS XML; parse <item> entries for title/link/pubDate.
+ /**
+  * Fetches recent headlines mentioning an entity from the Google News RSS
+  * search feed (exact-phrase query).
+  *
+  * Names that are empty or shorter than 5 characters return `no_match`
+  * without touching the cache or the network. Successful results are
+  * cached for 6 hours; failures are returned as `error` and not cached.
+  *
+  * @param name Entity name to search for.
+  * @returns Brief with up to 5 headlines and the number of feed items seen.
+  */
+ export async function fetchNewsMentions(name: string): Promise<NewsBrief> {
+   const now = new Date().toISOString();
+   if (!name || name.length < 5) {
+     return {
+       source: "google_news_rss",
+       fetched_at: now,
+       searched_name: name,
+       status: "no_match",
+       total_mentions: 0,
+       recent_headlines: [],
+     };
+   }
+   const cacheKey = normalizeEntityName(name);
+   const cached = await cacheGet(cacheKey, "news");
+   if (cached) return cached;
+   const q = encodeURIComponent(`"${name}"`);
+   const url = `https://news.google.com/rss/search?q=${q}&hl=en-US&gl=US&ceid=US:en`;
+   try {
+     const r = await fetch(url, {
+       headers: { "User-Agent": "Mozilla/5.0 lakehouse-copilot" },
+       signal: AbortSignal.timeout(10000),
+     });
+     if (!r.ok) throw new Error(`news HTTP ${r.status}`);
+     const xml = await r.text();
+     const items = [...xml.matchAll(/<item>([\s\S]*?)<\/item>/g)];
+     const headlines = items.slice(0, 5).map((m) => {
+       const inner = m[1];
+       // Text content of the first <tag ...>…</tag> in this item, with
+       // CDATA wrappers removed; "" when the tag is absent.
+       const get = (tag: string) => {
+         const re = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`);
+         const mm = inner.match(re);
+         if (!mm) return "";
+         return mm[1].replace(/<!\[CDATA\[|\]\]>/g, "").trim();
+       };
+       return {
+         title: get("title"),
+         source: get("source"),
+         date: get("pubDate"),
+         url: get("link"),
+       };
+     }).filter((h) => h.title);
+     const brief: NewsBrief = {
+       source: "google_news_rss",
+       fetched_at: now,
+       searched_name: name,
+       status: headlines.length > 0 ? "ok" : "no_match",
+       total_mentions: items.length,
+       recent_headlines: headlines,
+     };
+     // 6h TTL — news cycles fast
+     await cacheSet(cacheKey, "news", brief, 6 * 60 * 60 * 1000);
+     return brief;
+   } catch (e) {
+     return {
+       source: "google_news_rss",
+       fetched_at: now,
+       searched_name: name,
+       status: "error",
+       total_mentions: 0,
+       recent_headlines: [],
+       error: (e as Error).message,
+     };
+   }
+ }
+
+// ─── BLS construction employment (Chicago MSA) ─────────────────────
+// Single shared cache keyed on the series ID since it's macro context.
+ const BLS_CHI_CONSTRUCTION_SERIES = "SMU17169802000000001";
+ // Last successful brief and when it was stored; errors are never cached.
+ let _blsCache: BlsTrendBrief | null = null;
+ let _blsCacheAt = 0;
+ const BLS_CACHE_TTL_MS = 6 * 60 * 60 * 1000;
+
+ /**
+  * Fetches the construction-employment series from the BLS public API and
+  * derives month-over-month and year-over-year change.
+  *
+  * Year-over-year compares the latest month with the SAME month of the
+  * previous year, located by year/period rather than by array offset, so a
+  * gap in the series cannot shift the comparison to the wrong month. Rows
+  * that are not M01–M12 or have a non-numeric value are ignored.
+  *
+  * @returns The trend brief; a successful result is reused for 6 hours.
+  *   Failures return `status: "error"` and are not cached.
+  */
+ export async function fetchBlsConstructionTrend(): Promise<BlsTrendBrief> {
+   if (_blsCache && Date.now() - _blsCacheAt < BLS_CACHE_TTL_MS) return _blsCache;
+   const now = new Date().toISOString();
+   const thisYear = new Date().getFullYear();
+   try {
+     const res = await fetch("https://api.bls.gov/publicAPI/v1/timeseries/data/", {
+       method: "POST",
+       headers: { "Content-Type": "application/json", "User-Agent": UA },
+       body: JSON.stringify({
+         seriesid: [BLS_CHI_CONSTRUCTION_SERIES],
+         startyear: String(thisYear - 2),
+         endyear: String(thisYear),
+       }),
+       signal: AbortSignal.timeout(12000),
+     });
+     if (!res.ok) throw new Error(`bls HTTP ${res.status}`);
+     const j = (await res.json()) as any;
+     const series = j?.Results?.series?.[0];
+     const data = (series?.data || []) as Array<{ year: string; period: string; value: string }>;
+     // Keep only genuine monthly observations with a numeric value.
+     const monthly = data
+       .map((d) => ({ year: d.year, month: d.period, value: parseFloat(d.value) }))
+       .filter((d) => /^M(0[1-9]|1[0-2])$/.test(d.month) && Number.isFinite(d.value));
+     if (monthly.length === 0) throw new Error("no data");
+     const obs = monthly.map((d) => ({ period: `${d.year}-${d.month}`, value: d.value }));
+     // BLS returns most-recent-first
+     const latest = obs[0];
+     const monthAgo = obs[1];
+     const priorYear = String(Number(monthly[0].year) - 1);
+     const yearAgoIdx = monthly.findIndex(
+       (d) => d.year === priorYear && d.month === monthly[0].month,
+     );
+     const yearAgo = yearAgoIdx === -1 ? undefined : obs[yearAgoIdx];
+     // Percent change of the latest value against a base observation;
+     // null when the base is missing or zero.
+     const pctChange = (base?: { value: number }) =>
+       base && base.value !== 0 ? ((latest.value - base.value) / base.value) * 100 : null;
+     const mom = pctChange(monthAgo);
+     const yoy = pctChange(yearAgo);
+     const trend: BlsTrendBrief["trend"] =
+       yoy === null ? "unknown" :
+       yoy > 1.5 ? "growing" :
+       yoy < -1.5 ? "declining" : "stable";
+     const brief: BlsTrendBrief = {
+       source: "bls.gov",
+       fetched_at: now,
+       series_id: BLS_CHI_CONSTRUCTION_SERIES,
+       status: "ok",
+       latest,
+       yoy_change_pct: yoy,
+       mom_change_pct: mom,
+       recent: obs.slice(0, 6),
+       trend,
+     };
+     _blsCache = brief;
+     _blsCacheAt = Date.now();
+     return brief;
+   } catch (e) {
+     return {
+       source: "bls.gov",
+       fetched_at: now,
+       series_id: BLS_CHI_CONSTRUCTION_SERIES,
+       status: "error",
+       latest: null,
+       yoy_change_pct: null,
+       mom_change_pct: null,
+       recent: [],
+       trend: "unknown",
+       error: (e as Error).message,
+     };
+   }
+ }
+
+// ─── News sentiment scoring ────────────────────────────────────────
+// Run this over the existing NewsBrief headlines. Cheap keyword-based
+// classifier — not LLM-grade, but catches the obvious "lawsuit /
+// fraud / OSHA fine" vs "awarded / expansion / wins" patterns.
+ // Keyword fragments joined into one alternation wrapped in \b…\b. A
+ // fragment meant as a stem must end in \w* — otherwise the trailing \b
+ // stops it from matching any inflected word ("injur" never matches
+ // "injured").
+ const SENTIMENT_NEG = [
+   "lawsuit","sued","fraud","fines?","fined","penalty","penalties",
+   "violations?","investigation","investigated","indicted","charged",
+   "guilty","arrest","arrested","bankrupt(cy)?","insolvenc?y",
+   "debt","default","fired","layoffs?","quit","resigned",
+   "scandal","whistleblower","theft","embezzl\\w*","kickback","bribe",
+   "complaint","grievanc\\w*","strike","walkout","picket","accident",
+   "death","fatal","killed","injur\\w*","hospitaliz\\w*","amputation","fall",
+   "collapse","collapsed","fire","explosion","shut.?down","stop.?work",
+   "delay","delays","behind.?schedule","over.?budget","cancel(l?)ed",
+   "subpoena","audit","misconduct","harass\\w*","discriminat\\w*","retaliat\\w*",
+   "evict\\w*","foreclos\\w*","levied",
+ ];
+ const SENTIMENT_POS = [
+   "award(?:ed|s)","won","wins?","top","best","gain(?:ed|s)?",
+   "expand(?:ed|s|ing)?","expansion","contract","selected","chosen",
+   "partnership","invest(?:ed|s|ing|ment)?","launch","launched",
+   "milestone","completed?","record","strong","grew","grow(?:th|ing)",
+   "promoted","hired","appoint(?:ed|s)?","ribbon","groundbreaking",
+   "innovation","new\\s+headquarters","achievement","recogni[zs]ed",
+   "leader(?:ship)?","sustainab\\w*","green","clean.?energy",
+ ];
+
+ const NEG_RE = new RegExp(`\\b(${SENTIMENT_NEG.join("|")})\\b`, "i");
+ const POS_RE = new RegExp(`\\b(${SENTIMENT_POS.join("|")})\\b`, "i");
+
+ // Up to `limit` matched keywords from `text`, in match order. A fresh
+ // global regex is built per call so no `lastIndex` state is shared.
+ function sentimentHits(pattern: RegExp, text: string, limit = 3): string[] {
+   return [...text.matchAll(new RegExp(pattern.source, "gi"))]
+     .slice(0, limit)
+     .map((m) => m[1]);
+ }
+
+ /**
+  * Keyword-based polarity score over a brief's recent headlines.
+  *
+  * Each headline is negative or positive when it has more keyword hits of
+  * that polarity (at most 3 are counted per side), otherwise neutral.
+  *
+  * @param news Brief whose `recent_headlines` are scored.
+  * @returns `score` in [-1, 1] = (positive − negative) / headline count
+  *   (0 when there are no headlines), the three counts, and up to 5
+  *   flagged headlines with the keywords that triggered them.
+  */
+ export function scoreNewsSentiment(news: NewsBrief): NewsSentiment {
+   const headlines = news.recent_headlines || [];
+   let pos = 0, neg = 0;
+   const flagged: NewsSentiment["flagged_headlines"] = [];
+   for (const h of headlines) {
+     const t = h.title || "";
+     const negMatches = sentimentHits(NEG_RE, t);
+     const posMatches = sentimentHits(POS_RE, t);
+     if (negMatches.length > posMatches.length) {
+       neg++;
+       flagged.push({ title: t, polarity: "neg", reasons: negMatches });
+     } else if (posMatches.length > negMatches.length) {
+       pos++;
+       flagged.push({ title: t, polarity: "pos", reasons: posMatches });
+     }
+   }
+   const count = headlines.length;
+   // Guard the division only; `neutral` uses the true headline count so
+   // an empty brief reports 0 neutral headlines, not 1.
+   const score = count === 0 ? 0 : (pos - neg) / count;
+   return {
+     score: Math.max(-1, Math.min(1, score)),
+     positive: pos,
+     negative: neg,
+     neutral: count - pos - neg,
+     flagged_headlines: flagged.slice(0, 5),
+   };
+ }
+
+// ─── Placeholder fetchers for sources that need scraping work ──────
+ /**
+  * Placeholder for OSHA Severe Injury Reports. Performs no network I/O and
+  * always reports `needs_setup` with a fixed explanation.
+  *
+  * @param name Entity name, echoed back as `searched_name`.
+  */
+ export async function fetchOshaSirBrief(name: string): Promise {
+   const fetchedAt = new Date().toISOString();
+   const reason =
+     "OSHA Severe Injury Reports CSV is bot-protected at dol.gov (Imperva challenge). Path forward: (a) human-mediated periodic CSV download into /data/_entity_cache/sir.csv, then local lookup, or (b) browser-automation scraper with CAPTCHA handling.";
+   return {
+     source: "osha.gov/severeinjury",
+     fetched_at: fetchedAt,
+     searched_name: name,
+     status: "needs_setup",
+     reason,
+   };
+ }
+
+ /**
+  * Placeholder for property lien lookups. Performs no network I/O and
+  * always reports `needs_setup` with a fixed explanation.
+  *
+  * @param address Property address, echoed back as `searched_address`.
+  */
+ export async function fetchLiensBrief(address: string): Promise {
+   // Background, per the original notes:
+   //  - Cook County Recorder of Deeds merged into Cook County Clerk in 2020.
+   //  - Public search portal: crs.cookcountyclerkil.gov (HTML form,
+   //    JSESSIONID-based).
+   //  - datacatalog.cookcountyil.gov returns 404 on most lien dataset IDs
+   //    (xfev-8smz UCC liens specifically, also xrzj-c8ez code violations).
+   //  - Planned path: HTML scrape of the portal with PIN→search→paginate;
+   //    queued, needs ~150 LOC of form-state parsing.
+   const fetchedAt = new Date().toISOString();
+   const reason =
+     "Cook County Clerk recordings portal (crs.cookcountyclerkil.gov) needs JSESSIONID + form POST scrape. Socrata catalog UCC dataset (xfev-8smz) returns 404 on resource endpoint. Queued for next batch.";
+   return {
+     source: "cook_county_recorder",
+     fetched_at: fetchedAt,
+     searched_address: address,
+     status: "needs_setup",
+     reason,
+   };
+ }
+
+// ─── NLRB cases (real scraper) ─────────────────────────────────────
+// Public search at apps.nlrb.gov/eservice/dailyclosed.aspx and
+// nlrb.gov/search/case. The /search/case page returns plain HTML.
+ /**
+  * Scrapes the NLRB case-search results page for an entity name.
+  *
+  * Case numbers are taken from anchors whose href is `/case/<number>`; a
+  * case linked more than once on the page is listed once. The total comes
+  * from the first "N result" text on the page, falling back to the number
+  * of distinct case links. Successful results are cached for 24 hours.
+  *
+  * @param name Entity name used as the search term.
+  * @returns Brief with up to 5 cases, or `needs_setup` with the error
+  *   message when the fetch fails (not cached).
+  */
+ export async function fetchNlrbBriefReal(name: string): Promise<NlrbBrief> {
+   const now = new Date().toISOString();
+   const cacheKey = normalizeEntityName(name);
+   const cached = await cacheGet(cacheKey, "nlrb");
+   if (cached) return cached;
+   const q = encodeURIComponent(name);
+   const url = `https://www.nlrb.gov/search/case?search_term=${q}`;
+   try {
+     const r = await fetch(url, {
+       headers: { "User-Agent": UA },
+       signal: AbortSignal.timeout(12000),
+     });
+     if (!r.ok) throw new Error(`nlrb HTTP ${r.status}`);
+     const html = await r.text();
+     // Cases appear in result blocks — collect case-detail links.
+     // NOTE(review): assumes result links look like <a … href="/case/NN-XX-NNNNNN">;
+     // confirm against the live page markup.
+     const caseNumbers = [
+       ...new Set(
+         [...html.matchAll(/<a[^>]+href="\/case\/([0-9A-Z\-]+)"/g)].map((m) => m[1]),
+       ),
+     ];
+     const cases = caseNumbers.slice(0, 5).map((case_number) => ({
+       case_number,
+       case_type: "?",
+       date: "",
+       status: "",
+     }));
+     // Total count appears near top — pattern "X results"
+     const totalMatch = html.match(/(\d[\d,]*)\s+result/i);
+     const total = totalMatch
+       ? parseInt(totalMatch[1].replace(/,/g, ""), 10)
+       : caseNumbers.length;
+     const brief: NlrbBrief = {
+       source: "nlrb.gov",
+       fetched_at: now,
+       searched_name: name,
+       status: cases.length > 0 ? "ok" : "no_match",
+       total_cases: total,
+       recent_cases: cases,
+     };
+     await cacheSet(cacheKey, "nlrb", brief, 24 * 60 * 60 * 1000);
+     return brief;
+   } catch (e) {
+     return {
+       source: "nlrb.gov",
+       fetched_at: now,
+       searched_name: name,
+       status: "needs_setup",
+       reason: `nlrb fetch error: ${(e as Error).message}`,
+     };
+   }
+ }
+
+// ─── Project Index Score (matrix aggregation) ──────────────────────
+// Walks every signal across owner/contractors and produces a single
+// 0-100 index anchored at 50 neutral. Each contribution is auditable.
+// Weights are documentation-grade — adjust over time as we learn which
+// signals actually correlate with project success/delay.
+
+ /**
+  * Aggregates property, entity and macro signals into one project index.
+  *
+  * The score starts at 50, adds every contribution, is rounded and clamped
+  * to 0..100, then mapped to a band: <30 red, <45 amber, ≤55 neutral,
+  * ≤75 green, otherwise strong.
+  *
+  * @param property Property-side briefs (violations, site context, owner).
+  * @param entities Per-contact entity briefs.
+  * @param macro Optional BLS trend brief; ignored unless its status is "ok"
+  *   and it carries a year-over-year figure.
+  * @returns The score, its band, the contributions sorted by absolute
+  *   contribution (largest first), and `partial`, which is true when an
+  *   entity's OSHA brief errored or its debarment, NLRB or ILSOS source is
+  *   unavailable.
+  */
+ export function scoreProject(
+   property: PermitEntityBrief["property"],
+   entities: EntityBrief[],
+   macro: BlsTrendBrief | null = null,
+ ): ProjectIndexScore {
+   const cs: SignalContribution[] = [];
+   let partial = false;
+   // Records one auditable contribution; keeps every entry the same shape.
+   const add = (
+     signal: SignalContribution["signal"],
+     weight: number,
+     direction: SignalContribution["direction"],
+     raw: SignalContribution["raw"],
+     contribution: number,
+     note: string,
+   ) => {
+     cs.push({ signal, weight, direction, raw, contribution, note });
+   };
+
+   // Macro: Chicago construction employment YoY. Affects every permit
+   // proportionally — labor-shrinking market raises filling difficulty,
+   // labor-growing market signals confidence.
+   if (macro?.status === "ok" && macro.yoy_change_pct !== null) {
+     const w = 3;
+     const yoy = macro.yoy_change_pct;
+     const dir = yoy > 1.5 ? 1 : yoy < -1.5 ? -1 : 0;
+     if (dir !== 0) {
+       add(
+         "macro_chi_construction_employment",
+         w,
+         dir,
+         yoy,
+         dir * w,
+         `Chicago MSA construction employment ${yoy >= 0 ? "+" : ""}${yoy.toFixed(1)}% YoY (BLS) — labor market ${macro.trend}`,
+       );
+     }
+   }
+
+   // ─── Property-level signals ───
+   if (property.violations?.status === "ok") {
+     const v = property.violations;
+     if (v.stop_work_orders > 0) {
+       const w = 10;
+       // Scales with the number of orders, capped at 3.
+       add(
+         "stop_work_orders",
+         w,
+         -1,
+         v.stop_work_orders,
+         -w * Math.min(v.stop_work_orders, 3),
+         `${v.stop_work_orders} stop-work order${v.stop_work_orders === 1 ? "" : "s"} on this property`,
+       );
+     }
+     if (v.open_violations > 0) {
+       const w = 6;
+       // Half a weight per open violation, capped at 4 violations.
+       add(
+         "open_property_violations",
+         w,
+         -1,
+         v.open_violations,
+         -w * Math.min(v.open_violations, 4) / 2,
+         `${v.open_violations} open building violation${v.open_violations === 1 ? "" : "s"} at the address`,
+       );
+     } else if (v.total_violations === 0) {
+       const w = 3;
+       add("property_clean", w, 1, 0, w, "No building violations on record");
+     }
+   }
+
+   const site = property.site_context;
+
+   // TIF district = public-subsidy zone. Mild positive signal because
+   // public oversight + structured funding usually means the project
+   // survives funding gaps. Not a blanket good — TIFs do fail.
+   if (site?.in_tif_district) {
+     const w = 4;
+     add(
+       "in_tif_district",
+       w,
+       1,
+       site.tif_district_name,
+       w,
+       `Site is inside TIF "${site.tif_district_name}" — public tax-increment financing backing`,
+     );
+   }
+
+   // Transit access — workers can commute via L without parking.
+   // <=800m is "right at a station," <=1500m is "walkable," beyond that
+   // crews are car/shuttle dependent.
+   if (site?.nearest_cta_distance_m !== undefined) {
+     const m = site.nearest_cta_distance_m;
+     if (m <= 800) {
+       const w = 2;
+       add(
+         "transit_access_strong",
+         w,
+         1,
+         m,
+         w,
+         `Site is ${m}m from CTA ${site.nearest_cta_station} (${site.nearest_cta_lines}) — workers can commute by L`,
+       );
+     } else if (m <= 1500) {
+       // Neutral — walkable but not adjacent
+     } else {
+       const w = 1;
+       add(
+         "transit_access_weak",
+         w,
+         -1,
+         m,
+         -w,
+         `Site is ${(m / 1000).toFixed(1)}km from nearest CTA stop — crew transport burden`,
+       );
+     }
+   }
+
+   // Labor competition — many active permits within 0.5mi means crews
+   // are already booked elsewhere. >5 permits in 90d is "saturated."
+   if (site?.nearby_permits_90d !== undefined && site.nearby_permits_90d > 5) {
+     const n = site.nearby_permits_90d;
+     const w = 3;
+     add(
+       "labor_competition_high",
+       w,
+       -1,
+       n,
+       -Math.min(w, n / 5),
+       `${n} other permits active within 0.5mi (last 90d) — crew competition`,
+     );
+   }
+
+   if (site?.is_landmark) {
+     const w = 3;
+     add(
+       "landmark_status",
+       w,
+       -1,
+       site.landmark_name,
+       -w,
+       `Landmark designation${site.landmark_name ? ` (${site.landmark_name})` : ""} — preservation review extends timeline`,
+     );
+   }
+
+   if (property.owner?.status === "ok") {
+     const mailingState = property.owner.mailing_state;
+     const w = 2;
+     if (mailingState && mailingState !== "IL") {
+       add(
+         "owner_out_of_state",
+         w,
+         -1,
+         mailingState,
+         -w,
+         `Owner mails out of state (${mailingState}) — slower local accountability`,
+       );
+     } else if (mailingState === "IL") {
+       // Only a known IL mailing address counts as local; an unknown
+       // state contributes nothing rather than a false positive.
+       add("owner_local", w, 1, "IL", w, "Owner mails locally — faster accountability");
+     }
+   }
+
+   // ─── Per-entity signals ───
+   for (const e of entities) {
+     const tag = `[${e.display_name}]`;
+
+     if (e.osha?.status === "ok") {
+       const o = e.osha;
+       if (o.inspection_count > 10) {
+         const w = 6;
+         add(
+           "osha_high_inspection_count",
+           w,
+           -1,
+           o.inspection_count,
+           -w,
+           `${tag} ${o.inspection_count} OSHA inspections nationally`,
+         );
+       } else if (o.inspection_count === 0) {
+         const w = 3;
+         add("osha_clean", w, 1, 0, w, `${tag} no OSHA inspections on record`);
+       }
+       if (o.most_recent_date) {
+         // An unparseable date yields NaN, which fails the comparison.
+         const ageDays = (Date.now() - Date.parse(o.most_recent_date)) / 86400000;
+         if (ageDays < 90) {
+           const w = 7;
+           add(
+             "osha_recent_action",
+             w,
+             -1,
+             o.most_recent_date,
+             -w,
+             `${tag} OSHA inspection ${Math.round(ageDays)}d ago`,
+           );
+         }
+       }
+     } else if (e.osha?.status === "error") {
+       partial = true;
+     }
+
+     if (e.history?.status === "ok") {
+       if (e.history.trend === "new") {
+         const w = 8;
+         add(
+           "new_entity",
+           w,
+           -1,
+           e.history.permits_historical_total,
+           -w,
+           `${tag} only ${e.history.permits_historical_total} permits ever — possible LLC-shuffle signature`,
+         );
+       } else if (e.history.trend === "growing") {
+         const w = 3;
+         add(
+           "activity_growing",
+           w,
+           1,
+           e.history.permits_last_180d,
+           w,
+           `${tag} growing Chicago footprint (${e.history.permits_last_180d} in 180d)`,
+         );
+       } else if (e.history.trend === "declining") {
+         const w = 4;
+         add(
+           "activity_declining",
+           w,
+           -1,
+           e.history.permits_last_180d,
+           -w,
+           `${tag} declining Chicago footprint`,
+         );
+       }
+     }
+
+     if (e.federal?.status === "ok" && e.federal.total_awards_value > 0) {
+       const fedM = e.federal.total_awards_value / 1e6;
+       const w = 5;
+       // Log-scaled on award value in $M, capped at the weight.
+       add(
+         "federal_contract_history",
+         w,
+         1,
+         e.federal.total_awards_value,
+         Math.min(w, Math.log10(fedM + 1) * 2),
+         `${tag} $${fedM.toFixed(0)}M federal contracts — vetted compliance`,
+       );
+     }
+
+     // SVEP — the heaviest-weight negative signal we have. OSHA placed
+     // this contractor in the Severe Violator Enforcement Program after
+     // willful or repeat violations. Counted at twice its weight.
+     if (e.svep?.flagged) {
+       const w = 10;
+       add(
+         "osha_svep_listed",
+         w,
+         -1,
+         e.svep.matched_entries.map((m) => m.name).join(" | "),
+         -w * 2,
+         `${tag} OSHA SVEP-listed (Severe Violator Enforcement Program) — formally flagged worst-actor`,
+       );
+     }
+
+     if (e.parent_link?.status === "ok" && e.parent_link.parent_ticker) {
+       const w = 3;
+       add(
+         "public_parent_chain",
+         w,
+         1,
+         e.parent_link.parent_ticker,
+         w,
+         `${tag} traceable to public parent ${e.parent_link.parent_ticker}`,
+       );
+     }
+
+     // News sentiment over the recent headlines. Negative bias means
+     // the press cycle around this firm is dominated by lawsuits /
+     // accidents / fines.
+     if (e.news_sentiment) {
+       const ns = e.news_sentiment;
+       if (ns.negative > ns.positive && ns.negative > 0) {
+         const w = 5;
+         add(
+           "news_sentiment_negative",
+           w,
+           -1,
+           ns.score,
+           -Math.min(w, ns.negative * 1.5),
+           `${tag} negative news cycle (${ns.negative} flagged: ${ns.flagged_headlines.filter((h) => h.polarity === "neg").slice(0, 2).map((h) => h.reasons.join("/")).join(", ")})`,
+         );
+       } else if (ns.positive > ns.negative && ns.positive >= 2) {
+         const w = 2;
+         add(
+           "news_sentiment_positive",
+           w,
+           1,
+           ns.score,
+           w,
+           `${tag} positive news cycle (${ns.positive} flagged on awards/expansion)`,
+         );
+       }
+     }
+
+     // Sources that could not be consulted make the index partial.
+     if (e.debarment?.status === "needs_setup") partial = true;
+     if (e.nlrb?.status === "needs_setup") partial = true;
+     if (e.ilsos?.status === "source_unreachable") partial = true;
+   }
+
+   // Net score from 50 baseline. Clamp 0..100.
+   const net = cs.reduce((a, c) => a + c.contribution, 0);
+   const score = Math.max(0, Math.min(100, Math.round(50 + net)));
+   const band: ProjectIndexScore["band"] =
+     score < 30 ? "red" :
+     score < 45 ? "amber" :
+     score <= 55 ? "neutral" :
+     score <= 75 ? "green" : "strong";
+
+   // Sort by absolute contribution, biggest movers first
+   cs.sort((a, b) => Math.abs(b.contribution) - Math.abs(a.contribution));
+
+   return { score, band, contributions: cs, partial };
+ }
+
+// ─── Public: build a brief for a single permit ─────────────────────
+
+ type PermitInput = { // Permit fields accepted by buildPermitBrief; every field is optional.
+ permit_id?: string;
+ address?: string; // when set, passed to the property-side lookups (owner, violations, site context)
+ work_type?: string;
+ contact_1_name?: string; // trimmed before use; a blank name is skipped
+ contact_1_type?: string; // becomes the contact's role; "CONTACT_1" when missing
+ contact_2_name?: string; // trimmed before use; dropped when it normalizes to the same name as contact 1
+ contact_2_type?: string; // becomes the contact's role; "CONTACT_2" when missing
+ };
+
+/**
+ * Build the brief for a single permit.
+ *
+ * Three independent pieces of work run concurrently under one Promise.all:
+ *   1. per-contact entity signals (contacts are processed one at a time,
+ *      every source for a given contact is requested in parallel),
+ *   2. property-side signals keyed on `permit.address`,
+ *   3. the macro (BLS construction trend) fetch.
+ *
+ * @param permit Permit fields to look up. Contacts with a blank name are
+ *   ignored, and contacts that normalize to the same entity name are
+ *   collapsed into one (first occurrence wins).
+ * @param opts Opt-out switches. Each source is fetched unless its flag is
+ *   explicitly `false`; a disabled source is reported as `null` on the
+ *   entity. The remaining entity sources are always fetched.
+ * @returns The assembled brief: permit id, property signals, entity briefs,
+ *   deduplicated tickers sorted by `cap_proxy` descending, macro context,
+ *   index score, static roadmap and a generation timestamp.
+ *   A rejection from any underlying fetcher propagates to the caller.
+ */
+export async function buildPermitBrief(
+  permit: PermitInput,
+  opts: {
+    fetchIlsos?: boolean;
+    fetchOsha?: boolean;
+    fetchTicker?: boolean;
+    fetchHistory?: boolean;
+  } = {},
+): Promise {
+  // Opt-out semantics: only an explicit `false` disables a source.
+  const wantOsha = opts.fetchOsha !== false;
+  const wantIlsos = opts.fetchIlsos !== false;
+  const wantTicker = opts.fetchTicker !== false;
+  const wantHistory = opts.fetchHistory !== false;
+
+  const rawContacts: { name: string; role: string }[] = [];
+  if (permit.contact_1_name?.trim()) {
+    rawContacts.push({
+      name: permit.contact_1_name.trim(),
+      role: permit.contact_1_type || "CONTACT_1",
+    });
+  }
+  if (permit.contact_2_name?.trim()) {
+    rawContacts.push({
+      name: permit.contact_2_name.trim(),
+      role: permit.contact_2_type || "CONTACT_2",
+    });
+  }
+  // Dedupe by normalized name — same firm often appears as both contacts.
+  const seen = new Set();
+  const contacts: { name: string; role: string }[] = [];
+  for (const c of rawContacts) {
+    const key = normalizeEntityName(c.name);
+    if (!key || seen.has(key)) continue;
+    seen.add(key);
+    contacts.push(c);
+  }
+
+  // Entity signals. Contacts are handled sequentially so outbound fan-out
+  // stays bounded to one contact's worth of requests at a time.
+  const loadEntities = async (): Promise<EntityBrief[]> => {
+    const collected: EntityBrief[] = [];
+    for (const c of contacts) {
+      // 12 parallel calls per entity. OSHA throttles internally, the rest
+      // fan straight out. ~3-5s end-to-end per entity on cold cache.
+      const [osha, ilsos, stock, history, debarment, parent_link, federal, nlrb, diversity, news, osha_sir, svep] =
+        await Promise.all([
+          wantOsha ? fetchOshaBrief(c.name) : Promise.resolve(null),
+          wantIlsos ? fetchIlsosBrief(c.name) : Promise.resolve(null),
+          wantTicker ? fetchTickerBrief(c.name) : Promise.resolve(null),
+          wantHistory ? fetchContractorHistory(c.name) : Promise.resolve(null),
+          fetchDebarmentBrief(c.name),
+          fetchParentLink(c.name),
+          fetchFederalContracts(c.name),
+          fetchNlrbBriefReal(c.name),
+          fetchDiversityCerts(c.name),
+          fetchNewsMentions(c.name),
+          fetchOshaSirBrief(c.name),
+          fetchSvepBrief(c.name),
+        ]);
+      const news_sentiment = news ? scoreNewsSentiment(news) : null;
+      collected.push({
+        key: normalizeEntityName(c.name),
+        display_name: c.name,
+        role: c.role,
+        ticker: entityTicker(c.name),
+        osha,
+        ilsos,
+        stock,
+        history,
+        debarment,
+        parent_link,
+        federal,
+        nlrb,
+        diversity,
+        news,
+        news_sentiment,
+        osha_sir,
+        svep,
+        risk: scoreEntity(osha, ilsos, history),
+      });
+    }
+    return collected;
+  };
+
+  // Property-side signals: owner, violations, site context, liens placeholder.
+  // Declared async so a synchronous throw in a fetcher surfaces as a
+  // rejection instead of escaping while sibling promises are in flight.
+  const loadPropertySignals = async () =>
+    Promise.all([
+      permit.address ? fetchPropertyOwner(permit.address) : Promise.resolve(null),
+      permit.address ? fetchPropertyViolations(permit.address) : Promise.resolve(null),
+      permit.address ? fetchSiteContext(permit.address) : Promise.resolve(null),
+      permit.address ? fetchLiensBrief(permit.address) : Promise.resolve(null),
+    ]);
+
+  // Macro context (BLS Chicago construction employment) runs in parallel
+  // with the per-entity and property calls. All three promises are handed
+  // to Promise.all immediately so none is ever left without a handler.
+  const [macro, [owner, violations, site_context, liens], entities] = await Promise.all([
+    fetchBlsConstructionTrend(),
+    loadPropertySignals(),
+    loadEntities(),
+  ]);
+
+  // Flatten the ticker portfolio across entities, dedupe by ticker
+  // symbol, sort by cap_proxy descending (most-profitable related
+  // company first — Project Index "most-profitable beneficiary first").
+  const tickersByKey = new Map();
+  for (const e of entities) {
+    if (e.stock?.status === "ok" && e.stock.ticker) {
+      tickersByKey.set(e.stock.ticker, e.stock);
+    }
+  }
+  const tickers = [...tickersByKey.values()].sort(
+    (a, b) => (b.cap_proxy ?? 0) - (a.cap_proxy ?? 0),
+  );
+
+  const union = unionsForWorkType(permit.work_type);
+
+  const property = {
+    address: permit.address || "",
+    ticker: addressTicker(permit.address || ""),
+    owner,
+    violations,
+    union,
+    site_context,
+    liens,
+  };
+  const index_score = scoreProject(property, entities, macro);
+
+  return {
+    permit_id: permit.permit_id || "",
+    property,
+    entities,
+    tickers,
+    macro,
+    index_score,
+    // Roadmap: now-shorter list as more sources go live. Items pending
+    // for batch 4-5: SEC Exhibit 21, debarment lists, USASpending,
+    // mechanics liens, NLRB+OSHA-Severe-Injury, contractor drill-down.
+    roadmap: [
+      "SEC Exhibit 21 full index — walk every 10-K Exhibit 21 to build a complete subsidiary→parent tree (curated map covers top 12 GCs today)",
+      "SAM.gov v3 API — register at sam.gov for free key; OR human-mediated CSV download into local cache",
+      "IDOL prevailing-wage debarment list — HTML/PDF scrape from labor.illinois.gov",
+      "OSHA Severe Injury Reports — bot-protected at dol.gov; needs human-mediated CSV refresh OR browser-automation",
+      "Cook County Recorder mechanics liens — HTML scrape with PIN-based pagination",
+      "TIF polygon containment check — needs lat/long-in-polygon (turf.js) to confirm in_tif_district",
+      "Community area name table — static lookup from Chicago number → name",
+      "BLS QCEW + FRED commodity prices — macroeconomic context",
+      "PACER bankruptcy + civil litigation — federal court filings (free for opinion search)",
+      "Cook County Treasurer — property tax delinquency",
+      "NYC DOB + LA LADBS — cross-city contractor footprint",
+      "LLC-shuffle detector — principal+agent fingerprint matching (blocked on ILSOS access)",
+    ],
+    generated_at: new Date().toISOString(),
+  };
+}
|