scrum: validate API key presence when enabling cloud provider

admin_save_config silently accepted enabled=True for any provider even when no api_key was configured (neither in the request body, nor already stored, nor available in the provider's env var). Result: /api/admin/config returned 200, the provider was marked enabled in UI and config, then every actual query to that provider 401'd at call time with an opaque error. The fix: when the incoming patch sets enabled=True, compute the effective key (request body preferred, falling back to stored config, falling back to the provider's env var via get_api_key()) and reject with 400 if the effective key is empty. Keeps all existing behavior for disabled providers and for updates that don't touch enabled. Surfaced by lakehouse scrum-master pipeline run 2026-04-24 (finding F5).
2026-04-24 06:14:25 -05:00
2 changed files with 52 additions and 360 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,4 +2,3 @@ __pycache__/
 *.pyc
 .env
 *.log
-.memory/
--- a/llm_team_ui.py
+++ b/llm_team_ui.py
@ -3,7 +3,6 @@

 import json
 import os
-import sys
 import time
 import threading
 import secrets
@ -37,23 +36,8 @@ app.config["SESSION_COOKIE_HTTPONLY"] = True
 app.config["SESSION_COOKIE_SAMESITE"] = "Lax"

 # ─── SECURITY LOGGING ─────────────────────────────────────────
-# Dedicated security log for fail2ban and audit trail.
-#
-# Cross-lineage scrum 2026-04-30 (Opus BLOCK OB-1): wrapped in
-# try/except. Pre-fix this raised PermissionError at import time
-# when the service user couldn't write /var/log/llm-team-security.log,
-# crashing the app before Flask started. Now falls back to stderr;
-# sec_log still works (ban events still land in journald via stderr),
-# but the app starts. Operator should still create the file with
-# proper perms on production deploy. Path is overridable via
-# LLM_TEAM_SECURITY_LOG env var.
-_LOG_PATH = os.environ.get("LLM_TEAM_SECURITY_LOG", "/var/log/llm-team-security.log")
-try:
-    _sec_handler = logging.FileHandler(_LOG_PATH)
-except (PermissionError, FileNotFoundError, OSError) as _log_err:
-    print(f"[security] WARNING: can't open {_LOG_PATH} ({_log_err}); "
-          f"falling back to stderr.", file=sys.stderr, flush=True)
-    _sec_handler = logging.StreamHandler(sys.stderr)
+# Dedicated security log for fail2ban and audit trail
+_sec_handler = logging.FileHandler("/var/log/llm-team-security.log")
 _sec_handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
 sec_log = logging.getLogger("security")
 sec_log.addHandler(_sec_handler)
@ -109,24 +93,8 @@ _original_sentinel_interval = None  # stash the normal interval during high-aler


 def _track_violation(ip, event_type="unknown"):
-    """Record a security violation. If velocity threshold exceeded, auto-escalate.
-
-    Cross-lineage scrum 2026-04-30 follow-up: OB-4's path-bypass for
-    admins was incomplete — _track_violation is called from 3 sites
-    (exploit_scan, rate_limit, login_fail) and only the exploit one
-    had the bypass. An admin hitting rate-limit + login-typo + a
-    legit URL containing 'UNION' within 60s could still self-ban
-    via the OTHER paths. Now is_allowlisted bails early so allowlisted
-    IPs never accumulate violations from ANY path. Defense in depth —
-    _auto_escalate also re-checks below.
-
-    Eviction sweep when tracker grows >10K entries (same pattern as
-    _rate_limit; both had identical unbounded-dict WARNs)."""
-    if is_allowlisted(ip):
-        return False
+    """Record a security violation. If velocity threshold exceeded, auto-escalate."""
    now = time.time()
-    if len(_violation_tracker) > 10000:
-        _evict_stale_violation_tracker(now)
    if ip not in _violation_tracker:
        _violation_tracker[ip] = []
    _violation_tracker[ip].append(now)
@ -139,26 +107,8 @@ def _track_violation(ip, event_type="unknown"):
    return False


-def _evict_stale_violation_tracker(now):
-    """Drop _violation_tracker entries whose newest timestamp is past
-    the window. Called from _track_violation only when dict exceeds
-    10K — the hot path stays untouched for normal traffic."""
-    cutoff = now - VELOCITY_WINDOW
-    stale = [ip for ip, ts in _violation_tracker.items() if not ts or max(ts) < cutoff]
-    for ip in stale:
-        del _violation_tracker[ip]
-
-
 def _auto_escalate(ip, violation_count, event_type):
-    """Auto-ban IP and switch sentinel to high-alert mode.
-
-    Defense in depth (2026-04-30): _track_violation already short-
-    circuits for allowlisted IPs, but if a future code path calls
-    _auto_escalate directly we still want the allowlist guard. Bail
-    early; nothing escalates against a trusted IP."""
-    if is_allowlisted(ip):
-        sec_log.info("AUTO_ESCALATE_BLOCKED ip=%s — allowlisted, refused to ban", ip)
-        return
+    """Auto-ban IP and switch sentinel to high-alert mode."""
    global _original_sentinel_interval, SENTINEL_INTERVAL
    sec_log.warning("AUTO_ESCALATE ip=%s violations=%d/%ds type=%s", ip, violation_count, VELOCITY_WINDOW, event_type)
    _sentinel_log_entry(f"AUTO_ESCALATE ip={ip} violations={violation_count}/{VELOCITY_WINDOW}s type={event_type}")
@ -200,19 +150,8 @@ def _check_high_alert_expiry():

 # IPs that never get rate-limited (your LAN, localhost)
 ALLOWLIST_IPS = {"127.0.0.1", "::1", "192.168.1.1"}
-# Demo mode state — toggled by admin at runtime.
-#
-# Cross-lineage scrum 2026-04-30 (Opus BLOCK OB-5): pre-fix this
-# defaulted to active=True, meaning fresh installs shipped with
-# public unauthenticated access enabled — login_required let demo
-# users straight through. Combined with /api/run + /api/imagegen
-# proxies, that was an open LLM/compute abuse surface from first
-# boot. Now defaults to active=False; operators flip it on
-# explicitly via the admin UI or LLM_TEAM_DEMO_MODE=1 env override
-# (the env override exists for the demo systemd unit so the public
-# devop.live deployment doesn't need a manual toggle on every restart).
-_DEMO_DEFAULT = os.environ.get("LLM_TEAM_DEMO_MODE", "0") == "1"
-_demo_mode = {"active": _DEMO_DEFAULT, "started_by": "boot" if _DEMO_DEFAULT else "off", "showcase": _DEMO_DEFAULT}
+# Demo mode state — toggled by admin at runtime
+_demo_mode = {"active": True, "started_by": "boot", "showcase": True}

 # Routes that demo users CAN trigger (read-like POSTs — enrichment, self-analysis, team runs)
 DEMO_ALLOWED_POSTS = {
@ -233,21 +172,9 @@ def is_allowlisted(ip):


 def rate_limited(ip, max_req=RATE_LIMIT_MAX):
-    """Rolling rate-limit check. Returns True when the IP has exceeded
-    max_req requests within RATE_LIMIT_WINDOW seconds.
-
-    Cross-lineage scrum 2026-04-30 (Opus WARN): _rate_limit was
-    unbounded per-worker, so an attacker rotating slowly through IPs
-    leaked memory forever. Fix: lazy eviction sweep when the dict
-    grows beyond 10K entries. Real production wants a Redis-backed
-    counter shared across workers; this is the in-process band-aid
-    that prevents runaway growth without changing the deploy shape.
-    """
    if is_allowlisted(ip):
        return False
    now = time.time()
-    if len(_rate_limit) > 10000:
-        _evict_stale_rate_limit(now)
    if ip not in _rate_limit or now - _rate_limit[ip][1] > RATE_LIMIT_WINDOW:
        _rate_limit[ip] = (1, now)
        return False
@ -258,16 +185,6 @@ def rate_limited(ip, max_req=RATE_LIMIT_MAX):
    return False


-def _evict_stale_rate_limit(now):
-    """Drop _rate_limit entries older than 2× the window. Called from
-    rate_limited() only when dict growth exceeds 10K — keeps the cost
-    off the hot path for normal traffic."""
-    cutoff = now - (RATE_LIMIT_WINDOW * 2)
-    stale = [ip for ip, (_, start) in _rate_limit.items() if start < cutoff]
-    for ip in stale:
-        del _rate_limit[ip]
-
-
 def is_admin():
    return session.get("role") == "admin"

@ -643,24 +560,8 @@ def security_checks():
    # Check high-alert expiry
    _check_high_alert_expiry()

-    # Exploit scanner detection — log, alert, track velocity, block.
-    #
-    # Cross-lineage scrum 2026-04-30 (Opus BLOCK OB-4): pre-fix the
-    # path regex matched on substrings like UNION, SELECT, ;-- and
-    # auto-banned after 3 hits. Admin URLs containing those keywords
-    # in query strings (e.g. an LLM team named "select-rebrand" or a
-    # docs link to /admin/select_a_mode) self-banned the admin's IP.
-    # Now: skip the path-based check for authenticated admins from
-    # an allowlisted IP. The user-agent + body checks (sentinel) still
-    # apply. Allowlisted-IP admins clicking weird URLs no longer
-    # lock themselves out.
-    _skip_exploit_check = False
-    if ip in ALLOWLIST_IPS and session.get("role") == "admin":
-        _skip_exploit_check = True
-    if not _skip_exploit_check and (
-        EXPLOIT_PATTERNS.search(path) or
-        EXPLOIT_PATTERNS.search(request.query_string.decode("utf-8", errors="ignore"))
-    ):
+    # Exploit scanner detection — log, alert, track velocity, block
+    if EXPLOIT_PATTERNS.search(path) or EXPLOIT_PATTERNS.search(request.query_string.decode("utf-8", errors="ignore")):
        sec_log.warning("EXPLOIT_SCAN ip=%s path=%s ua=%s", ip, path, ua)
        _track_violation(ip, "exploit_scan")
        send_security_alert(
@ -935,21 +836,7 @@ def auth_login():
        with get_db() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                if is_setup:
-                    # First-time setup: create admin.
-                    #
-                    # Cross-lineage scrum 2026-04-30 (Opus WARN): pre-fix
-                    # this was gated only by COUNT(*) FROM users == 0.
-                    # If an operator ever truncated/restored the users
-                    # table, the next external visitor (any IP) could
-                    # claim admin. Now also requires the source IP to
-                    # be in ALLOWLIST_IPS — typically loopback + LAN
-                    # gateway — so a remote attacker hitting the setup
-                    # endpoint after an empty-users state can't seize
-                    # the account. Local operator running setup from
-                    # the box itself still works.
-                    if ip not in ALLOWLIST_IPS:
-                        sec_log.warning("SETUP_DENIED ip=%s — first-time setup requires allowlisted IP", ip)
-                        return jsonify({"error": "setup must be initiated from an allowlisted IP (typically localhost or LAN gateway)"}), 403
+                    # First-time setup: create admin
                    cur.execute("SELECT COUNT(*) as c FROM users")
                    if cur.fetchone()["c"] > 0:
                        return jsonify({"error": "Setup already completed"}), 400
@ -1462,22 +1349,6 @@ async function loadThreats() {
    });
    // Mass action buttons
    var spacer = document.createElement('div'); spacer.style.flex = '1'; toolbar.appendChild(spacer);
-    // Master "select all on this page" checkbox (2026-04-30 J UX request).
-    // Mirrors the per-row .ip-check style; toggles every visible row.
-    // Three-state: unchecked (none selected), checked (all selected),
-    // indeterminate (partial). updateSelCount keeps it in sync as
-    // individual rows are toggled.
-    var selAllWrap = document.createElement('label');
-    selAllWrap.style.cssText = 'display:flex;align-items:center;gap:6px;font-family:JetBrains Mono,monospace;font-size:9px;text-transform:uppercase;letter-spacing:0.5px;color:#7a7872;cursor:pointer';
-    selAllWrap.title = 'Toggle every IP on this page';
-    var selAll = document.createElement('input'); selAll.type = 'checkbox';
-    selAll.id = 'sel-all';
-    selAll.style.cssText = 'width:16px;height:16px;cursor:pointer;accent-color:#e2b55a';
-    selAll.onchange = function(){ toggleAllChecks(this.checked); };
-    selAllWrap.appendChild(selAll);
-    var selAllLabel = document.createElement('span'); selAllLabel.textContent = 'all';
-    selAllWrap.appendChild(selAllLabel);
-    toolbar.appendChild(selAllWrap);
    var selCount = document.createElement('span'); selCount.id = 'sel-count';
    selCount.style.cssText = 'font-family:JetBrains Mono,monospace;font-size:10px;color:#7a7872';
    toolbar.appendChild(selCount);
@ -1601,33 +1472,9 @@ async function loadThreats() {
 var currentSort = 'hits';

 function updateSelCount() {
-  var all = document.querySelectorAll('.ip-check');
-  var checked = document.querySelectorAll('.ip-check:checked');
+  var checks = document.querySelectorAll('.ip-check:checked');
  var el = document.getElementById('sel-count');
-  if (el) el.textContent = checked.length ? checked.length + ' selected' : '';
-  // Sync the master "all" checkbox to reflect the page's actual state.
-  // Three states: none → unchecked, all → checked, partial → indeterminate.
-  // Indeterminate is the visual "half-tick" most browsers render — gives
-  // operators a clear "you've got some but not all selected" hint.
-  var master = document.getElementById('sel-all');
-  if (master) {
-    if (checked.length === 0) {
-      master.checked = false; master.indeterminate = false;
-    } else if (checked.length === all.length) {
-      master.checked = true; master.indeterminate = false;
-    } else {
-      master.indeterminate = true;
-    }
-  }
-}
-
-function toggleAllChecks(checked) {
-  // Master "select all" handler — flips every per-row checkbox on the
-  // page to match the master's state. Used by the toolbar's `[ ] all`
-  // checkbox so operators don't have to click each threat individually
-  // before hitting Ban Selected. (2026-04-30 J UX request.)
-  document.querySelectorAll('.ip-check').forEach(function(cb){ cb.checked = checked; });
-  updateSelCount();
+  if (el) el.textContent = checks.length ? checks.length + ' selected' : '';
 }

 async function massAction(action) {
@ -2041,18 +1888,7 @@ def get_api_key(provider_name):
    env_map = {"openrouter": "OPENROUTER_API_KEY", "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY", "ollama_cloud": "OLLAMA_CLOUD_API_KEY"}
    return os.environ.get(env_map.get(provider_name, ""), "")

-# Cross-lineage scrum 2026-04-30 (Opus BLOCK OB-2 + harness LLM
-# convergent finding): DB_DSN previously had the password hardcoded
-# in source. Same `kbuser`/`knowledge_base` DSN was leaked in
-# voice-ai's audiosocket_bridge.py + sales_assistant.py — confirmed
-# canonical leak by 3 independent reviewers across 2 sessions. Now
-# sourced from env (set via systemd EnvironmentFile=/etc/llm-team-ui.env).
-# No silent fallback to the leaked literal — fail loud. The leaked
-# password is in git history regardless; rotate it in Postgres.
-DB_DSN = os.environ.get("LLM_TEAM_DB_DSN", "")
-if not DB_DSN:
-    print("[llm-team-ui] WARNING: LLM_TEAM_DB_DSN not set — DB ops will fail. "
-          "Set in systemd EnvironmentFile or shell env.", file=sys.stderr, flush=True)
+DB_DSN = "dbname=knowledge_base user=kbuser password=IPbLBA0EQI8u4TeM2YZrbm1OAy5nSwqC host=localhost"

 def get_db():
    return psycopg2.connect(DB_DSN)
@ -7228,6 +7064,18 @@ def admin_save_config():
            new_key = prov.get("api_key", "")
            if not new_key:
                prov["api_key"] = cfg["providers"][name].get("api_key", "")
+            # Enabling a cloud provider requires a usable key (incoming,
+            # stored, or env). Prevents silent auth failures where
+            # `enabled=True` is saved with no key and queries 401 at
+            # call time. get_api_key() checks config + env fallback.
+            if prov.get("enabled") is True:
+                effective_key = prov.get("api_key") or get_api_key(name)
+                if not effective_key:
+                    return jsonify({
+                        "error": f"Cannot enable provider '{name}' without an API key. "
+                                 f"Provide api_key in the request, save a key first, "
+                                 f"or set the environment variable."
+                    }), 400
            cfg["providers"][name].update(prov)
    if "disabled_models" in data:
        cfg["disabled_models"] = data["disabled_models"]
@ -7635,10 +7483,7 @@ def admin_security_data():
    sort_by = request.args.get("sort", "hits")
    result = []
    for ip, d in ips.items():
-        # 2026-04-30: was substring "192.168." — replaced with the
-        # canonical allowlist so 10.x, IPv6 ::1, and operator-added
-        # entries also stay out of the threat panel.
-        if is_allowlisted(ip):
+        if ip.startswith("192.168."):
            continue
        result.append({
            "ip": ip, "hits": d["hits"], "exploit_scans": d["exploit_scans"],
@ -7677,52 +7522,21 @@ def _kill_connections(ip):
        pass

 def _nginx_ban(ip):
-    """Add IP to nginx deny list and reload.
-
-    Defense in depth (2026-04-30): refuse to write allowlisted IPs
-    to the deny list under ANY circumstance — even a buggy caller
-    that bypassed _track_violation's allowlist check. The deny list
-    is the last write before nginx reload; this is the last place
-    we can stop a bad ban."""
-    if is_allowlisted(ip):
-        sec_log.info("NGINX_BAN_BLOCKED ip=%s — allowlisted, refused to write deny rule", ip)
-        return
+    """Add IP to nginx deny list and reload."""
    import subprocess
-    line = f"deny {ip};\n"
-    # Each step has its own try/except so we know WHICH step failed.
-    # Pre-2026-04-30 a single bare `except: pass` swallowed every
-    # error including PermissionError on the conf file write and
-    # CalledProcessError from systemctl. Sentinel + auto-escalate
-    # logged "BAN" but the request actually never landed in nginx.
-    # Now each failure mode hits sec_log so the operator sees why.
    try:
+        line = f"deny {ip};\n"
        try:
            with open(_NGINX_BAN_FILE) as f:
                if line in f.read():
                    return
        except FileNotFoundError:
            pass
-    except PermissionError as e:
-        sec_log.warning("NGINX_BAN_READ_DENIED file=%s err=%s — won't dedup, attempting append anyway", _NGINX_BAN_FILE, e)
-    try:
        with open(_NGINX_BAN_FILE, "a") as f:
            f.write(line)
-    except PermissionError as e:
-        sec_log.error("NGINX_BAN_WRITE_DENIED ip=%s file=%s err=%s — ban NOT effective at nginx layer", ip, _NGINX_BAN_FILE, e)
-        return
-    except Exception as e:
-        sec_log.error("NGINX_BAN_WRITE_ERROR ip=%s err=%s", ip, e)
-        return
-    try:
-        result = subprocess.run(["systemctl", "reload", "nginx"], capture_output=True, text=True, timeout=5)
-        if result.returncode != 0:
-            sec_log.error("NGINX_RELOAD_FAILED ip=%s rc=%d stderr=%s", ip, result.returncode, result.stderr.strip())
-    except subprocess.TimeoutExpired:
-        sec_log.error("NGINX_RELOAD_TIMEOUT ip=%s — systemctl reload nginx didn't finish in 5s", ip)
-    except FileNotFoundError:
-        sec_log.error("NGINX_RELOAD_NO_SYSTEMCTL ip=%s — systemctl not in PATH for service user", ip)
-    except Exception as e:
-        sec_log.error("NGINX_RELOAD_ERROR ip=%s err=%s", ip, e)
+        subprocess.run(["systemctl", "reload", "nginx"], capture_output=True, timeout=5)
+    except Exception:
+        pass

 def _nginx_unban(ip):
    """Remove IP from nginx deny list and reload."""
@ -7750,13 +7564,8 @@ def admin_ban_ip():
    action = data.get("action", "ban")
    if not ip:
        return jsonify({"error": "IP required"}), 400
-    # Defense in depth (2026-04-30): use the canonical ALLOWLIST_IPS
-    # check rather than a substring on "192.168." which would let
-    # 10.0.0.0/8 LANs and IPv6 loopback ::1 through. Same allowlist
-    # the auto-ban paths now respect — operator can't accidentally
-    # cut off their own LAN gateway.
-    if is_allowlisted(ip):
-        return jsonify({"error": f"refusing to ban allowlisted IP {ip} (in ALLOWLIST_IPS)"}), 400
+    if ip.startswith("192.168."):
+        return jsonify({"error": "Cannot ban LAN addresses"}), 400
    try:
        if action == "ban":
            subprocess.run(["fail2ban-client", "set", "llm-team-exploit", "banip", ip],
@ -8016,10 +7825,7 @@ def admin_mass_ban():
    results = {"success": 0, "failed": 0, "skipped": 0}
    for ip in ip_list:
        ip = ip.strip()
-        # 2026-04-30: substring "192.168." → is_allowlisted so all
-        # trusted networks (LAN gateways, IPv6 loopback, custom
-        # entries) are skipped, not just one /16.
-        if not ip or is_allowlisted(ip):
+        if not ip or ip.startswith("192.168."):
            results["skipped"] += 1
            continue
        try:
@ -12741,17 +12547,7 @@ SENTINEL_MODEL = "qwen2.5:latest"
 SENTINEL_INTERVAL = 300  # 5 minutes
 _sentinel_last_pos = 0
 _sentinel_results = []  # last 50 analyses
-_sentinel_stats = {
-    "scans": 0, "bans": 0, "last_run": None, "last_error": None, "next_scan_ts": 0,
-    # 2026-04-30 J: track consecutive AI-query failures so we can
-    # fire a callback (email alert) when Ollama is sustainedly busy
-    # or unreachable. Pre-fix a model-busy state preserved log
-    # position + skipped the scan with no operator notification.
-    "consecutive_ai_failures": 0,
-    "ai_busy_alerted": False,  # one alert per outage; clears on first success
-}
-SENTINEL_AI_FAILURE_ALERT_THRESHOLD = 3  # consecutive failures before email
-SENTINEL_AI_RETRY_DELAY_SECS = 30        # wait before retry inside same scan
+_sentinel_stats = {"scans": 0, "bans": 0, "last_run": None, "last_error": None, "next_scan_ts": 0}

 def _sentinel_log_entry(msg):
    """Write to sentinel log file."""
@ -12799,10 +12595,7 @@ def _sentinel_scan():
            if token.startswith("ip="):
                ip = token[3:]
                break
-        # 2026-04-30: was substring "192.168." — sentinel now skips
-        # ALL allowlisted IPs from analysis (saves tokens + prevents
-        # the AI judge from getting confused by legitimate admin traffic).
-        if ip and not is_allowlisted(ip):
+        if ip and not ip.startswith("192.168."):
            ip_activity[ip].append(line)

    if not ip_activity:
@ -12867,85 +12660,21 @@ def _sentinel_scan():
    for ip, summary, _ in analysis_items[:15]:  # max 15 IPs per scan
        prompt += summary + "\n"

-    # Query local AI. 2026-04-30 J fix: retry once on model-busy /
-    # connection / timeout, and fire an operator callback when the
-    # AI is sustainedly unreachable. Pre-fix a single Ollama hiccup
-    # silently dropped the scan with no notification — operator only
-    # discovered the gap by checking sentinel-status manually.
-    cfg = load_config()
-    base = cfg["providers"]["ollama"].get("base_url", "http://localhost:11434")
-    body = {
-        "model": SENTINEL_MODEL, "prompt": prompt, "stream": False,
-        "options": {"num_ctx": 4096, "temperature": 0.1},
-    }
-    ai_response = None
-    last_err = None
-    for attempt in range(2):  # original try + 1 retry
-        try:
-            resp = requests.post(f"{base}/api/generate", json=body, timeout=60)
-            resp.raise_for_status()
-            ai_response = resp.json()["response"]
-            break
-        except (requests.exceptions.ConnectionError,
-                requests.exceptions.Timeout,
-                requests.exceptions.ReadTimeout) as e:
-            last_err = f"connection/timeout: {e}"
-            if attempt == 0:
-                _sentinel_log_entry(f"AI_BUSY_RETRY attempt=1 err={str(e)[:80]} sleeping={SENTINEL_AI_RETRY_DELAY_SECS}s")
-                time.sleep(SENTINEL_AI_RETRY_DELAY_SECS)
-                continue
-        except requests.exceptions.HTTPError as e:
-            # 503 Service Unavailable + 429 Too Many = busy; retry.
-            # Other HTTP errors (404 model missing, 400 bad prompt) won't
-            # recover from a retry, so fail fast.
-            sc = getattr(e.response, "status_code", 0)
-            last_err = f"HTTP {sc}: {e}"
-            if sc in (429, 503) and attempt == 0:
-                _sentinel_log_entry(f"AI_BUSY_RETRY attempt=1 status={sc} sleeping={SENTINEL_AI_RETRY_DELAY_SECS}s")
-                time.sleep(SENTINEL_AI_RETRY_DELAY_SECS)
-                continue
-            break
-        except Exception as e:
-            last_err = f"unexpected: {e}"
-            break
-
-    if ai_response is None:
-        _sentinel_stats["consecutive_ai_failures"] += 1
-        _sentinel_stats["last_error"] = f"AI query failed: {last_err}"
-        _sentinel_log_entry(
-            f"AI_ERROR error={last_err} consecutive={_sentinel_stats['consecutive_ai_failures']}"
-        )
-        # Operator callback: fire a security alert email when the AI
-        # has been down for ≥N consecutive scans. One alert per outage —
-        # cleared on next successful scan so a flapping AI doesn't
-        # spam the inbox.
-        if (_sentinel_stats["consecutive_ai_failures"] >= SENTINEL_AI_FAILURE_ALERT_THRESHOLD
-                and not _sentinel_stats["ai_busy_alerted"]):
-            _sentinel_stats["ai_busy_alerted"] = True
-            try:
-                send_security_alert(
-                    f"Sentinel AI unreachable ({_sentinel_stats['consecutive_ai_failures']} consecutive failures)",
-                    f"The sentinel auto-scanner has been unable to reach the LLM judge for "
-                    f"{_sentinel_stats['consecutive_ai_failures']} consecutive scans.\n\n"
-                    f"Last error: {last_err}\n"
-                    f"Model: {SENTINEL_MODEL}\n"
-                    f"Endpoint: {base}\n\n"
-                    f"Threats are being logged and surfaced in the threat-intel UI but "
-                    f"NOT auto-banned during this outage. Manual review recommended.",
-                )
-            except Exception as alert_err:
-                sec_log.error("SENTINEL_ALERT_SEND_FAILED err=%s", alert_err)
+    # Query local AI
+    try:
+        cfg = load_config()
+        base = cfg["providers"]["ollama"].get("base_url", "http://localhost:11434")
+        resp = requests.post(f"{base}/api/generate", json={
+            "model": SENTINEL_MODEL, "prompt": prompt, "stream": False,
+            "options": {"num_ctx": 4096, "temperature": 0.1}
+        }, timeout=60)
+        resp.raise_for_status()
+        ai_response = resp.json()["response"]
+    except Exception as e:
+        _sentinel_stats["last_error"] = f"AI query failed: {e}"
+        _sentinel_log_entry(f"AI_ERROR error={e}")
        return

-    # AI succeeded. Reset the failure counter + clear the alerted flag
-    # so the next outage gets its own notification.
-    if _sentinel_stats["consecutive_ai_failures"] > 0:
-        _sentinel_log_entry(
-            f"AI_RECOVERED after_failures={_sentinel_stats['consecutive_ai_failures']}"
-        )
-    _sentinel_stats["consecutive_ai_failures"] = 0
-    _sentinel_stats["ai_busy_alerted"] = False
-
    # Parse AI response
    try:
        # Extract JSON from response (handle markdown code blocks)
@ -12971,41 +12700,9 @@ def _sentinel_scan():
    ban_futures = []

    def _execute_ban(ip, threat, reason, attack_type):
-        """Execute a single ban — fail2ban + nginx + kill connections.
-
-        2026-04-30 J fix: actually examine the fail2ban-client result.
-        Pre-fix capture_output=True was set but the result thrown away,
-        so a non-zero exit (jail not configured, IP already banned, IPv6
-        format quirk) silently said "AI_BAN" in the log while the
-        attacker walked through unimpeded. Now logs returncode + stderr
-        on failure so the operator sees WHY the ban didn't stick."""
-        try:
-            result = subprocess.run(
-                ["fail2ban-client", "set", "llm-team-exploit", "banip", ip],
-                capture_output=True, text=True, timeout=5,
-            )
-            if result.returncode != 0:
-                sec_log.error(
-                    "FAIL2BAN_BAN_FAILED ip=%s rc=%d stdout=%s stderr=%s",
-                    ip, result.returncode,
-                    result.stdout.strip()[:200],
-                    result.stderr.strip()[:200],
-                )
-                _sentinel_log_entry(
-                    f"FAIL2BAN_FAILED ip={ip} rc={result.returncode} "
-                    f"err={result.stderr.strip()[:120]}"
-                )
-                # Continue anyway — nginx layer is independent and may
-                # still take effect.
-        except subprocess.TimeoutExpired:
-            sec_log.error("FAIL2BAN_TIMEOUT ip=%s — client didn't return in 5s", ip)
-            _sentinel_log_entry(f"FAIL2BAN_TIMEOUT ip={ip}")
-        except FileNotFoundError:
-            sec_log.error("FAIL2BAN_NOT_INSTALLED ip=%s — fail2ban-client not in PATH", ip)
-            _sentinel_log_entry(f"FAIL2BAN_NOT_INSTALLED ip={ip}")
-        except Exception as e:
-            sec_log.error("FAIL2BAN_ERROR ip=%s err=%s", ip, e)
-            _sentinel_log_entry(f"FAIL2BAN_ERROR ip={ip} err={e}")
+        """Execute a single ban — fail2ban + nginx + kill connections."""
+        subprocess.run(["fail2ban-client", "set", "llm-team-exploit", "banip", ip],
+                       capture_output=True, text=True, timeout=5)
        _nginx_ban(ip)
        _kill_connections(ip)
        sec_log.warning("AI_BAN ip=%s threat=%s reason=%s attack=%s", ip, threat, reason, attack_type)
@ -13029,11 +12726,7 @@ def _sentinel_scan():
            if len(_sentinel_results) > 50:
                _sentinel_results.pop(0)

-            # 2026-04-30: was substring "192.168." — replaced with
-            # canonical is_allowlisted so the sentinel's AI verdict
-            # can't accidentally ban any allowlisted IP that slipped
-            # past the analysis filter (defense in depth).
-            if action == "ban" and ip and not is_allowlisted(ip):
+            if action == "ban" and ip and not ip.startswith("192.168."):
                ban_futures.append(executor.submit(_execute_ban, ip, threat, reason, attack_type))
            else:
                _sentinel_log_entry(f"AI_VERDICT ip={ip} threat={threat} action={action} reason={reason} attack_type={attack_type}")