From 266de613b21eec50e0613e6a9819651ce408d4c2 Mon Sep 17 00:00:00 2001
From: root <root@island37.com>
Date: Thu, 30 Apr 2026 03:19:38 -0500
Subject: [PATCH] llm_team_ui: 2 more scrum WARNs (rate_limit eviction + setup
 IP gate)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the 2 remaining surgical-fix WARNs from the 2026-04-30
cross-lineage scrum on this codebase. Three WARNs need bigger
work and stay deferred: OB-3 (root-running web app with shell calls
to fail2ban-client / systemctl / nginx config), the sentinel
prompt-injection WARN, and the CSP unsafe-inline WARN (details at
the end of this message).

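OB-rate-limit (Opus WARN) — _rate_limit dict unbounded
  Pre-fix: per-worker dict with no eviction; an attacker slowly
  rotating IPs leaked memory forever. Fix: lazy eviction sweep
  that runs on each rate_limited() call while the dict holds more
  than 10K entries and drops entries older than 2× RATE_LIMIT_WINDOW
  (traffic that stays under the threshold never pays for a scan).
  Caveat: this is not a hard cap — more than 10K distinct IPs
  inside 2× the window still grow the dict, and every
  non-allowlisted request then pays an O(n) scan until they age
  out. Real production wants a Redis-backed shared counter; this is
  the in-process band-aid that stops slow-rotation growth without
  changing the deploy shape.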

OB-auth-setup (Opus WARN) — first-time setup grant from any IP
  Pre-fix: /api/auth/login with setup=true was gated only by
  COUNT(*) FROM users == 0. If the users table was ever truncated
  or restored empty, the next external visitor (ANY IP) claimed
  admin. Fix: also require the source IP to be in ALLOWLIST_IPS
  (typically loopback + LAN gateway). Local operator setup still
  works; remote attackers hitting the endpoint after an empty-
  users state get 403.

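Both fixes are surgical — one function each (plus a small eviction
helper), no behavior change for the happy path. The eviction sweep
runs O(n) only while n>10K and only drops entries older than 2× the
rate-limit window, which rate_limited() already treats as expired,
so it never removes an active rate-limit count. One visible change
off the happy path: a setup=true request from a non-allowlisted IP
now gets 403 even when setup is already complete (previously 400),
because the IP gate runs before the users-count check.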

Outstanding from the scrum (deferred):
- OB-3 root-running web app: needs split into non-root Flask tier
  + privileged sudo wrapper service. 2-4 hr architectural work.
- Sentinel prompt-injection WARN: feeds attacker-controlled UA/
  path into LLM judge prompt. Needs prompt-template hardening or
  output validation gate before LLM verdicts can issue ban actions.
- CSP unsafe-inline WARN: defeats most XSS protection. Removing
  it requires moving inline scripts to external files (HTML
  refactor).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 llm_team_ui.py | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/llm_team_ui.py b/llm_team_ui.py
index 6ee3e46..303fb4e 100644
--- a/llm_team_ui.py
+++ b/llm_team_ui.py
@@ -199,9 +199,21 @@ def is_allowlisted(ip):
 
 
 def rate_limited(ip, max_req=RATE_LIMIT_MAX):
+    """Rolling rate-limit check. Returns True when the IP has exceeded
+    max_req requests within RATE_LIMIT_WINDOW seconds.
+
+    Cross-lineage scrum 2026-04-30 (Opus WARN): _rate_limit was
+    unbounded per-worker, so an attacker rotating slowly through IPs
+    leaked memory forever. Fix: lazy eviction sweep when the dict
+    grows beyond 10K entries. Real production wants a Redis-backed
+    counter shared across workers; this is the in-process band-aid
+    that prevents runaway growth without changing the deploy shape.
+    """
     if is_allowlisted(ip):
         return False
     now = time.time()
+    if len(_rate_limit) > 10000:
+        _evict_stale_rate_limit(now)
     if ip not in _rate_limit or now - _rate_limit[ip][1] > RATE_LIMIT_WINDOW:
         _rate_limit[ip] = (1, now)
         return False
@@ -212,6 +224,16 @@ def rate_limited(ip, max_req=RATE_LIMIT_MAX):
     return False
 
 
+def _evict_stale_rate_limit(now):
+    """Drop _rate_limit entries older than 2× the window. Called from
+    rate_limited() only when dict growth exceeds 10K — keeps the cost
+    off the hot path for normal traffic."""
+    cutoff = now - (RATE_LIMIT_WINDOW * 2)
+    stale = [ip for ip, (_, start) in _rate_limit.items() if start < cutoff]
+    for ip in stale:
+        del _rate_limit[ip]
+
+
 def is_admin():
     return session.get("role") == "admin"
 
@@ -879,7 +901,21 @@ def auth_login():
         with get_db() as conn:
             with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                 if is_setup:
-                    # First-time setup: create admin
+                    # First-time setup: create admin.
+                    #
+                    # Cross-lineage scrum 2026-04-30 (Opus WARN): pre-fix
+                    # this was gated only by COUNT(*) FROM users == 0.
+                    # If an operator ever truncated/restored the users
+                    # table, the next external visitor (any IP) could
+                    # claim admin. Now also requires the source IP to
+                    # be in ALLOWLIST_IPS — typically loopback + LAN
+                    # gateway — so a remote attacker hitting the setup
+                    # endpoint after an empty-users state can't seize
+                    # the account. Local operator running setup from
+                    # the box itself still works.
+                    if ip not in ALLOWLIST_IPS:
+                        sec_log.warning("SETUP_DENIED ip=%s — first-time setup requires allowlisted IP", ip)
+                        return jsonify({"error": "setup must be initiated from an allowlisted IP (typically localhost or LAN gateway)"}), 403
                     cur.execute("SELECT COUNT(*) as c FROM users")
                     if cur.fetchone()["c"] > 0:
                         return jsonify({"error": "Setup already completed"}), 400