Compare commits

..

1 Commits

Author SHA1 Message Date
root
12ab391679 scrum: swap mistral:latest defaults to ollama_cloud::gpt-oss:120b
Three default model lists hardcoded mistral:latest as the fallback
when config.get("model_sets") / config.get("models") returns nothing. Per
feedback_no_mistral.md, mistral 7B has decoder-level JSON malformation
issues (0/5 fill rate on A/B) and is a liability in any path that
depends on structured output from the model.

Swapping to ollama_cloud::gpt-oss:120b (Phase 20 T3 cloud tier)
keeps the defaults reliable for the meta-pipeline orchestrator
(line 9959), the fallback model list for empty Ollama (10084), and
the worker pool default (11835). All three are DEFAULTS — any caller
passing explicit config.model_sets / config.models is unaffected.

Routing works because query_model's "::" provider prefix already
resolves ollama_cloud via commit fa6ccff. Activation requires
OLLAMA_CLOUD_API_KEY or a key saved via the Admin UI; this PR does
not change credential behavior, only the default model list.

Surfaced by lakehouse scrum-master pipeline run 2026-04-24, findings
confirmed by grep verification against the live code.
2026-04-24 06:09:34 -05:00
2 changed files with 43 additions and 363 deletions

1
.gitignore vendored
View File

@ -2,4 +2,3 @@ __pycache__/
*.pyc *.pyc
.env .env
*.log *.log
.memory/

View File

@ -3,7 +3,6 @@
import json import json
import os import os
import sys
import time import time
import threading import threading
import secrets import secrets
@ -37,23 +36,8 @@ app.config["SESSION_COOKIE_HTTPONLY"] = True
app.config["SESSION_COOKIE_SAMESITE"] = "Lax" app.config["SESSION_COOKIE_SAMESITE"] = "Lax"
# ─── SECURITY LOGGING ───────────────────────────────────────── # ─── SECURITY LOGGING ─────────────────────────────────────────
# Dedicated security log for fail2ban and audit trail. # Dedicated security log for fail2ban and audit trail
# _sec_handler = logging.FileHandler("/var/log/llm-team-security.log")
# Cross-lineage scrum 2026-04-30 (Opus BLOCK OB-1): wrapped in
# try/except. Pre-fix this raised PermissionError at import time
# when the service user couldn't write /var/log/llm-team-security.log,
# crashing the app before Flask started. Now falls back to stderr;
# sec_log still works (ban events still land in journald via stderr),
# but the app starts. Operator should still create the file with
# proper perms on production deploy. Path is overridable via
# LLM_TEAM_SECURITY_LOG env var.
_LOG_PATH = os.environ.get("LLM_TEAM_SECURITY_LOG", "/var/log/llm-team-security.log")
try:
_sec_handler = logging.FileHandler(_LOG_PATH)
except (PermissionError, FileNotFoundError, OSError) as _log_err:
print(f"[security] WARNING: can't open {_LOG_PATH} ({_log_err}); "
f"falling back to stderr.", file=sys.stderr, flush=True)
_sec_handler = logging.StreamHandler(sys.stderr)
_sec_handler.setFormatter(logging.Formatter("%(asctime)s %(message)s")) _sec_handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
sec_log = logging.getLogger("security") sec_log = logging.getLogger("security")
sec_log.addHandler(_sec_handler) sec_log.addHandler(_sec_handler)
@ -109,24 +93,8 @@ _original_sentinel_interval = None # stash the normal interval during high-aler
def _track_violation(ip, event_type="unknown"): def _track_violation(ip, event_type="unknown"):
"""Record a security violation. If velocity threshold exceeded, auto-escalate. """Record a security violation. If velocity threshold exceeded, auto-escalate."""
Cross-lineage scrum 2026-04-30 follow-up: OB-4's path-bypass for
admins was incomplete — _track_violation is called from 3 sites
(exploit_scan, rate_limit, login_fail) and only the exploit one
had the bypass. An admin hitting rate-limit + login-typo + a
legit URL containing 'UNION' within 60s could still self-ban
via the OTHER paths. Now is_allowlisted bails early so allowlisted
IPs never accumulate violations from ANY path. Defense in depth —
_auto_escalate also re-checks below.
Eviction sweep when tracker grows >10K entries (same pattern as
_rate_limit; both had identical unbounded-dict WARNs)."""
if is_allowlisted(ip):
return False
now = time.time() now = time.time()
if len(_violation_tracker) > 10000:
_evict_stale_violation_tracker(now)
if ip not in _violation_tracker: if ip not in _violation_tracker:
_violation_tracker[ip] = [] _violation_tracker[ip] = []
_violation_tracker[ip].append(now) _violation_tracker[ip].append(now)
@ -139,26 +107,8 @@ def _track_violation(ip, event_type="unknown"):
return False return False
def _evict_stale_violation_tracker(now):
"""Drop _violation_tracker entries whose newest timestamp is past
the window. Called from _track_violation only when dict exceeds
10K — the hot path stays untouched for normal traffic."""
cutoff = now - VELOCITY_WINDOW
stale = [ip for ip, ts in _violation_tracker.items() if not ts or max(ts) < cutoff]
for ip in stale:
del _violation_tracker[ip]
def _auto_escalate(ip, violation_count, event_type): def _auto_escalate(ip, violation_count, event_type):
"""Auto-ban IP and switch sentinel to high-alert mode. """Auto-ban IP and switch sentinel to high-alert mode."""
Defense in depth (2026-04-30): _track_violation already short-
circuits for allowlisted IPs, but if a future code path calls
_auto_escalate directly we still want the allowlist guard. Bail
early; nothing escalates against a trusted IP."""
if is_allowlisted(ip):
sec_log.info("AUTO_ESCALATE_BLOCKED ip=%s — allowlisted, refused to ban", ip)
return
global _original_sentinel_interval, SENTINEL_INTERVAL global _original_sentinel_interval, SENTINEL_INTERVAL
sec_log.warning("AUTO_ESCALATE ip=%s violations=%d/%ds type=%s", ip, violation_count, VELOCITY_WINDOW, event_type) sec_log.warning("AUTO_ESCALATE ip=%s violations=%d/%ds type=%s", ip, violation_count, VELOCITY_WINDOW, event_type)
_sentinel_log_entry(f"AUTO_ESCALATE ip={ip} violations={violation_count}/{VELOCITY_WINDOW}s type={event_type}") _sentinel_log_entry(f"AUTO_ESCALATE ip={ip} violations={violation_count}/{VELOCITY_WINDOW}s type={event_type}")
@ -200,19 +150,8 @@ def _check_high_alert_expiry():
# IPs that never get rate-limited (your LAN, localhost) # IPs that never get rate-limited (your LAN, localhost)
ALLOWLIST_IPS = {"127.0.0.1", "::1", "192.168.1.1"} ALLOWLIST_IPS = {"127.0.0.1", "::1", "192.168.1.1"}
# Demo mode state — toggled by admin at runtime. # Demo mode state — toggled by admin at runtime
# _demo_mode = {"active": True, "started_by": "boot", "showcase": True}
# Cross-lineage scrum 2026-04-30 (Opus BLOCK OB-5): pre-fix this
# defaulted to active=True, meaning fresh installs shipped with
# public unauthenticated access enabled — login_required let demo
# users straight through. Combined with /api/run + /api/imagegen
# proxies, that was an open LLM/compute abuse surface from first
# boot. Now defaults to active=False; operators flip it on
# explicitly via the admin UI or LLM_TEAM_DEMO_MODE=1 env override
# (the env override exists for the demo systemd unit so the public
# devop.live deployment doesn't need a manual toggle on every restart).
_DEMO_DEFAULT = os.environ.get("LLM_TEAM_DEMO_MODE", "0") == "1"
_demo_mode = {"active": _DEMO_DEFAULT, "started_by": "boot" if _DEMO_DEFAULT else "off", "showcase": _DEMO_DEFAULT}
# Routes that demo users CAN trigger (read-like POSTs — enrichment, self-analysis, team runs) # Routes that demo users CAN trigger (read-like POSTs — enrichment, self-analysis, team runs)
DEMO_ALLOWED_POSTS = { DEMO_ALLOWED_POSTS = {
@ -233,21 +172,9 @@ def is_allowlisted(ip):
def rate_limited(ip, max_req=RATE_LIMIT_MAX): def rate_limited(ip, max_req=RATE_LIMIT_MAX):
"""Rolling rate-limit check. Returns True when the IP has exceeded
max_req requests within RATE_LIMIT_WINDOW seconds.
Cross-lineage scrum 2026-04-30 (Opus WARN): _rate_limit was
unbounded per-worker, so an attacker rotating slowly through IPs
leaked memory forever. Fix: lazy eviction sweep when the dict
grows beyond 10K entries. Real production wants a Redis-backed
counter shared across workers; this is the in-process band-aid
that prevents runaway growth without changing the deploy shape.
"""
if is_allowlisted(ip): if is_allowlisted(ip):
return False return False
now = time.time() now = time.time()
if len(_rate_limit) > 10000:
_evict_stale_rate_limit(now)
if ip not in _rate_limit or now - _rate_limit[ip][1] > RATE_LIMIT_WINDOW: if ip not in _rate_limit or now - _rate_limit[ip][1] > RATE_LIMIT_WINDOW:
_rate_limit[ip] = (1, now) _rate_limit[ip] = (1, now)
return False return False
@ -258,16 +185,6 @@ def rate_limited(ip, max_req=RATE_LIMIT_MAX):
return False return False
def _evict_stale_rate_limit(now):
"""Drop _rate_limit entries older than 2× the window. Called from
rate_limited() only when dict growth exceeds 10K — keeps the cost
off the hot path for normal traffic."""
cutoff = now - (RATE_LIMIT_WINDOW * 2)
stale = [ip for ip, (_, start) in _rate_limit.items() if start < cutoff]
for ip in stale:
del _rate_limit[ip]
def is_admin(): def is_admin():
return session.get("role") == "admin" return session.get("role") == "admin"
@ -643,24 +560,8 @@ def security_checks():
# Check high-alert expiry # Check high-alert expiry
_check_high_alert_expiry() _check_high_alert_expiry()
# Exploit scanner detection — log, alert, track velocity, block. # Exploit scanner detection — log, alert, track velocity, block
# if EXPLOIT_PATTERNS.search(path) or EXPLOIT_PATTERNS.search(request.query_string.decode("utf-8", errors="ignore")):
# Cross-lineage scrum 2026-04-30 (Opus BLOCK OB-4): pre-fix the
# path regex matched on substrings like UNION, SELECT, ;-- and
# auto-banned after 3 hits. Admin URLs containing those keywords
# in query strings (e.g. an LLM team named "select-rebrand" or a
# docs link to /admin/select_a_mode) self-banned the admin's IP.
# Now: skip the path-based check for authenticated admins from
# an allowlisted IP. The user-agent + body checks (sentinel) still
# apply. Allowlisted-IP admins clicking weird URLs no longer
# lock themselves out.
_skip_exploit_check = False
if ip in ALLOWLIST_IPS and session.get("role") == "admin":
_skip_exploit_check = True
if not _skip_exploit_check and (
EXPLOIT_PATTERNS.search(path) or
EXPLOIT_PATTERNS.search(request.query_string.decode("utf-8", errors="ignore"))
):
sec_log.warning("EXPLOIT_SCAN ip=%s path=%s ua=%s", ip, path, ua) sec_log.warning("EXPLOIT_SCAN ip=%s path=%s ua=%s", ip, path, ua)
_track_violation(ip, "exploit_scan") _track_violation(ip, "exploit_scan")
send_security_alert( send_security_alert(
@ -935,21 +836,7 @@ def auth_login():
with get_db() as conn: with get_db() as conn:
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
if is_setup: if is_setup:
# First-time setup: create admin. # First-time setup: create admin
#
# Cross-lineage scrum 2026-04-30 (Opus WARN): pre-fix
# this was gated only by COUNT(*) FROM users == 0.
# If an operator ever truncated/restored the users
# table, the next external visitor (any IP) could
# claim admin. Now also requires the source IP to
# be in ALLOWLIST_IPS — typically loopback + LAN
# gateway — so a remote attacker hitting the setup
# endpoint after an empty-users state can't seize
# the account. Local operator running setup from
# the box itself still works.
if ip not in ALLOWLIST_IPS:
sec_log.warning("SETUP_DENIED ip=%s — first-time setup requires allowlisted IP", ip)
return jsonify({"error": "setup must be initiated from an allowlisted IP (typically localhost or LAN gateway)"}), 403
cur.execute("SELECT COUNT(*) as c FROM users") cur.execute("SELECT COUNT(*) as c FROM users")
if cur.fetchone()["c"] > 0: if cur.fetchone()["c"] > 0:
return jsonify({"error": "Setup already completed"}), 400 return jsonify({"error": "Setup already completed"}), 400
@ -1462,22 +1349,6 @@ async function loadThreats() {
}); });
// Mass action buttons // Mass action buttons
var spacer = document.createElement('div'); spacer.style.flex = '1'; toolbar.appendChild(spacer); var spacer = document.createElement('div'); spacer.style.flex = '1'; toolbar.appendChild(spacer);
// Master "select all on this page" checkbox (2026-04-30 J UX request).
// Mirrors the per-row .ip-check style; toggles every visible row.
// Three-state: unchecked (none selected), checked (all selected),
// indeterminate (partial). updateSelCount keeps it in sync as
// individual rows are toggled.
var selAllWrap = document.createElement('label');
selAllWrap.style.cssText = 'display:flex;align-items:center;gap:6px;font-family:JetBrains Mono,monospace;font-size:9px;text-transform:uppercase;letter-spacing:0.5px;color:#7a7872;cursor:pointer';
selAllWrap.title = 'Toggle every IP on this page';
var selAll = document.createElement('input'); selAll.type = 'checkbox';
selAll.id = 'sel-all';
selAll.style.cssText = 'width:16px;height:16px;cursor:pointer;accent-color:#e2b55a';
selAll.onchange = function(){ toggleAllChecks(this.checked); };
selAllWrap.appendChild(selAll);
var selAllLabel = document.createElement('span'); selAllLabel.textContent = 'all';
selAllWrap.appendChild(selAllLabel);
toolbar.appendChild(selAllWrap);
var selCount = document.createElement('span'); selCount.id = 'sel-count'; var selCount = document.createElement('span'); selCount.id = 'sel-count';
selCount.style.cssText = 'font-family:JetBrains Mono,monospace;font-size:10px;color:#7a7872'; selCount.style.cssText = 'font-family:JetBrains Mono,monospace;font-size:10px;color:#7a7872';
toolbar.appendChild(selCount); toolbar.appendChild(selCount);
@ -1601,33 +1472,9 @@ async function loadThreats() {
var currentSort = 'hits'; var currentSort = 'hits';
function updateSelCount() { function updateSelCount() {
var all = document.querySelectorAll('.ip-check'); var checks = document.querySelectorAll('.ip-check:checked');
var checked = document.querySelectorAll('.ip-check:checked');
var el = document.getElementById('sel-count'); var el = document.getElementById('sel-count');
if (el) el.textContent = checked.length ? checked.length + ' selected' : ''; if (el) el.textContent = checks.length ? checks.length + ' selected' : '';
// Sync the master "all" checkbox to reflect the page's actual state.
// Three states: none unchecked, all checked, partial indeterminate.
// Indeterminate is the visual "half-tick" most browsers render — gives
// operators a clear "you've got some but not all selected" hint.
var master = document.getElementById('sel-all');
if (master) {
if (checked.length === 0) {
master.checked = false; master.indeterminate = false;
} else if (checked.length === all.length) {
master.checked = true; master.indeterminate = false;
} else {
master.indeterminate = true;
}
}
}
function toggleAllChecks(checked) {
// Master "select all" handler flips every per-row checkbox on the
// page to match the master's state. Used by the toolbar's `[ ] all`
// checkbox so operators don't have to click each threat individually
// before hitting Ban Selected. (2026-04-30 J UX request.)
document.querySelectorAll('.ip-check').forEach(function(cb){ cb.checked = checked; });
updateSelCount();
} }
async function massAction(action) { async function massAction(action) {
@ -2041,18 +1888,7 @@ def get_api_key(provider_name):
env_map = {"openrouter": "OPENROUTER_API_KEY", "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY", "ollama_cloud": "OLLAMA_CLOUD_API_KEY"} env_map = {"openrouter": "OPENROUTER_API_KEY", "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY", "ollama_cloud": "OLLAMA_CLOUD_API_KEY"}
return os.environ.get(env_map.get(provider_name, ""), "") return os.environ.get(env_map.get(provider_name, ""), "")
# Cross-lineage scrum 2026-04-30 (Opus BLOCK OB-2 + harness LLM DB_DSN = "dbname=knowledge_base user=kbuser password=IPbLBA0EQI8u4TeM2YZrbm1OAy5nSwqC host=localhost"
# convergent finding): DB_DSN previously had the password hardcoded
# in source. Same `kbuser`/`knowledge_base` DSN was leaked in
# voice-ai's audiosocket_bridge.py + sales_assistant.py — confirmed
# canonical leak by 3 independent reviewers across 2 sessions. Now
# sourced from env (set via systemd EnvironmentFile=/etc/llm-team-ui.env).
# No silent fallback to the leaked literal — fail loud. The leaked
# password is in git history regardless; rotate it in Postgres.
DB_DSN = os.environ.get("LLM_TEAM_DB_DSN", "")
if not DB_DSN:
print("[llm-team-ui] WARNING: LLM_TEAM_DB_DSN not set — DB ops will fail. "
"Set in systemd EnvironmentFile or shell env.", file=sys.stderr, flush=True)
def get_db(): def get_db():
return psycopg2.connect(DB_DSN) return psycopg2.connect(DB_DSN)
@ -7635,10 +7471,7 @@ def admin_security_data():
sort_by = request.args.get("sort", "hits") sort_by = request.args.get("sort", "hits")
result = [] result = []
for ip, d in ips.items(): for ip, d in ips.items():
# 2026-04-30: was substring "192.168." — replaced with the if ip.startswith("192.168."):
# canonical allowlist so 10.x, IPv6 ::1, and operator-added
# entries also stay out of the threat panel.
if is_allowlisted(ip):
continue continue
result.append({ result.append({
"ip": ip, "hits": d["hits"], "exploit_scans": d["exploit_scans"], "ip": ip, "hits": d["hits"], "exploit_scans": d["exploit_scans"],
@ -7677,52 +7510,21 @@ def _kill_connections(ip):
pass pass
def _nginx_ban(ip): def _nginx_ban(ip):
"""Add IP to nginx deny list and reload. """Add IP to nginx deny list and reload."""
Defense in depth (2026-04-30): refuse to write allowlisted IPs
to the deny list under ANY circumstance — even a buggy caller
that bypassed _track_violation's allowlist check. The deny list
is the last write before nginx reload; this is the last place
we can stop a bad ban."""
if is_allowlisted(ip):
sec_log.info("NGINX_BAN_BLOCKED ip=%s — allowlisted, refused to write deny rule", ip)
return
import subprocess import subprocess
line = f"deny {ip};\n"
# Each step has its own try/except so we know WHICH step failed.
# Pre-2026-04-30 a single bare `except: pass` swallowed every
# error including PermissionError on the conf file write and
# CalledProcessError from systemctl. Sentinel + auto-escalate
# logged "BAN" but the request actually never landed in nginx.
# Now each failure mode hits sec_log so the operator sees why.
try: try:
line = f"deny {ip};\n"
try: try:
with open(_NGINX_BAN_FILE) as f: with open(_NGINX_BAN_FILE) as f:
if line in f.read(): if line in f.read():
return return
except FileNotFoundError: except FileNotFoundError:
pass pass
except PermissionError as e:
sec_log.warning("NGINX_BAN_READ_DENIED file=%s err=%s — won't dedup, attempting append anyway", _NGINX_BAN_FILE, e)
try:
with open(_NGINX_BAN_FILE, "a") as f: with open(_NGINX_BAN_FILE, "a") as f:
f.write(line) f.write(line)
except PermissionError as e: subprocess.run(["systemctl", "reload", "nginx"], capture_output=True, timeout=5)
sec_log.error("NGINX_BAN_WRITE_DENIED ip=%s file=%s err=%s — ban NOT effective at nginx layer", ip, _NGINX_BAN_FILE, e) except Exception:
return pass
except Exception as e:
sec_log.error("NGINX_BAN_WRITE_ERROR ip=%s err=%s", ip, e)
return
try:
result = subprocess.run(["systemctl", "reload", "nginx"], capture_output=True, text=True, timeout=5)
if result.returncode != 0:
sec_log.error("NGINX_RELOAD_FAILED ip=%s rc=%d stderr=%s", ip, result.returncode, result.stderr.strip())
except subprocess.TimeoutExpired:
sec_log.error("NGINX_RELOAD_TIMEOUT ip=%s — systemctl reload nginx didn't finish in 5s", ip)
except FileNotFoundError:
sec_log.error("NGINX_RELOAD_NO_SYSTEMCTL ip=%s — systemctl not in PATH for service user", ip)
except Exception as e:
sec_log.error("NGINX_RELOAD_ERROR ip=%s err=%s", ip, e)
def _nginx_unban(ip): def _nginx_unban(ip):
"""Remove IP from nginx deny list and reload.""" """Remove IP from nginx deny list and reload."""
@ -7750,13 +7552,8 @@ def admin_ban_ip():
action = data.get("action", "ban") action = data.get("action", "ban")
if not ip: if not ip:
return jsonify({"error": "IP required"}), 400 return jsonify({"error": "IP required"}), 400
# Defense in depth (2026-04-30): use the canonical ALLOWLIST_IPS if ip.startswith("192.168."):
# check rather than a substring on "192.168." which would let return jsonify({"error": "Cannot ban LAN addresses"}), 400
# 10.0.0.0/8 LANs and IPv6 loopback ::1 through. Same allowlist
# the auto-ban paths now respect — operator can't accidentally
# cut off their own LAN gateway.
if is_allowlisted(ip):
return jsonify({"error": f"refusing to ban allowlisted IP {ip} (in ALLOWLIST_IPS)"}), 400
try: try:
if action == "ban": if action == "ban":
subprocess.run(["fail2ban-client", "set", "llm-team-exploit", "banip", ip], subprocess.run(["fail2ban-client", "set", "llm-team-exploit", "banip", ip],
@ -8016,10 +7813,7 @@ def admin_mass_ban():
results = {"success": 0, "failed": 0, "skipped": 0} results = {"success": 0, "failed": 0, "skipped": 0}
for ip in ip_list: for ip in ip_list:
ip = ip.strip() ip = ip.strip()
# 2026-04-30: substring "192.168." → is_allowlisted so all if not ip or ip.startswith("192.168."):
# trusted networks (LAN gateways, IPv6 loopback, custom
# entries) are skipped, not just one /16.
if not ip or is_allowlisted(ip):
results["skipped"] += 1 results["skipped"] += 1
continue continue
try: try:
@ -10162,7 +9956,7 @@ def _run_meta_pipeline(pipeline_id):
stages = pipe["stages"] or ["extract", "research", "validate", "synthesize"] stages = pipe["stages"] or ["extract", "research", "validate", "synthesize"]
data_source = pipe["data_source"] data_source = pipe["data_source"]
config = pipe["config"] or {} config = pipe["config"] or {}
model_sets = config.get("model_sets", [["qwen2.5:latest"], ["mistral:latest"], ["gemma2:latest"]]) model_sets = config.get("model_sets", [["qwen2.5:latest"], ["ollama_cloud::gpt-oss:120b"], ["gemma2:latest"]])
max_iterations = config.get("max_iterations", len(model_sets)) max_iterations = config.get("max_iterations", len(model_sets))
_meta_status[pipeline_id] = {"stage": 0, "substep": "Gathering data...", "progress": 0, "iteration": 0} _meta_status[pipeline_id] = {"stage": 0, "substep": "Gathering data...", "progress": 0, "iteration": 0}
@ -10287,7 +10081,7 @@ def create_meta_pipeline():
all_m = [m["name"] for m in resp.json().get("models", []) if m["size"] > 1e9] all_m = [m["name"] for m in resp.json().get("models", []) if m["size"] > 1e9]
models = [[m] for m in all_m[:4]] models = [[m] for m in all_m[:4]]
except Exception: except Exception:
models = [["qwen2.5:latest"], ["mistral:latest"]] models = [["qwen2.5:latest"], ["ollama_cloud::gpt-oss:120b"]]
config = {"model_sets": models, "max_iterations": len(models)} config = {"model_sets": models, "max_iterations": len(models)}
with get_db() as conn: with get_db() as conn:
@ -12038,7 +11832,7 @@ def run_refine(config):
start = time.time() * 1000 start = time.time() * 1000
prompt = config["prompt"] prompt = config["prompt"]
orchestrator = config.get("orchestrator", "qwen2.5:latest") orchestrator = config.get("orchestrator", "qwen2.5:latest")
workers = config.get("models", ["qwen2.5:latest", "mistral:latest"]) workers = config.get("models", ["qwen2.5:latest", "ollama_cloud::gpt-oss:120b"])
max_stages = config.get("max_stages", 5) max_stages = config.get("max_stages", 5)
yield sse({"type": "clear"}) yield sse({"type": "clear"})
steps = [] steps = []
@ -12741,17 +12535,7 @@ SENTINEL_MODEL = "qwen2.5:latest"
SENTINEL_INTERVAL = 300 # 5 minutes SENTINEL_INTERVAL = 300 # 5 minutes
_sentinel_last_pos = 0 _sentinel_last_pos = 0
_sentinel_results = [] # last 50 analyses _sentinel_results = [] # last 50 analyses
_sentinel_stats = { _sentinel_stats = {"scans": 0, "bans": 0, "last_run": None, "last_error": None, "next_scan_ts": 0}
"scans": 0, "bans": 0, "last_run": None, "last_error": None, "next_scan_ts": 0,
# 2026-04-30 J: track consecutive AI-query failures so we can
# fire a callback (email alert) when Ollama is sustainedly busy
# or unreachable. Pre-fix a model-busy state preserved log
# position + skipped the scan with no operator notification.
"consecutive_ai_failures": 0,
"ai_busy_alerted": False, # one alert per outage; clears on first success
}
SENTINEL_AI_FAILURE_ALERT_THRESHOLD = 3 # consecutive failures before email
SENTINEL_AI_RETRY_DELAY_SECS = 30 # wait before retry inside same scan
def _sentinel_log_entry(msg): def _sentinel_log_entry(msg):
"""Write to sentinel log file.""" """Write to sentinel log file."""
@ -12799,10 +12583,7 @@ def _sentinel_scan():
if token.startswith("ip="): if token.startswith("ip="):
ip = token[3:] ip = token[3:]
break break
# 2026-04-30: was substring "192.168." — sentinel now skips if ip and not ip.startswith("192.168."):
# ALL allowlisted IPs from analysis (saves tokens + prevents
# the AI judge from getting confused by legitimate admin traffic).
if ip and not is_allowlisted(ip):
ip_activity[ip].append(line) ip_activity[ip].append(line)
if not ip_activity: if not ip_activity:
@ -12867,85 +12648,21 @@ def _sentinel_scan():
for ip, summary, _ in analysis_items[:15]: # max 15 IPs per scan for ip, summary, _ in analysis_items[:15]: # max 15 IPs per scan
prompt += summary + "\n" prompt += summary + "\n"
# Query local AI. 2026-04-30 J fix: retry once on model-busy / # Query local AI
# connection / timeout, and fire an operator callback when the try:
# AI is sustainedly unreachable. Pre-fix a single Ollama hiccup cfg = load_config()
# silently dropped the scan with no notification — operator only base = cfg["providers"]["ollama"].get("base_url", "http://localhost:11434")
# discovered the gap by checking sentinel-status manually. resp = requests.post(f"{base}/api/generate", json={
cfg = load_config() "model": SENTINEL_MODEL, "prompt": prompt, "stream": False,
base = cfg["providers"]["ollama"].get("base_url", "http://localhost:11434") "options": {"num_ctx": 4096, "temperature": 0.1}
body = { }, timeout=60)
"model": SENTINEL_MODEL, "prompt": prompt, "stream": False, resp.raise_for_status()
"options": {"num_ctx": 4096, "temperature": 0.1}, ai_response = resp.json()["response"]
} except Exception as e:
ai_response = None _sentinel_stats["last_error"] = f"AI query failed: {e}"
last_err = None _sentinel_log_entry(f"AI_ERROR error={e}")
for attempt in range(2): # original try + 1 retry
try:
resp = requests.post(f"{base}/api/generate", json=body, timeout=60)
resp.raise_for_status()
ai_response = resp.json()["response"]
break
except (requests.exceptions.ConnectionError,
requests.exceptions.Timeout,
requests.exceptions.ReadTimeout) as e:
last_err = f"connection/timeout: {e}"
if attempt == 0:
_sentinel_log_entry(f"AI_BUSY_RETRY attempt=1 err={str(e)[:80]} sleeping={SENTINEL_AI_RETRY_DELAY_SECS}s")
time.sleep(SENTINEL_AI_RETRY_DELAY_SECS)
continue
except requests.exceptions.HTTPError as e:
# 503 Service Unavailable + 429 Too Many = busy; retry.
# Other HTTP errors (404 model missing, 400 bad prompt) won't
# recover from a retry, so fail fast.
sc = getattr(e.response, "status_code", 0)
last_err = f"HTTP {sc}: {e}"
if sc in (429, 503) and attempt == 0:
_sentinel_log_entry(f"AI_BUSY_RETRY attempt=1 status={sc} sleeping={SENTINEL_AI_RETRY_DELAY_SECS}s")
time.sleep(SENTINEL_AI_RETRY_DELAY_SECS)
continue
break
except Exception as e:
last_err = f"unexpected: {e}"
break
if ai_response is None:
_sentinel_stats["consecutive_ai_failures"] += 1
_sentinel_stats["last_error"] = f"AI query failed: {last_err}"
_sentinel_log_entry(
f"AI_ERROR error={last_err} consecutive={_sentinel_stats['consecutive_ai_failures']}"
)
# Operator callback: fire a security alert email when the AI
# has been down for ≥N consecutive scans. One alert per outage —
# cleared on next successful scan so a flapping AI doesn't
# spam the inbox.
if (_sentinel_stats["consecutive_ai_failures"] >= SENTINEL_AI_FAILURE_ALERT_THRESHOLD
and not _sentinel_stats["ai_busy_alerted"]):
_sentinel_stats["ai_busy_alerted"] = True
try:
send_security_alert(
f"Sentinel AI unreachable ({_sentinel_stats['consecutive_ai_failures']} consecutive failures)",
f"The sentinel auto-scanner has been unable to reach the LLM judge for "
f"{_sentinel_stats['consecutive_ai_failures']} consecutive scans.\n\n"
f"Last error: {last_err}\n"
f"Model: {SENTINEL_MODEL}\n"
f"Endpoint: {base}\n\n"
f"Threats are being logged and surfaced in the threat-intel UI but "
f"NOT auto-banned during this outage. Manual review recommended.",
)
except Exception as alert_err:
sec_log.error("SENTINEL_ALERT_SEND_FAILED err=%s", alert_err)
return return
# AI succeeded. Reset the failure counter + clear the alerted flag
# so the next outage gets its own notification.
if _sentinel_stats["consecutive_ai_failures"] > 0:
_sentinel_log_entry(
f"AI_RECOVERED after_failures={_sentinel_stats['consecutive_ai_failures']}"
)
_sentinel_stats["consecutive_ai_failures"] = 0
_sentinel_stats["ai_busy_alerted"] = False
# Parse AI response # Parse AI response
try: try:
# Extract JSON from response (handle markdown code blocks) # Extract JSON from response (handle markdown code blocks)
@ -12971,41 +12688,9 @@ def _sentinel_scan():
ban_futures = [] ban_futures = []
def _execute_ban(ip, threat, reason, attack_type): def _execute_ban(ip, threat, reason, attack_type):
"""Execute a single ban — fail2ban + nginx + kill connections. """Execute a single ban — fail2ban + nginx + kill connections."""
subprocess.run(["fail2ban-client", "set", "llm-team-exploit", "banip", ip],
2026-04-30 J fix: actually examine the fail2ban-client result. capture_output=True, text=True, timeout=5)
Pre-fix capture_output=True was set but the result thrown away,
so a non-zero exit (jail not configured, IP already banned, IPv6
format quirk) silently said "AI_BAN" in the log while the
attacker walked through unimpeded. Now logs returncode + stderr
on failure so the operator sees WHY the ban didn't stick."""
try:
result = subprocess.run(
["fail2ban-client", "set", "llm-team-exploit", "banip", ip],
capture_output=True, text=True, timeout=5,
)
if result.returncode != 0:
sec_log.error(
"FAIL2BAN_BAN_FAILED ip=%s rc=%d stdout=%s stderr=%s",
ip, result.returncode,
result.stdout.strip()[:200],
result.stderr.strip()[:200],
)
_sentinel_log_entry(
f"FAIL2BAN_FAILED ip={ip} rc={result.returncode} "
f"err={result.stderr.strip()[:120]}"
)
# Continue anyway — nginx layer is independent and may
# still take effect.
except subprocess.TimeoutExpired:
sec_log.error("FAIL2BAN_TIMEOUT ip=%s — client didn't return in 5s", ip)
_sentinel_log_entry(f"FAIL2BAN_TIMEOUT ip={ip}")
except FileNotFoundError:
sec_log.error("FAIL2BAN_NOT_INSTALLED ip=%s — fail2ban-client not in PATH", ip)
_sentinel_log_entry(f"FAIL2BAN_NOT_INSTALLED ip={ip}")
except Exception as e:
sec_log.error("FAIL2BAN_ERROR ip=%s err=%s", ip, e)
_sentinel_log_entry(f"FAIL2BAN_ERROR ip={ip} err={e}")
_nginx_ban(ip) _nginx_ban(ip)
_kill_connections(ip) _kill_connections(ip)
sec_log.warning("AI_BAN ip=%s threat=%s reason=%s attack=%s", ip, threat, reason, attack_type) sec_log.warning("AI_BAN ip=%s threat=%s reason=%s attack=%s", ip, threat, reason, attack_type)
@ -13029,11 +12714,7 @@ def _sentinel_scan():
if len(_sentinel_results) > 50: if len(_sentinel_results) > 50:
_sentinel_results.pop(0) _sentinel_results.pop(0)
# 2026-04-30: was substring "192.168." — replaced with if action == "ban" and ip and not ip.startswith("192.168."):
# canonical is_allowlisted so the sentinel's AI verdict
# can't accidentally ban any allowlisted IP that slipped
# past the analysis filter (defense in depth).
if action == "ban" and ip and not is_allowlisted(ip):
ban_futures.append(executor.submit(_execute_ban, ip, threat, reason, attack_type)) ban_futures.append(executor.submit(_execute_ban, ip, threat, reason, attack_type))
else: else:
_sentinel_log_entry(f"AI_VERDICT ip={ip} threat={threat} action={action} reason={reason} attack_type={attack_type}") _sentinel_log_entry(f"AI_VERDICT ip={ip} threat={threat} action={action} reason={reason} attack_type={attack_type}")