diff --git a/llm_team_ui.py b/llm_team_ui.py
index 2e18522..bc1605f 100644
--- a/llm_team_ui.py
+++ b/llm_team_ui.py
@@ -19,7 +19,32 @@ from flask import Flask, render_template_string, request, jsonify, Response, red
 from functools import wraps
 
 app = Flask(__name__)
-app.secret_key = os.environ.get("FLASK_SECRET", secrets.token_hex(32))
+_flask_secret = os.environ.get("FLASK_SECRET")
+if not _flask_secret:
+    # No secret in the environment: persist one on first run so sessions
+    # survive restarts.
+    _secret_file = os.path.expanduser("~/.llm-team-secret")
+    try:
+        with open(_secret_file, encoding="utf-8") as f:
+            _flask_secret = f.read().strip()
+    except FileNotFoundError:
+        _flask_secret = ""
+    if not _flask_secret:
+        # Missing or empty file: generate a key rather than run with "".
+        _flask_secret = secrets.token_hex(32)
+        # Create the file as 0600 from the start; chmod-after-write leaves
+        # a window where the secret is readable under the default umask.
+        _secret_fd = os.open(
+            _secret_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600
+        )
+        with os.fdopen(_secret_fd, "w", encoding="utf-8") as f:
+            f.write(_flask_secret)
+        # The mode passed to os.open() only applies on creation; tighten a
+        # pre-existing (empty) file as well.
+        os.chmod(_secret_file, 0o600)
+app.secret_key = _flask_secret
+app.config["SESSION_COOKIE_HTTPONLY"] = True
+app.config["SESSION_COOKIE_SAMESITE"] = "Lax"
 
 # ─── SECURITY LOGGING ─────────────────────────────────────────
 # Dedicated security log for fail2ban and audit trail
@@ -67,6 +92,116 @@ RATE_LIMIT_WINDOW = 60
 RATE_LIMIT_MAX = 60
 LOGIN_RATE_MAX = 5
 
+# ─── VIOLATION VELOCITY TRACKER (auto-escalation) ───────────
+# Tracks security violations per IP in a sliding window.
+# When an IP exceeds the threshold, it's auto-banned and sentinel enters high-alert.
+_violation_tracker = {}  # ip -> [timestamp, timestamp, ...]
+VELOCITY_WINDOW = 60  # seconds — sliding window for counting violations
+VELOCITY_THRESHOLD = 3  # violations within window → auto-ban + high-alert
+VELOCITY_MAX_TRACKED_IPS = 10000  # sweep idle IPs once the tracker exceeds this
+HIGH_ALERT_DURATION = 300  # seconds — how long high-alert mode lasts
+_high_alert = {"active": False, "triggered_by": None, "until": 0, "reason": None}
+_original_sentinel_interval = None  # stash the normal interval during high-alert
+
+
+def _track_violation(ip, event_type="unknown"):
+    """Record a security violation for ``ip`` and escalate on a burst.
+
+    Args:
+        ip: Client address the violation is attributed to. Addresses in
+            ALLOWLIST_IPS are ignored.
+        event_type: Short label for the violation, forwarded to the
+            escalation log and alert.
+
+    Returns:
+        True if this violation brought the IP to VELOCITY_THRESHOLD within
+        VELOCITY_WINDOW seconds (auto-escalation ran), else False.
+    """
+    # Never auto-ban localhost / the LAN allowlist: banning those addresses
+    # would lock the operator out of the service.
+    if ip in ALLOWLIST_IPS:
+        return False
+    now = time.time()
+    # Bound memory: every offending address would otherwise leave a key
+    # behind forever. Sweep idle IPs only once the table is large so the
+    # common path stays cheap.
+    if len(_violation_tracker) > VELOCITY_MAX_TRACKED_IPS:
+        for tracked_ip in list(_violation_tracker):
+            if now - _violation_tracker[tracked_ip][-1] >= VELOCITY_WINDOW:
+                del _violation_tracker[tracked_ip]
+    # Keep this IP's violations that are still inside the window, then
+    # record the new one.
+    recent = [t for t in _violation_tracker.get(ip, ()) if now - t < VELOCITY_WINDOW]
+    recent.append(now)
+    _violation_tracker[ip] = recent
+    count = len(recent)
+    if count >= VELOCITY_THRESHOLD:
+        _auto_escalate(ip, count, event_type)
+        return True
+    return False
+
+
+def _auto_escalate(ip, violation_count, event_type):
+    """Auto-ban ``ip`` and switch sentinel to high-alert mode.
+
+    Args:
+        ip: Address to ban.
+        violation_count: Violations counted inside the current window.
+        event_type: Label of the violation that crossed the threshold.
+
+    Every ban step is best-effort: a failure is written to the security
+    log and never propagates to the request being served.
+    """
+    global _original_sentinel_interval, SENTINEL_INTERVAL
+    import subprocess
+
+    sec_log.warning("AUTO_ESCALATE ip=%s violations=%d/%ds type=%s", ip, violation_count, VELOCITY_WINDOW, event_type)
+    _sentinel_log_entry(f"AUTO_ESCALATE ip={ip} violations={violation_count}/{VELOCITY_WINDOW}s type={event_type}")
+
+    def _fail2ban_ban(addr):
+        # check=True surfaces a non-zero exit instead of discarding it.
+        subprocess.run(["fail2ban-client", "set", "llm-team-exploit", "banip", addr],
+                       capture_output=True, timeout=5, check=True)
+
+    # Auto-ban via nginx + fail2ban + connection kill. Each step gets its
+    # own try so one failing layer does not skip the remaining ones.
+    for ban_step in (_nginx_ban, _fail2ban_ban, _kill_connections):
+        try:
+            ban_step(ip)
+        except Exception as e:
+            sec_log.warning("AUTO_ESCALATE_BAN_ERROR ip=%s err=%s", ip, e)
+
+    # Enter high-alert mode — reduce sentinel scan interval
+    if not _high_alert["active"]:
+        _original_sentinel_interval = SENTINEL_INTERVAL
+        SENTINEL_INTERVAL = 30  # aggressive 30-second scans
+        _high_alert["active"] = True
+        _high_alert["triggered_by"] = ip
+        _high_alert["until"] = time.time() + HIGH_ALERT_DURATION
+        _high_alert["reason"] = f"{violation_count} violations in {VELOCITY_WINDOW}s from {ip} ({event_type})"
+        _sentinel_log_entry(f"HIGH_ALERT_ON interval=30s duration={HIGH_ALERT_DURATION}s trigger={ip}")
+        send_security_alert(
+            f"HIGH ALERT: Auto-escalation triggered by {ip}",
+            f"IP {ip} hit {violation_count} violations in {VELOCITY_WINDOW}s.\n"
+            f"Event: {event_type}\nSentinel interval reduced to 30s for {HIGH_ALERT_DURATION}s.\n"
+            f"IP has been auto-banned."
+        )
+
+
+def _check_high_alert_expiry():
+    """Return sentinel to its normal scan interval once high-alert lapses.
+
+    Called from security_checks(); does nothing while high-alert is
+    inactive or its deadline has not yet passed.
+    """
+    global SENTINEL_INTERVAL
+    if _high_alert["active"] and time.time() > _high_alert["until"]:
+        if _original_sentinel_interval is not None:
+            SENTINEL_INTERVAL = _original_sentinel_interval
+        _high_alert["active"] = False
+        _sentinel_log_entry(f"HIGH_ALERT_OFF interval restored to {SENTINEL_INTERVAL}s")
+
+
 # IPs that never get rate-limited (your LAN, localhost)
 ALLOWLIST_IPS = {"127.0.0.1", "::1", "192.168.1.1"}
 # Demo mode state — toggled by admin at runtime
@@ -442,6 +577,10 @@ def admin_required(f):
             if path in DEMO_ALLOWED_POSTS:
                 return f(*args, **kwargs)
 
+            # Showcase mode: also allow optimize + score + deep-optimize (non-destructive, feature demo)
+            if is_showcase and path.endswith(("/optimize", "/deep-optimize", "/score")):
+                return f(*args, **kwargs)
+
             # Block destructive writes for non-admins
             if not is_admin():
                 return jsonify({"error": "demo mode: read-only", "demo": True}), 403
@@ -463,9 +602,13 @@ def security_checks():
     path = request.path
     ua = request.headers.get("User-Agent", "")
 
-    # Exploit scanner detection — log, alert, and block
+    # Check high-alert expiry
+    _check_high_alert_expiry()
+
+    # Exploit scanner detection — log, alert, track velocity, block
     if EXPLOIT_PATTERNS.search(path) or EXPLOIT_PATTERNS.search(request.query_string.decode("utf-8", errors="ignore")):
         sec_log.warning("EXPLOIT_SCAN ip=%s path=%s ua=%s", ip, path, ua)
+        _track_violation(ip, "exploit_scan")
         send_security_alert(
             f"Exploit Scan from {ip}",
             f"IP: {ip}\nPath: {path}\nUser-Agent: {ua}\nTime: {time.strftime('%Y-%m-%d %H:%M:%S')}"
@@ -475,6 +618,7 @@ def security_checks():
     # Rate limit (allowlisted IPs skip)
     if rate_limited(ip):
         sec_log.warning("RATE_LIMITED ip=%s path=%s", ip, path)
+        _track_violation(ip, "rate_limit")
         return jsonify({"error": "rate limited"}), 429
 
     # Always allow these
@@ -495,7 +639,9 @@ def security_headers(response):
     response.headers["X-Frame-Options"] = "DENY"
     response.headers["X-XSS-Protection"] = "1; mode=block"
     response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
-    response.headers["Content-Security-Policy"] = "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self'"
+    response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
+    response.headers["Cross-Origin-Opener-Policy"] = "same-origin"
+    response.headers["Content-Security-Policy"] = "default-src 'self'; script-src 'self' 'unsafe-inline' https://esm.sh; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; font-src 'self' https://fonts.gstatic.com; img-src 'self' data:; connect-src 'self' https://esm.sh"
     response.headers["Permissions-Policy"] = "camera=(), microphone=(), geolocation=()"
     # Theme injection
     if response.content_type and "text/html" in response.content_type and response.status_code == 200:
@@ -753,6 +899,7 @@ def auth_login():
     user = cur.fetchone()
     if not user or not bcrypt.checkpw(password.encode(), user["password_hash"].encode()):
         sec_log.warning("LOGIN_FAILED ip=%s user=%s", ip, username)
+        _track_violation(ip, "login_fail")
         send_security_alert(
             f"Failed Login from {ip}",
             f"IP: {ip}\nUsername attempted: {username}\nTime: {time.strftime('%Y-%m-%d %H:%M:%S')}"
@@ -844,11 +991,37 @@ def demo_set_allowlist():
 def logs_page():
     return LOGS_HTML
 
+_LOG_REDACT_PATTERNS = re.compile(
+    # Key, optionally followed by a closing quote (JSON-style), then "=" or ":".
+    r'''(password|secret|token|api[\s._-]?key|authorization)["']?\s*[=:]\s*'''
+    # Optional auth scheme so "Authorization: Bearer TOKEN" loses the token,
+    # not just the word "Bearer"; then a quoted or bare value.
+    r'''(?:(?:bearer|basic|digest|token)\s+)?(?:"[^"]*"|'[^']*'|\S+)''',
+    re.IGNORECASE,
+)
+
+
+def _redact_log_line(line):
+    """Return ``line`` with credential-looking key/value pairs masked.
+
+    Each match of _LOG_REDACT_PATTERNS is replaced by
+    ``key=***REDACTED***``; the rest of the line is left untouched.
+    """
+    return _LOG_REDACT_PATTERNS.sub(r'\1=***REDACTED***', line)
+
+
 @app.route("/api/admin/logs")
 @admin_required
 def admin_logs():
     source = request.args.get("source", "app")
-    limit = min(int(request.args.get("limit", 100)), 500)
+    # type=int falls back to the default on non-numeric input (no HTTP 500
+    # from a ValueError); the lower clamp matters because limit=0 would turn
+    # _run_log[-limit:] into the whole list and bypass the 500 cap.
+    limit = max(1, min(request.args.get("limit", 100, type=int), 500))
+    # Rate-limit logs endpoint — prevent rapid scraping
+    ip = request.headers.get("X-Real-IP", request.remote_addr)
+    if not is_allowlisted(ip) and rate_limited(ip, max_req=10):
+        return jsonify({"error": "rate limited", "lines": []}), 429
     lines = []
     try:
         if source == "nginx_access":
@@ -863,7 +1036,6 @@ def admin_logs():
         elif source == "runs":
             return jsonify({"lines": [], "runs": list(reversed(_run_log[-limit:]))})
         else:
-            # App log — get from journalctl
             import subprocess
             result = subprocess.run(
                 ["journalctl", "-u", "llm-team-ui", "--no-pager", "-n", str(limit), "--output=short-iso"],
@@ -872,7 +1044,7 @@ def admin_logs():
             lines = result.stdout.strip().split("\n") if result.stdout else []
     except Exception as e:
         lines = [f"Error reading log: {e}"]
-    return jsonify({"lines": [l.rstrip() for l in lines]})
+    return jsonify({"lines": [_redact_log_line(entry.rstrip()) for entry in lines]})
 
 
 LOGS_HTML = r"""
@@ -1807,12 +1979,21 @@ def _auto_score_run(run_id, mode, prompt, responses):
     text = best["text"][:3000]
 
     judge_prompt = (
-        f"Rate the quality of this AI response on a scale of 1-10.\n"
-        f"Consider: relevance to the prompt, completeness, accuracy, clarity, usefulness.\n\n"
-        f"PROMPT: {prompt[:500]}\n\n"
+        f"You are a strict quality judge. Rate this AI response 1-10 using these anchors:\n\n"
+        f"SCORE ANCHORS:\n"
+        f"- 1-2: Refuses to answer, completely off-topic, or gibberish\n"
+        f"- 3-4: Addresses the topic but is shallow, generic, or mostly wrong. Could be a template.\n"
+        f"- 5-6: Adequate but unremarkable. Covers basics, misses nuance. You'd redo it.\n"
+        f"- 7-8: Good. Specific, accurate, well-structured. Minor gaps or generic sections.\n"
+        f"- 9: Excellent. Insightful, comprehensive, actionable. Teaches you something new.\n"
+        f"- 10: Exceptional. Reserve for responses that genuinely surprise you with depth and originality.\n\n"
+        f"IMPORTANT: Most responses are 4-7. A score of 8+ means it's genuinely impressive, not just correct.\n"
+        f"Penalize: vague filler, bullet-point padding, restating the prompt, hedging without substance.\n"
+        f"Reward: specific examples, concrete details, novel angles, honest trade-offs.\n\n"
+        f"PROMPT: {prompt[:500]}\n"
         f"MODE: {mode}\n\n"
         f"RESPONSE:\n{text}\n\n"
-        f"Return ONLY a JSON object: {{\"score\": N, \"reason\": \"one sentence\"}}"
+        f"Return ONLY JSON: {{\"score\": N, \"reason\": \"one sentence why this score\"}}"
     )
 
     judgment = query_model(_SCORE_MODEL, judge_prompt)
@@ -1911,9 +2092,22 @@ HTML = r"""
   .pipeline-step .remove-step:hover { opacity: 1; }
   .add-step-btn { width: 100%; padding: 7px; background: transparent; border: 2px dashed var(--border); border-radius: 2px; color: var(--text2); cursor: pointer; font-size: 11px; margin-bottom: 14px; transition: all 0.15s; font-family: 'JetBrains Mono', monospace; text-transform: uppercase; letter-spacing: 0.5px; }
   .add-step-btn:hover { border-color: var(--accent); color: var(--accent); }
-  .prompt-area { width: 100%; min-height: 90px; background: rgba(0,0,0,0.4); border: 2px solid var(--border); border-radius: 2px; color: var(--text); padding: 14px; font-size: 13px; font-family: 'Inter', sans-serif; resize: vertical; margin-bottom: 10px; line-height: 1.5; }
+  .prompt-wrap { position: relative; margin-bottom: 10px; }
+  .prompt-canvas { position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none; z-index: 1; border-radius: 2px; }
+  .prompt-area { width: 100%; min-height: 90px; background: rgba(0,0,0,0.4); border: 2px solid var(--border); border-radius: 2px; color: var(--text); padding: 14px; font-size: 13px; font-family: 'Inter', sans-serif; resize: vertical; line-height: 1.5; position: relative; z-index: 2; transition: border-color 0.3s, box-shadow 0.3s; }
   .prompt-area:focus { outline: none; border-color: var(--accent); box-shadow: 0 0 0 1px var(--accent), 0 0 20px rgba(226,181,90,0.06); }
-  .prompt-area::placeholder { color: var(--text2); opacity: 0.5; font-family: 'JetBrains Mono', monospace; font-size: 12px; }
+  .prompt-area::placeholder { color: transparent; }
+  .prompt-ghost { position: absolute; top: 2px; left: 2px; right: 2px; bottom: 2px; padding: 14px; font-size: 13px; font-family: 'Inter', sans-serif; line-height: 1.5; color: rgba(122,120,114,0.4); pointer-events: none; z-index: 1; overflow: hidden; white-space: pre-wrap; word-wrap: break-word; }
+  .prompt-ghost .ghost-char { display: inline; transition: opacity 0.6s, color 0.4s; }
+  .prompt-particles { position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none; z-index: 3; overflow: hidden; border-radius: 2px; }
+  .prompt-metrics { display: flex; gap: 8px; font-family: 'JetBrains Mono', monospace; font-size: 9px; color: var(--text2); padding: 4px 0; opacity: 0; transition: opacity 0.3s; }
+  .prompt-wrap:focus-within .prompt-metrics { opacity: 1; }
+  .prompt-metrics .pm-item { display: flex; align-items: center; gap: 3px; }
+  .prompt-metrics .pm-val { color: var(--accent); font-weight: 700; }
+  @keyframes prompt-glow-pulse { 0%,100%{box-shadow:0 0 0 1px var(--accent),0 0 20px rgba(226,181,90,0.06)} 50%{box-shadow:0 0 0 1px var(--accent),0 0 30px rgba(226,181,90,0.12),0 0 60px rgba(226,181,90,0.04)} }
+  .prompt-area.typing:focus { animation: prompt-glow-pulse 2s infinite; }
+  .sample-chip-enter { animation: chip-enter 0.3s ease-out both; }
+  @keyframes chip-enter { from { opacity:0; transform: translateY(8px) scale(0.95); } to { opacity:1; transform: translateY(0) scale(1); } }
   .run-btn { width: 100%; padding: 12px; background: var(--accent); color: #08090c; border: none; border-radius: 2px; font-size: 13px; font-weight: 700; cursor: pointer; transition: all 0.15s; letter-spacing: 1px; font-family: 'JetBrains Mono', monospace; text-transform: uppercase; }
   .run-btn:hover { background: var(--accent2); box-shadow: 0 0 24px rgba(226,181,90,0.2), 0 0 60px rgba(226,181,90,0.06); transform: translateY(-1px); }
   .run-btn:active { transform: translateY(0); }
@@ -2307,7 +2501,20 @@ HTML = r"""