Add triage, backup, and disaster recovery system
- brain-backup: daily borg + pg_dump, 7d/4w/3m retention, cron at 3AM
- brain-triage: full system health check (services, ports, firewall,
headers, kernel, app, DB, disk, backups, security scan)
- brain-recover: restore from backup (full/db/configs/app) + emergency
lockdown mode that blocks all external access except LAN SSH
All accessible via /usr/local/bin/brain-{backup,triage,recover}
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6ea457d01d
commit
2bb910b72c
73
server/backup.sh
Executable file
73
server/backup.sh
Executable file
@ -0,0 +1,73 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# =============================================================================
|
||||||
|
# brain server backup — runs daily via cron
|
||||||
|
# Backs up: app, configs, database, nginx, systemd, sshd_config, fail2ban, sysctl
|
||||||
|
# Storage: /var/backups/brain/ (borg repo + pg dumps)
|
||||||
|
# Retention: 7 daily, 4 weekly, 3 monthly
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Abort on any failed command, unset variable, or failed pipeline stage.
set -euo pipefail

# Everything this script writes lives under BACKUP_DIR, except the log file.
BACKUP_DIR="/var/backups/brain"
BORG_REPO="$BACKUP_DIR/borg-repo"
PG_DIR="$BACKUP_DIR/pg-dumps"
LOG="/var/log/brain-backup.log"
# Names both the pg dump and the borg archive of this run (minute resolution).
TIMESTAMP=$(date +%Y-%m-%d_%H%M)

# Use the caller's BORG_PASSPHRASE when one is exported; otherwise export the
# empty string, which is what this script previously always forced.
export BORG_PASSPHRASE="${BORG_PASSPHRASE:-}"
|
||||||
|
|
||||||
|
# Print one timestamped message to stdout and append the same line to $LOG.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG"
}
|
||||||
|
|
||||||
|
log "=== Backup starting ==="

# --- PostgreSQL dump ---
mkdir -p "$PG_DIR"
log "Dumping PostgreSQL knowledge_base..."
PG_DUMP_FILE="$PG_DIR/knowledge_base_${TIMESTAMP}.dump"

# Dump into a .partial file and rename only on success. A failed pg_dump must
# not leave a truncated knowledge_base_*.dump behind, because the newest file
# matching that pattern is what the recovery and triage scripts pick up.
if ! sudo -u postgres pg_dump -Fc knowledge_base > "${PG_DUMP_FILE}.partial" 2>> "$LOG"; then
  rm -f -- "${PG_DUMP_FILE}.partial"
  log "  pg_dump FAILED — see $LOG"
  exit 1
fi
mv -f -- "${PG_DUMP_FILE}.partial" "$PG_DUMP_FILE"

# Keep last 14 dumps. The glob expands in name order and the names embed the
# timestamp, so the oldest dumps come first; no need to parse `ls` output.
pg_dumps=("$PG_DIR"/knowledge_base_*.dump)
pg_excess=$(( ${#pg_dumps[@]} - 14 ))
if (( pg_excess > 0 )); then
  rm -- "${pg_dumps[@]:0:pg_excess}"
fi
log "  pg_dump OK ($(du -sh "$PG_DUMP_FILE" | cut -f1))"
|
||||||
|
|
||||||
|
# --- Borg backup ---
log "Running borg backup..."

# With borg's default exit codes, 0 is success, 1 means "finished with
# warnings" (e.g. a listed path is missing or a file changed while being read)
# and 2 or more is an error. Capture the status instead of letting `set -e`
# abort on a warning, which would skip pruning and the final report.
borg_rc=0
borg create \
  --stats \
  --compression zstd,3 \
  --exclude '*.pyc' \
  --exclude '*/__pycache__' \
  --exclude '*/.git' \
  --exclude '*/node_modules' \
  "$BORG_REPO::${TIMESTAMP}" \
  /root/llm_team_ui.py \
  /root/llm_team_config.json \
  /home/profit/.env \
  /etc/nginx/sites-available/ \
  /etc/nginx/sites-enabled/ \
  /etc/nginx/nginx.conf \
  /etc/fail2ban/jail.local \
  /etc/fail2ban/jail.d/ \
  /etc/ssh/sshd_config \
  /etc/sysctl.d/99-security.conf \
  /etc/systemd/system/llm-team-ui.service \
  /etc/systemd/system/goaccess.service \
  /etc/systemd/system/ollama.service \
  /etc/systemd/system/minio.service \
  /etc/systemd/system/vault.service \
  "$PG_DIR/" \
  2>> "$LOG" || borg_rc=$?

# The directory excludes carry a leading '*/' because borg's default (fnmatch)
# patterns are matched from the start of the path: a bare '__pycache__' would
# only match a top-level entry of that name.
if (( borg_rc == 0 )); then
  log "  borg OK"
elif (( borg_rc == 1 )); then
  log "  borg finished with warnings (exit 1) — see $LOG"
else
  log "  borg FAILED (exit $borg_rc) — see $LOG"
  exit "$borg_rc"
fi
|
||||||
|
|
||||||
|
# --- Prune old backups ---
log "Pruning old backups..."

# Retention policy from the file header: 7 daily, 4 weekly, 3 monthly.
prune_opts=(
  --keep-daily=7
  --keep-weekly=4
  --keep-monthly=3
)
borg prune "${prune_opts[@]}" "$BORG_REPO" 2>> "$LOG"

# Compact the repository once pruning has finished.
borg compact "$BORG_REPO" 2>> "$LOG"
log "  prune OK"
|
||||||
|
|
||||||
|
# --- Report ---
# First field of `du -sh` is the human-readable size of the whole repository.
REPO_SIZE="$(du -sh "$BORG_REPO" | awk '{ print $1 }')"
log "=== Backup complete. Repo size: ${REPO_SIZE} ==="
|
||||||
286
server/recover.sh
Executable file
286
server/recover.sh
Executable file
@ -0,0 +1,286 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# =============================================================================
|
||||||
|
# brain server recovery — restore from backup or lock down after compromise
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ./recover.sh status Show available backups
|
||||||
|
# ./recover.sh restore [DATE] Full restore: configs + app + DB from backup (latest or DATE)
|
||||||
|
# ./recover.sh lockdown Emergency lockdown — block all external access
|
||||||
|
# ./recover.sh unlock Undo lockdown — restore normal firewall rules
|
||||||
|
# ./recover.sh db [DATE] Restore only the database
|
||||||
|
# ./recover.sh configs [DATE] Restore only configs (nginx, ssh, fail2ban, etc.)
|
||||||
|
# ./recover.sh app [DATE] Restore only the app file + config
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Abort on any failed command, unset variable, or failed pipeline stage.
set -euo pipefail

# Locations shared with the backup script.
BORG_REPO="/var/backups/brain/borg-repo"
PG_DIR="/var/backups/brain/pg-dumps"
# Marker file whose presence means an emergency lockdown is in effect.
LOCKDOWN_FLAG="/var/backups/brain/.lockdown-active"
LOG="/var/log/brain-recovery.log"

# Use the caller's BORG_PASSPHRASE when one is exported; otherwise export the
# empty string, which is what this script previously always forced.
export BORG_PASSPHRASE="${BORG_PASSPHRASE:-}"

# ANSI colour escapes, kept as literal backslash sequences for `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
|
||||||
|
|
||||||
|
# Print one timestamped message to stdout and append the same line to $LOG.
# Backslash escapes in the message (e.g. the colour variables) are expanded.
log() {
  printf '%b\n' "[$(date '+%Y-%m-%d %H:%M:%S')] $1" \
    | tee -a "$LOG"
}
|
||||||
|
|
||||||
|
# Print the command summary and exit with status 1.
# Both call sites (no arguments, unknown command) are error paths, so the text
# goes to stderr and stdout stays clean for callers that capture it.
usage() {
  cat >&2 <<EOF
Usage: $0 {status|restore|lockdown|unlock|db|configs|app} [DATE]

Commands:
 status List available backups
 restore [DATE] Full restore (configs + DB + app)
 lockdown Emergency: block all external traffic
 unlock Undo lockdown, restore normal firewall
 db [DATE] Restore database only
 configs [DATE] Restore server configs only
 app [DATE] Restore app + config file only
EOF
  exit 1
}
|
||||||
|
|
||||||
|
# Print the name of the borg archive to restore from.
# Arguments: $1 - optional DATE; when given, only archives whose NAME contains
#                 this text (fixed string, not a regex) are considered.
# Outputs:   the last matching archive name in `borg list` order, or nothing.
# Returns:   0 even when nothing matches, so that callers running under
#            `set -e` / `pipefail` reach their own "no archive found" message
#            instead of exiting silently; non-zero only if `borg list` fails.
get_archive() {
  local date="${1:-}"
  borg list "$BORG_REPO" | awk -v want="$date" '
    want == "" || index($1, want) { latest = $1 }
    END { if (latest != "") print latest }
  '
}
|
||||||
|
|
||||||
|
# --- STATUS ---
|
||||||
|
# Show what can be restored: borg archives, PostgreSQL dumps, lockdown state.
# Listings are captured before being trimmed with `head`, so a long listing
# cannot make the pipeline fail under `pipefail` (SIGPIPE) and be reported as
# "not found" after it was already printed.
cmd_status() {
  local archives dumps repo_size

  echo -e "${CYAN}=== Available Backups ===${NC}"

  echo -e "\n${CYAN}Borg archives:${NC}"
  if archives=$(borg list "$BORG_REPO" 2>/dev/null); then
    if [[ -n "$archives" ]]; then
      head -n 20 <<<"$archives"
    fi
    repo_size=$(du -sh "$BORG_REPO" | cut -f1)
    echo -e " Repo size: $repo_size"
  else
    echo " No borg archives found"
  fi

  echo -e "\n${CYAN}PostgreSQL dumps:${NC}"
  # `ls` is used for display only (size and date columns); it fails when the
  # glob matches nothing, which selects the "No dumps found" branch.
  if dumps=$(ls -lht "$PG_DIR"/knowledge_base_*.dump 2>/dev/null); then
    head -n 10 <<<"$dumps"
  else
    echo " No dumps found"
  fi

  if [[ -f "$LOCKDOWN_FLAG" ]]; then
    echo -e "\n${RED}*** LOCKDOWN IS ACTIVE ***${NC}"
    echo " Run '$0 unlock' to restore normal access"
  fi
}
|
||||||
|
|
||||||
|
# --- LOCKDOWN ---
|
||||||
|
# Emergency lockdown: deny all traffic except SSH from the LAN and outbound
# DNS/HTTP/HTTPS, stop nginx, and record who is connected.
# Environment: BRAIN_LAN_CIDR - subnet that keeps SSH access
#              (default 192.168.1.0/24).
cmd_lockdown() {
  local lan_cidr="${BRAIN_LAN_CIDR:-192.168.1.0/24}"

  log "${RED}=== EMERGENCY LOCKDOWN ===${NC}"
  log "Blocking all external access except SSH from LAN..."

  # Save current rules (best effort: ufw may be missing or inactive)
  ufw status verbose > /var/backups/brain/ufw-pre-lockdown.txt 2>/dev/null || true

  # Raise the flag BEFORE touching the firewall. `ufw reset` disables ufw, so
  # if a later step fails under `set -e` the host is left half-configured and
  # `unlock`, which refuses to run without the flag, must still work.
  touch "$LOCKDOWN_FLAG"

  # Reset and lock down
  ufw --force reset >/dev/null 2>&1
  ufw default deny incoming
  ufw default deny outgoing

  # Only allow SSH from LAN
  ufw allow from "$lan_cidr" to any port 22
  # Allow DNS out (needed for recovery)
  ufw allow out 53
  # Allow apt out (needed for fixes)
  ufw allow out 80/tcp
  ufw allow out 443/tcp

  ufw --force enable >/dev/null 2>&1

  # Stop public-facing services (best effort)
  systemctl stop nginx 2>/dev/null || true

  log "${RED}LOCKDOWN ACTIVE — only LAN SSH allowed${NC}"
  log "Services stopped: nginx"
  log "Run '$0 unlock' when safe"

  # Log who's connected right now
  log "Current connections:"
  ss -tnp | tee -a "$LOG"

  # Snapshot auth log (best effort)
  log "Saving auth log snapshot..."
  cp /var/log/auth.log "/var/backups/brain/auth-lockdown-$(date +%Y%m%d_%H%M%S).log" 2>/dev/null || true
}
|
||||||
|
|
||||||
|
# --- UNLOCK ---
|
||||||
|
# Undo a lockdown: rebuild the normal ufw rule set, restart services and clear
# the lockdown flag. Exits 0 without changes when no lockdown is active.
# Environment: BRAIN_LAN_CIDR - subnet granted LAN-only access
#              (default 192.168.1.0/24).
cmd_unlock() {
  local lan_cidr="${BRAIN_LAN_CIDR:-192.168.1.0/24}"

  if [[ ! -f "$LOCKDOWN_FLAG" ]]; then
    echo "Lockdown is not active."
    exit 0
  fi

  log "${GREEN}=== RESTORING NORMAL ACCESS ===${NC}"

  # Restore firewall
  ufw --force reset >/dev/null 2>&1
  ufw default deny incoming
  ufw default allow outgoing

  # Open to everyone
  ufw allow 22/tcp
  ufw allow 80/tcp comment "HTTP web server"
  ufw allow 443/tcp comment "HTTPS web server"
  ufw allow 3030/tcp

  # LAN only. The LAN allow for port 9000 is added before the general deny so
  # that it is listed ahead of it.
  ufw allow from "$lan_cidr" to any port 139,445 proto tcp
  ufw allow from "$lan_cidr" to any port 137,138 proto udp
  ufw allow from "$lan_cidr" to any port 5000 comment "LLM Team UI"
  ufw allow from "$lan_cidr" to any port 9000 comment "MinIO LAN only"
  ufw deny 9000 comment "Block MinIO external"
  ufw allow from "$lan_cidr" to any port 11434 comment "Ollama internal"
  ufw allow from "$lan_cidr" to any port 18789 comment "OpenClaw brain"

  ufw --force enable >/dev/null 2>&1

  # Restart services
  systemctl start nginx
  systemctl restart llm-team-ui
  systemctl restart fail2ban

  # Cleared last: if anything above fails, unlock can simply be run again.
  rm -f "$LOCKDOWN_FLAG"
  log "${GREEN}Normal access restored. All services restarted.${NC}"
}
|
||||||
|
|
||||||
|
# --- RESTORE DB ---
|
||||||
|
# Restore the knowledge_base database from a pg_dump file in $PG_DIR.
# Arguments: $1 - optional DATE prefix (e.g. 2024-01-15); default: newest dump.
# Input:     reads a y/N confirmation from stdin.
# Exits:     1 if no dump matches, the dump is unreadable, or the restore
#            fails; 0 if the user declines.
cmd_db() {
  local date="${1:-}"
  local dump="" candidate confirm

  # Pick the newest matching dump by mtime using a glob rather than `ls | head`:
  # under `pipefail` + `set -e` a failing `ls` used to end the script before
  # the "no dump found" message below could be printed. On equal mtimes the
  # later name (i.e. later timestamp) wins.
  for candidate in "$PG_DIR"/knowledge_base_"${date}"*.dump; do
    [[ -e "$candidate" ]] || continue
    if [[ -z "$dump" || ! "$dump" -nt "$candidate" ]]; then
      dump=$candidate
    fi
  done

  if [[ -z "$dump" ]]; then
    echo -e "${RED}No matching database dump found${NC}"
    exit 1
  fi

  # Make sure the archive is readable BEFORE the live database is dropped.
  if ! sudo -u postgres pg_restore --list "$dump" >/dev/null 2>&1; then
    echo -e "${RED}Dump $(basename "$dump") is unreadable or corrupt — database left untouched${NC}"
    exit 1
  fi

  log "Restoring database from: $(basename "$dump")"
  echo -e "${YELLOW}This will DROP and recreate knowledge_base. Continue? [y/N]${NC}"
  read -r confirm
  [[ "$confirm" == "y" ]] || exit 0

  # Stop app to release connections
  systemctl stop llm-team-ui 2>/dev/null || true

  if ! {
    sudo -u postgres dropdb --if-exists knowledge_base &&
      sudo -u postgres createdb -O kbuser knowledge_base &&
      sudo -u postgres pg_restore -d knowledge_base "$dump" 2>&1 | tee -a "$LOG"
  }; then
    # As before, the app is not started against a failed restore; the
    # difference is that the failure is now reported instead of silent.
    log "${RED}Database restore FAILED — llm-team-ui left stopped${NC}"
    exit 1
  fi

  systemctl start llm-team-ui
  log "${GREEN}Database restored from $(basename "$dump")${NC}"
}
|
||||||
|
|
||||||
|
# --- RESTORE CONFIGS ---
|
||||||
|
cmd_configs() {
|
||||||
|
local date="${1:-}"
|
||||||
|
local archive
|
||||||
|
archive=$(get_archive "$date")
|
||||||
|
|
||||||
|
if [ -z "$archive" ]; then
|
||||||
|
echo -e "${RED}No matching borg archive found${NC}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "Restoring configs from archive: $archive"
|
||||||
|
echo -e "${YELLOW}This will overwrite current server configs. Continue? [y/N]${NC}"
|
||||||
|
read -r confirm
|
||||||
|
[ "$confirm" = "y" ] || exit 0
|
||||||
|
|
||||||
|
local tmpdir
|
||||||
|
tmpdir=$(mktemp -d)
|
||||||
|
cd "$tmpdir"
|
||||||
|
borg extract "$BORG_REPO::${archive}"
|
||||||
|
|
||||||
|
# Restore each config
|
||||||
|
cp -v etc/nginx/sites-available/* /etc/nginx/sites-available/ 2>/dev/null || true
|
||||||
|
cp -v etc/nginx/nginx.conf /etc/nginx/nginx.conf 2>/dev/null || true
|
||||||
|
cp -v etc/fail2ban/jail.local /etc/fail2ban/jail.local 2>/dev/null || true
|
||||||
|
cp -v etc/ssh/sshd_config /etc/ssh/sshd_config 2>/dev/null || true
|
||||||
|
cp -v etc/sysctl.d/99-security.conf /etc/sysctl.d/99-security.conf 2>/dev/null || true
|
||||||
|
cp -v etc/systemd/system/llm-team-ui.service /etc/systemd/system/ 2>/dev/null || true
|
||||||
|
cp -v etc/systemd/system/goaccess.service /etc/systemd/system/ 2>/dev/null || true
|
||||||
|
|
||||||
|
# Reload everything
|
||||||
|
nginx -t && systemctl reload nginx
|
||||||
|
sshd -t && systemctl reload sshd
|
||||||
|
systemctl restart fail2ban
|
||||||
|
sysctl --system >/dev/null 2>&1
|
||||||
|
systemctl daemon-reload
|
||||||
|
|
||||||
|
rm -rf "$tmpdir"
|
||||||
|
log "${GREEN}Configs restored from $archive and services reloaded${NC}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- RESTORE APP ---
|
||||||
|
# Restore the application file, its JSON config and (when archived) the .env
# file from a borg archive, then restart llm-team-ui.
# Arguments: $1 - optional DATE passed to get_archive; default: latest archive.
# Exits:     1 if no archive matches.
cmd_app() {
  local date="${1:-}"
  local archive tmpdir
  archive=$(get_archive "$date")

  if [[ -z "$archive" ]]; then
    echo -e "${RED}No matching borg archive found${NC}"
    exit 1
  fi

  log "Restoring app from archive: $archive"

  tmpdir=$(mktemp -d)
  # Extract and copy inside a subshell: the `cd` does not leak into the caller
  # and the EXIT trap removes the extracted tree (it contains the .env file and
  # the pg dumps) even when a step fails.
  (
    trap 'rm -rf -- "$tmpdir"' EXIT
    cd "$tmpdir"
    borg extract "$BORG_REPO::${archive}"

    cp -v root/llm_team_ui.py /root/llm_team_ui.py
    cp -v root/llm_team_config.json /root/llm_team_config.json
    # .env stays best-effort, but a failed copy is reported instead of hidden.
    if [[ -e home/profit/.env ]]; then
      cp -v home/profit/.env /home/profit/.env \
        || log "${YELLOW}  could not restore /home/profit/.env${NC}"
    fi
  )

  systemctl restart llm-team-ui

  log "${GREEN}App restored from $archive and restarted${NC}"
}
|
||||||
|
|
||||||
|
# --- FULL RESTORE ---
|
||||||
|
# Full restore: configs, then app, then database, all from the same DATE.
# Arguments: $1 - optional DATE forwarded to each step; default: latest.
# Input:     reads a y/N confirmation from stdin (the configs and db steps
#            ask again on their own).
# Exits:     0 if the user declines.
cmd_restore() {
  local date="${1:-}"
  # Kept local so the answer does not leak into the steps called below.
  local confirm

  log "${CYAN}=== FULL RESTORE ===${NC}"
  echo -e "${YELLOW}This will restore configs, app, and database. Continue? [y/N]${NC}"
  read -r confirm
  [[ "$confirm" == "y" ]] || exit 0

  cmd_configs "$date"
  cmd_app "$date"
  cmd_db "$date"

  log "${GREEN}=== FULL RESTORE COMPLETE ===${NC}"
  log "Run ./triage.sh to verify system health"
}
|
||||||
|
|
||||||
|
# --- Main ---
# A command word is mandatory; usage exits with status 1.
if (( $# < 1 )); then
  usage
fi

action=$1
when="${2:-}"

# Commands that accept an optional DATE share one arm and are dispatched to the
# cmd_<name> function of the same name.
case "$action" in
  status)
    cmd_status
    ;;
  lockdown)
    cmd_lockdown
    ;;
  unlock)
    cmd_unlock
    ;;
  restore | db | configs | app)
    "cmd_${action}" "$when"
    ;;
  *)
    usage
    ;;
esac
|
||||||
218
server/triage.sh
Executable file
218
server/triage.sh
Executable file
@ -0,0 +1,218 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# =============================================================================
|
||||||
|
# brain server triage — quick system health check
|
||||||
|
# Run after incident, reboot, or anytime something feels off.
|
||||||
|
# Exits 0 if healthy, 1 if issues found.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# ANSI colour escapes, kept as literal backslash sequences for `echo -e`.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
CYAN="\033[0;36m"
NC="\033[0m"

# Number of failed checks; incremented by fail() and used for the exit status.
ISSUES=0
|
||||||
|
|
||||||
|
# Report a passed check: green tick followed by the message in $1.
pass() {
  printf '%b\n' " ${GREEN}✓${NC} $1"
}
|
||||||
|
# Report a failed check (red cross followed by the message in $1) and count it
# in ISSUES. Must run in the current shell for the counter to stick.
fail() {
  printf '%b\n' " ${RED}✗${NC} $1"
  ISSUES=$(( ISSUES + 1 ))
}
|
||||||
|
# Report a warning: yellow '!' followed by the message in $1. Not counted in ISSUES.
warn() {
  printf '%b\n' " ${YELLOW}!${NC} $1"
}
|
||||||
|
# Print a blank line followed by a cyan "[title]" heading for the next group of checks.
section() {
  printf '%b\n' "\n${CYAN}[$1]${NC}"
}
|
||||||
|
|
||||||
|
echo -e "${CYAN}=== brain server triage — $(date) ===${NC}"

# --- Services ---
section "Services"
# Every unit listed here must be reported active by systemd.
services=(llm-team-ui nginx ollama postgresql minio vault fail2ban ufw)
for svc in "${services[@]}"; do
  if ! systemctl is-active --quiet "$svc" 2>/dev/null; then
    fail "$svc NOT running"
    continue
  fi
  pass "$svc running"
done
|
||||||
|
|
||||||
|
# --- Ports ---
section "Ports"

# Check that something is listening on a TCP port, optionally on a given address.
# Arguments: $1 - port number
#            $2 - human-readable service name used in the report
#            $3 - text the local address must contain ("" accepts any address)
# Reports through pass/fail. `ss` is queried once and its Local Address:Port
# column (field 4) is matched on the exact port suffix; the first listener
# found is the one that gets checked.
check_port() {
  local port=$1 name=$2 bind=$3
  local actual_bind

  actual_bind=$(ss -tlnp | awk -v suffix=":${port}" '
    !found && substr($4, length($4) - length(suffix) + 1) == suffix {
      print $4
      found = 1
    }')

  if [[ -z "$actual_bind" ]]; then
    fail "$name NOT listening on port $port"
    return
  fi

  # Plain substring comparison: the dots in an IP address are not wildcards.
  if [[ -n "$bind" && "$actual_bind" != *"$bind"* ]]; then
    fail "$name on $port — bound to $actual_bind (expected $bind)"
  else
    pass "$name listening on $actual_bind"
  fi
}
|
||||||
|
# One entry per expected listener: port|service name|required bind address.
# An empty third field accepts any bind address.
port_checks=(
  '5000|Flask app|127.0.0.1'
  '80|Nginx HTTP|'
  '11434|Ollama|'
  '5432|PostgreSQL|127.0.0.1'
  '9000|MinIO|'
)
for port_spec in "${port_checks[@]}"; do
  IFS='|' read -r spec_port spec_name spec_bind <<<"$port_spec"
  check_port "$spec_port" "$spec_name" "$spec_bind"
done
|
||||||
|
|
||||||
|
# --- Firewall ---
section "Firewall"
if ! ufw status | grep -q "Status: active"; then
  fail "UFW is NOT active"
else
  pass "UFW active"
  # The default incoming policy is only inspected when ufw is active.
  if ! ufw status verbose | grep -q "Default: deny (incoming)"; then
    fail "Default is NOT deny incoming"
  else
    pass "Default deny incoming"
  fi
fi
|
||||||
|
|
||||||
|
# --- Fail2ban ---
section "Fail2ban"
# Last field of the "Number of jail" line printed by `fail2ban-client status`.
jail_count=$(fail2ban-client status 2>/dev/null | awk '/Number of jail/ { print $NF }')
# No output (client missing or not responding) counts as zero jails, so the
# failure line always shows a number instead of "Only  fail2ban jails".
jail_count=${jail_count:-0}
if [ "$jail_count" -ge 3 ] 2>/dev/null; then
  pass "$jail_count jails active"
else
  fail "Only $jail_count fail2ban jails (expected >= 3)"
fi

# Check for banned IPs (informational only; never counted as an issue)
banned=$(fail2ban-client status sshd 2>/dev/null | awk '/Currently banned/ { print $NF }')
if [ "${banned:-0}" -gt 0 ] 2>/dev/null; then
  warn "$banned IPs currently banned on SSH"
fi
|
||||||
|
|
||||||
|
# --- SSH ---
section "SSH"

# True when FILE has an uncommented line setting KEYWORD to exactly VALUE.
# Arguments: $1 - keyword, $2 - value, $3 - file (default /etc/ssh/sshd_config)
# The match is anchored at both ends, so "MaxAuthTries 30" no longer passes as
# "MaxAuthTries 3"; leading and separating whitespace may vary.
# NOTE(review): only this one file is read — drop-ins under sshd_config.d are
# not inspected.
sshd_option_is() {
  local keyword=$1 value=$2 file=${3:-/etc/ssh/sshd_config}
  grep -Eq "^[[:space:]]*${keyword}[[:space:]]+${value}[[:space:]]*\$" "$file"
}

if sshd_option_is PermitRootLogin no; then
  pass "Root login disabled"
else
  fail "Root login NOT disabled"
fi

# Password auth is only a warning, not an issue.
if sshd_option_is PasswordAuthentication no; then
  pass "Password auth disabled"
else
  warn "Password auth still enabled (SSH keys not yet set up)"
fi

if sshd_option_is MaxAuthTries 3; then
  pass "Max auth tries = 3"
else
  fail "Max auth tries not set to 3"
fi
|
||||||
|
|
||||||
|
# --- Nginx headers ---
section "Nginx Security Headers"
# Response headers of a HEAD request to the local web server.
headers=$(curl -sI http://127.0.0.1 2>/dev/null)
for h in "X-Frame-Options" "X-Content-Type-Options" "Referrer-Policy" "X-XSS-Protection"; do
  # Match "Name:" at the start of a line, case-insensitively, so that a header
  # name merely mentioned inside another header's value does not count.
  if grep -qi "^${h}:" <<<"$headers"; then
    pass "$h present"
  else
    fail "$h MISSING"
  fi
done
|
||||||
|
|
||||||
|
# --- Kernel hardening ---
section "Kernel"

# Expected: net.ipv4.conf.all.rp_filter = 1
rp_filter=$(sysctl -n net.ipv4.conf.all.rp_filter 2>/dev/null)
case "$rp_filter" in
  1) pass "Reverse path filtering enabled" ;;
  *) fail "Reverse path filtering disabled" ;;
esac

# Expected: net.ipv4.conf.all.send_redirects = 0
send_redirects=$(sysctl -n net.ipv4.conf.all.send_redirects 2>/dev/null)
case "$send_redirects" in
  0) pass "ICMP redirects disabled" ;;
  *) fail "ICMP redirects NOT disabled" ;;
esac
|
||||||
|
|
||||||
|
# --- App health ---
section "Application"
# Only the status code of a request to the local web server is kept.
http_code=$(curl -so /dev/null -w '%{http_code}' http://127.0.0.1/ 2>/dev/null)
case "$http_code" in
  200 | 302)
    pass "App responding (HTTP $http_code)"
    ;;
  *)
    fail "App NOT responding (HTTP $http_code)"
    ;;
esac
|
||||||
|
|
||||||
|
# --- Database ---
section "Database"
if ! sudo -u postgres psql -d knowledge_base -c "SELECT 1;" >/dev/null 2>&1; then
  fail "Cannot connect to knowledge_base"
else
  # Count the tables in the public schema; spaces are stripped from psql's -t output.
  tables=$(
    sudo -u postgres psql -d knowledge_base -t \
      -c "SELECT count(*) FROM information_schema.tables WHERE table_schema='public';" \
      2>/dev/null | tr -d ' '
  )
  pass "knowledge_base reachable ($tables tables)"
fi
|
||||||
|
|
||||||
|
# --- Disk ---
section "Disk"
# Used-space percentage of the root filesystem as a bare number.
usage=$(df --output=pcent / | tail -n 1 | tr -d ' %')
# Below 80 is fine, 80-94 is a warning, 95 and above is an issue.
if [ "$usage" -lt 80 ]; then
  pass "Disk usage: ${usage}%"
else
  if [ "$usage" -lt 95 ]; then
    warn "Disk usage: ${usage}% (getting full)"
  else
    fail "Disk usage: ${usage}% (critical!)"
  fi
fi
|
||||||
|
|
||||||
|
# --- Backups ---
section "Backups"
if [ -d /var/backups/brain/borg-repo ]; then
  # The backup and recovery scripts export BORG_PASSPHRASE (empty unless
  # overridden) before calling borg; do the same here so `borg list` behaves
  # the same way and is not left waiting for a passphrase.
  last_backup=$(BORG_PASSPHRASE="${BORG_PASSPHRASE:-}" \
    borg list /var/backups/brain/borg-repo 2>/dev/null | tail -1)
  if [ -n "$last_backup" ]; then
    pass "Borg repo exists, last: $last_backup"
  else
    warn "Borg repo exists but empty — run backup.sh"
  fi
else
  fail "No borg repo at /var/backups/brain/borg-repo"
fi

# Newest pg dump by modification time, found with a glob instead of parsing `ls`.
last_dump=""
for dump_file in /var/backups/brain/pg-dumps/knowledge_base_*.dump; do
  [ -e "$dump_file" ] || continue
  if [ -z "$last_dump" ] || [ "$dump_file" -nt "$last_dump" ]; then
    last_dump=$dump_file
  fi
done

if [ -n "$last_dump" ]; then
  # Age in whole days, from the file's mtime.
  dump_age=$(( ($(date +%s) - $(stat -c %Y "$last_dump")) / 86400 ))
  if [ "$dump_age" -le 1 ]; then
    pass "Latest pg dump: $(basename "$last_dump")"
  else
    warn "Latest pg dump is ${dump_age} days old: $(basename "$last_dump")"
  fi
else
  warn "No PostgreSQL dumps found"
fi
|
||||||
|
|
||||||
|
# --- Suspicious activity ---
section "Security Scan"

# Check for unexpected SUID binaries. The scan runs once and its result is
# reused for both the count and the listing.
suid_files=$(find /usr/local -perm -4000 -type f 2>/dev/null)
if [ -z "$suid_files" ]; then
  pass "No unexpected SUID binaries in /usr/local"
else
  suid_count=$(grep -c '' <<<"$suid_files")
  fail "$suid_count SUID binaries found in /usr/local — investigate"
  printf '%s\n' "$suid_files"
fi

# Check for unauthorized cron jobs. -x excludes only the entry named exactly
# "root"; a plain `grep -v root` also hid any user whose name contains "root".
cron_users=$(ls /var/spool/cron/crontabs/ 2>/dev/null | grep -vx root)
if [ -z "$cron_users" ]; then
  pass "No non-root user crontabs"
else
  warn "Crontabs found for: $cron_users"
fi

# Check for recent failed SSH logins. Both unit names are queried because the
# OpenSSH unit is called "ssh" on some distributions and "sshd" on others.
failed_ssh=$(journalctl -u ssh -u sshd --since "1 hour ago" --no-pager 2>/dev/null | grep -c "Failed password" || true)
if [ "$failed_ssh" -gt 10 ]; then
  warn "$failed_ssh failed SSH logins in last hour"
elif [ "$failed_ssh" -gt 0 ]; then
  pass "$failed_ssh failed SSH logins in last hour (normal)"
else
  pass "No failed SSH logins in last hour"
fi
|
||||||
|
|
||||||
|
# --- Summary ---
# The exit status mirrors the result: 0 when no check failed, 1 otherwise.
printf '\n'
if [ "$ISSUES" -eq 0 ]; then
  printf '%b\n' "${GREEN}=== ALL CHECKS PASSED ===${NC}"
  exit 0
fi
printf '%b\n' "${RED}=== $ISSUES ISSUE(S) FOUND ===${NC}"
exit 1
|
||||||
Loading…
x
Reference in New Issue
Block a user