- brain-backup: daily borg + pg_dump, 7d/4w/3m retention, cron at 3AM
- brain-triage: full system health check (services, ports, firewall,
headers, kernel, app, DB, disk, backups, security scan)
- brain-recover: restore from backup (full/db/configs/app) + emergency
lockdown mode that blocks all external access except LAN SSH
All accessible via /usr/local/bin/brain-{backup,triage,recover}
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
287 lines
8.6 KiB
Bash
Executable File
287 lines
8.6 KiB
Bash
Executable File
#!/bin/bash
|
|
# =============================================================================
|
|
# brain server recovery — restore from backup or lock down after compromise
|
|
#
|
|
# Usage:
|
|
# ./recover.sh status Show available backups
|
|
# ./recover.sh restore [DATE] Full restore: configs + app + DB from backup (latest or DATE)
|
|
# ./recover.sh lockdown Emergency lockdown — block all external access
|
|
# ./recover.sh unlock Undo lockdown — restore normal firewall rules
|
|
# ./recover.sh db [DATE] Restore only the database
|
|
# ./recover.sh configs [DATE] Restore only configs (nginx, ssh, fail2ban, etc.)
|
|
# ./recover.sh app [DATE] Restore only the app file + config
|
|
# =============================================================================
|
|
|
|
# Strict mode: abort on unhandled command failures, on use of unset
# variables, and when any stage of a pipeline fails.
set -euo pipefail

# Backup locations, the lockdown marker file, and the file log() appends to.
# These are constants for the lifetime of the script.
readonly BORG_REPO="/var/backups/brain/borg-repo"
readonly PG_DIR="/var/backups/brain/pg-dumps"
readonly LOCKDOWN_FLAG="/var/backups/brain/.lockdown-active"
readonly LOG="/var/log/brain-recovery.log"

# Passphrase exported for borg. A value already present in the caller's
# environment is kept; when none is set it defaults to the empty string,
# which is what this script always exported before.
export BORG_PASSPHRASE="${BORG_PASSPHRASE:-}"

# ANSI colour sequences. They are stored with a literal backslash and only
# become real escape bytes when printed through `echo -e` (or printf %b).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'
|
|
|
|
# log MESSAGE
# Prefixes MESSAGE with a "[YYYY-MM-DD HH:MM:SS]" timestamp, expands backslash
# escapes in it (so the colour variables render), writes the line to stdout
# and appends the same line to $LOG.
log() {
  local line
  line="[$(date '+%Y-%m-%d %H:%M:%S')] $1"
  printf '%b\n' "$line" | tee -a "$LOG"
}
|
|
|
|
# usage
# Prints the command summary to stdout and terminates the script with
# exit status 1.
usage() {
  cat <<EOF
Usage: $0 {status|restore|lockdown|unlock|db|configs|app} [DATE]

Commands:
 status List available backups
 restore [DATE] Full restore (configs + DB + app)
 lockdown Emergency: block all external traffic
 unlock Undo lockdown, restore normal firewall
 db [DATE] Restore database only
 configs [DATE] Restore server configs only
 app [DATE] Restore app + config file only
EOF
  exit 1
}
|
|
|
|
# get_archive [DATE]
# Prints the name (first column of `borg list`) of the archive to restore:
# the last listed line that matches DATE, or the last listed line overall
# when DATE is empty or omitted. Prints nothing when no line matches.
#
# Globals:  BORG_REPO (read)
# Returns:  0 when the listing succeeded, whether or not anything matched;
#           the status of `borg list` when the listing itself failed.
get_archive() {
  local date="${1:-}"
  local archives

  archives=$(borg list "$BORG_REPO") || return

  if [[ -n "$date" ]]; then
    # grep exits 1 when nothing matches. With `set -e -o pipefail` active that
    # status would abort the caller's `archive=$(get_archive ...)` before it
    # can print its own "no matching archive" message, so "no match" is mapped
    # to an empty result here. `--` keeps a DATE starting with '-' from being
    # parsed as a grep option.
    archives=$(grep -- "$date" <<<"$archives") || true
  fi

  [[ -n "$archives" ]] || return 0
  tail -n 1 <<<"$archives" | awk '{print $1}'
}
|
|
|
|
# --- STATUS ---
|
|
# cmd_status
# Prints an overview of what can be restored: up to 20 lines of `borg list`
# plus the repository size, up to 10 PostgreSQL dump files (newest first),
# and a warning banner when the lockdown flag file exists.
#
# Globals: BORG_REPO, PG_DIR, LOCKDOWN_FLAG, CYAN, RED, NC (all read)
cmd_status() {
  local archives dumps repo_size

  echo -e "${CYAN}=== Available Backups ===${NC}"

  echo -e "\n${CYAN}Borg archives:${NC}"
  # The listing is captured first instead of being piped straight into head:
  # with pipefail on, head closing the pipe early can make the producer fail
  # with SIGPIPE, which would wrongly take the "No borg archives" branch.
  if archives=$(borg list "$BORG_REPO" 2>/dev/null); then
    if [[ -n "$archives" ]]; then
      head -n 20 <<<"$archives"
    fi
    # Size is informational only; a du failure must not abort the report.
    repo_size=$(du -sh "$BORG_REPO" | cut -f1) || true
    echo -e " Repo size: ${repo_size:-unknown}"
  else
    echo " No borg archives found"
  fi

  echo -e "\n${CYAN}PostgreSQL dumps:${NC}"
  # ls is used for its human-readable long listing only; it exits non-zero
  # when the glob matches nothing, which selects the "No dumps" branch.
  if dumps=$(ls -lht "$PG_DIR"/knowledge_base_*.dump 2>/dev/null); then
    head -n 10 <<<"$dumps"
  else
    echo " No dumps found"
  fi

  if [[ -f "$LOCKDOWN_FLAG" ]]; then
    echo -e "\n${RED}*** LOCKDOWN IS ACTIVE ***${NC}"
    echo " Run '$0 unlock' to restore normal access"
  fi
}
|
|
|
|
# --- LOCKDOWN ---
|
|
# cmd_lockdown
# Emergency lockdown. Resets ufw to deny everything in both directions, then
# re-allows only inbound SSH (port 22) from 192.168.1.0/24 and outbound
# DNS/HTTP/HTTPS, stops nginx, creates the lockdown flag file, and finally
# records the current TCP connections and a copy of the auth log.
#
# Globals: LOCKDOWN_FLAG, LOG, RED, NC (read)
cmd_lockdown() {
  local outbound auth_snapshot

  log "${RED}=== EMERGENCY LOCKDOWN ===${NC}"
  log "Blocking all external access except SSH from LAN..."

  # Best effort: keep a readable copy of the rules that were in force.
  ufw status verbose > /var/backups/brain/ufw-pre-lockdown.txt 2>/dev/null || true

  # Start from a clean slate with everything denied.
  ufw --force reset >/dev/null 2>&1
  ufw default deny incoming
  ufw default deny outgoing

  # Inbound: SSH from the LAN only.
  ufw allow from 192.168.1.0/24 to any port 22

  # Outbound: DNS (needed for recovery) and HTTP/HTTPS (apt, for fixes).
  for outbound in 53 80/tcp 443/tcp; do
    ufw allow out "$outbound"
  done

  ufw --force enable >/dev/null 2>&1

  # Take the public-facing web server down; ignore "not running" errors.
  systemctl stop nginx 2>/dev/null || true

  touch "$LOCKDOWN_FLAG"
  log "${RED}LOCKDOWN ACTIVE — only LAN SSH allowed${NC}"
  log "Services stopped: nginx"
  log "Run '$0 unlock' when safe"

  # Evidence: who is connected right now.
  log "Current connections:"
  ss -tnp | tee -a "$LOG"

  # Evidence: timestamped copy of the auth log (best effort).
  log "Saving auth log snapshot..."
  auth_snapshot="/var/backups/brain/auth-lockdown-$(date +%Y%m%d_%H%M%S).log"
  cp /var/log/auth.log "$auth_snapshot" 2>/dev/null || true
}
|
|
|
|
# --- UNLOCK ---
|
|
# cmd_unlock
# Reverses cmd_lockdown. When the lockdown flag file is missing it prints a
# notice and exits the script with status 0. Otherwise it rebuilds the normal
# ufw rule set from scratch, starts nginx, restarts llm-team-ui and fail2ban,
# and removes the flag file.
#
# Globals: LOCKDOWN_FLAG, GREEN, NC (read)
cmd_unlock() {
  local lan="192.168.1.0/24"
  local unit

  if [[ ! -f "$LOCKDOWN_FLAG" ]]; then
    echo "Lockdown is not active."
    exit 0
  fi

  log "${GREEN}=== RESTORING NORMAL ACCESS ===${NC}"

  # Rebuild the firewall: deny inbound by default, allow all outbound.
  ufw --force reset >/dev/null 2>&1
  ufw default deny incoming
  ufw default allow outgoing

  # Ports open to everyone.
  ufw allow 22/tcp
  ufw allow 80/tcp comment "HTTP web server"
  ufw allow 443/tcp comment "HTTPS web server"
  ufw allow 3030/tcp

  # Ports open to the LAN only. The LAN allow for 9000 is added before the
  # general deny for 9000, in the same order as before.
  ufw allow from "$lan" to any port 139,445 proto tcp
  ufw allow from "$lan" to any port 137,138 proto udp
  ufw allow from "$lan" to any port 5000 comment "LLM Team UI"
  ufw allow from "$lan" to any port 9000 comment "MinIO LAN only"
  ufw deny 9000 comment "Block MinIO external"
  ufw allow from "$lan" to any port 11434 comment "Ollama internal"
  ufw allow from "$lan" to any port 18789 comment "OpenClaw brain"

  ufw --force enable >/dev/null 2>&1

  # Bring services back.
  systemctl start nginx
  for unit in llm-team-ui fail2ban; do
    systemctl restart "$unit"
  done

  rm -f "$LOCKDOWN_FLAG"
  log "${GREEN}Normal access restored. All services restarted.${NC}"
}
|
|
|
|
# --- RESTORE DB ---
|
|
# cmd_db [DATE]
# Restores the knowledge_base PostgreSQL database from the most recently
# modified dump in $PG_DIR whose name is knowledge_base_<DATE>*.dump (any
# knowledge_base_*.dump when DATE is empty or omitted).
#
# Asks for confirmation on stdin; any answer other than "y" exits the script
# with status 0. Exits with status 1 when no dump matches. On confirmation it
# stops llm-team-ui, drops and recreates the database (owner kbuser), runs
# pg_restore, and starts llm-team-ui again.
#
# Globals: PG_DIR, LOG, RED, GREEN, YELLOW, NC (read)
cmd_db() {
  local date="${1:-}"
  local dump="" candidate confirm

  # Pick the newest match by mtime using the shell's own glob. The previous
  # `ls -t ... | head -1` pipeline returned non-zero when nothing matched,
  # which under `set -e -o pipefail` killed the script before the error
  # message below could be shown. DATE is quoted so it is matched literally.
  for candidate in "$PG_DIR"/knowledge_base_"${date}"*.dump; do
    # An unmatched glob stays literal; skip anything that is not a file.
    [[ -f "$candidate" ]] || continue
    if [[ -z "$dump" || "$candidate" -nt "$dump" ]]; then
      dump=$candidate
    fi
  done

  if [[ -z "$dump" ]]; then
    echo -e "${RED}No matching database dump found${NC}"
    exit 1
  fi

  log "Restoring database from: $(basename "$dump")"
  echo -e "${YELLOW}This will DROP and recreate knowledge_base. Continue? [y/N]${NC}"
  read -r confirm
  [[ "$confirm" == "y" ]] || exit 0

  # Stop app to release connections
  systemctl stop llm-team-ui 2>/dev/null || true

  sudo -u postgres dropdb --if-exists knowledge_base
  sudo -u postgres createdb -O kbuser knowledge_base
  sudo -u postgres pg_restore -d knowledge_base "$dump" 2>&1 | tee -a "$LOG"

  systemctl start llm-team-ui
  log "${GREEN}Database restored from $(basename "$dump")${NC}"
}
|
|
|
|
# --- RESTORE CONFIGS ---
|
|
cmd_configs() {
|
|
local date="${1:-}"
|
|
local archive
|
|
archive=$(get_archive "$date")
|
|
|
|
if [ -z "$archive" ]; then
|
|
echo -e "${RED}No matching borg archive found${NC}"
|
|
exit 1
|
|
fi
|
|
|
|
log "Restoring configs from archive: $archive"
|
|
echo -e "${YELLOW}This will overwrite current server configs. Continue? [y/N]${NC}"
|
|
read -r confirm
|
|
[ "$confirm" = "y" ] || exit 0
|
|
|
|
local tmpdir
|
|
tmpdir=$(mktemp -d)
|
|
cd "$tmpdir"
|
|
borg extract "$BORG_REPO::${archive}"
|
|
|
|
# Restore each config
|
|
cp -v etc/nginx/sites-available/* /etc/nginx/sites-available/ 2>/dev/null || true
|
|
cp -v etc/nginx/nginx.conf /etc/nginx/nginx.conf 2>/dev/null || true
|
|
cp -v etc/fail2ban/jail.local /etc/fail2ban/jail.local 2>/dev/null || true
|
|
cp -v etc/ssh/sshd_config /etc/ssh/sshd_config 2>/dev/null || true
|
|
cp -v etc/sysctl.d/99-security.conf /etc/sysctl.d/99-security.conf 2>/dev/null || true
|
|
cp -v etc/systemd/system/llm-team-ui.service /etc/systemd/system/ 2>/dev/null || true
|
|
cp -v etc/systemd/system/goaccess.service /etc/systemd/system/ 2>/dev/null || true
|
|
|
|
# Reload everything
|
|
nginx -t && systemctl reload nginx
|
|
sshd -t && systemctl reload sshd
|
|
systemctl restart fail2ban
|
|
sysctl --system >/dev/null 2>&1
|
|
systemctl daemon-reload
|
|
|
|
rm -rf "$tmpdir"
|
|
log "${GREEN}Configs restored from $archive and services reloaded${NC}"
|
|
}
|
|
|
|
# --- RESTORE APP ---
|
|
cmd_app() {
|
|
local date="${1:-}"
|
|
local archive
|
|
archive=$(get_archive "$date")
|
|
|
|
if [ -z "$archive" ]; then
|
|
echo -e "${RED}No matching borg archive found${NC}"
|
|
exit 1
|
|
fi
|
|
|
|
log "Restoring app from archive: $archive"
|
|
|
|
local tmpdir
|
|
tmpdir=$(mktemp -d)
|
|
cd "$tmpdir"
|
|
borg extract "$BORG_REPO::${archive}"
|
|
|
|
cp -v root/llm_team_ui.py /root/llm_team_ui.py
|
|
cp -v root/llm_team_config.json /root/llm_team_config.json
|
|
cp -v home/profit/.env /home/profit/.env 2>/dev/null || true
|
|
|
|
systemctl restart llm-team-ui
|
|
|
|
rm -rf "$tmpdir"
|
|
log "${GREEN}App restored from $archive and restarted${NC}"
|
|
}
|
|
|
|
# --- FULL RESTORE ---
|
|
# cmd_restore [DATE]
# Full restore. After a single confirmation on stdin (any answer other than
# "y" exits the script with status 0) it runs cmd_configs, cmd_app and
# cmd_db in that order, each with the same DATE argument.
#
# Globals: CYAN, GREEN, YELLOW, NC (read); confirm (written)
cmd_restore() {
  local date="${1:-}"
  local step

  log "${CYAN}=== FULL RESTORE ===${NC}"
  echo -e "${YELLOW}This will restore configs, app, and database. Continue? [y/N]${NC}"
  read -r confirm
  if [[ "$confirm" != "y" ]]; then
    exit 0
  fi

  for step in cmd_configs cmd_app cmd_db; do
    "$step" "$date"
  done

  log "${GREEN}=== FULL RESTORE COMPLETE ===${NC}"
  log "Run ./triage.sh to verify system health"
}
|
|
|
|
# --- Main ---
|
|
# Entry point: the first argument selects the sub-command, the optional
# second argument is the DATE passed to the restore commands. Anything
# unrecognised, or no argument at all, prints the usage text and exits 1.
if (( $# < 1 )); then
  usage
fi

action=$1
when=${2:-}

case "$action" in
  status | lockdown | unlock)
    "cmd_${action}"
    ;;
  restore | db | configs | app)
    "cmd_${action}" "$when"
    ;;
  *)
    usage
    ;;
esac
|