#!/bin/bash
# =============================================================================
# brain server recovery — restore from backup or lock down after compromise
#
# Usage:
#   ./recover.sh status            Show available backups
#   ./recover.sh restore [DATE]    Restore configs + DB from backup (latest or DATE)
#   ./recover.sh lockdown          Emergency lockdown — block all external access
#   ./recover.sh unlock            Undo lockdown — restore normal firewall rules
#   ./recover.sh db [DATE]         Restore only the database
#   ./recover.sh configs [DATE]    Restore only configs (nginx, ssh, fail2ban, etc.)
#   ./recover.sh app [DATE]        Restore only the app file + config
# =============================================================================

set -euo pipefail

BORG_REPO="/var/backups/brain/borg-repo"
PG_DIR="/var/backups/brain/pg-dumps"
LOCKDOWN_FLAG="/var/backups/brain/.lockdown-active"
LOG="/var/log/brain-recovery.log"
# NOTE(review): this exports an empty passphrase and overrides any value
# already present in the caller's environment — confirm that is intended.
export BORG_PASSPHRASE=""

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

#######################################
# Print a timestamped message to stdout and append it to $LOG.
# Arguments: $1 - message (may contain the colour escapes defined above)
#######################################
log() {
  echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG"
}

#######################################
# Print the command summary and exit with status 1.
#######################################
usage() {
  echo "Usage: $0 {status|restore|lockdown|unlock|db|configs|app} [DATE]"
  echo ""
  echo "Commands:"
  echo " status List available backups"
  echo " restore [DATE] Full restore (configs + DB + app)"
  echo " lockdown Emergency: block all external traffic"
  echo " unlock Undo lockdown, restore normal firewall"
  echo " db [DATE] Restore database only"
  echo " configs [DATE] Restore server configs only"
  echo " app [DATE] Restore app + config file only"
  exit 1
}

#######################################
# Print the name (first column of `borg list`) of the last listed archive,
# optionally restricted to lines matching DATE.
# Arguments: $1 - optional grep pattern, typically a date fragment
# Outputs:   archive name on stdout; nothing when no archive matches
# Returns:   0 on success (including "no match"); borg's status if the
#            listing itself fails
#######################################
get_archive() {
  local date="${1:-}"
  local listing

  listing=$(borg list "$BORG_REPO") || return

  if [[ -n "$date" ]]; then
    # grep exits 1 when nothing matches. Under `set -eo pipefail` that used
    # to abort the whole script before callers could report the miss, so a
    # non-match is turned into an empty result here instead.
    listing=$(grep -e "$date" <<<"$listing" || true)
  fi

  [[ -n "$listing" ]] || return 0
  tail -n 1 <<<"$listing" | awk '{print $1}'
}

# --- STATUS ---
#######################################
# Show up to 20 borg archives with the repo size, up to 10 PostgreSQL dumps,
# and a warning when the lockdown flag file exists.
#######################################
cmd_status() {
  local listing repo_size

  echo -e "${CYAN}=== Available Backups ===${NC}"

  echo -e "\n${CYAN}Borg archives:${NC}"
  # Capture the listing first instead of piping borg straight into head:
  # with pipefail, borg being cut off by head could make the pipeline look
  # failed and print the "not found" message after real output.
  if listing=$(borg list "$BORG_REPO" 2>/dev/null); then
    if [[ -n "$listing" ]]; then
      head -n 20 <<<"$listing"
    fi
    repo_size=$(du -sh "$BORG_REPO" | cut -f1)
    echo -e " Repo size: $repo_size"
  else
    echo " No borg archives found"
  fi

  echo -e "\n${CYAN}PostgreSQL dumps:${NC}"
  # ls is used for human-readable display only; its failure (no dumps)
  # selects the fallback message.
  if ! ls -lht "$PG_DIR"/knowledge_base_*.dump 2>/dev/null | head -10; then
    echo " No dumps found"
  fi

  if [[ -f "$LOCKDOWN_FLAG" ]]; then
    echo -e "\n${RED}*** LOCKDOWN IS ACTIVE ***${NC}"
    echo " Run '$0 unlock' to restore normal access"
  fi
}

# --- LOCKDOWN ---
#######################################
# Reset ufw to deny-by-default in both directions, allow only SSH from
# 192.168.1.0/24 plus outbound DNS/HTTP/HTTPS, stop nginx, create the
# lockdown flag file, and record current connections and the auth log.
#######################################
cmd_lockdown() {
  log "${RED}=== EMERGENCY LOCKDOWN ===${NC}"
  log "Blocking all external access except SSH from LAN..."

  # Save current rules (best effort; never blocks the lockdown)
  ufw status verbose > /var/backups/brain/ufw-pre-lockdown.txt 2>/dev/null || true

  # Reset and lock down
  ufw --force reset >/dev/null 2>&1
  ufw default deny incoming
  ufw default deny outgoing

  # Only allow SSH from LAN
  ufw allow from 192.168.1.0/24 to any port 22

  # Allow DNS out (needed for recovery)
  ufw allow out 53

  # Allow apt out (needed for fixes)
  ufw allow out 80/tcp
  ufw allow out 443/tcp

  ufw --force enable >/dev/null 2>&1

  # Stop public-facing services
  systemctl stop nginx 2>/dev/null || true

  touch "$LOCKDOWN_FLAG"

  log "${RED}LOCKDOWN ACTIVE — only LAN SSH allowed${NC}"
  log "Services stopped: nginx"
  log "Run '$0 unlock' when safe"

  # Log who's connected right now
  log "Current connections:"
  ss -tnp | tee -a "$LOG"

  # Snapshot auth log (best effort)
  log "Saving auth log snapshot..."
  cp /var/log/auth.log "/var/backups/brain/auth-lockdown-$(date +%Y%m%d_%H%M%S).log" 2>/dev/null || true
}

# --- UNLOCK ---
#######################################
# Undo a lockdown: re-create the normal ufw rule set, start/restart the
# services, and remove the lockdown flag file. Exits 0 without changes when
# the flag file is absent.
#######################################
cmd_unlock() {
  if [[ ! -f "$LOCKDOWN_FLAG" ]]; then
    echo "Lockdown is not active."
    exit 0
  fi

  log "${GREEN}=== RESTORING NORMAL ACCESS ===${NC}"

  # Restore firewall. The rules are re-created from this hard-coded list,
  # not from the ufw-pre-lockdown.txt snapshot written by cmd_lockdown.
  ufw --force reset >/dev/null 2>&1
  ufw default deny incoming
  ufw default allow outgoing
  ufw allow 22/tcp
  ufw allow 80/tcp comment "HTTP web server"
  ufw allow 443/tcp comment "HTTPS web server"
  ufw allow 3030/tcp
  ufw allow from 192.168.1.0/24 to any port 139,445 proto tcp
  ufw allow from 192.168.1.0/24 to any port 137,138 proto udp
  ufw allow from 192.168.1.0/24 to any port 5000 comment "LLM Team UI"
  ufw allow from 192.168.1.0/24 to any port 9000 comment "MinIO LAN only"
  ufw deny 9000 comment "Block MinIO external"
  ufw allow from 192.168.1.0/24 to any port 11434 comment "Ollama internal"
  ufw allow from 192.168.1.0/24 to any port 18789 comment "OpenClaw brain"
  ufw --force enable >/dev/null 2>&1

  # Restart services
  systemctl start nginx
  systemctl restart llm-team-ui
  systemctl restart fail2ban

  rm -f "$LOCKDOWN_FLAG"
  log "${GREEN}Normal access restored. All services restarted.${NC}"
}

# --- RESTORE DB ---
#######################################
# Drop and recreate the knowledge_base database from the newest dump in
# $PG_DIR whose name starts with knowledge_base_<DATE>.
# Arguments: $1 - optional date prefix; empty selects the newest dump
# Exits:     1 when no dump matches; 0 when the user declines; the failing
#            step's status when the restore fails
#######################################
cmd_db() {
  local date="${1:-}"
  local dump="" candidate confirm
  local rc=0

  # Pick the most recently modified matching dump. Globbing is used rather
  # than `ls | head`: under `set -eo pipefail` a non-matching `ls` aborted
  # the script before the "not found" message below could be printed.
  for candidate in "$PG_DIR"/knowledge_base_"${date}"*.dump; do
    [[ -f "$candidate" ]] || continue
    if [[ -z "$dump" || "$candidate" -nt "$dump" ]]; then
      dump=$candidate
    fi
  done

  if [[ -z "$dump" ]]; then
    echo -e "${RED}No matching database dump found${NC}"
    exit 1
  fi

  log "Restoring database from: $(basename "$dump")"
  echo -e "${YELLOW}This will DROP and recreate knowledge_base. Continue? [y/N]${NC}"
  read -r confirm
  [[ "$confirm" = "y" ]] || exit 0

  # Stop app to release connections
  systemctl stop llm-team-ui 2>/dev/null || true

  # Collect the status instead of letting `set -e` abort mid-way, so the
  # app is always started again even if a restore step fails.
  {
    sudo -u postgres dropdb --if-exists knowledge_base &&
      sudo -u postgres createdb -O kbuser knowledge_base &&
      sudo -u postgres pg_restore -d knowledge_base "$dump" 2>&1 | tee -a "$LOG"
  } || rc=$?

  systemctl start llm-team-ui

  if (( rc != 0 )); then
    log "${RED}Database restore from $(basename "$dump") failed (exit $rc)${NC}"
    exit "$rc"
  fi

  log "${GREEN}Database restored from $(basename "$dump")${NC}"
}

# --- RESTORE CONFIGS ---
#######################################
# Extract a borg archive into a temp dir, copy the server config files it
# contains over the live ones, then validate/reload the affected services.
# Arguments: $1 - optional date pattern passed to get_archive
# Exits:     1 when no archive matches; 0 when the user declines; non-zero
#            when extraction fails
#######################################
cmd_configs() {
  local date="${1:-}"
  local archive confirm tmpdir
  local rc=0

  archive=$(get_archive "$date")
  if [[ -z "$archive" ]]; then
    echo -e "${RED}No matching borg archive found${NC}"
    exit 1
  fi

  log "Restoring configs from archive: $archive"
  echo -e "${YELLOW}This will overwrite current server configs. Continue? [y/N]${NC}"
  read -r confirm
  [[ "$confirm" = "y" ]] || exit 0

  tmpdir=$(mktemp -d)

  # Work inside a subshell so the caller's working directory is untouched
  # (it used to end up inside the deleted temp dir) and so the temp dir can
  # be removed on failure as well as on success.
  (
    cd "$tmpdir" || exit
    borg extract "$BORG_REPO::${archive}" || exit

    # Restore each config; files missing from the archive are skipped.
    cp -v etc/nginx/sites-available/* /etc/nginx/sites-available/ 2>/dev/null || true
    cp -v etc/nginx/nginx.conf /etc/nginx/nginx.conf 2>/dev/null || true
    cp -v etc/fail2ban/jail.local /etc/fail2ban/jail.local 2>/dev/null || true
    cp -v etc/ssh/sshd_config /etc/ssh/sshd_config 2>/dev/null || true
    cp -v etc/sysctl.d/99-security.conf /etc/sysctl.d/99-security.conf 2>/dev/null || true
    cp -v etc/systemd/system/llm-team-ui.service /etc/systemd/system/ 2>/dev/null || true
    cp -v etc/systemd/system/goaccess.service /etc/systemd/system/ 2>/dev/null || true
  ) || rc=$?

  rm -rf -- "${tmpdir:?}"

  if (( rc != 0 )); then
    log "${RED}Config extraction from $archive failed (exit $rc)${NC}"
    exit "$rc"
  fi

  # Reload everything; nginx and sshd are only reloaded when their config
  # test passes.
  nginx -t && systemctl reload nginx
  sshd -t && systemctl reload sshd
  systemctl restart fail2ban
  sysctl --system >/dev/null 2>&1
  systemctl daemon-reload

  log "${GREEN}Configs restored from $archive and services reloaded${NC}"
}

# --- RESTORE APP ---
#######################################
# Extract a borg archive into a temp dir, copy the app script, its JSON
# config and (if present) the .env file into place, then restart the app.
# Arguments: $1 - optional date pattern passed to get_archive
# Exits:     1 when no archive matches; non-zero when extraction or a
#            mandatory copy fails
#######################################
cmd_app() {
  local date="${1:-}"
  local archive tmpdir
  local rc=0

  archive=$(get_archive "$date")
  if [[ -z "$archive" ]]; then
    echo -e "${RED}No matching borg archive found${NC}"
    exit 1
  fi

  log "Restoring app from archive: $archive"

  tmpdir=$(mktemp -d)

  # Subshell: keeps the caller's cwd intact and lets the temp dir be
  # cleaned up whether or not a step fails.
  (
    cd "$tmpdir" || exit
    borg extract "$BORG_REPO::${archive}" || exit
    cp -v root/llm_team_ui.py /root/llm_team_ui.py || exit
    cp -v root/llm_team_config.json /root/llm_team_config.json || exit
    # The .env file is optional.
    cp -v home/profit/.env /home/profit/.env 2>/dev/null || true
  ) || rc=$?

  rm -rf -- "${tmpdir:?}"

  if (( rc != 0 )); then
    log "${RED}App restore from $archive failed (exit $rc)${NC}"
    exit "$rc"
  fi

  systemctl restart llm-team-ui

  log "${GREEN}App restored from $archive and restarted${NC}"
}

# --- FULL RESTORE ---
#######################################
# Run the configs, app and database restores in that order after a single
# up-front confirmation (cmd_configs and cmd_db still ask their own).
# Arguments: $1 - optional date forwarded to each step
#######################################
cmd_restore() {
  local date="${1:-}"
  local confirm

  log "${CYAN}=== FULL RESTORE ===${NC}"
  echo -e "${YELLOW}This will restore configs, app, and database. Continue? [y/N]${NC}"
  read -r confirm
  [[ "$confirm" = "y" ]] || exit 0

  cmd_configs "$date"
  cmd_app "$date"
  cmd_db "$date"

  log "${GREEN}=== FULL RESTORE COMPLETE ===${NC}"
  log "Run ./triage.sh to verify system health"
}

# --- Main ---
if (( $# < 1 )); then
  usage
fi

case "$1" in
  status)   cmd_status ;;
  restore)  cmd_restore "${2:-}" ;;
  lockdown) cmd_lockdown ;;
  unlock)   cmd_unlock ;;
  db)       cmd_db "${2:-}" ;;
  configs)  cmd_configs "${2:-}" ;;
  app)      cmd_app "${2:-}" ;;
  *)        usage ;;
esac