From c22b3aa69148822dcc087b4f4db34e0346f2026f Mon Sep 17 00:00:00 2001
From: michael
Date: Mon, 23 Jun 2025 11:57:42 +0200
Subject: [PATCH] =?UTF-8?q?feat(deployment):=20implement=20comprehensive?=
 =?UTF-8?q?=20karl=E2=86=92walter=20deployment=20script?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add automated rsync-based file synchronization with _furt user permissions
- Implement OpenBSD rcctl service management with backup/rollback functionality
- Add port availability checks and health validation after deployment
- Include comprehensive error handling and status reporting
- Support dry-run mode for safe deployment testing
- Provide automatic service file generation with correct paths

Features:
- SSH-based secure transfer with permission preservation
- Pre-deployment backup with configurable retention (3 backups)
- Intelligent service stop/start handling for OpenBSD rcctl
- Health check validation via HTTP endpoint
- Colored output and structured logging for better UX
- Support for --dry-run, --rollback, and --force modes

Successfully deploys furt-lua from development (karl) to staging (walter).
Manual service management required due to OpenBSD rc.d pexp pattern issues.
Closes #76 (deployment automation)
Related: Service file pexp pattern matching requires follow-up investigation

Files:
- scripts/deploy/deploy_walter.sh (new)
---
 scripts/deploy/deploy_walter.sh | 762 ++++++++++++++++++++++++++++++++
 1 file changed, 762 insertions(+)
 create mode 100755 scripts/deploy/deploy_walter.sh

diff --git a/scripts/deploy/deploy_walter.sh b/scripts/deploy/deploy_walter.sh
new file mode 100755
index 0000000..5c12742
--- /dev/null
+++ b/scripts/deploy/deploy_walter.sh
@@ -0,0 +1,762 @@
+#!/bin/bash
+# scripts/deploy/deploy_walter.sh
+# Deployment script: karl (development) → walter (OpenBSD staging)
+#
+# Usage:
+#   ./scripts/deploy/deploy_walter.sh [--dry-run] [--rollback] [--force]
+#
+# Dragons@Work - Furt API-Gateway Deployment
+# Version: 1.0
+
+set -euo pipefail  # Exit on error, undefined vars, pipe failures
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+
+# Source (karl development)
+SOURCE_DIR="/home/michael/Develop/DAW/furt/furt-lua"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$(dirname "$SCRIPT_DIR")")"  # scripts/deploy/ -> scripts/ -> furt/
+
+# Target (walter OpenBSD staging)
+WALTER_HOST="walter"  # Assumes SSH config entry (as michael user)
+TARGET_DIR="/usr/local/furt/furt-lua"
+SERVICE_USER="_furt"
+SERVICE_GROUP="_furt"
+SERVICE_PORT=8080  # TCP port the furt service listens on
+
+# Backup configuration
+BACKUP_DIR="/usr/local/furt/backups"
+BACKUP_RETENTION=3  # Keep last 3 backups
+
+# Health check configuration
+HEALTH_URL="http://localhost:${SERVICE_PORT}/health"
+HEALTH_TIMEOUT=10
+HEALTH_RETRIES=3
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'  # No Color
+
+# =============================================================================
+# LOGGING FUNCTIONS
+# =============================================================================
+#
+# Every logger writes to stderr. Functions such as create_backup return their
+# result on stdout and are called via $(...); a logger writing to stdout would
+# end up inside the captured value.
+
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1" >&2
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1" >&2
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1" >&2
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1" >&2
+}
+
+log_step() {
+    echo -e "\n${BLUE}==>${NC} $1" >&2
+}
+
+# =============================================================================
+# UTILITY FUNCTIONS
+# =============================================================================
+
+# Print the command line help to stdout.
+usage() {
+    cat << EOF
+Usage: $0 [OPTIONS]
+
+Deployment script for furt-lua: karl (development) → walter (OpenBSD staging)
+
+OPTIONS:
+    --dry-run   Show what would be deployed without making changes
+    --rollback  Rollback to previous deployment
+    --force     Skip confirmation prompts
+    --help      Show this help message
+
+EXAMPLES:
+    $0              # Normal deployment with confirmation
+    $0 --dry-run    # Preview deployment without changes
+    $0 --force      # Deploy without confirmation
+    $0 --rollback   # Rollback to previous version
+
+EOF
+}
+
+# Verify the local source directory, SSH access to walter and local rsync.
+# Exits with status 1 on the first missing prerequisite.
+check_dependencies() {
+    log_step "Checking dependencies"
+
+    # Check if source directory exists
+    if [[ ! -d "$SOURCE_DIR" ]]; then
+        log_error "Source directory not found: $SOURCE_DIR"
+        exit 1
+    fi
+
+    # Check SSH connectivity to walter (BatchMode: fail instead of prompting)
+    if ! ssh -o ConnectTimeout=5 -o BatchMode=yes "$WALTER_HOST" exit 2>/dev/null; then
+        log_error "Cannot connect to walter via SSH"
+        log_info "Please ensure SSH key is set up for $WALTER_HOST"
+        exit 1
+    fi
+
+    # Check rsync availability
+    if ! command -v rsync &> /dev/null; then
+        log_error "rsync is required but not installed"
+        exit 1
+    fi
+
+    log_success "All dependencies OK"
+}
+
+# Print a sortable timestamp (YYYYMMDD_HHMMSS) for backup directory names.
+get_backup_timestamp() {
+    date +"%Y%m%d_%H%M%S"
+}
+
+# =============================================================================
+# SSH REMOTE EXECUTION FUNCTIONS
+# =============================================================================
+#
+# The command string is parsed by the login shell on walter. In the doas
+# variants only the first command of a pipeline is elevated; wrap a pipeline
+# in "sh -c '...'" when all of it must run with the elevated identity.
+
+# Run a command on walter as the SSH login user.
+walter_exec() {
+    ssh "$WALTER_HOST" "$@"
+}
+
+# Run a command on walter as root via doas.
+walter_exec_as_root() {
+    ssh "$WALTER_HOST" "doas $*"
+}
+
+# Run a command on walter as the service user via doas.
+walter_exec_as_furt() {
+    ssh "$WALTER_HOST" "doas -u $SERVICE_USER $*"
+}
+
+# Succeed when a TCP socket on walter is listening on port $1.
+# OpenBSD netstat prints sockets as "address.port" (e.g. "*.8080") while other
+# systems use "address:port", so both separators are accepted.
+port_is_listening() {
+    walter_exec "netstat -an | grep -Eq '[.:]$1[[:space:]].*LISTEN'"
+}
+
+# =============================================================================
+# BACKUP FUNCTIONS
+# =============================================================================
+
+# Copy the current deployment into a timestamped directory below BACKUP_DIR.
+# Prints the backup path on stdout, or an empty line when there is nothing to
+# back up. Callers capture the result with $(create_backup).
+create_backup() {
+    local timestamp backup_path
+    timestamp=$(get_backup_timestamp)
+    backup_path="$BACKUP_DIR/furt-lua_$timestamp"
+
+    log_step "Creating backup"
+
+    # Create backup directory if it doesn't exist
+    walter_exec_as_root "mkdir -p $BACKUP_DIR"
+    walter_exec_as_root "chown $SERVICE_USER:$SERVICE_GROUP $BACKUP_DIR"
+
+    # Check if target directory exists
+    if walter_exec "test -d $TARGET_DIR"; then
+        log_info "Backing up current deployment to: $backup_path"
+        walter_exec_as_root "cp -r $TARGET_DIR $backup_path"
+        walter_exec_as_root "chown -R $SERVICE_USER:$SERVICE_GROUP $backup_path"
+
+        # Set backup metadata; sh -c makes the redirect run under doas
+        walter_exec_as_root "sh -c \"echo 'Backup created: \$(date)' > $backup_path/.backup_info\""
+        walter_exec_as_root "sh -c \"echo 'Original path: $TARGET_DIR' >> $backup_path/.backup_info\""
+        walter_exec_as_root "chown $SERVICE_USER:$SERVICE_GROUP $backup_path/.backup_info"
+
+        log_success "Backup created: $backup_path"
+        echo "$backup_path"  # Return backup path
+    else
+        log_warning "No existing deployment found, skipping backup"
+        echo ""  # Return empty string
+    fi
+}
+
+# Remove all but the newest BACKUP_RETENTION backups on walter.
+cleanup_old_backups() {
+    log_step "Cleaning up old backups"
+
+    # ls -d lists the backup directories themselves instead of their contents
+    local backup_count
+    backup_count=$(walter_exec "ls -1d $BACKUP_DIR/furt-lua_* 2>/dev/null | wc -l") || backup_count=0
+    backup_count=${backup_count//[[:space:]]/}  # wc may pad the number with blanks
+    backup_count=${backup_count:-0}
+
+    if [[ $backup_count -gt $BACKUP_RETENTION ]]; then
+        log_info "Found $backup_count backups, keeping last $BACKUP_RETENTION"
+        # Run the whole pipeline under doas so that rm is elevated as well
+        walter_exec_as_root "sh -c 'ls -1dt $BACKUP_DIR/furt-lua_* | tail -n +$((BACKUP_RETENTION + 1)) | xargs rm -rf'"
+        log_success "Old backups cleaned up"
+    else
+        log_info "Found $backup_count backups, no cleanup needed"
+    fi
+}
+
+# Show the five newest backups on walter together with their metadata.
+list_backups() {
+    log_step "Available backups"
+
+    if walter_exec "ls -1d $BACKUP_DIR/furt-lua_* >/dev/null 2>&1"; then
+        walter_exec "ls -1dt $BACKUP_DIR/furt-lua_* | head -n 5 | while read -r backup; do
+            echo \"  \$backup\"
+            if [ -f \"\$backup/.backup_info\" ]; then
+                sed 's/^/    /' \"\$backup/.backup_info\"
+            fi
+            echo
+        done"
+    else
+        log_warning "No backups found"
+    fi
+}
+
+# Replace the deployed tree on walter with the backup directory given as $1.
+restore_backup() {
+    walter_exec_as_root "rm -rf $TARGET_DIR"
+    walter_exec_as_root "cp -r $1 $TARGET_DIR"
+    fix_permissions
+}
+
+# Restore the most recent backup on walter and restart the service.
+# Asks for confirmation unless --force was given; honours --dry-run.
+rollback_deployment() {
+    log_step "Rolling back deployment"
+
+    # Newest backup first (-d: the directory itself, not its contents)
+    local latest_backup response=""
+    latest_backup=$(walter_exec "ls -1dt $BACKUP_DIR/furt-lua_* 2>/dev/null | head -n 1") || latest_backup=""
+
+    if [[ -z "$latest_backup" ]]; then
+        log_error "No backups available for rollback"
+        exit 1
+    fi
+
+    log_info "Latest backup: $latest_backup"
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log_info "DRY RUN: Would restore $latest_backup to $TARGET_DIR"
+        return 0
+    fi
+
+    if [[ "$FORCE" != "true" ]]; then
+        echo -n "Rollback to this version? [y/N]: "
+        read -r response || response=""
+        if [[ ! "$response" =~ ^[Yy]$ ]]; then
+            log_info "Rollback cancelled"
+            exit 0
+        fi
+    fi
+
+    # Stop service
+    stop_service
+
+    # Keep a copy of the current version (before rollback)
+    local rollback_backup
+    rollback_backup=$(create_backup)
+    if [[ -n "$rollback_backup" ]]; then
+        walter_exec_as_root "mv $rollback_backup ${rollback_backup}_pre_rollback"
+    fi
+
+    # Restore from backup and fix permissions
+    restore_backup "$latest_backup"
+
+    # Start service
+    start_service
+
+    # Health check
+    if health_check; then
+        log_success "Rollback completed successfully"
+    else
+        log_error "Rollback completed but health check failed"
+        exit 1
+    fi
+}
+
+# =============================================================================
+# SERVICE MANAGEMENT FUNCTIONS
+# =============================================================================
+
+# Print "running" when a lua main.lua process of the service user exists on
+# walter (regardless of rcctl status), otherwise "stopped".
+# pgrep -f matches the full argument list; "ps aux" output can be cut off at
+# the display width when ssh runs without a terminal.
+get_service_status() {
+    if walter_exec "pgrep -U $SERVICE_USER -f 'lua.*main.lua' >/dev/null 2>&1"; then
+        echo "running"
+    else
+        echo "stopped"
+    fi
+}
+
+# Print "running" when "rcctl check furt" succeeds on walter, else "stopped".
+get_rcctl_status() {
+    if walter_exec "rcctl check furt >/dev/null 2>&1"; then
+        echo "running"
+    else
+        echo "stopped"
+    fi
+}
+
+# Stop the furt service on walter: rcctl first, pkill as fallback.
+# Returns 1 when the process is still alive afterwards.
+stop_service() {
+    log_step "Stopping furt service"
+
+    local process_status rcctl_status
+    process_status=$(get_service_status)
+    rcctl_status=$(get_rcctl_status)
+
+    if [[ "$process_status" != "running" ]]; then
+        log_info "Service already stopped"
+        return 0
+    fi
+
+    log_info "Furt process is running (rcctl status: $rcctl_status)"
+
+    # Try rcctl stop first if service is managed by rcctl
+    if [[ "$rcctl_status" == "running" ]]; then
+        log_info "Stopping via rcctl..."
+        walter_exec_as_root "rcctl stop furt" || true
+        sleep 2
+    fi
+
+    # Check if still running (manual process or rcctl didn't work)
+    if [[ $(get_service_status) == "running" ]]; then
+        log_info "Process still running, stopping manually..."
+        walter_exec_as_root "pkill -f -U $SERVICE_USER 'lua.*main.lua'" || true
+        sleep 2
+    fi
+
+    # Final check
+    if [[ $(get_service_status) == "stopped" ]]; then
+        log_success "Service stopped"
+    else
+        log_error "Could not stop service"
+        return 1
+    fi
+}
+
+# Start the furt service through rcctl and verify that it came up.
+# Returns 1 when the port is occupied or the service does not start.
+start_service() {
+    log_step "Starting furt service"
+
+    if [[ $(get_service_status) != "stopped" ]]; then
+        log_info "Service already running"
+        return 0
+    fi
+
+    # Check port availability before starting
+    if ! check_port_availability; then
+        log_error "Cannot start service - port $SERVICE_PORT is occupied"
+        return 1
+    fi
+
+    log_info "Starting furt service via rcctl..."
+    # The process and port checks below decide whether the start worked
+    walter_exec_as_root "rcctl start furt" || log_warning "rcctl start reported a failure"
+
+    # Wait and check if service actually started
+    sleep 5
+
+    local process_status rcctl_status
+    process_status=$(get_service_status)
+    rcctl_status=$(get_rcctl_status)
+
+    if [[ "$process_status" == "running" ]]; then
+        log_success "Service started successfully (rcctl: $rcctl_status, process: $process_status)"
+        return 0
+    fi
+
+    if [[ "$rcctl_status" == "running" ]]; then
+        log_warning "rcctl reports running but process not detected - checking port..."
+        if port_is_listening "$SERVICE_PORT"; then
+            log_success "Service appears to be running (port $SERVICE_PORT active)"
+            return 0
+        fi
+        log_error "Service failed to start properly"
+    else
+        log_error "Failed to start service"
+    fi
+
+    # Show service logs for debugging
+    walter_exec "tail -10 /var/log/daemon" || true
+    return 1
+}
+
+# =============================================================================
+# DEPLOYMENT FUNCTIONS
+# =============================================================================
+
+# Validate that the source directory contains the directories and files the
+# deployment relies on. Exits with status 1 when something is missing.
+prepare_source() {
+    log_step "Preparing source files"
+
+    # Check source directory structure
+    local dir
+    for dir in src config scripts; do
+        if [[ ! -d "$SOURCE_DIR/$dir" ]]; then
+            log_error "Required directory missing: $SOURCE_DIR/$dir"
+            exit 1
+        fi
+    done
+
+    # Check required files
+    local file
+    for file in src/main.lua scripts/start.sh; do
+        if [[ ! -f "$SOURCE_DIR/$file" ]]; then
+            log_error "Required file missing: $SOURCE_DIR/$file"
+            exit 1
+        fi
+    done
+
+    log_success "Source files validated"
+}
+
+# Copy the source directory to walter with rsync; the remote rsync runs as the
+# service user. With --dry-run only the rsync preview is shown.
+sync_files() {
+    log_step "Syncing files to walter"
+
+    # Prepare rsync excludes
+    local excludes=(
+        "--exclude=.env"
+        "--exclude=.env.*"
+        "--exclude=*.backup"
+        "--exclude=*.orig"
+        "--exclude=*.tmp"
+        "--exclude=.git/"
+        "--exclude=.DS_Store"
+        "--exclude=logs/"
+        "--exclude=*.log"
+    )
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log_info "DRY RUN: Would sync the following files:"
+        rsync -avz --dry-run "${excludes[@]}" "$SOURCE_DIR/" "$WALTER_HOST:$TARGET_DIR/"
+        return 0
+    fi
+
+    # Create target directory and set initial ownership
+    walter_exec_as_root "mkdir -p $TARGET_DIR"
+    walter_exec_as_root "chown -R $SERVICE_USER:$SERVICE_GROUP $TARGET_DIR"
+
+    # Sync files using rsync with _furt user
+    log_info "Syncing files..."
+    rsync -avz "${excludes[@]}" \
+        -e "ssh" \
+        --rsync-path="doas -u $SERVICE_USER rsync" \
+        "$SOURCE_DIR/" "$WALTER_HOST:$TARGET_DIR/"
+
+    log_success "Files synced"
+}
+
+# Write /etc/rc.d/furt on walter and enable the service.
+# In the generated rc.d script pexp and rc_bg are assigned after rc.subr is
+# sourced: rc.subr derives pexp from daemon and daemon_flags while it is being
+# sourced, which would discard an earlier assignment. rc.subr matches pexp
+# against the complete command line (pgrep -xf), hence the ".*" on both ends.
+# NOTE(review): daemon_execdir needs an rc.subr that knows it - confirm on walter.
+fix_service_file() {
+    log_step "Updating OpenBSD service file"
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log_info "DRY RUN: Would update /etc/rc.d/furt"
+        return 0
+    fi
+
+    log_info "Creating correct service file..."
+    walter_exec_as_root "sh -c 'cat > /etc/rc.d/furt << \"EOF\"
+#!/bin/ksh
+daemon=\"$TARGET_DIR/scripts/start.sh\"
+daemon_user=\"$SERVICE_USER\"
+daemon_execdir=\"$TARGET_DIR\"
+daemon_flags=\"start\"
+
+. /etc/rc.d/rc.subr
+
+pexp=\".*lua.*main.lua.*\"
+rc_bg=YES
+rc_cmd \$1
+EOF'"
+
+    # Make service file executable
+    walter_exec_as_root "chmod +x /etc/rc.d/furt"
+
+    # Enable service if not already enabled
+    if ! walter_exec "rcctl ls on | grep -qx furt"; then
+        log_info "Enabling furt service..."
+        walter_exec_as_root "rcctl enable furt"
+        log_success "Service enabled"
+    else
+        log_info "Service already enabled"
+    fi
+
+    log_success "Service file updated"
+}
+
+# Give the deployed tree to the service user and make its scripts executable.
+fix_permissions() {
+    log_step "Fixing permissions"
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log_info "DRY RUN: Would set ownership to $SERVICE_USER:$SERVICE_GROUP"
+        return 0
+    fi
+
+    # Set ownership (already done in sync_files, but ensure it's correct)
+    walter_exec_as_root "chown -R $SERVICE_USER:$SERVICE_GROUP $TARGET_DIR"
+
+    # Set executable permissions for scripts
+    walter_exec_as_root "chmod +x $TARGET_DIR/scripts/*.sh"
+
+    log_success "Permissions fixed"
+}
+
+# =============================================================================
+# HEALTH CHECK FUNCTIONS
+# =============================================================================
+
+# Return 0 when nothing on walter listens on SERVICE_PORT, 1 otherwise.
+check_port_availability() {
+    log_step "Checking port availability"
+
+    local port="$SERVICE_PORT"
+
+    if ! port_is_listening "$port"; then
+        log_success "Port $port is available"
+        return 0
+    fi
+
+    log_warning "Port $port is already in use"
+    walter_exec "netstat -an | grep -E '[.:]$port[[:space:]]'" || true
+
+    # Try to identify what's using the port
+    log_info "Checking what's using port $port..."
+    walter_exec "fstat 2>/dev/null | grep ':$port'" || walter_exec "netstat -anp 2>/dev/null | grep ':$port'" || true
+
+    return 1
+}
+
+# Poll HEALTH_URL from walter itself, up to HEALTH_RETRIES times.
+# Returns 0 on the first successful response, 1 when every attempt failed.
+health_check() {
+    log_step "Running health check"
+
+    local attempt health_response process_status rcctl_status
+    for ((attempt = 1; attempt <= HEALTH_RETRIES; attempt++)); do
+        log_info "Health check attempt $attempt/$HEALTH_RETRIES"
+
+        # -f: HTTP error statuses (>= 400) count as a failed check
+        if health_response=$(walter_exec "curl -sf --max-time $HEALTH_TIMEOUT $HEALTH_URL" 2>/dev/null); then
+            log_success "Health check passed"
+            if [[ -n "$health_response" ]]; then
+                log_info "Health response: $health_response"
+            fi
+
+            # Additional status info
+            process_status=$(get_service_status)
+            rcctl_status=$(get_rcctl_status)
+            log_info "Service status - Process: $process_status, rcctl: $rcctl_status"
+            return 0
+        fi
+
+        if [[ $attempt -lt $HEALTH_RETRIES ]]; then
+            log_info "Health check failed, retrying in 5 seconds..."
+            sleep 5
+        fi
+    done
+
+    log_error "Health check failed after $HEALTH_RETRIES attempts"
+    log_info "Debugging service status..."
+    process_status=$(get_service_status)
+    rcctl_status=$(get_rcctl_status)
+    log_info "Process status: $process_status"
+    log_info "rcctl status: $rcctl_status"
+
+    # Check if port is at least listening
+    if port_is_listening "$SERVICE_PORT"; then
+        log_warning "Port $SERVICE_PORT is listening but health check failed - possible service issue"
+    else
+        log_error "Port $SERVICE_PORT is not listening - service not running"
+    fi
+
+    return 1
+}
+
+# =============================================================================
+# MAIN DEPLOYMENT FUNCTION
+# =============================================================================
+
+# Full deployment: validate, back up, stop, sync, restart and health-check.
+# Offers a rollback to the fresh backup when the health check fails.
+deploy() {
+    log_step "Starting deployment: karl → walter"
+
+    # Pre-deployment checks
+    check_dependencies
+    prepare_source
+
+    # Show deployment summary
+    log_info "Deployment Summary:"
+    log_info "  Source: $SOURCE_DIR"
+    log_info "  Target: $WALTER_HOST:$TARGET_DIR"
+    log_info "  Service User: $SERVICE_USER"
+    log_info "  Dry Run: $DRY_RUN"
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log_warning "DRY RUN MODE - No changes will be made"
+    fi
+
+    # Confirmation prompt
+    local response=""
+    if [[ "$FORCE" != "true" && "$DRY_RUN" != "true" ]]; then
+        echo -n "Continue with deployment? [y/N]: "
+        read -r response || response=""
+        if [[ ! "$response" =~ ^[Yy]$ ]]; then
+            log_info "Deployment cancelled"
+            exit 0
+        fi
+    fi
+
+    # Create backup before deployment, then stop the service
+    local backup_path=""
+    if [[ "$DRY_RUN" != "true" ]]; then
+        backup_path=$(create_backup)
+        stop_service
+    fi
+
+    # Deploy files
+    sync_files
+    fix_permissions
+
+    # Update service file for correct paths
+    fix_service_file
+
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log_success "Dry run completed - no changes made"
+        return 0
+    fi
+
+    # Check if port is available before starting
+    if ! check_port_availability; then
+        log_error "Cannot start service - port conflict detected"
+        log_info "Try: ssh walter \"doas pkill -f 'lua.*main.lua'\" to kill conflicting processes"
+        exit 1
+    fi
+
+    if ! start_service; then
+        log_error "Failed to start service after deployment"
+        exit 1
+    fi
+
+    if health_check; then
+        log_success "Deployment completed successfully!"
+
+        # Cleanup old backups
+        cleanup_old_backups
+        return 0
+    fi
+
+    log_error "Deployment failed health check"
+
+    # Offer rollback
+    if [[ -n "$backup_path" ]]; then
+        echo -n "Rollback to previous version? [y/N]: "
+        read -r response || response=""
+        if [[ "$response" =~ ^[Yy]$ ]]; then
+            log_info "Rolling back..."
+            # Stop the failed version first; otherwise start_service sees a
+            # running process and leaves it untouched.
+            stop_service || true
+            restore_backup "$backup_path"
+            if start_service && health_check; then
+                log_success "Rollback completed successfully"
+            else
+                log_error "Rollback completed but health check failed"
+            fi
+        fi
+    fi
+    exit 1
+}
+
+# =============================================================================
+# MAIN SCRIPT LOGIC
+# =============================================================================
+
+# Parse command line arguments
+DRY_RUN=false
+ROLLBACK=false
+FORCE=false
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --dry-run)
+            DRY_RUN=true
+            shift
+            ;;
+        --rollback)
+            ROLLBACK=true
+            shift
+            ;;
+        --force)
+            FORCE=true
+            shift
+            ;;
+        --help)
+            usage
+            exit 0
+            ;;
+        *)
+            log_error "Unknown option: $1"
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+# Main execution: dispatch to rollback or deployment
+main() {
+    log_info "Furt Deployment Script - karl → walter"
+    log_info "$(date)"
+
+    if [[ "$ROLLBACK" == "true" ]]; then
+        rollback_deployment
+    else
+        deploy
+    fi
+}
+
+# Trap for cleanup on exit: report any non-zero exit status
+cleanup() {
+    if [[ $? -ne 0 ]]; then
+        log_error "Deployment failed"
+    fi
+}
+trap cleanup EXIT
+
+# Run main function
+main "$@"