feat(deployment): implement comprehensive karl→walter deployment script

- Add automated rsync-based file synchronization with _furt user permissions
- Implement OpenBSD rcctl service management with backup/rollback functionality
- Add port availability checks and health validation after deployment
- Include comprehensive error handling and status reporting
- Support dry-run mode for safe deployment testing
- Provide automatic service file generation with correct paths

Features:
- SSH-based secure transfer with permission preservation
- Pre-deployment backup with configurable retention (3 backups)
- Intelligent service stop/start handling for OpenBSD rcctl
- Health check validation via HTTP endpoint
- Colored output and structured logging for better UX
- Support for --dry-run, --rollback, and --force modes

Successfully deploys furt-lua from development (karl) to staging (walter).
Manual service management required due to OpenBSD rc.d pexp pattern issues.

Closes #76 (deployment automation)
Related: Service file pexp pattern matching requires follow-up investigation

Files:
- scripts/deploy/deploy_walter.sh (new)
This commit is contained in:
michael 2025-06-23 11:57:42 +02:00
parent e23b24d5d0
commit c22b3aa691

692
scripts/deploy/deploy_walter.sh Executable file
View file

@ -0,0 +1,692 @@
#!/bin/bash
# scripts/deploy/deploy_walter.sh
# Deployment script: karl (development) → walter (OpenBSD staging)
#
# Usage:
# ./scripts/deploy/deploy_walter.sh [--dry-run] [--rollback] [--force]
#
# Dragons@Work - Furt API-Gateway Deployment
# Version: 1.0
# Strict mode for the whole script: abort on a failing command (-e), on use of
# an unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail # Exit on error, undefined vars, pipe failures
# =============================================================================
# CONFIGURATION
# =============================================================================
# Source (karl development)
# Absolute path of the local tree that sync_files rsyncs to walter; its layout
# is validated by check_dependencies and prepare_source.
SOURCE_DIR="/home/michael/Develop/DAW/furt/furt-lua"
# Directory holding this script, and the directory two levels above it.
# NOTE(review): neither variable is referenced anywhere else in this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$(dirname "$SCRIPT_DIR")")" # scripts/deploy/ -> scripts/ -> furt/
# Target (walter OpenBSD staging)
# WALTER_HOST is passed verbatim to ssh and rsync as the remote host name.
WALTER_HOST="walter" # Assumes SSH config entry (as michael user)
# Remote directory that receives the deployed tree.
TARGET_DIR="/usr/local/furt/furt-lua"
# Remote account and group that own the deployed files and the backups.
SERVICE_USER="_furt"
SERVICE_GROUP="_furt"
# Backup configuration
# Remote directory in which furt-lua_<timestamp> backup copies are created.
BACKUP_DIR="/usr/local/furt/backups"
BACKUP_RETENTION=3 # Keep last 3 backups
# Health check configuration
# URL is fetched with curl on walter itself, so "localhost" means walter.
HEALTH_URL="http://localhost:8080/health"
# Passed to curl --max-time (seconds per request).
HEALTH_TIMEOUT=10
# Number of health check attempts before the check is reported as failed.
HEALTH_RETRIES=3
# Colors for output
# Stored as literal backslash sequences; the logging helpers expand them
# when printing.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# =============================================================================
# LOGGING FUNCTIONS
# =============================================================================
# Print an informational message tagged with a blue [INFO] prefix on stdout.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text, printed verbatim
log_info() {
  # %b expands the escape sequences stored in the colour variables; %s keeps
  # the message literal so backslashes in paths or JSON are not reinterpreted.
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}
# Print a success message tagged with a green [SUCCESS] prefix on stdout.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
log_success() {
  # Colours go through %b (escape expansion); the message goes through %s so
  # any backslashes it contains are printed as-is.
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"
}
# Print a warning message tagged with a yellow [WARNING] prefix on stdout.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
log_warning() {
  # Colours go through %b (escape expansion); the message goes through %s so
  # any backslashes it contains are printed as-is.
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1"
}
# Print an error message tagged with a red [ERROR] prefix on stdout.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
log_error() {
  # Colours go through %b (escape expansion); the message goes through %s so
  # any backslashes it contains are printed as-is.
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1"
}
# Print a step heading: a blank line followed by a blue "==>" marker and the
# step title, on stdout.
# Globals:   BLUE, NC (read)
# Arguments: $1 - step title, printed verbatim
log_step() {
  # Only the colour variables are escape-expanded (%b); the title is literal.
  printf '\n%b==>%b %s\n' "$BLUE" "$NC" "$1"
}
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
# Print the command-line help text to stdout.
# The heredoc delimiter is unquoted, so $0 expands to the name this script was
# invoked with. The function only prints; callers decide the exit status.
usage() {
cat << EOF
Usage: $0 [OPTIONS]
Deployment script for furt-lua: karl (development) → walter (OpenBSD staging)
OPTIONS:
--dry-run Show what would be deployed without making changes
--rollback Rollback to previous deployment
--force Skip confirmation prompts
--help Show this help message
EXAMPLES:
$0 # Normal deployment with confirmation
$0 --dry-run # Preview deployment without changes
$0 --force # Deploy without confirmation
$0 --rollback # Rollback to previous version
EOF
}
# Verify the prerequisites for a deployment and exit with status 1 on the
# first one that is missing.
# Globals: SOURCE_DIR, WALTER_HOST (read)
# Checks, in order: local source directory, key-based SSH access to walter,
# local rsync binary.
check_dependencies() {
  log_step "Checking dependencies"

  # The tree to deploy has to exist locally.
  [[ -d "$SOURCE_DIR" ]] || {
    log_error "Source directory not found: $SOURCE_DIR"
    exit 1
  }

  # BatchMode forbids password prompts, so this only succeeds with working
  # key authentication; the remote command is a bare "exit".
  ssh -o ConnectTimeout=5 -o BatchMode=yes "$WALTER_HOST" exit 2>/dev/null || {
    log_error "Cannot connect to walter via SSH"
    log_info "Please ensure SSH key is set up for $WALTER_HOST"
    exit 1
  }

  # rsync performs the actual file transfer later on.
  command -v rsync >/dev/null 2>&1 || {
    log_error "rsync is required but not installed"
    exit 1
  }

  log_success "All dependencies OK"
}
# Print the current local time as YYYYMMDD_HHMMSS; used as the suffix of
# backup directory names.
get_backup_timestamp() {
  local -r stamp_format='+%Y%m%d_%H%M%S'
  date "$stamp_format"
}
# =============================================================================
# SSH REMOTE EXECUTION FUNCTIONS
# =============================================================================
# Run a command on walter as the SSH login user.
# Globals:   WALTER_HOST (read)
# Arguments: the remote command; every argument is handed to ssh unchanged
# Returns:   the exit status of ssh
walter_exec() {
  ssh "${WALTER_HOST}" "$@"
}
# Run a command on walter with root privileges via doas.
# Globals:   WALTER_HOST (read)
# Arguments: the remote command; all arguments are joined with spaces into a
#            single remote command line prefixed with "doas"
# Returns:   the exit status of ssh
walter_exec_as_root() {
  # "$*" (not "$@") because the arguments are deliberately merged into one
  # string; "doas $@" would glue only the first argument to "doas".
  ssh "$WALTER_HOST" "doas $*"
}
# Run a command on walter as the service account via "doas -u".
# Globals:   WALTER_HOST, SERVICE_USER (read)
# Arguments: the remote command; all arguments are joined with spaces into a
#            single remote command line
# Returns:   the exit status of ssh
walter_exec_as_furt() {
  # "$*" (not "$@") because the arguments are deliberately merged into one
  # string; with "$@" only the first argument would follow the doas prefix.
  ssh "$WALTER_HOST" "doas -u $SERVICE_USER $*"
}
# =============================================================================
# BACKUP FUNCTIONS
# =============================================================================
# Copy the currently deployed tree on walter into a timestamped backup
# directory.
# Globals:   BACKUP_DIR, TARGET_DIR, SERVICE_USER, SERVICE_GROUP (read)
# Outputs:   stdout - exactly one line: the backup path, or an empty line when
#                     no deployment exists yet
#            stderr - all progress messages and remote command output
# Returns:   0 on success or when there was nothing to back up,
#            1 when the backup directory or the copy could not be created
create_backup() {
  local timestamp backup_path created_path=""
  timestamp=$(get_backup_timestamp) || return 1
  backup_path="$BACKUP_DIR/furt-lua_$timestamp"

  # Callers capture stdout to obtain the path, so everything that is not the
  # path is redirected to stderr for the duration of this group.
  {
    log_step "Creating backup"

    # Create backup directory if it doesn't exist
    walter_exec_as_root "mkdir -p $BACKUP_DIR" || return 1
    walter_exec_as_root "chown $SERVICE_USER:$SERVICE_GROUP $BACKUP_DIR" || return 1

    if walter_exec "test -d $TARGET_DIR"; then
      log_info "Backing up current deployment to: $backup_path"
      if ! walter_exec_as_root "cp -r $TARGET_DIR $backup_path"; then
        log_error "Backup copy failed: $backup_path"
        return 1
      fi
      walter_exec_as_root "chown -R $SERVICE_USER:$SERVICE_GROUP $backup_path" || return 1

      # Metadata file. The redirect has to happen inside the root shell, hence
      # sh -c. A failure here does not invalidate the backup itself.
      if ! { walter_exec_as_root "sh -c \"echo 'Backup created: \$(date)' > $backup_path/.backup_info\"" \
        && walter_exec_as_root "sh -c \"echo 'Original path: $TARGET_DIR' >> $backup_path/.backup_info\"" \
        && walter_exec_as_root "chown $SERVICE_USER:$SERVICE_GROUP $backup_path/.backup_info"; }; then
        log_warning "Could not write backup metadata: $backup_path/.backup_info"
      fi

      log_success "Backup created: $backup_path"
      created_path=$backup_path
    else
      log_warning "No existing deployment found, skipping backup"
    fi
  } >&2

  echo "$created_path" # Return backup path (empty when nothing was backed up)
}
# Delete all but the newest BACKUP_RETENTION backup directories on walter.
# Globals: BACKUP_DIR, BACKUP_RETENTION (read)
cleanup_old_backups() {
  local backup_count

  log_step "Cleaning up old backups"

  # -d lists the backup directories themselves; without it ls prints the
  # contents of every directory and the count is meaningless.
  backup_count=$(walter_exec "ls -1d $BACKUP_DIR/furt-lua_* 2>/dev/null | wc -l") || backup_count=0
  # wc may pad its output with blanks; keep digits only and default to 0.
  backup_count=${backup_count//[!0-9]/}
  backup_count=${backup_count:-0}

  if [[ $backup_count -gt $BACKUP_RETENTION ]]; then
    log_info "Found $backup_count backups, keeping last $BACKUP_RETENTION"
    # The whole pipeline runs inside one root shell. Prefixing only the first
    # command with doas would leave rm running as the unprivileged login user.
    walter_exec_as_root "sh -c 'ls -1dt $BACKUP_DIR/furt-lua_* | tail -n +$((BACKUP_RETENTION + 1)) | xargs rm -rf'"
    log_success "Old backups cleaned up"
  else
    log_info "Found $backup_count backups, no cleanup needed"
  fi
}
# Print the five most recent backups on walter together with the contents of
# their .backup_info files.
# Globals: BACKUP_DIR (read)
list_backups() {
  log_step "Available backups"
  # Existence probe only: -d names the directories themselves and the output
  # is discarded, because the detailed listing below prints them again.
  if walter_exec "ls -1d $BACKUP_DIR/furt-lua_* >/dev/null 2>&1"; then
    # The loop runs on walter; \$backup is escaped so it expands remotely.
    walter_exec "ls -1dt $BACKUP_DIR/furt-lua_* | head -n 5 | while read -r backup; do
echo \" \$backup\"
if [ -f \"\$backup/.backup_info\" ]; then
sed 's/^/ /' \"\$backup/.backup_info\"
fi
echo
done"
  else
    log_warning "No backups found"
  fi
}
# Restore the most recent backup on walter into TARGET_DIR and restart the
# service.
# Globals: BACKUP_DIR, TARGET_DIR, FORCE (read)
# Exits:   1 when no backup exists or the health check fails afterwards,
#          0 when the operator declines the confirmation prompt
rollback_deployment() {
  local latest_backup rollback_backup response

  log_step "Rolling back deployment"

  # -d is required: the backups are directories, and a plain ls would print
  # their contents instead of their names. -t puts the newest first.
  latest_backup=$(walter_exec "ls -1dt $BACKUP_DIR/furt-lua_* 2>/dev/null | head -n 1") || latest_backup=""
  if [[ -z "$latest_backup" ]]; then
    log_error "No backups available for rollback"
    exit 1
  fi
  log_info "Latest backup: $latest_backup"

  if [[ "$FORCE" != "true" ]]; then
    echo -n "Rollback to this version? [y/N]: "
    # End-of-input counts as "no" rather than aborting through set -e.
    read -r response || response=""
    if [[ ! "$response" =~ ^[Yy]$ ]]; then
      log_info "Rollback cancelled"
      exit 0
    fi
  fi

  # Stop service
  stop_service

  # Backup current version (before rollback). Only the final output line is
  # the path; any progress lines printed before it are dropped. A failed
  # safety copy does not block the rollback itself.
  rollback_backup=$(create_backup | tail -n 1) || rollback_backup=""
  if [[ -n "$rollback_backup" ]]; then
    walter_exec_as_root "mv $rollback_backup ${rollback_backup}_pre_rollback"
  fi

  # Restore from backup
  walter_exec_as_root "rm -rf $TARGET_DIR"
  walter_exec_as_root "cp -r $latest_backup $TARGET_DIR"

  # Fix permissions
  fix_permissions

  # Start service
  start_service

  # Health check
  if health_check; then
    log_success "Rollback completed successfully"
  else
    log_error "Rollback completed but health check failed"
    exit 1
  fi
}
# =============================================================================
# SERVICE MANAGEMENT FUNCTIONS
# =============================================================================
# Report whether a furt process is running on walter, independent of what
# rcctl believes.
# Globals: SERVICE_USER (read)
# Outputs: "running" or "stopped" on stdout
# Returns: always 0
get_service_status() {
  # Same selectors as the pkill call in stop_service (owner + full command
  # line pattern), so the status check and the kill agree on what "the
  # service" is. pgrep also avoids the ps | grep self-match problem.
  if walter_exec "pgrep -U $SERVICE_USER -f 'lua.*main.lua' >/dev/null 2>&1"; then
    echo "running"
  else
    echo "stopped"
  fi
}
# Report the service state according to "rcctl check furt" on walter.
# Outputs: "running" when the remote check succeeds, otherwise "stopped"
# Returns: always 0
get_rcctl_status() {
  local state="stopped"
  if walter_exec "rcctl check furt >/dev/null 2>&1"; then
    state="running"
  fi
  echo "$state"
}
# Stop the furt service on walter: first through rcctl (when rcctl considers
# it running), then through pkill if a process is still alive.
# Globals: SERVICE_USER (read)
# Returns: 0 when no furt process remains, 1 when it could not be stopped
stop_service() {
  log_step "Stopping furt service"

  local proc_state rc_state
  proc_state=$(get_service_status)
  rc_state=$(get_rcctl_status)

  # Nothing to do when no process is running.
  if [[ "$proc_state" != "running" ]]; then
    log_info "Service already stopped"
    return
  fi

  log_info "Furt process is running (rcctl status: $rc_state)"

  # Polite path: only attempted when rcctl itself manages the process.
  if [[ "$rc_state" == "running" ]]; then
    log_info "Stopping via rcctl..."
    walter_exec_as_root "rcctl stop furt" || true
    sleep 2
  fi

  # Fallback for a manually started process or an ineffective rcctl stop.
  if [[ "$(get_service_status)" == "running" ]]; then
    log_info "Process still running, stopping manually..."
    walter_exec_as_root "pkill -f -U $SERVICE_USER 'lua.*main.lua'" || true
    sleep 2
  fi

  # Verify the outcome instead of trusting either stop command.
  case "$(get_service_status)" in
    stopped)
      log_success "Service stopped"
      ;;
    *)
      log_error "Could not stop service"
      return 1
      ;;
  esac
}
# Start the furt service on walter through rcctl and verify that it came up.
# Returns: 0 when the service is (or already was) running,
#          1 when port 8080 is occupied or the service did not come up
start_service() {
  local status process_status rcctl_status

  log_step "Starting furt service"

  status=$(get_service_status)
  if [[ "$status" != "stopped" ]]; then
    log_info "Service already running"
    return 0
  fi

  # Check port availability before starting
  if ! check_port_availability; then
    log_error "Cannot start service - port 8080 is occupied"
    return 1
  fi

  log_info "Starting furt service via rcctl..."
  # The rcctl exit status alone is not trusted: the outcome is decided by the
  # process and port checks below, identically for every caller.
  if ! walter_exec_as_root "rcctl start furt"; then
    log_warning "rcctl start furt reported failure - verifying process state"
  fi

  # Wait and check if service actually started
  sleep 5
  process_status=$(get_service_status)
  rcctl_status=$(get_rcctl_status)

  if [[ "$process_status" == "running" ]]; then
    log_success "Service started successfully (rcctl: $rcctl_status, process: $process_status)"
    return 0
  fi

  if [[ "$rcctl_status" == "running" ]]; then
    log_warning "rcctl reports running but process not detected - checking port..."
    # BSD netstat prints local addresses as "*.8080" (dot before the port),
    # other systems use ":8080"; accept both and require the LISTEN state.
    if walter_exec "netstat -an | grep -Eq '[.:]8080[[:space:]].*LISTEN'"; then
      log_success "Service appears to be running (port 8080 active)"
      return 0
    fi
    log_error "Service failed to start properly"
  else
    log_error "Failed to start service"
  fi

  # Show service logs for debugging
  walter_exec "tail -10 /var/log/daemon" || true
  return 1
}
# =============================================================================
# DEPLOYMENT FUNCTIONS
# =============================================================================
# Validate that the local source tree has the layout the deployment expects;
# exits with status 1 at the first missing directory or file.
# Globals: SOURCE_DIR (read)
prepare_source() {
  log_step "Preparing source files"

  local entry

  # Directories that must be present.
  for entry in "src" "config" "scripts"; do
    [[ -d "$SOURCE_DIR/$entry" ]] && continue
    log_error "Required directory missing: $SOURCE_DIR/$entry"
    exit 1
  done

  # Files that must be present.
  for entry in "src/main.lua" "scripts/start.sh"; do
    [[ -f "$SOURCE_DIR/$entry" ]] && continue
    log_error "Required file missing: $SOURCE_DIR/$entry"
    exit 1
  done

  log_success "Source files validated"
}
# Transfer the source tree to walter with rsync, running the remote rsync as
# the service user.
# Globals: DRY_RUN, SOURCE_DIR, WALTER_HOST, TARGET_DIR, SERVICE_USER,
#          SERVICE_GROUP (read)
# In dry-run mode rsync is invoked with --dry-run and nothing is changed.
sync_files() {
  log_step "Syncing files to walter"

  # One option list for both the preview and the real transfer, so the
  # dry-run shows what the real run would do (same excludes, same remote
  # user).
  local -a rsync_args=(
    -avz
    "--exclude=.env"
    "--exclude=.env.*"
    "--exclude=*.backup"
    "--exclude=*.orig"
    "--exclude=*.tmp"
    "--exclude=.git/"
    "--exclude=.DS_Store"
    "--exclude=logs/"
    "--exclude=*.log"
    -e "ssh"
    "--rsync-path=doas -u $SERVICE_USER rsync"
  )

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "DRY RUN: Would sync the following files:"
    rsync --dry-run "${rsync_args[@]}" "$SOURCE_DIR/" "$WALTER_HOST:$TARGET_DIR/"
    return 0
  fi

  # Create target directory and set initial ownership so the service user can
  # write into it during the transfer.
  walter_exec_as_root "mkdir -p $TARGET_DIR"
  walter_exec_as_root "chown -R $SERVICE_USER:$SERVICE_GROUP $TARGET_DIR"

  # Sync files using rsync with _furt user
  log_info "Syncing files..."
  rsync "${rsync_args[@]}" "$SOURCE_DIR/" "$WALTER_HOST:$TARGET_DIR/"
  log_success "Files synced"
}
# Write /etc/rc.d/furt on walter with paths matching this deployment, make it
# executable and enable the service.
# Globals: DRY_RUN, TARGET_DIR, SERVICE_USER (read)
fix_service_file() {
  log_step "Updating OpenBSD service file"

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "DRY RUN: Would update /etc/rc.d/furt"
    return 0
  fi

  log_info "Creating correct service file..."
  # The file is written inside a root shell (the redirect needs root). The
  # remote heredoc delimiter is quoted, so \$1 reaches the file literally.
  # pexp is assigned AFTER sourcing rc.subr: rc.subr sets its own default
  # pexp while being sourced, which would overwrite an earlier assignment.
  # NOTE(review): daemon_cwd is kept unchanged - confirm that rc.subr(8) on
  # the target release honours a variable of that name.
  walter_exec_as_root "sh -c 'cat > /etc/rc.d/furt << \"EOF\"
#!/bin/ksh
daemon=\"$TARGET_DIR/scripts/start.sh\"
daemon_user=\"$SERVICE_USER\"
daemon_cwd=\"$TARGET_DIR\"
daemon_flags=\"start\"
. /etc/rc.d/rc.subr
pexp=\"lua.*main.lua\"
rc_bg=YES
rc_cmd \$1
EOF'"

  # Make service file executable
  walter_exec_as_root "chmod +x /etc/rc.d/furt"

  # Enable service if not already enabled. -x matches the whole line so a
  # different service whose name merely contains "furt" is not mistaken for it.
  if ! walter_exec "rcctl ls on | grep -qx furt"; then
    log_info "Enabling furt service..."
    walter_exec_as_root "rcctl enable furt"
    log_success "Service enabled"
  else
    log_info "Service already enabled"
  fi

  log_success "Service file updated"
}
# Give the deployed tree to the service account and mark the shell scripts in
# scripts/ as executable. In dry-run mode only the intent is logged.
# Globals: DRY_RUN, SERVICE_USER, SERVICE_GROUP, TARGET_DIR (read)
fix_permissions() {
  log_step "Fixing permissions"

  local owner="$SERVICE_USER:$SERVICE_GROUP"

  case "$DRY_RUN" in
    true)
      log_info "DRY RUN: Would set ownership to $owner"
      return 0
      ;;
  esac

  # Ownership first (sync_files sets it too; repeated here to be certain),
  # then the executable bit for every *.sh under scripts/.
  walter_exec_as_root "chown -R $owner $TARGET_DIR"
  walter_exec_as_root "chmod +x $TARGET_DIR/scripts/*.sh"

  log_success "Permissions fixed"
}
# =============================================================================
# HEALTH CHECK FUNCTIONS
# =============================================================================
# Check whether the furt port on walter is free.
# Returns: 0 when nothing is listening on the port,
#          1 when a listener exists (details are printed for diagnosis)
check_port_availability() {
  local port="8080" # Default furt port
  # BSD netstat prints local addresses as "*.8080" (dot before the port),
  # other systems use ":8080"; accept both. Only LISTEN sockets block a start:
  # leftover TIME_WAIT or ESTABLISHED entries do not. The trailing whitespace
  # keeps longer port numbers that merely start with the same digits out.
  local listen_re="[.:]${port}[[:space:]].*LISTEN"

  log_step "Checking port availability"

  if walter_exec "netstat -an | grep -Eq '$listen_re'"; then
    log_warning "Port $port is already in use"
    walter_exec "netstat -an | grep -E '$listen_re'" || true
    # Try to identify what's using the port (best effort, output only)
    log_info "Checking what's using port $port..."
    walter_exec "fstat 2>/dev/null | grep ':$port'" || walter_exec "netstat -anp 2>/dev/null | grep ':$port'" || true
    return 1
  fi

  log_success "Port $port is available"
  return 0
}
# Poll the health endpoint from walter until it answers successfully or the
# retry budget is used up.
# Globals: HEALTH_URL, HEALTH_TIMEOUT, HEALTH_RETRIES (read)
# Returns: 0 on the first successful response, 1 after HEALTH_RETRIES failures
health_check() {
  local attempt health_response process_status rcctl_status

  log_step "Running health check"

  for ((attempt = 1; attempt <= HEALTH_RETRIES; attempt++)); do
    log_info "Health check attempt $attempt/$HEALTH_RETRIES"

    # -f makes curl exit non-zero on HTTP error statuses, so an error page is
    # not mistaken for a healthy service. The body of this one request is
    # reused for the log line below.
    if health_response=$(walter_exec "curl -fsS --max-time $HEALTH_TIMEOUT $HEALTH_URL 2>/dev/null"); then
      log_success "Health check passed"
      if [[ -n "$health_response" ]]; then
        log_info "Health response: $health_response"
      fi

      # Additional status info
      process_status=$(get_service_status)
      rcctl_status=$(get_rcctl_status)
      log_info "Service status - Process: $process_status, rcctl: $rcctl_status"
      return 0
    fi

    if ((attempt < HEALTH_RETRIES)); then
      log_info "Health check failed, retrying in 5 seconds..."
      sleep 5
    fi
  done

  log_error "Health check failed after $HEALTH_RETRIES attempts"
  log_info "Debugging service status..."
  process_status=$(get_service_status)
  rcctl_status=$(get_rcctl_status)
  log_info "Process status: $process_status"
  log_info "rcctl status: $rcctl_status"

  # Check if port is at least listening. BSD netstat separates address and
  # port with a dot ("*.8080"), other systems with a colon; accept both.
  if walter_exec "netstat -an | grep -Eq '[.:]8080[[:space:]].*LISTEN'"; then
    log_warning "Port 8080 is listening but health check failed - possible service issue"
  else
    log_error "Port 8080 is not listening - service not running"
  fi
  return 1
}
# =============================================================================
# MAIN DEPLOYMENT FUNCTION
# =============================================================================
# Run a full deployment karl -> walter: validate, back up, stop the service,
# sync files, fix permissions, rewrite the rc.d file, start the service and
# verify health. Offers to restore the pre-deployment backup when the health
# check fails.
# Globals: DRY_RUN, FORCE, SOURCE_DIR, WALTER_HOST, TARGET_DIR, SERVICE_USER
#          (read)
# Exits:   0 when the operator cancels, 1 on any deployment failure
# Returns: 0 after a successful deployment or a completed dry run
deploy() {
  local backup_path="" response

  log_step "Starting deployment: karl → walter"

  # Pre-deployment checks
  check_dependencies
  prepare_source

  # Show deployment summary
  log_info "Deployment Summary:"
  log_info " Source: $SOURCE_DIR"
  log_info " Target: $WALTER_HOST:$TARGET_DIR"
  log_info " Service User: $SERVICE_USER"
  log_info " Dry Run: $DRY_RUN"
  if [[ "$DRY_RUN" == "true" ]]; then
    log_warning "DRY RUN MODE - No changes will be made"
  fi

  # Confirmation prompt
  if [[ "$FORCE" != "true" && "$DRY_RUN" != "true" ]]; then
    echo -n "Continue with deployment? [y/N]: "
    # End-of-input counts as "no" rather than aborting through set -e.
    read -r response || response=""
    if [[ ! "$response" =~ ^[Yy]$ ]]; then
      log_info "Deployment cancelled"
      exit 0
    fi
  fi

  if [[ "$DRY_RUN" != "true" ]]; then
    # Create backup before deployment. Only the last output line is the
    # backup path; any progress lines printed before it must not end up in
    # the variable, because it is later interpolated into a remote cp command.
    if ! backup_path=$(create_backup | tail -n 1); then
      log_error "Backup failed - aborting deployment"
      exit 1
    fi
    # Stop service
    stop_service
  fi

  # Deploy files (each of these honours DRY_RUN itself)
  sync_files
  fix_permissions
  # Update service file for correct paths
  fix_service_file

  if [[ "$DRY_RUN" == "true" ]]; then
    log_success "Dry run completed - no changes made"
    return 0
  fi

  # Check if port is available before starting
  if ! check_port_availability; then
    log_error "Cannot start service - port conflict detected"
    log_info "Try: ssh walter \"doas pkill -f 'lua.*main.lua'\" to kill conflicting processes"
    exit 1
  fi

  if ! start_service; then
    log_error "Failed to start service after deployment"
    exit 1
  fi

  # Health check
  if health_check; then
    log_success "Deployment completed successfully!"
    # Cleanup old backups
    cleanup_old_backups
    return 0
  fi

  log_error "Deployment failed health check"
  # Offer rollback
  if [[ -n "$backup_path" ]]; then
    echo -n "Rollback to previous version? [y/N]: "
    read -r response || response=""
    if [[ "$response" =~ ^[Yy]$ ]]; then
      log_info "Rolling back..."
      # The freshly deployed (unhealthy) service is still running at this
      # point. It has to be stopped first, otherwise start_service sees a
      # running process and never loads the restored files.
      if ! stop_service; then
        log_warning "Could not stop service before restoring backup"
      fi
      walter_exec_as_root "rm -rf $TARGET_DIR"
      walter_exec_as_root "cp -r $backup_path $TARGET_DIR"
      fix_permissions
      if start_service; then
        health_check || true
      fi
    fi
  fi
  exit 1
}
# =============================================================================
# MAIN SCRIPT LOGIC
# =============================================================================
# Parse command line arguments
# Every mode is off by default; each recognised flag switches one mode on.
DRY_RUN=false
ROLLBACK=false
FORCE=false
# Consume the positional parameters one at a time until none are left.
while (($# > 0)); do
  case "$1" in
    --dry-run) DRY_RUN=true ;;
    --rollback) ROLLBACK=true ;;
    --force) FORCE=true ;;
    --help)
      usage
      exit 0
      ;;
    *)
      # Anything unrecognised is an error: report it, show the help, fail.
      log_error "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
  shift
done
# Main execution
# Prints a banner with the current date, then runs either the rollback or the
# regular deployment depending on ROLLBACK.
main() {
  log_info "Furt Deployment Script - karl → walter"
  log_info "$(date)"

  case "$ROLLBACK" in
    true) rollback_deployment ;;
    *) deploy ;;
  esac
}
# Trap for cleanup on exit
# Logs a failure notice when the status of the last command before this
# handler ran was non-zero; stays silent otherwise.
cleanup() {
  # $? has to be captured first, before any other command overwrites it.
  local exit_code=$?
  [[ $exit_code -eq 0 ]] || log_error "Deployment failed"
}
# Run cleanup whenever the script exits, on any path, so a non-zero exit
# status is reported by the "Deployment failed" message.
trap cleanup EXIT
# Run main function
# The option loop above has already shifted away every argument, so "$@" is
# empty here; main reads the DRY_RUN / ROLLBACK / FORCE globals instead.
main "$@"