# Read worker_processes and worker_connections from the NGINX configuration.
# Arguments: $1 - optional path to the config file
#                 (default: /etc/nginx/nginx.conf)
# Outputs:   "<worker_processes> <worker_connections>" on stdout. When the file
#            is missing, or a directive is not present in it, this function's
#            own fallback values "auto" and "1024" are reported, so the output
#            always has exactly two whitespace-separated fields.
get_current_config() {
  local config_file="${1:-/etc/nginx/nginx.conf}"
  local worker_processes="" worker_connections=""

  # awk program: print the first argument of directive `name`, skipping
  # anything that follows a '#' so commented-out directives are ignored.
  local extract='{
    for (i = 1; i < NF; i++) {
      if ($i ~ /^#/) break
      if ($i == name) {
        value = $(i + 1)
        sub(/;.*$/, "", value)
        print value
        exit
      }
    }
  }'

  if [ -f "$config_file" ]; then
    worker_processes=$(awk -v name="worker_processes" "$extract" "$config_file")
    worker_connections=$(awk -v name="worker_connections" "$extract" "$config_file")
  fi

  printf '%s %s\n' "${worker_processes:-auto}" "${worker_connections:-1024}"
}
# Compute the recommended number of NGINX worker processes.
# Arguments: $1 - number of CPU cores (positive integer)
#            $2 - workload type: cpu_intensive | io_intensive | mixed |
#                 high_concurrency (anything else is treated like cpu_intensive)
#            $3 - optional memory limit in MB; 0 or empty disables the cap
# Outputs:   the recommended process count on stdout
# Returns:   0 on success, 1 when the core count is not a positive integer
calculate_worker_processes() {
  local cpu_cores=$1
  local workload_type=$2
  local memory_limit=${3:-0}
  local recommended_processes

  if ! [[ $cpu_cores =~ ^[1-9][0-9]*$ ]]; then
    printf 'calculate_worker_processes: invalid CPU core count: %s\n' "$cpu_cores" >&2
    return 1
  fi
  # A malformed limit is treated as "no limit" rather than breaking arithmetic.
  [[ $memory_limit =~ ^[0-9]+$ ]] || memory_limit=0
  memory_limit=$((10#$memory_limit))

  case "$workload_type" in
    "cpu_intensive")
      # CPU-bound: one worker per core.
      recommended_processes=$cpu_cores
      ;;
    "io_intensive")
      # I/O-bound: two workers per core.
      recommended_processes=$((cpu_cores * 2))
      ;;
    "mixed")
      # Mixed: 1.5 workers per core, rounded down (integer math, no bc needed).
      recommended_processes=$((cpu_cores * 3 / 2))
      ;;
    "high_concurrency")
      # High concurrency: 2-4 workers per core; 3 is used as the midpoint.
      recommended_processes=$((cpu_cores * 3))
      ;;
    *)
      # Default: one worker per core.
      recommended_processes=$cpu_cores
      ;;
  esac

  # Memory cap: assumes roughly 100 MB per worker process.
  if [ "$memory_limit" -gt 0 ]; then
    local max_processes_by_memory=$((memory_limit / 100))
    # Never recommend fewer than one worker, even under a tiny limit.
    if [ "$max_processes_by_memory" -lt 1 ]; then
      max_processes_by_memory=1
    fi
    if [ "$recommended_processes" -gt "$max_processes_by_memory" ]; then
      recommended_processes=$max_processes_by_memory
    fi
  fi

  printf '%s\n' "$recommended_processes"
}
# Compute the recommended worker_connections value per worker process.
# Arguments: $1 - number of worker processes (positive integer)
#            $2 - expected total number of concurrent connections
#            $3 - optional memory used per connection in bytes (default 8192)
# Outputs:   connections per worker on stdout, capped by available memory
#            (column 7 of `free -b`) and by the open-file limit (`ulimit -n`)
# Returns:   0 on success, 1 on invalid arguments
calculate_worker_connections() {
  local worker_processes=$1
  local expected_connections=$2
  local memory_per_connection=${3:-8192}

  if ! [[ $worker_processes =~ ^[0-9]+$ ]] || [ "$worker_processes" -eq 0 ]; then
    printf 'calculate_worker_connections: invalid worker process count: %s\n' \
      "$worker_processes" >&2
    return 1
  fi
  if ! [[ $expected_connections =~ ^[0-9]+$ ]]; then
    printf 'calculate_worker_connections: invalid expected connections: %s\n' \
      "$expected_connections" >&2
    return 1
  fi
  if ! [[ $memory_per_connection =~ ^[0-9]+$ ]] || [ "$memory_per_connection" -eq 0 ]; then
    printf 'calculate_worker_connections: invalid memory per connection: %s\n' \
      "$memory_per_connection" >&2
    return 1
  fi
  # Force base 10 so values with leading zeros are not parsed as octal.
  worker_processes=$((10#$worker_processes))
  expected_connections=$((10#$expected_connections))
  memory_per_connection=$((10#$memory_per_connection))

  local connections_per_process=$((expected_connections / worker_processes))

  # Memory cap; skipped when `free` is unavailable or prints no usable value.
  local available_memory
  available_memory=$(LC_ALL=C free -b 2>/dev/null | awk '$1 == "Mem:" { print $7; exit }')
  if [[ $available_memory =~ ^[0-9]+$ ]]; then
    local max_connections_by_memory=$((available_memory / memory_per_connection / worker_processes))
    if [ "$connections_per_process" -gt "$max_connections_by_memory" ]; then
      connections_per_process=$max_connections_by_memory
    fi
  fi

  # File-descriptor cap; `ulimit -n` may print "unlimited", which is no cap.
  local ulimit_n
  ulimit_n=$(ulimit -n)
  if [[ $ulimit_n =~ ^[0-9]+$ ]]; then
    local max_connections_by_ulimit=$((ulimit_n / worker_processes))
    if [ "$connections_per_process" -gt "$max_connections_by_ulimit" ]; then
      connections_per_process=$max_connections_by_ulimit
    fi
  fi

  printf '%s\n' "$connections_per_process"
}
# Report system memory usage as a percentage (used / total from `free`).
# Outputs: the percentage with two decimals on stdout; "0.00" when `free` is
#          unavailable or reports no usable "Mem:" line.
# Returns: 0 on success, 1 when the fallback value was printed
get_system_memory_usage() {
  # LC_ALL=C pins the "Mem:" label and the '.' decimal separator so callers
  # can feed the value to numeric comparisons regardless of the user's locale.
  LC_ALL=C free 2>/dev/null | LC_ALL=C awk '
    $1 == "Mem:" && $2 > 0 {
      printf "%.2f\n", $3 / $2 * 100.0
      found = 1
      exit
    }
    END {
      if (!found) {
        print "0.00"
        exit 1
      }
    }
  '
}
# Fetch the NGINX status page.
# Arguments: $1 - optional status URL (default: http://localhost/nginx_status)
# Outputs:   the page body on stdout, or the literal string "ERROR" when the
#            request fails, returns an HTTP error, or yields an empty body
get_nginx_status() {
  local url="${1:-http://localhost/nginx_status}"
  local status

  # The assignment is kept separate from `local` on purpose: `local var=$(cmd)`
  # reports the exit status of `local` (always 0) and would hide curl failures.
  # -f turns HTTP errors into a non-zero exit; --max-time bounds a hung server.
  if status=$(curl -s -f --max-time 5 "$url" 2>/dev/null) && [ -n "$status" ]; then
    printf '%s\n' "$status"
  else
    echo "ERROR"
  fi
}
# Parse the text returned by get_nginx_status.
# Arguments: $1 - status page text, or the literal "ERROR"
# Outputs:   seven space-separated fields on stdout:
#            active accepts handled requests reading writing waiting
#            Any field that is missing or non-numeric is reported as 0 so the
#            field positions stay stable for callers that split on whitespace.
parse_nginx_status() {
  local status="$1"

  if [ "$status" = "ERROR" ]; then
    echo "0 0 0 0 0 0 0"
    return
  fi

  # Line 3 carries the accepts/handled/requests counters and line 4 the
  # Reading/Writing/Waiting values. Values are printed as strings (not via
  # %d) so large counters are never truncated by awk's integer formatting.
  printf '%s\n' "$status" | awk '
    function num(v) { return (v ~ /^[0-9]+$/) ? v : 0 }
    /Active connections/ { active = $3 }
    NR == 3 { accepts = $1; handled = $2; requests = $3 }
    NR == 4 { reading = $2; writing = $4; waiting = $6 }
    END {
      print num(active), num(accepts), num(handled), num(requests), \
            num(reading), num(writing), num(waiting)
    }
  '
}
# Check that the NGINX master and worker processes are running as configured.
# Globals:  NGINX_CONFIG (read, optional) - config path; defaults to
#           /etc/nginx/nginx.conf when unset
# Outputs:  one line on stdout starting with CRITICAL:, WARNING: or HEALTHY:
# Returns:  0 when healthy, 1 otherwise
# Relies on get_worker_info printing "<total> <workers> <masters>".
check_worker_health() {
  local config_file="${NGINX_CONFIG:-/etc/nginx/nginx.conf}"
  local worker_info total_processes worker_processes master_processes

  worker_info=$(get_worker_info)
  read -r total_processes worker_processes master_processes _ \
    <<< "${worker_info//$'\n'/ }"

  # Missing or malformed counts are treated as 0 so they surface as CRITICAL
  # instead of as a shell syntax error in the numeric tests below.
  [[ $total_processes =~ ^[0-9]+$ ]] || total_processes=0
  [[ $worker_processes =~ ^[0-9]+$ ]] || worker_processes=0
  [[ $master_processes =~ ^[0-9]+$ ]] || master_processes=0

  # Process presence checks.
  if [ "$total_processes" -eq 0 ]; then
    echo "CRITICAL: No NGINX processes running"
    return 1
  fi

  if [ "$master_processes" -eq 0 ]; then
    echo "CRITICAL: No NGINX master process running"
    return 1
  fi

  if [ "$worker_processes" -eq 0 ]; then
    echo "CRITICAL: No NGINX worker processes running"
    return 1
  fi

  # Compare the running worker count with the configured worker_processes.
  # Matching on the first field skips commented-out directives.
  local expected_workers=""
  if [ -r "$config_file" ]; then
    expected_workers=$(awk '
      $1 == "worker_processes" { v = $2; sub(/;.*$/, "", v); print v; exit }
    ' "$config_file")
  fi

  if [ "$expected_workers" = "auto" ]; then
    expected_workers=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
  fi
  # Directive absent or unparsable: fall back to 1, which is NGINX's
  # documented default for worker_processes.
  [[ $expected_workers =~ ^[1-9][0-9]*$ ]] || expected_workers=1

  if [ "$worker_processes" -lt "$expected_workers" ]; then
    echo "WARNING: Worker processes count ($worker_processes) is less than expected ($expected_workers)"
    return 1
  fi

  echo "HEALTHY: Worker processes running normally"
  return 0
}
# Succeed when numeric value $1 is strictly greater than numeric threshold $2.
# Non-numeric or empty operands never exceed, so a failed probe or an unset
# threshold cannot raise a false alarm or a syntax error.
_metric_exceeds() {
  LC_ALL=C awk -v v="$1" -v t="$2" 'BEGIN {
    num = "^[+-]?([0-9]+\\.?[0-9]*|\\.[0-9]+)$"
    exit !(v ~ num && t ~ num && v + 0 > t + 0)
  }'
}

# Compare worker and system resource usage against the configured thresholds.
# Globals:  CPU_THRESHOLD, MEMORY_THRESHOLD, LOAD_THRESHOLD (read)
# Outputs:  "WARNING: <issue>; <issue>..." or "HEALTHY: ..." on stdout
# Returns:  0 when no threshold is exceeded, 1 otherwise
# Relies on get_worker_cpu_usage, get_worker_memory_usage, get_system_load,
# get_system_cpu_usage and get_system_memory_usage each printing one number.
check_resource_usage() {
  local worker_cpu worker_memory system_load system_cpu system_memory
  worker_cpu=$(get_worker_cpu_usage)
  worker_memory=$(get_worker_memory_usage)
  system_load=$(get_system_load)
  system_cpu=$(get_system_cpu_usage)
  system_memory=$(get_system_memory_usage)

  local issues=()

  # Worker process CPU usage.
  if _metric_exceeds "$worker_cpu" "${CPU_THRESHOLD:-}"; then
    issues+=("Worker CPU usage is high: ${worker_cpu}%")
  fi

  # Worker process memory usage.
  if _metric_exceeds "$worker_memory" "${MEMORY_THRESHOLD:-}"; then
    issues+=("Worker memory usage is high: ${worker_memory}%")
  fi

  # System load.
  if _metric_exceeds "$system_load" "${LOAD_THRESHOLD:-}"; then
    issues+=("System load is high: $system_load")
  fi

  # System CPU usage.
  if _metric_exceeds "$system_cpu" "${CPU_THRESHOLD:-}"; then
    issues+=("System CPU usage is high: ${system_cpu}%")
  fi

  # System memory usage.
  if _metric_exceeds "$system_memory" "${MEMORY_THRESHOLD:-}"; then
    issues+=("System memory usage is high: ${system_memory}%")
  fi

  if [ "${#issues[@]}" -gt 0 ]; then
    # Join with "; " so several issues stay readable on a single line.
    local joined
    printf -v joined '%s; ' "${issues[@]}"
    echo "WARNING: ${joined%; }"
    return 1
  fi

  echo "HEALTHY: Resource usage is normal"
  return 0
}
# Mail an alert and record it in the log.
# Globals:   ALERT_EMAIL (read) - recipient address
# Arguments: $1 - alert message
# Outputs:   diagnostics on stderr when the mail cannot be sent
# The alert is always logged through log_message, even when mailing is
# skipped or fails, so a mail problem never hides the alert itself.
send_alert() {
  local message="$1"

  if [ -z "${ALERT_EMAIL:-}" ]; then
    printf 'send_alert: ALERT_EMAIL is not set; alert not mailed\n' >&2
  elif ! command -v mail >/dev/null 2>&1; then
    printf 'send_alert: mail command not found; alert not mailed\n' >&2
  elif ! printf '%s\n' "$message" | mail -s "NGINX Worker Alert" "$ALERT_EMAIL"; then
    printf 'send_alert: failed to mail alert to %s\n' "$ALERT_EMAIL" >&2
  fi

  log_message "ALERT: $message"
}
# Print a full monitoring report to stdout.
# Collects process counts, worker/system resource usage, the parsed NGINX
# status counters and both health checks, then renders them as a text report.
# Relies on get_worker_info, get_worker_cpu_usage, get_worker_memory_usage,
# get_worker_memory_mb, get_system_load, get_system_cpu_usage,
# get_system_memory_usage, get_nginx_status, parse_nginx_status,
# check_worker_health and check_resource_usage.
generate_report() {
  # Process counts: "<total> <workers> <masters>".
  local worker_info total_processes worker_processes master_processes
  worker_info=$(get_worker_info)
  read -r total_processes worker_processes master_processes _ \
    <<< "${worker_info//$'\n'/ }"

  # Resource usage of the workers and of the whole system.
  local worker_cpu worker_memory worker_memory_mb
  local system_load system_cpu system_memory
  worker_cpu=$(get_worker_cpu_usage)
  worker_memory=$(get_worker_memory_usage)
  worker_memory_mb=$(get_worker_memory_mb)
  system_load=$(get_system_load)
  system_cpu=$(get_system_cpu_usage)
  system_memory=$(get_system_memory_usage)

  # Status counters, in the order parse_nginx_status prints them.
  local nginx_status status_info
  local active accepts handled requests reading writing waiting
  nginx_status=$(get_nginx_status)
  status_info=$(parse_nginx_status "$nginx_status")
  read -r active accepts handled requests reading writing waiting _ \
    <<< "${status_info//$'\n'/ }"

  cat << EOF
=== NGINX Worker进程监控报告 ===
生成时间: $(date)

=== 进程信息 ===
总进程数: $total_processes
Worker进程数: $worker_processes
Master进程数: $master_processes

=== Worker进程资源使用 ===
CPU使用率: ${worker_cpu}%
内存使用率: ${worker_memory}%
内存占用: ${worker_memory_mb}MB

=== 系统资源使用 ===
系统负载: $system_load
系统CPU使用率: ${system_cpu}%
系统内存使用率: ${system_memory}%

=== NGINX状态 ===
活跃连接数: $active
总接受连接数: $accepts
总处理连接数: $handled
总请求数: $requests
读取连接数: $reading
写入连接数: $writing
等待连接数: $waiting

=== 健康检查 ===
进程健康状态: $(check_worker_health)
资源使用状态: $(check_resource_usage)
EOF
}
# Main monitoring entry point: run both health checks, send an alert for each
# failing one, then append a full report to the log.
# Globals: LOG_FILE (read) - file the report is appended to; when it is unset
#          or empty the report is printed to stdout instead
# Relies on log_message, check_worker_health, check_resource_usage,
# send_alert and generate_report.
monitor_workers() {
  log_message "Starting NGINX worker process monitoring"

  # Worker process health.
  # The assignment is kept apart from `local`: with `local var=$(cmd)` the
  # status seen afterwards belongs to `local` (always 0), so a failing check
  # would never trigger an alert.
  local health_status
  if ! health_status=$(check_worker_health); then
    send_alert "$health_status"
  fi

  # Resource usage.
  local resource_status
  if ! resource_status=$(check_resource_usage); then
    send_alert "$resource_status"
  fi

  # Monitoring report.
  if [ -n "${LOG_FILE:-}" ]; then
    generate_report >> "$LOG_FILE"
  else
    generate_report
  fi

  log_message "NGINX worker process monitoring completed"
}
# Write a timestamped log line.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - message text
# Outputs:   "YYYY-MM-DD HH:MM:SS - <message>" on stdout, and appended to
#            LOG_FILE when that variable is set and non-empty
log_message() {
  local line
  line="$(date '+%Y-%m-%d %H:%M:%S') - $1"

  if [ -n "${LOG_FILE:-}" ]; then
    printf '%s\n' "$line" | tee -a "$LOG_FILE"
  else
    # No log file configured: still echo the line so nothing is lost.
    printf '%s\n' "$line"
  fi
}
# Copy the NGINX configuration to a timestamped backup file.
# Globals:  NGINX_CONFIG (read) - file to back up
#           BACKUP_DIR (read)   - directory receiving the backup (created if
#                                 it does not exist)
# Outputs:  progress and errors through log_message
# Returns:  0 when the backup was written, 1 otherwise. A success message is
#           only logged after the copy actually succeeded, so callers can
#           refuse to modify the config when no backup exists.
backup_config() {
  local timestamp backup_file

  if [ -z "${BACKUP_DIR:-}" ]; then
    log_message "ERROR: BACKUP_DIR is not set; configuration not backed up"
    return 1
  fi

  timestamp=$(date +%Y%m%d_%H%M%S)
  backup_file="$BACKUP_DIR/nginx.conf.backup.$timestamp"

  if ! mkdir -p "$BACKUP_DIR"; then
    log_message "ERROR: Cannot create backup directory: $BACKUP_DIR"
    return 1
  fi

  # -p keeps the mode and timestamps of the original file.
  if ! cp -p -- "$NGINX_CONFIG" "$backup_file"; then
    log_message "ERROR: Failed to back up $NGINX_CONFIG to $backup_file"
    return 1
  fi

  log_message "Configuration backed up to: $backup_file"
}
# Read the worker-related directives from the NGINX configuration.
# Globals:  NGINX_CONFIG (read) - path to the configuration file
# Outputs:  "<worker_processes> <worker_connections> [<affinity masks>...]"
#           on stdout. Fields 1 and 2 are always present: when a directive is
#           missing, NGINX's documented defaults (1 and 512) are reported so
#           the field positions never shift. The affinity masks are appended
#           only when worker_cpu_affinity is configured.
# Returns:  0 on success, 1 (with no stdout output) when the file is unreadable
get_current_config() {
  if [ ! -r "${NGINX_CONFIG:-}" ]; then
    printf 'get_current_config: cannot read NGINX config: %s\n' \
      "${NGINX_CONFIG:-<unset>}" >&2
    return 1
  fi

  # awk program: print every argument of directive `name` up to the closing
  # ';', ignoring text after a '#' so commented-out directives are skipped.
  local extract='{
    for (i = 1; i <= NF; i++) {
      if ($i ~ /^#/) break
      if ($i != name) continue
      out = ""
      for (j = i + 1; j <= NF; j++) {
        tok = $j
        last = (tok ~ /;/)
        sub(/;.*$/, "", tok)
        if (tok != "") out = (out == "" ? tok : out " " tok)
        if (last) break
      }
      print out
      exit
    }
  }'

  local worker_processes worker_connections worker_cpu_affinity
  worker_processes=$(awk -v name="worker_processes" "$extract" "$NGINX_CONFIG")
  worker_connections=$(awk -v name="worker_connections" "$extract" "$NGINX_CONFIG")
  worker_cpu_affinity=$(awk -v name="worker_cpu_affinity" "$extract" "$NGINX_CONFIG")

  # These two directives take a single argument; keep only the first token.
  worker_processes=${worker_processes%% *}
  worker_connections=${worker_connections%% *}

  printf '%s\n' "${worker_processes:-1} ${worker_connections:-512}${worker_cpu_affinity:+ $worker_cpu_affinity}"
}
# Collect basic system facts.
# Outputs: "<cpu_cores> <memory_gb> <load_avg_1min>" on stdout. Every field is
#          always present; when a value cannot be determined the fallbacks
#          1, 0 and 0.00 are used so callers can split on whitespace safely.
# Note:    memory_gb is a whole number of GiB rounded down, so hosts with
#          less than 1 GiB report 0.
get_system_info() {
  local cpu_cores memory_gb load_avg=""

  # CPU cores: count "processor" entries, falling back to getconf.
  cpu_cores=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
  if ! [[ $cpu_cores =~ ^[1-9][0-9]*$ ]]; then
    cpu_cores=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
  fi
  [[ $cpu_cores =~ ^[1-9][0-9]*$ ]] || cpu_cores=1

  # Total memory in GiB; LC_ALL=C keeps the "Mem:" label untranslated.
  memory_gb=$(LC_ALL=C free -g 2>/dev/null | awk '$1 == "Mem:" { print $2; exit }')
  if ! [[ $memory_gb =~ ^[0-9]+$ ]] && [ -r /proc/meminfo ]; then
    # MemTotal is reported in kB.
    memory_gb=$(awk '$1 == "MemTotal:" { print int($2 / 1048576); exit }' /proc/meminfo)
  fi
  [[ $memory_gb =~ ^[0-9]+$ ]] || memory_gb=0

  # 1-minute load average: /proc/loadavg avoids parsing localized uptime text.
  if [ -r /proc/loadavg ]; then
    read -r load_avg _ < /proc/loadavg
  else
    load_avg=$(LC_ALL=C uptime 2>/dev/null \
      | awk -F'load averages?: *' 'NF > 1 { split($2, parts, /[ ,]+/); print parts[1] }')
  fi
  [ -n "$load_avg" ] || load_avg="0.00"

  printf '%s %s %s\n' "$cpu_cores" "$memory_gb" "$load_avg"
}
# Log an analysis of the current worker configuration against the hardware.
# Outputs: all findings go through log_message
# Returns: 0 after a completed analysis, 1 when the configuration values
#          cannot be parsed as numbers
# Relies on get_current_config printing "<worker_processes> <worker_connections> ..."
# and get_system_info printing "<cpu_cores> <memory_gb> <load_avg>".
analyze_performance() {
  local current_config worker_processes worker_connections
  current_config=$(get_current_config)
  read -r worker_processes worker_connections _ <<< "${current_config//$'\n'/ }"

  local system_info cpu_cores memory_gb load_avg
  system_info=$(get_system_info)
  read -r cpu_cores memory_gb load_avg _ <<< "${system_info//$'\n'/ }"

  log_message "=== 当前配置分析 ==="
  log_message "Worker进程数: $worker_processes"
  log_message "每个Worker连接数: $worker_connections"
  log_message "CPU核心数: $cpu_cores"
  log_message "内存大小: ${memory_gb}GB"
  log_message "系统负载: $load_avg"

  [[ $cpu_cores =~ ^[0-9]+$ ]] || cpu_cores=0
  [[ $memory_gb =~ ^[0-9]+$ ]] || memory_gb=0

  # "auto" means one worker per core; resolve it before doing arithmetic,
  # where the bare word would otherwise silently evaluate to 0.
  local effective_processes=$worker_processes
  if [ "$worker_processes" = "auto" ]; then
    effective_processes=$cpu_cores
  fi
  if ! [[ $effective_processes =~ ^[0-9]+$ && $worker_connections =~ ^[0-9]+$ ]]; then
    log_message "ERROR: 无法解析Worker配置 (worker_processes='$worker_processes', worker_connections='$worker_connections')"
    return 1
  fi
  effective_processes=$((10#$effective_processes))
  worker_connections=$((10#$worker_connections))

  # Plausibility of the configuration.
  local total_connections=$((effective_processes * worker_connections))
  log_message "总连接数: $total_connections"

  # Worker process count versus CPU cores.
  if [ "$worker_processes" = "auto" ]; then
    log_message "Worker进程数设置为auto,将使用CPU核心数: $cpu_cores"
  elif [ "$effective_processes" -gt "$cpu_cores" ]; then
    log_message "WARNING: Worker进程数($worker_processes)大于CPU核心数($cpu_cores)"
  elif [ "$effective_processes" -lt "$cpu_cores" ]; then
    log_message "INFO: Worker进程数($worker_processes)小于CPU核心数($cpu_cores),可能未充分利用CPU"
  fi

  # Connection count versus memory, assuming 8 KiB per connection.
  # memory_gb GiB is memory_gb * 1024 * 1024 KiB, divided by 8 KiB each.
  # The check is skipped when memory_gb is 0 (unknown or below 1 GiB).
  if [ "$memory_gb" -gt 0 ]; then
    local max_connections_by_memory=$((memory_gb * 1024 * 1024 / 8))
    if [ "$total_connections" -gt "$max_connections_by_memory" ]; then
      log_message "WARNING: 总连接数($total_connections)可能超过内存限制"
    fi
  fi

  # Connection count versus the open-file limit ("unlimited" is no limit).
  local ulimit_n
  ulimit_n=$(ulimit -n)
  if [[ $ulimit_n =~ ^[0-9]+$ ]] && [ "$total_connections" -gt "$ulimit_n" ]; then
    log_message "WARNING: 总连接数($total_connections)超过系统文件描述符限制($ulimit_n)"
  fi
}
# Choose a worker_processes value for the workload and write it to the config.
# Globals:   NGINX_CONFIG (read)                - file edited in place
#            OPTIMIZED_WORKER_PROCESSES (write) - the exact value chosen
# Arguments: $1 - workload type: cpu_intensive | io_intensive | mixed |
#                 high_concurrency (anything else is treated like cpu_intensive)
# Returns:   the chosen process count as the function's exit status, as
#            before. An exit status only holds 0-255, so larger values wrap;
#            read OPTIMIZED_WORKER_PROCESSES when the exact number is needed.
# Relies on get_system_info printing the CPU core count as its first field.
optimize_worker_processes() {
  local workload_type="$1"
  local system_info cpu_cores optimized_processes

  system_info=$(get_system_info)
  read -r cpu_cores _ <<< "${system_info//$'\n'/ }"
  [[ $cpu_cores =~ ^[1-9][0-9]*$ ]] || cpu_cores=1

  case "$workload_type" in
    "cpu_intensive")
      optimized_processes=$cpu_cores
      ;;
    "io_intensive")
      optimized_processes=$((cpu_cores * 2))
      ;;
    "mixed")
      # 1.5 workers per core, rounded down (integer math, no bc needed).
      optimized_processes=$((cpu_cores * 3 / 2))
      ;;
    "high_concurrency")
      optimized_processes=$((cpu_cores * 3))
      ;;
    *)
      optimized_processes=$cpu_cores
      ;;
  esac

  log_message "优化后的Worker进程数: $optimized_processes"

  # Update the config file. The pattern is anchored to the start of the line
  # (after optional indentation) so commented-out directives and prose that
  # merely mentions worker_processes are left untouched.
  if ! sed -i "s/^\([[:space:]]*\)worker_processes[[:space:]].*/\1worker_processes $optimized_processes;/" "$NGINX_CONFIG"; then
    log_message "ERROR: Failed to update worker_processes in $NGINX_CONFIG"
  fi

  OPTIMIZED_WORKER_PROCESSES=$optimized_processes
  return "$optimized_processes"
}
# Choose a worker_connections value and write it to the config.
# Globals:   NGINX_CONFIG (read)                  - file edited in place
#            OPTIMIZED_WORKER_CONNECTIONS (write) - the exact value chosen
# Arguments: $1 - number of worker processes (positive integer)
#            $2 - expected total number of concurrent connections
# Returns:   the chosen value as the function's exit status, as before. An
#            exit status only holds 0-255, so realistic values wrap around;
#            read OPTIMIZED_WORKER_CONNECTIONS for the exact number.
#            Invalid arguments return 1 without touching the config.
# Relies on get_system_info printing the memory size in GB as its 2nd field.
optimize_worker_connections() {
  local worker_processes="$1"
  local expected_connections="$2"

  if ! [[ $worker_processes =~ ^[0-9]+$ && $expected_connections =~ ^[0-9]+$ ]] \
    || [ "$worker_processes" -eq 0 ]; then
    log_message "ERROR: Invalid arguments for optimize_worker_connections: '$worker_processes' '$expected_connections'"
    return 1
  fi
  worker_processes=$((10#$worker_processes))
  expected_connections=$((10#$expected_connections))

  local optimized_connections=$((expected_connections / worker_processes))

  # Memory cap, assuming 8 KiB per connection: memory_gb GiB is
  # memory_gb * 1024 * 1024 KiB. Skipped when the size is unknown or 0.
  local system_info memory_gb
  system_info=$(get_system_info)
  read -r _ memory_gb _ <<< "${system_info//$'\n'/ }"
  if [[ $memory_gb =~ ^[0-9]+$ ]] && [ "$memory_gb" -gt 0 ]; then
    local max_connections_by_memory=$((memory_gb * 1024 * 1024 / 8 / worker_processes))
    if [ "$optimized_connections" -gt "$max_connections_by_memory" ]; then
      optimized_connections=$max_connections_by_memory
    fi
  fi

  # File-descriptor cap; `ulimit -n` may print "unlimited", which is no cap.
  local ulimit_n
  ulimit_n=$(ulimit -n)
  if [[ $ulimit_n =~ ^[0-9]+$ ]]; then
    local max_connections_by_ulimit=$((ulimit_n / worker_processes))
    if [ "$optimized_connections" -gt "$max_connections_by_ulimit" ]; then
      optimized_connections=$max_connections_by_ulimit
    fi
  fi

  # Never write a value of 0 into the configuration.
  if [ "$optimized_connections" -lt 1 ]; then
    optimized_connections=1
  fi

  log_message "优化后的每个Worker连接数: $optimized_connections"

  # Update the config file; anchored so commented-out lines are not rewritten
  # and the directive's indentation is preserved.
  if ! sed -i "s/^\([[:space:]]*\)worker_connections[[:space:]].*/\1worker_connections $optimized_connections;/" "$NGINX_CONFIG"; then
    log_message "ERROR: Failed to update worker_connections in $NGINX_CONFIG"
  fi

  OPTIMIZED_WORKER_CONNECTIONS=$optimized_connections
  return "$optimized_connections"
}
# Pin each worker process to its own CPU core via worker_cpu_affinity.
# Globals:   NGINX_CONFIG (read) - file edited in place
# Arguments: $1 - number of worker processes
# Outputs:   progress through log_message
# Returns:   0 when the directive was updated or the optimization was skipped
#            because there are more workers than cores; 1 on invalid input,
#            a missing directive, or a failed edit
# Relies on get_system_info printing the CPU core count as its first field.
optimize_cpu_affinity() {
  local worker_processes="$1"
  local system_info cpu_cores

  system_info=$(get_system_info)
  read -r cpu_cores _ <<< "${system_info//$'\n'/ }"

  if ! [[ $worker_processes =~ ^[0-9]+$ && $cpu_cores =~ ^[1-9][0-9]*$ ]]; then
    log_message "ERROR: 无法计算CPU绑定 (worker_processes='$worker_processes', cpu_cores='$cpu_cores')"
    return 1
  fi
  worker_processes=$((10#$worker_processes))

  if [ "$worker_processes" -le "$cpu_cores" ]; then
    # Build one mask per worker: cpu_cores binary digits with a single 1,
    # worker i on core i (rightmost digit is core 0). The masks are built as
    # strings, so this works for any core count; formatting the binary digits
    # through a numeric printf overflows once 20 or more digits are needed.
    local zeros affinity_mask="" mask i
    printf -v zeros '%*s' "$cpu_cores" ''
    zeros=${zeros// /0}
    for ((i = 0; i < worker_processes; i++)); do
      mask="${zeros:0:cpu_cores-1-i}1${zeros:0:i}"
      affinity_mask="${affinity_mask:+$affinity_mask }$mask"
    done

    log_message "优化后的CPU绑定: $affinity_mask"

    # Update the config file. The substitution only rewrites an existing,
    # uncommented directive, so report it when there is nothing to rewrite.
    if ! grep -Eq '^[[:space:]]*worker_cpu_affinity[[:space:]]' "$NGINX_CONFIG"; then
      log_message "WARNING: ${NGINX_CONFIG}中未找到worker_cpu_affinity指令,配置未更新"
      return 1
    fi
    if ! sed -i "s/^\([[:space:]]*\)worker_cpu_affinity[[:space:]].*/\1worker_cpu_affinity $affinity_mask;/" "$NGINX_CONFIG"; then
      log_message "ERROR: Failed to update worker_cpu_affinity in $NGINX_CONFIG"
      return 1
    fi
  else
    log_message "Worker进程数大于CPU核心数,跳过CPU绑定优化"
  fi
}