1. Overview of Handling Millions of Requests with NGINX

NGINX is a high-performance web server and reverse proxy known for its outstanding concurrency handling. With the right configuration and tuning, NGINX can sustain concurrent connections on the order of a million. This article walks through how NGINX achieves high concurrency, the performance-tuning strategies that matter, and best practices for day-to-day operations.

1.1 Core Advantages for High Concurrency

  1. Event-driven architecture: asynchronous, non-blocking I/O built on epoll (a quick verification sketch follows this list)
  2. Memory efficiency: low memory footprint and efficient memory management
  3. CPU efficiency: one single-threaded event loop per worker, avoiding context switches
  4. Connection reuse: efficient connection pooling and keep-alive handling
  5. Load balancing: multiple load-balancing algorithms
  6. Caching: built-in static file caching and proxy caching
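
On Linux, you can confirm that a running worker really is driving an epoll event loop by looking at its file descriptors or tracing its system calls. This is an illustrative check only; the pgrep pattern assumes the default "nginx: worker process" process title.

# Pick one worker process
WORKER_PID=$(pgrep -f 'nginx: worker process' | head -n 1)

# The epoll instance shows up as an anonymous eventpoll inode
ls -l /proc/$WORKER_PID/fd | grep eventpoll

# Or watch the event loop directly (requires strace and root; Ctrl-C to stop)
strace -e trace=epoll_wait,epoll_ctl -p "$WORKER_PID"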

1.2 Performance Characteristics

  • High concurrency: tens of thousands up to around a million concurrent connections (a rough ceiling can be estimated as shown after this list)
  • Low latency: millisecond-level response times
  • High throughput: hundreds of thousands of requests per second
  • Stability: designed for 24/7 continuous operation
  • Scalability: supports both horizontal and vertical scaling
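
As a rough sanity check, the theoretical connection ceiling of a given setup is worker_processes × worker_connections. A minimal sketch, assuming nginx is on the PATH and uses its standard configuration:

# Number of workers actually running
WORKERS=$(pgrep -c -f 'nginx: worker process')

# worker_connections from the effective configuration (nginx -T dumps it all)
CONNS=$(nginx -T 2>/dev/null | awk '/worker_connections/ {gsub(";",""); print $2; exit}')

# Upper bound on concurrent client connections; halve it when proxying,
# because each client connection also consumes an upstream connection
echo "Theoretical ceiling: $((WORKERS * CONNS)) connections"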

1.3 Architecture and Process Model

  • Single-process mode: one process handles everything (master_process off; intended for debugging only)
  • Master/worker mode (the default): a master process manages several worker processes, each running its own event loop (see the inspection sketch after this list)
  • Thread pools: optional aio threads offload blocking disk I/O from the workers
  • Helper processes: cache manager and cache loader processes appear when proxy caching is configured
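
The process layout is easy to inspect on a running host. A minimal sketch (process titles are the defaults; cache helper processes only appear when proxy_cache_path is configured):

# Master process plus its workers and cache helpers
ps -o pid,ppid,user,cmd -C nginx

# Workers share the listening sockets inherited from the master
ss -ltnp | grep nginx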

2. NGINX Configuration for High Concurrency

2.1 Main Configuration File

# NGINX main configuration file - tuned for high concurrency
# /etc/nginx/nginx.conf

# Run workers as this user
user nginx;

# Number of worker processes; "auto" matches the number of CPU cores
worker_processes auto;

# Maximum number of open file descriptors per worker process
worker_rlimit_nofile 65535;

# Error log
error_log /var/log/nginx/error.log warn;

# PID file
pid /var/run/nginx.pid;

# Events module
events {
    # Use the epoll event model (Linux)
    use epoll;

    # Maximum number of connections per worker process
    worker_connections 65535;

    # Let a worker accept several new connections at once
    multi_accept on;

    # Disable the accept mutex so every worker is notified of new connections
    accept_mutex off;
}

# HTTP module
http {
    # MIME type definitions
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Log format
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    'rt=$request_time uct="$upstream_connect_time" '
                    'uht="$upstream_header_time" urt="$upstream_response_time"';

    # Access log
    access_log /var/log/nginx/access.log main;

    # Performance tuning
    # Zero-copy file transfers
    sendfile on;

    # Send headers and the start of a file in one packet (used with sendfile)
    tcp_nopush on;

    # Disable Nagle's algorithm on keep-alive connections
    tcp_nodelay on;

    # Keep-alive timeout for client connections
    keepalive_timeout 65;

    # Maximum number of requests per keep-alive connection
    keepalive_requests 1000;

    # Maximum allowed client request body size
    client_max_body_size 100m;

    # Client request header buffer size
    client_header_buffer_size 4k;

    # Buffers for large client request headers
    large_client_header_buffers 8 16k;

    # Client request body buffer size
    client_body_buffer_size 128k;

    # Temporary file path for client request bodies
    client_body_temp_path /var/cache/nginx/client_temp;

    # Temporary file path for proxied responses
    proxy_temp_path /var/cache/nginx/proxy_temp;

    # Temporary file path for FastCGI responses
    fastcgi_temp_path /var/cache/nginx/fastcgi_temp;

    # Compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_comp_level 6;
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/json
        application/javascript
        application/xml+rss
        application/atom+xml
        image/svg+xml;

    # Open file metadata cache
    open_file_cache max=10000 inactive=20s;
    open_file_cache_valid 30s;
    open_file_cache_min_uses 2;
    open_file_cache_errors on;

    # Limit concurrent connections per client IP
    limit_conn_zone $binary_remote_addr zone=conn_limit_per_ip:10m;
    limit_conn conn_limit_per_ip 20;

    # Limit request rate per client IP
    limit_req_zone $binary_remote_addr zone=req_limit_per_ip:10m rate=10r/s;
    limit_req zone=req_limit_per_ip burst=20 nodelay;

    # Upstream server group
    upstream backend {
        # Load-balancing algorithm
        least_conn;

        # Backend servers
        server 192.168.1.10:8080 weight=3 max_fails=3 fail_timeout=30s;
        server 192.168.1.11:8080 weight=3 max_fails=3 fail_timeout=30s;
        server 192.168.1.12:8080 weight=2 max_fails=3 fail_timeout=30s;

        # Idle keep-alive connections to the upstream
        keepalive 32;
        keepalive_requests 1000;
        keepalive_timeout 60s;
    }

    # Proxy cache
    proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=my_cache:10m max_size=1g inactive=60m use_temp_path=off;

    # Main server block
    server {
        listen 80;
        server_name example.com;

        # Security headers
        add_header X-Frame-Options DENY;
        add_header X-Content-Type-Options nosniff;
        add_header X-XSS-Protection "1; mode=block";

        # Static files
        location ~* \.(jpg|jpeg|png|gif|ico|css|js)$ {
            expires 1y;
            add_header Cache-Control "public, immutable";
            access_log off;
        }

        # API proxy
        location /api/ {
            # Restrict allowed request methods
            limit_except GET POST {
                deny all;
            }

            # Proxy settings
            proxy_pass http://backend;
            # HTTP/1.1 with an empty Connection header is required for
            # the upstream keepalive pool to be used
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Timeouts
            proxy_connect_timeout 5s;
            proxy_send_timeout 10s;
            proxy_read_timeout 10s;

            # Buffering
            proxy_buffering on;
            proxy_buffer_size 4k;
            proxy_buffers 8 4k;
            proxy_busy_buffers_size 8k;

            # Caching
            proxy_cache my_cache;
            proxy_cache_valid 200 302 10m;
            proxy_cache_valid 404 1m;
            proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
            proxy_cache_lock on;

            # Fail over to the next upstream on errors
            proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
            proxy_next_upstream_tries 3;
            proxy_next_upstream_timeout 10s;
        }

        # Health check endpoint
        location /health {
            access_log off;
            default_type text/plain;
            return 200 "healthy\n";
        }

        # Status monitoring endpoint
        location /nginx_status {
            stub_status on;
            access_log off;
            allow 127.0.0.1;
            allow 192.168.1.0/24;
            deny all;
        }
    }
}
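
After editing the main configuration, validate it and reload gracefully rather than restarting, so that in-flight connections are not dropped. A typical sequence (assuming NGINX runs under systemd with the usual unit name):

# Check syntax and referenced files first
nginx -t

# Graceful reload: old workers drain their connections, new workers take over
systemctl reload nginx    # or: nginx -s reload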

2.2 Performance-Tuning Configuration

# NGINX performance-tuning configuration
# Note: this is a complete main configuration. Top-level directives such as
# worker_processes and the events block cannot appear in a conf.d include
# (those files are included inside the http block), so use this file as
# /etc/nginx/nginx.conf rather than as /etc/nginx/conf.d/performance.conf.

# Worker process tuning
worker_processes auto;
worker_cpu_affinity auto;
worker_rlimit_nofile 65535;

# Event model tuning
events {
    use epoll;
    worker_connections 65535;
    multi_accept on;
    accept_mutex off;
}

# HTTP module tuning
http {
    # MIME types (needed in a main configuration)
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Basics
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;

    # Connection tuning
    keepalive_timeout 75;
    keepalive_requests 1000;
    keepalive_disable msie6;

    # Buffer tuning
    client_header_buffer_size 4k;
    large_client_header_buffers 8 16k;
    client_body_buffer_size 128k;
    client_max_body_size 100m;

    # Temporary file tuning
    client_body_temp_path /var/cache/nginx/client_temp 1 2;
    proxy_temp_path /var/cache/nginx/proxy_temp 1 2;
    fastcgi_temp_path /var/cache/nginx/fastcgi_temp 1 2;

    # Compression tuning
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_comp_level 6;
    gzip_proxied any;
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/json
        application/javascript
        application/xml+rss
        application/atom+xml
        image/svg+xml;

    # Open file cache tuning
    open_file_cache max=10000 inactive=20s;
    open_file_cache_valid 30s;
    open_file_cache_min_uses 2;
    open_file_cache_errors on;

    # Connection and request limits
    limit_conn_zone $binary_remote_addr zone=conn_limit_per_ip:10m;
    limit_conn conn_limit_per_ip 20;

    limit_req_zone $binary_remote_addr zone=req_limit_per_ip:10m rate=10r/s;
    limit_req zone=req_limit_per_ip burst=20 nodelay;

    # Upstream connection tuning
    upstream backend {
        least_conn;
        server 192.168.1.10:8080 weight=3 max_fails=3 fail_timeout=30s;
        server 192.168.1.11:8080 weight=3 max_fails=3 fail_timeout=30s;
        server 192.168.1.12:8080 weight=2 max_fails=3 fail_timeout=30s;

        keepalive 32;
        keepalive_requests 1000;
        keepalive_timeout 60s;
    }

    # Proxy cache tuning
    proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=my_cache:10m max_size=1g inactive=60m use_temp_path=off;

    # Server tuning
    server {
        listen 80;
        server_name example.com;

        # Static file tuning
        location ~* \.(jpg|jpeg|png|gif|ico|css|js|woff|woff2|ttf|eot|svg)$ {
            expires 1y;
            add_header Cache-Control "public, immutable";
            access_log off;

            # Serve pre-compressed .gz files if present
            # (requires the gzip_static module)
            gzip_static on;
        }

        # API proxy tuning
        location /api/ {
            # Restrict allowed request methods
            limit_except GET POST {
                deny all;
            }

            # Proxy settings
            proxy_pass http://backend;
            # Required for the upstream keepalive pool to be used
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Timeout tuning
            proxy_connect_timeout 5s;
            proxy_send_timeout 10s;
            proxy_read_timeout 10s;

            # Buffer tuning
            proxy_buffering on;
            proxy_buffer_size 4k;
            proxy_buffers 8 4k;
            proxy_busy_buffers_size 8k;
            proxy_temp_file_write_size 8k;

            # Cache tuning
            proxy_cache my_cache;
            proxy_cache_valid 200 302 10m;
            proxy_cache_valid 404 1m;
            proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
            proxy_cache_lock on;
            proxy_cache_lock_timeout 5s;

            # Fail over to the next upstream on errors
            proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
            proxy_next_upstream_tries 3;
            proxy_next_upstream_timeout 10s;
        }

        # Health check
        location /health {
            access_log off;
            default_type text/plain;
            return 200 "healthy\n";
        }

        # Status monitoring
        location /nginx_status {
            stub_status on;
            access_log off;
            allow 127.0.0.1;
            allow 192.168.1.0/24;
            deny all;
        }
    }
}
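
One thing worker_rlimit_nofile does not change is the limit of the master process itself: when NGINX is started by systemd, the master keeps whatever limit the unit file imposes. A quick check that the intended limits are actually in effect (paths and unit name are the usual defaults):

# Effective open-file limit of a running worker
WORKER_PID=$(pgrep -f 'nginx: worker process' | head -n 1)
grep 'Max open files' /proc/$WORKER_PID/limits

# Limit imposed on the service by systemd; raise it with a drop-in if needed
systemctl show nginx -p LimitNOFILE
# e.g. systemctl edit nginx  ->  add  [Service] LimitNOFILE=65535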

2.3 Load-Balancing Configuration

# NGINX load-balancing configuration
# /etc/nginx/conf.d/loadbalance.conf

# Web server group
upstream web_servers {
    # Load-balancing algorithm
    least_conn;

    # Backend servers
    server 192.168.1.10:8080 weight=3 max_fails=3 fail_timeout=30s;
    server 192.168.1.11:8080 weight=3 max_fails=3 fail_timeout=30s;
    server 192.168.1.12:8080 weight=2 max_fails=3 fail_timeout=30s;
    server 192.168.1.13:8080 weight=2 max_fails=3 fail_timeout=30s;

    # Keep-alive settings
    keepalive 32;
    keepalive_requests 1000;
    keepalive_timeout 60s;
}

# API server group
upstream api_servers {
    # Load-balancing algorithm (session affinity by client IP)
    ip_hash;

    # Backend servers
    server 192.168.1.20:8080 weight=2 max_fails=3 fail_timeout=30s;
    server 192.168.1.21:8080 weight=2 max_fails=3 fail_timeout=30s;
    server 192.168.1.22:8080 weight=1 max_fails=3 fail_timeout=30s;

    # Keep-alive settings
    keepalive 16;
    keepalive_requests 500;
    keepalive_timeout 30s;
}

# Static file server group
upstream static_servers {
    # Round-robin is the default, so no balancing directive is needed
    # (a literal "round_robin;" directive does not exist and would fail nginx -t)

    # Backend servers
    server 192.168.1.30:8080 weight=1 max_fails=3 fail_timeout=30s;
    server 192.168.1.31:8080 weight=1 max_fails=3 fail_timeout=30s;

    # Keep-alive settings
    keepalive 8;
    keepalive_requests 200;
    keepalive_timeout 15s;
}

# Main server block
server {
    listen 80;
    server_name example.com;

    # Web server proxy
    location / {
        proxy_pass http://web_servers;
        # Required for the upstream keepalive pool to be used
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 5s;
        proxy_send_timeout 10s;
        proxy_read_timeout 10s;

        # Buffering
        proxy_buffering on;
        proxy_buffer_size 4k;
        proxy_buffers 8 4k;
        proxy_busy_buffers_size 8k;

        # Fail over to the next upstream on errors
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
        proxy_next_upstream_tries 3;
        proxy_next_upstream_timeout 10s;
    }

    # API server proxy
    location /api/ {
        proxy_pass http://api_servers;
        # Required for the upstream keepalive pool to be used
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 3s;
        proxy_send_timeout 5s;
        proxy_read_timeout 5s;

        # Buffering
        proxy_buffering on;
        proxy_buffer_size 2k;
        proxy_buffers 4 2k;
        proxy_busy_buffers_size 4k;

        # Fail over to the next upstream on errors
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
        proxy_next_upstream_tries 2;
        proxy_next_upstream_timeout 5s;
    }

    # Static file proxy
    location /static/ {
        proxy_pass http://static_servers;
        # Required for the upstream keepalive pool to be used
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 2s;
        proxy_send_timeout 3s;
        proxy_read_timeout 3s;

        # Buffering (4 buffers are needed so that busy_buffers_size stays
        # below the total buffer size minus one buffer)
        proxy_buffering on;
        proxy_buffer_size 1k;
        proxy_buffers 4 1k;
        proxy_busy_buffers_size 2k;

        # Fail over to the next upstream on errors
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
        proxy_next_upstream_tries 1;
        proxy_next_upstream_timeout 3s;
    }

    # Health check endpoint
    location /health {
        access_log off;
        default_type text/plain;
        return 200 "healthy\n";
    }

    # Status monitoring endpoint
    location /nginx_status {
        stub_status on;
        access_log off;
        allow 127.0.0.1;
        allow 192.168.1.0/24;
        deny all;
    }
}
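
A quick way to sanity-check the groups above is to push a batch of requests through the proxy and see how they are answered. A minimal sketch (it assumes the backends are reachable and that example.com is served by this NGINX instance; the log tally additionally assumes $upstream_addr has been appended as the last field of log_format main):

# Send 100 requests through the load balancer and tally the response codes
for i in $(seq 1 100); do
    curl -s -o /dev/null -w '%{http_code}\n' -H 'Host: example.com' http://127.0.0.1/
done | sort | uniq -c

# Per-backend split, if $upstream_addr is logged as the last field
awk '{print $NF}' /var/log/nginx/access.log | sort | uniq -c | sort -rn | head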

3. NGINX Monitoring and Testing

3.1 Status Monitoring Script

#!/bin/bash
# NGINX status monitoring script
# /opt/scripts/nginx_monitor.sh

# Configuration
NGINX_STATUS_URL="http://localhost/nginx_status"
LOG_FILE="/var/log/nginx_monitor.log"
ALERT_EMAIL="admin@example.com"
ALERT_THRESHOLD=1000

# Logging helper
log_message() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE"
}

# Fetch the stub_status page
get_nginx_status() {
    local status
    # Assign separately so $? reflects curl, not the "local" builtin
    status=$(curl -s "$NGINX_STATUS_URL" 2>/dev/null)
    if [ $? -eq 0 ] && [ -n "$status" ]; then
        echo "$status"
    else
        echo "ERROR"
    fi
}

# Parse the stub_status output
parse_status() {
    local status="$1"
    if [ "$status" = "ERROR" ]; then
        echo "0 0 0 0"
        return
    fi

    # Line 1: "Active connections: N"; line 3: "accepts handled requests"
    local active=$(echo "$status" | grep "Active connections" | awk '{print $3}')
    local accepts=$(echo "$status" | awk 'NR==3 {print $1}')
    local handled=$(echo "$status" | awk 'NR==3 {print $2}')
    local requests=$(echo "$status" | awk 'NR==3 {print $3}')

    echo "$active $accepts $handled $requests"
}

# Check whether NGINX processes are running
check_nginx_process() {
    # -x: exact name match, so this monitoring script itself is not counted
    local process_count=$(pgrep -x -c nginx)
    if [ "$process_count" -gt 0 ]; then
        echo "RUNNING"
    else
        echo "STOPPED"
    fi
}

# Validate the NGINX configuration
check_nginx_config() {
    if nginx -t >/dev/null 2>&1; then
        echo "VALID"
    else
        echo "INVALID"
    fi
}

# System load average (1 minute)
get_system_load() {
    uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//'
}

# Memory usage in percent
get_memory_usage() {
    free | grep Mem | awk '{printf "%.2f", $3/$2 * 100.0}'
}

# Approximate CPU usage (user time reported by top)
get_cpu_usage() {
    top -bn1 | grep "Cpu(s)" | awk '{print $2}' | awk -F'%' '{print $1}'
}

# Send an alert email
send_alert() {
    local message="$1"
    echo "$message" | mail -s "NGINX Alert" "$ALERT_EMAIL"
    log_message "ALERT: $message"
}

# Main monitoring routine
monitor_nginx() {
    log_message "Starting NGINX monitoring"

    # Check the NGINX processes
    local process_status=$(check_nginx_process)
    if [ "$process_status" = "STOPPED" ]; then
        send_alert "NGINX process is not running"
        return 1
    fi

    # Check the NGINX configuration
    local config_status=$(check_nginx_config)
    if [ "$config_status" = "INVALID" ]; then
        send_alert "NGINX configuration is invalid"
        return 1
    fi

    # Fetch the status page
    local nginx_status=$(get_nginx_status)
    if [ "$nginx_status" = "ERROR" ]; then
        send_alert "Cannot get NGINX status"
        return 1
    fi

    # Parse the status counters
    local status_info=$(parse_status "$nginx_status")
    local active=$(echo $status_info | awk '{print $1}')
    local accepts=$(echo $status_info | awk '{print $2}')
    local handled=$(echo $status_info | awk '{print $3}')
    local requests=$(echo $status_info | awk '{print $4}')

    # Collect system metrics
    local system_load=$(get_system_load)
    local memory_usage=$(get_memory_usage)
    local cpu_usage=$(get_cpu_usage)

    # Record the current state
    log_message "NGINX Status - Active: $active, Accepts: $accepts, Handled: $handled, Requests: $requests"
    log_message "System Status - Load: $system_load, Memory: ${memory_usage}%, CPU: ${cpu_usage}%"

    # Alerting thresholds
    if [ "$active" -gt "$ALERT_THRESHOLD" ]; then
        send_alert "NGINX active connections exceed threshold: $active > $ALERT_THRESHOLD"
    fi

    if (( $(echo "$memory_usage > 90" | bc -l) )); then
        send_alert "Memory usage is high: ${memory_usage}%"
    fi

    if (( $(echo "$cpu_usage > 90" | bc -l) )); then
        send_alert "CPU usage is high: ${cpu_usage}%"
    fi

    return 0
}

# Entry point
main() {
    case "$1" in
        "monitor")
            monitor_nginx
            ;;
        "status")
            get_nginx_status
            ;;
        "process")
            check_nginx_process
            ;;
        "config")
            check_nginx_config
            ;;
        *)
            echo "Usage: $0 {monitor|status|process|config}"
            exit 1
            ;;
    esac
}

# Run
main "$@"

3.2 Stress Testing Script

#!/bin/bash
# NGINX stress testing script
# /opt/scripts/nginx_stress_test.sh

# Configuration
TARGET_URL="http://localhost"
CONCURRENT_USERS=1000
TEST_DURATION=60
REQUEST_RATE=1000
LOG_FILE="/var/log/nginx_stress_test.log"

# Logging helper
log_message() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Check required tools
check_dependencies() {
    local missing_deps=()

    if ! command -v ab &> /dev/null; then
        missing_deps+=("apache2-utils")
    fi

    if ! command -v wrk &> /dev/null; then
        missing_deps+=("wrk")
    fi

    if ! command -v curl &> /dev/null; then
        missing_deps+=("curl")
    fi

    if [ ${#missing_deps[@]} -gt 0 ]; then
        log_message "Missing dependencies: ${missing_deps[*]}"
        log_message "Please install: apt-get install ${missing_deps[*]}"
        return 1
    fi

    return 0
}

# Apache Bench test
run_ab_test() {
    local url="$1"
    local concurrent="$2"
    local requests="$3"

    log_message "Running Apache Bench test - URL: $url, Concurrent: $concurrent, Requests: $requests"

    ab -n "$requests" -c "$concurrent" -k -H "Accept-Encoding: gzip" "$url" > /tmp/ab_result.txt 2>&1

    if [ $? -eq 0 ]; then
        local rps=$(grep "Requests per second" /tmp/ab_result.txt | awk '{print $4}')
        local avg_time=$(grep "Time per request" /tmp/ab_result.txt | head -1 | awk '{print $4}')
        local failed_requests=$(grep "Failed requests" /tmp/ab_result.txt | awk '{print $3}')

        log_message "AB Test Results - RPS: $rps, Avg Time: ${avg_time}ms, Failed: $failed_requests"

        # Flag failed requests
        if [ "$failed_requests" -gt 0 ]; then
            log_message "WARNING: $failed_requests failed requests detected"
        fi

        return 0
    else
        log_message "ERROR: Apache Bench test failed"
        return 1
    fi
}

# wrk test
run_wrk_test() {
    local url="$1"
    local threads="$2"
    local connections="$3"
    local duration="$4"

    log_message "Running WRK test - URL: $url, Threads: $threads, Connections: $connections, Duration: ${duration}s"

    wrk -t"$threads" -c"$connections" -d"${duration}s" --latency "$url" > /tmp/wrk_result.txt 2>&1

    if [ $? -eq 0 ]; then
        # -m1: take the Thread Stats line, not the "Latency Distribution" header
        local rps=$(grep "Requests/sec" /tmp/wrk_result.txt | awk '{print $2}')
        local avg_latency=$(grep -m1 "Latency" /tmp/wrk_result.txt | awk '{print $2}')
        local max_latency=$(grep -m1 "Latency" /tmp/wrk_result.txt | awk '{print $4}')

        log_message "WRK Test Results - RPS: $rps, Avg Latency: $avg_latency, Max Latency: $max_latency"

        return 0
    else
        log_message "ERROR: WRK test failed"
        return 1
    fi
}

# Custom wrk test driven by a Lua script
run_custom_test() {
    local url="$1"
    local concurrent="$2"
    local duration="$3"
    local rate="$4"

    # Note: $rate is logged for reference only; plain wrk has no fixed-rate mode
    # (wrk2's -R option would be needed for that).
    log_message "Running custom test - URL: $url, Concurrent: $concurrent, Duration: ${duration}s, Rate: $rate/s"

    # Generate the Lua script (per-thread counters are summed in done())
    cat > /tmp/stress_test.lua << 'EOF'
wrk.method = "GET"
wrk.headers["Accept-Encoding"] = "gzip"

local counter = 0
local threads = {}

function setup(thread)
    thread:set("id", counter)
    table.insert(threads, thread)
    counter = counter + 1
end

function init(args)
    requests = 0
    responses = 0
    errors = 0
end

function request()
    requests = requests + 1
    return wrk.request()
end

function response(status, headers, body)
    responses = responses + 1
    if status ~= 200 then
        errors = errors + 1
    end
end

function done(summary, latency, requests)
    -- Collect the per-thread counters recorded above
    local total_responses = 0
    local total_errors = 0
    for _, t in ipairs(threads) do
        total_responses = total_responses + t:get("responses")
        total_errors = total_errors + t:get("errors")
    end
    print("Custom Test Results:")
    print("Total Requests:  " .. summary.requests)
    print("Total Responses: " .. total_responses)
    print("Total Errors:    " .. total_errors)
    print("Requests/sec:    " .. summary.requests / (summary.duration / 1000000))
    print("Avg Latency:     " .. latency.mean / 1000 .. " ms")
    print("Max Latency:     " .. latency.max / 1000 .. " ms")
end
EOF

    wrk -t"$concurrent" -c"$concurrent" -d"${duration}s" -s /tmp/stress_test.lua "$url" > /tmp/custom_result.txt 2>&1

    if [ $? -eq 0 ]; then
        log_message "Custom test completed successfully"
        cat /tmp/custom_result.txt >> "$LOG_FILE"
        return 0
    else
        log_message "ERROR: Custom test failed"
        return 1
    fi
}

# Sample system resources while the tests run
monitor_resources() {
    local duration="$1"
    local interval=5

    log_message "Starting resource monitoring for ${duration}s"

    for ((i=0; i<duration; i+=interval)); do
        local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
        local load=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//')
        local memory=$(free | grep Mem | awk '{printf "%.2f", $3/$2 * 100.0}')
        local cpu=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | awk -F'%' '{print $1}')

        log_message "Resource Monitor - Time: $timestamp, Load: $load, Memory: ${memory}%, CPU: ${cpu}%"

        sleep $interval
    done
}

# Full test run
run_stress_test() {
    log_message "Starting NGINX stress test"

    # Check dependencies
    if ! check_dependencies; then
        return 1
    fi

    # Make sure the target URL is reachable
    if ! curl -s -o /dev/null -w "%{http_code}" "$TARGET_URL" | grep -q "200"; then
        log_message "ERROR: Target URL is not accessible: $TARGET_URL"
        return 1
    fi

    # Start background resource monitoring
    monitor_resources $TEST_DURATION &
    local monitor_pid=$!

    # Run the test suite
    log_message "Running stress tests..."

    # Apache Bench
    run_ab_test "$TARGET_URL" $CONCURRENT_USERS $((CONCURRENT_USERS * 10))

    # wrk
    run_wrk_test "$TARGET_URL" $((CONCURRENT_USERS / 10)) $CONCURRENT_USERS $TEST_DURATION

    # Custom Lua-scripted test
    run_custom_test "$TARGET_URL" $CONCURRENT_USERS $TEST_DURATION $REQUEST_RATE

    # Wait for the resource monitor to finish
    wait $monitor_pid

    log_message "NGINX stress test completed"

    return 0
}

# Entry point
main() {
    case "$1" in
        "test")
            run_stress_test
            ;;
        "ab")
            run_ab_test "$2" "$3" "$4"
            ;;
        "wrk")
            run_wrk_test "$2" "$3" "$4" "$5"
            ;;
        "custom")
            run_custom_test "$2" "$3" "$4" "$5"
            ;;
        "monitor")
            monitor_resources "$2"
            ;;
        *)
            echo "Usage: $0 {test|ab|wrk|custom|monitor}"
            echo "  test                                          - Run full stress test"
            echo "  ab <url> <concurrent> <requests>              - Run Apache Bench test"
            echo "  wrk <url> <threads> <connections> <duration>  - Run WRK test"
            echo "  custom <url> <concurrent> <duration> <rate>   - Run custom test"
            echo "  monitor <duration>                            - Monitor system resources"
            exit 1
            ;;
    esac
}

# Run
main "$@"

3.3 Performance Analysis Script

#!/bin/bash
# NGINX performance analysis script
# /opt/scripts/nginx_performance_analysis.sh

# Configuration
NGINX_STATUS_URL="http://localhost/nginx_status"
LOG_FILE="/var/log/nginx_performance.log"
ANALYSIS_FILE="/var/log/nginx_analysis.txt"

# Logging helper
log_message() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Fetch the stub_status page
get_nginx_status() {
    curl -s "$NGINX_STATUS_URL" 2>/dev/null
}

# Analyse the current NGINX counters
analyze_performance() {
    local status=$(get_nginx_status)

    if [ -z "$status" ]; then
        log_message "ERROR: Cannot get NGINX status"
        return 1
    fi

    # Parse stub_status output (line 3: accepts handled requests; line 4: Reading/Writing/Waiting)
    local active=$(echo "$status" | grep "Active connections" | awk '{print $3}')
    local accepts=$(echo "$status" | awk 'NR==3 {print $1}')
    local handled=$(echo "$status" | awk 'NR==3 {print $2}')
    local requests=$(echo "$status" | awk 'NR==3 {print $3}')
    local reading=$(echo "$status" | awk 'NR==4 {print $2}')
    local writing=$(echo "$status" | awk 'NR==4 {print $4}')
    local waiting=$(echo "$status" | awk 'NR==4 {print $6}')

    # Derived metrics
    local acceptance_rate=0
    if [ "$accepts" -gt 0 ]; then
        # Percentage of accepted connections that were actually handled
        acceptance_rate=$(echo "scale=2; $handled * 100 / $accepts" | bc)
    fi

    local request_rate=0
    if [ "$handled" -gt 0 ]; then
        # Requests per handled connection; values well above 100% indicate keep-alive reuse
        request_rate=$(echo "scale=2; $requests * 100 / $handled" | bc)
    fi

    # Generate the analysis report
    cat > "$ANALYSIS_FILE" << EOF
NGINX Performance Analysis Report
Generated: $(date)
================================

Current Status:
- Active Connections: $active
- Total Accepts: $accepts
- Total Handled: $handled
- Total Requests: $requests
- Reading: $reading
- Writing: $writing
- Waiting: $waiting

Performance Metrics:
- Acceptance Rate: ${acceptance_rate}%
- Request Rate: ${request_rate}%
- Active Connection Ratio: $(echo "scale=2; $active * 100 / $handled" | bc)%

Connection Analysis:
- Reading Connections: $reading
- Writing Connections: $writing
- Waiting Connections: $waiting
- Active Connection Distribution:
  * Reading: $(echo "scale=1; $reading * 100 / $active" | bc)%
  * Writing: $(echo "scale=1; $writing * 100 / $active" | bc)%
  * Waiting: $(echo "scale=1; $waiting * 100 / $active" | bc)%

Performance Assessment:
EOF

    # Assess the connection load
    if [ "$active" -lt 100 ]; then
        echo "- Connection Load: LOW" >> "$ANALYSIS_FILE"
    elif [ "$active" -lt 1000 ]; then
        echo "- Connection Load: MEDIUM" >> "$ANALYSIS_FILE"
    else
        echo "- Connection Load: HIGH" >> "$ANALYSIS_FILE"
    fi

    if (( $(echo "$acceptance_rate > 95" | bc -l) )); then
        echo "- Acceptance Rate: EXCELLENT" >> "$ANALYSIS_FILE"
    elif (( $(echo "$acceptance_rate > 90" | bc -l) )); then
        echo "- Acceptance Rate: GOOD" >> "$ANALYSIS_FILE"
    else
        echo "- Acceptance Rate: POOR" >> "$ANALYSIS_FILE"
    fi

    if (( $(echo "$request_rate > 95" | bc -l) )); then
        echo "- Request Rate: EXCELLENT" >> "$ANALYSIS_FILE"
    elif (( $(echo "$request_rate > 90" | bc -l) )); then
        echo "- Request Rate: GOOD" >> "$ANALYSIS_FILE"
    else
        echo "- Request Rate: POOR" >> "$ANALYSIS_FILE"
    fi

    # Recommendations
    echo "" >> "$ANALYSIS_FILE"
    echo "Recommendations:" >> "$ANALYSIS_FILE"

    if [ "$active" -gt 1000 ]; then
        echo "- Consider increasing worker_processes" >> "$ANALYSIS_FILE"
        echo "- Consider increasing worker_connections" >> "$ANALYSIS_FILE"
    fi

    if (( $(echo "$acceptance_rate < 90" | bc -l) )); then
        echo "- Check upstream server health" >> "$ANALYSIS_FILE"
        echo "- Review load balancing configuration" >> "$ANALYSIS_FILE"
    fi

    if (( $(echo "$request_rate < 90" | bc -l) )); then
        echo "- Check application performance" >> "$ANALYSIS_FILE"
        echo "- Review proxy configuration" >> "$ANALYSIS_FILE"
    fi

    if [ "$waiting" -gt $((active / 2)) ]; then
        echo "- Consider enabling keepalive" >> "$ANALYSIS_FILE"
        echo "- Review connection pooling" >> "$ANALYSIS_FILE"
    fi

    log_message "Performance analysis completed. Report saved to $ANALYSIS_FILE"

    return 0
}

# Sample the counters over time
monitor_trends() {
    local duration="$1"
    local interval=10

    log_message "Starting performance trend monitoring for ${duration}s"

    local start_time=$(date +%s)
    local end_time=$((start_time + duration))

    while [ $(date +%s) -lt $end_time ]; do
        local status=$(get_nginx_status)

        if [ -n "$status" ]; then
            local active=$(echo "$status" | grep "Active connections" | awk '{print $3}')
            local accepts=$(echo "$status" | awk 'NR==3 {print $1}')
            local handled=$(echo "$status" | awk 'NR==3 {print $2}')
            local requests=$(echo "$status" | awk 'NR==3 {print $3}')

            log_message "Trend Monitor - Active: $active, Accepts: $accepts, Handled: $handled, Requests: $requests"
        fi

        sleep $interval
    done

    log_message "Performance trend monitoring completed"
}

# Entry point
main() {
    case "$1" in
        "analyze")
            analyze_performance
            ;;
        "trends")
            monitor_trends "$2"
            ;;
        "status")
            get_nginx_status
            ;;
        *)
            echo "Usage: $0 {analyze|trends|status}"
            echo "  analyze            - Analyze NGINX performance"
            echo "  trends <duration>  - Monitor performance trends"
            echo "  status             - Get current NGINX status"
            exit 1
            ;;
    esac
}

# Run
main "$@"

4. NGINX Optimization Recommendations

4.1 System-Level Optimization

#!/bin/bash
# NGINX system-level optimization script
# /opt/scripts/nginx_system_optimization.sh

# Logging helper
log_message() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $1"
}

# Tune kernel and limit parameters
optimize_system_params() {
    log_message "Optimizing system parameters"

    # Raise file descriptor limits (append only once)
    if ! grep -q "nginx file descriptor limits" /etc/security/limits.conf; then
        cat >> /etc/security/limits.conf << 'EOF'
# nginx file descriptor limits
* soft nofile 65535
* hard nofile 65535
EOF
    fi

    # Kernel parameters (append only once)
    if ! grep -q "NGINX tuning parameters" /etc/sysctl.conf; then
        cat >> /etc/sysctl.conf << 'EOF'
# NGINX tuning parameters
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 5000
net.ipv4.tcp_max_syn_backlog = 65535
net.ipv4.tcp_fin_timeout = 30
net.ipv4.tcp_keepalive_time = 1200
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_keepalive_probes = 5
net.ipv4.tcp_tw_reuse = 1
# Note: tcp_tw_recycle is intentionally omitted; it breaks clients behind NAT
# and was removed from the kernel in Linux 4.12.
net.ipv4.tcp_max_tw_buckets = 5000
net.ipv4.ip_local_port_range = 1024 65535
net.ipv4.tcp_rmem = 4096 87380 16777216
net.ipv4.tcp_wmem = 4096 65536 16777216
# BBR requires kernel >= 4.9 with the tcp_bbr module available
net.ipv4.tcp_congestion_control = bbr
EOF
    fi

    # Apply the kernel parameters
    sysctl -p

    log_message "System parameters optimized"
}

# Replace the NGINX main configuration with an optimized one
optimize_nginx_config() {
    log_message "Optimizing NGINX configuration"

    # Back up the current configuration
    cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.backup

    # Write an optimized main configuration. Rewriting the file wholesale and
    # rolling back on a failed test is more reliable than editing it with sed.
    cat > /etc/nginx/nginx.conf << 'EOF'
user nginx;
worker_processes auto;
worker_rlimit_nofile 65535;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    use epoll;
    worker_connections 65535;
    multi_accept on;
    accept_mutex off;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    keepalive_requests 1000;
    client_max_body_size 100m;
    client_header_buffer_size 4k;
    large_client_header_buffers 8 16k;
    client_body_buffer_size 128k;
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_comp_level 6;
    gzip_types text/plain text/css text/xml text/javascript application/json application/javascript application/xml+rss application/atom+xml image/svg+xml;
    open_file_cache max=10000 inactive=20s;
    open_file_cache_valid 30s;
    open_file_cache_min_uses 2;
    open_file_cache_errors on;

    # Keep the existing virtual host definitions
    include /etc/nginx/conf.d/*.conf;
}
EOF

    # Test the new configuration and roll back on failure
    if nginx -t; then
        log_message "NGINX configuration optimized successfully"
        return 0
    else
        log_message "ERROR: NGINX configuration optimization failed, restoring backup"
        cp /etc/nginx/nginx.conf.backup /etc/nginx/nginx.conf
        return 1
    fi
}

# Prepare cache and log directories
optimize_filesystem() {
    log_message "Optimizing filesystem"

    # NGINX cache directories
    mkdir -p /var/cache/nginx/{client_temp,proxy_temp,fastcgi_temp}
    chown -R nginx:nginx /var/cache/nginx
    chmod -R 755 /var/cache/nginx

    # Log directory
    mkdir -p /var/log/nginx
    chown -R nginx:nginx /var/log/nginx
    chmod -R 755 /var/log/nginx

    log_message "Filesystem optimized"
}

# Entry point
main() {
    case "$1" in
        "system")
            optimize_system_params
            ;;
        "nginx")
            optimize_nginx_config
            ;;
        "filesystem")
            optimize_filesystem
            ;;
        "all")
            optimize_system_params
            optimize_nginx_config
            optimize_filesystem
            ;;
        *)
            echo "Usage: $0 {system|nginx|filesystem|all}"
            exit 1
            ;;
    esac
}

# Run
main "$@"

5. Summary

5.1 Key Takeaways for Million-Scale Request Handling

  1. Architecture: NGINX's event-driven architecture is what makes high concurrency possible
  2. Configuration: sensible configuration parameters are the key to performance
  3. System tuning: kernel and OS-level tuning lifts the whole stack
  4. Monitoring: real-time monitoring keeps the system running reliably
  5. Load balancing: effective load balancing spreads request pressure across backends
  6. Caching: a sound caching strategy takes load off the backends

5.2 Performance Tuning Essentials

  • Worker processes: size the number of workers to the CPU
  • Connections: tune the maximum connection counts
  • Buffers: adjust buffer sizes to the traffic profile
  • Compression: enable gzip compression
  • Caching: configure the open file cache and the proxy cache
  • Load balancing: pick the algorithm that fits the workload

5.3 Best-Practice Recommendations

  • Monitoring: watch system state and performance in real time
  • Load testing: run stress tests regularly
  • Configuration tuning: tune the configuration to the actual workload
  • System tuning: perform OS-level performance tuning
  • Incident handling: put a solid incident-response process in place
  • Capacity planning: plan capacity ahead of demand

With the hands-on guidance above, you should be able to reason about how NGINX handles high concurrency, tune and monitor it effectively, and apply these practices to build an efficient, stable, and scalable NGINX deployment capable of serving requests at the million scale.