Linux系统资源监控与优化工具箱

105-1

#!/bin/bash

# ============================================
# 系统资源监控与优化工具箱
# 功能:实时监控CPU、内存、磁盘、网络,提供优化建议
# 使用方法:./sys-monitor.sh [选项]
# ============================================

# Color escape sequences (expanded by `echo -e` at the call sites)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
PURPLE='\033[0;35m'
NC='\033[0m' # No Color

# Files and directories used by the tool
CONFIG_DIR="$HOME/.sys-monitor"
LOG_FILE="$CONFIG_DIR/sys-monitor.log"
ALERT_LOG="$CONFIG_DIR/alerts.log"
CONFIG_FILE="$CONFIG_DIR/config"
REPORT_DIR="$CONFIG_DIR/reports"

# Default thresholds (sourcing the config file overrides them)
CPU_WARNING=70
CPU_CRITICAL=90
MEM_WARNING=70
MEM_CRITICAL=90
DISK_WARNING=80
DISK_CRITICAL=95
LOAD_WARNING=1
LOAD_CRITICAL=2

# Defaults for the remaining settings stored in the config file. Without
# them these variables stay unset on a first run, where the config file is
# written but not sourced, and the alert check is silently skipped.
ALERT_ENABLED="${ALERT_ENABLED:-true}"
MONITOR_INTERVAL="${MONITOR_INTERVAL:-2}"
KEEP_LOGS_DAYS="${KEEP_LOGS_DAYS:-7}"

# Prepare the working directories, create or load the configuration file,
# and append a start-up banner to the main log.
init_monitor() {
    mkdir -p "$CONFIG_DIR"
    mkdir -p "$REPORT_DIR"

    # An existing config file is read back; otherwise one is written
    if [ -f "$CONFIG_FILE" ]; then
        load_config
    else
        save_config
    fi

    # Start-up banner
    local banner_rule="========================================"
    {
        echo "$banner_rule"
        echo "系统监控启动时间: $(date)"
        echo "主机名: $(hostname)"
        echo "$banner_rule"
    } >> "$LOG_FILE"
}

# Save the current settings to $CONFIG_FILE as shell assignments.
# The file is read back with `source` by load_config, so the values that are
# currently in effect are written rather than fixed literals; unset
# settings fall back to their defaults.
save_config() {
    cat > "$CONFIG_FILE" << EOF
# 系统监控配置
CPU_WARNING=$CPU_WARNING
CPU_CRITICAL=$CPU_CRITICAL
MEM_WARNING=$MEM_WARNING
MEM_CRITICAL=$MEM_CRITICAL
DISK_WARNING=$DISK_WARNING
DISK_CRITICAL=$DISK_CRITICAL
LOAD_WARNING=${LOAD_WARNING:-1}
LOAD_CRITICAL=${LOAD_CRITICAL:-2}
ALERT_ENABLED=${ALERT_ENABLED:-true}
MONITOR_INTERVAL=${MONITOR_INTERVAL:-2}
KEEP_LOGS_DAYS=${KEEP_LOGS_DAYS:-7}
EOF
}

# Load settings by sourcing $CONFIG_FILE into the current shell (no-op when
# the file does not exist).
load_config() {
    if [ -f "$CONFIG_FILE" ]; then
        # shellcheck source=/dev/null
        source "$CONFIG_FILE"
    fi
}

# ============================================
# Memory information helpers
# ============================================

# Method 1: read a /proc/meminfo-style file.
# Arguments: $1 - (optional) file to read, default /proc/meminfo
# Outputs:   "<total_mb> <used_mb>" on stdout
# Returns:   0 on success; non-zero when the file is unreadable or lacks
#            MemTotal, or lacks both MemAvailable and MemFree
# Used memory is total minus available. MemAvailable is used when the file
# provides it; otherwise available is approximated as
# MemFree + Buffers + Cached + SReclaimable (absent fields count as 0).
get_memory_info_from_proc() {
    local meminfo_file="${1:-/proc/meminfo}"
    [ -r "$meminfo_file" ] || return 1

    awk '
        $1 == "MemTotal:"     { total = $2; have_total = 1 }
        $1 == "MemFree:"      { free = $2; have_free = 1 }
        $1 == "MemAvailable:" { avail = $2; have_avail = 1 }
        $1 == "Buffers:"      { buffers = $2 }
        $1 == "Cached:"       { cached = $2 }
        $1 == "SReclaimable:" { sreclaimable = $2 }
        END {
            if (!have_total || total <= 0) exit 1
            if (!have_avail) {
                if (!have_free) exit 1
                avail = free + buffers + cached + sreclaimable
            }
            used = total - avail
            if (used < 0) used = 0
            printf "%d %d\n", int(total / 1024), int(used / 1024)
        }
    ' "$meminfo_file" 2>/dev/null
}

# Method 2: parse `free -m`.
# Outputs: "<total_mb> <used_mb>" on stdout
# Returns: 0 on success; non-zero when free is missing, fails, or prints no
#          usable "Mem:" row
get_memory_info_from_free() {
    command -v free > /dev/null 2>&1 || return 1

    # Declared separately so the exit status of free is not masked by
    # `local`; LC_ALL=C keeps the "Mem:" and "available" labels untranslated.
    local free_output
    free_output=$(LC_ALL=C free -m 2>/dev/null) || return 1

    awk '
        # Locate the "available" column by name instead of assuming it is
        # field 7. The header has no row label, hence the +1.
        NR == 1 {
            for (i = 1; i <= NF; i++) {
                if ($i == "available") avail_col = i + 1
            }
        }
        $1 == "Mem:" {
            if ($2 == "" || $3 == "") exit 1
            total = $2
            used = $3
            if (avail_col && $avail_col + 0 > 0) used = total - $avail_col
            print total, used
            found = 1
            exit 0
        }
        END { if (!found) exit 1 }
    ' <<< "$free_output"
}

# Method 3: parse `vmstat -s`.
# Outputs: "<total_mb> <used_mb>" on stdout (the reported figures divided by 1024)
# Returns: 0 on success; non-zero when vmstat is missing, fails, or prints
#          no "total memory" / "used memory" line
get_memory_info_from_vmstat() {
    command -v vmstat > /dev/null 2>&1 || return 1

    # Declared separately so the exit status of vmstat is not masked by
    # `local`; LC_ALL=C keeps the line labels untranslated.
    local vmstat_output
    vmstat_output=$(LC_ALL=C vmstat -s 2>/dev/null) || return 1

    # Only the first matching line of each kind is used
    awk '
        tolower($0) ~ /total memory/ && total == "" { total = $1 }
        tolower($0) ~ /used memory/  && used == ""  { used = $1 }
        END {
            if (total == "" || used == "") exit 1
            printf "%d %d\n", int(total / 1024), int(used / 1024)
        }
    ' <<< "$vmstat_output"
}

# Main memory query: try each method in order and use the first that works.
# Outputs: "<total_mb> <used_mb>" on stdout, or "0 0" when every method fails
# Returns: 0 when a method succeeded, 1 otherwise
get_memory_info() {
    local provider mem_info

    for provider in \
        get_memory_info_from_proc \
        get_memory_info_from_free \
        get_memory_info_from_vmstat; do
        # Assigning in the condition keeps the provider's exit status;
        # `local var=$(cmd)` would report the status of `local` instead.
        if mem_info=$("$provider") && [ -n "$mem_info" ]; then
            echo "$mem_info"
            return 0
        fi
    done

    # Every method failed
    echo "0 0"
    return 1
}

# Compute the memory usage percentage with integer arithmetic.
# Arguments: $1 - total memory, $2 - used memory (same unit)
# Outputs:   an integer from 0 to 100 on stdout; "0" when either argument is
#            empty, negative, or not a plain integer, or when the total is 0
calculate_mem_percent() {
    local total_mem=$1
    local used_mem=$2

    # Accept plain non-negative integers only, so that nothing below can
    # print an arithmetic or test error for malformed input.
    if ! [[ $total_mem =~ ^[0-9]+$ && $used_mem =~ ^[0-9]+$ ]]; then
        echo "0"
        return
    fi

    # Force base 10 so values with leading zeros are not read as octal
    total_mem=$((10#$total_mem))
    used_mem=$((10#$used_mem))

    if [ "$total_mem" -eq 0 ]; then
        echo "0"
        return
    fi

    local percent=$((used_mem * 100 / total_mem))
    if [ "$percent" -gt 100 ]; then
        percent=100
    fi

    echo "$percent"
}

# ============================================
# 其他信息获取函数
# ============================================

# Read the aggregate "cpu" line of a /proc/stat-style file.
# Arguments: $1 - path of the stat file
# Outputs:   "<total> <idle>" on stdout, where total is the sum of the
#            user, nice, system, idle, iowait, irq, softirq and steal fields
# Returns:   non-zero when no usable "cpu" line was found
_cpu_sample() {
    local label user nice system idle iowait irq softirq steal rest
    read -r label user nice system idle iowait irq softirq steal rest \
        < <(grep '^cpu ' "$1" 2>/dev/null)
    [[ $idle =~ ^[0-9]+$ ]] || return 1
    # Fields that are absent from the line count as 0
    echo "$(( user + nice + system + idle + ${iowait:-0} + ${irq:-0} + ${softirq:-0} + ${steal:-0} )) $idle"
}

# Get the overall CPU usage as an integer percentage.
# Arguments: $1 - (optional) stat file to sample, default /proc/stat
# Outputs:   integer percentage on stdout; "0" when no method yields a value
get_cpu_usage() {
    local stat_file="${1:-/proc/stat}"
    local first second total_diff idle_diff cpu_usage

    # Method 1: difference between two samples taken 0.5 seconds apart
    if [ -r "$stat_file" ] && first=$(_cpu_sample "$stat_file"); then
        sleep 0.5
        if second=$(_cpu_sample "$stat_file"); then
            total_diff=$(( ${second% *} - ${first% *} ))
            idle_diff=$(( ${second#* } - ${first#* } ))
            if [ "$total_diff" -gt 0 ]; then
                cpu_usage=$(( 100 * (total_diff - idle_diff) / total_diff ))
                if [ "$cpu_usage" -lt 0 ]; then cpu_usage=0; fi
                if [ "$cpu_usage" -gt 100 ]; then cpu_usage=100; fi
                echo "$cpu_usage"
                return 0
            fi
        fi
    fi

    # Method 2: one batch iteration of top. Usage is 100 minus the idle
    # figure, which matches what method 1 reports (all non-idle time).
    if command -v top > /dev/null 2>&1; then
        cpu_usage=$(LC_ALL=C top -bn1 2>/dev/null | awk '
            /Cpu\(s\)/ {
                # Turn ":", "%" and "," into blanks so that both "97.9 id,"
                # and "97.9%id," split into a number followed by "id".
                gsub(/[:%,]/, " ")
                for (i = 2; i <= NF; i++) {
                    if ($i == "id") { printf "%d\n", 100 - $(i - 1); exit }
                }
            }')
        if [ -n "$cpu_usage" ]; then
            echo "$cpu_usage"
            return 0
        fi
    fi

    echo "0"
}

# Print the use percentage (without "%") that df reports for one path.
# Arguments: $1 - mount point, path or device
# -P keeps each filesystem on a single line, so the value is always field 5
# of line 2, even when the device name is long.
_df_use_percent() {
    LC_ALL=C df -P -- "$1" 2>/dev/null | awk 'NR == 2 { sub(/%$/, "", $5); print $5 }'
}

# Get the disk usage percentage of a filesystem.
# Arguments: $1 - (optional) path to check, default /
# Outputs:   integer percentage on stdout; "0" when it cannot be determined
get_disk_usage() {
    local target="${1:-/}"
    local usage root_device

    if command -v df > /dev/null 2>&1; then
        # Method 1: ask df about the path itself
        usage=$(_df_use_percent "$target")
        if [[ $usage =~ ^[0-9]+$ ]]; then
            echo "$usage"
            return 0
        fi

        # Method 2 (root only): look the device up in /proc/mounts and ask
        # df about it; when "/" is listed more than once the last entry wins.
        if [ "$target" = "/" ] && [ -r /proc/mounts ]; then
            root_device=$(awk '$2 == "/" { dev = $1 } END { print dev }' /proc/mounts 2>/dev/null)
            if [ -n "$root_device" ]; then
                usage=$(_df_use_percent "$root_device")
                if [[ $usage =~ ^[0-9]+$ ]]; then
                    echo "$usage"
                    return 0
                fi
            fi
        fi
    fi

    echo "0"
}

# Get the three load averages.
# Arguments: $1 - (optional) loadavg file to read, default /proc/loadavg
# Outputs:   "<load1> <load5> <load15>" on stdout; "0 0 0" when unavailable
get_load_average() {
    local loadavg_file="${1:-/proc/loadavg}"
    local load_avg=""

    if [ -r "$loadavg_file" ]; then
        load_avg=$(awk '{ print $1, $2, $3; exit }' "$loadavg_file" 2>/dev/null)
    elif command -v uptime > /dev/null 2>&1; then
        # LC_ALL=C keeps the "load average" label untranslated; the optional
        # "s" also accepts the "load averages:" spelling.
        load_avg=$(LC_ALL=C uptime 2>/dev/null | sed -n 's/.*load averages*: //p' | tr -d ',')
    fi

    echo "${load_avg:-0 0 0}"
}

# Collect static system information into global variables.
# Globals written: OS_INFO, KERNEL, ARCH, HOSTNAME, UPTIME, CPU_MODEL,
#                  CPU_CORES, TOTAL_MEM, USED_MEM, TOTAL_DISK
# Every value gets a fallback ("Unknown ...", "unknown", 1) when its source
# is missing; TOTAL_MEM / USED_MEM are whatever get_memory_info prints.
get_system_info() {
    local uptime_sec uptime_days uptime_hours uptime_mins mem_info

    # Operating system description; surrounding quotes are optional in
    # /etc/os-release, so they are stripped rather than used as delimiters.
    if [ -f /etc/os-release ]; then
        OS_INFO=$(sed -n 's/^PRETTY_NAME=//p' /etc/os-release 2>/dev/null | head -n 1 | tr -d "\"'")
    elif [ -f /etc/redhat-release ]; then
        OS_INFO=$(cat /etc/redhat-release 2>/dev/null)
    elif [ -f /etc/debian_version ]; then
        OS_INFO="Debian $(cat /etc/debian_version 2>/dev/null)"
    elif [ -f /etc/lsb-release ]; then
        OS_INFO=$(grep DISTRIB_DESCRIPTION /etc/lsb-release 2>/dev/null | cut -d'=' -f2 | tr -d '"')
    else
        OS_INFO=$(uname -o 2>/dev/null)
    fi
    OS_INFO=${OS_INFO:-"Unknown Linux"}

    # Kernel release
    KERNEL=$(uname -r 2>/dev/null)
    KERNEL=${KERNEL:-"Unknown"}

    # Machine architecture
    ARCH=$(uname -m 2>/dev/null)
    ARCH=${ARCH:-"Unknown"}

    # Host name (uname -n is tried when the hostname command fails)
    HOSTNAME=$(hostname 2>/dev/null || uname -n 2>/dev/null)
    HOSTNAME=${HOSTNAME:-"Unknown"}

    # Uptime, formatted with the two most significant units
    UPTIME="unknown"
    if [ -r /proc/uptime ]; then
        uptime_sec=$(awk '{print int($1)}' /proc/uptime 2>/dev/null)
        # Guard the arithmetic below against an empty or malformed value
        if [[ $uptime_sec =~ ^[0-9]+$ ]]; then
            uptime_days=$((uptime_sec / 86400))
            uptime_hours=$(( (uptime_sec % 86400) / 3600 ))
            uptime_mins=$(( (uptime_sec % 3600) / 60 ))
            if [ "$uptime_days" -gt 0 ]; then
                UPTIME="${uptime_days}天 ${uptime_hours}小时"
            elif [ "$uptime_hours" -gt 0 ]; then
                UPTIME="${uptime_hours}小时 ${uptime_mins}分钟"
            else
                UPTIME="${uptime_mins}分钟"
            fi
        fi
    fi

    # CPU model and logical CPU count
    if [ -f /proc/cpuinfo ]; then
        CPU_MODEL=$(grep "model name" /proc/cpuinfo 2>/dev/null | head -1 | cut -d':' -f2 | sed 's/^ *//')
        CPU_CORES=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null)
    fi
    CPU_MODEL=${CPU_MODEL:-"Unknown CPU"}
    # grep -c prints 0 when nothing matches; treat that like a missing value
    if ! [[ $CPU_CORES =~ ^[1-9][0-9]*$ ]]; then
        CPU_CORES=1
    fi

    # Memory (mem_info is local so it does not leak into the caller's scope)
    mem_info=$(get_memory_info)
    read -r TOTAL_MEM USED_MEM _ <<< "$mem_info"

    # Size of the root filesystem; -P keeps the entry on one line
    TOTAL_DISK=""
    if command -v df > /dev/null 2>&1; then
        TOTAL_DISK=$(df -Ph / 2>/dev/null | awk 'NR==2 {print $2}')
    fi
    TOTAL_DISK=${TOTAL_DISK:-unknown}
}

# Clear the screen, print the title banner, refresh the system information
# globals via get_system_info, and print a short system overview.
show_header() {
    clear

    # Title banner, wrapped in the purple color codes
    echo -e "${PURPLE}"
    cat << 'BANNER'
╔══════════════════════════════════════════════════════════╗
║          系统资源监控与优化工具箱 v1.0                  ║
║           System Monitor & Optimizer Toolkit            ║
╚══════════════════════════════════════════════════════════╝
BANNER
    echo -e "${NC}"

    get_system_info

    # Memory usage percentage for the overview line
    local usage_pct
    usage_pct=$(calculate_mem_percent "$TOTAL_MEM" "$USED_MEM")

    local host_line="主机: ${GREEN}$HOSTNAME${NC} | 系统: ${YELLOW}$OS_INFO${NC}"
    local kernel_line="内核: ${BLUE}$KERNEL${NC} | 架构: ${BLUE}$ARCH${NC}"
    local cpu_line="运行: ${CYAN}$UPTIME${NC} | CPU: ${CPU_CORES}核 ${CPU_MODEL:0:30}"
    local mem_line="内存: ${GREEN}${TOTAL_MEM}MB${NC} (使用率: ${usage_pct}%) | 磁盘: ${GREEN}$TOTAL_DISK${NC}"

    echo -e "${CYAN}系统概览:${NC}"
    echo -e "$host_line"
    echo -e "$kernel_line"
    echo -e "$cpu_line"
    echo -e "$mem_line"
    echo -e "${BLUE}══════════════════════════════════════════════════════════${NC}"
}

# Print the main menu: a heading, the ten numbered entries, and the exit entry.
show_menu() {
    local divider="${BLUE}══════════════════════════════════════════════════════════${NC}"
    # "<label>|<description>"; the label is printed in green, the
    # description (including its alignment padding) in the default color
    local -a items=(
        "1. 实时系统监控|       - 监控CPU、内存、磁盘、网络"
        "2. 资源使用分析|       - 详细分析资源使用情况"
        "3. 进程管理|           - 查看和管理进程"
        "4. 服务状态检查|       - 检查系统服务状态"
        "5. 系统日志分析|       - 分析系统日志"
        "6. 磁盘空间管理|       - 磁盘使用分析和清理"
        "7. 网络连接监控|       - 监控网络连接和流量"
        "8. 系统优化建议|       - 获取系统优化建议"
        "9. 性能基准测试|       - 运行性能基准测试"
        "10. 生成系统报告|      - 生成详细系统报告"
    )
    local item

    echo -e "\n${YELLOW}主菜单:${NC}"
    echo -e "$divider"
    for item in "${items[@]}"; do
        echo -e "${GREEN}${item%%|*}${NC}${item#*|}"
    done
    echo -e "$divider"
    echo -e "${GREEN}0. 退出${NC}"
    echo -e "$divider"
}

# Append one timestamped line to the main log file.
# Arguments: $1 - message text
log_message() {
    local text="$1"
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$text" >> "$LOG_FILE"
}

# Record an alert: append a timestamped line to the alert log and echo the
# message to the terminal in red.
# Arguments: $1 - alert type, $2 - message text
log_alert() {
    local kind="$1"
    local text="$2"
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] [%s] %s\n' "$stamp" "$kind" "$text" >> "$ALERT_LOG"
    echo -e "${RED}[告警] ${text}${NC}"
}

# Print a 50-cell usage bar (no brackets, no trailing newline).
# Arguments: $1 - usage percentage, $2 - warning threshold, $3 - critical threshold
# The filled part is red at or above the critical threshold, yellow at or
# above the warning threshold, and green otherwise.
_print_usage_bar() {
    local value=$1 warn=$2 crit=$3
    local width=50 filled=0 color=$GREEN
    local pct full_part empty_part

    if [[ $value =~ ^[0-9]+$ ]]; then
        pct=$((10#$value))
        # Readings above 100 draw a full bar
        if [ "$pct" -gt 100 ]; then pct=100; fi
        filled=$((pct * width / 100))
    fi

    if [ "$value" -ge "$crit" ] 2>/dev/null; then
        color=$RED
    elif [ "$value" -ge "$warn" ] 2>/dev/null; then
        color=$YELLOW
    fi

    # Build the cells with parameter expansion: `tr` is not reliable for
    # multibyte characters such as the block glyphs used here.
    printf -v full_part '%*s' "$filled" ''
    printf -v empty_part '%*s' "$((width - filled))" ''
    echo -ne "${color}${full_part// /█}${NC}${empty_part// /░}"
}

# Print the alert status line of one resource.
# Arguments: $1 - resource label, $2 - usage percentage,
#            $3 - warning threshold, $4 - critical threshold
_print_threshold_status() {
    local label=$1 value=$2 warn=$3 crit=$4

    if [ "$value" -ge "$crit" ] 2>/dev/null; then
        echo -e "  ${RED}⚠ ${label}使用率过高: ${value}%${NC}"
    elif [ "$value" -ge "$warn" ] 2>/dev/null; then
        echo -e "  ${YELLOW}⚠ ${label}使用率警告: ${value}%${NC}"
    else
        echo -e "  ${GREEN}✓ ${label}使用率正常${NC}"
    fi
}

# 1. Real-time monitor: redraws CPU, memory, disk and load figures until the
# user presses q. "+" and "-" lengthen or shorten the refresh interval.
# Globals read: MONITOR_INTERVAL, ALERT_ENABLED, *_WARNING / *_CRITICAL, colors
# On exit the elapsed time is written to the log through log_message.
real_time_monitor() {
    local refresh_rate=${MONITOR_INTERVAL:-2}
    local started_at=$SECONDS
    local divider="${BLUE}══════════════════════════════════════════════════════════${NC}"
    local key current_time cpu_usage mem_info total_mem used_mem mem_percent
    local disk_usage load1 load5 load15
    local uptime_sec uptime_days uptime_hours uptime_mins
    local cpu_cores process_count

    # The interval is used in integer arithmetic below
    if ! [[ $refresh_rate =~ ^[1-9][0-9]*$ ]]; then
        refresh_rate=2
    fi

    # Logical CPU count does not change between refreshes. grep -c prints 0
    # when nothing matches, so validate the result instead of chaining
    # `|| echo 1`, which would produce two lines of output.
    cpu_cores=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null)
    if ! [[ $cpu_cores =~ ^[1-9][0-9]*$ ]]; then
        cpu_cores=1
    fi

    show_header
    echo -e "\n${CYAN}实时系统监控 (按 q 退出)${NC}"
    echo -e "$divider"
    echo -e "${YELLOW}刷新间隔: ${refresh_rate}秒 | 按 '+' 增加间隔 | 按 '-' 减少间隔${NC}"
    echo -e "$divider"

    while true; do
        # Gather all figures first, then redraw in one go
        current_time=$(date '+%H:%M:%S')
        cpu_usage=$(get_cpu_usage)

        mem_info=$(get_memory_info)
        read -r total_mem used_mem _ <<< "$mem_info"
        mem_percent=$(calculate_mem_percent "$total_mem" "$used_mem")

        disk_usage=$(get_disk_usage)
        read -r load1 load5 load15 _ <<< "$(get_load_average)"

        uptime_days=0
        uptime_hours=0
        uptime_mins=0
        if [ -r /proc/uptime ]; then
            uptime_sec=$(awk '{print int($1)}' /proc/uptime 2>/dev/null)
            if [[ $uptime_sec =~ ^[0-9]+$ ]]; then
                uptime_days=$((uptime_sec / 86400))
                uptime_hours=$(( (uptime_sec % 86400) / 3600 ))
                uptime_mins=$(( (uptime_sec % 3600) / 60 ))
            fi
        fi

        # Value of the procs_running line (counting matching lines with
        # grep -c would always give 1)
        process_count=$(awk '$1 == "procs_running" { print $2; exit }' /proc/stat 2>/dev/null)
        process_count=${process_count:-0}

        # Redraw
        clear
        echo -e "${PURPLE}"
        echo "╔══════════════════════════════════════════════════════════╗"
        echo "║          实时系统监控 (按q退出)                          ║"
        echo "╚══════════════════════════════════════════════════════════╝"
        echo -e "${NC}"

        echo -e "${CYAN}监控时间: $current_time | 刷新间隔: ${refresh_rate}秒${NC}"
        echo -e "$divider"

        # CPU
        echo -e "\n${GREEN}CPU 使用率:${NC}"
        echo -n "  ["
        _print_usage_bar "$cpu_usage" "$CPU_WARNING" "$CPU_CRITICAL"
        echo -e "] ${cpu_usage}%"

        # Memory
        echo -e "\n${GREEN}内存 使用率:${NC}"
        echo -n "  ["
        _print_usage_bar "$mem_percent" "$MEM_WARNING" "$MEM_CRITICAL"
        echo -e "] ${mem_percent}% (${used_mem}MB/${total_mem}MB)"

        # Disk
        echo -e "\n${GREEN}磁盘 使用率(/):${NC}"
        echo -n "  ["
        _print_usage_bar "$disk_usage" "$DISK_WARNING" "$DISK_CRITICAL"
        echo -e "] ${disk_usage}%"

        # Load
        echo -e "\n${GREEN}系统负载:${NC}"
        echo -e "  1分钟: ${load1:-0} | 5分钟: ${load5:-0} | 15分钟: ${load15:-0}"
        echo -e "  CPU核心数: ${cpu_cores}"

        # Miscellaneous
        echo -e "\n${GREEN}其他信息:${NC}"
        echo -e "  运行进程数: ${process_count}"
        echo -e "  系统运行: ${uptime_days}天 ${uptime_hours}小时 ${uptime_mins}分钟"

        # Alert summary; enabled unless the setting says otherwise
        if [ "${ALERT_ENABLED:-true}" = "true" ]; then
            echo -e "\n${YELLOW}告警检查:${NC}"
            _print_threshold_status "CPU" "$cpu_usage" "$CPU_WARNING" "$CPU_CRITICAL"
            _print_threshold_status "内存" "$mem_percent" "$MEM_WARNING" "$MEM_CRITICAL"
            _print_threshold_status "磁盘" "$disk_usage" "$DISK_WARNING" "$DISK_CRITICAL"
        fi

        echo -e "\n$divider"
        echo -e "${YELLOW}按 q 退出 | 按 + 增加间隔 | 按 - 减少间隔${NC}"

        # Wait up to one refresh interval for a single key press
        # (-s keeps the key from being echoed onto the display)
        if read -r -s -t "$refresh_rate" -n 1 key; then
            case "$key" in
                q|Q)
                    echo -e "\n${GREEN}退出监控模式${NC}"
                    break
                    ;;
                +)
                    refresh_rate=$((refresh_rate + 1))
                    echo -e "\n${YELLOW}刷新间隔增加至 ${refresh_rate}秒${NC}"
                    sleep 1
                    ;;
                -)
                    if [ "$refresh_rate" -gt 1 ]; then
                        refresh_rate=$((refresh_rate - 1))
                        echo -e "\n${YELLOW}刷新间隔减少至 ${refresh_rate}秒${NC}"
                        sleep 1
                    fi
                    ;;
            esac
        fi
    done

    # SECONDS gives the real elapsed time, including sampling delays and
    # waits that were cut short by a key press
    log_message "实时监控运行 $((SECONDS - started_at))秒"
}

# Print one /proc/meminfo field converted to MB.
# Arguments: $1 - field name without the trailing colon
# Returns:   non-zero (and prints nothing) when the field is absent
_meminfo_mb() {
    awk -v key="$1:" '
        $1 == key { print $2 / 1024 " MB"; found = 1; exit }
        END { if (!found) exit 1 }
    ' /proc/meminfo 2>/dev/null
}

# 2. Resource analysis: prints CPU, memory and disk details, the top
# processes, a summary of current usage and simple advice, then waits for
# Enter. CPU_MODEL and CPU_CORES are read after show_header has run.
resource_analysis() {
    local cpu_mhz df_output
    local cpu_usage mem_info total_mem used_mem mem_percent disk_usage

    show_header
    echo -e "\n${CYAN}资源使用详细分析${NC}"
    echo -e "${BLUE}══════════════════════════════════════════════════════════${NC}"

    # CPU details
    echo -e "\n${YELLOW}1. CPU使用分析:${NC}"
    echo -e "${GREEN}══════════════════════════════════════════════════════════${NC}"

    echo -e "CPU型号: ${CPU_MODEL}"
    echo -e "CPU核心数: ${CPU_CORES}"

    # Architecture comes from uname; the model name is already shown above
    echo -e "CPU架构: $(uname -m 2>/dev/null || echo "未知")"

    # Clock frequency, when /proc/cpuinfo reports one
    if [ -f /proc/cpuinfo ]; then
        cpu_mhz=$(grep -m1 "cpu MHz" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed 's/^ *//')
        if [ -n "$cpu_mhz" ]; then
            echo -e "CPU频率: ${cpu_mhz} MHz"
        else
            echo -e "CPU频率: 未知"
        fi
    fi

    # Top CPU consumers (NR > 1 skips the header row)
    echo -e "\n${CYAN}CPU使用最高的进程 (前5):${NC}"
    if command -v ps &> /dev/null; then
        echo -e "${YELLOW}USER       PID     CPU% COMMAND${NC}"
        ps aux --sort=-%cpu 2>/dev/null \
            | awk 'NR > 1 && NR <= 6 {printf "%-10s %-8s %-6s %-50s\n", $1, $2, $3, $11}'
    else
        echo -e "${YELLOW}无法获取进程信息${NC}"
    fi

    # Memory details
    echo -e "\n${YELLOW}2. 内存使用分析:${NC}"
    echo -e "${GREEN}══════════════════════════════════════════════════════════${NC}"

    echo -e "${CYAN}方法1: 使用free命令${NC}"
    if command -v free &> /dev/null; then
        free -h 2>/dev/null || echo -e "${YELLOW}无法获取内存信息${NC}"
    fi

    echo -e "\n${CYAN}方法2: 使用/proc/meminfo${NC}"
    if [ -f /proc/meminfo ]; then
        # The fallback is attached to the helper's own exit status; after a
        # `grep | awk` pipeline it would never trigger.
        echo -e "总内存: $(_meminfo_mb MemTotal || echo "未知")"
        echo -e "空闲内存: $(_meminfo_mb MemFree || echo "未知")"
        echo -e "可用内存: $(_meminfo_mb MemAvailable || echo "未知")"
        echo -e "缓存: $(_meminfo_mb Cached || echo "未知")"
        echo -e "缓冲: $(_meminfo_mb Buffers || echo "未知")"
    fi

    # Top memory consumers
    echo -e "\n${CYAN}内存使用最高的进程 (前5):${NC}"
    if command -v ps &> /dev/null; then
        echo -e "${YELLOW}USER       PID     MEM% COMMAND${NC}"
        ps aux --sort=-%mem 2>/dev/null \
            | awk 'NR > 1 && NR <= 6 {printf "%-10s %-8s %-6s %-50s\n", $1, $2, $4, $11}'
    fi

    # Disk details
    echo -e "\n${YELLOW}3. 磁盘使用分析:${NC}"
    echo -e "${GREEN}══════════════════════════════════════════════════════════${NC}"

    if command -v df &> /dev/null; then
        echo -e "${CYAN}磁盘使用情况:${NC}"
        if df_output=$(df -h 2>/dev/null) && [ -n "$df_output" ]; then
            head -n 10 <<< "$df_output"
        else
            echo -e "${YELLOW}无法获取磁盘信息${NC}"
        fi
    fi

    # Inode usage
    echo -e "\n${CYAN}Inode使用情况:${NC}"
    if command -v df &> /dev/null; then
        df -i 2>/dev/null | head -5
    fi

    # Summary and advice
    echo -e "\n${YELLOW}4. 分析总结:${NC}"
    echo -e "${GREEN}══════════════════════════════════════════════════════════${NC}"

    cpu_usage=$(get_cpu_usage)
    mem_info=$(get_memory_info)
    read -r total_mem used_mem _ <<< "$mem_info"
    mem_percent=$(calculate_mem_percent "$total_mem" "$used_mem")
    disk_usage=$(get_disk_usage)

    echo -e "${CYAN}当前资源使用情况:${NC}"
    echo -e "  CPU使用率: ${cpu_usage}%"
    echo -e "  内存使用率: ${mem_percent}% (${used_mem}MB/${total_mem}MB)"
    echo -e "  磁盘使用率: ${disk_usage}%"

    echo -e "\n${CYAN}优化建议:${NC}"

    if [ "$cpu_usage" -gt 80 ] 2>/dev/null; then
        echo -e "  ${RED}⚠ CPU使用率较高,建议:${NC}"
        echo -e "    1. 检查并优化高CPU进程"
        echo -e "    2. 考虑升级CPU"
        echo -e "    3. 优化应用程序配置"
    else
        echo -e "  ${GREEN}✓ CPU使用率正常${NC}"
    fi

    if [ "$mem_percent" -gt 80 ] 2>/dev/null; then
        echo -e "  ${RED}⚠ 内存使用率较高,建议:${NC}"
        echo -e "    1. 关闭不必要的应用程序"
        echo -e "    2. 增加交换空间"
        echo -e "    3. 考虑增加物理内存"
        echo -e "    4. 优化应用程序内存使用"
    else
        echo -e "  ${GREEN}✓ 内存使用率正常${NC}"
    fi

    if [ "$disk_usage" -gt 80 ] 2>/dev/null; then
        echo -e "  ${RED}⚠ 磁盘使用率较高,建议:${NC}"
        echo -e "    1. 清理临时文件"
        echo -e "    2. 删除不需要的软件包"
        echo -e "    3. 清理日志文件"
        echo -e "    4. 考虑增加磁盘空间"
    else
        echo -e "  ${GREEN}✓ 磁盘使用率正常${NC}"
    fi

    log_message "资源使用分析"
    read -r -p "按回车键继续..."
}

# 3. Process management submenu: list processes, show the top CPU / memory
# consumers, search by name, or send the default kill signal to a PID.
# Returns to the caller on choice 0 or when input ends.
process_management() {
    local choice proc_name pid

    while true; do
        show_header
        echo -e "\n${CYAN}进程管理${NC}"
        echo -e "${BLUE}══════════════════════════════════════════════════════════${NC}"

        echo -e "\n${YELLOW}进程管理选项:${NC}"
        echo "1. 查看所有进程"
        echo "2. 查看高CPU进程"
        echo "3. 查看高内存进程"
        echo "4. 查找特定进程"
        echo "5. 结束进程"
        echo "0. 返回主菜单"
        echo -e "${BLUE}══════════════════════════════════════════════════════════${NC}"

        # Leave the menu when input ends instead of redrawing forever
        read -r -p "请选择 [0-5]: " choice || break

        case "$choice" in
            1)
                echo -e "\n${CYAN}所有进程 (前20个):${NC}"
                if command -v ps &> /dev/null; then
                    ps aux | head -20
                else
                    echo -e "${RED}无法获取进程信息${NC}"
                fi
                ;;
            2)
                echo -e "\n${CYAN}高CPU进程 (前10个):${NC}"
                if command -v ps &> /dev/null; then
                    echo -e "${YELLOW}USER       PID     CPU% COMMAND${NC}"
                    # NR > 1 skips the header row
                    ps aux --sort=-%cpu \
                        | awk 'NR > 1 && NR <= 11 {printf "%-10s %-8s %-6s %-50s\n", $1, $2, $3, $11}'
                fi
                ;;
            3)
                echo -e "\n${CYAN}高内存进程 (前10个):${NC}"
                if command -v ps &> /dev/null; then
                    echo -e "${YELLOW}USER       PID     MEM% COMMAND${NC}"
                    ps aux --sort=-%mem \
                        | awk 'NR > 1 && NR <= 11 {printf "%-10s %-8s %-6s %-50s\n", $1, $2, $4, $11}'
                fi
                ;;
            4)
                read -r -p "输入要查找的进程名: " proc_name
                if [ -n "$proc_name" ]; then
                    echo -e "\n${CYAN}查找进程: $proc_name${NC}"
                    if command -v ps &> /dev/null; then
                        # "--" keeps a name starting with "-" from being
                        # parsed as a grep option
                        ps aux | grep -i -- "$proc_name" | grep -v grep
                    fi
                fi
                ;;
            5)
                read -r -p "输入要结束的进程PID: " pid
                if [ -n "$pid" ]; then
                    # Only a positive integer is passed on: kill gives 0 and
                    # negative numbers a special meaning (process groups,
                    # signal specs) rather than a single process.
                    if ! [[ $pid =~ ^[1-9][0-9]*$ ]]; then
                        echo -e "${RED}✗ 无效的PID: $pid${NC}"
                    else
                        echo -e "${YELLOW}结束进程 $pid ...${NC}"
                        if kill "$pid" 2>/dev/null; then
                            echo -e "${GREEN}✓ 进程 $pid 已结束${NC}"
                        else
                            echo -e "${RED}✗ 无法结束进程 $pid${NC}"
                        fi
                    fi
                fi
                ;;
            0)
                break
                ;;
            *)
                echo -e "${RED}无效选择${NC}"
                ;;
        esac

        # Choice 0 has already left the loop, so every other path pauses
        # here. (A numeric test on $choice would fail for non-numeric
        # input and skip the pause.)
        read -r -p "按回车键继续..."
    done
}

# 4. Service status check: reports the state of a fixed list of common
# services (systemd) or the first lines of `service --status-all` (init.d),
# then the listening network ports, and waits for Enter.
service_check() {
    local -a services=("ssh" "cron" "nginx" "apache2" "mysql" "docker" "network" "systemd-logind")
    local service status failed_units

    show_header
    echo -e "\n${CYAN}服务状态检查${NC}"
    echo -e "${BLUE}══════════════════════════════════════════════════════════${NC}"

    echo -e "\n${YELLOW}关键服务状态:${NC}"

    # systemd or init.d?
    if command -v systemctl &> /dev/null; then
        echo -e "${GREEN}使用systemd管理系统${NC}"

        for service in "${services[@]}"; do
            # Decide on the listing's output, not its exit status:
            # NOTE(review): the exit status for "no match" appears to differ
            # between systemd versions - confirm. With --no-legend an
            # unknown unit produces no output at all.
            if systemctl list-unit-files --no-legend "$service.service" 2>/dev/null | grep -q .; then
                status=$(systemctl is-active "$service.service" 2>/dev/null)
                if [ "$status" = "active" ]; then
                    echo -e "  ${GREEN}✓ $service: 运行中${NC}"
                elif [ "$status" = "inactive" ]; then
                    echo -e "  ${YELLOW}○ $service: 已停止${NC}"
                elif [ "$status" = "failed" ]; then
                    echo -e "  ${RED}✗ $service: 失败${NC}"
                else
                    echo -e "  ${BLUE}? $service: 未知状态${NC}"
                fi
            fi
        done

        # Failed units; without the legend an empty result means "none"
        echo -e "\n${YELLOW}失败的服务:${NC}"
        failed_units=$(systemctl --failed --no-legend --plain 2>/dev/null)
        if [ -n "$failed_units" ]; then
            echo "$failed_units"
        else
            echo -e "  ${GREEN}✓ 没有失败的服务${NC}"
        fi

    elif [ -d /etc/init.d ]; then
        echo -e "${GREEN}使用init.d管理系统${NC}"

        echo -e "\n${YELLOW}服务状态:${NC}"
        service --status-all 2>/dev/null | head -10

    else
        echo -e "${YELLOW}无法确定服务管理系统${NC}"
    fi

    # Listening ports
    echo -e "\n${YELLOW}网络服务端口:${NC}"
    if command -v ss &> /dev/null; then
        ss -tulpn 2>/dev/null | head -10
    elif command -v netstat &> /dev/null; then
        netstat -tulpn 2>/dev/null | head -10
    else
        echo -e "${YELLOW}无法检查网络端口${NC}"
    fi

    read -r -p "按回车键继续..."
}

# Show the "under development" page of a menu entry and wait for Enter.
# Arguments: $1 - page title
_show_placeholder() {
    show_header
    echo -e "\n${CYAN}$1${NC}"
    echo -e "${BLUE}══════════════════════════════════════════════════════════${NC}"
    echo -e "${YELLOW}功能开发中...${NC}"
    read -r -p "按回车键继续..."
}

# Main program: initialise, then run the main menu until the user picks 0
# (which exits the script) or input ends.
main() {
    local main_choice
    local root_notice_pending=0

    init_monitor

    # Warn once when not running as root
    if [ "$EUID" -ne 0 ]; then
        root_notice_pending=1
    fi

    while true; do
        show_header

        # Printed below the first header so it is on screen together with
        # the menu; printing it before the loop would lose it as soon as
        # show_header clears the screen.
        if [ "$root_notice_pending" -eq 1 ]; then
            echo -e "${YELLOW}注意: 部分功能需要root权限${NC}"
            echo -e "${CYAN}可以使用sudo运行脚本: sudo $0${NC}"
            echo -e "${BLUE}══════════════════════════════════════════════════════════${NC}"
            root_notice_pending=0
        fi

        show_menu

        # Stop when input ends instead of redrawing the menu forever
        read -r -p "请选择操作 [0-10]: " main_choice || break

        case "$main_choice" in
            1) real_time_monitor ;;
            2) resource_analysis ;;
            3) process_management ;;
            4) service_check ;;
            5) _show_placeholder "系统日志分析" ;;
            6) _show_placeholder "磁盘空间管理" ;;
            7) _show_placeholder "网络连接监控" ;;
            8) _show_placeholder "系统优化建议" ;;
            9) _show_placeholder "性能基准测试" ;;
            10) _show_placeholder "生成系统报告" ;;
            0)
                echo -e "\n${GREEN}感谢使用系统监控工具!${NC}"
                echo -e "${CYAN}日志文件: $LOG_FILE${NC}"
                echo -e "${CYAN}告警日志: $ALERT_LOG${NC}"
                echo -e "${CYAN}报告目录: $REPORT_DIR${NC}"
                exit 0
                ;;
            *)
                echo -e "${RED}无效选择,请重新输入${NC}"
                sleep 1
                ;;
        esac
    done
}

# Run the main program
main

 


文档仓库 » Linux系统资源监控与优化工具箱