Linux系统信息与健康检查脚本

#!/bin/bash

# ============================================
# 系统健康检查脚本
# 功能:收集系统信息并检查关键健康指标
# 使用方法:sudo ./system-check.sh
# ============================================

# ANSI color escape sequences. They are stored as literal backslash text
# and only turned into real escape characters when printed with `echo -e`.
NC="\033[0m"        # reset / no color
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"

# Log file path, stamped with the script start time (YYYYmmdd-HHMMSS).
printf -v LOG_FILE '/var/log/system-check-%s.log' "$(date +%Y%m%d-%H%M%S)"

# Print a message wrapped in a color escape sequence.
# Globals:
#   NC (read) - sequence appended after the message
# Arguments:
#   $1 - color escape sequence placed before the message
#   $2 - message text; backslash escapes such as \n are expanded
# Outputs:
#   the expanded text followed by a newline, on stdout
print_msg() {
    local color="$1" msg="$2"
    printf '%b\n' "${color}${msg}${NC}"
}

# Report whether the command executed immediately before this call succeeded.
# Must be called directly after the command of interest: the first thing it
# does is capture that command's exit status from $?.
# Globals:
#   GREEN, RED (read)
# Outputs:
#   a success or failure marker via print_msg
check_status() {
    local rc=$?
    case "$rc" in
        0) print_msg "$GREEN" "[✓] 成功" ;;
        *) print_msg "$RED" "[✗] 失败" ;;
    esac
}

# Append one timestamped line to the log file.
# Globals:
#   LOG_FILE (read) - file the line is appended to
# Arguments:
#   $1 - message text (written verbatim)
log_message() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1" >> "$LOG_FILE"
}

# Print a horizontal rule made of '=' characters, used under section titles.
print_separator() {
    printf '%s\n' "========================================"
}

# Check whether a command is available on this system.
# Globals:
#   YELLOW (read)
# Arguments:
#   $1 - command name to look up
# Outputs:
#   a notice via print_msg when the command is missing
# Returns:
#   0 if the command exists, 1 otherwise (including an empty name)
check_command() {
    local cmd="${1:-}"
    # The name must be quoted: unquoted, an empty name makes `command -v`
    # succeed with no operand, and a name containing spaces is split into
    # several lookups that succeed if any one of them exists.
    if [[ -z "$cmd" ]] || ! command -v -- "$cmd" > /dev/null 2>&1; then
        print_msg "$YELLOW" "注意:${cmd} 命令未安装,跳过相关检查"
        return 1
    fi
    return 0
}

# Warn when the script is not running as root and ask whether to continue.
# Globals:
#   EUID, YELLOW (read); REPLY (written by read)
# Exits:
#   terminates the script with status 1 unless the answer is y or Y
check_root() {
    # Nothing to confirm when already running as root.
    (( EUID == 0 )) && return 0

    print_msg "$YELLOW" "警告:建议使用root权限运行此脚本以获取完整信息"
    read -r -n 1 -p "是否继续?(y/n): "
    echo
    # Any answer other than a single y/Y aborts the whole script.
    [[ $REPLY =~ ^[Yy]$ ]] || exit 1
}

# Print basic system identification: hostname, OS name, kernel version,
# architecture, boot time and uptime.
# Globals:
#   BLUE (read)
# Outputs:
#   the report on stdout; one line to the log via log_message
system_info() {
    local os_name="" boot_time=""

    print_msg "$BLUE" "\n1. 系统基本信息"
    print_separator

    echo "主机名: $(hostname)"

    # PRETTY_NAME may be double-quoted, single-quoted or unquoted; anchor
    # the match and strip whichever quotes are present.
    if [[ -f /etc/os-release ]]; then
        os_name=$(grep -m1 '^PRETTY_NAME=' /etc/os-release | cut -d= -f2- | tr -d "\"'")
    fi
    echo "操作系统: ${os_name:-$(uname -o)}"

    echo "内核版本: $(uname -r)"
    echo "系统架构: $(uname -m)"

    # A pipeline's exit status is that of its last stage (awk), so a
    # `who -b | awk ... || fallback` chain never reaches the fallback.
    # Test the captured text instead.
    boot_time=$(who -b 2>/dev/null | awk '{print $3, $4}')
    if [[ -z "${boot_time//[[:space:]]/}" ]]; then
        boot_time=$(uptime -s 2>/dev/null)
    fi
    echo "启动时间: ${boot_time:-无法获取}"

    echo "运行时间: $(uptime -p 2>/dev/null || echo '无法获取')"

    log_message "系统基本信息收集完成"
}

# Print CPU model, core count, a one-second mpstat sample and the 1-minute
# load average, warning when the load exceeds the number of cores.
# Globals:
#   BLUE, YELLOW (read)
# Outputs:
#   the report on stdout; one line to the log via log_message
cpu_check() {
    local model="" cores="" mpstat_out="" load1=""

    print_msg "$BLUE" "\n2. CPU信息"
    print_separator

    # Determine the core count once; fall back to /proc/cpuinfo and finally
    # to 1 so the load comparison below always has a positive integer.
    cores=$(nproc 2>/dev/null)
    if ! [[ "$cores" =~ ^[1-9][0-9]*$ ]]; then
        cores=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
        [[ "$cores" =~ ^[1-9][0-9]*$ ]] || cores=1
    fi

    if check_command "lscpu"; then
        # Force the C locale so the "Model name" label is not translated.
        # Only the first matching line is reported.
        model=$(LC_ALL=C lscpu 2>/dev/null \
            | awk '/^Model name/ { sub(/^[^:]*:[ \t]*/, ""); print; exit }')
        echo "CPU型号: ${model:-无法获取}"
    fi
    echo "CPU核心数: $cores"

    echo -n "CPU使用率: "
    if check_command "mpstat"; then
        # The pipeline status is tail's, so detect failure by empty output.
        mpstat_out=$(mpstat 1 1 2>/dev/null | tail -2)
        if [[ -n "$mpstat_out" ]]; then
            printf '%s\n' "$mpstat_out"
        else
            echo "无法获取"
        fi
    else
        echo "请安装 sysstat 包以获取详细CPU使用率"
    fi

    # /proc/loadavg is not affected by locale (uptime's text and decimal
    # separator are); parse uptime under the C locale only as a fallback.
    if [[ -r /proc/loadavg ]]; then
        read -r load1 _ < /proc/loadavg
    else
        load1=$(LC_ALL=C uptime 2>/dev/null \
            | sed -n 's/.*load average: *\([0-9.]*\).*/\1/p')
    fi

    if [[ -n "$load1" ]]; then
        echo "1分钟负载: $load1 (核心数: $cores)"
        # awk does the floating-point comparison, so bc is not required.
        if awk -v load="$load1" -v cores="$cores" 'BEGIN { exit !(load > cores) }'; then
            print_msg "$YELLOW" "警告:CPU负载较高"
        fi
    fi

    log_message "CPU检查完成"
}

# Print memory usage, the used-memory percentage (with warnings above 70%
# and 90%) and swap information.
# Globals:
#   BLUE, RED, YELLOW (read)
# Outputs:
#   the report on stdout; one line to the log via log_message
memory_check() {
    local total_mem="" used_mem="" mem_percent swap_out=""

    print_msg "$BLUE" "\n3. 内存信息"
    print_separator

    echo "内存使用情况:"
    free -h

    # Parse under the C locale: free's row labels are translated in other
    # locales, which would make the /^Mem:/ match fail. One call supplies
    # both numbers so they come from the same sample.
    read -r total_mem used_mem < <(
        LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/ { print $2, $3; exit }'
    )

    if [[ "$total_mem" =~ ^[0-9]+$ && "$used_mem" =~ ^[0-9]+$ ]] \
        && [ "$total_mem" -gt 0 ]; then
        mem_percent=$((used_mem * 100 / total_mem))
        echo -e "\n内存使用率: ${mem_percent}%"

        if [ "$mem_percent" -gt 90 ]; then
            print_msg "$RED" "警告:内存使用率超过90%"
        elif [ "$mem_percent" -gt 70 ]; then
            print_msg "$YELLOW" "注意:内存使用率超过70%"
        fi
    else
        echo -e "\n内存使用率: 无法计算"
    fi

    # swapon exits 0 with no output when no swap is configured, so decide
    # on the captured text rather than on the exit status.
    echo -e "\n交换空间:"
    swap_out=$(swapon --show 2>/dev/null)
    if [[ -n "$swap_out" ]]; then
        printf '%s\n' "$swap_out"
    else
        LC_ALL=C free -h 2>/dev/null | grep -i '^swap'
    fi

    log_message "内存检查完成"
}

# Print disk space, disk I/O statistics and inode usage, and warn when the
# root filesystem is more than 80% / 90% full.
# Globals:
#   BLUE, RED, YELLOW (read)
#   ROOT_USAGE (written) - root filesystem usage percent, 0 when unknown
# Outputs:
#   the report on stdout; one line to the log via log_message
disk_check() {
    local io_out="" root_usage=""

    print_msg "$BLUE" "\n4. 磁盘信息"
    print_separator

    echo "磁盘空间使用情况:"
    df -h 2>/dev/null | head -20

    echo ""
    echo "磁盘I/O统计:"
    if check_command "iostat"; then
        # The pipeline status is tail's, so detect failure by empty output.
        io_out=$(iostat -d 1 1 2>/dev/null | tail -n +4)
        if [[ -n "$io_out" ]]; then
            printf '%s\n' "$io_out"
        else
            echo "无法获取I/O统计"
        fi
    else
        echo "请安装 sysstat 包以获取磁盘I/O统计: sudo apt-get install sysstat"
    fi

    # -P selects the POSIX output format: exactly one line per filesystem,
    # so a long device name cannot wrap and shift the columns.
    root_usage=$(df -P / 2>/dev/null | awk 'NR == 2 { sub(/%$/, "", $5); print $5 }')
    if [[ "$root_usage" =~ ^[0-9]+$ ]]; then
        if [ "$root_usage" -gt 90 ]; then
            print_msg "$RED" "警告:根分区使用率超过90%"
        elif [ "$root_usage" -gt 80 ]; then
            print_msg "$YELLOW" "注意:根分区使用率超过80%"
        fi
        ROOT_USAGE=$root_usage
    else
        ROOT_USAGE=0
        print_msg "$YELLOW" "注意:无法获取根分区使用率"
    fi

    echo -e "\nInode使用情况:"
    df -i 2>/dev/null | head -10

    log_message "磁盘检查完成"
}

# Print network interfaces, private IPv4 addresses, open connections, the
# routing table and the result of a single ping to 8.8.8.8.
# Globals:
#   BLUE, GREEN, YELLOW (read)
# Outputs:
#   the report on stdout; one line to the log via log_message
network_check() {
    # RFC 1918 private ranges. The trailing dots stop 172.16-31 from also
    # matching public prefixes such as 172.160.x.x.
    local private_re='(192\.168\.|10\.|172\.(1[6-9]|2[0-9]|3[01])\.)'
    local iface_out="" ip_out="" route_out=""

    print_msg "$BLUE" "\n5. 网络信息"
    print_separator

    # In each section below the exit status of `cmd | head` is head's,
    # which is always 0, so an `||` fallback after such a pipeline never
    # runs. Fall back on empty output instead.
    echo "网络接口:"
    iface_out=$(ip addr show 2>/dev/null | grep -E "^[0-9]+:" | head -10)
    if [[ -z "$iface_out" ]]; then
        iface_out=$(ifconfig 2>/dev/null | head -20)
    fi
    if [[ -n "$iface_out" ]]; then
        printf '%s\n' "$iface_out"
    fi

    echo ""
    echo "IP地址信息:"
    ip_out=$(ip addr show 2>/dev/null | grep -E "inet ${private_re}")
    if [[ -z "$ip_out" ]]; then
        ip_out=$(ifconfig 2>/dev/null | grep -E "inet (addr:)?${private_re}")
    fi
    printf '%s\n' "${ip_out:-未找到内网IP或需要root权限}"

    echo ""
    echo "网络连接状态(前10个):"
    if check_command "ss"; then
        ss -tun 2>/dev/null | head -11
    elif check_command "netstat"; then
        netstat -tun 2>/dev/null | head -11
    else
        echo "请安装 iproute2 或 net-tools 包"
    fi

    echo ""
    echo "路由表:"
    route_out=$(ip route 2>/dev/null | head -5)
    if [[ -z "$route_out" ]]; then
        route_out=$(route -n 2>/dev/null | head -5)
    fi
    if [[ -n "$route_out" ]]; then
        printf '%s\n' "$route_out"
    fi

    # One ICMP echo with a 1-second timeout to a public address.
    echo ""
    echo -n "外网连通性测试: "
    if ping -c 1 -W 1 8.8.8.8 &> /dev/null; then
        print_msg "$GREEN" "正常"
    else
        print_msg "$YELLOW" "失败"
    fi

    log_message "网络检查完成"
}

# Report the state of a fixed list of common services.
# On systemd hosts (detected by /run/systemd/system) each installed unit
# from the list is reported as running, not running or unknown; on hosts
# with /etc/init.d, up to ten running services are listed instead.
# Globals:
#   GREEN, YELLOW, NC (read)
#   BLUE (read)
# Outputs:
#   the report on stdout; one line to the log via log_message
service_check() {
    local -a services=("sshd" "ssh" "nginx" "apache2" "httpd" "mysql" "mariadb" "postgresql" "docker" "crond" "cron")
    local service status unit_files

    print_msg "$BLUE" "\n6. 关键服务状态"
    print_separator

    if [[ -d /run/systemd/system ]]; then
        echo "使用systemd管理系统服务"

        # Query the unit list once instead of once per service.
        unit_files=$(systemctl list-unit-files 2>/dev/null)

        for service in "${services[@]}"; do
            # Skip services that are not installed on this host.
            grep -q "^${service}\." <<<"$unit_files" || continue

            # is-active prints the state AND exits non-zero for anything
            # but "active". Appending a default with `|| echo` would glue
            # it onto the printed state, so capture the output alone.
            status=$(systemctl is-active "$service" 2>/dev/null)
            case "$status" in
                active)
                    echo -e "${service}: ${GREEN}运行中${NC}"
                    ;;
                inactive)
                    echo -e "${service}: ${YELLOW}未运行${NC}"
                    ;;
                *)
                    echo -e "${service}: 状态未知"
                    ;;
            esac
        done
    elif [[ -d /etc/init.d ]]; then
        echo "使用sysvinit管理系统服务"
        echo "运行中的服务:"
        service --status-all 2>/dev/null | grep -E "\[ \+ \]" | head -10
    else
        echo "无法确定服务管理系统"
    fi

    log_message "服务检查完成"
}

# Print logged-in users, recent logins, recent failed password attempts and
# recent sudo activity.
# Globals:
#   BLUE (read)
# Arguments:
#   $1 - (optional) path of the authentication log to scan; when omitted,
#        /var/log/auth.log is used if present, otherwise /var/log/secure
# Outputs:
#   the report on stdout; one line to the log via log_message
security_check() {
    local auth_log="${1:-}" candidate failed_out="" sudo_out=""

    # Choose the log once so both sections below scan the same file.
    if [[ -z "$auth_log" ]]; then
        for candidate in /var/log/auth.log /var/log/secure; do
            if [[ -f "$candidate" ]]; then
                auth_log=$candidate
                break
            fi
        done
    fi

    print_msg "$BLUE" "\n7. 安全检查"
    print_separator

    echo "当前登录用户:"
    who

    echo ""
    echo "最近登录记录(前5条):"
    last -n 5 2>/dev/null || echo "需要root权限查看"

    # `grep | tail` exits with tail's status (0) even when nothing matched
    # or the file was unreadable, so choose the fallback text by checking
    # for empty output.
    echo ""
    echo "失败的登录尝试:"
    if [[ -n "$auth_log" && -f "$auth_log" ]]; then
        failed_out=$(grep "Failed password" "$auth_log" 2>/dev/null | tail -3)
        printf '%s\n' "${failed_out:-无记录或需要root权限}"
    else
        echo "认证日志文件未找到"
    fi

    echo ""
    echo "sudo使用记录(最近3条):"
    if [[ -n "$auth_log" && -f "$auth_log" ]]; then
        sudo_out=$(grep "sudo:" "$auth_log" 2>/dev/null | tail -3)
        printf '%s\n' "${sudo_out:-无记录}"
    else
        echo "日志文件不可访问"
    fi

    log_message "安全检查完成"
}

# Report how many package updates are pending on APT- or YUM-based systems.
# Globals:
#   BLUE, GREEN, YELLOW (read)
# Outputs:
#   the report on stdout; one line to the log via log_message
package_check() {
    local updates=0 sim_out="" yum_out="" yum_rc=0

    print_msg "$BLUE" "\n8. 软件包更新检查"
    print_separator

    if check_command "apt-get"; then
        echo "APT系统检测到 (Debian/Ubuntu)"
        # Refreshing the index can fail (for example without root); say so
        # instead of silently reporting on stale data.
        if ! apt-get update > /dev/null 2>&1; then
            print_msg "$YELLOW" "注意:无法刷新软件包索引,结果可能不是最新"
        fi
        # Simulate the upgrade once and reuse its output for both the
        # count and the preview.
        sim_out=$(apt-get -s upgrade 2>/dev/null | grep "^Inst")
        updates=$(grep -c "^Inst" <<<"$sim_out")
        if [ "$updates" -gt 0 ]; then
            print_msg "$YELLOW" "有 $updates 个可用更新"
            head -5 <<<"$sim_out"
        else
            print_msg "$GREEN" "系统已是最新"
        fi
    elif check_command "yum"; then
        echo "YUM系统检测到 (RHEL/CentOS/Fedora)"
        # check-update exits 100 when updates exist, 0 when there are none
        # and another value on error; query once and branch on the status.
        yum_out=$(yum check-update --quiet 2>/dev/null)
        yum_rc=$?
        case "$yum_rc" in
            100)
                updates=$(grep -vc "^$" <<<"$yum_out")
                print_msg "$YELLOW" "有 $updates 个可用更新"
                head -10 <<<"$yum_out"
                ;;
            0)
                print_msg "$GREEN" "系统已是最新"
                ;;
            *)
                print_msg "$YELLOW" "注意:无法检查更新 (yum 返回 $yum_rc)"
                ;;
        esac
    else
        echo "不支持的包管理器"
    fi

    log_message "软件包检查完成"
}

# Entry point: prepare the log file, run every check in order, then print a
# short summary (host, uptime, memory usage, root filesystem usage).
# Globals:
#   LOG_FILE (read, possibly rewritten when the default is not writable)
#   GREEN, YELLOW, BLUE (read)
main() {
    local total_mem="" used_mem="" mem_percent root_usage=""

    # Only clear the screen when stdout is a terminal.
    if [[ -t 1 ]]; then
        clear
    fi

    # Settle the log location before announcing it, so the path printed
    # below is the one actually written to.
    if ! touch "$LOG_FILE" 2>/dev/null; then
        LOG_FILE="$HOME/system-check-$(date +%Y%m%d-%H%M%S).log"
        if touch "$LOG_FILE" 2>/dev/null; then
            print_msg "$YELLOW" "无法写入/var/log,日志将保存到: $LOG_FILE"
        else
            # Last resort: discard log lines rather than failing on every
            # log_message call.
            LOG_FILE=/dev/null
            print_msg "$YELLOW" "无法创建日志文件,本次检查不记录日志"
        fi
    fi

    print_msg "$GREEN" "开始系统健康检查..."
    echo "检查时间: $(date)"
    echo "日志文件: $LOG_FILE"
    print_separator

    log_message "开始系统健康检查"

    # Run the checks in report order.
    check_root
    system_info
    cpu_check
    memory_check
    disk_check
    network_check
    service_check
    security_check
    package_check

    print_msg "$GREEN" "\n✓ 系统检查完成"
    echo "详细日志已保存至: $LOG_FILE"

    print_msg "$BLUE" "\n📊 检查摘要"
    print_separator
    echo "系统: $(hostname)"
    echo "运行时间: $(uptime -p 2>/dev/null || echo '未知')"

    # Parse free under the C locale; its row labels are translated in
    # other locales and /^Mem:/ would not match.
    read -r total_mem used_mem < <(
        LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/ { print $2, $3; exit }'
    )
    if [[ "$total_mem" =~ ^[0-9]+$ && "$used_mem" =~ ^[0-9]+$ ]] \
        && [ "$total_mem" -gt 0 ]; then
        mem_percent=$((used_mem * 100 / total_mem))
        echo "内存使用率: ${mem_percent}%"
    else
        echo "内存使用率: 未知"
    fi

    # df -P prints one line per filesystem, so column 5 is always the
    # usage percentage even for long device names.
    root_usage=$(df -P / 2>/dev/null | awk 'NR == 2 { sub(/%$/, "", $5); print $5 }')
    if [[ "$root_usage" =~ ^[0-9]+$ ]]; then
        echo "根分区使用率: ${root_usage}%"
    else
        echo "根分区使用率: 未知"
    fi

    echo "检查时间: $(date '+%Y-%m-%d %H:%M:%S')"

    log_message "系统检查完成"
}

# Run the entry point, forwarding any command-line arguments.
main "$@"

安装缺失的命令:

如果系统缺少脚本用到的某些命令(如 mpstat、iostat、ss、netstat、bc),可按发行版执行以下命令安装:

# Debian/Ubuntu
sudo apt-get update
sudo apt-get install sysstat iproute2 net-tools

# RHEL/CentOS/Fedora
sudo yum install sysstat iproute net-tools

# 通用依赖
sudo apt-get install bc  # 或 sudo yum install bc

文档仓库 » Linux系统信息与健康检查脚本