# time sosreport
(补充:这里以显示 sosreport 命令所需执行时间为例)
# time sosreport
(补充:这里以显示 sosreport 命令所需执行时间为例)
作者:朱明宇
名称:显示系统常用信息
作用:显示系统常用信息
1. 在此脚本的分割线内写入相应的内容
2. 给此脚本添加执行权限
3. 执行此脚本
1. times=5 #显示系统常用信息的次数
2. sleeptime=0.3 #大部分行与行之间显示的间隔时间
1. 需要安装 sysstat 软件
2. 执行此脚本的用户能够使用 sudo ip a s 命令
3. 执行此脚本的用户能够使用 sudo ss -ntulap 命令
4. 显示虚拟机数量的功能,需要在搭建了 KVM 虚拟化平台、并且执行此脚本的用户能够使用 sudo virsh list 命令之后才能实现
#!/bin/bash
#
# Prints a snapshot of common system information and repeats it $times times:
# host name, IPv4 addresses, CPU / memory / filesystem usage, number of
# virtual machines, logged-in users, installed RPM packages, sockets, and
# per-interface / per-disk throughput.
#
# The network and disk sections are skipped when sar / iostat are not
# installed. The VM count is only shown when `sudo -l` lists a `virsh list`
# rule. `sudo ip a s` and `sudo ss -ntulap` are always invoked.

####################### Separator ########################
times=5        # how many snapshots to print
sleeptime=0.3  # pause, in seconds, between most output lines
####################### Separator ########################

# Print "$1" followed by $2 '#' characters (one every 10 ms) and a newline.
progress_banner() {
  local label=$1 count=$2 i
  printf '%s' "$label"
  for (( i = 0; i < count; i++ )); do
    printf '#'
    sleep 0.01
  done
  printf '\n'
}

for (( nowtime = 1; nowtime <= times; nowtime++ )); do
  progress_banner "Start Monitoring: " 94
  sleep "$sleeptime"

  host=$(hostname)
  printf 'Name:\t\t\t\t\t\t\t \033[1m%s\033[0m\n' "$host"

  # Only "inet" rows carry an IPv4 address. Matching on "any line with a dot"
  # also picked up interface names such as eth0.100@eth0.
  while read -r iip; do
    sleep "$sleeptime"
    printf 'IP Address:\t\t\t\t\t\t \033[1m%s\033[0m\n' "$iip"
  done < <(sudo ip a s \
    | awk '$1 == "inet" && $2 !~ /^127\.0\.0\.1(\/|$)/ { sub(/\/.*/, "", $2); print $2 }')

  sleep "$sleeptime"
  # The 4th comma-separated field of top's %Cpu line is "<idle> id";
  # awk's numeric coercion keeps only the leading number.
  cpu=$(top -bn 1 | awk -F',' '/^%Cpu/ { print 100 - $4 }')
  printf 'CPU Usage (Total):\t\t\t\t\t \033[1m%s%%\033[0m\n' "$cpu"

  sleep "$sleeptime"
  # printf always yields one decimal, so whole-number percentages are no
  # longer dropped (the old egrep pattern required a decimal point).
  mem=$(free | awk '/Mem/ { printf "%.1f", $3 / $2 * 100 }')
  printf 'Memory Usage (Total):\t\t\t\t\t \033[1m%s%%\033[0m\n' "$mem"

  # One df pass yields mount point and usage together, so a mount point can
  # no longer be confused with another one that merely ends in the same path.
  # -P keeps every filesystem on a single line.
  while read -r idirectory directoryusage; do
    sleep "$sleeptime"
    case "$idirectory" in
      / | /sda | /sdb)
        # Short mount points need one more tab to stay aligned.
        printf 'Directory Usage (%s):\t\t\t\t\t \033[1m%s\033[0m\n' "$idirectory" "$directoryusage"
        ;;
      *)
        printf 'Directory Usage (%s):\t\t\t\t \033[1m%s\033[0m\n' "$idirectory" "$directoryusage"
        ;;
    esac
  done < <(df -hP | awk '!/run/ && !/boot/ && $1 ~ /\/dev/ { print $6, $5 }')

  # Count VMs only when the user is allowed to run `sudo virsh list`.
  if sudo -l | grep -q 'virsh list'; then
    sleep "$sleeptime"
    virtual=$(sudo virsh list | grep -c '[0-9]')
    printf 'Number of Virtual Machines (Total):\t\t\t \033[1m%s\033[0m\n' "$virtual"
  fi

  sleep "$sleeptime"
  user=$(who | wc -l)
  printf 'Number of User Logins (Total):\t\t\t\t \033[1m%s\033[0m\n' "$user"

  soft=$(rpm -qa | wc -l)
  printf 'Number of Softwares (Total):\t\t\t\t \033[1m%s\033[0m\n' "$soft"

  sleep "$sleeptime"
  # Skip the header row that ss prints so it is not counted as a socket.
  port=$(sudo ss -ntulap | awk 'NR > 1 { n++ } END { print n + 0 }')
  printf 'Number of Open Ports (Total):\t\t\t\t \033[1m%s\033[0m\n' "$port"

  if command -v sar &> /dev/null; then
    # A single 1-second sample serves every interface, so read and write
    # figures come from the same interval. LC_ALL=C pins the "Average:" label
    # and the column layout. Columns 5 and 6 of `sar -n DEV` are rxkB/s and
    # txkB/s; the previous columns 3 and 4 are packet rates, which did not
    # match the "m/s" label. Every interface sar reports except lo is listed.
    while read -r inetworkcard networkread networkwrite; do
      case "$inetworkcard" in
        *eth*)
          printf 'Network Card IO (%s):\t\t\t\t\t \033[1m%s m/s\033[0m (Read)\t\033[1m%s m/s\033[0m (Write)\n' \
            "$inetworkcard" "$networkread" "$networkwrite"
          ;;
        *)
          printf 'Network Card IO (%s):\t\t\t\t \033[1m%s m/s\033[0m (Read)\t\033[1m%s m/s\033[0m (Write)\n' \
            "$inetworkcard" "$networkread" "$networkwrite"
          ;;
      esac
    done < <(LC_ALL=C sar -n DEV 1 1 \
      | awk '$1 == "Average:" && $2 != "IFACE" && $2 != "lo" { print $2, $5 / 1000, $6 / 1000 }')
  fi

  if command -v iostat &> /dev/null; then
    # One iostat run for all devices; columns 3 and 4 of `iostat -d -k` are
    # kB_read/s and kB_wrtn/s.
    while read -r idisk diskread diskwrite; do
      sleep "$sleeptime"
      printf 'Disk IO (/dev/%s):\t\t\t\t\t \033[1m%s m/s\033[0m (Read)\t\033[1m%s m/s\033[0m (Write)\n' \
        "$idisk" "$diskread" "$diskwrite"
    done < <(LC_ALL=C iostat -d -k 1 1 \
      | awk '!/^$/ && !/Device/ && !/Linux/ { print $1, $3 / 1000, $4 / 1000 }')
  fi

  progress_banner "Complete Monitoring: " 91
  sleep "$sleeptime"
done
# Closing banner: the label followed by 91 '#' characters, one every 10 ms.
printf 'Terminal Monitoring: '
for (( i = 0; i < 91; i++ )); do
  printf '#'
  sleep 0.01
done
# End the line, as the other banners do, so the shell prompt that follows
# starts on a fresh line.
printf '\n'
exit
watchdog: Bug: soft lockup - CPU......
当 CPU 的负载过高时,一个 CPU 在运行某一个进程时,在内核模式下超过 20 秒没有回应,则看门狗程序会将系统所有 CPU 软锁住,然后会让这些 CPU 显示各自正在运行的进程堆栈跟踪
# echo 20 > /proc/sys/kernel/watchdog_thresh
(补充:这里以将看门狗的值延长到 20 为例,也可以根据自己的需求延长更多,默认值为 10)
# echo "kernel.watchdog_thresh = 20" >> /etc/sysctl.conf
(补充:这里以将看门狗的值延长到 20 为例,也可以根据自己的需求延长更多,默认值为 10)
# sysctl -p /etc/sysctl.conf
开启 Kdump,等此报错再次发生时,分析 Kdump 在内核崩溃时搜集到的 vmcore 信息
作者:朱明宇
名称:批量比较服务器所有正在运行进程的变化
作用:批量比较服务器所有正在运行进程的变化
1. 服务器清单 $server_list 每台服务器占用 1 行
2. 在此脚本的分割线内写入相应的内容,并和此脚本放在同一目录下
3. 给此脚本添加执行权限
4. 执行此脚本
5. 此脚本执行完成后,会将运行结果写入当前目录下的 $compare_file 里
server_list=server_list.txt #服务器清单
first_time=first_time #存储第一次检查结果的目录
second_time=second_time #存储第二次检查结果的目录
compare_file=comparison_results.txt #存储比较结果的文件
此脚本执行前,必须先保证执行本脚本的用户能够免密码通过 ssh 登录这些远程服务器
#!/bin/bash
#
# Takes process-list snapshots of the servers named in $server_list and
# compares two snapshots. The action is chosen interactively:
# "first", "second" or "compare".

####################### Separator ########################
server_list=server_list.txt          # file naming the servers to inspect
first_time=first_time                # directory holding the first snapshot
second_time=second_time              # directory holding the second snapshot
compare_file=comparison_results.txt  # file receiving the comparison report
####################### Separator ########################

# -p tolerates directories left over from an earlier run while still
# reporting real failures (e.g. permission denied) instead of hiding them.
mkdir -p -- "$first_time" "$second_time" || exit 1

read -r -p "Please input first second or compare now: " choice

# Start a fresh report only when a comparison is about to be written, so
# that taking a snapshot does not wipe the previous results.
if [[ "$choice" == compare ]]; then
  echo > "$compare_file"
fi
#######################################
# Save the process names currently running on each listed server.
# Arguments:
#   $1 - file listing the servers, one per line (blank lines are skipped)
#   $2 - existing directory; one file named after each server is written there
# Outputs:
#   $2/<server> holding the 4th column of the remote `ps -A` output (header
#   row included); a warning on stderr for every server where ssh failed
# Returns:
#   0 when every server answered, 1 otherwise
#######################################
check() {
  local list=$1 outdir=$2
  local server_name status=0

  while read -r server_name _ || [[ -n "$server_name" ]]; do
    [[ -n "$server_name" ]] || continue
    # No -t: a remote tty turns every line ending into CRLF, which left a
    # stray carriage return on each saved name. -n keeps ssh from consuming
    # the server list on stdin. The sub() also cleans output from hosts that
    # force a tty anyway.
    ssh -n "$server_name" "ps -A" \
      | awk '{ sub(/\r$/, ""); print $4 }' > "$outdir/$server_name"
    if (( PIPESTATUS[0] != 0 )); then
      printf 'check: could not collect processes from %s\n' "$server_name" >&2
      status=1
    fi
  done < "$list"

  return "$status"
}
#######################################
# Report, per server, the process names that appear in the first snapshot
# but not in the second.
# Arguments:
#   $1 - file listing the servers, one per line (blank lines are skipped)
#   $2 - directory holding the first snapshot  (one file per server)
#   $3 - directory holding the second snapshot (one file per server)
#   $4 - report file; results are appended to it
# Outputs:
#   For every server: its name, the missing process names (one per line, in
#   first-snapshot order) and a blank separator line, appended to $4.
#   A warning on stderr when a snapshot file is missing.
#######################################
compare() {
  local list=$1 first_dir=$2 second_dir=$3 report=$4
  local server_name

  while read -r server_name _ || [[ -n "$server_name" ]]; do
    [[ -n "$server_name" ]] || continue
    printf '%s\n' "$server_name" >> "$report"

    if [[ -r "$first_dir/$server_name" && -r "$second_dir/$server_name" ]]; then
      # Whole-line, literal comparison: "sh" must not count as present just
      # because "sshd" is, and names are never treated as regular expressions.
      # Trailing carriage returns are ignored on both sides.
      # FILENAME (not NR == FNR) identifies the second snapshot so that an
      # empty second snapshot is handled correctly.
      awk '
        { sub(/\r$/, "") }
        FILENAME == ARGV[1] { seen[$0] = 1; next }
        $0 != "" && !($0 in seen)
      ' "$second_dir/$server_name" "$first_dir/$server_name" >> "$report"
    else
      printf 'compare: missing snapshot for %s\n' "$server_name" >&2
    fi

    printf '\n' >> "$report"
  done < "$list"
}
# Run the action the user typed at the prompt. Quoting the selector keeps an
# empty or multi-word answer from breaking the test, and anything that is
# not a known action is reported instead of being silently ignored.
case "$choice" in
  first)
    check "$server_list" "$first_time"
    ;;
  second)
    check "$server_list" "$second_time"
    ;;
  compare)
    compare "$server_list" "$first_time" "$second_time" "$compare_file"
    ;;
  *)
    printf 'Unknown choice "%s": expected first, second or compare\n' "$choice" >&2
    exit 1
    ;;
esac
进程的真正优先级越小,此进程就越能优先被执行
进程的真正优先级 = 进程默认优先级 + 修正值(nice 值)
从 -20 到 +19
# nice -n <correction value> <command>
或者:
# nice --adjustment=<correction value> <command>
或者:
# nice -<correction value> <command>
# nice -n 10 top
或者:
# nice --adjustment=10 top
或者:
# nice -10 top
(注意:这里的 -10 不是指负数 10 而是指正数 10)
(补充:这里以修正值为 10 启动 top 命令为例)
# top
或者:
# ps -el
(
补充:
1) PRI 代表进程默认的优先级
2) NI 代表进程的修正值(nice 值)
3) 进程的真正优先级 = PRI + NI
4) 如果多个进程的真正优先级一样,则 root 用户的进程被优先执行
)