You can try my Nagios check, which I used some years ago on Linux and Unix.
Compared to the default check, it raises an alert later and clears it sooner.
Code:
#!/bin/sh
#
# check_load5 - Nagios plugin, measures load with uptime
#
# Reads the three load averages printed by uptime, optionally divides them
# by a CPU-derived factor (-r), and compares them with the warning (-w) and
# critical (-c) thresholds.

# Turn off pathname expansion (globbing) for the whole script, so that
# unquoted expansions are never matched against file names.
set -f
# Use a fixed command search path instead of whatever the caller exported.
PATH=/bin:/usr/bin:/usr/sbin:/sbin:/usr/ucb
export PATH
# Print the one-line synopsis of the plugin on standard output.
# $0 is used as the program name, exactly as the plugin was invoked.
print_usage() {
  printf '%s\n' "Usage: $0 [ -r ] [ -w WLOAD ] [ -c CLOAD ]"
}
# Validate that $1 is an unsigned decimal number (digits with at most one
# decimal point, e.g. 2, 2.5, .5 or 5.).
#
# Arguments: $1 - candidate threshold value
# Outputs:   on failure, an error line followed by the usage synopsis
# Returns:   0 when $1 is valid; otherwise terminates the script with
#            exit status 3
checkfloat() {
  case $1 in
    # Empty, a character other than digit/dot, or more than one dot:
    # fall through to the error path below.
    "" | *[!0-9.]* | *.*.*) ;;
    # Only digits and at most one dot remain; require at least one digit
    # so that a lone "." is not accepted.
    *[0-9]*) return 0 ;;
  esac
  echo "$0: '$1' is not a floatingpoint"
  print_usage
  exit 3
}
# Default thresholds, used unless overridden with -w / -c.
warn=1.1
crit=2.2
# Divisor applied to the load values. Start empty so that a variable of the
# same name inherited from the caller's environment cannot leak into the
# calculation; it is only filled in by -r below.
ecores=

# Parse the command line:
#   -w WLOAD   warning threshold
#   -c CLOAD   critical threshold
#   -r         compute an "effective CPU" count to divide the load by
#   -h         print the help text and exit 3
# NOTE(review): command substitutions use backticks, presumably so the
# script keeps working under pre-POSIX Bourne shells - confirm before
# modernising them.
while getopts "w:c:rh" OPT
do
  case $OPT in
    w) warn=$OPTARG ;;
    c) crit=$OPTARG ;;
    r)
      # Every probe prints c*(lcpu/c)^0.25, where lcpu is the number of
      # logical CPUs and c a count of cores; when c is 0 it prints lcpu.
      # NOTE(review): the 0.25 exponent presumably discounts additional
      # hardware threads per core - confirm the intended weighting.
      if [ -f /proc/cpuinfo ]; then
        # lcpu = number of "processor" entries,
        # c    = distinct "physical id" values * distinct "core id" values.
        ecores=`
          awk '
            /^processor/                  {lcpu++}
            /^core.id/ && s[$NF]++==0     {core++}
            /^physical id/ && t[$NF]++==0 {phys++}
            END {c=phys*core; print c?c*(lcpu/c)^0.25:lcpu+0}
          ' /proc/cpuinfo
        `
      elif [ -x /usr/sbin/psrinfo ]; then
        # kstat based probe: lcpu = entries whose value is "on-line",
        # c1 = distinct core_id values, c2 = distinct pg_id values;
        # c is c1 when c2 is 0 or when c1 is non-zero and <= c2, else c2.
        ecores=`
          kstat -m cpu_info |
            nawk '
              $2=="on-line"                 {lcpu++}
              ($1=="core_id" && s[$2]++==0) {c1++}
              ($1=="pg_id" && t[$2]++==0)   {c2++}
              END {c=(c2==0||(c1&&c1<=c2))?c1:c2; print c?c*(lcpu/c)^0.25:lcpu+0}
            '
        `
      elif [ -x /usr/bin/lparstat ]; then
        # lparstat supplies lcpu=<n>; each line of lsdev output counts as
        # one processor (NR).
        lcpu=`lparstat | sed -n 's/.*lcpu=\([0-9]*\).*/\1/p'`
        ecores=`lsdev -c processor | awk 'END {print NR?NR*(lcpu/NR)^0.25:lcpu+0}' lcpu="$lcpu"`
      else
        ecores=1
      fi
      ;;
    h)
      printf '%s\n' \
        "This plugin tests the current system load average." \
        ""
      print_usage
      printf '%s\n' \
        "" \
        "Options:" \
        "-w WLOAD" \
        "Exit with WARNING status if all load average exceeds WLOAD" \
        "-c CLOAD" \
        "Exit with CRITICAL status if all load average exceeds CLOAD" \
        "the load average format is the same used by 'uptime' and 'w'" \
        "-r" \
        "Relative load, per CPU. Divide by the number of effective CPUs"
      exit 3
      ;;
    *)
      print_usage
      exit 3
      ;;
  esac
done

# Anything left after the options is a usage error.
if [ "$OPTIND" -le "$#" ]; then
  print_usage
  exit 3
fi
# Choose a divisor when none is available yet (value empty or exactly 0).
if [ -z "$ecores" ] || [ "$ecores" = 0 ]; then
  if [ -x /usr/sbin/ioscan ]; then
    # Count the ioscan lines whose first field starts with a digit and use
    # the reciprocal of that count (1 if nothing was counted).
    # NOTE(review): a reciprocal divisor multiplies the load by the
    # processor count - presumably because uptime on ioscan systems
    # reports a per-CPU figure; confirm.
    ecores=`/usr/sbin/ioscan -k -C processor | awk '$1~/^[0-9]/ {c++} END {print c?1/c:1}'`
  else
    ecores=1
  fi
fi
# Reject thresholds that are not plain decimal numbers before they reach awk.
checkfloat "$warn"
checkfloat "$crit"

# Read the last three fields of the uptime line as the three load averages,
# divide them by the divisor in $ecores and report:
#   CRITICAL (exit 2) when all three exceed the critical threshold,
#   WARNING  (exit 1) when all three exceed the warning threshold,
#   OK       (exit 0) otherwise,
#   UNKNOWN  (exit 3) when uptime printed nothing usable.
# The middle value is emitted as perfdata "load5".
#
# LC_ALL=C keeps "." as the decimal separator: with a decimal comma in the
# uptime output, the comma stripping done by tr would turn 0,52 into 052.
# It also keeps the numbers formatted by awk locale independent.
# The pipeline is the last command, so the awk status becomes the exit
# status of the plugin.
LC_ALL=C uptime | tr -d ',' | LC_ALL=C awk '
  # Only the first line is evaluated; it must end in three numbers.
  NR == 1 && NF >= 3 &&
  $(NF-2) ~ /^[0-9.]+$/ && $(NF-1) ~ /^[0-9.]+$/ && $NF ~ /^[0-9.]+$/ {
    v1 = $(NF-2) / cores; v2 = $(NF-1) / cores; v3 = $NF / cores
    msg = sprintf("- load average: %4.2f, %4.2f, %4.2f", v1, v2, v3)
    graph = sprintf("|load5=%5.3f;%5.3f;%5.3f;0;", v2, w, c)
    if (v1 > c && v2 > c && v3 > c) {
      state = "CRITICAL"; rc = 2
    } else if (v1 > w && v2 > w && v3 > w) {
      state = "WARNING"; rc = 1
    } else {
      state = "OK"; rc = 0
    }
    print state, msg graph
    seen = 1
    # Leave the main loop; the END rule below sets the exit status.
    exit
  }
  END {
    if (!seen) {
      print "UNKNOWN - cannot parse uptime output"
      rc = 3
    }
    exit rc
  }
' w="$warn" c="$crit" cores="$ecores"