With Nagios you can monitor almost everything, and the philosophy is simple.
Nagios uses plug-ins — a binary, a Perl script, or a shell script — checks the value each plug-in returns, and from that determines the host/service state. So Nagios neither knows nor cares what a plug-in is actually monitoring. Here is a plug-in that checks the status of all zpools in the system.
#!/usr/bin/sh #set -x # script name zpoolhealth.sh # ------------------------- # Nagios plugin : determines zpool health # Nagios plugin return values STATE_OK=0 STATE_WARNING=1 STATE_CRITICAL=2 STATE_UNKNOWN=3 STATE_DEPENDENT=4 # variables progname=`basename $0` tmpdir=/tmp okfile=${tmpdir}/${progname}.ok-zpool warnfile=${tmpdir}/${progname}.warn-zpool critfile=${tmpdir}/${progname}.crit-zpool # --- ERROR SUBROUTINE err() { echo "\n ERROR: $* \n" exit 1 } # --- END SCRIPT WITH OUTPUT endscript () { echo "${RESULT}" exit ${EXIT_STATUS} } # --- CLEANING SUBROUTINE tmp_file_cleaning () { [ -f ${okfile}.$$ ] && rm ${okfile}.$$ [ -f ${warnfile}.$$ ] && rm ${warnfile}.$$ [ -f ${critfile}.$$ ] && rm ${critfile}.$$ } # --- cleaning in case of script termination and regular exit trap tmp_file_cleaning HUP INT QUIT ABRT EXIT # ---- find zpools in the system myzpools=`zpool list -H | awk '{print $1}'` if [ "${myzpools}" = "no" ]; then echo "There is no zpool(s)"; exit 3 fi #echo My zpools: ${myzpools} # --- get zpool health and create temp files for zp in ${myzpools} do health=`zpool list -H ${zp} | awk '{print $6}'` if [ ${health} = ONLINE ] then printf "${zp} " >> ${okfile}.$$ elif [ ${health} = DEGRADED ] then printf "${zp} " >> ${warnfile}.$$ else printf "${zp} " >> ${critfile}.$$ fi done # --- check temp files and create output # all zpools are online if [ -f ${okfile}.$$ ] && [ ! -f ${warnfile}.$$ ] && [ ! -f ${critfile}.$$ ] then okpools=`cat ${okfile}.$$` RESULT="OK: ${okpools}" EXIT_STATUS="${STATE_OK}" # zpools are online, at least one is degraded and no critical ones elif [ -f ${okfile}.$$ ] && [ -f ${warnfile}.$$ ] && [ ! -f ${critfile}.$$ ] then warnpools=`cat ${warnfile}.$$` okpools=`cat ${okfile}.$$` RESULT="WARN(DEGRADED): ${warnpools} OK: ${okpools}" EXIT_STATUS="${STATE_WARNING}" # all zpools are degraded elif [ ! -f ${okfile}.$$ ] && [ -f ${warnfile}.$$ ] && [ ! 
-f ${critfile}.$$ ] then warnpools=`cat ${warnfile}.$$` RESULT="WARN(DEGRADED) ${warnpools}" EXIT_STATUS="${STATE_WARNING}" # there are zpools in each state elif [ -f ${okfile}.$$ ] && [ -f ${warnfile}.$$ ] && [ -f ${critfile}.$$ ] then okpools=`cat ${okfile}.$$` warnpools=`cat ${warnfile}.$$` critpools=`cat ${critfile}.$$` RESULT="CRIT(FAULT/OFFLINE/UNAVAIL): ${critpools} WARN(DEGRADED): ${warnpools} OK: ${okpools}" EXIT_STATUS="${STATE_CRITICAL}" # zpools are online and at least one is critical elif [ -f ${okfile}.$$ ] && [ ! -f ${warnfile}.$$ ] && [ -f ${critfile}.$$ ] then okpools=`cat ${okfile}.$$` critpools=`cat ${critfile}.$$` RESULT="CRIT(FAULT/OFFLINE/UNAVAIL): ${critpools} OK: ${okpools}" EXIT_STATUS="${STATE_CRITICAL}" # no online zpools, all are degraded and critical elif [ ! -f ${okfile}.$$ ] && [ -f ${warnfile}.$$ ] && [ -f ${critfile}.$$ ] then warnpools=`cat ${warnfile}.$$` critpools=`cat ${critfile}.$$` RESULT="CRIT(FAULT/OFFLINE/UNAVAIL): ${critpools} WARN(DEGRADED) ${warnpools}" EXIT_STATUS="${STATE_CRITICAL}" # all zpools are critical elif [ ! -f ${okfile}.$$ ] && [ ! -f ${warnfile}.$$ ] && [ -f ${critfile}.$$ ] then critpools=`cat ${critfile}.$$` RESULT="CRIT(FAULT/OFFLINE/UNAVAIL): ${critpools}" EXIT_STATUS="${STATE_CRITICAL}" fi # call subroutine to end script endscript |
Usually I create a README file with some information on how to deploy the plugin, etc.
################################################ README about Nagios plugin zpoolhealth.sh ################################################ 1. Copy plugin zpoolhealth.sh to remote host's directory /opt/csw/libexec/nagios-plugins/ 2. On the plugin, make permissions 755, owner root:bin -rwxr-xr-x 1 root bin 3516 Jan 23 10:32 zpoolhealth.sh 3. Add lines to remote host in file /opt/csw/etc/nrpe.cfg # check zpool status command[check_zpool_status]=/opt/csw/libexec/nagios-plugins/zpoolhealth.sh 4. Reset NRPE service on remote host {host}/> svcadm restart cswnrpe 5. Test how NRPE uses plugin on remote host, using CLI from Nagios machine (nagiosbox) {nagiosbox}/> /opt/csw/nagios/libexec/check_nrpe -H unixlab -c check_zpool_status OK: space.1 space0 6. Define Nagios service group on nagiosbox, file /etc/nagios/UNIX/services.cfg define servicegroup{ servicegroup_name zpool_status alias Zpool status } 7. Create service so Nagios can check the host, file /etc/nagios/UNIX/services.cfg define service{ use gen-service host_name unixlab ;first test on unixlab #hostgroup_name SUN,CC,FILESERVER ;if test ok, include others, copy plugin there service_description Zpool status servicegroups zpool_status check_command check-nrpe!check_zpool_status } 8. Refresh nagios service > svcadm refresh nagios -- Note: The script deployplugin.sh can be used to do next on multiple remote hosts: 1. copy plugin to remote host 2. backup nrpe.cfg on remote host 3. append required lines to nrpe.cfg on remote host 4. restart cswnrpe service on remote host |
So yes, if you want to deploy the plugin on many machines, see this script.
#!/bin/sh
#set -x
#
# script name deployplugin.sh
# ---------------------------
# Deploys the zpoolhealth.sh Nagios plugin to a list of remote hosts:
#   1. copy plugin to remote host
#   2. backup nrpe.cfg on remote host
#   3. append required lines to nrpe.cfg on remote host
#   4. restart cswnrpe service on remote host
# Requires password-less ssh/scp to every host in ${hostlist}.

# -- error subroutine
err() {
        echo "\n ERROR: $* \n"
        exit 1
}

# variables
backuptime=`date +%m-%d-%Y.%Hh%Mm%Ss`   # time of nrpe.cfg backup
nrpedir="/opt/csw/etc"
nrpefile=nrpe.cfg
# command to backup nrpe.cfg (executed remotely via ssh, so it is kept
# as a single string and expanded unquoted on the ssh command line)
backupnrpe="cp -p ${nrpedir}/${nrpefile} ${nrpedir}/.${nrpefile}.${backuptime}"
plugin_dest_dir="/opt/csw/libexec/nagios-plugins"       # location of plugin
plugin_src_dir="/etc/master/nagios.plugin/zpool_status"
plugin=zpoolhealth.sh

# list of hosts
hostlist='
host-1
host-2
host-3
host-etc'

for host in ${hostlist}
do
        fping -q "${host}"      # -q = quiet
        if [ $? -eq 0 ]; then
                echo " --- OK --- Host ${host} is reachable, proceed."

                # check existence of directory /opt/csw/etc
                # ('file' prints "path:<TAB>directory"; $NF avoids the
                # leading whitespace that field 2 of -F: would carry)
                [ "`ssh ${host} file ${nrpedir} | awk '{print $NF}'`" = "directory" ] || \
                        err "\n Directory ${nrpedir} doesn't exist."

                # check existence of file nrpe.cfg
                [ "`ssh ${host} file ${nrpedir}/${nrpefile} | awk '{print $2, $3}'`" = "ascii text" ] || \
                        err "\n File ${nrpefile} doesn't exist."

                # on remote host, check existence of destination directory for plugins
                # (fixed: message used undefined ${plugindir} instead of ${plugin_dest_dir})
                [ "`ssh ${host} file ${plugin_dest_dir} | awk '{print $NF}'`" = "directory" ] || \
                        err "\n Directory ${plugin_dest_dir} doesn't exist."

                # copy plugin to remote host
                scp -p "${plugin_src_dir}/${plugin}" "${host}:${plugin_dest_dir}/${plugin}" || \
                        err "\n ${plugin_src_dir}/${plugin} can't be copied to ${host}"

                # backup nrpe.cfg on remote host
                ssh "${host}" ${backupnrpe} || \
                        err "\n nrpe.cfg can't be backup-ed on ${host}"

                # fetch nrpe.cfg and append the two NRPE lines locally
                # (plain appends instead of the previous two-pass sed '$a\' edit;
                # the resulting file content is identical)
                ssh "${host}" cat "${nrpedir}/${nrpefile}" > "/tmp/${nrpefile}.${host}" || \
                        err "\n Can't append first line"
                echo "# check zpool status" >> "/tmp/${nrpefile}.${host}" || \
                        err "\n Can't append first line"
                echo "command[check_zpool_status]=/opt/csw/libexec/nagios-plugins/zpoolhealth.sh" \
                        >> "/tmp/${nrpefile}.${host}" || \
                        err "\n Can't append second line"

                # copy tmp file to remote host
                scp -p "/tmp/${nrpefile}.${host}" "${host}:${nrpedir}/${nrpefile}" || \
                        err "\n Can't copy temp file /tmp/${nrpefile}.${host} to ${host}"

                # remove temp file
                rm "/tmp/${nrpefile}.${host}" || \
                        err "\n Can't remove temp file /tmp/${nrpefile}.${host}"

                # reset cswnrpe service
                ssh "${host}" svcadm restart cswnrpe || \
                        err "\n The cswnrpe service can't be restarted"
                sleep 3
                [ "`ssh ${host} svcs -H cswnrpe | awk '{print $1}'`" = "online" ] || \
                        echo "The cswnrpe service on ${host} is not online - check this later"
        else
                echo " ??????? Host ${host} is not reachable - check needed !"
        fi
done

exit 0