The LDAPBuild process distributes the <buildhome>Directory-Info.com/config/ndscheck.sh file to /usr/bin/ndscheck.sh.
A typical script, as implemented, is shown below.
#!/bin/bash
#:ndscheck.sh
# NOTE: All shell values cleared at this point
# Modification history:
# 2/18/2003-- Created
# 3/10/2003-- Fixed use of mailx
# 3/13/2003-- Tweaked logging and notification
# 3/14/2003-- Tweaked notification some more
# 3/21/2003-- Handle and alert on recurring restarts
# Fixed logging on some types of alerts
# Combined NOTIFY and URGENT messages into a single mailx command
# Check for previous run of ndscheck.sh and send URGENT alert
# if found
# Finalized logic on notification
# 4/4/2003 -- Changed checkAction for eDirectory to use eDirAutoStart
# 8/11/2003-- Commented out check for '.dsbackup' file
# 11/13/2003--Modified for 8.7.1 -
# Added ports for SSL on HTTPSTACK
# Commented out start of ndsimonitor and pki as these are loaded via the
# /usr/lib/nds-modules/ndsmodules.conf file jim@willeke.com
# 12/03/2003--Modified for changes in ndsenv and ndsenv_functions
# 1/9/2004 -- Changed reference to function 'eDirAutoStart' to 'f_edirautostart'
# Fixed load location for .ndsenv*
# 2005/6/7 -- msgtest: when passed as a parameter, send a test message and exit
#
# netstat -n -a | grep LISTEN
# Look for LISTEN ports for the following services:
# NDS:524:'/etc/init.d/nds stop; /etc/init.d/nds start'
# LDAP:636:'/etc/init.d/nds stop; /etc/init.d/nds start'
# iMonitor:8389:'$bindir/ndsimonitor -u; $bindir/ndsimonitor -l'
# DXMLDriver:16384:'dxmlconfig command -t $TREENAME `cat /var/nds/.dsbackup` -d cn=B1ENTLoopback.cn=$SERVERNAME-driverset.ou=DirXML.$BaseDNdot -c start cn=$SERVERNAME-DSbackup.ou=Administration.$BaseDNdot'
#
# dxmlconfig command -t $TREENAME `cat /var/nds/.dsbackup` -d cn=B1ENTtoCCSAD.cn=$SERVERNAME-driverset.ou=DirXML.$BaseDNdot -c start cn=$SERVERNAME-DSbackup.ou=Administration.$BaseDNdot
#
# To pull driver names and their Authentication Context info (auto-detection of DirXML driver info):
# ice -v -o -SLDAP -L/var/nds/$TREENAME.der -dcn=$SERVERNAME-DSbackup,ou=Administration,$BaseDN -w`cat /var/nds/.dsbackup|awk -F" " '{print $2}'` -bou=DirXML,$BaseDN -F"objectClass=DirXML-Driver" -DDELIM -f /tmp/tmp.dxmlcheck.csv -tdn,cn,DirXML-ShimAuthServer
#
# To get just the Publisher port from the attribute
# echo "ndstest1.security.[Directory-Info.com].net:9192:ino0s701.svr.[Directory-Info.com].net:9292"|awk -F":" '{print $4}'
# Read eDirectory installation variables and subroutines.
# Both files are verified before either is sourced: sourcing a missing
# .ndsenv_functions only prints an error and carries on, which would leave the
# helper functions this script calls later (f_write_and_log, f_edirautostart,
# f_messagesend -- presumably defined there; confirm) undefined.
if [ ! -f /var/nds/.ndsenv ]; then
  printf "\nMissing /var/nds/.ndsenv -- cannot run!\n"
  exit 1
fi
if [ ! -f /var/nds/.ndsenv_functions ]; then
  printf "\nMissing /var/nds/.ndsenv_functions -- cannot run!\n"
  exit 1
fi
. /var/nds/.ndsenv_functions
. /var/nds/.ndsenv
# Read variables specific to bash shell; defines what processes to check
if [ ! -f /var/nds/.ndsenv.bash ]; then
  printf "\nMissing /var/nds/.ndsenv.bash -- cannot run!\n"
  exit 1
fi
. /var/nds/.ndsenv.bash
# Because automatic detection of DirXML drivers is not implemented, this is not needed:
# ! -f /var/nds/$TREENAME.der -o
#if [ ! -f /var/nds/.dsbackup ]; then
# printf "\nSystem not PREPared for ndsbackup.sh.\n"
# exit 1
#fi
######################################################################
# Helper for handleMESSAGE: build the mail body from the current date and
# $MESSAGE, log the event and mail it.
#   $1 / $2 - text placed before / after the alert level (URGENT or NOTIFY)
#             in the log entry.
# URGENT alerts go to $EMAIL_URGENT and $EMAIL_NOTIFY; everything else goes to
# $EMAIL_NOTIFY only.
_ndscheck_send_alert() {
  local log_prefix=$1 log_suffix=$2 level recipients
  # MESSAGE carries literal "\n" sequences that must be expanded, but it also
  # contains command output, so it is passed as %b data and never as the
  # printf format (a stray "%" would otherwise corrupt the mail).
  {
    printf '\n%s\n' "$(date)"
    printf '\n%b' "$MESSAGE"
  } > "$EMAIL_BODY"
  if [ "$URGENT" = "YES" ]; then
    level="URGENT"
    recipients="$EMAIL_URGENT $EMAIL_NOTIFY"
  else
    level="NOTIFY"
    recipients="$EMAIL_NOTIFY"
  fi
  f_write_and_log "${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${log_prefix}${level}${log_suffix}"
  # shellcheck disable=SC2086 -- recipient lists are space separated and must split
  "$bindir/mailx" -s"${level} -- ${SUBJECT}" $recipients < "$EMAIL_BODY"
  rm -f "$EMAIL_BODY"
}

# Helper for handleMESSAGE: remember what was reported ($MESSAGE plus the
# URGENT/RECURRENT flags) in $ndscheckAlertFile and store $1 as the new
# failure counter in $ndsfailCountFile.
_ndscheck_record_alert() {
  printf '%b\nURGENT=%s\nRECURRENT=%s' "$MESSAGE" "$URGENT" "$RECURRENT" > "$ndscheckAlertFile"
  printf '%s\n' "$1" > "$ndsfailCountFile"
}

# Decide whether the accumulated $MESSAGE has to be mailed, then log it and
# update the alert-state files.
#
# Reads:  MESSAGE, DATE, SUBJECT, NEWALERT, URGENT, RECURRENT, STATUS_COUNT,
#         ndscheckMAX_STATUS_COUNT, ndscheckAlertFile, ndsfailCountFile,
#         EMAIL_BODY, EMAIL_URGENT, EMAIL_NOTIFY, bindir
# Calls:  f_write_and_log (not defined in this file), $bindir/mailx
#
# Cases:
#   no alert file yet                  -> send alert, counter reset to 1
#   alert file exists, NEWALERT=YES    -> send alert, counter reset to 1
#   alert file exists, NEWALERT=NO and
#     STATUS_COUNT <= max              -> log only, counter = STATUS_COUNT
#     STATUS_COUNT >  max              -> resend alert, counter reset to 1
#
# The level test used to be written [ "$URGENT"="YES" ] in the resend and
# new-alert paths; without spaces that is a one-argument test which is always
# true, so NOTIFY-level alerts were always mailed as URGENT.
handleMESSAGE()
{
  if [ -f "$ndscheckAlertFile" ]; then
    if [ "$NEWALERT" = "YES" ]; then
      _ndscheck_send_alert "Alert already sent but new " " alert situation detected."
      _ndscheck_record_alert 1
    elif [ "$NEWALERT" = "NO" ] && [ "$STATUS_COUNT" -le "$ndscheckMAX_STATUS_COUNT" ]; then
      f_write_and_log "${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Alert already sent; new alert will not be sent.#STATUS_COUNT=$STATUS_COUNT"
      _ndscheck_record_alert "$STATUS_COUNT"
    elif [ "$NEWALERT" = "NO" ] && [ "$STATUS_COUNT" -gt "$ndscheckMAX_STATUS_COUNT" ]; then
      _ndscheck_send_alert "Wait expired; resending " " alert."
      _ndscheck_record_alert 1
    fi
  else
    _ndscheck_send_alert "Alert message " " being sent."
    _ndscheck_record_alert 1
  fi
}
##############################################################
# Override ndsenv variables
##############################################################
# Everything below is normally provided by /var/nds/.ndsenv; a value that is
# unset or empty at this point falls back to the default given here.
#
# Scheduled maintenance window ("hhmmss" start / end); ndscheck.sh does no
# processing while the current time is inside it.
: "${sMaintTime:=010000}"
: "${eMaintTime:=013000}"
# Flag file that makes ndscheck.sh ignore a down condition during maintenance
: "${ndscheckMaintFlag:=/tmp/nondscheck}"
# Maximum number of polling intervals a down condition may be ignored
: "${ndscheckMAX_STATUS_COUNT:=6}"
# Local working files
: "${EMAIL_BODY:=/tmp/ndscheckMail.tmp}"                # body of the mail being built
: "${ndsfailCountFile:=/tmp/ndsfailCount.tmp}"          # counter of successive failures detected
: "${ndsrestartCountFile:=/tmp/ndsrestartCount.tmp}"    # counter of successive restarts
: "${ndscheckAlertFile:=/tmp/ndscheckAlert.tmp}"        # which services were down at the previous poll
# Per-run state
STATUS_COUNT=0
NEWALERT="NO"
MESSAGE=''
OLDMESSAGE=''
SUBJECT="Subject: ${SERVERNAME} eDirectory status alert"
# "msgtest" as first argument: send a test message via f_messagesend and stop
i_test=$1
case $i_test in
  msgtest)
    f_messagesend
    exit 0
    ;;
esac
PATH=$PATH:/bin:/usr/local/bin
# Permanent log to keep 90 days worth of ndscheck status messages
log_file=/var/nds/ndscheck.log

# Print the value (YES or NO) of the last "<flag>=YES|NO" line found in an
# alert file; prints nothing when the file is missing or has no such line.
#   $1 - flag name (URGENT or RECURRENT)
#   $2 - alert file
# The file lives in /tmp, so its content is only matched against a fixed
# pattern and is never handed to eval.
_ndscheck_saved_flag() {
  local flag_name=$1 alert_file=$2 line
  [ -f "$alert_file" ] || return 0
  line=$(grep -E "^${flag_name}=(YES|NO)\$" -- "$alert_file" | tail -n 1)
  printf '%s' "${line#*=}"
}

# Set notification flags; URGENT/RECURRENT carry over from the previous poll
# when an alert file exists
NOTIFY="NO"
ndscheckSavedFlag=$(_ndscheck_saved_flag URGENT "$ndscheckAlertFile")
if [ -n "$ndscheckSavedFlag" ]; then
  URGENT=$ndscheckSavedFlag
fi
ndscheckSavedFlag=$(_ndscheck_saved_flag RECURRENT "$ndscheckAlertFile")
if [ -n "$ndscheckSavedFlag" ]; then
  RECURRENT=$ndscheckSavedFlag
fi
URGENT=${URGENT:="NO"}
RECURRENT=${RECURRENT:="NO"}
# Count this poll on top of the failures recorded so far
if [ -f "$ndsfailCountFile" ]; then
  STATUS_COUNT=$(cat "$ndsfailCountFile")
fi
# The counter also comes from /tmp: anything that is not a plain non-negative
# integer is treated as 0 instead of being evaluated as an arithmetic expression
case $STATUS_COUNT in
  '' | *[!0-9]*) STATUS_COUNT=0 ;;
esac
STATUS_COUNT=$((10#$STATUS_COUNT + 1))
# Check for a previous run of this script before continuing.
# /tmp/ndscheck.pid holds the PID of the run that created it; if that process
# still shows up in ps, an URGENT alert is raised and this run stops.
if [ -f /tmp/ndscheck.pid ]; then
  pid=$(cat /tmp/ndscheck.pid)
  # Only ask ps about a well-formed PID; an empty or garbled pid file is
  # treated as stale instead of producing a ps usage error.
  case $pid in
    '' | *[!0-9]*) ndscheckPrevRun='' ;;
    *) ndscheckPrevRun=$(ps -fp "$pid" | grep -v "PPID") ;;
  esac
  if [ -n "$ndscheckPrevRun" ]; then
    MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Previous $0 still running; please check server#Current PID=$$#Old PID=${pid}\n"
    NOTIFY="YES"; URGENT="YES"
    # Only a new alert if the previous poll did not already report this
    if [ -f "$ndscheckAlertFile" ] && ! grep -q "Previous $0 still running" "$ndscheckAlertFile"; then
      NEWALERT="YES"
    fi
    handleMESSAGE
    # The pid file belongs to the other run, so it is left in place
    exit 0
  fi
fi
echo "$$" > /tmp/ndscheck.pid
# Scheduled maintenance window: do nothing while the current time lies
# strictly between sMaintTime and eMaintTime. The comparison is a string
# comparison on "hhmm" + "00".
# NOTE(review): assumes DATE (set outside this file) is YYYYMMDDhhmm -- confirm.
if [[ ${DATE:8}00 > ${sMaintTime} && ${DATE:8}00 < ${eMaintTime} ]]; then
  f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Scheduled maintenance window; exiting without action."
  # Clear PID file
  rm -f /tmp/ndscheck.pid
  exit 0
fi
# Check for maintenance flag (ignore down condition temporarily).
# The flag file holds the number of polling cycles still to be skipped; it is
# decremented on every run and removed once it reaches zero.
if [ -f "$ndscheckMaintFlag" ]; then
  checkDelay=$(cat "$ndscheckMaintFlag")
  # An empty or non-numeric flag file counts as 0, i.e. skip this cycle only
  case $checkDelay in
    '' | *[!0-9]*) checkDelay=0 ;;
  esac
  checkDelay=$((10#$checkDelay - 1))
  if [ "$checkDelay" -le 0 ]; then
    rm -f "$ndscheckMaintFlag"
    f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),manual maintenance in progress (0 left); exiting."
  else
    # Never allow services to be down for more than 6 polling cycles (~1 hour)
    if [ "$checkDelay" -gt "$ndscheckMAX_STATUS_COUNT" ]; then
      checkDelay=$ndscheckMAX_STATUS_COUNT
    fi
    echo "$checkDelay" > "$ndscheckMaintFlag"
    f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),manual maintenance in progress ($checkDelay left); exiting."
  fi
  # Clear PID file on both paths; the "0 left" path used to exit without
  # doing so and left a stale pid file behind
  rm -f /tmp/ndscheck.pid
  exit 0
fi
# Define a default set of arrays if they haven't been setup for this server.
#   checkName[i]   - service label used in log and alert messages
#   checkPort[i]   - regex matched against `netstat -n -a` output
#   checkAction[i] - command line eval'ed to restart the service
# The port entries are regular expressions and are quoted so the shell never
# treats them as filename patterns.
# NOTE(review): the three arrays differ in length (3 names, 4 ports, 1 action).
# The check loop is bounded by checkName, so .8636 is never polled, and LDAPS
# and iMonitor have no restart action (see the 8.7.1 note below). Confirm this
# is intended.
if [ -z "${checkName[0]}" ]; then
  checkName=(NDS LDAPS iMonitor)
  # checkPort=(.524.*LISTEN .636.*LISTEN .8389.*LISTEN) JSW
  checkPort=('.524.*LISTEN' '.636.*LISTEN' '.8389.*LISTEN' '.8636.*LISTEN')
  checkAction=(
    "$SUDO /etc/init.d/nds stop; sleep 30; f_edirautostart; $bindir/ndsstat"
  )
  # These services are restarted by 8.7.1 in /usr/lib/nds-modules/ndsmodules.conf
  # "$SUDO $sbindir/npki -u; sleep 10; $SUDO $sbindir/npki -l" \
  # "$SUDO $bindir/ndsimonitor -u; sleep 10; $SUDO $bindir/ndsimonitor -l" \
fi
# Run full check of all monitored ports

# Succeed when `netstat -n -a` prints a line matching the regex in $1.
# The pattern is passed quoted so it reaches grep unchanged (no globbing or
# word splitting). An empty pattern is reported as "not listening", which is
# what the former unquoted call did (grep failed with a usage error).
_ndscheck_port_listening() {
  [ -n "$1" ] || return 2
  netstat -n -a | grep -- "$1" > /dev/null 2>&1
}

checkCount=${#checkName[@]}
index=0
# Loop through all services defined in the array
while [ "$index" -lt "$checkCount" ]; do
  _ndscheck_port_listening "${checkPort[$index]}"; es=$?
  if [ "$es" -ne 0 ]; then
    # Attempt corrective action and keep the output in a variable; sed joins
    # the output lines with '#' so the result fits on one log line
    result="$( (eval "${checkAction[$index]}") 2>&1 | sed -e :a -e '$!N;s/\n/#/;ta' -e 'P;D')"
    MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${checkName[$index]},${checkPort[$index]},Restart message=\"${result}\"\n"
    # A service that was not mentioned in the previous alert is a new alert
    if [ -f "$ndscheckAlertFile" ] && ! grep -q -- "${checkName[$index]},${checkPort[$index]}" "$ndscheckAlertFile"; then
      NEWALERT="YES"
    fi
    # Wait up to 10 additional seconds before giving up on restart attempt
    RETRY=10
    _ndscheck_port_listening "${checkPort[$index]}"; es=$?
    while [ "$es" -ne 0 ] && [ "$RETRY" -gt 0 ]; do
      sleep 1
      _ndscheck_port_listening "${checkPort[$index]}"; es=$?
      RETRY=$((RETRY - 1))
    done
    # Check the result of the restart attempt
    if [ "$es" -ne 0 ]; then
      MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${checkName[$index]},${checkPort[$index]},Restart failed\n"
      NOTIFY="YES"; URGENT="YES"
    else
      # Service restarted OK
      MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${checkName[$index]},${checkPort[$index]},Restart OK\n"
      NOTIFY="YES"
    fi
    # Flag problem URGENT if service is having a chronic problem
    # egrep "All OK|B1ENTLoopback,.16384.*LISTEN,.*Restart message" $log_file | tail -6 | grep "Restart message" | wc -l | tr -d " "
    # Count the restart entries for this service among the last
    # $ndscheckMAX_STATUS_COUNT log lines that are either "All OK" or a
    # restart of this service
    restartCount=$(grep -E -- "All OK|${checkName[$index]},${checkPort[$index]},.*Restart message" "$log_file" \
      | tail -n "$ndscheckMAX_STATUS_COUNT" \
      | grep -c "Restart message")
    # Previous restarts plus the current one >= $ndscheckMAX_STATUS_COUNT/2
    # (integer division): send an URGENT alert
    if [ "$((restartCount + 1))" -ge "$((ndscheckMAX_STATUS_COUNT / 2))" ]; then
      MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${checkName[$index]},${checkPort[$index]},Recurring problem detected#restartCount=$((restartCount + 1))\n"
      URGENT="YES"
      if [ "$RECURRENT" = "NO" ]; then
        NEWALERT="YES"
      fi
      RECURRENT="YES"
    fi
  fi
  index=$((index + 1))
done
# Every service has been checked (and restarted where necessary). Anything
# recorded in MESSAGE is handed to handleMESSAGE, and the run ends here.
if [ -n "$MESSAGE" ]; then
  handleMESSAGE
  # Clear PID file
  rm -f /tmp/ndscheck.pid
  exit 0
fi
# If we made it this far, everything must be running
f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),All OK"
# An existing alert file means an alert was sent earlier: send the all-clear
# to the same audience and forget the alert. File variables are quoted so an
# empty variable is "no file" (unquoted, `test -f` with no operand is true).
if [ -f "$ndscheckAlertFile" ]; then
  # Send an all clear notification if previous message was sent
  f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Alert message CORRECTED being sent."
  {
    printf '\n%s\n' "$(date)"
    printf '\n%s' "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),All OK"
  } > "$EMAIL_BODY"
  # Recipient lists are space separated and are split on purpose
  # shellcheck disable=SC2086
  if grep -q 'URGENT=YES' "$ndscheckAlertFile"; then
    "$bindir/mailx" -s"CORRECTED -- ${SUBJECT}" $EMAIL_URGENT $EMAIL_NOTIFY < "$EMAIL_BODY"
  else
    "$bindir/mailx" -s"CORRECTED -- ${SUBJECT}" $EMAIL_NOTIFY < "$EMAIL_BODY"
  fi
  rm -f "$EMAIL_BODY"
  rm -f "$ndscheckAlertFile"
fi
# No failures outstanding: drop the failure counter
if [ -f "$ndsfailCountFile" ]; then
  rm -f "$ndsfailCountFile"
fi
# Trim a log file in place to its last N lines.
#   $1 - log file (nothing is done when it does not exist)
#   $2 - number of lines to keep
# The kept lines are staged in a private mktemp file, and the log is only
# rewritten once staging succeeded, so a failed copy can no longer leave the
# log truncated to nothing. Rewriting through redirection keeps the log's
# existing ownership and permissions.
_ndscheck_groom_log() {
  local file=$1 keep=$2 scratch
  [ -f "$file" ] || return 0
  scratch=$(mktemp "${TMPDIR:-/tmp}/ndscheck.log.XXXXXX") || return 1
  if tail -n "$keep" -- "$file" > "$scratch"; then
    cat -- "$scratch" > "$file"
  fi
  rm -f -- "$scratch"
}

# Groom log file (keep up to 90 days worth of entries)
_ndscheck_groom_log "$log_file" 1259
# Clear PID file
rm -f /tmp/ndscheck.pid