This page (revision-1) was last changed on 29-Nov-2024 16:16 by UnknownAuthor

Only authorized users are allowed to rename pages.

Only authorized users are allowed to delete pages.

Page revision history

Version Date Modified Size Author Changes ... Change note

Page References

Incoming links Outgoing links

Version management

Difference between version and

At line 1 added 348 lines
%%information
This is a custom script and may not be on your system.
%%
!Primary Purpose
This script checks to see if various ports are "listening" and if not, attempts to restart the service associated with the port.
!Deployed Location
Located at /usr/bin/ndscheck.sh
The [LDAPBuild Process|LDAPBuildProcess] distributes the <buildhome>[Directory-Info.com]/config/ndscheck.sh file to /usr/bin/ndscheck.sh
!Method of Execution
The script is run via [cron.tab|NDSCron.tab]
!Logging
The script creates a log file [/var/nds/ndscheck.log|ConfigFilesNdscheck.log]
!Alerting Methods
Upon an alert, an email message is sent to the defined email groups dependent on the level of the alert.
!Sending Test Message
Calling /usr/bin/ndscheck.sh and passing "msgtest" on the command line will send a test message to the [EMAIL_NOTIFY] group.
!Watch Out for These Items
Currently the following items are "hard-coded" within the ndscheck.sh.
* Ports that are checked by the script. See line: checkPort=(.524.*LISTEN .636.*LISTEN .8389.*LISTEN .8636.*LISTEN)
* Maintenance time window: sMaintTime and eMaintTime values are from the [Script Variables|Shared Script Variables]
Typical script as implemented is shown below.
{{{
#!/bin/bash
#:ndscheck.sh
# NOTE: All shell values cleared at this point
# Modification history:
# 2/18/2003-- Created
# 3/10/2003-- Fixed use of mailx
# 3/13/2003-- Tweaked logging and notification
# 3/14/2003-- Tweaked notification some more
# 3/21/2003-- Handle and alert on recurring restarts
# Fixed logging on some types of alerts
# Combined NOTIFY and URGENT messages into a single mailx command
# Check for previous run of ndscheck.sh and send URGENT alert
# if found
# Finalized logic on notification
# 4/4/2003 -- Changed checkAction for eDirectory to use eDirAutoStart
# 8/11/2003-- Commented out check for '.dsbackup' file
# 11/13/2003--Modified for 8.7.1 -
# Added ports for SSL on HTTPSTACK
# Commented out start of ndsimonitor and pki as these are loaded via the
# /usr/lib/nds-modules/ndsmodules.conf file jim@willeke.com
# 12/03/2003--Modified for changes in ndsenv and ndsenv_functions
# 1/9/2004 -- Changed reference to function 'eDirAutoStart' to 'f_edirautostart'
# Fixed load location for .ndsenv*
# 2005/6/7 -- msgtest Pass as a parameter and we will send a test message and exit
#
# netstat -n -a | grep LISTEN
# Look for LISTEN ports for the following services:
# NDS:524:'/etc/init.d/nds stop; /etc/init.d/nds start'
# LDAP:636:'/etc/init.d/nds stop; /etc/init.d/nds start'
# iMonitor:8389:'$bindir/ndsimonitor -u; $bindir/ndsimonitor -l'
# DXMLDriver:16384:'dxmlconfig command -t $TREENAME `cat /var/nds/.dsbackup` -d cn=B1ENTLoopback.cn=$SERVERNAME-driverset.ou=DirXML.$BaseDNdot -c start cn=$SERVERNAME-DSbackup.ou=Administration.$BaseDNdot'
#
# dxmlconfig command -t $TREENAME `cat /var/nds/.dsbackup` -d cn=B1ENTtoCCSAD.cn=$SERVERNAME-driverset.ou=DirXML.$BaseDNdot -c start cn=$SERVERNAME-DSbackup.ou=Administration.$BaseDNdot
#
# To pull driver names and their Authentication Context info (auto-detection of DirXML driver info):
# ice -v -o -SLDAP -L/var/nds/$TREENAME.der -dcn=$SERVERNAME-DSbackup,ou=Administration,$BaseDN -w`cat /var/nds/.dsbackup|awk -F" " '{print $2}'` -bou=DirXML,$BaseDN -F"objectClass=DirXML-Driver" -DDELIM -f /tmp/tmp.dxmlcheck.csv -tdn,cn,DirXML-ShimAuthServer
#
# To get just the Publisher port from the attribute
# echo "ndstest1.security.[Directory-Info.com].net:9192:ino0s701.svr.[Directory-Info.com].net:9292"|awk -F":" '{print $4}'
# Read eDirectory installation variables and subroutines.
# Both files are sourced below, so both are verified first. Previously only
# .ndsenv was checked; a missing .ndsenv_functions produced just a
# "No such file" error from "." and the script carried on without it.
for ndsenvFile in /var/nds/.ndsenv /var/nds/.ndsenv_functions
do
  if [ ! -f "$ndsenvFile" ]
  then
    printf "\nMissing %s -- cannot run!\n" "$ndsenvFile"
    exit 1
  fi
done
# Load order is unchanged: the functions file first, then the variables.
. /var/nds/.ndsenv_functions
. /var/nds/.ndsenv
# Read variables specific to bash shell; defines what processes to check.
# Bail out early when the file is absent, otherwise source it.
if [ ! -f /var/nds/.ndsenv.bash ]; then
  printf "\nMissing /var/nds/.ndsenv.bash -- cannot run!\n"
  exit 1
fi
. /var/nds/.ndsenv.bash
# Because automatic detection of DirXML drivers is not implemented, this is not needed:
# ! -f /var/nds/$TREENAME.der -o
#if [ ! -f /var/nds/.dsbackup ]; then
# printf "\nSystem not PREPared for ndsbackup.sh.\n"
# exit 1
#fi
######################################################################
# Write the current alert state to $ndscheckAlertFile and store a value in
# the failure counter file.
#   $1 - value to write to $ndsfailCountFile
# The alert file holds the expanded $MESSAGE followed by "URGENT=..." and
# "RECURRENT=..." lines. %b expands the literal "\n" separators stored in
# $MESSAGE without treating "%" characters in the text as printf directives.
f_ndscheck_save_alert()
{
  printf '%b\nURGENT=%s\nRECURRENT=%s' "${MESSAGE}" "${URGENT}" "${RECURRENT}" > "$ndscheckAlertFile"
  echo "$1" > "$ndsfailCountFile"
}

# Mail $MESSAGE at the level selected by $URGENT, log the send, then record
# the alert and reset the failure counter to 1.
#   $1 - log text used when $URGENT is "YES"
#   $2 - log text used otherwise (NOTIFY level)
f_ndscheck_send_alert()
{
  local level logText
  if [ "$URGENT" = "YES" ]; then
    level="URGENT"
    logText=$1
  else
    level="NOTIFY"
    logText=$2
  fi
  printf '\n%s\n' "$(date)" > "$EMAIL_BODY"
  printf '\n%b' "${MESSAGE}" >> "$EMAIL_BODY"
  f_write_and_log "${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${logText}"
  # The address variables are deliberately unquoted so that a value holding
  # several space-separated recipients becomes several mailx arguments.
  if [ "$level" = "URGENT" ]; then
    "$bindir/mailx" -s"URGENT -- ${SUBJECT}" $EMAIL_URGENT $EMAIL_NOTIFY < "$EMAIL_BODY"
  else
    "$bindir/mailx" -s"NOTIFY -- ${SUBJECT}" $EMAIL_NOTIFY < "$EMAIL_BODY"
  fi
  rm -f "$EMAIL_BODY"
  f_ndscheck_save_alert 1
}

# Decide whether the accumulated $MESSAGE must be mailed, and do so.
#
# Globals read:  MESSAGE, DATE, NEWALERT, URGENT, RECURRENT, STATUS_COUNT,
#                ndscheckMAX_STATUS_COUNT, ndscheckAlertFile, ndsfailCountFile,
#                EMAIL_BODY, EMAIL_URGENT, EMAIL_NOTIFY, SUBJECT, bindir
# Calls:         f_write_and_log (not defined in this file), $bindir/mailx
#
# Behaviour:
#   * no alert file yet                 -> send the first alert
#   * alert file present, NEWALERT=YES  -> send again (new situation)
#   * alert file present, NEWALERT=NO:
#       STATUS_COUNT <= max -> only log, and store STATUS_COUNT in the counter
#       STATUS_COUNT >  max -> wait expired, resend
# Every send rewrites the alert file and resets the counter file to 1.
#
# Fix: the level test was written [ "$URGENT"="YES" ] in the resend and
# new-alert paths. Without spaces around "=" that is a single non-empty word,
# which is always true, so those paths always mailed URGENT.
handleMESSAGE()
{
  if [ -f "$ndscheckAlertFile" ]; then
    if [ "$NEWALERT" = "YES" ]; then
      f_ndscheck_send_alert \
        "Alert already sent but new URGENT alert situation detected." \
        "Alert already sent but new NOTIFY alert situation detected."
    elif [ "$NEWALERT" = "NO" ]; then
      if [ "$STATUS_COUNT" -le "$ndscheckMAX_STATUS_COUNT" ]; then
        f_write_and_log "${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Alert already sent; new alert will not be sent.#STATUS_COUNT=$STATUS_COUNT"
        f_ndscheck_save_alert "$STATUS_COUNT"
      elif [ "$STATUS_COUNT" -gt "$ndscheckMAX_STATUS_COUNT" ]; then
        f_ndscheck_send_alert \
          "Wait expired; resending URGENT alert." \
          "Wait expired; resending NOTIFY alert."
      fi
    fi
  else
    f_ndscheck_send_alert \
      "Alert message URGENT being sent." \
      "Alert message NOTIFY being sent."
  fi
}
##############################################################
# Override ndsenv variables
##############################################################
# The following variables should be set in /var/nds/.ndsenv; if these aren't set
# for some reason, initialize them to some good default values
#
# Define maintenance window (start/end time); ndscheck.sh will not process
# during this window. In the form of "hhmmss". Normally defined in ndsenv;
# ":=" assigns the default only when the variable is unset or empty.
: "${sMaintTime:=010000}"
: "${eMaintTime:=013000}"
# File location to set flag -- ignores down condition during maintenance
# (normally defined in ndsenv)
: "${ndscheckMaintFlag:=/tmp/nondscheck}"
# Maximum polling intervals to ignore a down condition (normally defined in ndsenv)
: "${ndscheckMAX_STATUS_COUNT:=6}"
# Define local environment variables
# Temp file to hold email message
: "${EMAIL_BODY:=/tmp/ndscheckMail.tmp}"
# Temp file to hold counter for successive failures detected
: "${ndsfailCountFile:=/tmp/ndsfailCount.tmp}"
# Temp file to hold counter for successive restarts
: "${ndsrestartCountFile:=/tmp/ndsrestartCount.tmp}"
# Temp file to keep track of what services are down from poll to poll
: "${ndscheckAlertFile:=/tmp/ndscheckAlert.tmp}"
# Per-run state, always reset
STATUS_COUNT=0
NEWALERT="NO"
MESSAGE=''
OLDMESSAGE=''
SUBJECT="Subject: ${SERVERNAME} eDirectory status alert"
# "msgtest" as the first argument: send a test message and stop.
# f_messagesend is not defined in this file.
i_test="$1"
case "$i_test" in
  msgtest)
    f_messagesend
    exit 0
    ;;
esac
PATH=$PATH:/bin:/usr/local/bin
# Permanent log to keep 90 days worth of ndscheck status messages
log_file=/var/nds/ndscheck.log
# Set notification flags
NOTIFY="NO"
# Restore URGENT/RECURRENT from the alert file left by a previous poll.
# The patterns are anchored: an unanchored "URGENT=" also matches the
# "RECURRENT=" line, so both lines were handed to the first eval.
if [ -f "$ndscheckAlertFile" ]; then
  # NOTE(review): this evals text read from a file that defaults to a fixed
  # name under /tmp. Anything able to write that file can run commands as
  # the user running this script. Consider parsing the value and accepting
  # only YES/NO instead of eval.
  eval "$(grep "^URGENT=" "$ndscheckAlertFile")"
  eval "$(grep "^RECURRENT=" "$ndscheckAlertFile")"
fi
: "${URGENT:=NO}"
: "${RECURRENT:=NO}"
if [ -f "$ndsfailCountFile" ]; then
  STATUS_COUNT=$(cat "$ndsfailCountFile")
fi
# An empty or non-numeric counter restarts from 0 rather than breaking the
# arithmetic here and the numeric comparisons made on STATUS_COUNT later.
case "$STATUS_COUNT" in
  ''|*[!0-9]*) STATUS_COUNT=0 ;;
esac
# 10# forces base 10 so a value with a leading zero is not read as octal.
STATUS_COUNT=$(( 10#$STATUS_COUNT + 1 ))
# Check for a previous run of this script before continuing.
# /tmp/ndscheck.pid holds the PID written by the last run that got this far.
if [ -f /tmp/ndscheck.pid ]; then
  pid=$(cat /tmp/ndscheck.pid)
  # Drop the ps header line; anything left means that PID is still alive
  prevRun=$(ps -fp $pid | grep -v "PPID")
  if [ -n "$prevRun" ]; then
    MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Previous $0 still running; please check server#Current PID=$$#Old PID=$(cat /tmp/ndscheck.pid)\n"
    NOTIFY="YES"
    URGENT="YES"
    # Only a new alert if the existing alert file does not already report it
    if [ -f $ndscheckAlertFile ]; then
      if [ -z "$(grep "Previous $0 still running" $ndscheckAlertFile)" ]; then
        NEWALERT="YES"
      fi
    fi
    handleMESSAGE
    # The PID file is left in place: it belongs to the run still in progress
    exit 0
  fi
fi
# Record this run's PID
echo "$$" > /tmp/ndscheck.pid
# Skip this poll while inside the scheduled maintenance window.
# Everything from offset 8 of $DATE with "00" appended is compared, as a
# string, with the "hhmmss" window bounds; both bounds are exclusive.
# NOTE(review): $DATE is set outside this file -- presumably YYYYMMDDhhmm; confirm.
nowHMS="${DATE:8}00"
if [[ "$nowHMS" > "$sMaintTime" ]] && [[ "$nowHMS" < "$eMaintTime" ]]; then
  f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Scheduled maintenance window; exiting without action."
  # Clear PID file
  rm -f /tmp/ndscheck.pid
  exit 0
fi
# Check for maintenance flag (ignore down condition temporarily).
# The flag file holds a countdown of polling cycles to skip. Each run
# decrements it and exits without checking any service; when the countdown
# reaches zero the flag file is removed.
if [ -f "$ndscheckMaintFlag" ]; then
  checkDelay=$(cat "$ndscheckMaintFlag")
  # An empty or non-numeric flag file counts as an expired countdown.
  # Otherwise the arithmetic and both numeric tests fail, the bad value is
  # written back, and every later poll is skipped as well.
  case "$checkDelay" in
    ''|*[!0-9]*) checkDelay=0 ;;
  esac
  # 10# forces base 10 so a value with a leading zero is not read as octal.
  checkDelay=$(( 10#$checkDelay - 1 ))
  if [ "$checkDelay" -le 0 ]; then
    rm -f "$ndscheckMaintFlag"
    f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),manual maintenance in progress (0 left); exiting."
  else
    # Never allow services to be down for more than 6 polling cycles (~1 hour)
    if [ "$checkDelay" -gt "$ndscheckMAX_STATUS_COUNT" ]; then
      checkDelay=$ndscheckMAX_STATUS_COUNT
    fi
    echo "$checkDelay" > "$ndscheckMaintFlag"
    f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),manual maintenance in progress ($checkDelay left); exiting."
  fi
  # Clear PID file on both paths. The "0 left" path used to exit without
  # doing so, leaving a stale PID behind for the next run's
  # previous-run check to test.
  rm -f /tmp/ndscheck.pid
  exit 0
fi
# Define a default set of arrays if they haven't been setup for this server.
# The arrays are parallel: element N of checkName, checkPort and checkAction
# describes one monitored service.
if [ -z "$checkName" ]; then
  checkName=(NDS LDAPS iMonitor)
  # checkPort=(.524.*LISTEN .636.*LISTEN .8389.*LISTEN) JSW
  # Regular expressions matched against "netstat -n -a" output. They are
  # quoted so the shell cannot treat the "*" as a filename wildcard.
  checkPort=('.524.*LISTEN' '.636.*LISTEN' '.8389.*LISTEN' '.8636.*LISTEN')
  # NOTE(review): checkName has 3 entries, checkPort has 4 and checkAction
  # has 1. The polling loop counts checkName entries, so the .8636 pattern
  # is never polled and LDAPS/iMonitor have no restart command. Confirm this
  # is intended.
  checkAction=(
    "$SUDO /etc/init.d/nds stop; sleep 30; f_edirautostart; $bindir/ndsstat"
    # These services are restarted by 8.7.1 in /usr/lib/nds-modules/ndsmodules.conf
    # "$SUDO $sbindir/npki -u; sleep 10; $SUDO $sbindir/npki -l"
    # "$SUDO $bindir/ndsimonitor -u; sleep 10; $SUDO $bindir/ndsimonitor -l"
  )
fi
# Run full check of all monitored ports

# Print the "YYYY,MM,DD,HH:MM:SS" prefix used on every status record.
# The date part is sliced from $DATE; the time is read when called.
f_ndscheck_stamp()
{
  printf '%s,%s,%s,%s' "${DATE:0:4}" "${DATE:4:2}" "${DATE:6:2}" "$(date '+%H:%M:%S')"
}

# Return 0 when "netstat -n -a" prints a line matching the regex in $1.
# The pattern is passed quoted so the shell cannot glob or split it.
# An empty pattern returns non-zero, as before, when grep was run with no
# pattern argument and failed.
f_ndscheck_listening()
{
  [ -n "$1" ] || return 2
  netstat -n -a | grep "$1" >/dev/null 2>&1
}

# Poll every service in checkName/checkPort/checkAction. For each service
# with no matching netstat line: run its restart action, wait up to 10
# seconds for the port to appear, and append the outcome to $MESSAGE.
# Globals written: checkCount, index, es, result, RETRY, restartCount,
#                  MESSAGE, NOTIFY, URGENT, RECURRENT, NEWALERT
f_ndscheck_run_checks()
{
  local svcName svcPort
  checkCount=${#checkName[@]}
  index=0
  # Loop through all services defined in the array
  while [ "$index" -lt "$checkCount" ]; do
    svcName=${checkName[$index]}
    svcPort=${checkPort[$index]}
    if ! f_ndscheck_listening "$svcPort"; then
      # Attempt corrective action and keep the output in a variable.
      # The action runs in a subshell; sed joins its output lines with "#".
      result="$( (eval "${checkAction[$index]}") 2>&1 | sed -e :a -e '$!N;s/\n/#/;ta' -e 'P;D')"
      MESSAGE="${MESSAGE}$(f_ndscheck_stamp),${svcName},${svcPort},Restart message=\"${result}\"\n"
      # A service not named in the existing alert file is a new situation
      if [ -f "$ndscheckAlertFile" ] && [ -z "$(grep "${svcName},${svcPort}" "$ndscheckAlertFile")" ]; then
        NEWALERT="YES"
      fi
      # Wait up to 10 additional seconds before giving up on restart attempt
      RETRY=10
      f_ndscheck_listening "$svcPort"; es=$?
      while [ "$es" -ne 0 ] && [ "$RETRY" -gt 0 ]; do
        sleep 1
        f_ndscheck_listening "$svcPort"; es=$?
        RETRY=$((RETRY - 1))
      done
      # Check the result of the restart attempt
      if [ "$es" -ne 0 ]; then
        MESSAGE="${MESSAGE}$(f_ndscheck_stamp),${svcName},${svcPort},Restart failed\n"
        NOTIFY="YES"; URGENT="YES"
      else
        # Service restarted OK
        MESSAGE="${MESSAGE}$(f_ndscheck_stamp),${svcName},${svcPort},Restart OK\n"
        NOTIFY="YES"
      fi
      # Flag problem URGENT if service is having a chronic problem.
      # Count "Restart message" records for this service among the last
      # $ndscheckMAX_STATUS_COUNT log lines that are either "All OK" or a
      # restart of this service.
      restartCount=$(grep -E "All OK|${svcName},${svcPort},.*Restart message" "$log_file" \
        | tail -n "$ndscheckMAX_STATUS_COUNT" \
        | grep -c "Restart message")
      # This restart counts as one more; at $ndscheckMAX_STATUS_COUNT/2 or
      # above, send an URGENT alert
      if [ $((restartCount + 1)) -ge $((ndscheckMAX_STATUS_COUNT / 2)) ]; then
        MESSAGE="${MESSAGE}$(f_ndscheck_stamp),${svcName},${svcPort},Recurring problem detected#restartCount=$((restartCount + 1))\n"
        URGENT="YES"
        if [ "$RECURRENT" = "NO" ]; then
          NEWALERT="YES"
        fi
        RECURRENT="YES"
      fi
    fi
    index=$((index + 1))
  done
}
f_ndscheck_run_checks
# We've checked/restarted all services, now do something with the accumulated messages.
# A non-empty MESSAGE means at least one problem was recorded on this run.
if [ -n "$MESSAGE" ]; then
  handleMESSAGE
  # Clear PID file
  rm -f /tmp/ndscheck.pid
  exit 0
fi
# If we made it this far, everything must be running
f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),All OK"
if [ -f $ndscheckAlertFile ]; then
  # Send an all clear notification if previous message was sent
  f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Alert message CORRECTED being sent."
  {
    printf "\n$(date)\n"
    printf "\n${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),All OK"
  } > $EMAIL_BODY
  # The urgent list is added only when the recorded alert was URGENT=YES
  clearTo="$EMAIL_NOTIFY"
  if [ -n "$(grep URGENT=YES $ndscheckAlertFile)" ]; then
    clearTo="$EMAIL_URGENT $EMAIL_NOTIFY"
  fi
  # $clearTo is left unquoted so each address becomes its own argument
  $bindir/mailx -s"CORRECTED -- ${SUBJECT}" $clearTo < $EMAIL_BODY
  rm -f $EMAIL_BODY
  rm -f $ndscheckAlertFile
fi
# Reset the successive-failure counter
if [ -f $ndsfailCountFile ]; then
  rm -f $ndsfailCountFile
fi
# Groom log file (keep up to 90 days worth of entries)

# Trim a log file in place to its last N lines.
#   $1 - log file to trim
#   $2 - number of trailing lines to keep
# The log is rewritten only after the scratch copy succeeded. Previously a
# failed cp still let the ">" redirect truncate the log, losing its content.
# A missing log file is left alone rather than created empty.
f_ndscheck_groom_log()
{
  local logFile=$1 keepLines=$2 scratch
  [ -f "$logFile" ] || return 0
  scratch=/tmp/ndscheck.log.$$
  if cp "$logFile" "$scratch"; then
    tail -n "$keepLines" "$scratch" > "$logFile"
  fi
  rm -f "$scratch"
}
# NOTE(review): 1259 lines is far fewer than 90 days of entries if the script
# is polled every few minutes -- confirm the intended retention.
f_ndscheck_groom_log "$log_file" 1259
# Clear PID file
rm -f /tmp/ndscheck.pid
}}}