This is a custom script and may not be on your system.

Primary Purpose#

This script checks to see if various ports are "listening" and if not, attempts to restart the service associated with the port.

Deployed Location#

Located at /usr/bin/ndscheck.sh

The LDAPBuild Process distributes the <buildhome>Directory-Info.com/config/ndscheck.sh file to /usr/bin/ndscheck.sh

Method of Execution #

The script is run via cron (scheduled in the crontab).

Logging#

The script creates a log file /var/nds/ndscheck.log

Alerting Methods#

Upon an alert, an email message is sent to the defined email groups dependent on the level of the alert.

Sending Test Message#

Calling /usr/bin/ndscheck.sh and passing "msgtest" on the commandline will send a test message to the EMAIL_NOTIFY group.

Watch out for These Items#

Currently the following items are "hard-coded" within the ndscheck.sh.
  • Ports that are checked by the script. See line: checkPort=(.524.*LISTEN .636.*LISTEN .8389.*LISTEN .8636.*LISTEN)
  • Maintenance time window: sMaintTime and eMaintTime values are from the Script Variables

Typical script as implemented is shown below.

#!/bin/bash
#:ndscheck.sh
# NOTE: All shell values cleared at this point
# Modification history:
# 2/18/2003-- Created
# 3/10/2003-- Fixed use of mailx
# 3/13/2003-- Tweaked logging and notification
# 3/14/2003-- Tweaked notification some more
# 3/21/2003-- Handle and alert on recurring restarts
#             Fixed logging on some types of alerts
#             Combined NOTIFY and URGENT messages into a single mailx command
#             Check for previous run of ndscheck.sh and send URGENT alert
#              if found
#             Finalized logic on notification
# 4/4/2003 -- Changed checkAction for eDirectory to use eDirAutoStart
# 8/11/2003-- Commented out check for '.dsbackup' file
# 11/13/2003--Modified for 8.7.1 -
#             Added ports for SSL on HTTPSTACK
#             Commented out start of ndsimonitor and pki as these are loaded via the
#             /usr/lib/nds-modules/ndsmodules.conf file jim@willeke.com
# 12/03/2003--Modified for changes in ndsenv and ndsenv_functions
# 1/9/2004 -- Changed reference to function 'eDirAutoStart' to 'f_edirautostart'
#             Fixed load location for .ndsenv*
# 2005/6/7 -- msgtest Pass as a parameter and we will send a test message and exit
#
# netstat -n -a | grep LISTEN
# Look for LISTEN ports for the following services:
# NDS:524:'/etc/init.d/nds stop; /etc/init.d/nds start'
# LDAP:636:'/etc/init.d/nds stop; /etc/init.d/nds start'
# iMonitor:8389:'$bindir/ndsimonitor -u; $bindir/ndsimonitor -l'
# DXMLDriver:16384:'dxmlconfig command -t $TREENAME `cat /var/nds/.dsbackup` -d cn=B1ENTLoopback.cn=$SERVERNAME-driverset.ou=DirXML.$BaseDNdot -c start cn=$SERVERNAME-DSbackup.ou=Administration.$BaseDNdot'
#
# dxmlconfig command -t $TREENAME `cat /var/nds/.dsbackup` -d cn=B1ENTtoCCSAD.cn=$SERVERNAME-driverset.ou=DirXML.$BaseDNdot -c start cn=$SERVERNAME-DSbackup.ou=Administration.$BaseDNdot
#
# To pull driver names and their Authentication Context info (auto-detection of DirXML driver info):
# ice -v -o -SLDAP -L/var/nds/$TREENAME.der -dcn=$SERVERNAME-DSbackup,ou=Administration,$BaseDN -w`cat /var/nds/.dsbackup|awk -F" " '{print $2}'` -bou=DirXML,$BaseDN -F"objectClass=DirXML-Driver" -DDELIM -f /tmp/tmp.dxmlcheck.csv -tdn,cn,DirXML-ShimAuthServer
#
# To get just the Publisher port from the attribute
# echo "ndstest1.security.[Directory-Info.com].net:9192:ino0s701.svr.[Directory-Info.com].net:9292"|awk -F":" '{print $4}'

# Read eDirectory installation variables and subroutines.
# Every file that gets sourced is verified first: sourcing a missing file only
# prints an error in bash and the script would carry on without the helper
# functions it calls later (f_write_and_log, f_edirautostart, f_messagesend --
# presumably provided by these files; confirm).
if [ ! -f /var/nds/.ndsenv ]; then
  printf "\nMissing /var/nds/.ndsenv -- cannot run!\n"
  exit 1
fi
if [ ! -f /var/nds/.ndsenv_functions ]; then
  printf "\nMissing /var/nds/.ndsenv_functions -- cannot run!\n"
  exit 1
fi
# Functions are loaded before the variables, as in the original load order.
. /var/nds/.ndsenv_functions
. /var/nds/.ndsenv

# Read variables specific to bash shell; defines what processes to check
if [ ! -f /var/nds/.ndsenv.bash ]; then
  printf "\nMissing /var/nds/.ndsenv.bash -- cannot run!\n"
  exit 1
fi
. /var/nds/.ndsenv.bash



# Because automatic detection of DirXML drivers is not implemented, this is not needed:
# ! -f /var/nds/$TREENAME.der -o
#if [ ! -f /var/nds/.dsbackup ]; then
#  printf "\nSystem not PREPared for ndsbackup.sh.\n"
#  exit 1
#fi




######################################################################
# Persist the current alert so the next poll can tell what was already
# reported: message text plus URGENT/RECURRENT markers go to
# $ndscheckAlertFile, the failure counter ($1) goes to $ndsfailCountFile.
_ndscheck_save_alert_state()
{
  # MESSAGE carries literal "\n" separators; %b expands them while leaving any
  # '%' found in captured command output untouched (MESSAGE is data, not a
  # printf format).
  printf '%b' "${MESSAGE}\nURGENT=${URGENT}\nRECURRENT=${RECURRENT}" > "$ndscheckAlertFile"
  echo "$1" > "$ndsfailCountFile"
}

# Mail the accumulated MESSAGE at the level selected by $URGENT, log what was
# done and reset the failure counter to 1.
#   $1 - log text placed before the level word (URGENT/NOTIFY)
#   $2 - log text placed after the level word
_ndscheck_send_alert()
{
  local lead="$1" trail="$2" level="NOTIFY"

  printf '\n%s\n' "$(date)"  >  "$EMAIL_BODY"
  printf '\n%b' "$MESSAGE"   >> "$EMAIL_BODY"

  # The spaces around '=' matter: [ "$URGENT"="YES" ] is a one-argument test
  # that is always true and would escalate every alert to URGENT.
  if [ "$URGENT" = "YES" ]; then
    level="URGENT"
  fi
  f_write_and_log "${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${lead}${level}${trail}"

  # Recipient variables are left unquoted on purpose so a list of several
  # addresses expands to separate mailx arguments.
  if [ "$level" = "URGENT" ]; then
    "$bindir/mailx" -s"URGENT -- ${SUBJECT}" $EMAIL_URGENT $EMAIL_NOTIFY < "$EMAIL_BODY"
  else
    "$bindir/mailx" -s"NOTIFY -- ${SUBJECT}" $EMAIL_NOTIFY < "$EMAIL_BODY"
  fi
  rm -f "$EMAIL_BODY"
  _ndscheck_save_alert_state 1
}

# handleMESSAGE -- log the accumulated $MESSAGE and decide whether to mail it.
#
# Globals read:  MESSAGE, NEWALERT, URGENT, RECURRENT, STATUS_COUNT, DATE,
#                SUBJECT, ndscheckMAX_STATUS_COUNT, EMAIL_URGENT, EMAIL_NOTIFY,
#                EMAIL_BODY, ndscheckAlertFile, ndsfailCountFile, bindir
# Files written: $ndscheckAlertFile, $ndsfailCountFile, $EMAIL_BODY (temporary)
#
# Decision table:
#   no alert file yet                          -> send alert, counter = 1
#   alert file, NEWALERT=YES                   -> send alert, counter = 1
#   alert file, NEWALERT=NO, count <= maximum  -> log only, counter = count
#   alert file, NEWALERT=NO, count >  maximum  -> resend alert, counter = 1
handleMESSAGE()
{
  if [ ! -f "$ndscheckAlertFile" ]; then
    _ndscheck_send_alert "Alert message " " being sent."
    return
  fi

  if [ "$NEWALERT" = "YES" ]; then
    _ndscheck_send_alert "Alert already sent but new " " alert situation detected."
  elif [ "$NEWALERT" = "NO" ]; then
    if [ "$STATUS_COUNT" -le "$ndscheckMAX_STATUS_COUNT" ]; then
      # Still inside the quiet period: record the event but do not mail again.
      f_write_and_log "${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Alert already sent; new alert will not be sent.#STATUS_COUNT=$STATUS_COUNT"
      _ndscheck_save_alert_state "$STATUS_COUNT"
    elif [ "$STATUS_COUNT" -gt "$ndscheckMAX_STATUS_COUNT" ]; then
      _ndscheck_send_alert "Wait expired; resending " " alert."
    fi
  fi
}

##############################################################
# Override ndsenv variables
##############################################################
# The following variables should be set in /var/nds/.ndsenv; if these aren't set
# for some reason, initialize them to some good default values

# Print the last YES/NO value saved for flag $1 (e.g. URGENT) in the alert
# state file $2; prints nothing when the file or the flag line is missing.
# Only whole lines of the exact form NAME=YES / NAME=NO are accepted and
# nothing is eval'ed: the state file also holds captured command output, which
# must never be executed.
ndscheck_read_flag()
{
  local entry
  entry=$(grep -E "^$1=(YES|NO)\$" "$2" 2>/dev/null | tail -n 1)
  printf '%s' "${entry#*=}"
}

# Define maintenance window (start/end time); ndscheck.sh will not process
# during this window.  In the form of "hhmmss". Defined in ndsenv
sMaintTime=${sMaintTime:-010000}
eMaintTime=${eMaintTime:-013000}
# File location to set flag -- ignores down condition during maintenance. Defined in ndsenv
ndscheckMaintFlag=${ndscheckMaintFlag:-/tmp/nondscheck}
# Maximum polling intervals to ignore a down condition. Defined in ndsenv
ndscheckMAX_STATUS_COUNT=${ndscheckMAX_STATUS_COUNT:-6}
# Define local environment variables
EMAIL_BODY=${EMAIL_BODY:-/tmp/ndscheckMail.tmp}                       # Temp file to hold email message
ndsfailCountFile=${ndsfailCountFile:-/tmp/ndsfailCount.tmp}           # Temp file to hold counter for successive failures detected
ndsrestartCountFile=${ndsrestartCountFile:-/tmp/ndsrestartCount.tmp}  # Temp file to hold counter for successive restarts
ndscheckAlertFile=${ndscheckAlertFile:-/tmp/ndscheckAlert.tmp}        # Temp file to keep track of what services are down from poll to poll
STATUS_COUNT=0
NEWALERT="NO"
MESSAGE=''
OLDMESSAGE=''
SUBJECT="Subject: ${SERVERNAME} eDirectory status alert"

# "msgtest" on the command line: send a test message and stop.
i_test="$1"
if [ "$i_test" = "msgtest" ]; then
  f_messagesend
  exit 0
fi
PATH=$PATH:/bin:/usr/local/bin
# Permanent log to keep 90 days worth of ndscheck status messages
log_file=/var/nds/ndscheck.log

# Set notification flags; URGENT/RECURRENT are carried over from the alert
# recorded by the previous poll, if there is one.
NOTIFY="NO"
if [ -f "$ndscheckAlertFile" ]; then
  ndscheck_saved=$(ndscheck_read_flag URGENT "$ndscheckAlertFile")
  if [ -n "$ndscheck_saved" ]; then
    URGENT=$ndscheck_saved
  fi
  ndscheck_saved=$(ndscheck_read_flag RECURRENT "$ndscheckAlertFile")
  if [ -n "$ndscheck_saved" ]; then
    RECURRENT=$ndscheck_saved
  fi
  unset ndscheck_saved
fi
URGENT=${URGENT:-NO}
RECURRENT=${RECURRENT:-NO}

# Count this poll on top of the failures recorded so far.
if [ -f "$ndsfailCountFile" ]; then
  STATUS_COUNT=$(cat "$ndsfailCountFile")
fi
# An empty or corrupt counter file restarts the count instead of breaking the
# arithmetic below.
case "$STATUS_COUNT" in
  ''|*[!0-9]*) STATUS_COUNT=0 ;;
esac
STATUS_COUNT=$(( 10#$STATUS_COUNT + 1 ))

# Refuse to overlap with an earlier invocation.  /tmp/ndscheck.pid holds the
# PID written by the most recent run that got past this point; if that process
# is still alive an URGENT message is handed to handleMESSAGE and this run
# stops without touching the PID file.
if test -f /tmp/ndscheck.pid; then
  pid=$(cat /tmp/ndscheck.pid)
  # Drop the ps header line (the one containing "PPID"); whatever is left
  # describes a live process with that PID.
  liveProcess=$(ps -fp $pid | grep -v "PPID")
  if test -n "$liveProcess"; then
    MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Previous $0 still running; please check server#Current PID=$$#Old PID=$(cat /tmp/ndscheck.pid)\n"
    NOTIFY="YES"
    URGENT="YES"
    # Only flag a NEW alert when the recorded alert does not mention this
    # condition yet.
    if test -f $ndscheckAlertFile; then
      priorReport=$(grep "Previous $0 still running" $ndscheckAlertFile)
      if test -z "$priorReport"; then
        NEWALERT="YES"
      fi
    fi
    handleMESSAGE
    exit 0
  fi
fi

# No live predecessor: register this run.
printf '%s\n' "$$" > /tmp/ndscheck.pid

# Scheduled maintenance window: do nothing while the current time lies
# strictly between sMaintTime and eMaintTime ("hhmmss" strings).
# NOTE(review): DATE comes from the sourced environment; the way it is sliced
# here implies the form YYYYMMDDhhmm, with "00" appended as seconds -- confirm.
# The comparison is a plain string comparison, so a window that crosses
# midnight is never matched.
if [[ ${DATE:8}00 > ${sMaintTime} && ${DATE:8}00 < ${eMaintTime} ]]; then
  f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Scheduled maintenance window; exiting without action."
  # Clear PID file
  rm -f /tmp/ndscheck.pid
  exit 0
fi

# Check for maintenance flag (ignore down condition temporarily).
# The flag file holds the number of polling cycles still to be skipped; each
# run decrements it and the file is removed once it reaches zero.
if [ -f "$ndscheckMaintFlag" ]; then
  checkDelay=$(cat "$ndscheckMaintFlag")
  # Anything that is not a plain number counts as "no cycles left".
  case "$checkDelay" in
    ''|*[!0-9]*) checkDelay=0 ;;
  esac
  checkDelay=$(( 10#$checkDelay - 1 ))
  if [ "$checkDelay" -le 0 ]; then
    rm -f "$ndscheckMaintFlag"
    f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),manual maintenance in progress (0 left); exiting."
  else
    # Never allow services to stay unchecked for more than
    # ndscheckMAX_STATUS_COUNT polling cycles.
    if [ "$checkDelay" -gt "$ndscheckMAX_STATUS_COUNT" ]; then
      checkDelay=$ndscheckMAX_STATUS_COUNT
    fi
    echo "$checkDelay" > "$ndscheckMaintFlag"
    f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),manual maintenance in progress ($checkDelay left); exiting."
  fi
  # Clear PID file on BOTH paths: a stale PID file whose number is later
  # reused by an unrelated process would make the next run report a bogus
  # "previous run still running" alert.
  rm -f /tmp/ndscheck.pid
  exit 0
fi

# Define a default set of arrays if they haven't been setup for this server.
#   checkName   - service label used in log/alert lines
#   checkPort   - regular expression matched against "netstat -n -a" output
#   checkAction - command line eval'ed to restart the service
# Entries with the same index belong together.
# NOTE(review): the check loop iterates over checkName, so with these defaults
# the fourth port pattern (.8636) is never examined and only index 0 has a
# restart action -- confirm whether a fourth name/action is intended.
if [ -z "$checkName" ]; then
  checkName=(NDS LDAPS iMonitor)
  # checkPort=(.524.*LISTEN .636.*LISTEN .8389.*LISTEN) JSW
  # Patterns are quoted: they are regular expressions for grep and must not be
  # pathname-expanded against the current directory.
  checkPort=('.524.*LISTEN' '.636.*LISTEN' '.8389.*LISTEN' '.8636.*LISTEN')
  checkAction=(
    "$SUDO /etc/init.d/nds stop; sleep 30; f_edirautostart; $bindir/ndsstat"
    # These services are restarted by 8.7.1 in /usr/lib/nds-modules/ndsmodules.conf
    # "$SUDO $sbindir/npki -u; sleep 10; $SUDO $sbindir/npki -l"
    # "$SUDO $bindir/ndsimonitor -u; sleep 10; $SUDO $bindir/ndsimonitor -l"
  )
fi

# Succeed (status 0) when "netstat -n -a" prints a line matching the regular
# expression $1; non-zero otherwise.
ndscheck_port_listening()
{
  # An empty pattern would match every line; report it as "not listening",
  # which is what the unquoted original did (grep failed for lack of a pattern).
  [ -n "$1" ] || return 2
  netstat -n -a | grep -e "$1" >/dev/null 2>&1
}

# Run full check of all monitored ports
checkCount=${#checkName[@]}
index=0
# Loop through all services defined in the array
while [ "$index" -lt "$checkCount" ]; do
  ndscheck_port_listening "${checkPort[$index]}"; es=$?
  if [ "$es" -ne 0 ]; then
    # Attempt corrective action and keep the output in a variable; the sed
    # script joins all output lines into one, separated by '#'.
    result="$( (eval "${checkAction[$index]}") 2>&1 | sed -e :a -e '$!N;s/\n/#/;ta' -e 'P;D')"
    MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${checkName[$index]},${checkPort[$index]},Restart message=\"${result}\"\n"
    # A service that is not mentioned in the recorded alert is a new problem.
    if [ -f "$ndscheckAlertFile" ] && [ -z "$(grep "${checkName[$index]},${checkPort[$index]}" "$ndscheckAlertFile")" ]; then
      NEWALERT="YES"
    fi
    # Wait up to 10 additional seconds before giving up on restart attempt
    RETRY=10
    ndscheck_port_listening "${checkPort[$index]}"; es=$?
    while [ "$es" -ne 0 ] && [ "$RETRY" -gt 0 ]; do
      sleep 1
      ndscheck_port_listening "${checkPort[$index]}"; es=$?
      RETRY=$(( RETRY - 1 ))
    done
    # Check the result of the restart attempt
    if [ "$es" -ne 0 ]; then
      MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${checkName[$index]},${checkPort[$index]},Restart failed\n"
      NOTIFY="YES"; URGENT="YES"
    else
      # Service restarted OK
      MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${checkName[$index]},${checkPort[$index]},Restart OK\n"
      NOTIFY="YES"
    fi
    # Flag problem URGENT if service is having a chronic problem.
    # Of the last $ndscheckMAX_STATUS_COUNT log lines that are either "All OK"
    # or a restart of this service, count the restarts.
    restartCount=$(grep -E "All OK|${checkName[$index]},${checkPort[$index]},.*Restart message" "$log_file" \
      | tail -n "$ndscheckMAX_STATUS_COUNT" \
      | grep -c "Restart message")
    # Counting the restart just attempted, escalate once the total reaches
    # half of $ndscheckMAX_STATUS_COUNT (integer division).
    if [ $(( restartCount + 1 )) -ge $(( ndscheckMAX_STATUS_COUNT / 2 )) ]; then
      MESSAGE="${MESSAGE}${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),${checkName[$index]},${checkPort[$index]},Recurring problem detected#restartCount=$(( restartCount + 1 ))\n"
      URGENT="YES"
      if [ "$RECURRENT" = "NO" ]; then
        NEWALERT="YES"
      fi
      RECURRENT="YES"
    fi
  fi
  index=$(( index + 1 ))
done

# Trim file $1 to its last $2 lines.
# Works on a private scratch copy and only rewrites the file when that copy
# was made successfully, so a full /tmp cannot wipe the log.  A missing file
# is left alone.
ndscheck_groom_log()
{
  local target="$1" keep="$2" scratch
  [ -f "$target" ] || return 0
  scratch=$(mktemp /tmp/ndscheck.log.XXXXXX) || return 1
  if cp "$target" "$scratch"; then
    tail -n "$keep" "$scratch" > "$target"
  fi
  rm -f "$scratch"
}

# We've checked/restarted all services, now do something with the accumulated messages
if [ -n "$MESSAGE" ]; then
  handleMESSAGE
  # Clear PID file
  rm -f /tmp/ndscheck.pid
  exit 0
fi

# If we made it this far, everything must be running
f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),All OK"
if [ -f "$ndscheckAlertFile" ]; then
  # Send an all clear notification if previous message was sent
  f_write_and_log "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),Alert message CORRECTED being sent."
  printf '\n%s\n' "$(date)" >  "$EMAIL_BODY"
  printf '\n%s' "${DATE:0:4},${DATE:4:2},${DATE:6:2},$(date '+%H:%M:%S'),All OK" >> "$EMAIL_BODY"
  # The all-clear goes to the same audience as the alert it corrects.
  # Recipient variables stay unquoted so address lists split into arguments.
  if grep "URGENT=YES" "$ndscheckAlertFile" >/dev/null 2>&1; then
    "$bindir/mailx" -s"CORRECTED -- ${SUBJECT}" $EMAIL_URGENT $EMAIL_NOTIFY < "$EMAIL_BODY"
  else
    "$bindir/mailx" -s"CORRECTED -- ${SUBJECT}" $EMAIL_NOTIFY < "$EMAIL_BODY"
  fi
  rm -f "$EMAIL_BODY"
  rm -f "$ndscheckAlertFile"
fi
if [ -f "$ndsfailCountFile" ]; then
  rm -f "$ndsfailCountFile"
fi

# Groom log file (keep up to 90 days worth of entries)
ndscheck_groom_log "$log_file" 1259

# Clear PID file
rm -f /tmp/ndscheck.pid

Add new attachment

Only authorized users are allowed to upload new attachments.
« This page (revision-3) was last changed on 26-Jul-2011 18:12 by jim