#!/bin/sh
#
#
# heartbeat     Start high-availability services
#
# Author:       Alan Robertson	<alanr@unix.sh>
# License:      GNU General Public License (GPL)
#
#		This script works correctly under SuSE, Debian,
#		Conectiva, Red Hat and a few others.  Please let me know if it
#		doesn't work under your distribution, and we'll fix it.
#		We don't hate anyone, and like for everyone to use
#		our software, no matter what OS or distribution you're using.
#
# chkconfig: - 75 05
# description: Startup script high-availability services.
# processname: heartbeat
# pidfile: /var/run/heartbeat.pid
# config: /etc/ha.d/ha.cf
#

### BEGIN INIT INFO
# Description: heartbeat is a basic high-availability subsystem.
#	It will start services at initialization, and when machines go up
#	or down.  This version will also perform IP address takeover using
#	gratuitous ARPs.  It works correctly for a 2-node configuration,
#	and is extensible to larger configurations.
#	
#	It implements the following kinds of heartbeats:
#		- Bidirectional Serial Rings ("raw" serial ports)
#		- UDP/IP broadcast (ethernet, etc)
#		- UDP/IP multicast (ethernet, etc)
#		- Unicast heartbeats
#		- "ping" heartbeats (for routers, switches, etc.)
#		(to be used for breaking ties in 2-node systems
#		 and monitoring networking availability)
# Short-Description: High-availability services.
# Provides:		heartbeat HA
# Required-Start:	$remote_fs $network $time $syslog
# Should-Start:		openhpid
# Required-Stop:	$remote_fs $network $time $syslog
# Should-Stop:		openhpid
# Default-Start:	2 3 4 5
# Default-Stop:		0 1 6
### END INIT INFO

# Pull in the LSB helper functions when they are readable.
if test -r /lib/lsb/init-functions; then
	. /lib/lsb/init-functions
fi

HEARTBEAT=/usr/libexec/heartbeat/heartbeat

# Optional site settings; each file is sourced only if it is readable.
if [ -r /etc/sysconfig/heartbeat ]; then
	. /etc/sysconfig/heartbeat
fi
if [ -r /etc/sysconfig/pacemaker ]; then
	. /etc/sysconfig/pacemaker
fi
if [ -r /etc/sysconfig/sbd ]; then
	. /etc/sysconfig/sbd
fi

# Heartbeat configuration directory, exported for child processes.
HA_DIR=/etc/ha.d
export HA_DIR
CONFIG=$HA_DIR/ha.cf
. $HA_DIR/shellfuncs

LOCKDIR=/var/lock/subsys
RUNDIR=/var/run

# Assigned after the sysconfig files are sourced, so these values always win.
LRMADMIN=/usr/sbin/lrmadmin
SBD_BIN=/usr/sbin/sbd

#	Start the SBD watcher for $SBD_DEVICE.
#	Returns non-zero when $SBD_BIN is not executable, and 0 when no
#	device is configured or an sbd process already appears in the
#	process list.  A failed start prints the captured output and
#	exits the whole script with status 1.
StartSBD() {
	[ -x $SBD_BIN ] || return 1
	if [ -z "$SBD_DEVICE" ]; then
		return 0
	fi

	# Would be nice if sbd would
	# cleanly handle double start internally
	if ps -eo cmd | grep "^$SBD_BIN -d" > /dev/null; then
		return 0
	fi

	echo -n "Starting SBD - "
	local ERROR
	ERROR=$($SBD_BIN -d $SBD_DEVICE -D $SBD_OPTS watch 2>&1) && return 0

	echo "SBD failed to start; aborting."
	if [ -n "$ERROR" ]; then
		echo
		echo "$ERROR"
	fi
	exit 1
}

#	Ask the local SBD daemon to exit by sending it the "exit" message.
#	Returns non-zero when $SBD_BIN is not executable and 0 when no
#	device is configured.  A failed request prints the captured output
#	and exits the whole script with status 1.
StopSBD() {
	[ -x $SBD_BIN ] || return 1
	if [ -z "$SBD_DEVICE" ]; then
		return 0
	fi

	echo -n "Stopping SBD - "
	local ERROR
	ERROR=$($SBD_BIN -d $SBD_DEVICE -D $SBD_OPTS message LOCAL exit 2>&1) && return 0

	echo "SBD failed to stop; aborting."
	if [ -n "$ERROR" ]; then
		echo
		echo "$ERROR"
	fi
	exit 1
}

# Print the failure banner for exit code $1 and return that same code.
echo_failure() {
    local code=$1
    echo " Heartbeat failure [rc=$code]. $rc_failed"
    return $code
}

# Print the success marker held in $rc_done.
echo_success() { echo "$rc_done"; }

# SuSE: reuse the distribution's rc helpers when they are readable.
if [ -r /etc/SuSE-release ]; then
  # rc.status is new since SuSE 7.0
  if [ -r /etc/rc.status ]; then
    . /etc/rc.status
  fi
  if [ -r /etc/rc.config ]; then
    . /etc/rc.config
  fi

  # Determine the base and follow a runlevel link name.
  base=${0##*/}
  link=${base#*[SK][0-9][0-9]}
fi

# Fall back to plain-text status words when $rc_done is still empty.
if [ -z "$rc_done" ]; then
  rc_done="Done."
  rc_failed="Failed."
  rc_skipped="Skipped."
fi


# exec 2>>/var/log/ha-debug

#	This should probably be its own autoconf parameter
#	because RH has moved it from time to time...
#	and I suspect Conectiva and Mandrake also supply it.

DISTFUNCS=/etc/rc.d/init.d/functions
SUBSYS=heartbeat
MODPROBE=/sbin/modprobe
US=$(uname -n)

# Set this to 1 if you want to automatically load kernel modules
USE_MODULES=1

# Nothing to manage when the heartbeat binary is not executable.
if [ ! -x $HEARTBEAT ]; then
  exit 0
fi

#
#	Some environments like it if we use their functions...
#
if [ -r $DISTFUNCS ]; then
  # Use the distribution's function library.
  . $DISTFUNCS
else
  # Provide our own versions of these functions
  status() {
	$HEARTBEAT -s
  }
  echo_failure() {
      echo " Heartbeat failure [rc=$1]. $rc_failed"
      return $1
  }
  echo_success() {
      echo "$rc_done"
  }
fi

#
#	See if they've configured things yet...
#
# Without $CONFIG, report the problem but exit 0.
[ -f $CONFIG ] || {
  echo -n "Heartbeat not configured: $CONFIG not found."
  echo_failure 1
  exit 0
}

# Cached result of CrmEnabled: empty = not computed yet, 0 = enabled,
# 1 = disabled.
CRM_ENABLED_RV=""

# Return 0 when the "pacemaker" parameter (or, if that is empty, the "crm"
# parameter) holds one of the accepted enabling values, 1 otherwise.
# The parameters are read through ha_parameter once; later calls reuse
# the value cached in CRM_ENABLED_RV.
CrmEnabled() {
  if [ -n "$CRM_ENABLED_RV" ]; then
    return $CRM_ENABLED_RV
  fi

  local setting
  setting=$(ha_parameter pacemaker | tr '[A-Z]' '[a-z]')
  if [ -z "$setting" ]; then
    setting=$(ha_parameter crm | tr '[A-Z]' '[a-z]')
  fi

  # Assume disabled unless the value is a recognised enabling keyword.
  CRM_ENABLED_RV=1
  case $setting in
    y|yes|enable|on|true|1|manual|respawn)
	CRM_ENABLED_RV=0 ;;
  esac
  return $CRM_ENABLED_RV
}

# Run the Linux watchdog setup only where /proc/devices exists and
# $MODPROBE is executable; otherwise do nothing and return 0.
init_watchdog() {
  if [ -f /proc/devices ] && [ -x $MODPROBE ]; then
    init_watchdog_linux
  fi
}

#
#	Install the softdog module if we need to
#
init_watchdog_linux() {
#
# 	We need to install it if watchdog is specified in $CONFIG, and
#	/dev/watchdog refers to a softdog device, or if /dev/watchdog
#	doesn't exist at all.
#
#	If we need /dev/watchdog, then we'll make it if necessary.
#
#	Whatever the user says we should use for watchdog device, that's
#	what we'll check for, use and create if necessary.  If they misspell
#	it, or don't put it under /dev, so will we.
#	Hope they do it right :-)
#
#	Globals written: insmod, MISCDEV, WATCHDEV, minor.
#	The ": text" lines below are no-op commands used as notes.
#
  insmod=no
  # What do they think /dev/watchdog is named?
  # MISCDEV: first four columns of the /proc/devices line ending in " misc";
  # it is used further down as the major number for mknod.
  MISCDEV=`grep ' misc$' /proc/devices | cut -c1-4`
  # The unquoted echo strips the padding left over from cut.
  MISCDEV=`echo $MISCDEV`
  # WATCHDEV: the device path configured via the "watchdog" parameter.
  WATCHDEV=`ha_parameter watchdog`
  WATCHDEV=`echo $WATCHDEV`
  if
    [ "X$WATCHDEV" != X ]
  then
    : Watchdog requested by $CONFIG file
  #
  #	We try and modprobe the module if there's no dev or the dev exists
  #	and points to the softdog major device.
  #
    if
      [ ! -c "$WATCHDEV" ]
    then
      insmod=yes
    else
      # An existing character device counts as softdog when its ls -l
      # listing contains "<MISCDEV>,".
      case `ls -l "$WATCHDEV" 2>/dev/null` in
      *$MISCDEV,*)
	    insmod=yes;;
      *)	: "$WATCHDEV isn't a softdog device (wrong major)" ;;
      esac
    fi
  else
    : No watchdog device specified in $CONFIG file.
  fi
  # Load softdog unless /proc/modules already lists it.  modprobe output
  # and exit status are discarded.
  case $insmod in
    yes)
      if
        grep softdog /proc/modules >/dev/null 2>&1 
      then
        : softdog already loaded
      else
        $MODPROBE softdog nowayout=0 >/dev/null 2>&1
      fi;;
  esac
  # Create the device node when one was requested, does not exist yet, and
  # we decided above that softdog is wanted.
  if
    [ "X$WATCHDEV" != X -a ! -c "$WATCHDEV" -a $insmod = yes ]
  then
    # minor: first four columns of the /proc/misc line matching "watchdog".
    minor=`cat /proc/misc | grep watchdog | cut -c1-4`
    mknod -m 600 $WATCHDEV c $MISCDEV $minor
  fi
} # init_watchdog_linux()

# Poll "$LRMADMIN -C" until it succeeds, sleeping 2 seconds between
# attempts, for at most 20 attempts.  Returns 0 as soon as a probe
# succeeds, 1 (after printing a message) when all attempts fail.
wait_for_lrmd() {
	local limit=20
	local tries=0
	echo -n "waiting for lrmd to become responsive "
	until [ $tries -ge $limit ]; do
		if $LRMADMIN -C > /dev/null 2>&1; then
			return 0
		fi
		sleep 2
		echo -n .
		tries=$(($tries+1))
	done
	echo "lrmd apparently didn't start"
	return 1
}
# Push $LRMD_MAX_CHILDREN to lrmd as its "max-children" setting.
# Returns non-zero when CRM is disabled, $LRMADMIN is not executable,
# or lrmd never answers; returns 0 without doing anything when
# $LRMD_MAX_CHILDREN is empty.
set_lrmd_options() {
	CrmEnabled || return
	[ -x $LRMADMIN ] || return
	[ -n "$LRMD_MAX_CHILDREN" ] || return 0
	wait_for_lrmd || return
	$LRMADMIN -p max-children $LRMD_MAX_CHILDREN
}


#
#	Start the heartbeat daemon...
#

# Run $HEARTBEAT, capturing its stdout and stderr in the global ERROR.
# The return status is the exit status of $HEARTBEAT.
start_heartbeat() {
  ERROR=$($HEARTBEAT 2>&1)
}

# mkdir_chmod_chown DIR MODE OWNER
# Create DIR if it is not already a directory (parents are not created),
# then apply MODE and OWNER to it.  Returns non-zero when DIR is empty or
# any step fails.
mkdir_chmod_chown()
{
	local dir mode owner
	dir=$1
	mode=$2
	owner=$3
	[ -n "$dir" ] || return
	if [ ! -d "$dir" ]; then
		mkdir "$dir" || return
	fi
	chmod "$mode" "$dir" || return
	chown "$owner" "$dir"
}

#
#	Start Linux-HA
#

StartHA() {
  # Start sequence: optional ResourceManager check, watchdog setup,
  # runtime directories, SBD, then the heartbeat daemon itself.
  # Returns 0 on success.  On failure the daemon's exit status is stored
  # in the global RC and returned, and any captured output is printed.
  echo -n "Starting High-Availability services: "

  # When CRM is not enabled, run ResourceManager's "verifyallidle" first.
  CrmEnabled || $HA_NOARCHBIN/ResourceManager verifyallidle

  #	Create /dev/watchdog and load module if we should
  [ $USE_MODULES = 1 ] && init_watchdog

  # Directory creation runs in a subshell so the umask change stays local.
  (
    # $RUNDIR may be a tmpfs on some systems
    umask 027

    # see http://developerbugs.linux-foundation.org/show_bug.cgi?id=2378
    # HA_RSCTMP is defined in ocf-directories, sourced from ocf-shellfuncs,
    # sourced from $HA_DIR/shellfuncs.
    mkdir_chmod_chown "$HA_RSCTMP" 1755 root:root

    for sub in crm heartbeat heartbeat/ccm heartbeat/crm heartbeat/dopd
    do
      mkdir_chmod_chown "$RUNDIR/$sub" 750 hacluster:haclient
    done
  )

  # Rename ipresources to haresources when only the former exists.
  if [ -f $HA_DIR/ipresources ] && [ ! -f $HA_DIR/haresources ]; then
    mv $HA_DIR/ipresources $HA_DIR/haresources
  fi

  #	Start SBD, if enabled.
  #	Start failure will be fatal (exit this script)
  StartSBD

  #	Start heartbeat daemon
  if start_heartbeat; then
    set_lrmd_options
    echo_success
    return 0
  else
    # $? here is still the status of start_heartbeat.
    RC=$?
    echo_failure $RC
    if [ -n "$ERROR" ]; then
      echo
      echo "$ERROR"
    fi
    return $RC
  fi
}

#
#	Ask heartbeat to stop.  It will give up its resources...
#
StopHA() {
  # Run "$HEARTBEAT -k" with its output discarded.  On success SBD is
  # stopped too and 0 is returned; on failure the exit status is stored
  # in the global RC and returned.
  echo -n "Stopping High-Availability services: "

  $HEARTBEAT -k >/dev/null 2>&1 || {	# Kill it
    RC=$?
    echo_failure $RC
    return $RC
  }

  StopSBD
  echo_success
  return 0
}

# Run "$HEARTBEAT -s" and return its exit status.
StatusHA() { $HEARTBEAT -s; }

# Ask a running heartbeat to enter standby through $HA_NOARCHBIN/hb_standby.
# Exits the script with status 1 when the failback settings rule standby
# out or heartbeat is not running.  Sets the globals auto_failback and
# nice_failback.
StandbyHA() {
  auto_failback=$(ha_parameter auto_failback | tr '[A-Z]' '[a-z]')
  nice_failback=$(ha_parameter nice_failback | tr '[A-Z]' '[a-z]')

  case "$auto_failback" in
    *legacy*)
	echo "auto_failback is set to legacy.  Cannot enter standby."
	exit 1 ;;
  esac
  case "$nice_failback" in
    *off*)
	echo "nice_failback is disabled.  Cannot enter standby."
	exit 1 ;;
  esac
  # Neither parameter configured at all.
  if [ -z "${auto_failback}${nice_failback}" ]; then
    echo "auto_failback defaulted to legacy.  Cannot enter standby."
    exit 1
  fi

  echo "auto_failback: $auto_failback"
  if StatusHA >/dev/null 2>&1; then
    :
  else
    echo "Heartbeat is not currently running."
    exit 1
  fi

  echo -n "Attempting to enter standby mode"
  if $HA_NOARCHBIN/hb_standby; then
    # It's impossible to tell how long this will take.
    echo_success
  else
    echo_failure $?
  fi
}

#
#	Ask heartbeat to restart.  It will *keep* its resources
#
ReloadHA() {
  # Run "$HEARTBEAT -r".  Returns 0 on success; on failure the exit
  # status is stored in the global RC and returned.
  echo -n "Reloading High-Availability services: "

  # Restart, and keep your resources
  $HEARTBEAT -r || {
    RC=$?
    echo_failure $RC
    return $RC
  }

  echo_success
  return 0
}

# Run the optional hook $HA_DIR/resource.d/startstop with the given
# arguments.  Returns 0 when the hook file does not exist, otherwise the
# hook's exit status.
RunStartStop() {
  [ -f $HA_DIR/resource.d/startstop ] || return 0
  $HA_DIR/resource.d/startstop "$@"
}

RC=0
# See how we were called.
#
# Dispatch on the requested action.  Every arm leaves the action's result
# in RC, which becomes the script's exit status.

case "$1" in
  start)
	RunStartStop pre-start
	StartHA
	RC=$?
	echo
	if
	  [ $RC -eq 0 ]
	then
	  # Record the running service in the subsystem lock directory.
	  [ ! -d $LOCKDIR ] && mkdir -p $LOCKDIR
	  touch $LOCKDIR/$SUBSYS
	fi
	RunStartStop post-start $RC
	;;

  standby)
	if CrmEnabled; then
	  echo "use 'crm_standby -v on' instead"
	  RC=1
	else
	  StandbyHA
	  RC=$?
	fi
	;;

  status)
	StatusHA
	RC=$?
	;;

  stop)
	RunStartStop "pre-stop"
	StopHA
	RC=$?
	echo
	if
	  [ $RC -eq 0 ]
	then
	  rm -f $LOCKDIR/$SUBSYS
	fi
	RunStartStop post-stop $RC
	;;

  restart)
	sleeptime=`ha_parameter deadtime`
	StopHA
	if ! CrmEnabled ; then
		echo
		echo -n "Waiting to allow resource takeover to complete:"
		# "sleep" with no operand is an error, so only wait for
		# deadtime when the parameter is actually configured.
		if [ -n "$sleeptime" ]; then
			sleep $sleeptime
		fi
		sleep 10 # allow resource takeover to complete (hopefully).
		echo_success
	fi
	echo
	StartHA
	# The outcome of a restart is the outcome of the start; take it
	# explicitly instead of relying on whatever RC was left behind.
	RC=$?
	echo
	;;

  force-reload|reload)
	ReloadHA
	# Capture the status before "echo" overwrites $?.
	RC=$?
	echo
	;;

  *)
	echo "Usage: $0 {start|stop|status|restart|reload|force-reload|standby}"
	exit 1
	;;
esac

exit $RC

