#!/bin/sh
#
#
#   Support:      linux-ha-dev@lists.tummy.com
#   License:      GNU General Public License (GPL)
#
#	New haresources format:
#
#	machine resource resource resource resource
#
#	Where a resource can be an IP address or a scriptname, or a scriptname
#	and single argument.
#
#	When it's a scriptname with an argument, the argument is connected to
#	the scriptname with "::".  Another way of expressing an IP address is
#	via IPaddr::ip-address, since the script name IPaddr is the one assumed
#	for resources which are spelled like an IP address.
#
#	As an illustration, the following two lines are identical in effect:
#
#	node1	123.45.67.89 httpd
#	node1	IPaddr::123.45.67.89 httpd
#
#       One can also pass multiple arguments to a script by separating each
#       argument with a double colon:
#
#       node1  10.0.0.170 Filesystem::/dev/sda1::/data1::ext2
#
#	Note:  The first resource on the line must be unique in the haresources
#	file.  Maybe I should add a resource type called Unique which can be
#	put on the front of a line to meet this requirement.
#
#

# Uncomment the next line to trace every command this script executes.
# set -x
# The ':' builtin does nothing with its (expanded) arguments, so the three
# lines below have no effect on a normal run.  With "set -x" enabled they
# appear in the trace as a banner that includes the command-line arguments.
: 
: 
: ==================== Starting ResourceManager $* ==========================
 
unset LC_ALL; export LC_ALL # Make ifconfig work in France for David Jules :-)
unset LANGUAGE; export LANGUAGE # Make ifconfig work in France for Fabrice :-)
 
# Installation paths.  prefix and exec_prefix are not referenced in the
# visible part of this script.
prefix=/usr
exec_prefix=/usr
# HA_DIR holds the haresources file read by ipres(); HA_FUNCS is the shell
# function library sourced below.  Both are exported to child processes.
HA_DIR=/etc/ha.d
HA_FUNCS=$HA_DIR/shellfuncs
export HA_DIR HA_FUNCS
# Second directory (after $HA_RESOURCEDIR) searched by scriptpath().
INITDIR=/etc/rc.d/init.d
# Program run by acquireresourcegroup() after a failed start.
STANDBY=/usr/share/heartbeat/hb_standby
USAGE="usage: $0 listkeys machine | takegroup key | givegroup key|status resource";
# NOTE(review): ha_log, ha_debug, HA_DEBUGLOG, HA_CURHOST and HA_RESOURCEDIR
# are used below but never defined in this file; presumably they come from
# the sourced library or from the caller's environment -- confirm.
. $HA_FUNCS

# Default the stop-retry limit to 10 only if the variable is entirely unset
# (note '=' rather than ':=', so an empty value is left alone).
: ${HA_STOPRETRYMAX=10}

#
#	isip string
#
#	Succeed (status 0) when the string is spelled like a dotted-quad IP
#	address, fail (status 1) otherwise.  The check is a loose shell glob:
#	four dot-separated parts that each begin with a digit.
#
isip() {
  case $1 in
    [0-9]*.[0-9]*.[0-9]*.[0-9]*)
      return 0
      ;;
  esac
  return 1
}

#
# Reverseargs doesn't deal with arguments containing  white space correctly.
# To fix this I think you'd have to change it to put out the arguments
# one per line, and then have the caller use them a line at a time using
# "read" or something.  This could be done either using a tmp file or
# by using eval on the argument numbers starting from $# and going
# down.  But, no existing resource script deals with these either, so
# I'm not too worried yet...
#
reverseargs() {
  # Build the reversed list by prepending each argument to what has been
  # collected so far; the separating blank is only added once the list
  # is non-empty.
  L=""
  for arg
  do
    L="$arg${L:+ $L}"
  done
  # Deliberately unquoted: the result is emitted as blank-separated words.
  echo $L
}

#
#	ipres [grepflags] [pattern]
#
#	Print the normalized lines of $HA_DIR/haresources that match an
#	extended regular expression.
#
#	  no arguments:   every line is printed (pattern '.')
#	  one argument:   the pattern
#	  two arguments:  extra grep flags (e.g. -i, -iv), then the pattern
#
#	Normalization removes comments, joins '\'-continued lines, squeezes
#	all white space into single blanks, lower-cases the first field (the
#	node name) and ends every line with exactly one blank, making parsing
#	easier for dumb shell scripts.
#
#	Prints nothing and returns 0 when the haresources file does not
#	exist; otherwise the exit status is that of the final grep.
#
ipres() {
  grepflags=""
  pat='.'
  case $# in
    1)  pat=$1			;;
    2)  grepflags=$1; pat=$2	;;
  esac
  if [ -f "$HA_DIR/haresources" ]; then
    #
    #	Explanation of the sed -e expressions below:
    #
    #	1) Strip out comments
    #	2) Repeatedly join lines together when they're ended by '\'
    #		(":a" is a label.  "ta" is a conditional
    #		branch to ":a")
    #	3) Strip comments again: step 1 only saw the first physical
    #		line, so a comment on the last continuation line would
    #		otherwise survive and be reported as resources
    #	4) Append a blank to the end of the line
    #	5) Compress multiple blanks/tabs into a single blank
    #	6) Remove lines that only contain whitespace or are empty
    #	7) Strip off a leading space (if any)
    #
    # awk then lower-cases the node name; assigning $1 rebuilds the record
    # with single blanks, and printing an extra empty field restores the
    # trailing blank.
    #
    # $grepflags is intentionally unquoted so that an empty value
    # disappears instead of becoming an empty argument.
    sed \
      -e 's%#.*%%' \
      -e :a -e '/\\$/N; s/\\\n//; ta' \
      -e 's%#.*%%' \
      -e 's%$% %' \
      -e 's%[[:blank:]][[:blank:]]*% %g' \
      -e '/^[[:blank:]]*$/ d' \
      -e 's%^ %%' \
      "$HA_DIR/haresources" |
    awk '{ $1 = tolower($1); print $0, "" }' |
    grep -E $grepflags -e "$pat"
  fi
}

#
#	ipresline resource
#
#	Print the normalized haresources line(s) in which the given resource
#	appears as a complete blank-delimited word.  The resource text is
#	handed to ipres as part of a regular expression.
#
ipresline() {
  ipres " ${1} "
}

#
#	KeyResources node-pattern
#
#	Print the key (first) resource of every haresources line whose node
#	name matches the given pattern, compared case-insensitively.
#
KeyResources() {
  ipres -i "^${1} " |
    cut -d ' ' -f 2
}

#
#	OurGroupKeys
#
#	Print, one line per resource group, everything after the node name
#	for the haresources lines whose node is $HA_CURHOST (compared
#	case-insensitively).
#
OurGroupKeys() {
  ipres -i "^$HA_CURHOST " |
    cut -d ' ' -f 2-
}

#
#	OtherGroupKeys
#
#	Print, one line per resource group, everything after the node name
#	for the haresources lines whose node is NOT $HA_CURHOST (compared
#	case-insensitively).
#
OtherGroupKeys() {
  ipres -iv "^$HA_CURHOST " |
    cut -d ' ' -f 2-
}

#
#	StandbyGroupType resource
#
#	Echo "local" when the resource belongs to one of this node's own
#	resource groups (as listed by OurGroupKeys), "foreign" otherwise.
#
#	OurGroupKeys prints whole resource lists, blank-separated and ending
#	in a blank, so the lists are first split into one resource per line.
#	The resource is then compared as a fixed string against complete
#	lines, so characters such as '.' in an IP address are not treated as
#	regular-expression wildcards.
#
StandbyGroupType() {
  if
    # An empty name must not match the empty lines produced by the split.
    [ -n "$1" ] &&
    OurGroupKeys | tr ' ' '\n' | grep -F -x -e "$1" >/dev/null
  then
    echo local
  else
    echo foreign
  fi
}


#
#	canonname resource
#
#	Echo the resource in canonical form: a resource spelled like an IP
#	address gets the implied "IPaddr::" script name prefixed, anything
#	else is echoed as given.
#
canonname() {
  isip $1 || {
    echo $1
    return
  }
  echo "IPaddr::$1"
}

#
#	resource2script resource
#
#	Echo the name of the script that manages a resource: the part of the
#	canonical resource name in front of the first "::", or the whole name
#	when there are no arguments.  A bare IP address therefore yields
#	"IPaddr", the script implied for resources spelled like an IP address.
#
resource2script() {
  # Strip the arguments from the *canonical* name.  Working on the raw
  # argument would hand a bare IP address back unchanged instead of
  # naming its script.
  canonname $1 | sed 's%::.*%%'
}

# Echo the arguments that follow the script name in a resource, separated
# by single blanks ("::" is the delimiter in haresources).
# Nothing is printed for a resource that has no arguments; a bare IP
# address is its own (only) argument.
resource2arg() {
  case $(canonname $1) in
    *::*)	;;
    *)		return 0;;
  esac
  # Drop the leading "script::" part, then turn the remaining delimiters
  # into blanks.
  echo $1 | sed -e 's%[^:]*::%%' -e 's%::% %g'
}

#
#	scriptpath resource
#
#	Echo the full path of the script that manages a resource.  The
#	directories $HA_RESOURCEDIR and $INITDIR are searched in that order
#	for a regular, executable file named after the resource's script.
#	When none is found an error is logged and the function fails.
#
scriptpath() {
  script=$(resource2script $(canonname $1))
  for dir in $HA_RESOURCEDIR $INITDIR
  do
    [ -f $dir/$script ] && [ -x $dir/$script ] || continue
    echo $dir/$script
    return 0
  done

  ha_log "ERROR: Cannot locate resource script $script"
  false
}
#
#	Since we're patterned after the LSB's init scripts, here are
#	the exit codes we should be returning for status...
#
#	0	program is running
#	1	program is dead and /var/run pid file exists
#	2	program is dead and /var/lock lock file exists
#	3	program is stopped
#	4-100	reserved for future LSB use
#	100-149	reserved for distribution use
#	150-199	reserved for application use
#	200-254	reserved
#

#
#	we_own_resource resource
#
#	Ask the resource's script for its status and report whether the
#	resource is currently held by this machine:
#
#	  0	the status output says "running" or "OK"
#	  3	the status output says "not ... running", says anything
#		else, or the resource script cannot be located
#
we_own_resource() {
  arg=`resource2arg $1`
  # Without a script there is nothing to ask.  Bail out here: running
  # the command line below with an empty $spath would execute the
  # resource's *argument* as if it were a command.
  spath=`scriptpath $1` || return 3
  [ -n "$spath" ] || return 3

  # The "not running" pattern must be tested before the plain "running"
  # pattern, which it also satisfies.
  case `$spath $arg status` in
    *[Nn][Oo][Tt]\ *[Rr]unning*)	return 3;;
    *[Rr]unning*|*OK*)			return 0;;
    *)					return 3;;
  esac
}


#
#	doscript resource operation
#
#	Run the resource's script as "script [args...] operation", with the
#	script's stderr appended to $HA_DEBUGLOG.  The invocation and its
#	outcome are logged.  Returns the script's exit status (also left in
#	RC), or 1 when no executable script can be found.
#
doscript() {
  script=$(resource2script $1)
  arg=$(resource2arg $1)
  spath=$(scriptpath $script)

  if [ ! -f "$spath" ] || [ ! -x "$spath" ]
  then
    ha_log "ERROR: Cannot locate resource script $script"
    return 1
  fi

  ha_log "info: Running $spath $arg $2"
  ha_debug "debug: Starting $spath $arg $2"
  # $arg is left unquoted on purpose: it may hold several blank-separated
  # arguments for the script.
  $spath $arg "$2" 2>>$HA_DEBUGLOG
  RC=$?
  ha_debug "debug: $spath $arg $2 done. RC=$RC"
  [ $RC -eq 0 ] || ha_log "ERROR: Return code $RC from $spath"
  return $RC
}


# Release (stop) every resource of a resource group.
#
# Arguments are: nodename resource1 resource2 ...
#
# The resources are stopped in the reverse of the order given.  A failing
# stop is retried; if it still fails and the resource reports itself as
# held, RecoverFromFailedStop is invoked.
#
# Returns 0, or the exit status of the last failed stop for which
# RecoverFromFailedStop was invoked.
#
# Note: rc, rc1, retries and j are ordinary global variables (this is a
# /bin/sh script), so they are also visible to, and overwritten for, the
# caller.
giveupresourcegroup() {

  ha_log "info: Releasing resource group: $*"
  # Drop the node name; only the resources remain in "$@".
  shift
  rc=0
  for j in `reverseargs "$@"`
  do
    retries=0
    # The loop condition is a command list: each pass first runs the stop
    # operation and records its status, and the body (sleep and retry) is
    # entered only if the stop failed and retries are still permitted.
    # A limit of zero or less means "retry until the stop succeeds".
    while
      doscript $j stop >>$HA_DEBUGLOG 2>&1
      rc1=$?
      [ $HA_STOPRETRYMAX -le 0 -o  $retries -lt $HA_STOPRETRYMAX ] &&
      [ $rc1 -ne 0 ]
    do
      sleep 1
      retries=`expr 1 + "$retries"`
      ha_log "info: Retrying failed stop operation [$j]"
    done
    # rc1 is still non-zero here only when the retries ran out.
    if
      [ $rc1 -ne 0 ]
    then
      if
        we_own_resource $j
      then
        # The resource is still held although it was told to stop.
        rc=$rc1
        RecoverFromFailedStop	# Bye Bye!
      else
        # Red Hat (and probably others) Kludge!
        # The status check says the resource is not held, so the failed
        # stop is blamed on the script and only logged.
        ha_log "ERROR: Resource script for $j probably not LSB-compliant."
        ha_log "WARN: it ($j) MUST succeed on a stop when already stopped"
        ha_log "WARN: Machine reboot narrowly avoided!"
      fi
    fi
  done
  return $rc
}

# Acquire (start) every resource of a resource group, in the order given.
#
# Arguments are: nodename resource1 resource2 ...
#
# A resource that already reports itself as held is skipped.  When a
# start fails, the whole group is released again, a standby request is
# scheduled in the background 30 seconds later, and no further resources
# are started.
#
# Returns 0 on success, or the exit status of the failed start.
#
# This function keeps its state in acq_rc and acq_rsc rather than in rc
# and j: all variables in this script are global, and giveupresourcegroup
# resets rc and reuses j, which would otherwise wipe out both the failure
# status and the name of the failed resource.
acquireresourcegroup() {
  ha_log "info: Acquiring resource group: $*"
  node="$1"
  shift
  acq_rc=0
  for acq_rsc in "$@"
  do
    if
      we_own_resource "$acq_rsc"
    then
      continue
    fi
    doscript "$acq_rsc" start
    acq_rc=$?
    if
      [ $acq_rc -eq 0 ]
    then
      : $acq_rsc start succeeded
      continue
    fi
    ha_log "CRIT: Giving up resources due to failure of $acq_rsc"
    giveupresourcegroup "$node" "$@"
    (sleep 30; $STANDBY `StandbyGroupType "$acq_rsc"`) &
    break
  done
  return $acq_rc
}

#
#	We may be given a resource to give up that we don't own...
#
GiveUpGroup() {
  # Look up the haresources line (node name plus resources) that contains
  # the requested resource.
  haresources=$(ipresline $1)

  # Nothing to release when the resource is not listed at all.
  [ -n "$haresources" ] || return 0

  # Unquoted on purpose: the line is split into node name and resources.
  giveupresourcegroup $haresources
}

#
#	TakeGroup resource
#
#	Acquire the resource group whose haresources line contains the given
#	resource.  Does nothing, successfully, when no line matches.
#
TakeGroup() {
  # Look up the haresources line (node name plus resources) that contains
  # the requested resource.
  haresources=$(ipresline $1)

  # Nothing to acquire when the resource is not listed at all.
  [ -n "$haresources" ] || return 0

  # Unquoted on purpose: the line is split into node name and resources.
  acquireresourcegroup $haresources
}

#
# Determine the status of all the resources in a resource group
#
# Results are echoed to stdout:
#
# NONE:	None of the resources in the resource group are held
#      (or there is no such resource group)
# ALL:	All of the resources in the resource group are held
# SOME:	Some of the resources in the resource group are held
#
StatGroup() {
  # Argument: a resource identifying the group (looked up via ipresline).
  # Echoes exactly one of NONE, ALL or SOME to stdout.
  result="FirstTime"

  # Load the group's haresources line into the positional parameters.
  # The "--" matters: without it an empty lookup result turns this into a
  # bare "set", which prints every shell variable to stdout, and a leading
  # "-" in the result would be taken as shell options.
  set -- `ipresline $1`
  # Drop the node name, if the lookup found anything at all.
  if
    [ $# -gt 0 ]
  then
    shift
  fi

  for resource
  do
    if
      we_own_resource $resource
    then
      case $result in
        FirstTime)	result=ALL;;
        # Held, but an earlier one was not: the group is mixed.
        NONE)		echo SOME; return 0;;
        SOME|ALL)		;;
      esac
    else
      case $result in
        FirstTime)	result=NONE;;
        # Not held, but an earlier one was: the group is mixed.
        ALL)		echo SOME; return 0;;
        SOME|NONE)	;;
      esac
    fi
  done

  # No resources at all (e.g. unknown group) counts as NONE.
  case $result in
    FirstTime)	echo NONE;;
    *)		echo $result;;
  esac
}

#
#	Verify that all resources in the resource group are idle
#

VerifyAllIdle() {
  # Checks the key resource of every group listed by KeyResources ".*".
  # Each one that reports itself as held is complained about in the log
  # and on stderr.  This function does not return: it exits the script,
  # with the number of active resources found as the exit status.
  rc=0
  for rsc in $(KeyResources ".*")
  do
    we_own_resource $rsc || continue

    ha_log "CRITICAL: Resource $rsc is active, and should not be!"
    ha_log "CRITICAL: Non-idle resources can affect data integrity!" >&2
    ha_log "info: If you don't know what this means, then get help!"
    ha_log "info: Read the docs and/or source to $0 for more details."
    printf '%s\n' \
      "CRITICAL: Resource $rsc is active, and should not be!" \
      "CRITICAL: Non-idle resources can affect data integrity!" \
      "info: If you don't know what this means, then get help!" \
      "info: Read the docs and/or the source to $0 for more details." >&2
    #
    # What this means is that if you have a shared disk and it's already
    # mounted before you start heartbeat, then you could have it mounted
    # simultaneously on both sides.  If this happens then your disk data
    # is toast!  So, this is sometimes VERY BAD INDEED!
    #
    # The most *common* cause for this message is that you told your OS to
    # manage one of the IP addresses that you asked heartbeat to manage.
    # You can't put both your OS startup scripts and heartbeat in charge
    # of HA resources.  This particular case is discussed in detail in the
    # docs.
    #
    rc=$((rc + 1))
  done

  [ $rc -eq 0 ] || {
    ha_log "CRITICAL: Non-idle resources will affect resource takeback!"
    ha_log "CRITICAL: Non-idle resources may affect data integrity!"
  }
  exit $rc
}

#
#	If we are unable to stop a resource, then everything is in a
#	hosed state.  The only way out is through a reboot...
#
RecoverFromFailedStop() {
  ha_log "CRIT: Resource STOP failure. Reboot required!"
  ha_log "CRIT: Killing heartbeat ungracefully!"
  for name in heartbeat ipfail ccm
  do
    pkill -9 $name
  done

  # First choice: an immediate forced reboot.
  if [ -x /sbin/reboot ] && /sbin/reboot -nf
  then
    : OK - reboot succeeded
    return
  fi

  # Fallback: ask init to switch to runlevel 6.
  if [ -x /sbin/init ] && /sbin/init 6
  then
    : OK - init 6 succeeded
    return
  fi

  ha_log "CRIT: Unable to force reboot."
}

#
#	Command dispatch: the first argument selects the operation, the
#	second (where used) is its operand.  The script's exit status is
#	that of the selected function.
#
#	Note that $USAGE only mentions some of the commands accepted here.
#
case $1 in

  # Key resource of each group belonging to the machine(s) matching $2.
  listkeys)	KeyResources "$2";;

  # Key resource of every group in haresources.
  allkeys)	KeyResources '.*';;

  # Resource lists of the groups belonging to $HA_CURHOST.
  ourkeys)	OurGroupKeys;;

  # Resource lists of the groups belonging to any other node.
  otherkeys)	OtherGroupKeys;;

  # Exit status 0 if resource $2 reports itself as held, 3 otherwise.
  status)	we_own_resource $2;;

  # Release / acquire the group containing resource $2.  Everything these
  # print, including stderr, is appended to $HA_DEBUGLOG.
  givegroup)	GiveUpGroup $2 >>$HA_DEBUGLOG 2>&1 ;;

  takegroup)	TakeGroup $2 >>$HA_DEBUGLOG 2>&1 ;;

  # NOTE(review): StatGroup reports its NONE/ALL/SOME answer on stdout,
  # but stdout is redirected to $HA_DEBUGLOG here, so the caller of this
  # script never sees it.  Confirm whether that is intended.
  statgroup)	StatGroup $2 >>$HA_DEBUGLOG 2>&1 ;;

  # Does not return: VerifyAllIdle exits with the number of active
  # resources it found.
  verifyallidle)VerifyAllIdle ;;

  # Unknown or missing command: print the usage text and fail.
  *)		echo "$USAGE" >&2
  		echo "" >&2
		exit 1;;
esac

