#!/bin/bash

# Timestamp taken once at script start; it is embedded in the log and PID file
# names built below so that all files of one run share the same suffix.
CURRENTTIME=$(date +%Y%m%d-%H%M%S)

# Names of the BeeGFS service binaries (also used to derive config/log names).
MGMTD_BIN=beegfs-mgmtd
META_BIN=beegfs-meta
STORAGE_BIN=beegfs-storage
CLIENT_BIN=beegfs-client # not really a binary, but name of config, init, etc.
CTL_BIN=beegfs

# Log directory and per-service log file names.
DEFAULT_LOG_PATH=/var/log
LOG_PATH=${DEFAULT_LOG_PATH}
STORAGE_LOG=${STORAGE_BIN}_${CURRENTTIME}.log
META_LOG=${META_BIN}_${CURRENTTIME}.log
CLIENT_LOG=${CLIENT_BIN}_${CURRENTTIME}.log

# File names of the optional per-service config files.
# Note that the mgmtd file uses the .toml extension, all others use .conf.
STORAGE_CFG_NAME=${STORAGE_BIN}.conf
META_CFG_NAME=${META_BIN}.conf
MGMTD_CFG_NAME=${MGMTD_BIN}.toml
CLIENT_CFG_NAME=${CLIENT_BIN}.conf

# Names of the numeric ID files inside a metadata / storage target directory.
META_NUMID_FILE=nodeNumID
TARGET_NUMID_FILE=targetNumID

# Locations of the symlinks that are created when local servers are preferred.
PREFERRED_MDS_FILE=/tmp/preferredMds.fod
PREFERRED_TARGET_FILE=/tmp/preferredTarget.fod

# Status file that records the services started on a node.
DEFAULT_STATUSFILE=/var/tmp/beeond.tmp
STATUSFILE=${DEFAULT_STATUSFILE}

# Default number of servers to start.
NUM_META_SERVER=1
NUM_STORAGE_SERVER=0

# Directory containing the BeeGFS service binaries.
BEEGFS_BIN_PATH=/opt/beegfs/sbin

# Default mgmtd gRPC base port and default network port shift.
DEFAULT_MGMTD_GRPC_PORT=8010
DEFAULT_PORT_SHIFT=1000

# Remote execution: plain ssh (non-interactive, no password prompts, no host
# key checking, stdin detached via -n) or pdsh with the ssh rcmd module.
SSH="ssh"
SSH_PARAMS=( -qq -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no -n )
DEFAULT_PDSH_PATH=$(which pdsh 2>/dev/null)
PDSH_RCMD="ssh"

# source helper script
ABSOLUTE_PATH=$(dirname "$(readlink -e "$0")") # using readlink, because someone might be calling
                                               # this script using a symlink
# Prefer ../lib relative to the script; otherwise fall back to ../scripts/lib.
if [ -e "${ABSOLUTE_PATH}/../lib/beegfs-ondemand-stoplocal" ]
then
   BEEOND_STOPLOCAL="${ABSOLUTE_PATH}/../lib/beegfs-ondemand-stoplocal"
else
   BEEOND_STOPLOCAL="${ABSOLUTE_PATH}/../scripts/lib/beegfs-ondemand-stoplocal"
fi

#shellcheck source=scripts/lib/beegfs-ondemand-stoplocal
source "${BEEOND_STOPLOCAL}"

# Print the complete usage/help text to stdout and terminate the script.
# Globals (read): DEFAULT_STATUSFILE, DEFAULT_PORT_SHIFT, DEFAULT_LOG_PATH,
#                 DEFAULT_MGMTD_GRPC_PORT, DEFAULT_PDSH_PATH
# Exits:          always, with status 1
print_usage_and_exit()
{
   local SELF
   SELF=$(basename "$0")

   # Unquoted heredoc delimiter: variables are expanded, while double quotes
   # are printed literally (so they must not be backslash-escaped here).
   cat <<EOF

BeeOND - BeeGFS OnDemand (http://www.beegfs.com)

DESCRIPTION:
   Script to set up or shut down a BeeGFS setup on the fly.

   Creates a new BeeGFS file system on a set of hosts. All necessary services
   are automatically started and the file system is mounted. In the same way,
   the file system can be unmounted again and the services will be shut down.
   Optionally, the contents of the file system can be deleted.

   This script can be used e.g. to automatically create a temporary scratch file
   system for cluster nodes during a compute job, and to remove it after the job
   is finished.

USAGE: ${SELF} <action> <options>

ACTIONS:
   The first argument to ${SELF} is considered to be an action that the
   script should perform.

   The following actions are available:

   start:
      Start the file system on a number of nodes, specified by the node file.
      The necessary services will be started and the newly created file system
      will be mounted at the specified mount point. Information about the
      running file system is stored in a status file on each node.

      Mandatory arguments:
         -n FILENAME => Node file with line-separated hostnames.
         -d PATH     => Path for BeeGFS data on servers.
         -c PATH     => Mount point for BeeGFS clients.

      Optional arguments:
         -i FILENAME => Status information file name.
                        Default: ${DEFAULT_STATUSFILE}
         -F          => Remove contents of data path before starting services.
                        This is useful if the processes and status file of a
                        previous beeond session are gone, but the
                        data is still there.
         -m NUM      => Number of metadata servers to start. Default: 1
         -s NUM      => Number of storage servers to start.
                        Default: Number of hosts.
         -p NUM      => Network port shift. The standard BeeGFS network port
                        numbers are shifted by this number. Useful in order to
                        have several BeeGFS instances running on the same node.
                        Default: ${DEFAULT_PORT_SHIFT}
         -f PATH     => Directory containing additional beegfs config files.
                        There can be one file for each service as well as the client.
                        They must be named beegfs-<service>.conf, where <service>
                        can be meta, storage or client; the file for mgmtd must be
                        named beegfs-mgmtd.toml.
                        Only the options specified within the files are
                        set/overwritten, the rest of the defaults will not be
                        touched and still be applied. The directory and the
                        files need to be present on every node.
         -L PATH     => Log file directory. If necessary, the directory will be
                        created. Default: ${DEFAULT_LOG_PATH}
         -l          => Prefer local storage nodes.
         -P          => Use pdsh for parallel startup. If this option is not
                        given, ssh is used to start up the services on the nodes
                        sequentially.
         -b PATH     => Path to the pdsh binary. Default: <auto-detected>
         -r          => Use tmpfs for beegfs storage and metadata.
                        Note: On older Linux versions, tmpfs does not support
                        extended attributes. If you get an error message
                        from beegfs_meta reading "Failed to store root
                        directory" you have to provide an additional config
                        file beegfs-meta.conf containing the line
                        storeUseExtendedAttribs = false
         -k          => enable storage target mirroring
                        Note: Needs an even number of storage servers (-s).
         -j          => enable metadata server mirroring
                        Note: Needs an even number of metadata servers (-m).
         -q          => Suppress INFO messages, only print ERRORs.
         -t FILE     => Use FILE to define multiple storage targets and assign
                        them to storage pools. The file needs to be in the
                        following format:

                        pool_1:/path/to/target_1,/path/to/target_2,...
                        pool_2:/path/to/target_3,/path/to/target_4,...
                        ...

                        pool_n is the name of the storage pool, the comma separated
                        list after the colon are the paths to the target directories
                        that shall be part of this pool.
                        The lines can't contain whitespaces. BeeOND will look for
                        these directories and add them as a storage target on all
                        nodes where they exist. To avoid having unwanted targets
                        in a pool, make sure each of the specified paths only
                        exists on nodes where they are actually mounted on the
                        desired storage medium.
                        BeeOND will then assign the targets to the corresponding
                        storage pools and create a directory for each pool
                        on the root level of the BeeGFS mount.
                        This option can only be used together with -F.
         -T          => Don't create and assign the pool directories when using -t.
         -G NUM      => The base gRPC port (before port shifting) that the mgmtd
                        uses in this BeeOND instance. Defaults to ${DEFAULT_MGMTD_GRPC_PORT} and only needs
                        to be supplied if mgmtd is configured via configuration file
                        (see -f) to use a base gRPC port other than ${DEFAULT_MGMTD_GRPC_PORT}.

      Arguments that require a configuration directory (option -f) that is available
      on all nodes and contains the required files (see option descriptions):
         -C          => Enable connection authentication. Requires a "conn.auth"
                        file in the configuration directory.
         -E          => Enable TLS encryption between the mgmtd and the command
                        line configuration tool. Requires "cert.pem" and
                        "key.pem" files in the configuration directory.
         -H          => Enable enterprise features. This mode is required by all
                        other modes that enable enterprise features and requires a
                        "license.pem" file in the configuration directory.

   stop:
      Stop the file system on a number of nodes, specified by the node file.
      Use the information from the status file to unmount a file system on a
      number of nodes specified by the node file, and shut down the services.

      Mandatory arguments:
         -n FILENAME => Node file.

      Optional arguments:
         -i FILENAME => Status information file name.
                        Default: ${DEFAULT_STATUSFILE}
         -d          => Delete BeeGFS data on disks.
         -L          => Delete log files after successful shutdown.
         -c          => "Cleanup": Remove remaining processes and directories
                        of a potentially unsuccessful shutdown of an earlier
                        beeond instance. This switch silences the error
                        message when a status information file is not found on a
                        node or an unmount command fails; instead, a message is
                        printed (if "INFO" messages are not suppressed) when a
                        status file DOES exist, because this means there
                        actually was an instance before that is now being
                        cleaned up.
         -P          => Use pdsh for parallel shutdown. If this option is not
                        given, ssh is used to unmount the file system and stop
                        the services on all nodes sequentially.
         -b PATH     => Path to the pdsh binary. Default: ${DEFAULT_PDSH_PATH}
         -q          => Suppress INFO messages, only print ERRORs.

   stoplocal:
      Stop the file system on the local host only. This is recommended only as
      an emergency measure, e.g. after a host encountered an error during the
      distributed shutdown procedure. Uses the information from the status file
      to unmount the file system and stop the services on the local host only.

      Optional arguments:
         -i FILENAME => Status information file.
                        Default: ${DEFAULT_STATUSFILE}
         -d          => Delete BeeGFS data on disks.
         -L          => Delete log files after successful shutdown. If the log
                        directory is empty afterwards, it will also be removed.
         -c          => "Cleanup": Remove remaining processes and directories
                        of a potentially unsuccessful shutdown of an earlier
                        beeond instance. This switch silences the error
                        message when the status information file is not found or
                        the unmount command fails; instead, a message is printed
                        (if "INFO" messages are not suppressed) when a status
                        file DOES exist, because this means there actually was
                        an instance before that is now being cleaned up.
         -q          => Suppress INFO messages, only print ERRORs.
         -u          => ONLY unmount the file system.
                        (Cannot be used in combination with "-s".)
         -s          => ONLY stop non-client services. (*)
                        (Cannot be used in combination with "-u".)

EXAMPLES:
   Start a beeond instance on the nodes given in nodefile, using the data
   directory /data/beeond and the client mountpoint /mnt/beeond via pdsh
   for parallel startup:
      ${SELF} start -n nodefile -d /data/beeond -c /mnt/beeond -P

   Stop the file system:
      ${SELF} stop -n nodefile -P -L -d

EOF
   exit 1
}

### internal functions for general usage ###
# Report an error.
# Arguments: $1 - message text
# Outputs:   "ERROR: <message>" on stderr, followed by an empty line on stdout
print_error()
{
   printf 'ERROR: %s\n' "${1}" >&2
   printf '\n'
}

# Report an error via print_error and abort the script.
# Arguments: $1 - message text
# Exits:     always, with status 1
print_error_and_exit()
{
   local MESSAGE=${1}

   print_error "${MESSAGE}"
   exit 1
}

# Print an informational message unless quiet mode is active.
# Globals (read): QUIET - "true" suppresses the message
# Arguments:      $1 - message text
# Outputs:        "INFO: <message>" on stdout
print_info()
{
   if [ "${QUIET}" = "true" ]
   then
      return 0
   fi

   printf 'INFO: %s\n' "${1}"
}

# Verify that pdsh can be used on all hosts; fall back to ssh otherwise.
# Globals (read):    PDSH, PDSH_RCMD, BEEOND_STOPLOCAL
# Globals (written): USE_PDSH - set to "false" when pdsh is not usable
# Arguments:         $1 - comma-separated list of hostnames
# Exits:             if a host does not use /bin/bash as shell, or if the
#                    BeeOND helper script is missing on a host
check_pdsh()
{
   local HOSTS=$1 # comma-separated list (a single string, not an array)
   local RES
   local HOST
   local -a HOST_LIST

   print_info "Checking PDSH availability on the following hosts: ${HOSTS}"

   # execute cmd; a missing pdsh binary yields a non-zero RES as well and is
   # therefore handled by the ssh fallback below
   test -e "${PDSH}" &&\
   "${PDSH}" -R "${PDSH_RCMD}" -S -w "${HOSTS}" \
      "test \${SHELL} = '/bin/bash' || exit 2"
   RES=$?

   if [ "${RES}" -eq 2 ]
   then
      print_error_and_exit "One or more hosts don't use /bin/bash as default shell."
   elif [ "${RES}" -ne 0 ]
   then
      print_info "pdsh does not seem to work on all nodes. Disabling pdsh and using ssh instead"
      USE_PDSH=false

      # We have to repeat the reachability check using conventional SSH before continuing.
      # The list is split into an array so that the global IFS stays untouched.
      IFS=, read -r -a HOST_LIST <<< "${HOSTS}"
      for HOST in "${HOST_LIST[@]}"
      do
         check_reachability "${HOST}"
      done
      return
   fi

   "${PDSH}" -R "${PDSH_RCMD}" -S -w "${HOSTS}" \
      "if [ -e ${BEEOND_STOPLOCAL} ]; then true; else exit 2; fi" || \
      print_error_and_exit "Unable to find BeeOND helper program on one or more nodes.
Please make sure BeeOND is installed on all machines."
}

# Run a command on a single remote host through ssh.
# Globals (read): SSH, SSH_PARAMS
# Arguments:      $1 - hostname
#                 $2 - command line to execute remotely
# Returns:        exit status of the ssh invocation
execute_ssh_cmd()
{
   local TARGET_HOST="$1"
   local REMOTE_CMD="$2"

   # both parameters are mandatory
   if [[ -z "${TARGET_HOST}" || -z "${REMOTE_CMD}" ]]
   then
      print_error_and_exit "Internal function 'execute_ssh_cmd' was called without a host or without a command"
   fi

   ${SSH} "${SSH_PARAMS[@]}" "${TARGET_HOST}" "${REMOTE_CMD}"
}

# Run a command on several hosts in parallel through pdsh.
# Globals (read):    PDSH, PDSH_RCMD
# Globals (written): ERROR - set to "true" if the command failed and
#                            continuing on error was requested
# Arguments:         $1 - comma-separated list of hostnames
#                    $2 - command line to execute remotely
#                    $3 - "true" to continue after a failure, anything else
#                         aborts the script
execute_pdsh_cmd()
{
   local HOSTS="$1" # comma-separated list
   local CMD="$2"
   local CONTINUE_ON_ERROR="$3"
   local TMPFAILFILE
   TMPFAILFILE="/tmp/beegfs.pdsh_fail.$(date +%Y%m%d-%H%M%S)"

   # both the host list and the command are mandatory
   if [[ -z "${HOSTS}" || -z "${CMD}" ]]
   then
      print_error_and_exit "Internal function 'execute_pdsh_cmd' was called without a host or without a command"
   fi

   # A failing node leaves a marker file behind. With -S pdsh returns the
   # greatest exit status of all nodes, so success here means every node
   # succeeded.
   if ${PDSH} -R ${PDSH_RCMD} -S -w "${HOSTS}" "${CMD} || (touch ${TMPFAILFILE} && false)"
   then
      return 0
   fi

   # At least one node failed. The marker files are currently not evaluated;
   # the user has to look at the pdsh output to find the failing node.
   if [ "${CONTINUE_ON_ERROR}" != "true" ]
   then
      print_error_and_exit "Execution of a command failed. Please see pdsh output for more information."
   fi

   print_error "Execution of a command failed. Please see pdsh output for more information."
   ERROR="true"
}

# Check that a host can be reached via ssh, that the remote ${SHELL} is
# /bin/bash and that the BeeOND helper script exists on the host.
# Globals (read):    BEEOND_STOPLOCAL
# Globals (written): RES - exit status of the shell test
# Arguments:         $1 - hostname
# Exits:             with an error message if any of the checks fails
check_reachability()
{
   local HOST="$1"

   [ -n "${HOST}" ] || \
      print_error_and_exit "Internal function 'check_reachability' was called without a hostname"

   print_info "Checking reachability of host ${HOST}"

   execute_ssh_cmd "${HOST}" "test \${SHELL} = '/bin/bash'"
   RES=$?
   case "${RES}" in
      0)
         ;;
      255)
         # status 255 is treated as "ssh itself failed"
         print_error_and_exit "Host is unreachable via ssh: ${HOST}"
         ;;
      1)
         # the remote 'test' ran and evaluated to false
         print_error_and_exit "Host doesn't use /bin/bash as default shell: ${HOST}"
         ;;
      *)
         print_error_and_exit "Error contacting host: ${HOST}"
         ;;
   esac

   if ! execute_ssh_cmd "${HOST}" "test -e ${BEEOND_STOPLOCAL}"
   then
      print_error_and_exit "Could not find BeeOND helper program on host: ${HOST}
Please make sure BeeOND is installed on all machines."
   fi
}

# Make sure a node file was given and that it is an existing regular file.
# Globals (read): HOSTFILE
# Exits:          with an error message if one of the checks fails
check_hostfile()
{
   # hostfile set?
   [ -n "${HOSTFILE}" ] || print_error_and_exit "Node file undefined"

   # does it exist?
   [ -f "${HOSTFILE}" ] || print_error_and_exit "Node file does not exist: ${HOSTFILE}"
}

# Make sure a data path for the BeeGFS servers was given.
# Globals (read): DATA_PATH
# Exits:          with an error message if DATA_PATH is empty
check_datapath()
{
   [ -n "${DATA_PATH}" ] || print_error_and_exit "Path for BeeGFS data undefined"
}

# Make sure a client mount point was given.
# Globals (read): MOUNTPOINT
# Exits:          with an error message if MOUNTPOINT is empty
check_mountpoint()
{
   [ -n "${MOUNTPOINT}" ] || print_error_and_exit "Path for client mountpoint undefined"
}

# Check on every node
# - that the status file does not exist yet (otherwise a session may already
#   be running), and
# - that the status file can be created (otherwise we can't continue).
# Globals (read): STATUSFILE, USE_PDSH, PDSH, PDSH_RCMD, SSH, SSH_PARAMS
# Arguments:      $1 - comma-separated list of hostnames
# Exits:          with an error message if one of the checks fails
check_statusfile()
{
   local HOSTS=$1
   local HOST
   local -a HOST_LIST

   if [ "${HOSTS}" = "" ]
   then
      print_error_and_exit "Internal function 'check_statusfile' was called without a hostname"
   fi

   local CHECK_CMD="[ ! -e \"${STATUSFILE}\" ]"
   local TOUCH_CMD="touch \"${STATUSFILE}\""

   if [ "${USE_PDSH}" = "true" ]
   then
      # see if statusfile already exists
      if ! ${PDSH} -R ${PDSH_RCMD} -S -w "${HOSTS}" "${CHECK_CMD} || (echo \"Statusfile already exists.\" && false)"
      then
         print_error_and_exit "Status file ${STATUSFILE} on one or more hosts already exists. Maybe a session is already running or the previous session was not properly shut down."
      fi

      # touch statusfile on every host, to make sure the file can be accessed
      if ! ${PDSH} -R ${PDSH_RCMD} -S -w "${HOSTS}" "${TOUCH_CMD}"
      then
         print_error_and_exit "Could not create status file ${STATUSFILE} on one or more hosts."
      fi
   else
      # split the list into an array so that the global IFS stays untouched
      IFS=, read -r -a HOST_LIST <<< "${HOSTS}"

      # first pass: no host may already have a status file ...
      for HOST in "${HOST_LIST[@]}"
      do
         if ! ${SSH} "${SSH_PARAMS[@]}" "${HOST}" "${CHECK_CMD}"
         then
            print_error_and_exit "Status file ${STATUSFILE} on host ${HOST} already exists. Maybe a session is already running or the previous session was not properly shut down."
         fi
      done

      # ... second pass: create it everywhere
      for HOST in "${HOST_LIST[@]}"
      do
         if ! ${SSH} "${SSH_PARAMS[@]}" "${HOST}" "${TOUCH_CMD}"
         then
            print_error_and_exit "Could not create status file ${STATUSFILE} on host ${HOST}"
         fi
      done
   fi
}

# Create the log directory on all given hosts (no-op where it exists).
# Globals (read):    LOG_PATH, USE_PDSH
# Globals (written): CMD, HOST, IFS (reset via 'unset' in ssh mode)
# Arguments:         $@ - hostnames, one per argument
# Exits:             with an error message if the directory can't be created
create_log_path()
{
   local HOSTS
   # join all arguments with ',' - the format pdsh expects
   HOSTS=$(IFS=,; printf '%s\n' "$*")

   if [ -z "${HOSTS}" ]
   then
      print_error_and_exit "Internal function 'create_log_path' was called without a host."
   fi

   # 'mkdir -p' succeeds silently if the directory already exists
   CMD="mkdir -p \"${LOG_PATH}\""

   if [ "${USE_PDSH}" != "true" ]
   then
      # no pdsh: visit the hosts one by one
      IFS=,
      for HOST in ${HOSTS}
      do
         execute_ssh_cmd "${HOST}" "${CMD}" || \
            print_error_and_exit "Could not create log path ${LOG_PATH} on host ${HOST}"
      done
      unset IFS
      return
   fi

   execute_pdsh_cmd "${HOSTS}" "${CMD}" "false"
}

### internal functions for beegfs-ondemand start ###

# Mount a tmpfs at the data path on all given hosts and record it in the
# status file of each host.
# Globals (read):    USE_PDSH
# Globals (written): CMD
# Arguments:         $1 - comma-separated list of hostnames
#                    $2 - data path (created if necessary, used as mount point)
# Exits:             with an error message if the tmpfs can't be mounted
start_tmpfs()
{
   local HOSTS=$1
   local DATAPATH=$2
   local HOST
   local -a HOST_LIST

   # error checks
   if [ "${HOSTS}" = "" ] || [ "${DATAPATH}" = "" ]
   then
      print_error_and_exit "Internal function 'start_tmpfs' was called without all needed parameters"
   fi

   # the path is quoted for the remote shell, like the log path elsewhere
   CMD="mkdir -p \"${DATAPATH}\" && mount -t tmpfs tmpfs \"${DATAPATH}\""

   # split the list into an array so that the global IFS stays untouched
   IFS=, read -r -a HOST_LIST <<< "${HOSTS}"

   if [ "${USE_PDSH}" = "true" ]
   then
      print_info "Starting tmpfs on the following hosts: ${HOSTS}"

      execute_pdsh_cmd "${HOSTS}" "${CMD}" "false"

      for HOST in "${HOST_LIST[@]}"
      do
         if [ "${HOST}" = "" ]; then continue; fi
         add_to_status_file "${HOST}" tmpfs "${DATAPATH}" - -
      done
   else
      # no pdsh => do it manually with ssh loop
      print_info "Starting tmpfs mounts"

      # for each host, mount the tmpfs
      for HOST in "${HOST_LIST[@]}"
      do
         print_info "Starting tmpfs on host: ${HOST}"

         if ! execute_ssh_cmd "${HOST}" "${CMD}"
         then
            print_error_and_exit "Unable to start tmpfs on host: ${HOST}"
         else
            add_to_status_file "${HOST}" tmpfs "${DATAPATH}" "-" "-"
         fi
      done
   fi
}

# Start the metadata service on all given hosts and record it in the status
# file of each host.
# Globals (read):    LOG_PATH, META_LOG, META_BIN, META_CFG_NAME,
#                    META_NUMID_FILE, CURRENTTIME, CONNAUTH_LEGACY, CLEAR_DATA,
#                    BEEGFS_BIN_PATH, USE_PDSH, PREFER_LOCAL,
#                    PREFERRED_MDS_FILE, STATUSFILE, QUIET
# Globals (written): PARAMS, CMD
# Arguments:         $1 - comma-separated list of hostnames
#                    $2 - base data path (the service uses <path>/${META_BIN})
#                    $3 - management host
#                    $4 - port shift (may be empty)
#                    $5 - directory with additional config files (may be empty)
# Exits:             with an error message if a service can't be started
start_meta_servers()
{
   local HOSTS=$1 # comma separated
   local DATAPATH=$2
   local MGMTD=$3
   local PORT_SHIFT=$4 # port shift can be empty!
   local CFG_PATH=$5 # may be empty
   local CFG_FILE=${CFG_PATH}/${META_CFG_NAME}

   local LOGFILE=${LOG_PATH}/${META_LOG}
   local PIDFILE=/var/run/${META_BIN}-${CURRENTTIME}.pid

   local HOST
   local -a HOST_LIST

   # error checks
   if [ "${HOSTS}" = "" ] || [ "${MGMTD}" = "" ] || [ "${DATAPATH}" = "" ]
   then
      print_error_and_exit "Internal function 'start_meta_servers' was called without all \
needed parameters"
   fi

   DATAPATH=${DATAPATH}/${META_BIN}

   PARAMS="sysMgmtdHost=${MGMTD} storeMetaDirectory=${DATAPATH} logStdFile=${LOGFILE} \
      ${CONNAUTH_LEGACY} runDaemonized=true pidFile=${PIDFILE}"

   if [ "${PORT_SHIFT}" != "" ]
   then
      PARAMS="${PARAMS} connPortShift=${PORT_SHIFT}"
   fi

   # Remote command line. Everything written as \${...} is evaluated on the
   # remote host, everything else is expanded here. The config file is only
   # passed if it exists on the remote host.
   CMD="PARAMS=\"${PARAMS}\"; \
if [ -n \"${CFG_PATH}\" ] && [ -e \"${CFG_FILE}\" ]; then \
   PARAMS=\"\${PARAMS} cfgFile=${CFG_FILE}\"; fi; \
if [ \"${CLEAR_DATA}\" = \"true\" ]; then \
   rm -rf ${DATAPATH}; fi; \
${BEEGFS_BIN_PATH}/${META_BIN} \${PARAMS}"

   if [ "${USE_PDSH}" = "true" ]
   then
      print_info "Starting ${META_BIN} processes on the following hosts: ${HOSTS}"
      print_info "Metadata server log: ${LOGFILE}"

      execute_pdsh_cmd "${HOSTS}" "${CMD}" "false"

      if [ "${PREFER_LOCAL}" = "true" ]
      then
         # create the preferred MDS file (actually just a symlink to the node ID file)
         execute_pdsh_cmd "${HOSTS}" "rm -f ${PREFERRED_MDS_FILE}; \
            ln -s ${DATAPATH}/${META_NUMID_FILE} ${PREFERRED_MDS_FILE}" "false"
      fi

      # %h is replaced by pdsh with the name of the respective remote host
      execute_pdsh_cmd "${HOSTS}" "echo %h,${META_BIN},${DATAPATH},${LOGFILE},${PIDFILE} >> ${STATUSFILE}" "false"

   else
      # no pdsh => do it manually with ssh loop
      print_info "Starting ${META_BIN} processes"
      print_info "Metadata server log: ${LOGFILE}"

      # split the list into an array so that the global IFS stays untouched
      IFS=, read -r -a HOST_LIST <<< "${HOSTS}"

      # for each host, start server
      for HOST in "${HOST_LIST[@]}"
      do
         print_info "Starting ${META_BIN} on host: ${HOST}"
         if ! execute_ssh_cmd "${HOST}" "${CMD}"
         then
            print_error_and_exit "Unable to start ${META_BIN} on host: ${HOST}"
         else
            add_to_status_file "${HOST}" "${META_BIN}" "${DATAPATH}" "${LOGFILE}" "${PIDFILE}"
            if [ "${PREFER_LOCAL}" = "true" ]
            then
               # create the preferred MDS file (actually just a symlink to the node ID file)
               execute_ssh_cmd "${HOST}" "rm -f ${PREFERRED_MDS_FILE}; \
                  ln -s ${DATAPATH}/${META_NUMID_FILE} ${PREFERRED_MDS_FILE}"
            fi
         fi
      done
   fi

   if [ "${QUIET}" != "true" ]
   then
      echo ""
   fi
}

# Start the storage service on all given hosts and record it in the status
# file of each host. If a target file is configured, every listed target
# directory that exists on a host becomes a storage target of that host and
# the storage pools are created afterwards.
# Globals (read):    LOG_PATH, STORAGE_LOG, STORAGE_BIN, STORAGE_CFG_NAME,
#                    TARGET_NUMID_FILE, CURRENTTIME, CONNAUTH_LEGACY,
#                    CLEAR_DATA, TARGETFILE, BEEGFS_BIN_PATH, USE_PDSH,
#                    PREFER_LOCAL, PREFERRED_TARGET_FILE, STATUSFILE, QUIET
# Globals (written): PARAMS, CMD
# Arguments:         $1 - comma-separated list of hostnames
#                    $2 - base data path (the service uses <path>/${STORAGE_BIN})
#                    $3 - management host
#                    $4 - port shift (may be empty)
#                    $5 - directory with additional config files (may be empty)
# Exits:             with an error message if a service can't be started
start_storage_servers()
{
   local HOSTS=$1
   local DATAPATH=$2
   local MGMTD=$3
   local PORT_SHIFT=$4 # port shift can be empty!
   local CFG_PATH=$5 # may be empty
   local CFG_FILE=${CFG_PATH}/${STORAGE_CFG_NAME}

   local LOGFILE=${LOG_PATH}/${STORAGE_LOG}
   local PIDFILE=/var/run/${STORAGE_BIN}-${CURRENTTIME}.pid

   local HOST
   local -a HOST_LIST

   # error checks
   if [ "${HOSTS}" = "" ] || [ "${MGMTD}" = "" ] || [ "${DATAPATH}" = "" ]
   then
      print_error_and_exit "Internal function 'start_storage_servers' was called without all \
needed parameters"
   fi

   DATAPATH=${DATAPATH}/${STORAGE_BIN}

   PARAMS="sysMgmtdHost=${MGMTD} logStdFile=${LOGFILE} runDaemonized=true pidFile=${PIDFILE} ${CONNAUTH_LEGACY}"

   if [ "${PORT_SHIFT}" != "" ]
   then
      PARAMS="${PARAMS} connPortShift=${PORT_SHIFT}"
   fi

   # Remote command lines. Everything written as \${...} is evaluated on the
   # remote host, everything else is expanded here.
   if [ "${TARGETFILE}" != "" ]
   then
      local ALL_TARGETS
      ALL_TARGETS=$(get_all_targets_from_targetfile)

      # The loop variable T only exists on the remote side, so it has to be
      # escaped. The glob must stay outside the quotes, otherwise nothing is
      # removed. ':?' makes the remote shell abort instead of ever expanding
      # to '/*'.
      CMD="while read -r T; do \
            if [ -d \"\${T}\" ]; then EXISTING_TARGETS=\"\${EXISTING_TARGETS}\${T},\"; \
               if [ \"${CLEAR_DATA}\" = \"true\" ]; then \
                  rm -rf -- \"\${T:?}\"/*; \
               fi; \
            fi; \
         done < <(echo \"${ALL_TARGETS}\" | tr ',' '\n'); \
         PARAMS=\"${PARAMS} storeStorageDirectory=\${EXISTING_TARGETS}\"; \
         if [ -n \"${CFG_PATH}\" ] && [ -e \"${CFG_FILE}\" ]; then \
            PARAMS=\"\${PARAMS} cfgFile=${CFG_FILE}\"; fi; \
         ${BEEGFS_BIN_PATH}/${STORAGE_BIN} \${PARAMS}"
   else
      CMD="PARAMS=\"${PARAMS} storeStorageDirectory=${DATAPATH}\"; \
         if [ -n \"${CFG_PATH}\" ] && [ -e \"${CFG_FILE}\" ]; then \
         PARAMS=\"\${PARAMS} cfgFile=${CFG_FILE}\"; fi; \
         if [ \"${CLEAR_DATA}\" = \"true\" ]; then \
         rm -rf ${DATAPATH}; fi; \
         ${BEEGFS_BIN_PATH}/${STORAGE_BIN} \${PARAMS}"
   fi


   if [ "${USE_PDSH}" = "true" ]
   then
      print_info "Starting ${STORAGE_BIN} processes on the following hosts: ${HOSTS}"
      print_info "Storage server log: ${LOGFILE}"

      execute_pdsh_cmd "${HOSTS}" "${CMD}" "false"

      if [ "${PREFER_LOCAL}" = "true" ]
      then
         # create the preferred target file (actually just a symlink to the target ID file)
         execute_pdsh_cmd "${HOSTS}" "rm -f ${PREFERRED_TARGET_FILE}; \
            ln -s ${DATAPATH}/${TARGET_NUMID_FILE} ${PREFERRED_TARGET_FILE}" "false"
      fi

      # %h is replaced by pdsh with the name of the respective remote host
      execute_pdsh_cmd "${HOSTS}" "echo %h,${STORAGE_BIN},${DATAPATH},${LOGFILE},${PIDFILE} >> ${STATUSFILE}" "false"

   else
      # no pdsh => do it manually with ssh loop
      print_info "Starting ${STORAGE_BIN} processes"
      print_info "Storage server log: ${LOGFILE}"

      # split the list into an array so that the global IFS stays untouched
      IFS=, read -r -a HOST_LIST <<< "${HOSTS}"

      # for each host, start server
      for HOST in "${HOST_LIST[@]}"
      do
         print_info "Starting ${STORAGE_BIN} on host: ${HOST}"
         if ! execute_ssh_cmd "${HOST}" "${CMD}"
         then
            print_error_and_exit "Unable to start ${STORAGE_BIN} on host: ${HOST}"
         else
            add_to_status_file "${HOST}" "${STORAGE_BIN}" "${DATAPATH}" "${LOGFILE}" "${PIDFILE}"
            if [ "${PREFER_LOCAL}" = "true" ]
            then
               # create the preferred target file (actually just a symlink to the target ID file)
               execute_ssh_cmd "${HOST}" "rm -f ${PREFERRED_TARGET_FILE}; \
                  ln -s ${DATAPATH}/${TARGET_NUMID_FILE} ${PREFERRED_TARGET_FILE}"
            fi
         fi
      done
   fi

   if [ "${TARGETFILE}" != "" ]
   then
      create_storage_pools "${HOSTS}"
   fi

   if [ "${QUIET}" != "true" ]
   then
      echo ""
   fi
}

# Create the storage pools described in the target file.
# For every pool line the numeric target IDs are collected from all hosts
# (by reading the ID file in each listed target directory that has one) and
# then handed to the ctl tool. For a pool named "default"/"Default" only the
# alias of the existing pool storage:1 is set.
# Globals (read): TARGETFILE, TARGET_NUMID_FILE, CTL_BIN, CTL_GLOBAL_PARAMS
# Arguments:      $1 - comma-separated list of hostnames
create_storage_pools()
{
   local HOSTS=$1
   local LINE POOL TARGETS TARGET_IDS HOST HOST_TARGETS TARGET_ID
   local CMD

   if [ "${TARGETFILE}" != "" ]
   then
      while read -r LINE
      do
         IFS=: read -r POOL TARGETS <<< "${LINE}"
         TARGETS=$(echo "${TARGETS}" | tr -d "[:space:]")

         TARGET_IDS=
         while read -r HOST
         do
            # prints the IDs of all targets of this pool that exist on the
            # host, separated by blanks
            CMD="echo \"${TARGETS}\" | tr ',' '\n' | \
               while read T; do if [ -f \"\${T}/${TARGET_NUMID_FILE}\" ]; then \
               echo -n \"\$(cat \"\${T}/${TARGET_NUMID_FILE}\") \"; fi; done"
            HOST_TARGETS=$(execute_ssh_cmd "${HOST}" "${CMD}")
            # intentionally unquoted: split the ID list at blanks
            for TARGET_ID in ${HOST_TARGETS}
            do
               TARGET_IDS="${TARGET_IDS:+$TARGET_IDS,}storage:$TARGET_ID"
            done
         done < <(echo "${HOSTS}" | tr ',' '\n')

         # CTL_GLOBAL_PARAMS is intentionally unquoted: it may hold several
         # separate options
         if [ "$POOL" == "default" ] || [ "$POOL" == "Default" ]
         then
            "${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
               pool set-alias storage:1 "${POOL}" > /dev/null
         else
            # create pool with collected ids
            "${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
               pool create "${POOL}" --targets "${TARGET_IDS}" > /dev/null
         fi
      done < <(grep -v "^$" "${TARGETFILE}" | grep -v "^\s*\#")
   fi
}

# Create one directory per storage pool on the root level of the file system
# and assign the directory to its pool.
# The pool aliases are taken from the output of the ctl tool's pool listing
# (first non-blank line is skipped as table header).
# Globals (read): CTL_BIN, CTL_GLOBAL_PARAMS
assign_storage_pool_dirs()
{
   local POOLS
   local ALIAS

   # CTL_GLOBAL_PARAMS is intentionally unquoted: it may hold several
   # separate options
   POOLS=$("${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
      pool list --columns alias | grep -v '^\s*$' | tail -n+2)

   # 'read' strips leading and trailing blanks from the alias
   while read -r ALIAS
   do
      # An empty listing still yields one empty line here; without this guard
      # the commands below would operate on "/" with an empty pool name.
      if [ -z "${ALIAS}" ]
      then
         continue
      fi

      "${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
      entry create dir --mount=none "/${ALIAS}" > /dev/null

      "${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
      entry set --mount=none --pool "${ALIAS}" "/${ALIAS}" &> /dev/null

   done <<< "${POOLS}"
}

# Validate the target file and the options used together with it.
# The file must contain at least one valid "pool:path[,path...]" line and,
# apart from empty lines and comments, nothing else. Pool names and target
# paths must be unique (compared case-insensitively). Storage pools require
# -F and can't be combined with storage mirroring.
# Globals (read): TARGETFILE, CLEAR_DATA, STORAGE_MIRROR
# Exits:          with an error message if one of the checks fails
check_targetfile()
{
   local ENTRY_PATTERN='^\w+:([\w/_.-]+,?)+\s*$'
   local VALID_LINES
   local INVALID_LINES
   local DUPLICATES

   # lines that match the expected format ...
   VALID_LINES=$(grep -i -P "${ENTRY_PATTERN}" "${TARGETFILE}")
   # ... and lines that neither match nor are comments or empty
   INVALID_LINES=$(grep -i -P -v "${ENTRY_PATTERN}" "${TARGETFILE}" | grep -v "^\s*\#" | \
      grep -v "^$")

   if [ -z "${VALID_LINES}" ] || [ -n "${INVALID_LINES}" ]
   then
      print_error_and_exit "${TARGETFILE} contains invalid entries or is empty."
   fi

   # pool names are everything before the first colon
   DUPLICATES=$(grep -v "^$" "${TARGETFILE}" | grep -v "^\s*\#" | \
      tr -d ' ' | awk -F ':' '{print $1}' | sort | uniq -i -d)

   if [ -n "${DUPLICATES}" ]
   then
      print_error_and_exit "${TARGETFILE} contains non-unique pool names."
   fi

   DUPLICATES=$(get_all_targets_from_targetfile | tr ',' '\n' | sort | uniq -i -d)

   if [ -n "${DUPLICATES}" ]
   then
      print_error_and_exit "${TARGETFILE} contains non-unique target paths."
   fi

   if [ "${CLEAR_DATA}" != "true" ]
   then
      print_error_and_exit "Using storage pools requires the -F option to make sure no old data is left."
   fi

   if [ "${STORAGE_MIRROR}" == "true" ]
   then
      print_error_and_exit "Using storage pools doesn't support storage mirroring (-k)."
   fi
}

# Print the target paths of all pools in the target file as one
# comma-separated list. Every path is followed by a comma, so the list ends
# with a trailing comma. Empty lines and comment lines are ignored and all
# whitespace is removed from the paths.
# Globals (read): TARGETFILE
# Outputs:        the list on stdout
get_all_targets_from_targetfile()
{
   local ALL_TARGETS=""
   local LINE TARGETS T

   while read -r LINE
   do
      # everything after the first colon is the target list of the pool
      IFS=: read -r _ TARGETS <<< "${LINE}"
      while read -r T
      do
         ALL_TARGETS+="${T//[[:space:]]/},"
      done < <(tr ',' '\n' <<< "${TARGETS}")
   done < <(grep -v "^$" "${TARGETFILE}" | grep -v "^\s*\#")

   echo "${ALL_TARGETS}"
}

# Initialize and start the management service on a single host and record
# it in the status file of that host.
# Globals (read):    MGMTD_BIN, MGMTD_CFG_NAME, CURRENTTIME, BEEGFS_BIN_PATH,
#                    CLEAR_DATA, TLS_DISABLE, TLS_CERT_FILE, TLS_KEY_FILE,
#                    CONNAUTH_FLAG, LICENSE_FILE, QUIET
# Globals (written): DBPATH, PARAMS, CMD
# Arguments:         $1 - hostname
#                    $2 - base data path (the service uses <path>/${MGMTD_BIN})
#                    $3 - port shift (may be empty)
#                    $4 - directory with additional config files (may be empty)
# Exits:             with an error message if the service can't be started
start_mgmtd()
{
   local HOST=$1
   local DATAPATH=$2
   local PORT_SHIFT=$3 # port shift can be empty!
   local CFG_PATH=$4 # may be empty
   local CFG_FILE=${CFG_PATH}/${MGMTD_CFG_NAME}
   local PIDFILE=/var/run/${MGMTD_BIN}-${CURRENTTIME}.pid

   # host and data path are mandatory
   if [[ -z "${HOST}" || -z "${DATAPATH}" ]]
   then
      print_error_and_exit "Internal function 'start_mgmtd' was called without all needed parameters"
   fi

   DATAPATH=${DATAPATH}/${MGMTD_BIN}
   DBPATH=${DATAPATH}/beegfs-mgmtd.sqlite3

   print_info "Starting ${MGMTD_BIN} processes"
   print_info "Starting ${MGMTD_BIN} on host: ${HOST}"

   PARAMS="--db-file ${DBPATH} --daemonize true --daemonize-pid-file ${PIDFILE} ${TLS_DISABLE} ${TLS_CERT_FILE} ${TLS_KEY_FILE} ${CONNAUTH_FLAG} ${LICENSE_FILE}"
   if [ -n "${PORT_SHIFT}" ]
   then
      PARAMS+=" --port-shift ${PORT_SHIFT}"
   fi

   # Remote command line. Everything written as \${...} is evaluated on the
   # remote host, everything else is expanded here. The '--init' call is
   # followed by ';', so its exit status does not prevent the start.
   CMD="PARAMS=\"${PARAMS}\"; \
if [ -n \"${CFG_PATH}\" ] && [ -e \"${CFG_FILE}\" ]; then \
   PARAMS=\"\${PARAMS} --config-file ${CFG_FILE}\"; fi; \
if [ \"${CLEAR_DATA}\" = \"true\" ]; then \
   rm -rf ${DATAPATH}; fi; \
${BEEGFS_BIN_PATH}/${MGMTD_BIN} --init --db-file ${DBPATH}; \
${BEEGFS_BIN_PATH}/${MGMTD_BIN} \${PARAMS}"

   if execute_ssh_cmd "${HOST}" "${CMD}"
   then
      # mgmtd has no log file entry, hence the "-" placeholder
      add_to_status_file "${HOST}" "${MGMTD_BIN}" "${DATAPATH}" "-" "${PIDFILE}"
   else
      print_error_and_exit "Unable to start ${MGMTD_BIN} on host: ${HOST}"
   fi

   if [ "${QUIET}" != "true" ]
   then
      echo ""
   fi
}

# Load the client kernel module and mount the file system on all given
# hosts; the mount is recorded in the status file of each host.
# Globals (read):    LOG_PATH, CLIENT_LOG, CLIENT_BIN, CLIENT_CFG_NAME,
#                    CONNAUTH_LEGACY, PREFER_LOCAL, PREFERRED_MDS_FILE,
#                    PREFERRED_TARGET_FILE, CTL_BIN, CTL_GLOBAL_PARAMS,
#                    USE_PDSH, STATUSFILE, QUIET
# Globals (written): MODPROBE_CMD, REBUILD_CMD, MOUNT_PARAMS, MOUNT_CMD,
#                    CTL_CMD
# Arguments:         $1 - comma-separated list of hostnames
#                    $2 - management host
#                    $3 - mount point
#                    $4 - port shift (may be empty)
#                    $5 - directory with additional config files (may be empty)
# Exits:             with an error message if the file system can't be mounted
start_clients()
{
   local HOSTS=$1
   local MGMTD=$2
   local MOUNTPOINT=$3
   local PORT_SHIFT=$4 # port shift can be empty!
   local CFG_PATH=$5 # may be empty
   local CLIENT_CFG_FILE=${CFG_PATH}/${CLIENT_CFG_NAME}

   local LOGFILE=${LOG_PATH}/${CLIENT_LOG}

   local HOST
   local -a HOST_LIST

   # error checks
   if [ "${HOSTS}" = "" ] || [ "${MGMTD}" = "" ] || [ "${MOUNTPOINT}" = "" ]
   then
      print_error_and_exit "Internal function 'start_clients' was called without all \
needed parameters"
   fi

   MODPROBE_CMD="modprobe beegfs"
   REBUILD_CMD="/etc/init.d/${CLIENT_BIN} rebuild"

   MOUNT_PARAMS="-osysMgmtdHost=${MGMTD}"

   # only append the (blank-separated) legacy options if there are any;
   # otherwise the option list would contain an empty entry
   if [ -n "${CONNAUTH_LEGACY}" ]
   then
      MOUNT_PARAMS="${MOUNT_PARAMS},${CONNAUTH_LEGACY// /,}"
   fi

   if [ "${PORT_SHIFT}" != "" ]
   then
      MOUNT_PARAMS="${MOUNT_PARAMS},connPortShift=${PORT_SHIFT}"
   fi

   # Remote command line. Everything written as \${...} is evaluated on the
   # remote host, everything else is expanded here. The config file and the
   # "preferred" files are only passed if they exist on the remote host.
   MOUNT_CMD="PARAMS=\"${MOUNT_PARAMS}\"; if [ -n \"${CFG_PATH}\" ] && \
[ -e \"${CLIENT_CFG_FILE}\" ]; then PARAMS=\"\${PARAMS},cfgFile=${CLIENT_CFG_FILE}\"; fi; \
if [ \"${PREFER_LOCAL}\" = \"true\" ] && [ -e \"${PREFERRED_MDS_FILE}\" ]; \
then PARAMS=\"\${PARAMS},tunePreferredMetaFile=${PREFERRED_MDS_FILE}\"; fi; \
if [ \"${PREFER_LOCAL}\" = \"true\" ] && [ -e \"${PREFERRED_TARGET_FILE}\" ]; \
then PARAMS=\"\${PARAMS},tunePreferredStorageFile=${PREFERRED_TARGET_FILE}\"; fi; \
mkdir -p ${MOUNTPOINT} && ${MODPROBE_CMD} && mount -t beegfs beegfs_ondemand ${MOUNTPOINT} \${PARAMS}"

   if [ "${USE_PDSH}" = "true" ]
   then
      print_info "Starting ${CLIENT_BIN} processes on the following hosts: ${HOSTS}"
      print_info "Client log: ${LOGFILE}"

      # %h is replaced by pdsh with the name of the respective remote host
      execute_pdsh_cmd "${HOSTS}" "echo %h,${CLIENT_BIN},${MOUNTPOINT},${LOGFILE},- >> ${STATUSFILE}" "false"

      # if the module can't be loaded, try to rebuild it
      execute_pdsh_cmd "${HOSTS}" "${MODPROBE_CMD} || ${REBUILD_CMD}" "false"
      execute_pdsh_cmd "${HOSTS}" "${MOUNT_CMD}" "false"

      if [ "${PREFER_LOCAL}" = "true" ] #set target count to 1
      then
         CTL_CMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
            entry set --num-targets 1 --chunk-size 512ki ${MOUNTPOINT} > /dev/null"
         execute_pdsh_cmd "${HOSTS}" "${CTL_CMD}" "false"
      fi
   else
      # no pdsh => do it manually with ssh loop

      print_info "Starting ${CLIENT_BIN} processes"
      print_info "Client log: ${LOGFILE}"

      # split the list into an array so that the global IFS stays untouched
      IFS=, read -r -a HOST_LIST <<< "${HOSTS}"

      # for each host, start client
      for HOST in "${HOST_LIST[@]}"
      do
         print_info "Starting ${CLIENT_BIN} on host: ${HOST}"

         if ! execute_ssh_cmd "${HOST}" "${MODPROBE_CMD}"
         then
            print_info "Module beegfs could not be loaded on host: ${HOST}. Trying to recompile \
from source."
            execute_ssh_cmd "${HOST}" "${REBUILD_CMD}"
         fi

         if ! execute_ssh_cmd "${HOST}" "${MOUNT_CMD}"
         then
            print_error_and_exit "Unable to start BeeGFS client on host: ${HOST}"
         else
            # NOTE : mountpoint as data path
            add_to_status_file "${HOST}" "${CLIENT_BIN}" "${MOUNTPOINT}" "${LOGFILE}" "-"

            if [ "${PREFER_LOCAL}" = "true" ] #set target count to 1
            then
               CTL_CMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
                  entry set --num-targets 1 --chunk-size 512ki ${MOUNTPOINT} > /dev/null"
               execute_ssh_cmd "${HOST}" "${CTL_CMD}"
            fi

         fi
      done
   fi

   if [ "${QUIET}" != "true" ]
   then
      echo ""
   fi
}

# Appends one status record "host,service,datapath,logfile,pidfile" to the status file
# (${STATUSFILE}) on the given host. The record is written remotely via execute_ssh_cmd.
# Parameters: host service datapath logfile pidfile
# All parameters except the data path have to be non-empty.
add_to_status_file()
{
   local HOST="$1" SERVICE="$2" DATAPATH="$3" LOGFILE="$4" PIDFILE="$5"

   # the data path is the only field that is allowed to be empty
   if [[ -z "${HOST}" || -z "${SERVICE}" || -z "${LOGFILE}" || -z "${PIDFILE}" ]]
   then
      print_error_and_exit "Internal function 'add_to_status_file' was called without all \
needed parameters"
   fi

   # NOTE: INFO is intentionally not declared local here (it stays visible after the call)
   INFO="${HOST},${SERVICE},${DATAPATH},${LOGFILE},${PIDFILE}"
   execute_ssh_cmd "${HOST}" "echo ${INFO} >> ${STATUSFILE}"
}

### internal functions for beegfs-ondemand stop ###

# build the argument string for the "stoplocal" function
# Prints the option string that is handed to "do_stoplocal", derived from the variables
# DELETE_DATA, DELETE_LOGS and CLEANUP as seen by the caller. The result always starts with
# " -q" (quiet).
make_stoplocal_args()
{
   local args=" -q" # quiet

   # delete data
   [[ "${DELETE_DATA}" == "true" ]] && args+=" -d"

   # delete logs
   [[ "${DELETE_LOGS}" == "true" ]] && args+=" -L"

   # don't complain about missing files (from properly shut down beegfs-ondemand instances)
   [[ "${CLEANUP}" == "true" ]] && args+=" -c"

   echo "${args}"
}

# Runs the stoplocal helper in "-s" mode on all given hosts and removes the status file on
# each of them afterwards. Depending on USE_PDSH, the commands are sent with one pdsh call per
# step or with one ssh call per host.
# Parameters: hosts (comma-separated) deleteData deleteLogs
stop_procs()
{
   local HOSTS=$1
   # these two must keep their names: make_stoplocal_args reads them through dynamic scoping
   local DELETE_DATA=$2
   local DELETE_LOGS=$3

   # command line for the remote stoplocal run
   STOPSERVERSCMD="source ${BEEOND_STOPLOCAL}; \
         do_stoplocal -s -i ${STATUSFILE} $(make_stoplocal_args)"

   # step 1: stop the services
   case "${USE_PDSH}" in
      true)
         print_info "Stopping remaining processes on the following hosts: ${HOSTS}"

         execute_pdsh_cmd "${HOSTS}" "${STOPSERVERSCMD}" "true"
      ;;
      *)
         # no pdsh => one ssh call per host; IFS splits the comma-separated host list
         IFS=,
         for HOST in ${HOSTS}
         do
            print_info "Stopping remaining processes on host: ${HOST}"
            execute_ssh_cmd "${HOST}" "${STOPSERVERSCMD}"
         done
         unset IFS
      ;;
   esac

   # step 2: remove the status file
   local rm_statusfile_cmd="rm -f ${STATUSFILE}"

   case "${USE_PDSH}" in
      true)
         print_info "Deleting status file on hosts: ${HOSTS}"

         execute_pdsh_cmd "${HOSTS}" "${rm_statusfile_cmd}" "true"
      ;;
      *)
         # no pdsh => one ssh call per host
         IFS=,
         for HOST in ${HOSTS}
         do
            print_info "Deleting status file on host: ${HOST}"
            execute_ssh_cmd "${HOST}" "${rm_statusfile_cmd}"
         done
         unset IFS
      ;;
   esac
}

# Runs the stoplocal helper in "-u" mode on all given hosts (used by do_stop to unmount the
# file system before the services are stopped). Depending on USE_PDSH, the command is sent
# with a single pdsh call or with one ssh call per host.
# Parameters: hosts (comma-separated)
unmount_clients()
{
   # keep the host list local (like stop_procs does), so that a HOSTS variable of the caller
   # is not overwritten
   local HOSTS=$1

   # prepare command for remote script
   local UMOUNTCMD
   UMOUNTCMD="source ${BEEOND_STOPLOCAL}; \
      do_stoplocal -u -i ${STATUSFILE} $(make_stoplocal_args)"

   if [ "${USE_PDSH}" = "true" ]
   then
      print_info "Unmounting file system on the following hosts: ${HOSTS}"

      execute_pdsh_cmd "${HOSTS}" "${UMOUNTCMD}" "true"
   else
      # ssh mode - launch command for each host separately;
      # IFS splits the comma-separated host list
      IFS=,
      for HOST in ${HOSTS}
      do
         print_info "Unmounting file system on host: ${HOST}"
         execute_ssh_cmd "${HOST}" "${UMOUNTCMD}"
      done
      unset IFS
   fi
}

# Blocks until all nodes of the given type have registered with mgmtd, all their targets are
# online/good and all targets have reported their available space.
# Polls the ctl tool (${CTL_BIN} ${CTL_GLOBAL_PARAMS}) once per second and prints a progress
# dot for every unsuccessful poll. There is no timeout.
# Parameters: nodetype numNodes
wait_online_good()
{
   local NODE_TYPE=$1
   local NUM_NODES=$2
   local CTLCMD

   # In all three steps "head -n-1 | tail -n+2" drops the first and the last line of the ctl
   # output, so that only the rows in between are evaluated.

   #first, wait for the correct number of *nodes* to become available.
   echo -n "Waiting until all nodes have registered with mgmtd..."
   CTLCMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS}          \
         node list --node-type ${NODE_TYPE}"
   while [ "$(${CTLCMD} | head -n-1 | tail -n+2 | wc -l)" != "${NUM_NODES}" ]
   do
      echo -n "."
      sleep 1
   done
   echo

   # now wait for all *targets* to become online/good. (also works for metadata servers, since they
   # are internally treated as one target per server)
   # The list is queried only once per poll and the rows that are *not* "Online Good" are
   # counted. Comparing the results of two separate queries could mix two different states.
   echo -n "Waiting for all nodes/targets to be online and in sync..."
   CTLCMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS}          \
         target list --node-type ${NODE_TYPE} --state --columns reachability,consistency"
   while [ "$(${CTLCMD} | head -n-1 | tail -n+2 | grep -v -c "Online\s\+Good")" != "0" ]
   do
      echo -n "."
      sleep 1
   done
   echo

   # and now, we wait until all targets have reported their available space and inodes. It should be
   # good enough to only check space, because both will be reported at the same time. Without this,
   # the automatic mirror group creation might fail, because it compares target sizes and free
   # inodes.
   # A row that only consists of "-" followed by whitespace has no space information yet.
   echo -n "Waiting for all nodes/targets to report their available space..."
   CTLCMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS}          \
         target list --node-type ${NODE_TYPE} --raw --columns space"
   while [ "$(${CTLCMD} | head -n-1 | tail -n+2 | grep -c -s '^-\s\+$')" != "0" ]
   do
      echo -n "."
      sleep 1
   done
   echo

}

### main functions ###
# Handler for the "start" action: parses the start options, runs the check_* helpers and then
# calls the start_* helpers for mgmtd, storage servers, metadata servers and clients on the
# hosts listed in the node file. Optionally sets up mirroring and storage pool directories and
# finally prints a summary with hints for the `beegfs` command line tool.
#
# Parameters: the command line arguments following the "start" action.
#
# Some options are validated while they are parsed, so their order matters:
#    -f (config path) has to be given before -C, -E and -H
#    -H (license)     has to be given before -k, -j and -t
do_start()
{
   # defaults, may be overridden by the options below
   CLEAR_DATA="false"
   USE_PDSH="false"
   PREFER_LOCAL="false"
   QUIET="false"
   USE_TMPFS="false"
   STORAGE_MIRROR="false"
   META_MIRROR="false"
   PORT_SHIFT=${DEFAULT_PORT_SHIFT}
   ASSIGN_STORAGE_POOL_DIRS=true
   CONNAUTH_FLAG="--auth-disable"
   CONNAUTH_LEGACY="connDisableAuthentication=true"
   TLS_DISABLE="--tls-disable"
   TLS_CERT_FILE=""
   TLS_KEY_FILE=""
   LICENSE_FILE=""
   MGMTD_GRPC_PORT=${DEFAULT_MGMTD_GRPC_PORT}

   # leading ':' => silent getopts error reporting, handled by the '\?' and ':' cases
   while getopts ":c:d:f:Fi:m:n:p:G:lL:Pb:s:qrkjt:TCEH" opt; do
      case $opt in
         n)
            HOSTFILE=${OPTARG}
         ;;
         d)
            DATA_PATH=${OPTARG}
         ;;
         F)
            CLEAR_DATA="true"
         ;;
         c)
            MOUNTPOINT=${OPTARG}
         ;;
         i)
            STATUSFILE=${OPTARG}
         ;;
         L)
            LOG_PATH=${OPTARG}
         ;;
         m)
            if ! [[ ${OPTARG} =~ ^[0-9]+$ ]];
            then
               print_error_and_exit "number of metadata servers must be numeric";
            fi
            NUM_META_SERVER=${OPTARG}
         ;;
         p)
            if ! [[ ${OPTARG} =~ ^[0-9]+$ ]];
            then
               print_error_and_exit "port shift must be numeric";
            fi
            PORT_SHIFT=${OPTARG}
         ;;
         G)
            if ! [[ ${OPTARG} =~ ^[0-9]+$ ]];
            then
               print_error_and_exit "management gRPC port must be numeric";
            fi
            MGMTD_GRPC_PORT=${OPTARG}
         ;;
         P)
            USE_PDSH="true"
         ;;
         b)
            PDSH=${OPTARG}
         ;;
         s)
            if ! [[ ${OPTARG} =~ ^[0-9]+$ ]];
            then
               print_error_and_exit "number of storage servers must be numeric";
            fi
            NUM_STORAGE_SERVER=$OPTARG
         ;;
         f)
            if ! [[ -d ${OPTARG} ]]; then
               print_error_and_exit "The -f option expects a path to a directory: ${OPTARG}"
            fi
            CONFIGPATH=${OPTARG}
         ;;
         l)
            PREFER_LOCAL="true"
         ;;
         q)
            QUIET="true"
         ;;
         r)
            USE_TMPFS="true"
         ;;
         k)
            if [[ -z ${LICENSE_FILE} ]] ; then
               print_error_and_exit "To use mirroring, licensing (option -H) must be configured before the option -k"
            fi
            STORAGE_MIRROR="true"
         ;;
         j)
            if [[ -z ${LICENSE_FILE} ]] ; then
               print_error_and_exit "To use mirroring, licensing (option -H) must be configured before the option -j"
            fi
            META_MIRROR="true"
         ;;
         t)
            if [[ -z ${LICENSE_FILE} ]] ; then
               print_error_and_exit "To use storage pools, licensing (option -H) must be configured before the option -t"
            fi
            TARGETFILE=${OPTARG}
         ;;
         T)
            ASSIGN_STORAGE_POOL_DIRS="false"
         ;;
         C)
            if ! [[ -f ${CONFIGPATH}/conn.auth ]] ; then
               print_error_and_exit "To use connection authentication, a config path (option -f) that contains a \"conn.auth\" file and is available on all nodes needs to be specified before the option -C"
            fi
            CONNAUTH_FLAG="--auth-file ${CONFIGPATH}/conn.auth"
            CONNAUTH_LEGACY="connAuthFile=${CONFIGPATH}/conn.auth connDisableAuthentication=false"
         ;;
         E)
            if ! [[ -f ${CONFIGPATH}/cert.pem && -f ${CONFIGPATH}/key.pem ]] ; then
               print_error_and_exit "To use TLS encryption, a config path (option -f) that contains a \"cert.pem\" and a \"key.pem\" file and is available on all nodes needs to be specified before the option -E"
            fi
            TLS_DISABLE="--tls-disable=false"
            TLS_CERT_FILE="--tls-cert-file ${CONFIGPATH}/cert.pem"
            TLS_KEY_FILE="--tls-key-file ${CONFIGPATH}/key.pem"
         ;;
         H)
            if ! [[ -f ${CONFIGPATH}/license.pem ]] ; then
               print_error_and_exit "To use enterprise features, a config path (option -f) that contains a \"license.pem\" file and is available on all nodes needs to be specified before the option -H"
            fi
            LICENSE_FILE="--license-cert-file ${CONFIGPATH}/license.pem"
         ;;
         \?)
            echo "ERROR: invalid option: -${OPTARG}" >&2
            print_usage_and_exit
         ;;
         :)
            echo "ERROR: Option -${OPTARG} requires an argument" >&2
            print_usage_and_exit
         ;;
      esac
   done

   if [ "${USE_PDSH}" = "true" ]
   then
      # an explicitly given pdsh binary (-b) wins over the autodetected one
      PDSH=${PDSH:-${DEFAULT_PDSH_PATH}}

      if [ -z "${PDSH}" ]; then
         echo "Unable to autodetect pdsh. Please specify using the -b option."
         exit 1
      fi
   fi

   check_hostfile
   check_datapath
   check_mountpoint

   if [ "${STORAGE_MIRROR}" = "true" ] && [ "${PREFER_LOCAL}" = "true" ]
   then
      print_error_and_exit "Options -k and -l are mutually exclusive."
   fi

   if [ "${TARGETFILE}" != "" ]
   then
      check_targetfile
   fi


   print_info "Using status information file: ${STATUSFILE}"

   # "uniq" only collapses adjacent duplicate lines of the node file
   NODECOUNT=$(grep -v '^$' "${HOSTFILE}" | uniq | wc -l) #ignore empty lines
   # the unquoted command substitution is intended here: one array element per host
   NODES=( $(grep -v '^$' "${HOSTFILE}" | uniq) ) #store as array and ignore empty lines

   # make list of all nodes first - needed for clients and tmpfs mounts
   ALLNODES=$(IFS=,; echo "${NODES[*]}")

   if [ "${USE_PDSH}" = "true" ]
   then
      # check all nodes for reachability and working PDSH
      check_pdsh "${ALLNODES}"
   else
      # check reachability of all nodes
      for HOST in "${NODES[@]}"
      do
         check_reachability "${HOST}"
      done
   fi

   check_statusfile "${ALLNODES}"

   # if the number of meta servers given is 0 or greater than node count, start it on all hosts
   if [ "${NUM_META_SERVER}" -eq 0 ] || [ "${NUM_META_SERVER}" -gt "${NODECOUNT}" ]
   then
      NUM_META_SERVER=${NODECOUNT}
      print_info "Number of metadata servers automatically set to ${NUM_META_SERVER}"
   fi

   # if the number of storage servers given is 0 or greater than node count, start it on all hosts
   if [ "${NUM_STORAGE_SERVER}" -eq 0 ] || [ "${NUM_STORAGE_SERVER}" -gt "${NODECOUNT}" ]
   then
      NUM_STORAGE_SERVER=${NODECOUNT}
      print_info "Number of storage servers automatically set to ${NUM_STORAGE_SERVER}"
   fi

   # create the log path on all nodes if it doesn't exist yet
   # without an existing logfile path, the server won't start up
   create_log_path "${NODES[@]}"

   # take the first host as master host
   MASTERHOST=${NODES[0]}

   # delete the status file on the master host
   execute_ssh_cmd "${MASTERHOST}" "rm -f ${STATUSFILE}"

   # mount tmpfs
   if [ "${USE_TMPFS}" = "true" ]
   then
      start_tmpfs "${ALLNODES}" "${DATA_PATH}"
   fi

   # MASTERHOST is also mgmtd host
   MGMTD=${MASTERHOST}

   # The gRPC port for MGMTD, important for CTL
   MGMTD_GRPC_PORT=$((MGMTD_GRPC_PORT+PORT_SHIFT))

   # Combine variables relevant to CTL into one
   CTL_GLOBAL_PARAMS="--mgmtd-addr ${MGMTD}:${MGMTD_GRPC_PORT} ${TLS_DISABLE} ${TLS_CERT_FILE} ${CONNAUTH_FLAG}"

   # port shift and config path may be empty, but that's ok
   start_mgmtd "${MGMTD}" "${DATA_PATH}" "${PORT_SHIFT}" "${CONFIGPATH}"

   # take the first NUM_STORAGE_SERVER as storage servers
   STORAGENODES=$(IFS=,; echo "${NODES[*]:0:${NUM_STORAGE_SERVER}}")

   # port shift and config path may be empty, but that's ok
   start_storage_servers "${STORAGENODES}" "${DATA_PATH}" "${MGMTD}" "${PORT_SHIFT}" "${CONFIGPATH}"

   # take the first NUM_META_SERVER as metadata servers
   METANODES=$(IFS=,; echo "${NODES[*]:0:${NUM_META_SERVER}}")

   # port shift and config path may be empty, but that's ok
   start_meta_servers "${METANODES}" "${DATA_PATH}" "${MGMTD}" "${PORT_SHIFT}" "${CONFIGPATH}"

   # give the management daemon some time to get all information from servers
   wait_online_good storage "${NUM_STORAGE_SERVER}"
   wait_online_good meta "${NUM_META_SERVER}"

   # enable mirroring
   # (CTL_GLOBAL_PARAMS is deliberately unquoted below: it holds several separate arguments)
   if [ "${STORAGE_MIRROR}" = "true" ]
   then
      if ! ${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
            mirror autocreate storage > /dev/null
      then
         print_error_and_exit "Unable to create storage target buddy mirror groups."
      fi

      # all metadata servers need to know about the storage mirror groups
      sleep 8
   fi

   if [ "${META_MIRROR}" = "true" ]
   then
      if ! ${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
            mirror autocreate meta > /dev/null
      then
         print_error_and_exit "Unable to create metadata server buddy mirror groups."
      fi

      # all metadata servers need to know about the newly created mirror groups
      sleep 8

      if ! ${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
         mirror init --yes > /dev/null
      then
         print_error_and_exit "Unable to enable metadata mirroring."
      fi

      # wait for initial resync
      wait_online_good meta "${NUM_META_SERVER}"
   fi

   # take all hosts as client
   # port shift and config path may be empty, but that's ok
   start_clients "${ALLNODES}" "${MGMTD}" "${MOUNTPOINT}" "${PORT_SHIFT}" "${CONFIGPATH}"

   if [ "${ASSIGN_STORAGE_POOL_DIRS}" = "true" ] && [ "${TARGETFILE}" != "" ]
   then
      assign_storage_pool_dirs
   fi

   if [ "${STORAGE_MIRROR}" = "true" ]
   then
      if ! ${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
            entry set --chunk-size 512ki --num-targets 4 --pattern mirrored "${MOUNTPOINT}" > /dev/null
      then
         print_error_and_exit "Unable to enable mirroring pattern."
      fi
   fi

   echo " ****************************************************************************** "
   echo "* BeeOND setup finished successfully! To configure the \`beegfs\` command line"
   echo "* utility to talk to the BeeOND mgmtd service, some additional configuration"
   echo "* might be necessary."
   echo "*"
   echo "* If there is more than one BeeGFS mounted on the node that runs \`beegfs\`,"
   echo "* the correct mgmtd will need to be configured by using"
   echo "*   --mgmtd-addr \"${MGMTD}:${MGMTD_GRPC_PORT}\" or"
   echo "*   export BEEGFS_MGMTD_ADDR=\"${MGMTD}:${MGMTD_GRPC_PORT}\""
   # CONNAUTH_FLAG is never empty (it defaults to "--auth-disable"), so compare against the
   # default: the auth file hint is only valid if authentication was enabled with -C
   if [ "${CONNAUTH_FLAG}" != "--auth-disable" ]
   then
      echo "*"
      echo "* To configure \`beegfs\` to use the correct connection authentication file,"
      echo "* please use"
      echo "*   --auth-file \"${CONFIGPATH}/conn.auth\" or"
      echo "*   export BEEGFS_AUTH_FILE=\"${CONFIGPATH}/conn.auth\""
   fi
   # TLS_DISABLE is never empty either; TLS_CERT_FILE is only set if TLS was enabled with -E
   if [ -n "${TLS_CERT_FILE}" ]
   then
      echo "*"
      echo "* To configure \`beegfs\` to use TLS encryption when talking to mgmtd, use"
      echo "*   --tls-cert-file \"${CONFIGPATH}/cert.pem\" or"
      echo "*   export BEEGFS_TLS_CERT_FILE=\"${CONFIGPATH}/cert.pem\""
   fi
   echo " ****************************************************************************** "
}

# Handler for the "stop" action: parses the stop options, checks that all hosts from the node
# file can be reached and then calls unmount_clients followed by stop_procs for all of them.
#
# Parameters: the command line arguments following the "stop" action.
do_stop()
{
   # defaults, may be overridden by the options below
   DELETE_DATA="false"
   USE_PDSH="false"
   DELETE_LOGS="false"
   QUIET="false"
   CLEANUP="false"

   # leading ':' => silent getopts error reporting (same as in do_start). Without it, the ':'
   # case below is never reached and OPTARG is empty in the '\?' case.
   while getopts ":di:n:Pb:Lcq" opt; do
      case $opt in
         n)
            HOSTFILE=${OPTARG}
         ;;
         i)
            STATUSFILE=${OPTARG}
         ;;
         d)
            DELETE_DATA="true"
         ;;
         c)
            CLEANUP="true"
         ;;
         P)
            USE_PDSH="true"
         ;;
         b)
            PDSH=${OPTARG}
         ;;
         L)
            DELETE_LOGS="true"
         ;;
         q)
            QUIET="true"
         ;;
         \?)
            echo "ERROR: invalid option: -${OPTARG}" >&2
            print_usage_and_exit
         ;;
         :)
            echo "ERROR: Option -${OPTARG} requires an argument" >&2
            print_usage_and_exit
         ;;
      esac
   done

   if [ "${USE_PDSH}" = "true" ]
   then
      # an explicitly given pdsh binary (-b) wins over the autodetected one
      PDSH=${PDSH:-${DEFAULT_PDSH_PATH}}

      if [ -z "${PDSH}" ]; then
         echo "Unable to autodetect pdsh. Please specify using the -b option."
         exit 1
      fi
   fi

   check_hostfile

   print_info "Using status information file: ${STATUSFILE}"

   # the unquoted command substitution is intended here: one array element per host
   NODES=( $(grep -v '^$' "${HOSTFILE}" | uniq) ) #store as array and ignore empty lines
   ALLNODES=$(IFS=,; echo "${NODES[*]}")

   if [ "${USE_PDSH}" = "true" ]
   then
      # check all nodes for reachability and working PDSH
      check_pdsh "${ALLNODES}"
   else
      # check reachability of all nodes
      for HOST in "${NODES[@]}"
      do
         check_reachability "${HOST}"
      done
   fi

   # take the first host as master host
   MASTERHOST=${NODES[0]}

   # unmount the file system on all hosts first
   unmount_clients "${ALLNODES}"

   # then stop the remaining services and remove the status file on all hosts
   stop_procs "${ALLNODES}" "${DELETE_DATA}" "${DELETE_LOGS}"
}

# without any arguments there is nothing to do, so just print the help
[[ $# -gt 0 ]] || print_usage_and_exit


# the first argument selects the action, all remaining arguments are handed to its handler
ACTION=$1

case "${ACTION}" in
   start)
      shift
      do_start "$@"
   ;;
   stop)
      shift
      # the stop function does not abort on error; ERROR stores if we encountered one, so
      # that we can still return a statuscode
      ERROR="false"
      do_stop "$@"
      if [ "${ERROR}" = "true" ]
      then
         exit 1
      fi
   ;;
   stoplocal)
      shift
      do_stoplocal "$@"
      exit $?
   ;;
   *)
      # unknown action
      print_usage_and_exit
   ;;
esac
