#!/bin/bash

# This file contains some functions used across all of the BeeOND scripts.

# Common prefix of every bookkeeping file written by these functions. The file list generator
# skips paths that start with it.
BEEOND_FILENAME_PREFIX='.beeond_'

# Lists used by the parallel copy.
BEEOND_COPY_FILE_LIST=${BEEOND_FILENAME_PREFIX}files_copy
BEEOND_COPY_DIR_LIST=${BEEOND_FILENAME_PREFIX}dirs_copy
BEEOND_COPY_SCAN_LIST=${BEEOND_FILENAME_PREFIX}scan_list   # List of dirs that have to be scanned.

# File lists: state at session start, state at session end, and entries changed in between.
BEEOND_START_FILE_LIST=${BEEOND_FILENAME_PREFIX}files_start
BEEOND_END_FILE_LIST=${BEEOND_FILENAME_PREFIX}files_end
BEEOND_END_UPDATED_FILE_LIST=${BEEOND_FILENAME_PREFIX}files_end_updated

# Directory lists, same scheme as the file lists.
BEEOND_START_DIR_LIST=${BEEOND_FILENAME_PREFIX}dirs_start
BEEOND_END_DIR_LIST=${BEEOND_FILENAME_PREFIX}dirs_end
BEEOND_END_UPDATED_DIR_LIST=${BEEOND_FILENAME_PREFIX}dirs_end_updated

# Session info file (node file and global store path).
BEEOND_SESSION_FILE=${BEEOND_FILENAME_PREFIX}session

# Number of list lines handed to one parallel job (passed as -N).
BEEOND_BATCH_SIZE=20

# Prints an error message followed by an empty line.
#
# Arguments:
#    $1  Message text; it is prefixed with "ERROR: ".
#
# Both the message and the separating empty line go to stderr, so stdout stays clean for
# callers that capture or pipe it.
beeond_print_error()
{
   printf 'ERROR: %s\n\n' "${1}" >&2
}

# Reports a fatal error and terminates the running shell with exit status 1.
#
# Arguments:
#    $1  Message text, handed on to beeond_print_error.
beeond_print_error_and_exit()
{
   local MESSAGE="${1}"

   # Everything the helper writes to stdout is sent to stderr as well.
   beeond_print_error "${MESSAGE}" 1>&2
   exit 1
}

# Prints an informational message to stdout unless the variable QUIET is set to "true".
#
# Arguments:
#    $1  Message text; it is prefixed with "INFO: ".
beeond_print_info()
{
   local TEXT="${1}"

   # Quiet mode: say nothing and report success.
   if [ "${QUIET}" = "true" ]
   then
      return 0
   fi

   echo "INFO: ${TEXT}"
}

# Writes the session info file, which records the node file and the path of the global store.
#
# Arguments:
#    $1  Path of the node file.
#    $2  Path of the global store.
#
# The file is created as ${LOCAL_PATH}/${BEEOND_SESSION_FILE}. LOCAL_PATH is not a parameter of
# this function; it is taken from the caller's scope. Both values are stored shell-escaped
# (printf %q). If the file cannot be written, the shell exits via beeond_print_error_and_exit.
beeond_save_session_info()
{
   local NODEFILE="${1}"
   local GLOBAL_PATH="${2}"
   local SESSION_FILE="${LOCAL_PATH}/${BEEOND_SESSION_FILE}"

   printf "NodeFile=%q\nGlobalPath=%q\n" "${NODEFILE}" "${GLOBAL_PATH}" > "${SESSION_FILE}" \
      || beeond_print_error_and_exit "Could not write to session file."
}

# Generate the list of files in the beeond folder.
#
# Arguments:
#    $1  Directory to scan.
#    $2  List file to write. The scan runs from inside $1, so a relative path is resolved there.
#    $3  Optional reference file (also resolved from inside $1). If given, only entries whose
#        status-change or modification time is newer than the reference are listed.
#
# The list holds one path per line, relative to $1 (starting with "./"), escaped with
# printf %q and sorted. Regular files and symlinks are listed; paths starting with
# "./${BEEOND_FILENAME_PREFIX}" are skipped.
#
# Returns non-zero without touching the list file if $1 cannot be entered; otherwise returns
# the status of the listing pipeline.
beeond_generate_file_list()
{
   local LOCAL_PATH="${1}"
   local LISTFILE="${2}"
   local REFERENCE_FILE="${3}"
   local -a NEWER_FILTER=()
   local STATUS

   # Without this check a failing pushd would make find list the caller's working directory
   # instead. The directory stack printed by pushd/popd is discarded: it is not part of the
   # list and would be shown even in quiet mode.
   if ! pushd "${LOCAL_PATH}" > /dev/null
   then
      beeond_print_error "Could not change into directory: ${LOCAL_PATH}"
      return 1
   fi

   if [ "${REFERENCE_FILE}" = "" ]
   then     # No reference file - just generate the full list (e.g. on startup).
      beeond_print_info "Generating file list ${LISTFILE}..."
   else     # Reference file given: Compare timestamps.
      beeond_print_info \
            "Generating file list ${LISTFILE}. Timestamp reference: ${REFERENCE_FILE}..."
      NEWER_FILTER=( \( -cnewer "${REFERENCE_FILE}" -or -newer "${REFERENCE_FILE}" \) )
   fi

   # "-exec ... {} +" hands many paths to one bash instead of starting a bash per file.
   # The ${array[@]+...} form expands to nothing for the empty filter, also under "set -u".
   #
   # The grep statement filters out file names with newlines in them (printf %q writes those
   # as $'...'). While they are technically legal they would cause problems later on due to the
   # way the script run by parallel handles the arguments.
   find . ! -path "./${BEEOND_FILENAME_PREFIX}*" \( -type f -or -type l \) \
         ${NEWER_FILTER[@]+"${NEWER_FILTER[@]}"} \
         -exec bash -c 'printf "%q\n" "$@"' bash {} + \
         | grep -v '^\$' | sort > "${LISTFILE}"
   STATUS=$?

   popd > /dev/null
   return "${STATUS}"
}

# Generate list of directories - this is necessary in case directories were created during the
# session and need to be created on the global store as well.
#
# Arguments:
#    $1  Directory to scan.
#    $2  List file to write. The scan runs from inside $1, so a relative path is resolved there.
#    $3  Optional reference file (also resolved from inside $1). If given, only directories
#        whose status-change or modification time is newer than the reference are listed.
#
# The list holds one directory per line, relative to $1 (starting with "./"; $1 itself is not
# listed), escaped with printf %q and sorted.
#
# Returns non-zero without touching the list file if $1 cannot be entered; otherwise returns
# the status of the listing pipeline.
beeond_generate_directory_list()
{
   local LOCAL_PATH="${1}"
   local LISTFILE="${2}"
   local REFERENCE_FILE="${3}"
   local -a NEWER_FILTER=()
   local STATUS

   # Without this check a failing pushd would make find list the caller's working directory
   # instead. The directory stack printed by pushd/popd is discarded: it is not part of the
   # list and would be shown even in quiet mode.
   if ! pushd "${LOCAL_PATH}" > /dev/null
   then
      beeond_print_error "Could not change into directory: ${LOCAL_PATH}"
      return 1
   fi

   if [ "${REFERENCE_FILE}" = "" ]
   then     # No reference file - just generate the full list (e.g. on startup).
      beeond_print_info "Generating directory list ${LISTFILE}..."
   else     # Reference file given: Compare timestamps.
      beeond_print_info \
            "Generating directory list ${LISTFILE}. Timestamp reference: ${REFERENCE_FILE}..."
      NEWER_FILTER=( \( -cnewer "${REFERENCE_FILE}" -or -newer "${REFERENCE_FILE}" \) )
   fi

   # "-exec ... {} +" hands many paths to one bash instead of starting a bash per directory.
   # The ${array[@]+...} form expands to nothing for the empty filter, also under "set -u".
   # The grep drops names that printf %q had to write as $'...' (e.g. names with newlines).
   find . ! -path . -type d \
         ${NEWER_FILTER[@]+"${NEWER_FILTER[@]}"} \
         -exec bash -c 'printf "%q\n" "$@"' bash {} + \
         | grep -v '^\$' | sort > "${LISTFILE}"
   STATUS=$?

   popd > /dev/null
   return "${STATUS}"
}

# Generate copy file list. If we do a parallel copy, we can't just list the paths to all the files
# to be copied, because we have to "flatten" the folder hierarchy (e.g. when the user says
# "copy dir/subdir/file_a anotherdir/file_b target_dir" we want to end up with
# target_dir/file_a and target_dir/file_b). To achieve this, we just save an explicit target path
# to each source file. We also have to keep a list of directories we encounter because we want to
# create them before we start copying.
#
# Arguments:
#    $1   Target directory; a relative path is resolved against the current working directory.
#         The three lists are written into it.
#    $2   Node list, passed to ${PARALLEL} via -S.
#    $3   Number of jobs, passed to ${PARALLEL} via -j.
#    $4.. Source files and directories; relative paths are resolved against the current working
#         directory. A source that is neither a file nor a directory aborts the shell.
#
# Lists written (all entries escaped with printf %q):
#    ${BEEOND_COPY_SCAN_LIST}  One source directory per line.
#    ${BEEOND_COPY_DIR_LIST}   Sorted output of the directory scan below.
#    ${BEEOND_COPY_FILE_LIST}  Sorted "source destination" pairs, for directly named files as
#                              well as for files found by the scan.
#
# NOTE(review): ${PARALLEL} is not set in this file; the options used suggest GNU parallel -
# confirm against the calling scripts.
beeond_generate_copy_lists()
{
   local TARGET="${1}"
   local NODE_LIST="${2}"
   local CONCURRENCY="${3}"
   shift 3

   local ENTRY
   local SCAN_LIST
   local DIR_LIST
   local FILE_LIST

   # Note: We do relative path expansion here, (and not directly in the do_... functions)
   # because this is the first time we iterate over the source list.

   # Expand target path.
   if [ ! "${TARGET:0:1}" = "/" ]
   then
      TARGET="${PWD}/${TARGET}"
   fi

   SCAN_LIST="${TARGET}/${BEEOND_COPY_SCAN_LIST}"
   DIR_LIST="${TARGET}/${BEEOND_COPY_DIR_LIST}"
   FILE_LIST="${TARGET}/${BEEOND_COPY_FILE_LIST}"

   # Delete possibly left over file lists and start with empty ones, so that every list exists
   # afterwards even if there were only files or only directories among the sources.
   rm -f "${SCAN_LIST}" "${DIR_LIST}" "${FILE_LIST}"
   : > "${SCAN_LIST}"
   : > "${DIR_LIST}"
   : > "${FILE_LIST}"

   # Generate lists: A list of files which can be used directly, and a list of directories which
   # have to be scanned first.
   for ENTRY in "$@"
   do
      # Expand path if it's relative.
      if [ ! "${ENTRY:0:1}" = "/" ]
      then
         ENTRY="${PWD}/${ENTRY}"
      fi

      beeond_print_info "Path to scan: ${ENTRY}"
      if [ -d "${ENTRY}" ]; then
         printf "%q\n" "${ENTRY}" >> "${SCAN_LIST}"
      elif [ -f "${ENTRY}" ]; then
         # Same "source destination" format as the scan produces; the file ends up directly
         # in the target directory.
         printf "%q %q\n" "${ENTRY}" "${TARGET}/$(basename "${ENTRY}")" >> "${FILE_LIST}"
      else
         beeond_print_error_and_exit "File or directory does not exist: ${ENTRY}"
      fi
   done

   beeond_print_info "Scanning sources..."

   if [ ! -s "${SCAN_LIST}" ]
   then
      # Only plain files were given - there is nothing to scan on the nodes.
      sort -o "${FILE_LIST}" "${FILE_LIST}"
      return
   fi

   # Directory scan: for every source directory, print each directory found in it, prefixed
   # with the source directory's base name.
   < "${SCAN_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read DIR; do \
               cd \"\${DIR}\"; \
               find . -type d -exec bash -c \\\'printf \"%s/%q\n\" \"\$0\" \"\$1\"\' \"\`basename \"\${DIR}\"\`\" \{\} \; \
               | grep -v ^\\$; \
            done;
         " \
   | sort > "${DIR_LIST}"

   # File scan: for every file or symlink found, print its source path and its destination
   # below the target directory. The result is appended, because the list may already hold
   # the directly named files; the combined list is sorted afterwards.
   < "${SCAN_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read DIR; do \
               cd \"\${DIR}\"; \
               find . \( -type f -or -type l \) -exec bash -c \
                  \\\'printf \"%q %q\n\" \"\${PWD}/\$0\" \"${TARGET}/\`basename \"\${PWD}\"\`/\$0\"\' \{\} \; \
               | grep -v ^\\$; \
            done; \
         " \
   >> "${FILE_LIST}"

   sort -o "${FILE_LIST}" "${FILE_LIST}"
}

# Parallel copy of the files from a previously generated file list to the target directory.
# First, the directory structure is generated, then the files are copied into it.
#
# Arguments:
#    $1  Target directory; a relative path is resolved against the current working directory.
#        The directory list and the file list are read from inside it.
#    $2  Node list, passed to ${PARALLEL} via -S.
#    $3  Number of jobs, passed to ${PARALLEL} via -j.
#
# NOTE(review): ${PARALLEL} is not set in this file; the options used suggest GNU parallel -
# confirm against the calling scripts.
beeond_parallel_copy()
{
   local TARGET="${1}"
   local NODE_LIST="${2}"
   local CONCURRENCY="${3}"

   # Expand target path.
   if [ ! "${TARGET:0:1}" = "/" ]
   then
      TARGET="${PWD}/${TARGET}"
   fi

   beeond_print_info "Generating target directory structure..."

   # The directory list is piped into the command below, which creates every listed directory
   # below the target. "read" is used without -r, so backslash escapes in the list entries are
   # removed again while reading.
   < "${TARGET}/${BEEOND_COPY_DIR_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read DIR; do \
               mkdir -pv \"${TARGET}/\${DIR}\"; \
            done; \
         "

   beeond_print_info "Copying files..."

   # Each line of the file list is split by "read -a" into LINE[0] and LINE[1], which are used
   # as source and destination of the copy.
   < "${TARGET}/${BEEOND_COPY_FILE_LIST}" \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read -a LINE; do \
            cp -av \"\${LINE[0]}\" \"\${LINE[1]}\"; \
         done; \
      "

   # Delete temporary files.
   # NOTE(review): the cleanup below is commented out, so the scan, directory and file lists
   # stay in the target directory after the copy.
#  rm "${TARGET}/${BEEOND_COPY_SCAN_LIST}" \
#        "${TARGET}/${BEEOND_COPY_DIR_LIST}" \
#        "${TARGET}/${BEEOND_COPY_FILE_LIST}"
}

# Remove all files from the global store that have been deleted during the session.
#
# Arguments:
#    $1  Path of the global store.
#    $2  Local path; the start and end lists are read from inside it.
#    $3  Node list, passed to ${PARALLEL} via -S.
#    $4  Number of jobs, passed to ${PARALLEL} via -j.
#
# "comm -23" prints the lines that are only in the first file, i.e. the entries of the start
# list that are missing from the end list.
beeond_remove_removed_files()
{
   local GLOBAL_PATH="${1}"
   local LOCAL_PATH="${2}"
   local NODE_LIST="${3}"
   local CONCURRENCY="${4}"

   beeond_print_info "Deleting files:"
   # Files: the entries are removed below the global store by the command handed to
   # ${PARALLEL}. "read" is used without -r, so backslash escapes in the list entries are
   # removed again while reading.
   comm -23 "${LOCAL_PATH}/${BEEOND_START_FILE_LIST}" "${LOCAL_PATH}/${BEEOND_END_FILE_LIST}" | \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read FILE; do \
               rm -v \"${GLOBAL_PATH}/\${FILE}\"; \
            done; \
         "

   beeond_print_info "Deleting directories:"
   # Directories: removed one at a time with rmdir, in reverse list order.
   # NOTE(review): this presumably relies on xargs undoing the backslash escaping of the list
   # entries - confirm for the xargs implementation in use.
   comm -23 "${LOCAL_PATH}/${BEEOND_START_DIR_LIST}" "${LOCAL_PATH}/${BEEOND_END_DIR_LIST}" | \
         tac | \
         xargs -I{} rmdir -v "${GLOBAL_PATH}/{}"
   # Not being done in parallel to avoid deleting a subdirectory before its parent (this is also
   # the reason the list is inverted (tac)).
}

# Copy back all files to the global store that have been updated during the session.
#
# Arguments:
#    $1  Path of the global store.
#    $2  Local path; the end lists are read from inside it.
#    $3  Node list, passed to ${PARALLEL} via -S.
#    $4  Number of jobs, passed to ${PARALLEL} via -j.
#
# Three steps: create all directories of the end directory list in the global store, copy the
# files of the updated file list, then copy the files located directly in the directories of
# the updated directory list. Files are copied with "cp -u".
#
# Returns non-zero, skipping the third step, if the local path cannot be entered.
beeond_copy_updated_files()
{
   local GLOBAL_PATH="${1}"
   local LOCAL_PATH="${2}"
   local NODE_LIST="${3}"
   local CONCURRENCY="${4}"

   beeond_print_info "Creating new directories:"

   < "${LOCAL_PATH}/${BEEOND_END_DIR_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read DIR; do \
               mkdir -pv \"${GLOBAL_PATH}/\${DIR}\"; \
            done; \
         "

   beeond_print_info "Copying back changed files:"

   < "${LOCAL_PATH}/${BEEOND_END_UPDATED_FILE_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read FILE; do \
               cp -uv \"${LOCAL_PATH}/\${FILE}\" \"${GLOBAL_PATH}/\${FILE}\"; \
            done; \
         "

   # Copy files into updated directories. (When a directory is renamed or files
   # are moved to a directory, the files in it don't have their timestamp
   # updated. Therefore, we need to check all the updated directories again).

   beeond_print_info "Copying back changed files (updated dirs):"

   # The list entries are relative paths, so find has to run from inside the local path. If
   # that directory cannot be entered, stop instead of searching the caller's working
   # directory. The directory stack printed by pushd/popd is discarded; it would be shown even
   # in quiet mode.
   if ! pushd "${LOCAL_PATH}" > /dev/null
   then
      beeond_print_error "Could not change into directory: ${LOCAL_PATH}"
      return 1
   fi

   # NOTE(review): unlike the generated lists, the find output used here is not escaped with
   # printf %q, while the remote "read" (no -r) still strips backslashes - names containing
   # backslashes are presumably not handled correctly; confirm whether that matters.
   < "${LOCAL_PATH}/${BEEOND_END_UPDATED_DIR_LIST}" \
         xargs -I{} find {} -maxdepth 1 \( -type f -or -type l \) | \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read FILE; do \
               cp -uv \"${LOCAL_PATH}/\${FILE}\" \"\`dirname \"${GLOBAL_PATH}/\${FILE}\"\`\"; \
            done; \
         "
   popd > /dev/null
}

# Stage in process: Copy all files from the global store to the local store.
#
# Arguments:
#    $1  Path of the global store (copy source).
#    $2  Local path (copy destination); the start lists are written into it.
#    $3  Node list, passed to ${PARALLEL} via -S.
#    $4  Number of jobs, passed to ${PARALLEL} via -j.
#
# A failing file copy is reported with beeond_print_error but does not stop the function.
beeond_stage_in()
{
   local GLOBAL_PATH="${1}"
   local LOCAL_PATH="${2}"
   local NODE_LIST="${3}"
   local CONCURRENCY="${4}"

   # Generate list of files that have to be copied.
   # Both lists describe the global store at this point but are stored in the local path.
   beeond_generate_file_list "${GLOBAL_PATH}" "${LOCAL_PATH}/${BEEOND_START_FILE_LIST}"
   beeond_generate_directory_list "${GLOBAL_PATH}" "${LOCAL_PATH}/${BEEOND_START_DIR_LIST}"

   beeond_print_info "Creating directory structure..."
   # Every listed directory is created below the local path; "touch --reference" then gives it
   # the timestamps of its counterpart in the global store.
   < "${LOCAL_PATH}/${BEEOND_START_DIR_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read DIR; do \
               mkdir -pv \"${LOCAL_PATH}/\${DIR}\"; \
               touch --reference=\"${GLOBAL_PATH}/\${DIR}\" \"${LOCAL_PATH}/\${DIR}\"; \
            done; \
         "

   beeond_print_info "Copying files to local directory..."
   # Every listed file is copied with "cp -a" into the matching directory below the local path.
   if ! < "${LOCAL_PATH}/${BEEOND_START_FILE_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read FILE; do \
               cp -av \"${GLOBAL_PATH}/\${FILE}\" \"\`dirname \"${LOCAL_PATH}/\${FILE}\"\`\"/; \
            done;
         "
   then
      beeond_print_error "Stage-in copy did not succeed. Data is incompletely staged in."
   fi

   # Generate list of files and dirs that were actually copied to keep track if the user deletes
   # files during a session. (re-generate list here, so that if something went wrong during the
   # stage-in copy, we don't start deleting stuff from the global store by accident).
   beeond_generate_file_list "${LOCAL_PATH}" "${LOCAL_PATH}/${BEEOND_START_FILE_LIST}"
   beeond_generate_directory_list "${LOCAL_PATH}" "${LOCAL_PATH}/${BEEOND_START_DIR_LIST}"
}

# Stage out process: remove all the files that have been removed during the beeond session,
# and copy back the files which have been changed.
#
# Arguments:
#    $1  Path of the global store.
#    $2  Local path; the end lists are written into it.
#    $3  Node list for the parallel jobs.
#    $4  Number of parallel jobs.
beeond_stage_out()
{
   local GLOBAL_PATH="${1}" LOCAL_PATH="${2}" NODE_LIST="${3}" CONCURRENCY="${4}"

   # Lists describing the state at the end of the session.
   local ALL_FILES="${LOCAL_PATH}/${BEEOND_END_FILE_LIST}"
   local CHANGED_FILES="${LOCAL_PATH}/${BEEOND_END_UPDATED_FILE_LIST}"
   local ALL_DIRS="${LOCAL_PATH}/${BEEOND_END_DIR_LIST}"
   local CHANGED_DIRS="${LOCAL_PATH}/${BEEOND_END_UPDATED_DIR_LIST}"

   beeond_print_info "Nodes for parallel stage out: ${NODE_LIST}."

   # Files: everything that exists now, then only what is newer than the start list.
   beeond_generate_file_list "${LOCAL_PATH}" "${ALL_FILES}"
   beeond_generate_file_list "${LOCAL_PATH}" "${CHANGED_FILES}" "${BEEOND_START_FILE_LIST}"

   # Directories: same two lists.
   beeond_generate_directory_list "${LOCAL_PATH}" "${ALL_DIRS}"
   beeond_generate_directory_list "${LOCAL_PATH}" "${CHANGED_DIRS}" "${BEEOND_START_DIR_LIST}"

   # Deletions first, then the copy back.
   beeond_remove_removed_files "${GLOBAL_PATH}" "${LOCAL_PATH}" "${NODE_LIST}" "${CONCURRENCY}"
   beeond_copy_updated_files "${GLOBAL_PATH}" "${LOCAL_PATH}" "${NODE_LIST}" "${CONCURRENCY}"
}

# Parallel copy process: copy all files and directories (recursively) into the target directory.
#
# Arguments:
#    $1   Node list for the parallel jobs.
#    $2   Number of parallel jobs.
#    $3.. Sources, followed by the target directory as the last argument.
beeond_copy()
{
   local NODE_LIST="${1}"
   local CONCURRENCY="${2}"
   shift 2

   beeond_print_info "Nodes for parallel copy: ${NODE_LIST}; Concurrency: ${CONCURRENCY}"

   # The last remaining argument is the destination; drop it so that only the sources are
   # left in the positional parameters.
   local DESTINATION="${!#}"
   set -- "${@:1:$#-1}"

   beeond_generate_copy_lists "${DESTINATION}" "${NODE_LIST}" "${CONCURRENCY}" "$@"
   beeond_parallel_copy "${DESTINATION}" "${NODE_LIST}" "${CONCURRENCY}"
}
