From 0115c852e3eda6824238b0c2eb7fa52dfab30956 Mon Sep 17 00:00:00 2001 From: Andrea Dell'Amico Date: Wed, 14 Feb 2018 12:48:24 +0100 Subject: [PATCH] library/roles/smartgears/dataminer_app/templates/algorithms-updater.j2: Fix the check of the lock file logic. See https://support.d4science.org/issues/11182 --- R/templates/update_r_packages.sh.j2 | 4 ++- .../templates/algorithms-updater.j2 | 28 ++++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/R/templates/update_r_packages.sh.j2 b/R/templates/update_r_packages.sh.j2 index bed888dc..207707e3 100644 --- a/R/templates/update_r_packages.sh.j2 +++ b/R/templates/update_r_packages.sh.j2 @@ -77,7 +77,7 @@ function init_env() { OLDPROC=$( cat $LOCK_FILE ) OLDPROC_RUNNING=$( ps auwwx | grep -v grep | grep $OLDPROC | awk '{ print $2 }' ) RETVAL=$? - if [ $RETVAL -eq 0 ] ; then + if [ ! -z "$OLDPROC_RUNNING" ] ; then logger "update_r_packages: pid of the already running process: $OLDPROC_RUNNING" OLDPROC_RUNNING_TIME=$( ps -o etimes= -p ${OLDPROC_RUNNING} ) if [ $OLDPROC_RUNNING_TIME -gt $UPDATER_PROCESS_MAX_RUNTIME ] ; then @@ -92,6 +92,8 @@ function init_env() { logger "update_r_packages: lock file exist but the process not. Continuing." rm -fr $TMP_FILES_DIR fi + else + logger 'update_r_packages: no other jobs running, proceeding.' fi RETVAL= echo "$PROCNUM" > $LOCK_FILE diff --git a/smartgears/dataminer_app/templates/algorithms-updater.j2 b/smartgears/dataminer_app/templates/algorithms-updater.j2 index 866cfd54..bdaaa08d 100644 --- a/smartgears/dataminer_app/templates/algorithms-updater.j2 +++ b/smartgears/dataminer_app/templates/algorithms-updater.j2 @@ -3,6 +3,7 @@ set -e set -o pipefail +PROCNUM=$$ INFRA_REFERENCE={{ dataminer_infra_reference }} ADD_ALGORITHM_DIR={{ smartgears_user_home }}/algorithmInstaller ADD_ALGORITHM_PATH={{ smartgears_user_home }}/algorithmInstaller/addAlgorithm.sh @@ -15,7 +16,10 @@ ALGORITHMS_INSTALLED_FILE=${LOG_DIR}/already_installed_algorithms.txt ALGORITHMS_INSTALLED_SORTED_FILE=${LOG_DIR}/already_installed_algorithms_sorted.txt LOCK_FILE=${LOG_DIR}/.algorithms_updater.lock ALGO_DIR={{ dataminer_wps_algorithms_dest }}/${INFRA_REFERENCE} -RUNNING_JOB= +# In seconds. 60*60*6=21600s (6h) +UPDATER_PROCESS_MAX_RUNTIME=21600 +OLDPROC= +OLDPROC_RUNNING= RUNNING_JOB_RETVAL= trap "logger 'algorithms-updater: trap intercepted, exiting.' ; cleanup 1" SIGHUP SIGINT SIGTERM @@ -38,13 +42,23 @@ function check_lock_file() { if [ -f $LOCK_FILE ] ; then set +o pipefail set +e - RUNNING_JOB=$( /bin/pidof -s -x /usr/local/bin/algorithms-updater ) + OLDPROC=$( cat $LOCK_FILE ) + OLDPROC_RUNNING=$( ps auwwx | grep -v grep | grep $OLDPROC | awk '{ print $2 }' ) RUNNING_JOB_RETVAL=$? - if [ $RUNNING_JOB_RETVAL -eq 0 ] ; then - logger 'algorithms-updater: another job still running, exiting.' - rm -fr $OUT_DIR - exit 0 + if [ ! -z "$OLDPROC_RUNNING" ] ; then + logger "algorithms_updater: pid of the already running process: $OLDPROC_RUNNING" + OLDPROC_RUNNING_TIME=$( ps -o etimes= -p ${OLDPROC_RUNNING} ) + if [ $OLDPROC_RUNNING_TIME -gt $UPDATER_PROCESS_MAX_RUNTIME ] ; then + logger "algorithms_updater: process $OLDPROC_RUNNING was running for $OLDPROC_RUNNING_TIME seconds. Got stuck, killing it" + kill -9 $OLDPROC_RUNNING + rm -f $LOCK_FILE + else + logger "algorithms_updater: another process is running, exiting." + rm -fr $OUT_DIR + exit 0 + fi else + logger "algorithms_updater: lock file exist but the process not. Continuing." rm -f $LOCK_FILE fi else @@ -52,7 +66,7 @@ function check_lock_file() { fi set -o pipefail set -e - echo "$$" > $LOCK_FILE + echo "$PROCNUM" > $LOCK_FILE } function update_svn_repo() {