This is an automated email from the ASF dual-hosted git repository.

mikeb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 06d24b46f67015222a3406e8307b92e913dd372d
Author: Philip Zeyliger <phi...@cloudera.com>
AuthorDate: Thu Jan 17 10:03:55 2019 -0800

    IMPALA-8089: Fixing run-all-tests timeout raciness.
    
    We observed some failures in "pkill -P $TIMEOUT_PID". pkill
    will fail if there are no child processes, which could happen
    if there is, at that point in time, no "sleep" process running.
    To fix this, we simply ignore the error condition here.
    
    I added some debugging to run-all-tests-timeout-check.sh as well.
    
    I'm not 100% certain this is the right fix.
    
    Change-Id: Ieccfda933f526c116e49c46bf34f7245b357cb07
    Reviewed-on: http://gerrit.cloudera.org:8080/12230
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Reviewed-by: Bikramjeet Vig <bikramjeet....@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 bin/run-all-tests-timeout-check.sh | 7 +++++--
 bin/run-all-tests.sh               | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/bin/run-all-tests-timeout-check.sh b/bin/run-all-tests-timeout-check.sh
index 8a4c680..dda9462 100755
--- a/bin/run-all-tests-timeout-check.sh
+++ b/bin/run-all-tests-timeout-check.sh
@@ -33,7 +33,7 @@ fi
 
 echo
 echo
-echo "**** Timout Timer Started for $SLEEP_TIMEOUT_S s! ****"
+echo "**** Timout Timer Started (pid $$, ppid $PPID) for $SLEEP_TIMEOUT_S s! ****"
 echo
 echo
 
@@ -41,7 +41,10 @@ echo
 # Note: $SECONDS is a bash built-in that counts seconds since bash started.
 while ((SLEEP_TIMEOUT_S - SECONDS > 0)); do
   sleep 1
-  ps $PPID &> /dev/null || exit
+  if ! ps $PPID &> /dev/null; then
+    echo "Timeout Timer Exited because $PPID is gone."
+    exit
+  fi
 done
 
 echo
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 147cc58..5743ff5 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -234,7 +234,8 @@ do
   # ${IMPALA_HOME}/tests/run-process-failure-tests.sh
 
   # Finally, kill the spawned timeout process and its child sleep process.
-  pkill -P $TIMEOUT_PID
+  # There may not be a sleep process, so ignore failure.
+  pkill -P $TIMEOUT_PID || true
   kill $TIMEOUT_PID
 
   if [[ $TEST_RET_CODE == 1 ]]; then

Reply via email to