Repository: impala
Updated Branches:
  refs/heads/master 28b4ad14f -> 147e962f2


IMPALA-7119: Restart whole minicluster when HDFS replication stalls

After loading data, we wait for HDFS to replicate
all of the blocks appropriately. If this takes too long,
we restart HDFS. However, HBase can fail if HDFS is
restarted and HBase is unable to write its logs.
In general, there is no real reason to keep HBase
and the other minicluster components running while
restarting HDFS.

This changes the HDFS health check to restart the
whole minicluster and Impala rather than just HDFS.

Testing:
 - Tested with a modified version that always does
   the restart in the HDFS health check and verified
   that the tests pass

Change-Id: I58ffe301708c78c26ee61aa754a06f46c224c6e2
Reviewed-on: http://gerrit.cloudera.org:8080/10665
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/147e962f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/147e962f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/147e962f

Branch: refs/heads/master
Commit: 147e962f2dc7507d36cde696640bd76e8821b37c
Parents: 28b4ad1
Author: Joe McDonnell <[email protected]>
Authored: Fri Jun 8 11:20:42 2018 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Mon Jun 18 21:46:11 2018 +0000

----------------------------------------------------------------------
 testdata/bin/create-load-data.sh | 39 ++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/147e962f/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 2a246e2..b193585 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -134,6 +134,31 @@ echo "SNAPSHOT_FILE=${SNAPSHOT_FILE:-}"
 echo "CM_HOST=${CM_HOST:-}"
 echo "REMOTE_LOAD=${REMOTE_LOAD:-}"
 
+function start-impala {
+  : ${START_CLUSTER_ARGS=""}
+  START_CLUSTER_ARGS_INT=""
+  if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
+    START_CLUSTER_ARGS_INT+=("--impalad_args=--abort_on_config_error=false -s 
1")
+  else
+    START_CLUSTER_ARGS_INT+=("-s 3")
+  fi
+  START_CLUSTER_ARGS_INT+=("${START_CLUSTER_ARGS}")
+  ${IMPALA_HOME}/bin/start-impala-cluster.py 
--log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} \
+    ${START_CLUSTER_ARGS_INT}
+}
+
+function restart-cluster {
+  # Break out each individual step for clarity
+  echo "Shutting down Impala"
+  ${IMPALA_HOME}/bin/start-impala-cluster.py --kill
+  echo "Shutting down the minicluster"
+  ${IMPALA_HOME}/testdata/bin/kill-all.sh
+  echo "Starting the minicluster"
+  ${IMPALA_HOME}/testdata/bin/run-all.sh
+  echo "Starting Impala"
+  start-impala
+}
+
 function load-custom-schemas {
   # HDFS commandline calls are slow, so consolidate the manipulation into
   # as few calls as possible by populating a temporary directory with the
@@ -500,7 +525,9 @@ function check-hdfs-health {
     if [[ "$NUMBER_UNDER_REPLICATED" -eq "$LAST_NUMBER_UNDER_REPLICATED" ]] ; 
then
       echo "There are under-replicated blocks in HDFS and HDFS is not making 
progress"\
           "in $SLEEP_SEC seconds. Attempting to restart HDFS to resolve this 
issue."
-      ${IMPALA_HOME}/testdata/bin/run-mini-dfs.sh
+      # IMPALA-7119: Other minicluster components (like HBase) can fail if 
HDFS is
+      # restarted by itself, so restart the whole cluster, including Impala.
+      restart-cluster
     fi
     LAST_NUMBER_UNDER_REPLICATED="$NUMBER_UNDER_REPLICATED"
     echo "$NUMBER_UNDER_REPLICATED under replicated blocks remaining."
@@ -515,16 +542,8 @@ if ${CLUSTER_DIR}/admin is_kerberized; then
 fi
 
 # Start Impala
-: ${START_CLUSTER_ARGS=""}
-if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
-  START_CLUSTER_ARGS="--impalad_args=--abort_on_config_error=false -s 1 
${START_CLUSTER_ARGS}"
-else
-  START_CLUSTER_ARGS="-s 3 ${START_CLUSTER_ARGS}"
-fi
 if [[ -z "$REMOTE_LOAD" ]]; then
-  run-step "Starting Impala cluster" start-impala-cluster.log \
-    ${IMPALA_HOME}/bin/start-impala-cluster.py 
--log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} \
-    ${START_CLUSTER_ARGS}
+  run-step "Starting Impala cluster" start-impala-cluster.log start-impala
 fi
 
 # The hdfs environment script sets up kms (encryption) and cache pools (hdfs 
caching).

Reply via email to