Repository: impala
Updated Branches:
  refs/heads/master 28b4ad14f -> 147e962f2

IMPALA-7119: Restart whole minicluster when HDFS replication stalls

After loading data, we wait for HDFS to replicate all of the blocks
appropriately. If this takes too long, we restart HDFS. However, HBase can
fail if HDFS is restarted and HBase is unable to write its logs. In general,
there is no real reason to keep HBase and the other minicluster components
running while restarting HDFS. This changes the HDFS health check to restart
the whole minicluster and Impala rather than just HDFS.

Testing:
 - Tested with a modified version that always does the restart in the HDFS
   health check and verified that the tests pass

Change-Id: I58ffe301708c78c26ee61aa754a06f46c224c6e2
Reviewed-on: http://gerrit.cloudera.org:8080/10665
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>

Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/147e962f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/147e962f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/147e962f

Branch: refs/heads/master
Commit: 147e962f2dc7507d36cde696640bd76e8821b37c
Parents: 28b4ad1
Author: Joe McDonnell <[email protected]>
Authored: Fri Jun 8 11:20:42 2018 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Mon Jun 18 21:46:11 2018 +0000

----------------------------------------------------------------------
 testdata/bin/create-load-data.sh | 39 ++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/impala/blob/147e962f/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 2a246e2..b193585 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -134,6 
+134,31 @@ echo "SNAPSHOT_FILE=${SNAPSHOT_FILE:-}"
 echo "CM_HOST=${CM_HOST:-}"
 echo "REMOTE_LOAD=${REMOTE_LOAD:-}"
 
+function start-impala {  # Starts Impala; honors TARGET_FILESYSTEM and START_CLUSTER_ARGS
+  : ${START_CLUSTER_ARGS=""}
+  START_CLUSTER_ARGS_INT=()  # real array: a "" scalar would become element 0 and hide the rest
+  if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
+    START_CLUSTER_ARGS_INT+=("--impalad_args=--abort_on_config_error=false" -s 1)
+  else
+    START_CLUSTER_ARGS_INT+=(-s 3)
+  fi
+  START_CLUSTER_ARGS_INT+=(${START_CLUSTER_ARGS})  # intentionally unquoted: split caller flags
+  ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} \
+    "${START_CLUSTER_ARGS_INT[@]}"
+}
+
+function restart-cluster {
+  # Break out each individual step for clarity
+  echo "Shutting down Impala"
+  ${IMPALA_HOME}/bin/start-impala-cluster.py --kill
+  echo "Shutting down the minicluster"
+  ${IMPALA_HOME}/testdata/bin/kill-all.sh
+  echo "Starting the minicluster"
+  ${IMPALA_HOME}/testdata/bin/run-all.sh
+  echo "Starting Impala"
+  start-impala
+}
+
 function load-custom-schemas {
   # HDFS commandline calls are slow, so consolidate the manipulation into
   # as few calls as possible by populating a temporary directory with the
@@ -500,7 +525,9 @@ function check-hdfs-health {
     if [[ "$NUMBER_UNDER_REPLICATED" -eq "$LAST_NUMBER_UNDER_REPLICATED" ]] ; then
       echo "There are under-replicated blocks in HDFS and HDFS is not making progress"\
           "in $SLEEP_SEC seconds. Attempting to restart HDFS to resolve this issue."
-      ${IMPALA_HOME}/testdata/bin/run-mini-dfs.sh
+      # IMPALA-7119: Other minicluster components (like HBase) can fail if HDFS is
+      # restarted by itself, so restart the whole cluster, including Impala.
+      restart-cluster
     fi
     LAST_NUMBER_UNDER_REPLICATED="$NUMBER_UNDER_REPLICATED"
     echo "$NUMBER_UNDER_REPLICATED under replicated blocks remaining."
@@ -515,16 +542,8 @@ if ${CLUSTER_DIR}/admin is_kerberized; then fi # Start Impala -: ${START_CLUSTER_ARGS=""} -if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then - START_CLUSTER_ARGS="--impalad_args=--abort_on_config_error=false -s 1 ${START_CLUSTER_ARGS}" -else - START_CLUSTER_ARGS="-s 3 ${START_CLUSTER_ARGS}" -fi if [[ -z "$REMOTE_LOAD" ]]; then - run-step "Starting Impala cluster" start-impala-cluster.log \ - ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} \ - ${START_CLUSTER_ARGS} + run-step "Starting Impala cluster" start-impala-cluster.log start-impala fi # The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).
