Repository: incubator-impala Updated Branches: refs/heads/master 839c45777 -> e301ca641
IMPALA-6108: Revert "IMPALA-6070: Parallel data load." We may be seeing a race with errors like "java.io.FileNotFoundException: File /tmp/hadoop-jenkins/mapred/local/1508958341829_tmp does not exist". This reverts commit e020c37106383be5416f882cbe11fc25efad8968. Change-Id: I46da93f4315a5a4bdaa96fa464cb51922bd6c419 Reviewed-on: http://gerrit.cloudera.org:8080/8386 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e301ca64 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e301ca64 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e301ca64 Branch: refs/heads/master Commit: e301ca6418be6645872ff738054d3ceed1fea548 Parents: 839c457 Author: Philip Zeyliger <[email protected]> Authored: Wed Oct 25 15:11:48 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Thu Oct 26 02:07:50 2017 +0000 ---------------------------------------------------------------------- testdata/bin/create-load-data.sh | 11 +++-------- testdata/bin/run-hive-server.sh | 2 +- testdata/bin/run-step.sh | 36 +---------------------------------- 3 files changed, 5 insertions(+), 44 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e301ca64/testdata/bin/create-load-data.sh ---------------------------------------------------------------------- diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh index c5207a9..8640ded 100755 --- a/testdata/bin/create-load-data.sh +++ b/testdata/bin/create-load-data.sh @@ -449,15 +449,9 @@ fi if [ $SKIP_METADATA_LOAD -eq 0 ]; then run-step "Loading custom schemas" load-custom-schemas.log load-custom-schemas - # Run some steps in parallel, with run-step-backgroundable / run-step-wait-all. - # This is effective on steps that take a long time and don't depend on each - # other. Functional-query takes about ~35 minutes, and TPC-H and TPC-DS can - # finish while functional-query is running. - run-step-backgroundable "Loading functional-query data" load-functional-query.log \ + run-step "Loading functional-query data" load-functional-query.log \ load-data "functional-query" "exhaustive" - run-step-backgroundable "Loading TPC-H data" load-tpch.log load-data "tpch" "core" - run-step-backgroundable "Loading TPC-DS data" load-tpcds.log load-data "tpcds" "core" - run-step-wait-all + run-step "Loading TPC-H data" load-tpch.log load-data "tpch" "core" # Load tpch nested data. # TODO: Hacky and introduces more complexity into the system, but it is expedient. if [[ -n "$CM_HOST" ]]; then @@ -465,6 +459,7 @@ if [ $SKIP_METADATA_LOAD -eq 0 ]; then fi run-step "Loading nested data" load-nested.log \ ${IMPALA_HOME}/testdata/bin/load_nested.py ${LOAD_NESTED_ARGS:-} + run-step "Loading TPC-DS data" load-tpcds.log load-data "tpcds" "core" run-step "Loading auxiliary workloads" load-aux-workloads.log load-aux-workloads run-step "Loading dependent tables" copy-and-load-dependent-tables.log \ copy-and-load-dependent-tables http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e301ca64/testdata/bin/run-hive-server.sh ---------------------------------------------------------------------- diff --git a/testdata/bin/run-hive-server.sh b/testdata/bin/run-hive-server.sh index 42d95b5..530b804 100755 --- a/testdata/bin/run-hive-server.sh +++ b/testdata/bin/run-hive-server.sh @@ -72,7 +72,7 @@ ${CLUSTER_BIN}/wait-for-metastore.py --transport=${METASTORE_TRANSPORT} if [ ${ONLY_METASTORE} -eq 0 ]; then # Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT # environment variable. - HADOOP_HEAPSIZE="512" hive --service hiveserver2 > ${LOGDIR}/hive-server2.out 2>&1 & + hive --service hiveserver2 > ${LOGDIR}/hive-server2.out 2>&1 & # Wait for the HiveServer2 service to come up because callers of this script # may rely on it being available. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e301ca64/testdata/bin/run-step.sh ---------------------------------------------------------------------- diff --git a/testdata/bin/run-step.sh b/testdata/bin/run-step.sh index 9943013..45c5774 100755 --- a/testdata/bin/run-step.sh +++ b/testdata/bin/run-step.sh @@ -48,39 +48,5 @@ function run-step { return 1 fi ELAPSED_TIME=$(($SECONDS - $START_TIME)) - echo " ${MSG} OK (Took: $(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec)" -} - -# Array to manage background tasks. -declare -a RUN_STEP_PIDS -declare -a RUN_STEP_MSGS - -# Runs the given step in the background. Many tasks may be started in the -# background, and all of them must be joined together with run-step-wait-all. -# No dependency management or maximums on number of tasks are provided. -function run-step-backgroundable { - MSG="$1" - run-step "$@" & - local pid=$! - echo "Started ${MSG} in background; pid $pid." - RUN_STEP_PIDS+=($pid) - RUN_STEP_MSGS+=("${MSG}") -} - -# Wait for all tasks that were run with run-step-backgroundable. -# Fails if any of the background tasks has failed. Clears $RUN_STEP_PIDS. -function run-step-wait-all { - local ret=0 - for idx in "${!RUN_STEP_PIDS[@]}"; do - pid="${RUN_STEP_PIDS[$idx]}" - msg="${RUN_STEP_MSGS[$idx]}" - - if ! wait $pid; then - ret=1 - echo "Background task $msg (pid $pid) failed." - fi - done - RUN_STEP_PIDS=() - RUN_STEP_MSGS=() - return $ret + echo " OK (Took: $(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec)" }
