Repository: incubator-impala
Updated Branches:
  refs/heads/master 839c45777 -> e301ca641


IMPALA-6108: Revert "IMPALA-6070: Parallel data load."

We may be seeing a race with errors like "java.io.FileNotFoundException:
File /tmp/hadoop-jenkins/mapred/local/1508958341829_tmp does not exist".

This reverts commit e020c37106383be5416f882cbe11fc25efad8968.

Change-Id: I46da93f4315a5a4bdaa96fa464cb51922bd6c419
Reviewed-on: http://gerrit.cloudera.org:8080/8386
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e301ca64
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e301ca64
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e301ca64

Branch: refs/heads/master
Commit: e301ca6418be6645872ff738054d3ceed1fea548
Parents: 839c457
Author: Philip Zeyliger <[email protected]>
Authored: Wed Oct 25 15:11:48 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Thu Oct 26 02:07:50 2017 +0000

----------------------------------------------------------------------
 testdata/bin/create-load-data.sh | 11 +++--------
 testdata/bin/run-hive-server.sh  |  2 +-
 testdata/bin/run-step.sh         | 36 +----------------------------------
 3 files changed, 5 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e301ca64/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index c5207a9..8640ded 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -449,15 +449,9 @@ fi
 
 if [ $SKIP_METADATA_LOAD -eq 0 ]; then
   run-step "Loading custom schemas" load-custom-schemas.log load-custom-schemas
-  # Run some steps in parallel, with run-step-backgroundable / run-step-wait-all.
-  # This is effective on steps that take a long time and don't depend on each
-  # other. Functional-query takes about ~35 minutes, and TPC-H and TPC-DS can
-  # finish while functional-query is running.
-  run-step-backgroundable "Loading functional-query data" load-functional-query.log \
+  run-step "Loading functional-query data" load-functional-query.log \
       load-data "functional-query" "exhaustive"
-  run-step-backgroundable "Loading TPC-H data" load-tpch.log load-data "tpch" "core"
-  run-step-backgroundable "Loading TPC-DS data" load-tpcds.log load-data "tpcds" "core"
-  run-step-wait-all
+  run-step "Loading TPC-H data" load-tpch.log load-data "tpch" "core"
   # Load tpch nested data.
   # TODO: Hacky and introduces more complexity into the system, but it is expedient.
   if [[ -n "$CM_HOST" ]]; then
@@ -465,6 +459,7 @@ if [ $SKIP_METADATA_LOAD -eq 0 ]; then
   fi
   run-step "Loading nested data" load-nested.log \
     ${IMPALA_HOME}/testdata/bin/load_nested.py ${LOAD_NESTED_ARGS:-}
+  run-step "Loading TPC-DS data" load-tpcds.log load-data "tpcds" "core"
   run-step "Loading auxiliary workloads" load-aux-workloads.log load-aux-workloads
   run-step "Loading dependent tables" copy-and-load-dependent-tables.log \
       copy-and-load-dependent-tables

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e301ca64/testdata/bin/run-hive-server.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/run-hive-server.sh b/testdata/bin/run-hive-server.sh
index 42d95b5..530b804 100755
--- a/testdata/bin/run-hive-server.sh
+++ b/testdata/bin/run-hive-server.sh
@@ -72,7 +72,7 @@ ${CLUSTER_BIN}/wait-for-metastore.py --transport=${METASTORE_TRANSPORT}
 if [ ${ONLY_METASTORE} -eq 0 ]; then
   # Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
   # environment variable.
-  HADOOP_HEAPSIZE="512" hive --service hiveserver2 > ${LOGDIR}/hive-server2.out 2>&1 &
+  hive --service hiveserver2 > ${LOGDIR}/hive-server2.out 2>&1 &
 
   # Wait for the HiveServer2 service to come up because callers of this script
   # may rely on it being available.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e301ca64/testdata/bin/run-step.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/run-step.sh b/testdata/bin/run-step.sh
index 9943013..45c5774 100755
--- a/testdata/bin/run-step.sh
+++ b/testdata/bin/run-step.sh
@@ -48,39 +48,5 @@ function run-step {
     return 1
   fi
   ELAPSED_TIME=$(($SECONDS - $START_TIME))
-  echo "  ${MSG} OK (Took: $(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec)"
-}
-
-# Array to manage background tasks.
-declare -a RUN_STEP_PIDS
-declare -a RUN_STEP_MSGS
-
-# Runs the given step in the background. Many tasks may be started in the
-# background, and all of them must be joined together with run-step-wait-all.
-# No dependency management or maximums on number of tasks are provided.
-function run-step-backgroundable {
-  MSG="$1"
-  run-step "$@" &
-  local pid=$!
-  echo "Started ${MSG} in background; pid $pid."
-  RUN_STEP_PIDS+=($pid)
-  RUN_STEP_MSGS+=("${MSG}")
-}
-
-# Wait for all tasks that were run with run-step-backgroundable.
-# Fails if any of the background tasks has failed. Clears $RUN_STEP_PIDS.
-function run-step-wait-all {
-  local ret=0
-  for idx in "${!RUN_STEP_PIDS[@]}"; do
-    pid="${RUN_STEP_PIDS[$idx]}"
-    msg="${RUN_STEP_MSGS[$idx]}"
-
-    if ! wait $pid; then
-      ret=1
-      echo "Background task $msg (pid $pid) failed."
-    fi
-  done
-  RUN_STEP_PIDS=()
-  RUN_STEP_MSGS=()
-  return $ret
+  echo "    OK (Took: $(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec)"
 }

Reply via email to