IMPALA-7003: Deflake erasure coding data loading

Erasure coding data loading is flaky in two ways:
1. HBase sometimes doesn't work because of HBASE-19369.
2. Nested data loading sometimes fails because the HDFS namenode cannot
   find enough good datanodes.

For problem 1, this patch enables erasure coding only on the /test-warehouse
directory instead of on the filesystem root (/). For problem 2, this patch sets
dfs.namenode.redundancy.considerLoad to false, preventing the namenode from
excluding heavily-loaded datanodes.

Change-Id: I219106cd3ec7ffab7a834700f2a722b165e5f66c
Reviewed-on: http://gerrit.cloudera.org:8080/10362
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/13a1acd7
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/13a1acd7
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/13a1acd7

Branch: refs/heads/master
Commit: 13a1acd7e42b533c39b9f4eea1c17823bde4c1c5
Parents: fab65d4
Author: Tianyi Wang <[email protected]>
Authored: Wed May 9 14:53:07 2018 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Tue May 15 23:59:58 2018 +0000

----------------------------------------------------------------------
 bin/impala-config.sh                            |  2 +-
 testdata/bin/create-load-data.sh                | 37 ++++++++++++++------
 testdata/bin/load-test-warehouse-snapshot.sh    |  5 +++
 testdata/bin/setup-hdfs-env.sh                  |  2 +-
 .../common/etc/hadoop/conf/hdfs-site.xml.tmpl   | 10 ++++++
 5 files changed, 44 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 97bec30..8e58332 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -452,7 +452,7 @@ elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
       return 1
     fi
     export HDFS_ERASURECODE_POLICY="RS-3-2-1024k"
-    export HDFS_ERASURECODE_PATH="/"
+    export HDFS_ERASURECODE_PATH="/test-warehouse"
   fi
 else
   echo "Unsupported filesystem '$TARGET_FILESYSTEM'"

http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 280105d..e5b0f87 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -96,14 +96,6 @@ do
   shift;
 done
 
-# The hdfs environment script sets up kms (encryption) and cache pools (hdfs 
caching).
-# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this 
setup is not
-# needed.
-if [[ "${TARGET_FILESYSTEM}" == "hdfs" ]]; then
-  run-step "Setting up HDFS environment" setup-hdfs-env.log \
-      ${IMPALA_HOME}/testdata/bin/setup-hdfs-env.sh
-fi
-
 if [[ $SKIP_METADATA_LOAD -eq 0  && "$SNAPSHOT_FILE" = "" ]]; then
   if [[ -z "$REMOTE_LOAD" ]]; then
     run-step "Loading Hive Builtins" load-hive-builtins.log \
@@ -291,6 +283,14 @@ function copy-and-load-dependent-tables {
     /tmp/alltypes_rc /tmp/alltypes_seq
   hadoop fs -mkdir -p /tmp/alltypes_seq/year=2009 \
     /tmp/alltypes_rc/year=2009
+
+  # The file written by hive to /test-warehouse will be strangely replicated 
rather than
+  # erasure coded if EC is not set in /tmp
+  if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then
+    hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" -path 
"/tmp/alltypes_rc"
+    hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" -path 
"/tmp/alltypes_seq"
+  fi
+
   hadoop fs -cp /test-warehouse/alltypes_seq/year=2009/month=2/ 
/tmp/alltypes_seq/year=2009
   hadoop fs -cp /test-warehouse/alltypes_rc/year=2009/month=3/ 
/tmp/alltypes_rc/year=2009
 
@@ -468,7 +468,16 @@ function copy-and-load-ext-data-source {
     ${IMPALA_HOME}/testdata/bin/create-data-source-table.sql
 }
 
-function wait-hdfs-replication {
+function check-hdfs-health {
+  if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then
+    if ! grep "Replicated Blocks:[[:space:]]*#[[:space:]]*Total 
size:[[:space:]]*0 B"\
+        <<< $(hdfs fsck /test-warehouse | tr '\n' '#'); then
+        echo "There are some replicated files despite that erasure coding is 
on"
+        echo "Failing the data loading job"
+        exit 1
+    fi
+    return
+  fi
   MAX_FSCK=30
   SLEEP_SEC=120
   LAST_NUMBER_UNDER_REPLICATED=-1
@@ -518,6 +527,14 @@ if [[ -z "$REMOTE_LOAD" ]]; then
     ${START_CLUSTER_ARGS}
 fi
 
+# The hdfs environment script sets up kms (encryption) and cache pools (hdfs 
caching).
+# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this 
setup is not
+# needed.
+if [[ "${TARGET_FILESYSTEM}" == "hdfs" ]]; then
+  run-step "Setting up HDFS environment" setup-hdfs-env.log \
+      ${IMPALA_HOME}/testdata/bin/setup-hdfs-env.sh
+fi
+
 if [ $SKIP_METADATA_LOAD -eq 0 ]; then
   run-step "Loading custom schemas" load-custom-schemas.log load-custom-schemas
   # Run some steps in parallel, with run-step-backgroundable / 
run-step-wait-all.
@@ -580,7 +597,7 @@ if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
   run-step "Creating internal HBase table" create-internal-hbase-table.log \
       create-internal-hbase-table
 
-  run-step "Waiting for HDFS replication" wait-hdfs-replication.log 
wait-hdfs-replication
+  run-step "Checking HDFS health" check-hdfs-health.log check-hdfs-health
 fi
 
 # TODO: Investigate why all stats are not preserved. Theoretically, we only 
need to

http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/testdata/bin/load-test-warehouse-snapshot.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/load-test-warehouse-snapshot.sh 
b/testdata/bin/load-test-warehouse-snapshot.sh
index 311a9ea..fe5dd2a 100755
--- a/testdata/bin/load-test-warehouse-snapshot.sh
+++ b/testdata/bin/load-test-warehouse-snapshot.sh
@@ -72,6 +72,11 @@ if [[ "$REPLY" =~ ^[Yy]$ ]]; then
     fi
     echo "Creating ${TEST_WAREHOUSE_DIR} directory"
     hadoop fs -mkdir -p ${FILESYSTEM_PREFIX}${TEST_WAREHOUSE_DIR}
+    if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then
+      hdfs ec -enablePolicy -policy "${HDFS_ERASURECODE_POLICY}"
+      hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" \
+        -path "${HDFS_ERASURECODE_PATH:=/test-warehouse}"
+    fi
 
     # TODO: commented out because of regressions in local end-to-end testing. 
See
     # IMPALA-4345

http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/testdata/bin/setup-hdfs-env.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/setup-hdfs-env.sh b/testdata/bin/setup-hdfs-env.sh
index a07a9dd..552c48b 100755
--- a/testdata/bin/setup-hdfs-env.sh
+++ b/testdata/bin/setup-hdfs-env.sh
@@ -76,5 +76,5 @@ fi
 if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then
   hdfs ec -enablePolicy -policy "${HDFS_ERASURECODE_POLICY}"
   hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" \
-    -path "${HDFS_ERASURECODE_PATH:=/}"
+    -path "${HDFS_ERASURECODE_PATH:=/test-warehouse}"
 fi

http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
----------------------------------------------------------------------
diff --git 
a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl 
b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
index 6882fa3..717ae7c 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
@@ -114,12 +114,22 @@
     <value>true</value>
   </property>
 
+  <!-- The default behavior of the namenode is to exclude datanodes with the 
number of
+    connections 2x higher than the average among all the datanodes from being 
considered
+    for replication/EC. In the minicluster we have to use every datanode for 
every block
+    so this should be disabled. -->
+  <property>
+    <name>dfs.namenode.redundancy.considerLoad</name>
+    <value>false</value>
+  </property>
+
   <!-- Location of the KMS key provider -->
   <property>
     <name>dfs.encryption.key.provider.uri</name>
     <value>kms://[email protected]:9600/kms</value>
   </property>
 
+
   <!-- BEGIN Kerberos settings -->
   <!-- We use the MiniKdc; it generates a keytab and krb5.conf; we point
        everyone at that one keytab and go to town... -->

Reply via email to