IMPALA-7003: Deflake erasure coding data loading. Erasure coding data loading is flaky in two ways: (1) HBase sometimes doesn't work because of HBASE-19369; (2) nested data loading sometimes fails because the HDFS namenode cannot find enough good datanodes.
For problem 1, this patch enables erasure coding only on the /test-warehouse directory. For problem 2, this patch sets dfs.namenode.redundancy.considerLoad to false, preventing the namenode from excluding heavily-loaded datanodes. Change-Id: I219106cd3ec7ffab7a834700f2a722b165e5f66c Reviewed-on: http://gerrit.cloudera.org:8080/10362 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/13a1acd7 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/13a1acd7 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/13a1acd7 Branch: refs/heads/master Commit: 13a1acd7e42b533c39b9f4eea1c17823bde4c1c5 Parents: fab65d4 Author: Tianyi Wang <[email protected]> Authored: Wed May 9 14:53:07 2018 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Tue May 15 23:59:58 2018 +0000 ---------------------------------------------------------------------- bin/impala-config.sh | 2 +- testdata/bin/create-load-data.sh | 37 ++++++++++++++------ testdata/bin/load-test-warehouse-snapshot.sh | 5 +++ testdata/bin/setup-hdfs-env.sh | 2 +- .../common/etc/hadoop/conf/hdfs-site.xml.tmpl | 10 ++++++ 5 files changed, 44 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/bin/impala-config.sh ---------------------------------------------------------------------- diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 97bec30..8e58332 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -452,7 +452,7 @@ elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then return 1 fi export HDFS_ERASURECODE_POLICY="RS-3-2-1024k" - export HDFS_ERASURECODE_PATH="/" + export HDFS_ERASURECODE_PATH="/test-warehouse" fi else echo "Unsupported filesystem '$TARGET_FILESYSTEM'" 
http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/testdata/bin/create-load-data.sh ---------------------------------------------------------------------- diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh index 280105d..e5b0f87 100755 --- a/testdata/bin/create-load-data.sh +++ b/testdata/bin/create-load-data.sh @@ -96,14 +96,6 @@ do shift; done -# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching). -# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this setup is not -# needed. -if [[ "${TARGET_FILESYSTEM}" == "hdfs" ]]; then - run-step "Setting up HDFS environment" setup-hdfs-env.log \ - ${IMPALA_HOME}/testdata/bin/setup-hdfs-env.sh -fi - if [[ $SKIP_METADATA_LOAD -eq 0 && "$SNAPSHOT_FILE" = "" ]]; then if [[ -z "$REMOTE_LOAD" ]]; then run-step "Loading Hive Builtins" load-hive-builtins.log \ @@ -291,6 +283,14 @@ function copy-and-load-dependent-tables { /tmp/alltypes_rc /tmp/alltypes_seq hadoop fs -mkdir -p /tmp/alltypes_seq/year=2009 \ /tmp/alltypes_rc/year=2009 + + # The file written by hive to /test-warehouse will be strangely replicated rather than + # erasure coded if EC is not set in /tmp + if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then + hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" -path "/tmp/alltypes_rc" + hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" -path "/tmp/alltypes_seq" + fi + hadoop fs -cp /test-warehouse/alltypes_seq/year=2009/month=2/ /tmp/alltypes_seq/year=2009 hadoop fs -cp /test-warehouse/alltypes_rc/year=2009/month=3/ /tmp/alltypes_rc/year=2009 @@ -468,7 +468,16 @@ function copy-and-load-ext-data-source { ${IMPALA_HOME}/testdata/bin/create-data-source-table.sql } -function wait-hdfs-replication { +function check-hdfs-health { + if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then + if ! 
grep "Replicated Blocks:[[:space:]]*#[[:space:]]*Total size:[[:space:]]*0 B"\ + <<< $(hdfs fsck /test-warehouse | tr '\n' '#'); then + echo "There are some replicated files despite that erasure coding is on" + echo "Failing the data loading job" + exit 1 + fi + return + fi MAX_FSCK=30 SLEEP_SEC=120 LAST_NUMBER_UNDER_REPLICATED=-1 @@ -518,6 +527,14 @@ if [[ -z "$REMOTE_LOAD" ]]; then ${START_CLUSTER_ARGS} fi +# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching). +# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this setup is not +# needed. +if [[ "${TARGET_FILESYSTEM}" == "hdfs" ]]; then + run-step "Setting up HDFS environment" setup-hdfs-env.log \ + ${IMPALA_HOME}/testdata/bin/setup-hdfs-env.sh +fi + if [ $SKIP_METADATA_LOAD -eq 0 ]; then run-step "Loading custom schemas" load-custom-schemas.log load-custom-schemas # Run some steps in parallel, with run-step-backgroundable / run-step-wait-all. @@ -580,7 +597,7 @@ if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then run-step "Creating internal HBase table" create-internal-hbase-table.log \ create-internal-hbase-table - run-step "Waiting for HDFS replication" wait-hdfs-replication.log wait-hdfs-replication + run-step "Checking HDFS health" check-hdfs-health.log check-hdfs-health fi # TODO: Investigate why all stats are not preserved. 
Theoretically, we only need to http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/testdata/bin/load-test-warehouse-snapshot.sh ---------------------------------------------------------------------- diff --git a/testdata/bin/load-test-warehouse-snapshot.sh b/testdata/bin/load-test-warehouse-snapshot.sh index 311a9ea..fe5dd2a 100755 --- a/testdata/bin/load-test-warehouse-snapshot.sh +++ b/testdata/bin/load-test-warehouse-snapshot.sh @@ -72,6 +72,11 @@ if [[ "$REPLY" =~ ^[Yy]$ ]]; then fi echo "Creating ${TEST_WAREHOUSE_DIR} directory" hadoop fs -mkdir -p ${FILESYSTEM_PREFIX}${TEST_WAREHOUSE_DIR} + if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then + hdfs ec -enablePolicy -policy "${HDFS_ERASURECODE_POLICY}" + hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" \ + -path "${HDFS_ERASURECODE_PATH:=/test-warehouse}" + fi # TODO: commented out because of regressions in local end-to-end testing. See # IMPALA-4345 http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/testdata/bin/setup-hdfs-env.sh ---------------------------------------------------------------------- diff --git a/testdata/bin/setup-hdfs-env.sh b/testdata/bin/setup-hdfs-env.sh index a07a9dd..552c48b 100755 --- a/testdata/bin/setup-hdfs-env.sh +++ b/testdata/bin/setup-hdfs-env.sh @@ -76,5 +76,5 @@ fi if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then hdfs ec -enablePolicy -policy "${HDFS_ERASURECODE_POLICY}" hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" \ - -path "${HDFS_ERASURECODE_PATH:=/}" + -path "${HDFS_ERASURECODE_PATH:=/test-warehouse}" fi http://git-wip-us.apache.org/repos/asf/impala/blob/13a1acd7/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl ---------------------------------------------------------------------- diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl index 6882fa3..717ae7c 100644 --- 
a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl +++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl @@ -114,12 +114,22 @@ <value>true</value> </property> + <!-- The default behavior of the namenode is to exclude datanodes with the number of + connections 2x higher than the average among all the datanodes from being considered + for replication/EC. In the minicluster we have to use every datanode for every block + so this should be disabled. --> + <property> + <name>dfs.namenode.redundancy.considerLoad</name> + <value>false</value> + </property> + <!-- Location of the KMS key provider --> <property> <name>dfs.encryption.key.provider.uri</name> <value>kms://[email protected]:9600/kms</value> </property> + <!-- BEGIN Kerberos settings --> <!-- We use the MiniKdc; it generates a keytab and krb5.conf; we point everyone at that one keytab and go to town... -->
