Repository: incubator-impala Updated Branches: refs/heads/master 0bccb3ea0 -> 78776e9b5
IMPALA-6055: Fix hdfs encryption test for Hadoop 2.8+ Hadoop changed behavior regarding encrypted partitions and started automatically provisioning .Trash directories in encrypted partitions. This interplays poorly with the current test. Change-Id: I30234aa50fea93f316e75beea2ced002dcea0c24 Reviewed-on: http://gerrit.cloudera.org:8080/8274 Tested-by: Impala Public Jenkins Reviewed-by: Tim Armstrong <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0e0f295e Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0e0f295e Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0e0f295e Branch: refs/heads/master Commit: 0e0f295e4653eb77fc30fd904d8ee54a69d3399f Parents: 0bccb3e Author: Zachary Amsden <[email protected]> Authored: Thu Oct 12 18:24:58 2017 -0700 Committer: Zach Amsden <[email protected]> Committed: Fri Oct 20 20:45:22 2017 +0000 ---------------------------------------------------------------------- tests/metadata/test_hdfs_encryption.py | 35 +++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0e0f295e/tests/metadata/test_hdfs_encryption.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_hdfs_encryption.py b/tests/metadata/test_hdfs_encryption.py index ff135b8..27eaaee 100644 --- a/tests/metadata/test_hdfs_encryption.py +++ b/tests/metadata/test_hdfs_encryption.py @@ -160,7 +160,7 @@ class TestHdfsEncryption(ImpalaTestSuite): self.client.execute("alter table {0}.t1 add partition(j=3)".format(TEST_DB)); # Clean up the trash directory to create an encrypted zone rc, stdout, stderr = exec_process( - "hadoop fs -rmr /user/{0}/.Trash/*".format(getpass.getuser())) + "hadoop fs -rm -r 
/user/{0}/.Trash/*".format(getpass.getuser())) assert rc == 0, 'Error deleting Trash: %s %s' % (stdout, stderr) # Create the necessary encryption zones self.create_encryption_zone("testkey1", "/test-warehouse/{0}.db/t1/j=1"\ @@ -169,8 +169,16 @@ class TestHdfsEncryption(ImpalaTestSuite): .format(TEST_DB)) self.create_encryption_zone("testkey1", "/test-warehouse/{0}.db/t1/j=3"\ .format(TEST_DB)) - self.create_encryption_zone("testkey2", "/user/{0}/.Trash/".format(\ - getpass.getuser())) + + # HDFS 2.8+ behavior is to create individual trash per encryption zone; + # don't create an encryption zone on .Trash in that case, otherwise + # recursive trash is created. + has_own_trash = self.hdfs_client.exists( + "/test-warehouse/{0}.db/t1/j=1/.Trash".format(TEST_DB)) + if not has_own_trash: + self.create_encryption_zone("testkey2", "/user/{0}/.Trash/".format(\ + getpass.getuser())) + # Load sample data into the partition directories self.hdfs_client.create_file("test-warehouse/{0}.db/t1/j=1/j1.txt"\ .format(TEST_DB), file_data='j1') @@ -178,12 +186,25 @@ class TestHdfsEncryption(ImpalaTestSuite): .format(TEST_DB), file_data='j2') self.hdfs_client.create_file("test-warehouse/{0}.db/t1/j=3/j3.txt"\ .format(TEST_DB), file_data='j3') + # Drop the partition (j=1) without purge and make sure partition directory still # exists. 
This behavior is expected due to the difference in encryption zones - self.execute_query_expect_failure(self.client, "alter table {0}.t1 drop \ - partition(j=1)".format(TEST_DB)); - assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".format(TEST_DB)) - assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1".format(TEST_DB)) + # between the .Trash and the warehouse directory (prior to HDFS 2.8) + if not has_own_trash: + self.execute_query_expect_failure(self.client, "alter table {0}.t1 drop \ + partition(j=1)".format(TEST_DB)); + assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".format(TEST_DB)) + assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1".format(TEST_DB)) + else: + # HDFS 2.8+ behavior succeeds the query and creates trash; the partition removal + # ends up destroying the directories which moves this back to the user's trash + self.client.execute("alter table {0}.t1 drop partition(j=1)".format(TEST_DB)); + assert self.hdfs_client.exists( + "/user/{0}/.Trash/Current/test-warehouse/{1}.db/t1/j=1/j1.txt"\ + .format(getpass.getuser(), TEST_DB)) + assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".format(TEST_DB)) + assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1".format(TEST_DB)) + # Drop the partition j=2 (with purge) and make sure the partition directory is deleted self.client.execute("alter table {0}.t1 drop partition(j=2) purge".format(TEST_DB)) assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=2/j2.txt".format(TEST_DB))
