Repository: impala Updated Branches: refs/heads/2.x 2c893f46b -> 9c32594f7
IMPALA-6885: handle empty s3 dirs in recover_partitions test Two tests (test_duplicate_partitions, test_support_all_types) added only directories, which is a no-op on s3. This results in s3 test failures. Another test (test_recover_partitions) falsely passed for s3 since its test for a malformed partition name is ambiguous (did it pass because recovering correctly excluded or because the directory does not exist, as in the case of s3?). A recent change fixed the asserts in this test, so these tests likely were falsely reporting success. This change bundles directory creation with adding to it a new file, which is the common case in this test. As a result, the peculiarity with s3 is not handled via a special case each time the filesystem is modified for new partitions. This change also adds an explicit test for empty partition directories, since the first change leaves this case untested. Testing: - tested against a local minicluster and s3 Change-Id: Ic6c74f4f2466b5870322bdf373e8a6a4d1d679f4 Reviewed-on: http://gerrit.cloudera.org:8080/10210 Reviewed-by: Alex Behm <alex.b...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/9537dee2 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/9537dee2 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/9537dee2 Branch: refs/heads/2.x Commit: 9537dee2267e5292440568442f4c4e744c982c6e Parents: 2c893f4 Author: Vuk Ercegovac <vercego...@cloudera.com> Authored: Tue Apr 24 16:43:01 2018 -0700 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Fri Apr 27 20:10:19 2018 +0000 ---------------------------------------------------------------------- tests/common/skip.py | 2 + tests/metadata/test_recover_partitions.py | 82 ++++++++++++++++++-------- 2 files changed, 59 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/9537dee2/tests/common/skip.py ---------------------------------------------------------------------- diff --git a/tests/common/skip.py b/tests/common/skip.py index 3113acf..7d515f7 100644 --- a/tests/common/skip.py +++ b/tests/common/skip.py @@ -44,6 +44,8 @@ class SkipIfS3: jira = partial(pytest.mark.skipif, IS_S3) hdfs_encryption = pytest.mark.skipif(IS_S3, reason="HDFS encryption is not supported with S3") + empty_directory = pytest.mark.skipif(IS_S3, + reason="Empty directories are not supported on S3") # These ones need test infra work to re-enable. udfs = pytest.mark.skipif(IS_S3, reason="udas/udfs not copied to S3") http://git-wip-us.apache.org/repos/asf/impala/blob/9537dee2/tests/metadata/test_recover_partitions.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_recover_partitions.py b/tests/metadata/test_recover_partitions.py index 2d17d3d..1894676 100644 --- a/tests/metadata/test_recover_partitions.py +++ b/tests/metadata/test_recover_partitions.py @@ -17,8 +17,9 @@ # # Impala tests for ALTER TABLE RECOVER PARTITIONS statement +import os from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfLocal +from tests.common.skip import SkipIfLocal, SkipIfS3 from tests.common.test_dimensions import ALL_NODES_ONLY from tests.common.test_dimensions import create_exec_option_dimension from tests.util.filesystem_utils import WAREHOUSE, IS_S3 @@ -78,9 +79,7 @@ class TestRecoverPartitions(ImpalaTestSuite): # Create a path for a new partition using hdfs client and add a file with some values. # Test that the partition can be recovered and that the inserted data are accessible. - self.filesystem_client.make_dir(TBL_LOCATION + LEAF_DIR) - self.filesystem_client.create_file(TBL_LOCATION + LEAF_DIR + FILE_PATH, - INSERTED_VALUE) + self.create_fs_partition(TBL_LOCATION, LEAF_DIR, FILE_PATH, INSERTED_VALUE) result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % FQ_TBL_NAME) assert not self.has_value(PART_NAME, result.data) @@ -100,7 +99,7 @@ class TestRecoverPartitions(ImpalaTestSuite): result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % FQ_TBL_NAME) old_length = len(result.data) - self.filesystem_client.make_dir(TBL_LOCATION + MALFORMED_DIR) + self.create_fs_partition(TBL_LOCATION, MALFORMED_DIR, FILE_PATH, INSERTED_VALUE) self.execute_query_expect_success(self.client, "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) result = self.execute_query_expect_success(self.client, @@ -111,9 +110,7 @@ class TestRecoverPartitions(ImpalaTestSuite): # Create a directory whose subdirectory names contain __HIVE_DEFAULT_PARTITION__ # and check that is recovered as a NULL partition. - self.filesystem_client.make_dir(TBL_LOCATION + NULL_DIR) - self.filesystem_client.create_file( - TBL_LOCATION + NULL_DIR + FILE_PATH, NULL_INSERTED_VALUE) + self.create_fs_partition(TBL_LOCATION, NULL_DIR, FILE_PATH, NULL_INSERTED_VALUE) result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % FQ_TBL_NAME) assert not self.has_value(self.DEF_NULL_PART_KEY, result.data) @@ -150,9 +147,7 @@ class TestRecoverPartitions(ImpalaTestSuite): "ALTER TABLE %s PARTITION (i=1, p='p3') SET LOCATION '%s/%s.db/tmp' " % (FQ_TBL_NAME, WAREHOUSE, unique_database)) self.filesystem_client.delete_file_dir(TBL_LOCATION + LEAF_DIR, recursive=True) - self.filesystem_client.make_dir(TBL_LOCATION + LEAF_DIR); - self.filesystem_client.create_file(TBL_LOCATION + LEAF_DIR + FILE_PATH, - INSERTED_VALUE) + self.create_fs_partition(TBL_LOCATION, LEAF_DIR, FILE_PATH, INSERTED_VALUE) self.execute_query_expect_success(self.client, "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) # Ensure that no duplicate partitions are recovered. @@ -184,12 +179,7 @@ class TestRecoverPartitions(ImpalaTestSuite): PART_DIR = "s=part%d/" % i FILE_PATH = "test" INSERTED_VALUE = "666" - self.filesystem_client.make_dir(TBL_LOCATION + PART_DIR) - if IS_S3: - # S3 is a key/value store and directory creation is a NOP; actually - # create the file. - self.filesystem_client.create_file(TBL_LOCATION + PART_DIR + FILE_PATH, - INSERTED_VALUE) + self.create_fs_partition(TBL_LOCATION, PART_DIR, FILE_PATH, INSERTED_VALUE) result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % FQ_TBL_NAME) @@ -230,8 +220,7 @@ class TestRecoverPartitions(ImpalaTestSuite): self.execute_query_expect_success(self.client, "ALTER TABLE %s ADD PARTITION(i=1, p='p4')" % FQ_TBL_NAME) - self.filesystem_client.make_dir(TBL_LOCATION + LEAF_DIR); - self.filesystem_client.create_file(TBL_LOCATION + LEAF_DIR + FILE_PATH, INSERTED_VALUE) + self.create_fs_partition(TBL_LOCATION, LEAF_DIR, FILE_PATH, INSERTED_VALUE) self.execute_query_expect_success(self.client, "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) result = self.execute_query_expect_success(self.client, @@ -245,8 +234,8 @@ class TestRecoverPartitions(ImpalaTestSuite): result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % FQ_TBL_NAME) old_length = len(result.data) - self.filesystem_client.make_dir(TBL_LOCATION + SAME_VALUE_DIR1) - self.filesystem_client.make_dir(TBL_LOCATION + SAME_VALUE_DIR2) + self.create_fs_partition(TBL_LOCATION, SAME_VALUE_DIR1, FILE_PATH, INSERTED_VALUE) + self.create_fs_partition(TBL_LOCATION, SAME_VALUE_DIR2, FILE_PATH, INSERTED_VALUE) # Only one partition will be added. self.execute_query_expect_success(self.client, "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) @@ -274,9 +263,7 @@ class TestRecoverPartitions(ImpalaTestSuite): # Test that the recovered partitions are properly stored in Hive MetaStore. # Invalidate the table metadata and then check if the recovered partitions # are accessible. - self.filesystem_client.make_dir(TBL_LOCATION + LEAF_DIR) - self.filesystem_client.create_file(TBL_LOCATION + LEAF_DIR + FILE_PATH, - INSERTED_VALUE) + self.create_fs_partition(TBL_LOCATION, LEAF_DIR, FILE_PATH, INSERTED_VALUE) self.execute_query_expect_success(self.client, "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) result = self.execute_query_expect_success(self.client, @@ -323,7 +310,7 @@ class TestRecoverPartitions(ImpalaTestSuite): "SHOW PARTITIONS %s" % FQ_TBL_NAME) old_length = len(result.data) normal_dir = '/'.join(normal_values) - self.filesystem_client.make_dir(TBL_LOCATION + normal_dir) + self.create_fs_partition(TBL_LOCATION, normal_dir, "test", "5") # One partition will be added. self.execute_query_expect_success(self.client, "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) @@ -366,6 +353,51 @@ class TestRecoverPartitions(ImpalaTestSuite): "ALTER TABLE %s RECOVER PARTITIONS failed to handle encoded partitioned value" % FQ_TBL_NAME) + @SkipIfLocal.hdfs_client + @SkipIfS3.empty_directory + def test_empty_directory(self, vector, unique_database): + """Explicitly test how empty directories are handled when partitions are recovered.""" + + TBL_NAME = "test_recover_partitions" + FQ_TBL_NAME = unique_database + "." + TBL_NAME + TBL_LOCATION = self.__get_fs_location(unique_database, TBL_NAME) + + self.execute_query_expect_success(self.client, + "CREATE TABLE %s (c int) PARTITIONED BY (i int, s string)" % (FQ_TBL_NAME)) + + # Adds partition directories. + num_partitions = 10 + for i in xrange(1, num_partitions): + PART_DIR = "i=%d/s=part%d" % (i,i) + self.filesystem_client.make_dir(TBL_LOCATION + PART_DIR) + + # Adds a duplicate directory name. + self.filesystem_client.make_dir(TBL_LOCATION + "i=001/s=part1") + + # Adds a malformed directory name. + self.filesystem_client.make_dir(TBL_LOCATION + "i=wrong_type/s=part1") + + result = self.execute_query_expect_success(self.client, + "SHOW PARTITIONS %s" % FQ_TBL_NAME) + assert 0 == self.count_partition(result.data) + self.execute_query_expect_success(self.client, + "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) + result = self.execute_query_expect_success(self.client, + "SHOW PARTITIONS %s" % FQ_TBL_NAME) + assert num_partitions - 1 == self.count_partition(result.data) + for i in xrange(1, num_partitions): + PART_DIR = "part%d\t" % i + assert self.has_value(PART_DIR, result.data) + + def create_fs_partition(self, root_path, new_dir, new_file, value): + """Creates an fs directory and writes a file to it. Empty directories are no-op's + for S3, so enforcing a non-empty directory is less error prone across + filesystems.""" + partition_dir = os.path.join(root_path, new_dir) + self.filesystem_client.make_dir(partition_dir); + self.filesystem_client.create_file(os.path.join(partition_dir, new_file), + value) + def check_invalid_partition_values(self, fq_tbl_name, tbl_location, normal_values, invalid_values): """"Check that RECOVER PARTITIONS ignores partitions with invalid partition values."""