Repository: impala
Updated Branches:
  refs/heads/2.x 2c893f46b -> 9c32594f7


IMPALA-6885: handle empty s3 dirs in recover_partitions test

Two tests (test_duplicate_partitions, test_support_all_types)
added only directories, which is a no-op on s3. This results
in s3 test failures.

Another test (test_recover_partitions) falsely passed for s3
since its test for a malformed partition name is ambiguous
(did it pass because recovering correctly excluded or because
the directory does not exist, as in the case of s3?).

A recent change fixed the asserts in this test, so these tests
likely were falsely reporting success.

This change bundles directory creation with adding to it a new
file, which is the common case in this test. As a result, the
peculiarity with s3 is not handled via a special case each time
the filesystem is modified for new partitions.

This change also adds an explicit test for empty partition
directories, since the first change leaves this case untested.

Testing:
- tested against a local minicluster and s3

Change-Id: Ic6c74f4f2466b5870322bdf373e8a6a4d1d679f4
Reviewed-on: http://gerrit.cloudera.org:8080/10210
Reviewed-by: Alex Behm <alex.b...@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/9537dee2
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/9537dee2
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/9537dee2

Branch: refs/heads/2.x
Commit: 9537dee2267e5292440568442f4c4e744c982c6e
Parents: 2c893f4
Author: Vuk Ercegovac <vercego...@cloudera.com>
Authored: Tue Apr 24 16:43:01 2018 -0700
Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org>
Committed: Fri Apr 27 20:10:19 2018 +0000

----------------------------------------------------------------------
 tests/common/skip.py                      |  2 +
 tests/metadata/test_recover_partitions.py | 82 ++++++++++++++++++--------
 2 files changed, 59 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/9537dee2/tests/common/skip.py
----------------------------------------------------------------------
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 3113acf..7d515f7 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -44,6 +44,8 @@ class SkipIfS3:
   jira = partial(pytest.mark.skipif, IS_S3)
   hdfs_encryption = pytest.mark.skipif(IS_S3,
       reason="HDFS encryption is not supported with S3")
+  empty_directory = pytest.mark.skipif(IS_S3,
+      reason="Empty directories are not supported on S3")
 
   # These ones need test infra work to re-enable.
   udfs = pytest.mark.skipif(IS_S3, reason="udas/udfs not copied to S3")

http://git-wip-us.apache.org/repos/asf/impala/blob/9537dee2/tests/metadata/test_recover_partitions.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_recover_partitions.py 
b/tests/metadata/test_recover_partitions.py
index 2d17d3d..1894676 100644
--- a/tests/metadata/test_recover_partitions.py
+++ b/tests/metadata/test_recover_partitions.py
@@ -17,8 +17,9 @@
 #
 # Impala tests for ALTER TABLE RECOVER PARTITIONS statement
 
+import os
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfLocal
+from tests.common.skip import SkipIfLocal, SkipIfS3
 from tests.common.test_dimensions import ALL_NODES_ONLY
 from tests.common.test_dimensions import create_exec_option_dimension
 from tests.util.filesystem_utils import WAREHOUSE, IS_S3
@@ -78,9 +79,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
 
     # Create a path for a new partition using hdfs client and add a file with 
some values.
     # Test that the partition can be recovered and that the inserted data are 
accessible.
-    self.filesystem_client.make_dir(TBL_LOCATION + LEAF_DIR)
-    self.filesystem_client.create_file(TBL_LOCATION + LEAF_DIR + FILE_PATH,
-                                       INSERTED_VALUE)
+    self.create_fs_partition(TBL_LOCATION, LEAF_DIR, FILE_PATH, INSERTED_VALUE)
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
     assert not self.has_value(PART_NAME, result.data)
@@ -100,7 +99,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
     old_length = len(result.data)
-    self.filesystem_client.make_dir(TBL_LOCATION + MALFORMED_DIR)
+    self.create_fs_partition(TBL_LOCATION, MALFORMED_DIR, FILE_PATH, 
INSERTED_VALUE)
     self.execute_query_expect_success(self.client,
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
@@ -111,9 +110,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
 
     # Create a directory whose subdirectory names contain 
__HIVE_DEFAULT_PARTITION__
     # and check that is recovered as a NULL partition.
-    self.filesystem_client.make_dir(TBL_LOCATION + NULL_DIR)
-    self.filesystem_client.create_file(
-        TBL_LOCATION + NULL_DIR + FILE_PATH, NULL_INSERTED_VALUE)
+    self.create_fs_partition(TBL_LOCATION, NULL_DIR, FILE_PATH, 
NULL_INSERTED_VALUE)
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
     assert not self.has_value(self.DEF_NULL_PART_KEY, result.data)
@@ -150,9 +147,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "ALTER TABLE %s PARTITION (i=1, p='p3') SET LOCATION '%s/%s.db/tmp' "
         % (FQ_TBL_NAME, WAREHOUSE, unique_database))
     self.filesystem_client.delete_file_dir(TBL_LOCATION + LEAF_DIR, 
recursive=True)
-    self.filesystem_client.make_dir(TBL_LOCATION + LEAF_DIR);
-    self.filesystem_client.create_file(TBL_LOCATION + LEAF_DIR + FILE_PATH,
-                                       INSERTED_VALUE)
+    self.create_fs_partition(TBL_LOCATION, LEAF_DIR, FILE_PATH, INSERTED_VALUE)
     self.execute_query_expect_success(self.client,
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     # Ensure that no duplicate partitions are recovered.
@@ -184,12 +179,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
         PART_DIR = "s=part%d/" % i
         FILE_PATH = "test"
         INSERTED_VALUE = "666"
-        self.filesystem_client.make_dir(TBL_LOCATION + PART_DIR)
-        if IS_S3:
-            # S3 is a key/value store and directory creation is a NOP; actually
-            # create the file.
-            self.filesystem_client.create_file(TBL_LOCATION + PART_DIR + 
FILE_PATH,
-                                               INSERTED_VALUE)
+        self.create_fs_partition(TBL_LOCATION, PART_DIR, FILE_PATH, 
INSERTED_VALUE)
 
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
@@ -230,8 +220,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
 
     self.execute_query_expect_success(self.client,
         "ALTER TABLE %s ADD PARTITION(i=1, p='p4')" % FQ_TBL_NAME)
-    self.filesystem_client.make_dir(TBL_LOCATION + LEAF_DIR);
-    self.filesystem_client.create_file(TBL_LOCATION + LEAF_DIR + FILE_PATH, 
INSERTED_VALUE)
+    self.create_fs_partition(TBL_LOCATION, LEAF_DIR, FILE_PATH, INSERTED_VALUE)
     self.execute_query_expect_success(self.client,
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
@@ -245,8 +234,8 @@ class TestRecoverPartitions(ImpalaTestSuite):
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
     old_length = len(result.data)
-    self.filesystem_client.make_dir(TBL_LOCATION + SAME_VALUE_DIR1)
-    self.filesystem_client.make_dir(TBL_LOCATION + SAME_VALUE_DIR2)
+    self.create_fs_partition(TBL_LOCATION, SAME_VALUE_DIR1, FILE_PATH, 
INSERTED_VALUE)
+    self.create_fs_partition(TBL_LOCATION, SAME_VALUE_DIR2, FILE_PATH, 
INSERTED_VALUE)
     # Only one partition will be added.
     self.execute_query_expect_success(self.client,
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
@@ -274,9 +263,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
     # Test that the recovered partitions are properly stored in Hive MetaStore.
     # Invalidate the table metadata and then check if the recovered partitions
     # are accessible.
-    self.filesystem_client.make_dir(TBL_LOCATION + LEAF_DIR)
-    self.filesystem_client.create_file(TBL_LOCATION + LEAF_DIR + FILE_PATH,
-                                       INSERTED_VALUE)
+    self.create_fs_partition(TBL_LOCATION, LEAF_DIR, FILE_PATH, INSERTED_VALUE)
     self.execute_query_expect_success(self.client,
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
@@ -323,7 +310,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
     old_length = len(result.data)
     normal_dir = '/'.join(normal_values)
-    self.filesystem_client.make_dir(TBL_LOCATION + normal_dir)
+    self.create_fs_partition(TBL_LOCATION, normal_dir, "test", "5")
     # One partition will be added.
     self.execute_query_expect_success(self.client,
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
@@ -366,6 +353,51 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "ALTER TABLE %s RECOVER PARTITIONS failed to handle encoded 
partitioned value" %
         FQ_TBL_NAME)
 
+  @SkipIfLocal.hdfs_client
+  @SkipIfS3.empty_directory
+  def test_empty_directory(self, vector, unique_database):
+    """Explicitly test how empty directories are handled when partitions are 
recovered."""
+
+    TBL_NAME = "test_recover_partitions"
+    FQ_TBL_NAME = unique_database + "." + TBL_NAME
+    TBL_LOCATION = self.__get_fs_location(unique_database, TBL_NAME)
+
+    self.execute_query_expect_success(self.client,
+        "CREATE TABLE %s (c int) PARTITIONED BY (i int, s string)" % 
(FQ_TBL_NAME))
+
+    # Adds partition directories.
+    num_partitions = 10
+    for i in xrange(1, num_partitions):
+        PART_DIR = "i=%d/s=part%d" % (i,i)
+        self.filesystem_client.make_dir(TBL_LOCATION + PART_DIR)
+
+    # Adds a duplicate directory name.
+    self.filesystem_client.make_dir(TBL_LOCATION + "i=001/s=part1")
+
+    # Adds a malformed directory name.
+    self.filesystem_client.make_dir(TBL_LOCATION + "i=wrong_type/s=part1")
+
+    result = self.execute_query_expect_success(self.client,
+        "SHOW PARTITIONS %s" % FQ_TBL_NAME)
+    assert 0 == self.count_partition(result.data)
+    self.execute_query_expect_success(self.client,
+        "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
+    result = self.execute_query_expect_success(self.client,
+        "SHOW PARTITIONS %s" % FQ_TBL_NAME)
+    assert num_partitions - 1 == self.count_partition(result.data)
+    for i in xrange(1, num_partitions):
+        PART_DIR = "part%d\t" % i
+        assert self.has_value(PART_DIR, result.data)
+
+  def create_fs_partition(self, root_path, new_dir, new_file, value):
+    """Creates an fs directory and writes a file to it. Empty directories are 
no-op's
+       for S3, so enforcing a non-empty directory is less error prone across
+       filesystems."""
+    partition_dir = os.path.join(root_path, new_dir)
+    self.filesystem_client.make_dir(partition_dir);
+    self.filesystem_client.create_file(os.path.join(partition_dir, new_file),
+                                       value)
+
   def check_invalid_partition_values(self, fq_tbl_name, tbl_location,
     normal_values, invalid_values):
     """"Check that RECOVER PARTITIONS ignores partitions with invalid 
partition values."""

Reply via email to