(impala) 02/02: IMPALA-13330: Fix orc_schema_resolution in test_nested_types.py

csringhofer Tue, 03 Sep 2024 07:37:48 -0700

This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit b9b4a6d12243c20ae42b9c9b1e4683a48ee70505
Author: Riza Suminto <[email protected]>
AuthorDate: Mon Aug 26 12:10:48 2024 -0700

    IMPALA-13330: Fix orc_schema_resolution in test_nested_types.py
    
    test_nested_types.py declares the 'orc_schema_resolution' dimension, but
    does not actually exercise it. None of the tests actually insert the
    'orc_schema_resolution' dimension value into
    vector.get_value('exec_option').
    
    This patch fixes that issue by declaring the 'orc_schema_resolution'
    option using the helper function add_exec_option_dimension(), which
    automatically inserts it into the 'exec_option' dimension. Test classes
    are also reorganized to reduce test skipping and deepcopy-ing.
    
    Following are notable changes:
    - Use 'unique_database' in test_struct_in_select_list to avoid collision
      during view creation.
    - Drop unused 'unique_database' fixture in
      TestNestedCollectionsInSelectList.
    - test_map_null_keys does not have the 'mt_dop' dimension anymore since
      it only tests how NULL map keys are displayed.
    - Created common base class TestParquetArrayEncodingsBase for
      TestParquetArrayEncodings and TestParquetArrayEncodingsAmbiguous. The
      latter does not run with 'parquet_array_resolution' anymore since that
      query option is set directly within parquet-ambiguous-list-modern.test
      and parquet-ambiguous-list-legacy.test files.
    - Make ImpalaTestMatrix.add_dimension() call
      ImpalaTestMatrix.clear_dimension() if the given dimension.name is
      'exec_option' and independent_exec_option_names is not empty.
    
    The test count is reduced as follows:
    Before patch:
    168 core tests, 571 exhaustive tests
    After patch:
    161 core tests, 529 exhaustive tests
    
    Testing:
    - Ran and passed test_nested_types.py in exhaustive exploration.
    - Verified that no WARNING log is printed by
      ImpalaTestSuite.validate_exec_option_dimension().
    
    Change-Id: Ib958cd34a56c949190b4f22e5da5dad2c0de25ff
    Reviewed-on: http://gerrit.cloudera.org:8080/21726
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../queries/QueryTest/struct-in-select-list.test   |  14 +-
 tests/common/test_vector.py                        |   9 +-
 tests/query_test/test_nested_types.py              | 243 ++++++++++++---------
 3 files changed, 156 insertions(+), 110 deletions(-)

diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test
 
b/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test
index f7ebf8ce7..f6e9b1982 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test
@@ -472,11 +472,11 @@ INT,STRING
 ====
 ---- QUERY
 # CREATE VIEW AS SELECT where the select returns struct.
-drop view if exists struct_view;
-create view struct_view as
+drop view if exists $UNIQUE_DB.struct_view;
+create view $UNIQUE_DB.struct_view as
     select id, small_struct
     from complextypes_structs;
-select id, small_struct from struct_view;
+select id, small_struct from $UNIQUE_DB.struct_view;
 ---- RESULTS
 1,'NULL'
 2,'{"i":19191,"s":"small_struct_str"}'
@@ -517,10 +517,10 @@ INT,STRING
 ====
 ---- QUERY
 # Create a view containing structs and query the view.
-drop view if exists tmp_view;
-create view tmp_view as
+drop view if exists $UNIQUE_DB.tmp_view;
+create view $UNIQUE_DB.tmp_view as
     select id, str, tiny_struct, alltypes from complextypes_structs;
-select id, alltypes, tiny_struct from tmp_view;
+select id, alltypes, tiny_struct from $UNIQUE_DB.tmp_view;
 ---- RESULTS
 
1,'{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
 10:19:04","s1":"some string","s2":"another 
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}'
 
2,'{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
 12:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab 
","vc":"varchar","de1":11223,"de2":null}','{"b":false}'
@@ -567,7 +567,7 @@ INT,STRING
 ---- QUERY
 # It's not supported to create a view with structs from a file format that 
doesn't
 # support selecting structs.
-create view tmp_view as select id, int_struct_col from 
functional.allcomplextypes;
+create view $UNIQUE_DB.tmp_view_no_struct_support as select id, int_struct_col 
from functional.allcomplextypes;
 ---- CATCH
 AnalysisException: Querying STRUCT is only supported for ORC and Parquet file 
formats.
 ====
diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py
index da21b688d..3ccaa2dc2 100644
--- a/tests/common/test_vector.py
+++ b/tests/common/test_vector.py
@@ -59,7 +59,10 @@
 from __future__ import absolute_import, division, print_function
 from itertools import product
 from copy import deepcopy
+import logging
 
+
+LOG = logging.getLogger(__name__)
 EXEC_OPTION_KEY = 'exec_option'
 
 
@@ -129,7 +132,11 @@ class ImpalaTestMatrix(object):
   def add_dimension(self, dimension):
     self.dimensions[dimension.name] = dimension
     if dimension.name == EXEC_OPTION_KEY:
-      self.independent_exec_option_names.clear()
+      for name in list(self.independent_exec_option_names):
+        LOG.warn("Reassigning {} dimension will remove exec option {}={} that 
was "
+            "independently declared through add_exec_option_dimension.".format(
+              EXEC_OPTION_KEY, name, [v.value for v in self.dimensions[name]]))
+        self.clear_dimension(name)
 
   def assert_unique_exec_option_key(self, key):
     """Assert that 'exec_option' dimension exist and 'key' is not exist yet
diff --git a/tests/query_test/test_nested_types.py 
b/tests/query_test/test_nested_types.py
index ea0a3259c..16361d5eb 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -18,16 +18,21 @@
 from __future__ import absolute_import, division, print_function
 import os
 from copy import deepcopy
-import pytest
 
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfFS, SkipIfHive2, SkipIfNotHdfsMinicluster
-from tests.common.test_dimensions import (create_exec_option_dimension,
+from tests.common.test_dimensions import (
+    add_exec_option_dimension,
+    create_exec_option_dimension,
     create_exec_option_dimension_from_dict, create_client_protocol_dimension,
-    create_orc_dimension, orc_schema_resolution_constraint)
-from tests.common.test_vector import ImpalaTestDimension
-from tests.util.filesystem_utils import WAREHOUSE, get_fs_path, IS_HDFS
+    orc_schema_resolution_constraint)
+from tests.util.filesystem_utils import WAREHOUSE, get_fs_path
+
+
+MT_DOP_DIMS = [0, 2]
+ORC_RESOLUTION_DIMS = [0, 1]
+
 
 class TestNestedTypes(ImpalaTestSuite):
   """Functional tests for nested types, run for all file formats that support 
nested
@@ -36,76 +41,46 @@ class TestNestedTypes(ImpalaTestSuite):
   def get_workload(self):
     return 'functional-query'
 
-  @staticmethod
-  def orc_schema_resolution_constraint(vector):
-    """ Constraint to use multiple orc_schema_resolution only in case of orc 
files"""
-    file_format = vector.get_value('table_format').file_format
-    orc_schema_resolution = vector.get_value('orc_schema_resolution')
-    return file_format == 'orc' or orc_schema_resolution == 0
-
   @classmethod
   def add_test_dimensions(cls):
     super(TestNestedTypes, cls).add_test_dimensions()
+    add_exec_option_dimension(cls, 'mt_dop', MT_DOP_DIMS)
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet', 'orc'])
-    cls.ImpalaTestMatrix.add_dimension(
-        ImpalaTestDimension('mt_dop', 0, 2))
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
     cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
 
   def test_scanner_basic(self, vector):
     """Queries that do not materialize arrays."""
-    vector = deepcopy(vector)
-    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     self.run_test_case('QueryTest/nested-types-scanner-basic', vector)
 
   def test_scanner_array_materialization(self, vector):
     """Queries that materialize arrays."""
-    vector = deepcopy(vector)
-    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     self.run_test_case('QueryTest/nested-types-scanner-array-materialization', 
vector)
 
   def test_scanner_multiple_materialization(self, vector):
     """Queries that materialize the same array multiple times."""
-    vector = deepcopy(vector)
-    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     
self.run_test_case('QueryTest/nested-types-scanner-multiple-materialization', 
vector)
 
   def test_scanner_position(self, vector):
     """Queries that materialize the artifical position element."""
-    vector = deepcopy(vector)
-    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     self.run_test_case('QueryTest/nested-types-scanner-position', vector)
 
   def test_scanner_map(self, vector):
     """Queries that materialize maps. (Maps looks like arrays of key/value 
structs, so
     most map functionality is already tested by the array tests.)"""
-    vector = deepcopy(vector)
-    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     self.run_test_case('QueryTest/nested-types-scanner-maps', vector)
 
   def test_runtime(self, vector):
     """Queries that send collections through the execution runtime."""
-    vector = deepcopy(vector)
-    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     self.run_test_case('QueryTest/nested-types-runtime', vector)
 
   def test_subplan(self, vector):
     """Test subplans with various exec nodes inside it."""
-    vector = deepcopy(vector)
-    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     db_suffix = vector.get_value('table_format').db_suffix()
     self.run_test_case('QueryTest/nested-types-subplan', vector,
                        use_db='tpch_nested' + db_suffix)
 
-  def test_subplan_single_node(self, vector):
-    """Test subplans with various exec nodes inside it and num_nodes=1."""
-    vector = deepcopy(vector)
-    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
-    new_vector = deepcopy(vector)
-    new_vector.get_value('exec_option')['num_nodes'] = 1
-    self.run_test_case('QueryTest/nested-types-subplan-single-node', 
new_vector)
-
   def test_with_clause(self, vector):
     """Queries using nested types and with WITH clause."""
     db_suffix = vector.get_value('table_format').db_suffix()
@@ -113,6 +88,30 @@ class TestNestedTypes(ImpalaTestSuite):
                        use_db='tpch_nested' + db_suffix)
 
 
+class TestNestedTypesSingleNode(ImpalaTestSuite):
+  """Functional tests for nested types, run for all file formats that support 
nested
+  types. All tests here runs with single node only."""
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestNestedTypesSingleNode, cls).add_test_dimensions()
+    # Runs in single node only.
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(cluster_sizes=[1]))
+    add_exec_option_dimension(cls, 'mt_dop', MT_DOP_DIMS)
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format in ['parquet', 'orc'])
+    cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
+
+  def test_subplan_single_node(self, vector):
+    """Test subplans with various exec nodes inside it and num_nodes=1."""
+    self.run_test_case('QueryTest/nested-types-subplan-single-node', vector)
+
+
 class TestNestedStructsInSelectList(ImpalaTestSuite):
   """Functional tests for nested structs provided in the select list."""
   @classmethod
@@ -122,10 +121,6 @@ class TestNestedStructsInSelectList(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestNestedStructsInSelectList, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format in ['parquet', 'orc'])
-    cls.ImpalaTestMatrix.add_dimension(
-        ImpalaTestDimension('mt_dop', 0, 2))
     cls.ImpalaTestMatrix.add_dimension(
         create_exec_option_dimension_from_dict({
             # Putting 'True' first because this way in non-exhaustive runs 
there are more
@@ -134,17 +129,23 @@ class TestNestedStructsInSelectList(ImpalaTestSuite):
             # The below two options are set to prevent the planner from 
disabling codegen
             # because of the small data size even when 'disable_codegen' is 
False.
             'disable_codegen_rows_threshold': [0],
-            'exec_single_node_rows_threshold': [0]}))
+            'exec_single_node_rows_threshold': [0],
+            'mt_dop': MT_DOP_DIMS}))
+    # Must declare 'orc_schema_resolution' using 'add_exec_option_dimension' 
so that
+    # 'orc_schema_resolution_constraint' can catch it.
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format in ['parquet', 'orc'])
     cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
 
-  def test_struct_in_select_list(self, vector):
+  def test_struct_in_select_list(self, vector, unique_database):
     """Queries where a struct column is in the select list"""
     new_vector = deepcopy(vector)
     
new_vector.get_value('exec_option')['convert_legacy_hive_parquet_utc_timestamps']
 = 1
     new_vector.get_value('exec_option')['timezone'] = '"Europe/Budapest"'
-    self.run_test_case('QueryTest/struct-in-select-list', new_vector)
+    self.run_test_case('QueryTest/struct-in-select-list', new_vector,
+                       test_file_vars={'$UNIQUE_DB': unique_database})
 
   @SkipIfFS.hbase
   def test_struct_in_select_list_hbase(self, vector):
@@ -180,35 +181,30 @@ class TestNestedCollectionsInSelectList(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestNestedCollectionsInSelectList, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format in ['parquet', 'orc'])
-    cls.ImpalaTestMatrix.add_dimension(
-        ImpalaTestDimension('mt_dop', 0, 2))
     cls.ImpalaTestMatrix.add_dimension(
         create_exec_option_dimension_from_dict({
             'disable_codegen': ['False', 'True'],
             # The below two options are set to prevent the planner from 
disabling codegen
             # because of the small data size even when 'disable_codegen' is 
False.
             'disable_codegen_rows_threshold': [0],
-            'exec_single_node_rows_threshold': [0]}))
+            'exec_single_node_rows_threshold': [0],
+            'mt_dop': MT_DOP_DIMS}))
+    # Must declare 'orc_schema_resolution' using 'add_exec_option_dimension' 
so that
+    # 'orc_schema_resolution_constraint' can catch it.
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format in ['parquet', 'orc'])
     cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
 
-  def test_array_in_select_list(self, vector, unique_database):
+  def test_array_in_select_list(self, vector):
     """Queries where an array column is in the select list"""
     self.run_test_case('QueryTest/nested-array-in-select-list', vector)
 
-  def test_map_in_select_list(self, vector, unique_database):
+  def test_map_in_select_list(self, vector):
     """Queries where a map column is in the select list"""
     self.run_test_case('QueryTest/nested-map-in-select-list', vector)
 
-  def test_map_null_keys(self, vector, unique_database):
-    """Queries where a map has null keys. Is only possible in ORC, not 
Parquet."""
-    if vector.get_value('table_format').file_format == 'parquet':
-      pytest.skip()
-    self.run_test_case('QueryTest/map_null_keys', vector)
-
 
 class TestMixedCollectionsAndStructsInSelectList(ImpalaTestSuite):
   """Functional tests for the case where collections and structs are embedded 
into one
@@ -220,19 +216,20 @@ class 
TestMixedCollectionsAndStructsInSelectList(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestMixedCollectionsAndStructsInSelectList, 
cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format in ['parquet', 'orc'])
-    cls.ImpalaTestMatrix.add_dimension(
-        ImpalaTestDimension('mt_dop', 0, 2))
     cls.ImpalaTestMatrix.add_dimension(
         create_exec_option_dimension_from_dict({
             'disable_codegen': ['False', 'True'],
             # The below two options are set to prevent the planner from 
disabling codegen
             # because of the small data size even when 'disable_codegen' is 
False.
             'disable_codegen_rows_threshold': [0],
-            'exec_single_node_rows_threshold': [0]}))
+            'exec_single_node_rows_threshold': [0],
+            'mt_dop': MT_DOP_DIMS}))
+    # Must declare 'orc_schema_resolution' using 'add_exec_option_dimension' 
so that
+    # 'orc_schema_resolution_constraint' can catch it.
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format in ['parquet', 'orc'])
     cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
 
   def test_mixed_complex_types_in_select_list(self, vector, unique_database):
@@ -250,9 +247,9 @@ class TestComputeStatsWithNestedTypes(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestComputeStatsWithNestedTypes, cls).add_test_dimensions()
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet', 'orc'])
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
     cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
 
   def test_compute_stats_with_structs(self, vector):
@@ -269,9 +266,9 @@ class TestZippingUnnest(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestZippingUnnest, cls).add_test_dimensions()
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet', 'orc'])
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
     cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
 
   def test_zipping_unnest_in_from_clause(self, vector):
@@ -311,9 +308,9 @@ class TestNestedTypesNoMtDop(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestNestedTypesNoMtDop, cls).add_test_dimensions()
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet', 'orc'])
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
     cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
 
   def test_tpch(self, vector):
@@ -324,9 +321,11 @@ class TestNestedTypesNoMtDop(ImpalaTestSuite):
 
   def test_tpch_limit(self, vector):
     """Queries over the larger nested TPCH dataset with limits in their 
subplan."""
-    vector.get_value('exec_option')['batch_size'] = 10
-    db_suffix = vector.get_value('table_format').db_suffix()
-    self.run_test_case('QueryTest/nested-types-tpch-limit', vector,
+    # Override batch_size to 10.
+    new_vector = deepcopy(vector)
+    new_vector.get_value('exec_option')['batch_size'] = 10
+    db_suffix = new_vector.get_value('table_format').db_suffix()
+    self.run_test_case('QueryTest/nested-types-tpch-limit', new_vector,
                        use_db='tpch_nested' + db_suffix)
 
   @SkipIfNotHdfsMinicluster.tuned_for_minicluster
@@ -343,12 +342,6 @@ class TestNestedTypesNoMtDop(ImpalaTestSuite):
     self.run_test_case('QueryTest/nested-types-tpch-errors',
                        vector, use_db='tpch_nested' + db_suffix)
 
-  def test_parquet_stats(self, vector):
-    """Queries that test evaluation of Parquet row group statistics."""
-    if vector.get_value('table_format').file_format == 'orc':
-      pytest.skip('This test is specific to Parquet')
-    self.run_test_case('QueryTest/nested-types-parquet-stats', vector)
-
   @SkipIfFS.hive
   def test_upper_case_field_name(self, unique_database):
     """IMPALA-5994: Tests that a Hive-created table with a struct field name 
with upper
@@ -365,7 +358,7 @@ class TestNestedTypesNoMtDop(ImpalaTestSuite):
     """IMPALA-6370: Test that a partitioned table with nested types can be 
scanned."""
     table = "complextypes_partitioned"
     db_table = "{0}.{1}".format(unique_database, table)
-    table_format_info = vector.get_value('table_format')  # type: 
TableFormatInfo
+    table_format_info = vector.get_value('table_format')  # type is 
TableFormatInfo
     file_format = table_format_info.file_format
     db_suffix = table_format_info.db_suffix()
     self.client.execute("""
@@ -398,6 +391,27 @@ class TestNestedTypesNoMtDop(ImpalaTestSuite):
     self.run_test_case('QueryTest/nested-types-basic-partitioned', vector,
         unique_database)
 
+
+class TestNestedTypesNoMtDopOrc(ImpalaTestSuite):
+  """Functional tests for nested types against ORC format only and not need to 
be run
+  with mt_dop > 0."""
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestNestedTypesNoMtDopOrc, cls).add_test_dimensions()
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format in ['orc'])
+    cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
+
+  def test_map_null_keys(self, vector):
+    """Queries where a map has null keys. Is only possible in ORC, not Parquet.
+    Does not need to exercise MT_DOP>0."""
+    self.run_test_case('QueryTest/map_null_keys', vector)
+
   # Skip this test on non-HDFS filesystems, because the test contains Hive
   # queries that hang in some cases due to IMPALA-9365.
   @SkipIfFS.hive
@@ -406,10 +420,6 @@ class TestNestedTypesNoMtDop(ImpalaTestSuite):
     """IMPALA-6370: Test that a partitioned table with nested types can be 
scanned."""
     table = "complextypes_partitioned"
     db_table = "{0}.{1}".format(unique_database, table)
-    table_format_info = vector.get_value('table_format')  # type: 
TableFormatInfo
-    file_format = table_format_info.file_format
-    if file_format != "orc":
-      pytest.skip('Full ACID tables are only supported in ORC format.')
 
     self.client.execute("""
         CREATE TABLE {0} (
@@ -437,7 +447,26 @@ class TestNestedTypesNoMtDop(ImpalaTestSuite):
     self.run_test_case('QueryTest/nested-types-basic-partitioned', vector,
         unique_database)
 
-class TestParquetArrayEncodings(ImpalaTestSuite):
+
+class TestNestedTypesNoMtDopParquet(ImpalaTestSuite):
+  """Functional tests for nested types against Parquet format only and not need
+  to be run with mt_dop > 0."""
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestNestedTypesNoMtDopParquet, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format in ['parquet'])
+
+  def test_parquet_stats(self, vector):
+    """Queries that test evaluation of Parquet row group statistics."""
+    self.run_test_case('QueryTest/nested-types-parquet-stats', vector)
+
+
+class TestParquetArrayEncodingsBase(ImpalaTestSuite):
   TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'],
                               "testdata/parquet_nested_types_encodings")
 
@@ -449,16 +478,28 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
 
   @classmethod
   def add_test_dimensions(cls):
-    super(TestParquetArrayEncodings, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(
-      'parquet_array_resolution', 
*TestParquetArrayEncodings.ARRAY_RESOLUTION_POLICIES))
+    super(TestParquetArrayEncodingsBase, cls).add_test_dimensions()
+    add_exec_option_dimension(cls, 'parquet_array_resolution',
+                              
TestParquetArrayEncodingsBase.ARRAY_RESOLUTION_POLICIES)
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format == 'parquet')
 
+  @classmethod
+  def _create_test_table(self, dbname, tablename, filename, columns):
+    """Creates a table in the given database with the given name and columns. 
Copies
+    the file with the given name from TESTFILE_DIR into the table."""
+    location = get_fs_path("/test-warehouse/%s.db/%s" % (dbname, tablename))
+    self.client.execute("create table %s.%s (%s) stored as parquet location 
'%s'" %
+                        (dbname, tablename, columns, location))
+    local_path = self.TESTFILE_DIR + "/" + filename
+    self.filesystem_client.copy_from_local(local_path, location)
+
+
+class TestParquetArrayEncodings(TestParquetArrayEncodingsBase):
+
   def __init_arr_res(self, vector):
-    arr_res = vector.get_value('parquet_array_resolution')
     qopts = vector.get_value('exec_option')
-    qopts['parquet_array_resolution'] = arr_res
+    arr_res = qopts['parquet_array_resolution']
     return (arr_res, qopts)
 
   # $ parquet-tools schema SingleFieldGroupInList.parquet
@@ -677,7 +718,7 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
         "select cnt from %s t, (select count(*) cnt from t.col1) v" % 
full_name, qopts)
       assert result.data == ['3'] * mult
       result = self.execute_query(
-        "select cnt from %s t, t.col1 a1, (select count(*) cnt from a1.item) 
v"\
+        "select cnt from %s t, t.col1 a1, (select count(*) cnt from a1.item) v"
         % full_name, qopts)
       assert result.data == ['3', '3', '3'] * mult
 
@@ -688,7 +729,7 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
       except Exception as e:
         assert expected_err in str(e)
       try:
-        self.execute_query("select cnt from %s t, (select count(*) cnt from 
t.col1) v"\
+        self.execute_query("select cnt from %s t, (select count(*) cnt from 
t.col1) v"
           % full_name, qopts)
       except Exception as e:
         assert expected_err in str(e)
@@ -747,6 +788,15 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
       "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, 
qopts)
     assert result.data == ['2']
 
+
+class TestParquetArrayEncodingsAmbiguous(TestParquetArrayEncodingsBase):
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestParquetArrayEncodingsAmbiguous, cls).add_test_dimensions()
+    # Drop 'parquet_array_resolution' dimension. It will be set inside .test 
files
+    cls.ImpalaTestMatrix.clear_dimension('parquet_array_resolution')
+
   # $ parquet-tools schema AmbiguousList_Modern.parquet
   # message org.apache.impala.nested {
   #   required group ambigArray (LIST) {
@@ -818,9 +868,6 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
     """
 
     # The Parquet resolution policy is manually set in the .test files.
-    if vector.get_value('parquet_array_resolution') != "three_level":
-      pytest.skip("Test only run with three_level")
-
     ambig_modern_tbl = "ambig_modern"
     self._create_test_table(unique_database, ambig_modern_tbl,
         "AmbiguousList_Modern.parquet",
@@ -835,14 +882,6 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
     self.run_test_case('QueryTest/parquet-ambiguous-list-legacy',
                         vector, unique_database)
 
-  def _create_test_table(self, dbname, tablename, filename, columns):
-    """Creates a table in the given database with the given name and columns. 
Copies
-    the file with the given name from TESTFILE_DIR into the table."""
-    location = get_fs_path("/test-warehouse/%s.db/%s" % (dbname, tablename))
-    self.client.execute("create table %s.%s (%s) stored as parquet location 
'%s'" %
-                        (dbname, tablename, columns, location))
-    local_path = self.TESTFILE_DIR + "/" + filename
-    self.filesystem_client.copy_from_local(local_path, location)
 
 class TestMaxNestingDepth(ImpalaTestSuite):
   # Should be kept in sync with the FE's Type.MAX_NESTING_DEPTH
@@ -856,9 +895,9 @@ class TestMaxNestingDepth(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestMaxNestingDepth, cls).add_test_dimensions()
+    add_exec_option_dimension(cls, 'orc_schema_resolution', 
ORC_RESOLUTION_DIMS)
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet', 'orc'])
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
     cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
 
   def test_max_nesting_depth(self, vector, unique_database):

(impala) 02/02: IMPALA-13330: Fix orc_schema_resolution in test_nested_types.py

Reply via email to