(impala) branch master updated: IMPALA-12518: Combine all exec_option dimension in test_vector.py

michaelsmith Fri, 27 Oct 2023 14:18:54 -0700

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git



The following commit(s) were added to refs/heads/master by this push:
     new feefcc639 IMPALA-12518: Combine all exec_option dimension in 
test_vector.py
feefcc639 is described below

commit feefcc63957a929bce9cb45f935d592cd088e2d3
Author: Riza Suminto <[email protected]>
AuthorDate: Wed Oct 25 20:21:54 2023 -0700

    IMPALA-12518: Combine all exec_option dimension in test_vector.py
    
    Before this patch, when writing pytest that exercise custom query option
    values, we need to declare it by making new test dimension, followed by
    deepcopying the original vector, and inserting the selected dimension
    value into 'exec_option' dictionary in generated vector.
    
    This patch simplify this steps by accounting dimensions that is intended
    to be part of 'exec_option' and automatically combining them during
    vector generation in test_vector.py. Such dimension should be registered
    via the new ImpalaTestMatrix.add_exec_option_dimension() function.
    
    function add_exec_option_dimension() in test_dimensions.py is renamed to
    add_mandatory_exec_option() to make it consistent with the same
    functionality in ImpalaTestMatrix and avoid confusion with the new
    ImpalaTestMatrix.add_exec_option_dimension() function. Function name
    add_exec_option_dimension() in test_dimensions.py is then repurposed as
    a shorthand for ImpalaTestMatrix.add_exec_option_dimension().
    
    The remaining changes for other pytest files will be done gradually.
    
    Testing:
    - Fix bug in TestIcebergV2Table and confirm that both True and False
      value for 'disable_optimized_iceberg_v2_read' options are exercised.
    - Run and pass all modified tests in this patch.
    
    Change-Id: I3adba260990fccf4d2f2e7c8c4e4fadc6fd43fe1
    Reviewed-on: http://gerrit.cloudera.org:8080/20625
    Reviewed-by: Zoltan Borok-Nagy <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Michael Smith <[email protected]>
---
 tests/common/test_dimensions.py          | 17 ++++++++--
 tests/common/test_vector.py              | 55 +++++++++++++++++++++++++++++---
 tests/custom_cluster/test_kudu.py        |  4 +--
 tests/query_test/test_async_codegen.py   |  4 +--
 tests/query_test/test_iceberg.py         |  5 +--
 tests/query_test/test_kudu.py            | 14 ++++----
 tests/query_test/test_runtime_filters.py | 18 +++++------
 7 files changed, 86 insertions(+), 31 deletions(-)

diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 9b957d3bb..aa2ce2db9 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -264,13 +264,24 @@ def 
create_exec_option_dimension_from_dict(exec_option_dimensions):
   # Build a test vector out of it
   return ImpalaTestDimension('exec_option', *exec_option_dimension_values)
 
-def add_exec_option_dimension(test_suite, key, value):
+
+def add_exec_option_dimension(test_suite, key, values):
+  """
+  Takes an ImpalaTestSuite object 'test_suite' and register new exec option 
dimension.
+  'key' must be a query option known to Impala, and 'values' must be a list of 
more than
+  one element.
+  """
+  test_suite.ImpalaTestMatrix.add_exec_option_dimension(
+    ImpalaTestDimension(key, *values))
+
+
+def add_mandatory_exec_option(test_suite, key, value):
   """
   Takes an ImpalaTestSuite object 'test_suite' and adds 'key=value' to every 
exec option
   test dimension, leaving the number of tests that will be run unchanged.
   """
-  for v in test_suite.ImpalaTestMatrix.dimensions["exec_option"]:
-    v.value[key] = value
+  test_suite.ImpalaTestMatrix.add_mandatory_exec_option(key, value)
+
 
 def extend_exec_option_dimension(test_suite, key, value):
   """
diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py
index 182d962fe..8e0bebfd5 100644
--- a/tests/common/test_vector.py
+++ b/tests/common/test_vector.py
@@ -58,6 +58,10 @@
 
 from __future__ import absolute_import, division, print_function
 from itertools import product
+from copy import deepcopy
+
+EXEC_OPTION_KEY = 'exec_option'
+
 
 # A list of test dimension values.
 class ImpalaTestDimension(list):
@@ -104,19 +108,40 @@ class ImpalaTestMatrix(object):
   def __init__(self, *args):
     self.dimensions = dict((arg.name, arg) for arg in args)
     self.constraint_list = list()
+    self.independent_exec_option_names = set()
 
   def add_dimension(self, dimension):
     self.dimensions[dimension.name] = dimension
 
   def add_mandatory_exec_option(self, exec_option_key, exec_option_value):
-    for vector in self.dimensions['exec_option']:
+    assert EXEC_OPTION_KEY in self.dimensions, (
+      "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+    for vector in self.dimensions[EXEC_OPTION_KEY]:
       vector.value[exec_option_key] = exec_option_value
 
+  def add_exec_option_dimension(self, dimension):
+    """
+    Add 'dimension' into 'self.dimensions' and memorize the name.
+    During vector generation, all dimensions registered through this method 
will be
+    embedded into 'exec_option' dimension. This is intended to maintain 
pairwise and
+    exhaustive combination correct while eliminating the need for individual 
test method
+    to append the custom exec options into 'exec_option' dimension themself.
+    """
+    assert EXEC_OPTION_KEY in self.dimensions, (
+      "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+    assert len(dimension) > 1, (
+      "Dimension " + dimension.name + " must have more than 1 possible value! "
+      "Use add_mandatory_exec_option() instead.")
+    self.add_dimension(dimension)
+    self.independent_exec_option_names.add(dimension.name)
+
   def clear(self):
     self.dimensions.clear()
+    self.independent_exec_option_names = set()
 
   def clear_dimension(self, dimension_name):
     del self.dimensions[dimension_name]
+    self.independent_exec_option_names.discard(dimension_name)
 
   def has_dimension(self, dimension_name):
     return dimension_name in self.dimensions
@@ -132,9 +157,29 @@ class ImpalaTestMatrix(object):
     else:
       raise ValueError('Unknown exploration strategy: %s' % 
exploration_strategy)
 
+  def embed_independent_exec_options(self, vector_values):
+    if not self.independent_exec_option_names:
+      return vector_values
+    values = []
+    exec_values = []
+    exec_option = None
+    for val in vector_values:
+      if val.name == EXEC_OPTION_KEY:
+        exec_option = deepcopy(val.value)
+      elif val.name in self.independent_exec_option_names:
+        exec_values.append(val)
+      else:
+        values.append(val)
+    assert exec_option is not None, (
+      "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+    for val in exec_values:
+      exec_option[val.name] = val.value
+    values.append(ImpalaTestVector.Value(EXEC_OPTION_KEY, exec_option))
+    return values
+
   def __generate_exhaustive_combinations(self):
-    return [ImpalaTestVector(vec) for vec in 
product(*self.__extract_vector_values())
-              if self.is_valid(vec)]
+    return [ImpalaTestVector(self.embed_independent_exec_options(vec))
+      for vec in product(*self.__extract_vector_values()) if 
self.is_valid(vec)]
 
   def __generate_pairwise_combinations(self):
     from allpairspy import AllPairs
@@ -144,8 +189,8 @@ class ImpalaTestMatrix(object):
     # results will be the same.
     if len(self.dimensions) == 1:
       return self.__generate_exhaustive_combinations()
-    return [ImpalaTestVector(vec) for vec in 
all_pairs(self.__extract_vector_values(),
-                                                 filter_func = self.is_valid)]
+    return [ImpalaTestVector(self.embed_independent_exec_options(vec))
+      for vec in all_pairs(self.__extract_vector_values(), 
filter_func=self.is_valid)]
 
   def add_constraint(self, constraint_func):
     self.constraint_list.append(constraint_func)
diff --git a/tests/custom_cluster/test_kudu.py 
b/tests/custom_cluster/test_kudu.py
index 49c4c4ce1..36bf55f0f 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -27,7 +27,7 @@ from tests.beeswax.impala_beeswax import 
ImpalaBeeswaxException
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.kudu_test_suite import KuduTestSuite
 from tests.common.skip import SkipIfKudu, SkipIfBuildType, SkipIf
-from tests.common.test_dimensions import add_exec_option_dimension
+from tests.common.test_dimensions import add_mandatory_exec_option
 from tests.util.event_processor_utils import EventProcessorUtils
 
 KUDU_MASTER_HOSTS = pytest.config.option.kudu_master_hosts
@@ -63,7 +63,7 @@ class TestKuduOperations(CustomKuduTest):
     super(TestKuduOperations, cls).add_test_dimensions()
     # The default read mode of READ_LATEST does not provide high enough 
consistency for
     # these tests.
-    add_exec_option_dimension(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
+    add_mandatory_exec_option(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(impalad_args=\
diff --git a/tests/query_test/test_async_codegen.py 
b/tests/query_test/test_async_codegen.py
index acc073f24..80e1612e9 100644
--- a/tests/query_test/test_async_codegen.py
+++ b/tests/query_test/test_async_codegen.py
@@ -19,7 +19,7 @@ from __future__ import absolute_import, division, 
print_function
 import pytest
 
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import add_exec_option_dimension
+from tests.common.test_dimensions import add_mandatory_exec_option
 from tests.common.test_dimensions import create_exec_option_dimension
 from tests.common.test_result_verifier import extract_event_sequence
 
@@ -90,7 +90,7 @@ limit 10;
         
debug_action_options=[cls.DEBUG_ACTION_CODEGEN_FINISH_BEFORE_EXEC_START,
             cls.DEBUG_ACTION_CODEGEN_FINISH_DURING_EXEC,
             cls.DEBUG_ACTION_EXEC_FINISH_BEFORE_CODEGEN]))
-    add_exec_option_dimension(cls, "async_codegen", 1)
+    add_mandatory_exec_option(cls, "async_codegen", 1)
 
     cls.ImpalaTestMatrix.add_constraint(lambda vector: 
cls.__is_valid_test_vector(vector))
 
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index c47772bf4..f24ae2170 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -38,6 +38,7 @@ import json
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.iceberg_test_suite import IcebergTestSuite
 from tests.common.skip import SkipIf, SkipIfFS, SkipIfDockerizedCluster
+from tests.common.test_dimensions import add_exec_option_dimension
 from tests.common.test_vector import ImpalaTestDimension
 from tests.common.file_utils import (
   create_iceberg_table_from_directory,
@@ -1185,8 +1186,8 @@ class TestIcebergV2Table(IcebergTestSuite):
     super(TestIcebergV2Table, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format == 'parquet')
-    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(
-      'disable_optimized_iceberg_v2_read', 0, 1))
+    add_exec_option_dimension(cls, 'disable_optimized_iceberg_v2_read', [0, 1])
+
   # The test uses pre-written Iceberg tables where the position delete files 
refer to
   # the data files via full URI, i.e. they start with 
'hdfs://localhost:2050/...'. In the
   # dockerised environment the namenode is accessible on a different 
hostname/port.
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index be3de8a58..1472bd130 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -46,7 +46,7 @@ from tests.common.kudu_test_suite import KuduTestSuite
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfKudu, SkipIfHive2
 from tests.common.test_dimensions import (add_exec_option_dimension,
-    extend_exec_option_dimension)
+    add_mandatory_exec_option)
 from tests.verifiers.metric_verifier import MetricVerifier
 
 KUDU_MASTER_HOSTS = pytest.config.option.kudu_master_hosts
@@ -65,11 +65,10 @@ class TestKuduBasicDML(KuduTestSuite):
     super(TestKuduBasicDML, cls).add_test_dimensions()
     # The default read mode of READ_LATEST does not provide high enough 
consistency for
     # these tests.
-    add_exec_option_dimension(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
+    add_mandatory_exec_option(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
     # Run with and without multithreading to ensure Kudu DML works with both 
threading
     # models. E.g. see IMPALA-9782.
-    add_exec_option_dimension(cls, "mt_dop", "0")
-    extend_exec_option_dimension(cls, "mt_dop", "4")
+    add_exec_option_dimension(cls, "mt_dop", [0, 4])
 
   @SkipIfKudu.no_hybrid_clock
   def test_kudu_insert(self, vector, unique_database):
@@ -105,7 +104,7 @@ class TestKuduOperations(KuduTestSuite):
     super(TestKuduOperations, cls).add_test_dimensions()
     # The default read mode of READ_LATEST does not provide high enough 
consistency for
     # these tests.
-    add_exec_option_dimension(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
+    add_mandatory_exec_option(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
 
   @SkipIfKudu.no_hybrid_clock
   @SkipIfKudu.hms_integration_enabled
@@ -584,8 +583,7 @@ class TestKuduPartitioning(KuduTestSuite):
     super(TestKuduPartitioning, cls).add_test_dimensions()
 
     # Test both the interpreted and the codegen'd path.
-    add_exec_option_dimension(cls, "disable_codegen", "0")
-    extend_exec_option_dimension(cls, "disable_codegen", "1")
+    add_exec_option_dimension(cls, "disable_codegen", [0, 1])
 
   def test_partitions_evenly_distributed(self, vector, cursor,
       kudu_client, unique_database):
@@ -1524,7 +1522,7 @@ class TestKuduReadTokenSplit(KuduTestSuite):
     super(TestKuduReadTokenSplit, cls).add_test_dimensions()
     # The default read mode of READ_LATEST does not provide high enough 
consistency for
     # these tests.
-    add_exec_option_dimension(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
+    add_mandatory_exec_option(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
 
   @SkipIfKudu.no_hybrid_clock
   @SkipIfNotHdfsMinicluster.tuned_for_minicluster
diff --git a/tests/query_test/test_runtime_filters.py 
b/tests/query_test/test_runtime_filters.py
index 4be4305af..87f299d0e 100644
--- a/tests/query_test/test_runtime_filters.py
+++ b/tests/query_test/test_runtime_filters.py
@@ -27,7 +27,7 @@ from tests.common.environ import build_flavor_timeout, 
ImpalaTestClusterProperti
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfEC, SkipIfLocal, SkipIfFS
-from tests.common.test_dimensions import add_exec_option_dimension
+from tests.common.test_dimensions import add_mandatory_exec_option
 from tests.common.test_vector import ImpalaTestDimension
 from tests.verifiers.metric_verifier import MetricVerifier
 from tests.util.filesystem_utils import WAREHOUSE
@@ -70,7 +70,7 @@ class TestRuntimeFilters(ImpalaTestSuite):
         or v.get_value('mt_dop') == 0)
     # Enable query option ASYNC_CODEGEN for slow build
     if build_runs_slowly:
-      add_exec_option_dimension(cls, "async_codegen", 1)
+      add_mandatory_exec_option(cls, "async_codegen", 1)
 
   def test_basic_filters(self, vector):
     new_vector = deepcopy(vector)
@@ -184,7 +184,7 @@ class TestBloomFilters(ImpalaTestSuite):
         lambda v: v.get_value('table_format').file_format not in ['hbase'])
     # Enable query option ASYNC_CODEGEN for slow build
     if build_runs_slowly:
-      add_exec_option_dimension(cls, "async_codegen", 1)
+      add_mandatory_exec_option(cls, "async_codegen", 1)
 
   def test_bloom_filters(self, vector):
     vector.get_value('exec_option')['ENABLED_RUNTIME_FILTER_TYPES'] = 'BLOOM'
@@ -226,11 +226,11 @@ class TestMinMaxFilters(ImpalaTestSuite):
         lambda v: v.get_value('table_format').file_format in ['kudu'])
     # Enable query option ASYNC_CODEGEN for slow build
     if build_runs_slowly:
-      add_exec_option_dimension(cls, "async_codegen", 1)
+      add_mandatory_exec_option(cls, "async_codegen", 1)
     # IMPALA-10715. Enable only min/max since the bloom filters will return
     # rows only satisfying the join predicates. This test requires the return
     # of non-qualifying rows to succeed.
-    add_exec_option_dimension(cls, "ENABLED_RUNTIME_FILTER_TYPES", "MIN_MAX")
+    add_mandatory_exec_option(cls, "ENABLED_RUNTIME_FILTER_TYPES", "MIN_MAX")
 
   def test_min_max_filters(self, vector):
     self.execute_query("SET MINMAX_FILTER_THRESHOLD=0.5")
@@ -303,7 +303,7 @@ class TestOverlapMinMaxFilters(ImpalaTestSuite):
         lambda v: v.get_value('table_format').file_format in ['parquet'])
     # Enable query option ASYNC_CODEGEN for slow build
     if build_runs_slowly:
-      add_exec_option_dimension(cls, "async_codegen", 1)
+      add_mandatory_exec_option(cls, "async_codegen", 1)
 
   def test_overlap_min_max_filters(self, vector, unique_database):
     self.execute_query("SET MINMAX_FILTER_THRESHOLD=0.5")
@@ -356,7 +356,7 @@ class TestInListFilters(ImpalaTestSuite):
         lambda v: v.get_value('table_format').file_format in ['orc'])
     # Enable query option ASYNC_CODEGEN for slow build
     if build_runs_slowly:
-      add_exec_option_dimension(cls, "async_codegen", 1)
+      add_mandatory_exec_option(cls, "async_codegen", 1)
 
   def test_in_list_filters(self, vector):
     vector.get_value('exec_option')['enabled_runtime_filter_types'] = 'in_list'
@@ -378,7 +378,7 @@ class TestAllRuntimeFilters(ImpalaTestSuite):
       lambda v: v.get_value('table_format').file_format in ['kudu'])
     # Enable query option ASYNC_CODEGEN for slow build
     if build_runs_slowly:
-      add_exec_option_dimension(cls, "async_codegen", 1)
+      add_mandatory_exec_option(cls, "async_codegen", 1)
 
   def test_all_runtime_filters(self, vector):
     self.execute_query("SET ENABLED_RUNTIME_FILTER_TYPES=ALL")
@@ -410,7 +410,7 @@ class TestRuntimeRowFilters(ImpalaTestSuite):
     cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('mt_dop', 0, 4))
     # Enable query option ASYNC_CODEGEN for slow build
     if build_runs_slowly:
-      add_exec_option_dimension(cls, "async_codegen", 1)
+      add_mandatory_exec_option(cls, "async_codegen", 1)
 
   def test_row_filters(self, vector):
     new_vector = deepcopy(vector)

(impala) branch master updated: IMPALA-12518: Combine all exec_option dimension in test_vector.py

Reply via email to