(impala) 01/02: IMPALA-13341: Fix mismatch exec option values in py.test files

michaelsmith Tue, 03 Sep 2024 15:20:12 -0700

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit 2db68dff4870cc7f5c5b40388fdaf0eb0fdcf617
Author: Riza Suminto <[email protected]>
AuthorDate: Wed Aug 28 15:34:23 2024 -0700

    IMPALA-13341: Fix mismatch exec option values in py.test files
    
    IMPALA-13323 added a WARNING log that is emitted when, for an
    independently declared query option 'key',
    vector.get_value('exec_option')['key'] != vector.get_value('key').
    
    This patch eliminates such WARNING logs by fixing the exec option
    declarations in test_mem_usage_scaling.py and test_update_stress.py, the
    only tests producing the WARNING log. Here is a summary of the patch:
    
    - Declare 'mem_limit' using add_exec_option_dimension helper function in
      TestQueryMemLimitScaling so that 'mem_limit' dimension is not
      silently ignored.
    - Declare 'batch_size' using create_exec_option_dimension helper
      function in TestIcebergV2UpdateStress to override the default
      'exec_option' dimension (containing batch_size=0) that is initialized
      by ImpalaTestSuite.add_test_dimensions().
    - Rename 'mem_limit' dimension to 'test_mem_limit' dimension for
      subclasses of TestLowMemoryLimits. The final 'mem_limit' option
      is still calculated from 'test_mem_limit' dimension.
    - Change the LOG.warn() into pytest.fail() to prevent new tests from
      repeating the same issue.
    - Address a few flake8 warnings and errors.
    
    Testing:
    - Pass exhaustive tests for test_mem_usage_scaling.py and
      test_update_stress.py.
    
    Change-Id: Ic34187782c51c6d6fc0a688c9c5f72bf0cb2d45c
    Reviewed-on: http://gerrit.cloudera.org:8080/21733
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 tests/common/impala_test_suite.py          |  2 +-
 tests/query_test/test_mem_usage_scaling.py | 37 +++++++++++++++---------------
 tests/stress/test_update_stress.py         | 14 +++++------
 3 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/tests/common/impala_test_suite.py 
b/tests/common/impala_test_suite.py
index 14c378ab0..1b8994699 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -1376,7 +1376,7 @@ class ImpalaTestSuite(BaseTestSuite):
         LOG.warn("Exec option {} declared as independent dimension but not 
inserted "
                  "into {} dimension.".format(name, EXEC_OPTION_KEY))
       elif vector.get_value(name) != exec_option[name]:
-        LOG.warn("{}[{}]={} does not match against dimension {}={}.".format(
+        pytest.fail("{}[{}]={} does not match against dimension {}={}.".format(
           EXEC_OPTION_KEY, name, exec_option[name], name, 
vector.get_value(name)))
 
   @staticmethod
diff --git a/tests/query_test/test_mem_usage_scaling.py 
b/tests/query_test/test_mem_usage_scaling.py
index 180f515ef..ab4e1af32 100644
--- a/tests/query_test/test_mem_usage_scaling.py
+++ b/tests/query_test/test_mem_usage_scaling.py
@@ -31,7 +31,9 @@ from tests.common.skip import (
   SkipIfFS,
   SkipIf,
   SkipIfDockerizedCluster)
-from tests.common.test_dimensions import create_single_exec_option_dimension
+from tests.common.test_dimensions import (
+    add_exec_option_dimension,
+    create_single_exec_option_dimension)
 from tests.common.test_vector import ImpalaTestDimension
 from tests.verifiers.metric_verifier import MetricVerifier
 
@@ -41,6 +43,7 @@ MEM_LIMIT_TOO_LOW_FOR_RESERVATION = ("minimum memory 
reservation is greater than
   "available to the query for buffer reservations")
 MEM_LIMIT_ERROR_MSGS = [MEM_LIMIT_EXCEEDED_MSG, 
MEM_LIMIT_TOO_LOW_FOR_RESERVATION]
 
+
 @SkipIfNotHdfsMinicluster.tuned_for_minicluster
 class TestQueryMemLimitScaling(ImpalaTestSuite):
   """Test class to do functional validation of per query memory limits. """
@@ -61,20 +64,16 @@ class TestQueryMemLimitScaling(ImpalaTestSuite):
   def add_test_dimensions(cls):
     super(TestQueryMemLimitScaling, cls).add_test_dimensions()
     # add mem_limit as a test dimension.
-    new_dimension = ImpalaTestDimension('mem_limit',
-                                        *TestQueryMemLimitScaling.MEM_LIMITS)
-    cls.ImpalaTestMatrix.add_dimension(new_dimension)
+    add_exec_option_dimension(cls, 'mem_limit', 
TestQueryMemLimitScaling.MEM_LIMITS)
     if cls.exploration_strategy() != 'exhaustive':
-      cls.ImpalaTestMatrix.add_constraint(lambda v:\
+      cls.ImpalaTestMatrix.add_constraint(lambda v:
           v.get_value('table_format').file_format in ['parquet'])
 
   # Test running with different mem limits to exercise the dynamic memory
   # scaling functionality.
   def test_mem_usage_scaling(self, vector):
-    mem_limit = copy(vector.get_value('mem_limit'))
     table_format = vector.get_value('table_format')
     exec_options = copy(vector.get_value('exec_option'))
-    exec_options['mem_limit'] = mem_limit
     for query in self.QUERY:
       self.execute_query(query, exec_options, table_format=table_format)
 
@@ -91,7 +90,7 @@ class TestExprMemUsage(ImpalaTestSuite):
     super(TestExprMemUsage, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
     if cls.exploration_strategy() != 'exhaustive':
-      cls.ImpalaTestMatrix.add_constraint(lambda v:\
+      cls.ImpalaTestMatrix.add_constraint(lambda v:
           v.get_value('table_format').file_format in ['parquet'])
 
   def test_scanner_mem_usage(self, vector):
@@ -102,11 +101,13 @@ class TestExprMemUsage(ImpalaTestSuite):
       "select count(*) from lineitem where lower(l_comment) = 'hello'", 
exec_options,
       table_format=vector.get_value('table_format'))
 
+
 class TestLowMemoryLimits(ImpalaTestSuite):
   '''Super class for the memory limit tests with the TPC-H and TPC-DS 
queries'''
 
   def low_memory_limit_test(self, vector, tpch_query, limit):
-    mem = vector.get_value('mem_limit')
+    # 'test_mem_limit' dimension is defined by subclasses of 
TestLowMemoryLimits.
+    mem = vector.get_value('test_mem_limit')
     # Mem consumption can be +-30MBs, depending on how many scanner threads are
     # running. Adding this extra mem in order to reduce false negatives in the 
tests.
     limit = limit + 30
@@ -155,9 +156,9 @@ class TestTpchMemLimitError(TestLowMemoryLimits):
     super(TestTpchMemLimitError, cls).add_test_dimensions()
 
     cls.ImpalaTestMatrix.add_dimension(
-      ImpalaTestDimension('mem_limit', *TestTpchMemLimitError.MEM_IN_MB))
+      ImpalaTestDimension('test_mem_limit', *TestTpchMemLimitError.MEM_IN_MB))
 
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet'])
 
   def test_low_mem_limit_q1(self, vector):
@@ -247,8 +248,8 @@ class TestTpchPrimitivesMemLimitError(TestLowMemoryLimits):
 
   # Different values of mem limits and minimum mem limit (in MBs) each query 
is expected
   # to run without problem. Determined by manual binary search.
-  MIN_MEM = { 'primitive_broadcast_join_3': 115, 
'primitive_groupby_bigint_highndv': 110,
-              'primitive_orderby_all': 120}
+  MIN_MEM = {'primitive_broadcast_join_3': 115, 
'primitive_groupby_bigint_highndv': 110,
+      'primitive_orderby_all': 120}
 
   @classmethod
   def get_workload(self):
@@ -261,9 +262,9 @@ class TestTpchPrimitivesMemLimitError(TestLowMemoryLimits):
     super(TestTpchPrimitivesMemLimitError, cls).add_test_dimensions()
 
     cls.ImpalaTestMatrix.add_dimension(
-      ImpalaTestDimension('mem_limit', *cls.MEM_IN_MB))
+      ImpalaTestDimension('test_mem_limit', *cls.MEM_IN_MB))
 
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet'])
 
   def run_primitive_query(self, vector, query_name):
@@ -289,7 +290,7 @@ class TestTpcdsMemLimitError(TestLowMemoryLimits):
 
   # Different values of mem limits and minimum mem limit (in MBs) each query 
is expected
   # to run without problem. Those values were determined by manual testing.
-  MIN_MEM_FOR_TPCDS = { 'q53' : 116}
+  MIN_MEM_FOR_TPCDS = {'q53': 116}
 
   @classmethod
   def get_workload(self):
@@ -302,9 +303,9 @@ class TestTpcdsMemLimitError(TestLowMemoryLimits):
     super(TestTpcdsMemLimitError, cls).add_test_dimensions()
 
     cls.ImpalaTestMatrix.add_dimension(
-      ImpalaTestDimension('mem_limit', *TestTpcdsMemLimitError.MEM_IN_MB))
+      ImpalaTestDimension('test_mem_limit', *TestTpcdsMemLimitError.MEM_IN_MB))
 
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet'])
 
   def test_low_mem_limit_q53(self, vector):
diff --git a/tests/stress/test_update_stress.py 
b/tests/stress/test_update_stress.py
index 35f380e31..704fbb5a3 100644
--- a/tests/stress/test_update_stress.py
+++ b/tests/stress/test_update_stress.py
@@ -24,7 +24,7 @@ from multiprocessing import Value
 
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.parametrize import UniqueDatabase
-from tests.common.test_vector import ImpalaTestDimension
+from tests.common.test_dimensions import create_exec_option_dimension
 from tests.stress.stress_util import run_tasks, Task
 from tests.util.filesystem_utils import IS_HDFS
 
@@ -45,13 +45,11 @@ class TestIcebergV2UpdateStress(ImpalaTestSuite):
     super(TestIcebergV2UpdateStress, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format == 'parquet')
-    if cls.exploration_strategy() == 'core':
-      cls.ImpalaTestMatrix.add_dimension(
-        ImpalaTestDimension('batch_size', 
*TestIcebergV2UpdateStress.BATCH_SIZES))
-    else:
-      cls.ImpalaTestMatrix.add_dimension(
-        ImpalaTestDimension('batch_size',
-            *TestIcebergV2UpdateStress.EXHAUSTIVE_BATCH_SIZES))
+    batch_sizes = (TestIcebergV2UpdateStress.BATCH_SIZES
+        if cls.exploration_strategy() == 'core'
+        else TestIcebergV2UpdateStress.EXHAUSTIVE_BATCH_SIZES)
+    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
+      batch_sizes=batch_sizes))
 
   def test_update_stress(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-update-stress', vector,

(impala) 01/02: IMPALA-13341: Fix mismatch exec option values in py.test files

Reply via email to