This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 2db68dff4870cc7f5c5b40388fdaf0eb0fdcf617 Author: Riza Suminto <[email protected]> AuthorDate: Wed Aug 28 15:34:23 2024 -0700 IMPALA-13341: Fix mismatch exec option values in py.test files IMPALA-13323 added WARNING log if, for independently declared query option 'key', vector.get_value('exec_option')['key'] != vector.get_value('key'). This patch eliminate such WARNING logs by fixing exec option declaration in test_mem_usage_scaling.py and test_update_stress.py, the only test producing the WARNING log. Here are the summary of the patch: - Declare 'mem_limit' using add_exec_option_dimension helper function in TestQueryMemLimitScaling so that 'mem_limit' dimension is not silently ignored. - Declare 'batch_size' using create_exec_option_dimension helper function in TestIcebergV2UpdateStress to override the default 'exec_option' dimension (containing batch_size=0) that initialized by ImpalaTestSuite.add_test_dimensions(). - Rename 'mem_limit' dimension to 'test_mem_limit' dimension for subclasses of TestLowMemoryLimits. The final 'mem_limit' option is still calculated from 'test_mem_limit' dimension. - Change the LOG.warn() into pytest.fail() to prevent new tests from repeating the same issue. - Address few flake8 warnings and errors. Testing: - Pass exhaustive tests for test_mem_usage_scaling.py and test_update_stress.py. Change-Id: Ic34187782c51c6d6fc0a688c9c5f72bf0cb2d45c Reviewed-on: http://gerrit.cloudera.org:8080/21733 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- tests/common/impala_test_suite.py | 2 +- tests/query_test/test_mem_usage_scaling.py | 37 +++++++++++++++--------------- tests/stress/test_update_stress.py | 14 +++++------ 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index 14c378ab0..1b8994699 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -1376,7 +1376,7 @@ class ImpalaTestSuite(BaseTestSuite): LOG.warn("Exec option {} declared as independent dimension but not inserted " "into {} dimension.".format(name, EXEC_OPTION_KEY)) elif vector.get_value(name) != exec_option[name]: - LOG.warn("{}[{}]={} does not match against dimension {}={}.".format( + pytest.fail("{}[{}]={} does not match against dimension {}={}.".format( EXEC_OPTION_KEY, name, exec_option[name], name, vector.get_value(name))) @staticmethod diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py index 180f515ef..ab4e1af32 100644 --- a/tests/query_test/test_mem_usage_scaling.py +++ b/tests/query_test/test_mem_usage_scaling.py @@ -31,7 +31,9 @@ from tests.common.skip import ( SkipIfFS, SkipIf, SkipIfDockerizedCluster) -from tests.common.test_dimensions import create_single_exec_option_dimension +from tests.common.test_dimensions import ( + add_exec_option_dimension, + create_single_exec_option_dimension) from tests.common.test_vector import ImpalaTestDimension from tests.verifiers.metric_verifier import MetricVerifier @@ -41,6 +43,7 @@ MEM_LIMIT_TOO_LOW_FOR_RESERVATION = ("minimum memory reservation is greater than "available to the query for buffer reservations") MEM_LIMIT_ERROR_MSGS = [MEM_LIMIT_EXCEEDED_MSG, MEM_LIMIT_TOO_LOW_FOR_RESERVATION] + @SkipIfNotHdfsMinicluster.tuned_for_minicluster class TestQueryMemLimitScaling(ImpalaTestSuite): """Test class to do functional validation of per query memory limits. """ @@ -61,20 +64,16 @@ class TestQueryMemLimitScaling(ImpalaTestSuite): def add_test_dimensions(cls): super(TestQueryMemLimitScaling, cls).add_test_dimensions() # add mem_limit as a test dimension. - new_dimension = ImpalaTestDimension('mem_limit', - *TestQueryMemLimitScaling.MEM_LIMITS) - cls.ImpalaTestMatrix.add_dimension(new_dimension) + add_exec_option_dimension(cls, 'mem_limit', TestQueryMemLimitScaling.MEM_LIMITS) if cls.exploration_strategy() != 'exhaustive': - cls.ImpalaTestMatrix.add_constraint(lambda v:\ + cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format in ['parquet']) # Test running with different mem limits to exercise the dynamic memory # scaling functionality. def test_mem_usage_scaling(self, vector): - mem_limit = copy(vector.get_value('mem_limit')) table_format = vector.get_value('table_format') exec_options = copy(vector.get_value('exec_option')) - exec_options['mem_limit'] = mem_limit for query in self.QUERY: self.execute_query(query, exec_options, table_format=table_format) @@ -91,7 +90,7 @@ class TestExprMemUsage(ImpalaTestSuite): super(TestExprMemUsage, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension()) if cls.exploration_strategy() != 'exhaustive': - cls.ImpalaTestMatrix.add_constraint(lambda v:\ + cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format in ['parquet']) def test_scanner_mem_usage(self, vector): @@ -102,11 +101,13 @@ class TestExprMemUsage(ImpalaTestSuite): "select count(*) from lineitem where lower(l_comment) = 'hello'", exec_options, table_format=vector.get_value('table_format')) + class TestLowMemoryLimits(ImpalaTestSuite): '''Super class for the memory limit tests with the TPC-H and TPC-DS queries''' def low_memory_limit_test(self, vector, tpch_query, limit): - mem = vector.get_value('mem_limit') + # 'test_mem_limit' dimension is defined by subclasses of TestLowMemoryLimits. + mem = vector.get_value('test_mem_limit') # Mem consumption can be +-30MBs, depending on how many scanner threads are # running. Adding this extra mem in order to reduce false negatives in the tests. limit = limit + 30 @@ -155,9 +156,9 @@ class TestTpchMemLimitError(TestLowMemoryLimits): super(TestTpchMemLimitError, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_dimension( - ImpalaTestDimension('mem_limit', *TestTpchMemLimitError.MEM_IN_MB)) + ImpalaTestDimension('test_mem_limit', *TestTpchMemLimitError.MEM_IN_MB)) - cls.ImpalaTestMatrix.add_constraint(lambda v:\ + cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format in ['parquet']) def test_low_mem_limit_q1(self, vector): @@ -247,8 +248,8 @@ class TestTpchPrimitivesMemLimitError(TestLowMemoryLimits): # Different values of mem limits and minimum mem limit (in MBs) each query is expected # to run without problem. Determined by manual binary search. - MIN_MEM = { 'primitive_broadcast_join_3': 115, 'primitive_groupby_bigint_highndv': 110, - 'primitive_orderby_all': 120} + MIN_MEM = {'primitive_broadcast_join_3': 115, 'primitive_groupby_bigint_highndv': 110, + 'primitive_orderby_all': 120} @classmethod def get_workload(self): @@ -261,9 +262,9 @@ class TestTpchPrimitivesMemLimitError(TestLowMemoryLimits): super(TestTpchPrimitivesMemLimitError, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_dimension( - ImpalaTestDimension('mem_limit', *cls.MEM_IN_MB)) + ImpalaTestDimension('test_mem_limit', *cls.MEM_IN_MB)) - cls.ImpalaTestMatrix.add_constraint(lambda v:\ + cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format in ['parquet']) def run_primitive_query(self, vector, query_name): @@ -289,7 +290,7 @@ class TestTpcdsMemLimitError(TestLowMemoryLimits): # Different values of mem limits and minimum mem limit (in MBs) each query is expected # to run without problem. Those values were determined by manual testing. - MIN_MEM_FOR_TPCDS = { 'q53' : 116} + MIN_MEM_FOR_TPCDS = {'q53': 116} @classmethod def get_workload(self): @@ -302,9 +303,9 @@ class TestTpcdsMemLimitError(TestLowMemoryLimits): super(TestTpcdsMemLimitError, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_dimension( - ImpalaTestDimension('mem_limit', *TestTpcdsMemLimitError.MEM_IN_MB)) + ImpalaTestDimension('test_mem_limit', *TestTpcdsMemLimitError.MEM_IN_MB)) - cls.ImpalaTestMatrix.add_constraint(lambda v:\ + cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format in ['parquet']) def test_low_mem_limit_q53(self, vector): diff --git a/tests/stress/test_update_stress.py b/tests/stress/test_update_stress.py index 35f380e31..704fbb5a3 100644 --- a/tests/stress/test_update_stress.py +++ b/tests/stress/test_update_stress.py @@ -24,7 +24,7 @@ from multiprocessing import Value from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.parametrize import UniqueDatabase -from tests.common.test_vector import ImpalaTestDimension +from tests.common.test_dimensions import create_exec_option_dimension from tests.stress.stress_util import run_tasks, Task from tests.util.filesystem_utils import IS_HDFS @@ -45,13 +45,11 @@ class TestIcebergV2UpdateStress(ImpalaTestSuite): super(TestIcebergV2UpdateStress, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_constraint( lambda v: v.get_value('table_format').file_format == 'parquet') - if cls.exploration_strategy() == 'core': - cls.ImpalaTestMatrix.add_dimension( - ImpalaTestDimension('batch_size', *TestIcebergV2UpdateStress.BATCH_SIZES)) - else: - cls.ImpalaTestMatrix.add_dimension( - ImpalaTestDimension('batch_size', - *TestIcebergV2UpdateStress.EXHAUSTIVE_BATCH_SIZES)) + batch_sizes = (TestIcebergV2UpdateStress.BATCH_SIZES + if cls.exploration_strategy() == 'core' + else TestIcebergV2UpdateStress.EXHAUSTIVE_BATCH_SIZES) + cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension( + batch_sizes=batch_sizes)) def test_update_stress(self, vector, unique_database): self.run_test_case('QueryTest/iceberg-update-stress', vector,
