This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new feefcc639 IMPALA-12518: Combine all exec_option dimension in
test_vector.py
feefcc639 is described below
commit feefcc63957a929bce9cb45f935d592cd088e2d3
Author: Riza Suminto <[email protected]>
AuthorDate: Wed Oct 25 20:21:54 2023 -0700
IMPALA-12518: Combine all exec_option dimension in test_vector.py
Before this patch, when writing pytest that exercise custom query option
values, we need to declare it by making new test dimension, followed by
deepcopying the original vector, and inserting the selected dimension
value into 'exec_option' dictionary in generated vector.
This patch simplify this steps by accounting dimensions that is intended
to be part of 'exec_option' and automatically combining them during
vector generation in test_vector.py. Such dimension should be registered
via the new ImpalaTestMatrix.add_exec_option_dimension() function.
function add_exec_option_dimension() in test_dimensions.py is renamed to
add_mandatory_exec_option() to make it consistent with the same
functionality in ImpalaTestMatrix and avoid confusion with the new
ImpalaTestMatrix.add_exec_option_dimension() function. Function name
add_exec_option_dimension() in test_dimensions.py is then repurposed as
a shorthand for ImpalaTestMatrix.add_exec_option_dimension().
The remaining changes for other pytest files will be done gradually.
Testing:
- Fix bug in TestIcebergV2Table and confirm that both True and False
value for 'disable_optimized_iceberg_v2_read' options are exercised.
- Run and pass all modified tests in this patch.
Change-Id: I3adba260990fccf4d2f2e7c8c4e4fadc6fd43fe1
Reviewed-on: http://gerrit.cloudera.org:8080/20625
Reviewed-by: Zoltan Borok-Nagy <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Michael Smith <[email protected]>
---
tests/common/test_dimensions.py | 17 ++++++++--
tests/common/test_vector.py | 55 +++++++++++++++++++++++++++++---
tests/custom_cluster/test_kudu.py | 4 +--
tests/query_test/test_async_codegen.py | 4 +--
tests/query_test/test_iceberg.py | 5 +--
tests/query_test/test_kudu.py | 14 ++++----
tests/query_test/test_runtime_filters.py | 18 +++++------
7 files changed, 86 insertions(+), 31 deletions(-)
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 9b957d3bb..aa2ce2db9 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -264,13 +264,24 @@ def
create_exec_option_dimension_from_dict(exec_option_dimensions):
# Build a test vector out of it
return ImpalaTestDimension('exec_option', *exec_option_dimension_values)
-def add_exec_option_dimension(test_suite, key, value):
+
+def add_exec_option_dimension(test_suite, key, values):
+ """
+ Takes an ImpalaTestSuite object 'test_suite' and register new exec option
dimension.
+ 'key' must be a query option known to Impala, and 'values' must be a list of
more than
+ one element.
+ """
+ test_suite.ImpalaTestMatrix.add_exec_option_dimension(
+ ImpalaTestDimension(key, *values))
+
+
+def add_mandatory_exec_option(test_suite, key, value):
"""
Takes an ImpalaTestSuite object 'test_suite' and adds 'key=value' to every
exec option
test dimension, leaving the number of tests that will be run unchanged.
"""
- for v in test_suite.ImpalaTestMatrix.dimensions["exec_option"]:
- v.value[key] = value
+ test_suite.ImpalaTestMatrix.add_mandatory_exec_option(key, value)
+
def extend_exec_option_dimension(test_suite, key, value):
"""
diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py
index 182d962fe..8e0bebfd5 100644
--- a/tests/common/test_vector.py
+++ b/tests/common/test_vector.py
@@ -58,6 +58,10 @@
from __future__ import absolute_import, division, print_function
from itertools import product
+from copy import deepcopy
+
+EXEC_OPTION_KEY = 'exec_option'
+
# A list of test dimension values.
class ImpalaTestDimension(list):
@@ -104,19 +108,40 @@ class ImpalaTestMatrix(object):
def __init__(self, *args):
self.dimensions = dict((arg.name, arg) for arg in args)
self.constraint_list = list()
+ self.independent_exec_option_names = set()
def add_dimension(self, dimension):
self.dimensions[dimension.name] = dimension
def add_mandatory_exec_option(self, exec_option_key, exec_option_value):
- for vector in self.dimensions['exec_option']:
+ assert EXEC_OPTION_KEY in self.dimensions, (
+ "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+ for vector in self.dimensions[EXEC_OPTION_KEY]:
vector.value[exec_option_key] = exec_option_value
+ def add_exec_option_dimension(self, dimension):
+ """
+ Add 'dimension' into 'self.dimensions' and memorize the name.
+ During vector generation, all dimensions registered through this method
will be
+ embedded into 'exec_option' dimension. This is intended to maintain
pairwise and
+ exhaustive combination correct while eliminating the need for individual
test method
+ to append the custom exec options into 'exec_option' dimension themself.
+ """
+ assert EXEC_OPTION_KEY in self.dimensions, (
+ "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+ assert len(dimension) > 1, (
+ "Dimension " + dimension.name + " must have more than 1 possible value! "
+ "Use add_mandatory_exec_option() instead.")
+ self.add_dimension(dimension)
+ self.independent_exec_option_names.add(dimension.name)
+
def clear(self):
self.dimensions.clear()
+ self.independent_exec_option_names = set()
def clear_dimension(self, dimension_name):
del self.dimensions[dimension_name]
+ self.independent_exec_option_names.discard(dimension_name)
def has_dimension(self, dimension_name):
return dimension_name in self.dimensions
@@ -132,9 +157,29 @@ class ImpalaTestMatrix(object):
else:
raise ValueError('Unknown exploration strategy: %s' %
exploration_strategy)
+ def embed_independent_exec_options(self, vector_values):
+ if not self.independent_exec_option_names:
+ return vector_values
+ values = []
+ exec_values = []
+ exec_option = None
+ for val in vector_values:
+ if val.name == EXEC_OPTION_KEY:
+ exec_option = deepcopy(val.value)
+ elif val.name in self.independent_exec_option_names:
+ exec_values.append(val)
+ else:
+ values.append(val)
+ assert exec_option is not None, (
+ "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+ for val in exec_values:
+ exec_option[val.name] = val.value
+ values.append(ImpalaTestVector.Value(EXEC_OPTION_KEY, exec_option))
+ return values
+
def __generate_exhaustive_combinations(self):
- return [ImpalaTestVector(vec) for vec in
product(*self.__extract_vector_values())
- if self.is_valid(vec)]
+ return [ImpalaTestVector(self.embed_independent_exec_options(vec))
+ for vec in product(*self.__extract_vector_values()) if
self.is_valid(vec)]
def __generate_pairwise_combinations(self):
from allpairspy import AllPairs
@@ -144,8 +189,8 @@ class ImpalaTestMatrix(object):
# results will be the same.
if len(self.dimensions) == 1:
return self.__generate_exhaustive_combinations()
- return [ImpalaTestVector(vec) for vec in
all_pairs(self.__extract_vector_values(),
- filter_func = self.is_valid)]
+ return [ImpalaTestVector(self.embed_independent_exec_options(vec))
+ for vec in all_pairs(self.__extract_vector_values(),
filter_func=self.is_valid)]
def add_constraint(self, constraint_func):
self.constraint_list.append(constraint_func)
diff --git a/tests/custom_cluster/test_kudu.py
b/tests/custom_cluster/test_kudu.py
index 49c4c4ce1..36bf55f0f 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -27,7 +27,7 @@ from tests.beeswax.impala_beeswax import
ImpalaBeeswaxException
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.kudu_test_suite import KuduTestSuite
from tests.common.skip import SkipIfKudu, SkipIfBuildType, SkipIf
-from tests.common.test_dimensions import add_exec_option_dimension
+from tests.common.test_dimensions import add_mandatory_exec_option
from tests.util.event_processor_utils import EventProcessorUtils
KUDU_MASTER_HOSTS = pytest.config.option.kudu_master_hosts
@@ -63,7 +63,7 @@ class TestKuduOperations(CustomKuduTest):
super(TestKuduOperations, cls).add_test_dimensions()
# The default read mode of READ_LATEST does not provide high enough
consistency for
# these tests.
- add_exec_option_dimension(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
+ add_mandatory_exec_option(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(impalad_args=\
diff --git a/tests/query_test/test_async_codegen.py
b/tests/query_test/test_async_codegen.py
index acc073f24..80e1612e9 100644
--- a/tests/query_test/test_async_codegen.py
+++ b/tests/query_test/test_async_codegen.py
@@ -19,7 +19,7 @@ from __future__ import absolute_import, division,
print_function
import pytest
from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import add_exec_option_dimension
+from tests.common.test_dimensions import add_mandatory_exec_option
from tests.common.test_dimensions import create_exec_option_dimension
from tests.common.test_result_verifier import extract_event_sequence
@@ -90,7 +90,7 @@ limit 10;
debug_action_options=[cls.DEBUG_ACTION_CODEGEN_FINISH_BEFORE_EXEC_START,
cls.DEBUG_ACTION_CODEGEN_FINISH_DURING_EXEC,
cls.DEBUG_ACTION_EXEC_FINISH_BEFORE_CODEGEN]))
- add_exec_option_dimension(cls, "async_codegen", 1)
+ add_mandatory_exec_option(cls, "async_codegen", 1)
cls.ImpalaTestMatrix.add_constraint(lambda vector:
cls.__is_valid_test_vector(vector))
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index c47772bf4..f24ae2170 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -38,6 +38,7 @@ import json
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.iceberg_test_suite import IcebergTestSuite
from tests.common.skip import SkipIf, SkipIfFS, SkipIfDockerizedCluster
+from tests.common.test_dimensions import add_exec_option_dimension
from tests.common.test_vector import ImpalaTestDimension
from tests.common.file_utils import (
create_iceberg_table_from_directory,
@@ -1185,8 +1186,8 @@ class TestIcebergV2Table(IcebergTestSuite):
super(TestIcebergV2Table, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_constraint(
lambda v: v.get_value('table_format').file_format == 'parquet')
- cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(
- 'disable_optimized_iceberg_v2_read', 0, 1))
+ add_exec_option_dimension(cls, 'disable_optimized_iceberg_v2_read', [0, 1])
+
# The test uses pre-written Iceberg tables where the position delete files
refer to
# the data files via full URI, i.e. they start with
'hdfs://localhost:2050/...'. In the
# dockerised environment the namenode is accessible on a different
hostname/port.
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index be3de8a58..1472bd130 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -46,7 +46,7 @@ from tests.common.kudu_test_suite import KuduTestSuite
from tests.common.impala_cluster import ImpalaCluster
from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfKudu, SkipIfHive2
from tests.common.test_dimensions import (add_exec_option_dimension,
- extend_exec_option_dimension)
+ add_mandatory_exec_option)
from tests.verifiers.metric_verifier import MetricVerifier
KUDU_MASTER_HOSTS = pytest.config.option.kudu_master_hosts
@@ -65,11 +65,10 @@ class TestKuduBasicDML(KuduTestSuite):
super(TestKuduBasicDML, cls).add_test_dimensions()
# The default read mode of READ_LATEST does not provide high enough
consistency for
# these tests.
- add_exec_option_dimension(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
+ add_mandatory_exec_option(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
# Run with and without multithreading to ensure Kudu DML works with both
threading
# models. E.g. see IMPALA-9782.
- add_exec_option_dimension(cls, "mt_dop", "0")
- extend_exec_option_dimension(cls, "mt_dop", "4")
+ add_exec_option_dimension(cls, "mt_dop", [0, 4])
@SkipIfKudu.no_hybrid_clock
def test_kudu_insert(self, vector, unique_database):
@@ -105,7 +104,7 @@ class TestKuduOperations(KuduTestSuite):
super(TestKuduOperations, cls).add_test_dimensions()
# The default read mode of READ_LATEST does not provide high enough
consistency for
# these tests.
- add_exec_option_dimension(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
+ add_mandatory_exec_option(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
@SkipIfKudu.no_hybrid_clock
@SkipIfKudu.hms_integration_enabled
@@ -584,8 +583,7 @@ class TestKuduPartitioning(KuduTestSuite):
super(TestKuduPartitioning, cls).add_test_dimensions()
# Test both the interpreted and the codegen'd path.
- add_exec_option_dimension(cls, "disable_codegen", "0")
- extend_exec_option_dimension(cls, "disable_codegen", "1")
+ add_exec_option_dimension(cls, "disable_codegen", [0, 1])
def test_partitions_evenly_distributed(self, vector, cursor,
kudu_client, unique_database):
@@ -1524,7 +1522,7 @@ class TestKuduReadTokenSplit(KuduTestSuite):
super(TestKuduReadTokenSplit, cls).add_test_dimensions()
# The default read mode of READ_LATEST does not provide high enough
consistency for
# these tests.
- add_exec_option_dimension(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
+ add_mandatory_exec_option(cls, "kudu_read_mode", "READ_AT_SNAPSHOT")
@SkipIfKudu.no_hybrid_clock
@SkipIfNotHdfsMinicluster.tuned_for_minicluster
diff --git a/tests/query_test/test_runtime_filters.py
b/tests/query_test/test_runtime_filters.py
index 4be4305af..87f299d0e 100644
--- a/tests/query_test/test_runtime_filters.py
+++ b/tests/query_test/test_runtime_filters.py
@@ -27,7 +27,7 @@ from tests.common.environ import build_flavor_timeout,
ImpalaTestClusterProperti
from tests.common.impala_cluster import ImpalaCluster
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfEC, SkipIfLocal, SkipIfFS
-from tests.common.test_dimensions import add_exec_option_dimension
+from tests.common.test_dimensions import add_mandatory_exec_option
from tests.common.test_vector import ImpalaTestDimension
from tests.verifiers.metric_verifier import MetricVerifier
from tests.util.filesystem_utils import WAREHOUSE
@@ -70,7 +70,7 @@ class TestRuntimeFilters(ImpalaTestSuite):
or v.get_value('mt_dop') == 0)
# Enable query option ASYNC_CODEGEN for slow build
if build_runs_slowly:
- add_exec_option_dimension(cls, "async_codegen", 1)
+ add_mandatory_exec_option(cls, "async_codegen", 1)
def test_basic_filters(self, vector):
new_vector = deepcopy(vector)
@@ -184,7 +184,7 @@ class TestBloomFilters(ImpalaTestSuite):
lambda v: v.get_value('table_format').file_format not in ['hbase'])
# Enable query option ASYNC_CODEGEN for slow build
if build_runs_slowly:
- add_exec_option_dimension(cls, "async_codegen", 1)
+ add_mandatory_exec_option(cls, "async_codegen", 1)
def test_bloom_filters(self, vector):
vector.get_value('exec_option')['ENABLED_RUNTIME_FILTER_TYPES'] = 'BLOOM'
@@ -226,11 +226,11 @@ class TestMinMaxFilters(ImpalaTestSuite):
lambda v: v.get_value('table_format').file_format in ['kudu'])
# Enable query option ASYNC_CODEGEN for slow build
if build_runs_slowly:
- add_exec_option_dimension(cls, "async_codegen", 1)
+ add_mandatory_exec_option(cls, "async_codegen", 1)
# IMPALA-10715. Enable only min/max since the bloom filters will return
# rows only satisfying the join predicates. This test requires the return
# of non-qualifying rows to succeed.
- add_exec_option_dimension(cls, "ENABLED_RUNTIME_FILTER_TYPES", "MIN_MAX")
+ add_mandatory_exec_option(cls, "ENABLED_RUNTIME_FILTER_TYPES", "MIN_MAX")
def test_min_max_filters(self, vector):
self.execute_query("SET MINMAX_FILTER_THRESHOLD=0.5")
@@ -303,7 +303,7 @@ class TestOverlapMinMaxFilters(ImpalaTestSuite):
lambda v: v.get_value('table_format').file_format in ['parquet'])
# Enable query option ASYNC_CODEGEN for slow build
if build_runs_slowly:
- add_exec_option_dimension(cls, "async_codegen", 1)
+ add_mandatory_exec_option(cls, "async_codegen", 1)
def test_overlap_min_max_filters(self, vector, unique_database):
self.execute_query("SET MINMAX_FILTER_THRESHOLD=0.5")
@@ -356,7 +356,7 @@ class TestInListFilters(ImpalaTestSuite):
lambda v: v.get_value('table_format').file_format in ['orc'])
# Enable query option ASYNC_CODEGEN for slow build
if build_runs_slowly:
- add_exec_option_dimension(cls, "async_codegen", 1)
+ add_mandatory_exec_option(cls, "async_codegen", 1)
def test_in_list_filters(self, vector):
vector.get_value('exec_option')['enabled_runtime_filter_types'] = 'in_list'
@@ -378,7 +378,7 @@ class TestAllRuntimeFilters(ImpalaTestSuite):
lambda v: v.get_value('table_format').file_format in ['kudu'])
# Enable query option ASYNC_CODEGEN for slow build
if build_runs_slowly:
- add_exec_option_dimension(cls, "async_codegen", 1)
+ add_mandatory_exec_option(cls, "async_codegen", 1)
def test_all_runtime_filters(self, vector):
self.execute_query("SET ENABLED_RUNTIME_FILTER_TYPES=ALL")
@@ -410,7 +410,7 @@ class TestRuntimeRowFilters(ImpalaTestSuite):
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('mt_dop', 0, 4))
# Enable query option ASYNC_CODEGEN for slow build
if build_runs_slowly:
- add_exec_option_dimension(cls, "async_codegen", 1)
+ add_mandatory_exec_option(cls, "async_codegen", 1)
def test_row_filters(self, vector):
new_vector = deepcopy(vector)