IMPALA-6845: TestHdfsQueries causes some tests to be run twice TestHdfsQueries is a subclass of TestQueries and inherits all of its 'test_*' methods, causing these tests to be run twice any time test_queries.py is run. This was not intentional (it was subclassed just to inherit 'add_test_dimensions') and causes test runs to take longer than necessary.
This patch removes the subclass relationship and copies the logic in add_test_dimensions() from TestQueries into TestHdfsQueries, with a convenience function added to minimize code duplication. Testing: - Ran test_queries.py under both 'core' and 'exhaustive' and checked that the same tests are run, except that each test is now run only once. Change-Id: Ida659aa7b5131a6a7469baa93a41f7581bd0659a Reviewed-on: http://gerrit.cloudera.org:8080/10053 Reviewed-by: Michael Brown <mi...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/16bed5c3 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/16bed5c3 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/16bed5c3 Branch: refs/heads/master Commit: 16bed5c3a91d2575e1a0d3327735df15d51b5bf6 Parents: ffb74e7 Author: Thomas Tauber-Marshall <tmarsh...@cloudera.com> Authored: Thu Apr 12 23:11:05 2018 +0000 Committer: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Committed: Fri Apr 13 21:46:17 2018 +0000 ---------------------------------------------------------------------- tests/common/test_dimensions.py | 17 ++++++++++++++++- tests/query_test/test_queries.py | 27 ++++++++++++--------------- 2 files changed, 28 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/16bed5c3/tests/common/test_dimensions.py ---------------------------------------------------------------------- diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py index df3f8c2..434b884 100644 --- a/tests/common/test_dimensions.py +++ b/tests/common/test_dimensions.py @@ -17,10 +17,11 @@ # Common test dimensions and associated utility functions. 
+import copy import os from itertools import product -from tests.common.test_vector import ImpalaTestDimension +from tests.common.test_vector import ImpalaTestDimension, ImpalaTestVector WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR'] @@ -179,6 +180,20 @@ def create_exec_option_dimension_from_dict(exec_option_dimensions): # Build a test vector out of it return ImpalaTestDimension('exec_option', *exec_option_dimension_values) +def extend_exec_option_dimension(test_suite, key, value): + """ + Takes an ImpalaTestSuite object 'test_suite' and extends the exec option test dimension + by creating a copy of each existing exec option value that has 'key' set to 'value', + doubling the number of tests that will be run. + """ + dim = test_suite.ImpalaTestMatrix.dimensions["exec_option"] + new_value = [] + for v in dim: + new_value.append(ImpalaTestVector.Value(v.name, copy.copy(v.value))) + new_value[-1].value[key] = value + dim.extend(new_value) + test_suite.ImpalaTestMatrix.add_dimension(dim) + def get_dataset_from_workload(workload): # TODO: We need a better way to define the workload -> dataset mapping so we can # extract it without reading the actual test vector file http://git-wip-us.apache.org/repos/asf/impala/blob/16bed5c3/tests/query_test/test_queries.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py index 4667d96..14ecefe 100644 --- a/tests/query_test/test_queries.py +++ b/tests/query_test/test_queries.py @@ -22,7 +22,7 @@ import pytest import re from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.test_dimensions import create_uncompressed_text_dimension +from tests.common.test_dimensions import create_uncompressed_text_dimension, extend_exec_option_dimension from tests.common.test_vector import ImpalaTestVector class TestQueries(ImpalaTestSuite): @@ -33,18 +33,9 @@ class TestQueries(ImpalaTestSuite): 
cls.ImpalaTestMatrix.add_constraint(lambda v:\ v.get_value('table_format').file_format == 'parquet') - # Manually adding a test dimension here to test the small query opt - # in exhaustive. - # TODO Cleanup required, allow adding values to dimensions without having to - # manually explode them + # Adding a test dimension here to test the small query opt in exhaustive. if cls.exploration_strategy() == 'exhaustive': - dim = cls.ImpalaTestMatrix.dimensions["exec_option"] - new_value = [] - for v in dim: - new_value.append(ImpalaTestVector.Value(v.name, copy.copy(v.value))) - new_value[-1].value["exec_single_node_rows_threshold"] = 100 - dim.extend(new_value) - cls.ImpalaTestMatrix.add_dimension(dim) + extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100") @classmethod def get_workload(cls): @@ -215,9 +206,7 @@ class TestQueriesParquetTables(ImpalaTestSuite): self.run_test_case('QueryTest/single-node-large-sorts', vector) # Tests for queries in HDFS-specific tables, e.g. AllTypesAggMultiFilesNoPart. -# This is a subclass of TestQueries to get the extra test dimension for -# exec_single_node_rows_threshold in exhaustive. -class TestHdfsQueries(TestQueries): +class TestHdfsQueries(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestHdfsQueries, cls).add_test_dimensions() @@ -225,6 +214,14 @@ class TestHdfsQueries(TestQueries): cls.ImpalaTestMatrix.add_constraint(lambda v:\ v.get_value('table_format').file_format != 'kudu') + # Adding a test dimension here to test the small query opt in exhaustive. + if cls.exploration_strategy() == 'exhaustive': + extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100") + + @classmethod + def get_workload(cls): + return 'functional-query' + def test_hdfs_scan_node(self, vector): self.run_test_case('QueryTest/hdfs-scan-node', vector)