Repository: impala Updated Branches: refs/heads/2.x e0c09181f -> 8c922a6ef
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/tpch/queries/sort-reservation-usage.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/tpch/queries/sort-reservation-usage.test b/testdata/workloads/tpch/queries/sort-reservation-usage.test index 92f180d..af31ccb 100644 --- a/testdata/workloads/tpch/queries/sort-reservation-usage.test +++ b/testdata/workloads/tpch/queries/sort-reservation-usage.test @@ -1,17 +1,18 @@ ==== ---- QUERY # Test that in-mem sorts incrementally give up memory when emitting output. -# This query and the limit is calibrated to fail if the first sort does not -# give up memory to the second sort. +# This query and scratch limit are chosen so that the query fails if the first sort +# does not give up memory to the second sort. Scans the text formatted file so that +# the scan uses less reservation. set num_nodes=1; set scratch_limit=0; -set buffer_pool_limit=15m; +set buffer_pool_limit=35m; set default_spillable_buffer_size=64kb; SELECT * FROM (SELECT Rank() OVER(ORDER BY l_orderkey) AS rank, Rank() OVER(ORDER BY l_partkey) AS rank2 - FROM lineitem + FROM tpch.lineitem WHERE l_shipdate < '1992-05-09') a WHERE rank < 10 ORDER BY rank; http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/common/test_dimensions.py ---------------------------------------------------------------------- diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py index 4171e1f..3c1b5e7 100644 --- a/tests/common/test_dimensions.py +++ b/tests/common/test_dimensions.py @@ -131,13 +131,13 @@ SINGLE_NODE_ONLY = [1] ALL_NODES_ONLY = [0] ALL_DISABLE_CODEGEN_OPTIONS = [True, False] -def create_single_exec_option_dimension(): +def create_single_exec_option_dimension(num_nodes=0, disable_codegen_rows_threshold=5000): """Creates an exec_option dimension that will produce a single test vector""" - return create_exec_option_dimension(cluster_sizes=ALL_NODES_ONLY, - disable_codegen_options=[False], - # Make sure codegen kicks in for functional.alltypes. - disable_codegen_rows_threshold_options=[5000], - batch_sizes=[0]) + return create_exec_option_dimension(cluster_sizes=[num_nodes], + disable_codegen_options=[False], + # Make sure codegen kicks in for functional.alltypes. + disable_codegen_rows_threshold_options=[disable_codegen_rows_threshold], + batch_sizes=[0]) def create_exec_option_dimension(cluster_sizes=ALL_CLUSTER_SIZES, disable_codegen_options=ALL_DISABLE_CODEGEN_OPTIONS, @@ -145,13 +145,15 @@ def create_exec_option_dimension(cluster_sizes=ALL_CLUSTER_SIZES, sync_ddl=None, exec_single_node_option=[0], # We already run with codegen on and off explicitly - # don't need automatic toggling. - disable_codegen_rows_threshold_options=[0]): + disable_codegen_rows_threshold_options=[0], + debug_action_options=[None]): exec_option_dimensions = { 'abort_on_error': [1], 'exec_single_node_rows_threshold': exec_single_node_option, 'batch_size': batch_sizes, 'disable_codegen': disable_codegen_options, 'disable_codegen_rows_threshold': disable_codegen_rows_threshold_options, + 'debug_action': debug_action_options, 'num_nodes': cluster_sizes} if sync_ddl is not None: http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/custom_cluster/test_scratch_disk.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py index bd3c7e4..65bde66 100644 --- a/tests/custom_cluster/test_scratch_disk.py +++ b/tests/custom_cluster/test_scratch_disk.py @@ -39,7 +39,7 @@ class TestScratchDir(CustomClusterTestSuite): """ # Buffer pool limit that is low enough to force Impala to spill to disk when executing # spill_query. - buffer_pool_limit = "32m" + buffer_pool_limit = "45m" def count_nonempty_dirs(self, dirs): count = 0 http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_mem_usage_scaling.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py index c60b1c5..f13e320 100644 --- a/tests/query_test/test_mem_usage_scaling.py +++ b/tests/query_test/test_mem_usage_scaling.py @@ -95,7 +95,7 @@ class TestExprMemUsage(ImpalaTestSuite): class TestLowMemoryLimits(ImpalaTestSuite): '''Super class for the memory limit tests with the TPC-H and TPC-DS queries''' - def low_memory_limit_test(self, vector, tpch_query, limit, xfail_mem_limit=None): + def low_memory_limit_test(self, vector, tpch_query, limit): mem = vector.get_value('mem_limit') # Mem consumption can be +-30MBs, depending on how many scanner threads are # running. Adding this extra mem in order to reduce false negatives in the tests. @@ -112,13 +112,11 @@ class TestLowMemoryLimits(ImpalaTestSuite): try: self.run_test_case(tpch_query, new_vector) except ImpalaBeeswaxException as e: - if not expects_error and not xfail_mem_limit: raise + if not expects_error: raise found_expected_error = False for error_msg in MEM_LIMIT_ERROR_MSGS: if error_msg in str(e): found_expected_error = True assert found_expected_error, str(e) - if not expects_error and xfail_mem_limit: - pytest.xfail(xfail_mem_limit) class TestTpchMemLimitError(TestLowMemoryLimits): @@ -132,7 +130,7 @@ class TestTpchMemLimitError(TestLowMemoryLimits): 'Q6' : 25, 'Q7' : 200, 'Q8' : 125, 'Q9' : 200, 'Q10' : 162,\ 'Q11' : 112, 'Q12' : 150, 'Q13' : 125, 'Q14' : 125, 'Q15' : 125,\ 'Q16' : 137, 'Q17' : 137, 'Q18' : 196, 'Q19' : 112, 'Q20' : 162,\ - 'Q21' : 187, 'Q22' : 125} + 'Q21' : 230, 'Q22' : 125} @classmethod def get_workload(self): @@ -175,8 +173,7 @@ class TestTpchMemLimitError(TestLowMemoryLimits): self.low_memory_limit_test(vector, 'tpch-q8', self.MIN_MEM_FOR_TPCH['Q8']) def test_low_mem_limit_q9(self, vector): - self.low_memory_limit_test(vector, 'tpch-q9', self.MIN_MEM_FOR_TPCH['Q9'], - xfail_mem_limit="IMPALA-3328: TPC-H Q9 memory limit test is flaky") + self.low_memory_limit_test(vector, 'tpch-q9', self.MIN_MEM_FOR_TPCH['Q9']) @SkipIfLocal.mem_usage_different def test_low_mem_limit_q10(self, vector): http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_query_mem_limit.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_query_mem_limit.py b/tests/query_test/test_query_mem_limit.py index 2fdd6eb..17ea9f5 100644 --- a/tests/query_test/test_query_mem_limit.py +++ b/tests/query_test/test_query_mem_limit.py @@ -118,7 +118,9 @@ class TestCodegenMemLimit(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestCodegenMemLimit, cls).add_test_dimensions() - cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension()) + # Run with num_nodes=1 to avoid races between fragments allocating memory. + cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension( + num_nodes=1, disable_codegen_rows_threshold=0)) # Only run the query for parquet cls.ImpalaTestMatrix.add_constraint( lambda v: v.get_value('table_format').file_format == 'parquet') http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_scanners.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py index 9b252da..2d0fd0c 100644 --- a/tests/query_test/test_scanners.py +++ b/tests/query_test/test_scanners.py @@ -37,7 +37,9 @@ from tests.common.skip import ( SkipIfADLS, SkipIfIsilon, SkipIfLocal) -from tests.common.test_dimensions import create_single_exec_option_dimension +from tests.common.test_dimensions import ( + create_single_exec_option_dimension, + create_exec_option_dimension) from tests.common.test_result_verifier import ( parse_column_types, parse_column_labels, @@ -49,6 +51,11 @@ from tests.util.hdfs_util import NAMENODE from tests.util.get_parquet_metadata import get_parquet_metadata from tests.util.test_file_parser import QueryTestSectionReader +# Test scanners with denial of reservations at varying frequency. This will affect the +# number of scanner threads that can be spun up. +DEBUG_ACTION_DIMS = [None, + '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@0.5', + '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@1.0'] class TestScannersAllTableFormats(ImpalaTestSuite): BATCH_SIZES = [0, 1, 16] @@ -66,18 +73,20 @@ class TestScannersAllTableFormats(ImpalaTestSuite): cls.ImpalaTestMatrix.add_dimension(cls.create_table_info_dimension('pairwise')) cls.ImpalaTestMatrix.add_dimension( ImpalaTestDimension('batch_size', *TestScannersAllTableFormats.BATCH_SIZES)) + cls.ImpalaTestMatrix.add_dimension( + ImpalaTestDimension('debug_action', *DEBUG_ACTION_DIMS)) def test_scanners(self, vector): new_vector = deepcopy(vector) new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size') + new_vector.get_value('exec_option')['debug_action'] = vector.get_value('debug_action') self.run_test_case('QueryTest/scanners', new_vector) def test_hdfs_scanner_profile(self, vector): - if vector.get_value('table_format').file_format in ('kudu', 'hbase'): + if vector.get_value('table_format').file_format in ('kudu', 'hbase') or \ + vector.get_value('exec_option')['num_nodes'] != 0: pytest.skip() - new_vector = deepcopy(vector) - new_vector.get_value('exec_option')['num_nodes'] = 0 - self.run_test_case('QueryTest/hdfs_scanner_profile', new_vector) + self.run_test_case('QueryTest/hdfs_scanner_profile', vector) # Test all the scanners with a simple limit clause. The limit clause triggers # cancellation in the scanner code paths. @@ -171,6 +180,8 @@ class TestWideRow(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestWideRow, cls).add_test_dimensions() + cls.ImpalaTestMatrix.add_dimension( + create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS)) # I can't figure out how to load a huge row into hbase cls.ImpalaTestMatrix.add_constraint( lambda v: v.get_value('table_format').file_format != 'hbase') @@ -202,6 +213,8 @@ class TestWideTable(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestWideTable, cls).add_test_dimensions() + cls.ImpalaTestMatrix.add_dimension( + create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS)) cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension("num_cols", *cls.NUM_COLS)) # To cut down on test execution time, only run in exhaustive. if cls.exploration_strategy() != 'exhaustive': @@ -244,6 +257,8 @@ class TestParquet(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestParquet, cls).add_test_dimensions() + cls.ImpalaTestMatrix.add_dimension( + create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS)) cls.ImpalaTestMatrix.add_constraint( lambda v: v.get_value('table_format').file_format == 'parquet') http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_scanners_fuzz.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_scanners_fuzz.py b/tests/query_test/test_scanners_fuzz.py index c336a17..0b42e5d 100644 --- a/tests/query_test/test_scanners_fuzz.py +++ b/tests/query_test/test_scanners_fuzz.py @@ -48,6 +48,12 @@ class TestScannersFuzzing(ImpalaTestSuite): # Test a range of batch sizes to exercise different corner cases. BATCH_SIZES = [0, 1, 16, 10000] + # Test with denial of reservations at varying frequency. This will affect the number + # of scanner threads that can be spun up. + DEBUG_ACTION_VALUES = [None, + '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@0.5', + '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@1.0'] + @classmethod def get_workload(cls): return 'functional-query' @@ -59,7 +65,8 @@ class TestScannersFuzzing(ImpalaTestSuite): create_exec_option_dimension_from_dict({ 'abort_on_error' : cls.ABORT_ON_ERROR_VALUES, 'num_nodes' : cls.NUM_NODES_VALUES, - 'mem_limit' : cls.MEM_LIMITS})) + 'mem_limit' : cls.MEM_LIMITS, + 'debug_action' : cls.DEBUG_ACTION_VALUES})) # TODO: enable for more table formats once they consistently pass the fuzz test. cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format in ('avro', 'parquet') or http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_sort.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_sort.py b/tests/query_test/test_sort.py index 59a28cd..70629c4 100644 --- a/tests/query_test/test_sort.py +++ b/tests/query_test/test_sort.py @@ -67,14 +67,23 @@ class TestQueryFullSort(ImpalaTestSuite): order by o_orderdate""" exec_option = copy(vector.get_value('exec_option')) table_format = vector.get_value('table_format') + exec_option['default_spillable_buffer_size'] = '8M' + + # Minimum memory for different parts of the plan. + sort_reservation_mb = 48 + if table_format.file_format == 'parquet': + scan_reservation_mb = 24 + else: + scan_reservation_mb = 8 + total_reservation_mb = sort_reservation_mb + scan_reservation_mb # The below memory value assume 8M pages. - exec_option['default_spillable_buffer_size'] = '8M' - buffer_pool_limit_values = ['-1', '48M'] # Unlimited and minimum memory. + # Test with unlimited and minimum memory for all file formats. + buffer_pool_limit_values = ['-1', '{0}M'.format(total_reservation_mb)] if self.exploration_strategy() == 'exhaustive' and \ table_format.file_format == 'parquet': # Test some intermediate values for parquet on exhaustive. - buffer_pool_limit_values += ['64M', '128M', '256M'] + buffer_pool_limit_values += ['128M', '256M'] for buffer_pool_limit in buffer_pool_limit_values: exec_option['buffer_pool_limit'] = buffer_pool_limit result = transpose_results(self.execute_query( @@ -83,7 +92,6 @@ class TestQueryFullSort(ImpalaTestSuite): def test_sort_join(self, vector): """With 200m memory limit this should be a 2-phase sort""" - query = """select o1.o_orderdate, o2.o_custkey, o1.o_comment from orders o1 join orders o2 on (o1.o_orderkey = o2.o_orderkey) order by o1.o_orderdate limit 100000""" http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_spilling.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_spilling.py b/tests/query_test/test_spilling.py index 0b36429..05ada9f 100644 --- a/tests/query_test/test_spilling.py +++ b/tests/query_test/test_spilling.py @@ -95,3 +95,8 @@ class TestSpillingNoDebugActionDimensions(ImpalaTestSuite): setting debug_action to alternative values via query options.""" self.run_test_case('QueryTest/spilling-query-options', vector) + def test_spilling_no_debug_action(self, vector): + """Spilling tests that will not succeed if run with an arbitrary debug action. + These tests either run with no debug action set or set their own debug action.""" + self.run_test_case('QueryTest/spilling-no-debug-action', vector) +