O buffers to buffer pool

tarmstrong Fri, 23 Feb 2018 14:51:58 -0800

Repository: impala
Updated Branches:
  refs/heads/2.x e0c09181f -> 8c922a6ef



http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/tpch/queries/sort-reservation-usage.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpch/queries/sort-reservation-usage.test b/testdata/workloads/tpch/queries/sort-reservation-usage.test
index 92f180d..af31ccb 100644
--- a/testdata/workloads/tpch/queries/sort-reservation-usage.test
+++ b/testdata/workloads/tpch/queries/sort-reservation-usage.test
@@ -1,17 +1,18 @@
 ====
 ---- QUERY
 # Test that in-mem sorts incrementally give up memory when emitting output.
-# This query and the limit is calibrated to fail if the first sort does not
-# give up memory to the second sort.
+# This query and scratch limit are chosen so that the query fails if the first sort
+# does not give up memory to the second sort. Scans the text formatted file so that
+# the scan uses less reservation.
 set num_nodes=1;
 set scratch_limit=0;
-set buffer_pool_limit=15m;
+set buffer_pool_limit=35m;
 set default_spillable_buffer_size=64kb;
 SELECT *
 FROM   (SELECT
         Rank() OVER(ORDER BY  l_orderkey) AS rank,
         Rank() OVER(ORDER BY  l_partkey) AS rank2
-        FROM lineitem
+        FROM tpch.lineitem
         WHERE l_shipdate < '1992-05-09') a
 WHERE rank < 10
 ORDER BY rank;

http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/common/test_dimensions.py
----------------------------------------------------------------------
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 4171e1f..3c1b5e7 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -131,13 +131,13 @@ SINGLE_NODE_ONLY = [1]
 ALL_NODES_ONLY = [0]
 ALL_DISABLE_CODEGEN_OPTIONS = [True, False]
 
-def create_single_exec_option_dimension():
+def create_single_exec_option_dimension(num_nodes=0, disable_codegen_rows_threshold=5000):
   """Creates an exec_option dimension that will produce a single test vector"""
-  return create_exec_option_dimension(cluster_sizes=ALL_NODES_ONLY,
-                                      disable_codegen_options=[False],
-                                      # Make sure codegen kicks in for functional.alltypes.
-                                      disable_codegen_rows_threshold_options=[5000],
-                                      batch_sizes=[0])
+  return create_exec_option_dimension(cluster_sizes=[num_nodes],
+      disable_codegen_options=[False],
+      # Make sure codegen kicks in for functional.alltypes.
+      disable_codegen_rows_threshold_options=[disable_codegen_rows_threshold],
+      batch_sizes=[0])
 
 def create_exec_option_dimension(cluster_sizes=ALL_CLUSTER_SIZES,
                                  disable_codegen_options=ALL_DISABLE_CODEGEN_OPTIONS,
@@ -145,13 +145,15 @@ def create_exec_option_dimension(cluster_sizes=ALL_CLUSTER_SIZES,
                                  sync_ddl=None, exec_single_node_option=[0],
                                  # We already run with codegen on and off explicitly -
                                  # don't need automatic toggling.
-                                 disable_codegen_rows_threshold_options=[0]):
+                                 disable_codegen_rows_threshold_options=[0],
+                                 debug_action_options=[None]):
   exec_option_dimensions = {
       'abort_on_error': [1],
       'exec_single_node_rows_threshold': exec_single_node_option,
       'batch_size': batch_sizes,
       'disable_codegen': disable_codegen_options,
       'disable_codegen_rows_threshold': disable_codegen_rows_threshold_options,
+      'debug_action': debug_action_options,
       'num_nodes': cluster_sizes}
 
   if sync_ddl is not None:

http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/custom_cluster/test_scratch_disk.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py
index bd3c7e4..65bde66 100644
--- a/tests/custom_cluster/test_scratch_disk.py
+++ b/tests/custom_cluster/test_scratch_disk.py
@@ -39,7 +39,7 @@ class TestScratchDir(CustomClusterTestSuite):
       """
   # Buffer pool limit that is low enough to force Impala to spill to disk when executing
   # spill_query.
-  buffer_pool_limit = "32m"
+  buffer_pool_limit = "45m"
 
   def count_nonempty_dirs(self, dirs):
     count = 0

http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_mem_usage_scaling.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py
index c60b1c5..f13e320 100644
--- a/tests/query_test/test_mem_usage_scaling.py
+++ b/tests/query_test/test_mem_usage_scaling.py
@@ -95,7 +95,7 @@ class TestExprMemUsage(ImpalaTestSuite):
 class TestLowMemoryLimits(ImpalaTestSuite):
   '''Super class for the memory limit tests with the TPC-H and TPC-DS queries'''
 
-  def low_memory_limit_test(self, vector, tpch_query, limit, xfail_mem_limit=None):
+  def low_memory_limit_test(self, vector, tpch_query, limit):
     mem = vector.get_value('mem_limit')
     # Mem consumption can be +-30MBs, depending on how many scanner threads are
     # running. Adding this extra mem in order to reduce false negatives in the tests.
@@ -112,13 +112,11 @@ class TestLowMemoryLimits(ImpalaTestSuite):
     try:
       self.run_test_case(tpch_query, new_vector)
     except ImpalaBeeswaxException as e:
-      if not expects_error and not xfail_mem_limit: raise
+      if not expects_error: raise
       found_expected_error = False
       for error_msg in MEM_LIMIT_ERROR_MSGS:
         if error_msg in str(e): found_expected_error = True
       assert found_expected_error, str(e)
-      if not expects_error and xfail_mem_limit:
-        pytest.xfail(xfail_mem_limit)
 
 
 class TestTpchMemLimitError(TestLowMemoryLimits):
@@ -132,7 +130,7 @@ class TestTpchMemLimitError(TestLowMemoryLimits):
                        'Q6' : 25, 'Q7' : 200, 'Q8' : 125, 'Q9' : 200, 'Q10' : 162,\
                        'Q11' : 112, 'Q12' : 150, 'Q13' : 125, 'Q14' : 125, 'Q15' : 125,\
                        'Q16' : 137, 'Q17' : 137, 'Q18' : 196, 'Q19' : 112, 'Q20' : 162,\
-                       'Q21' : 187, 'Q22' : 125}
+                       'Q21' : 230, 'Q22' : 125}
 
   @classmethod
   def get_workload(self):
@@ -175,8 +173,7 @@ class TestTpchMemLimitError(TestLowMemoryLimits):
     self.low_memory_limit_test(vector, 'tpch-q8', self.MIN_MEM_FOR_TPCH['Q8'])
 
   def test_low_mem_limit_q9(self, vector):
-    self.low_memory_limit_test(vector, 'tpch-q9', self.MIN_MEM_FOR_TPCH['Q9'],
-            xfail_mem_limit="IMPALA-3328: TPC-H Q9 memory limit test is flaky")
+    self.low_memory_limit_test(vector, 'tpch-q9', self.MIN_MEM_FOR_TPCH['Q9'])
 
   @SkipIfLocal.mem_usage_different
   def test_low_mem_limit_q10(self, vector):

http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_query_mem_limit.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_query_mem_limit.py b/tests/query_test/test_query_mem_limit.py
index 2fdd6eb..17ea9f5 100644
--- a/tests/query_test/test_query_mem_limit.py
+++ b/tests/query_test/test_query_mem_limit.py
@@ -118,7 +118,9 @@ class TestCodegenMemLimit(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestCodegenMemLimit, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
+    # Run with num_nodes=1 to avoid races between fragments allocating memory.
+    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension(
+        num_nodes=1, disable_codegen_rows_threshold=0))
     # Only run the query for parquet
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format == 'parquet')

http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_scanners.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 9b252da..2d0fd0c 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -37,7 +37,9 @@ from tests.common.skip import (
     SkipIfADLS,
     SkipIfIsilon,
     SkipIfLocal)
-from tests.common.test_dimensions import create_single_exec_option_dimension
+from tests.common.test_dimensions import (
+    create_single_exec_option_dimension,
+    create_exec_option_dimension)
 from tests.common.test_result_verifier import (
     parse_column_types,
     parse_column_labels,
@@ -49,6 +51,11 @@ from tests.util.hdfs_util import NAMENODE
 from tests.util.get_parquet_metadata import get_parquet_metadata
 from tests.util.test_file_parser import QueryTestSectionReader
 
+# Test scanners with denial of reservations at varying frequency. This will affect the
+# number of scanner threads that can be spun up.
+DEBUG_ACTION_DIMS = [None,
+  '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@0.5',
+  '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@1.0']
 
 class TestScannersAllTableFormats(ImpalaTestSuite):
   BATCH_SIZES = [0, 1, 16]
@@ -66,18 +73,20 @@ class TestScannersAllTableFormats(ImpalaTestSuite):
       
cls.ImpalaTestMatrix.add_dimension(cls.create_table_info_dimension('pairwise'))
     cls.ImpalaTestMatrix.add_dimension(
         ImpalaTestDimension('batch_size', *TestScannersAllTableFormats.BATCH_SIZES))
+    cls.ImpalaTestMatrix.add_dimension(
+        ImpalaTestDimension('debug_action', *DEBUG_ACTION_DIMS))
 
   def test_scanners(self, vector):
     new_vector = deepcopy(vector)
     new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size')
+    new_vector.get_value('exec_option')['debug_action'] = vector.get_value('debug_action')
     self.run_test_case('QueryTest/scanners', new_vector)
 
   def test_hdfs_scanner_profile(self, vector):
-    if vector.get_value('table_format').file_format in ('kudu', 'hbase'):
+    if vector.get_value('table_format').file_format in ('kudu', 'hbase') or \
+       vector.get_value('exec_option')['num_nodes'] != 0:
       pytest.skip()
-    new_vector = deepcopy(vector)
-    new_vector.get_value('exec_option')['num_nodes'] = 0
-    self.run_test_case('QueryTest/hdfs_scanner_profile', new_vector)
+    self.run_test_case('QueryTest/hdfs_scanner_profile', vector)
 
 # Test all the scanners with a simple limit clause. The limit clause triggers
 # cancellation in the scanner code paths.
@@ -171,6 +180,8 @@ class TestWideRow(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestWideRow, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS))
     # I can't figure out how to load a huge row into hbase
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format != 'hbase')
@@ -202,6 +213,8 @@ class TestWideTable(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestWideTable, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS))
     cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension("num_cols", *cls.NUM_COLS))
     # To cut down on test execution time, only run in exhaustive.
     if cls.exploration_strategy() != 'exhaustive':
@@ -244,6 +257,8 @@ class TestParquet(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestParquet, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS))
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format == 'parquet')
 

http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_scanners_fuzz.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners_fuzz.py b/tests/query_test/test_scanners_fuzz.py
index c336a17..0b42e5d 100644
--- a/tests/query_test/test_scanners_fuzz.py
+++ b/tests/query_test/test_scanners_fuzz.py
@@ -48,6 +48,12 @@ class TestScannersFuzzing(ImpalaTestSuite):
   # Test a range of batch sizes to exercise different corner cases.
   BATCH_SIZES = [0, 1, 16, 10000]
 
+  # Test with denial of reservations at varying frequency. This will affect the number
+  # of scanner threads that can be spun up.
+  DEBUG_ACTION_VALUES = [None,
+    '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@0.5',
+    '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@1.0']
+
   @classmethod
   def get_workload(cls):
     return 'functional-query'
@@ -59,7 +65,8 @@ class TestScannersFuzzing(ImpalaTestSuite):
         create_exec_option_dimension_from_dict({
           'abort_on_error' : cls.ABORT_ON_ERROR_VALUES,
           'num_nodes' : cls.NUM_NODES_VALUES,
-          'mem_limit' : cls.MEM_LIMITS}))
+          'mem_limit' : cls.MEM_LIMITS,
+          'debug_action' : cls.DEBUG_ACTION_VALUES}))
     # TODO: enable for more table formats once they consistently pass the fuzz test.
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ('avro', 'parquet') or

http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_sort.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_sort.py b/tests/query_test/test_sort.py
index 59a28cd..70629c4 100644
--- a/tests/query_test/test_sort.py
+++ b/tests/query_test/test_sort.py
@@ -67,14 +67,23 @@ class TestQueryFullSort(ImpalaTestSuite):
       order by o_orderdate"""
     exec_option = copy(vector.get_value('exec_option'))
     table_format = vector.get_value('table_format')
+    exec_option['default_spillable_buffer_size'] = '8M'
+
+    # Minimum memory for different parts of the plan.
+    sort_reservation_mb = 48
+    if table_format.file_format == 'parquet':
+      scan_reservation_mb = 24
+    else:
+      scan_reservation_mb = 8
+    total_reservation_mb = sort_reservation_mb + scan_reservation_mb
 
     # The below memory value assume 8M pages.
-    exec_option['default_spillable_buffer_size'] = '8M'
-    buffer_pool_limit_values = ['-1', '48M'] # Unlimited and minimum memory.
+    # Test with unlimited and minimum memory for all file formats.
+    buffer_pool_limit_values = ['-1', '{0}M'.format(total_reservation_mb)]
     if self.exploration_strategy() == 'exhaustive' and \
         table_format.file_format == 'parquet':
       # Test some intermediate values for parquet on exhaustive.
-      buffer_pool_limit_values += ['64M', '128M', '256M']
+      buffer_pool_limit_values += ['128M', '256M']
     for buffer_pool_limit in buffer_pool_limit_values:
       exec_option['buffer_pool_limit'] = buffer_pool_limit
       result = transpose_results(self.execute_query(
@@ -83,7 +92,6 @@ class TestQueryFullSort(ImpalaTestSuite):
 
   def test_sort_join(self, vector):
     """With 200m memory limit this should be a 2-phase sort"""
-
     query = """select o1.o_orderdate, o2.o_custkey, o1.o_comment from orders o1 join
     orders o2 on (o1.o_orderkey = o2.o_orderkey) order by o1.o_orderdate limit 100000"""
 

http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/tests/query_test/test_spilling.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_spilling.py b/tests/query_test/test_spilling.py
index 0b36429..05ada9f 100644
--- a/tests/query_test/test_spilling.py
+++ b/tests/query_test/test_spilling.py
@@ -95,3 +95,8 @@ class TestSpillingNoDebugActionDimensions(ImpalaTestSuite):
       setting debug_action to alternative values via query options."""
     self.run_test_case('QueryTest/spilling-query-options', vector)
 
+  def test_spilling_no_debug_action(self, vector):
+    """Spilling tests that will not succeed if run with an arbitrary debug action.
+       These tests either run with no debug action set or set their own debug action."""
+    self.run_test_case('QueryTest/spilling-no-debug-action', vector)
+

[01/15] impala git commit: IMPALA-4835: Part 3: switch I/O buffers to buffer pool

Reply via email to