IMPALA-2345,2991: test coverage for spilling and sorts

Add missing coverage for sorting by CHAR and VARCHAR.
Add more coverage for spilling sorts. Fix spilling tests: ensure that they actually reliably spill (many of them had memory limits high enough that they could run entirely in memory). I ran this in a loop for a while to flush out flaky tests. The tests should be fairly predictable given that they're not run concurrently with other tests and we allocate enough block manager memory so that each operator can obtain its reservation. Change-Id: Ia2d2627a2c327dcdf269ea3216385b1af9dfa305 Reviewed-on: http://gerrit.cloudera.org:8080/2877 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/34c95c95 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/34c95c95 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/34c95c95 Branch: refs/heads/master Commit: 34c95c95901ba3d81a2b30f17ebf194cec4ef1d1 Parents: a805e10 Author: Tim Armstrong <[email protected]> Authored: Fri Apr 22 11:14:02 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Thu May 12 14:17:55 2016 -0700 ---------------------------------------------------------------------- .../queries/QueryTest/spilling.test | 273 +++++++++++++++++-- tests/custom_cluster/test_spilling.py | 4 +- 2 files changed, 257 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34c95c95/testdata/workloads/functional-query/queries/QueryTest/spilling.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling.test b/testdata/workloads/functional-query/queries/QueryTest/spilling.test index 1db90d3..a29c6c7 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/spilling.test +++ 
b/testdata/workloads/functional-query/queries/QueryTest/spilling.test @@ -1,7 +1,6 @@ ==== ---- QUERY -set num_nodes=1; -set max_block_mgr_memory=265m; +set max_block_mgr_memory=25m; select l_orderkey, count(*) from lineitem group by 1 @@ -19,11 +18,18 @@ order by 1 limit 10 34,3 ---- TYPES BIGINT, BIGINT +---- RUNTIME_PROFILE +# Verify that spilling and passthrough were activated. +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +row_regex: .*NumRepartitions: .* \([1-9][0-9]*\) +row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\) ==== ---- QUERY # Test query with string grouping column and string agg columns +# Could only get it to spill reliably with num_nodes=1. +# TODO: revisit with new buffer pool. set num_nodes=1; -set max_block_mgr_memory=275m; +set max_block_mgr_memory=25m; select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode) from lineitem group by 1,2 @@ -34,9 +40,12 @@ order by 1,2 limit 3 'A',6,0.03,'TRUCK' ---- TYPES STRING, BIGINT, DECIMAL, STRING +---- RUNTIME_PROFILE +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +row_regex: .*NumRepartitions: .* \([1-9][0-9]*\) ==== ---- QUERY -set max_block_mgr_memory=275m; +set max_block_mgr_memory=25m; select l_orderkey, count(*) from lineitem group by 1 @@ -54,11 +63,17 @@ order by 1 limit 10; 34,3 ---- TYPES BIGINT, BIGINT +---- RUNTIME_PROFILE +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +row_regex: .*NumRepartitions: .* \([1-9][0-9]*\) +row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\) ==== ---- QUERY # Test query with string grouping column -set num_nodes=0; -set max_block_mgr_memory=275m; +# Could only get it to spill reliably with num_nodes=1. +# TODO: revisit with new buffer pool. 
+set num_nodes=1; +set max_block_mgr_memory=25m; select l_comment, count(*) from lineitem group by 1 @@ -71,11 +86,16 @@ order by count(*) desc limit 5 ' furiously ',845 ---- TYPES STRING, BIGINT +---- RUNTIME_PROFILE +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +row_regex: .*NumRepartitions: .* \([1-9][0-9]*\) ==== ---- QUERY # Test query with string grouping column and string agg columns -set num_nodes=0; -set max_block_mgr_memory=80m; +# Could only get it to spill reliably with num_nodes=1. +# TODO: revisit with new buffer pool. +set num_nodes=1; +set max_block_mgr_memory=25m; select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode) from lineitem group by 1,2 @@ -87,15 +107,13 @@ order by 1,2 limit 3; ---- TYPES STRING, BIGINT, DECIMAL, STRING ---- RUNTIME_PROFILE -# Verify that passthrough and spilling happened in the pre and merge agg. -# TODO: reenable if we can get it to spill in a non-flaky way -# row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) -# row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\) +# Verify that spilling happened in the agg. +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +row_regex: .*NumRepartitions: .* \([1-9][0-9]*\) ==== ---- QUERY -# Test query with string intermediate state. -set num_nodes=0; -set max_block_mgr_memory=275m; +# Test with string intermediate state (avg() uses string intermediate value). +set max_block_mgr_memory=25m; select l_orderkey, avg(l_orderkey) from lineitem group by 1 @@ -108,6 +126,11 @@ order by 1 limit 5 5,5 ---- TYPES BIGINT, DOUBLE +---- RUNTIME_PROFILE +# Verify that passthrough and spilling happened in the pre and merge agg. +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +row_regex: .*NumRepartitions: .* \([1-9][0-9]*\) +row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\) ==== ---- QUERY set num_nodes=0; @@ -128,6 +151,9 @@ l1.l_shipdate = l3.l_shipdate 1846743 ---- TYPES BIGINT +---- RUNTIME_PROFILE +# Verify that at least one of the joins was spilled. 
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) ==== ---- QUERY set num_nodes=0; @@ -138,6 +164,10 @@ select max(t1.total_count), max(t1.l_shipinstruct), max(t1.l_comment) from 6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. ' ---- TYPES BIGINT, STRING, STRING +---- RUNTIME_PROFILE +# Indirectly verify that the analytic spilled: if it spills a block, it must repin it. +row_regex: .*PinTime: [1-9][0-9]*.* +==== ---- QUERY # Run this query with very low memory. Since the tables are small, the PA/PHJ should be # using buffers much smaller than the io buffer. @@ -158,13 +188,16 @@ where a.id = b.id and b.id = c.id group by a.int_col 9,8 ---- TYPES INT, BIGINT +---- RUNTIME_PROFILE +# This query is not meant to spill. +row_regex: .*SpilledPartitions: 0 .* ==== ---- QUERY: TPCH-Q21 # Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti # and left outer joins were returning wrong results). # Q21 - Suppliers Who Kept Orders Waiting Query set num_nodes=0; -set max_block_mgr_memory=100m; +set max_block_mgr_memory=65m; select s_name, count(*) as numwait @@ -308,12 +341,14 @@ limit 100 'Supplier#000002483',12 ---- TYPES string, bigint +---- RUNTIME_PROFILE +# Verify that at least one of the joins was spilled. +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) ==== ---- QUERY # Test aggregation spill with group_concat distinct -# TODO: get this to spill. set num_nodes=1; -set max_block_mgr_memory=265m; +set max_block_mgr_memory=100m; select l_orderkey, count(*), group_concat(distinct l_linestatus, '|') from lineitem group by 1 @@ -331,6 +366,9 @@ order by 1 limit 10 34,3,'O' ---- TYPES BIGINT, BIGINT, STRING +---- RUNTIME_PROFILE +# Verify that at least one of the aggs spilled. +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) ==== ---- QUERY # Regression test for IMPALA-2612. 
The following query will cause CastToChar @@ -348,4 +386,203 @@ from lineitem 4502054 ---- TYPES BIGINT +---- RUNTIME_PROFILE +# Verify that the agg spilled. +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with inlined char column materialized by exprs. +# Set low memory limit to force spilling. +set num_nodes=0; +set max_block_mgr_memory=4m; +# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption. +# When IMPALA-3332 is fixed, can reenable this memory limit. +#set mem_limit=200m; +set disable_outermost_topn=1; +select cast(l_comment as char(50)) +from lineitem +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en ' +' Tiresias about the slyly ironic dinos ca ' +' Tiresias about the slyly unus ' +' Tiresias above ' +' Tiresias above the fox ' +' Tiresias above the furiously final th ' +' Tiresias above the slyly expr ' +' Tiresias above the stealthily p ' +---- TYPES +CHAR +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with input inlined char column materialized before sort. 
+set num_nodes=0; +set mem_limit=200m; +set max_block_mgr_memory=4m; +set disable_outermost_topn=1; +select char_col +from (select cast(l_comment as char(50)) char_col + from lineitem) subquery +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en ' +' Tiresias about the slyly ironic dinos ca ' +' Tiresias about the slyly unus ' +' Tiresias above ' +' Tiresias above the fox ' +' Tiresias above the furiously final th ' +' Tiresias above the slyly expr ' +' Tiresias above the stealthily p ' +---- TYPES +CHAR +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with input non-inlined char column materialized before sort. +# Set low memory limit to force spilling. +set num_nodes=0; +set mem_limit=200m; +set max_block_mgr_memory=4m; +set disable_outermost_topn=1; +select char_col +from (select cast(l_comment as char(200)) char_col + from lineitem) subquery +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en ' +' Tiresias about the slyly ironic dinos ca ' +' Tiresias about the slyly unus ' +' Tiresias above ' +' Tiresias above the fox ' +' Tiresias above the furiously final th ' +' Tiresias above the slyly expr ' +' Tiresias above the stealthily p ' +---- TYPES +CHAR +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with varchar column materialized by exprs. +# Set low memory limit to force spilling. 
+set num_nodes=0; +set max_block_mgr_memory=4m; +# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption. +# When IMPALA-3332 is fixed, can reenable this memory limit. +#set mem_limit=200m; +set disable_outermost_topn=1; +select cast(l_comment as varchar(50)) +from lineitem +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en' +' Tiresias about the slyly ironic dinos ca' +' Tiresias about the slyly unus' +' Tiresias above' +' Tiresias above the fox' +' Tiresias above the furiously final th' +' Tiresias above the slyly expr' +' Tiresias above the stealthily p' +---- TYPES +STRING +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with input varchar column materialized before sort. +# Set low memory limit to force spilling. 
+set num_nodes=0; +set mem_limit=200m; +set max_block_mgr_memory=4m; +set disable_outermost_topn=1; +select char_col +from (select cast(l_comment as varchar(50)) char_col + from lineitem) subquery +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en' +' Tiresias about the slyly ironic dinos ca' +' Tiresias about the slyly unus' +' Tiresias above' +' Tiresias above the fox' +' Tiresias above the furiously final th' +' Tiresias above the slyly expr' +' Tiresias above the stealthily p' +---- TYPES +STRING +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34c95c95/tests/custom_cluster/test_spilling.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_spilling.py b/tests/custom_cluster/test_spilling.py index 8e17e92..541352f 100644 --- a/tests/custom_cluster/test_spilling.py +++ b/tests/custom_cluster/test_spilling.py @@ -27,8 +27,8 @@ class TestSpillStress(CustomClusterTestSuite): @classmethod def setup_class(cls): - #start impala with args - cls._start_impala_cluster(['--impalad_args=--"read_size=200000"', + # Start with 256KB buffers, to reduce data size required to force spilling. + cls._start_impala_cluster(['--impalad_args=--"read_size=262144"', 'catalogd_args="--load_catalog_in_background=false"']) super(CustomClusterTestSuite, cls).setup_class()
