Repository: incubator-impala Updated Branches: refs/heads/master aa0ee1e10 -> c4f903033
IMPALA-3200: more buffer pool end-to-end tests This adds most of the end-to-end tests described in the test plan. See http://goo.gl/v3Strz. * End-to-end test for disk spill encryption. * Admission control test for the case when acquiring initial reservation fails. * Initial reservation acquire failure test * scratch_limit tests for Join, Agg, Sort, Analytic * Memory usage scaling tests for Join, Agg, Sort, Analytic Also splits out the slow sort queries in test_spilling and moves them to exhaustive so the individual tests run faster and have better parallelism. Testing: Ran all the core tests. Will do a full exhaustive run before committing. Change-Id: I554aa5ddfef4f8e75295596e720a14eee1afa17f Reviewed-on: http://gerrit.cloudera.org:8080/7552 Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c4f90303 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c4f90303 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c4f90303 Branch: refs/heads/master Commit: c4f903033ccd1965b937b77b159e4398eb91222e Parents: aa0ee1e Author: Tim Armstrong <tarmstr...@cloudera.com> Authored: Mon Jul 31 21:35:26 2017 -0700 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Mon Aug 7 00:57:46 2017 +0000 ---------------------------------------------------------------------- .../QueryTest/disk-spill-encryption.test | 15 ++ .../QueryTest/spilling-sorts-exhaustive.test | 195 +++++++++++++++++++ .../queries/QueryTest/spilling.test | 194 ------------------ .../queries/primitive_broadcast_join_3.test | 2 + .../primitive_groupby_bigint_highndv.test | 1 + .../primitive_groupby_decimal_highndv.test | 1 + .../queries/primitive_orderby_all.test | 10 + .../targeted-perf/targeted-perf_core.csv | 1 + .../targeted-perf/targeted-perf_dimensions.csv | 2 +- 
.../targeted-perf/targeted-perf_exhaustive.csv | 1 + .../custom_cluster/test_admission_controller.py | 32 ++- .../test_disk_spill_encryption.py | 32 +++ tests/query_test/test_mem_usage_scaling.py | 88 ++++++++- tests/query_test/test_scratch_limit.py | 51 +++-- tests/query_test/test_spilling.py | 5 + 15 files changed, 409 insertions(+), 221 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test b/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test new file mode 100644 index 0000000..2c4fede --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test @@ -0,0 +1,15 @@ +==== +---- QUERY +set buffer_pool_limit=10m; +set default_spillable_buffer_size=64k; +select count(*) +from (select distinct o_orderdate, o_custkey, o_comment + from tpch_parquet.orders) v; +---- RESULTS +1500000 +---- TYPES +BIGINT +---- RUNTIME_PROFILE +# Verify that spilling was activated. +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/functional-query/queries/QueryTest/spilling-sorts-exhaustive.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling-sorts-exhaustive.test b/testdata/workloads/functional-query/queries/QueryTest/spilling-sorts-exhaustive.test new file mode 100644 index 0000000..d391884 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/spilling-sorts-exhaustive.test @@ -0,0 +1,195 @@ +==== +---- QUERY +# Test sort with small char column materialized by exprs. 
+# Set low memory limit to force spilling. +# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption. +set buffer_pool_limit=4m; +set mem_limit=200m; +set disable_outermost_topn=1; +select cast(l_comment as char(50)) +from lineitem +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en ' +' Tiresias about the slyly ironic dinos ca ' +' Tiresias about the slyly unus ' +' Tiresias above ' +' Tiresias above the fox ' +' Tiresias above the furiously final th ' +' Tiresias above the slyly expr ' +' Tiresias above the stealthily p ' +---- TYPES +CHAR +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with small input char column materialized before sort. +set mem_limit=200m; +set buffer_pool_limit=4m; +set disable_outermost_topn=1; +select char_col +from (select cast(l_comment as char(50)) char_col + from lineitem) subquery +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en ' +' Tiresias about the slyly ironic dinos ca ' +' Tiresias about the slyly unus ' +' Tiresias above ' +' Tiresias above the fox ' +' Tiresias above the furiously final th ' +' Tiresias above the slyly expr ' +' Tiresias above the stealthily p ' +---- TYPES +CHAR +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with large input char column materialized before sort. +# Set low memory limit to force spilling. 
+set mem_limit=200m; +set buffer_pool_limit=4m; +set disable_outermost_topn=1; +select char_col +from (select cast(l_comment as char(200)) char_col + from lineitem) subquery +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en ' +' Tiresias about the slyly ironic dinos ca ' +' Tiresias about the slyly unus ' +' Tiresias above ' +' Tiresias above the fox ' +' Tiresias above the furiously final th ' +' Tiresias above the slyly expr ' +' Tiresias above the stealthily p ' +---- TYPES +CHAR +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with varchar column materialized by exprs. +# Set low memory limit to force spilling. +set buffer_pool_limit=4m; +# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption. +set mem_limit=200m; +set disable_outermost_topn=1; +select cast(l_comment as varchar(50)) +from lineitem +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en' +' Tiresias about the slyly ironic dinos ca' +' Tiresias about the slyly unus' +' Tiresias above' +' Tiresias above the fox' +' Tiresias above the furiously final th' +' Tiresias above the slyly expr' +' Tiresias above the stealthily p' +---- TYPES +STRING +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) +==== +---- QUERY +# Test sort with input varchar column materialized before sort. +# Set low memory limit to force spilling. 
+set mem_limit=200m; +set buffer_pool_limit=4m; +set disable_outermost_topn=1; +select char_col +from (select cast(l_comment as varchar(50)) char_col + from lineitem) subquery +order by 1 +limit 20; +---- RESULTS +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias ' +' Tiresias about the en' +' Tiresias about the slyly ironic dinos ca' +' Tiresias about the slyly unus' +' Tiresias above' +' Tiresias above the fox' +' Tiresias above the furiously final th' +' Tiresias above the slyly expr' +' Tiresias above the stealthily p' +---- TYPES +STRING +---- RUNTIME_PROFILE +# Verify that the sort actually spilled +row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) +row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) + http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/functional-query/queries/QueryTest/spilling.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling.test b/testdata/workloads/functional-query/queries/QueryTest/spilling.test index d8335c6..2f81a9a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/spilling.test +++ b/testdata/workloads/functional-query/queries/QueryTest/spilling.test @@ -376,200 +376,6 @@ BIGINT row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) ==== ---- QUERY -# Test sort with small char column materialized by exprs. -# Set low memory limit to force spilling. -# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption. 
-set buffer_pool_limit=4m; -set mem_limit=200m; -set disable_outermost_topn=1; -select cast(l_comment as char(50)) -from lineitem -order by 1 -limit 20; ----- RESULTS -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias about the en ' -' Tiresias about the slyly ironic dinos ca ' -' Tiresias about the slyly unus ' -' Tiresias above ' -' Tiresias above the fox ' -' Tiresias above the furiously final th ' -' Tiresias above the slyly expr ' -' Tiresias above the stealthily p ' ----- TYPES -CHAR ----- RUNTIME_PROFILE -# Verify that the sort actually spilled -row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) -row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) -==== ----- QUERY -# Test sort with small input char column materialized before sort. -set mem_limit=200m; -set buffer_pool_limit=4m; -set disable_outermost_topn=1; -select char_col -from (select cast(l_comment as char(50)) char_col - from lineitem) subquery -order by 1 -limit 20; ----- RESULTS -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias about the en ' -' Tiresias about the slyly ironic dinos ca ' -' Tiresias about the slyly unus ' -' Tiresias above ' -' Tiresias above the fox ' -' Tiresias above the furiously final th ' -' Tiresias above the slyly expr ' -' Tiresias above the stealthily p ' ----- TYPES -CHAR ----- RUNTIME_PROFILE -# Verify that the sort actually spilled -row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) -row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) -==== ----- QUERY -# Test sort with large input char column materialized before sort. -# Set low memory limit to force spilling. 
-set mem_limit=200m; -set buffer_pool_limit=4m; -set disable_outermost_topn=1; -select char_col -from (select cast(l_comment as char(200)) char_col - from lineitem) subquery -order by 1 -limit 20; ----- RESULTS -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias about the en ' -' Tiresias about the slyly ironic dinos ca ' -' Tiresias about the slyly unus ' -' Tiresias above ' -' Tiresias above the fox ' -' Tiresias above the furiously final th ' -' Tiresias above the slyly expr ' -' Tiresias above the stealthily p ' ----- TYPES -CHAR ----- RUNTIME_PROFILE -# Verify that the sort actually spilled -row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) -row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) -==== ----- QUERY -# Test sort with varchar column materialized by exprs. -# Set low memory limit to force spilling. -set buffer_pool_limit=4m; -# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption. -set mem_limit=200m; -set disable_outermost_topn=1; -select cast(l_comment as varchar(50)) -from lineitem -order by 1 -limit 20; ----- RESULTS -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias about the en' -' Tiresias about the slyly ironic dinos ca' -' Tiresias about the slyly unus' -' Tiresias above' -' Tiresias above the fox' -' Tiresias above the furiously final th' -' Tiresias above the slyly expr' -' Tiresias above the stealthily p' ----- TYPES -STRING ----- RUNTIME_PROFILE -# Verify that the sort actually spilled -row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) -row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) -==== ----- QUERY -# Test sort with input varchar column materialized before sort. -# Set low memory limit to force spilling. 
-set mem_limit=200m; -set buffer_pool_limit=4m; -set disable_outermost_topn=1; -select char_col -from (select cast(l_comment as varchar(50)) char_col - from lineitem) subquery -order by 1 -limit 20; ----- RESULTS -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias ' -' Tiresias about the en' -' Tiresias about the slyly ironic dinos ca' -' Tiresias about the slyly unus' -' Tiresias above' -' Tiresias above the fox' -' Tiresias above the furiously final th' -' Tiresias above the slyly expr' -' Tiresias above the stealthily p' ----- TYPES -STRING ----- RUNTIME_PROFILE -# Verify that the sort actually spilled -row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) -row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) -==== ----- QUERY # IMPALA-1346/IMPALA-1546: fix sorter memory management so that it can complete # successfully when in same pipeline as a spilling join. set buffer_pool_limit=50m; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/targeted-perf/queries/primitive_broadcast_join_3.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/targeted-perf/queries/primitive_broadcast_join_3.test b/testdata/workloads/targeted-perf/queries/primitive_broadcast_join_3.test index fb82e49..a5c1716 100644 --- a/testdata/workloads/targeted-perf/queries/primitive_broadcast_join_3.test +++ b/testdata/workloads/targeted-perf/queries/primitive_broadcast_join_3.test @@ -13,5 +13,7 @@ WHERE s_name='Supplier#001880004' AND p_brand < 'Brand#30' AND o_orderdate < '1994-01-01'; ---- RESULTS +0 ---- TYPES +BIGINT ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/targeted-perf/queries/primitive_groupby_bigint_highndv.test ---------------------------------------------------------------------- diff --git 
a/testdata/workloads/targeted-perf/queries/primitive_groupby_bigint_highndv.test b/testdata/workloads/targeted-perf/queries/primitive_groupby_bigint_highndv.test index 9af70f8..2830837 100644 --- a/testdata/workloads/targeted-perf/queries/primitive_groupby_bigint_highndv.test +++ b/testdata/workloads/targeted-perf/queries/primitive_groupby_bigint_highndv.test @@ -9,4 +9,5 @@ GROUP BY l_orderkey HAVING cnt > 9999999999999; ---- RESULTS ---- TYPES +BIGINT,BIGINT ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/targeted-perf/queries/primitive_groupby_decimal_highndv.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/targeted-perf/queries/primitive_groupby_decimal_highndv.test b/testdata/workloads/targeted-perf/queries/primitive_groupby_decimal_highndv.test index 8945b61..cfb976a 100644 --- a/testdata/workloads/targeted-perf/queries/primitive_groupby_decimal_highndv.test +++ b/testdata/workloads/targeted-perf/queries/primitive_groupby_decimal_highndv.test @@ -10,4 +10,5 @@ GROUP BY l_extendedprice HAVING cnt > 9999999999999; ---- RESULTS ---- TYPES +DECIMAL,BIGINT ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/targeted-perf/queries/primitive_orderby_all.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/targeted-perf/queries/primitive_orderby_all.test b/testdata/workloads/targeted-perf/queries/primitive_orderby_all.test index ae959df..76c9355 100644 --- a/testdata/workloads/targeted-perf/queries/primitive_orderby_all.test +++ b/testdata/workloads/targeted-perf/queries/primitive_orderby_all.test @@ -28,5 +28,15 @@ FROM ( ) a WHERE rank < 10; ---- RESULTS +1 +2 +3 +4 +5 +6 +7 +8 +9 ---- TYPES +BIGINT ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/targeted-perf/targeted-perf_core.csv 
---------------------------------------------------------------------- diff --git a/testdata/workloads/targeted-perf/targeted-perf_core.csv b/testdata/workloads/targeted-perf/targeted-perf_core.csv index dcf4bc6..6fd782a 100644 --- a/testdata/workloads/targeted-perf/targeted-perf_core.csv +++ b/testdata/workloads/targeted-perf/targeted-perf_core.csv @@ -5,3 +5,4 @@ file_format:seq, dataset:tpch, compression_codec:snap, compression_type:block file_format:rc, dataset:tpch, compression_codec:none, compression_type:none file_format:avro, dataset:tpch, compression_codec: none, compression_type: none file_format:avro, dataset:tpch, compression_codec: snap, compression_type: block +file_format:parquet, dataset:tpch, compression_codec: none, compression_type: none http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/targeted-perf/targeted-perf_dimensions.csv ---------------------------------------------------------------------- diff --git a/testdata/workloads/targeted-perf/targeted-perf_dimensions.csv b/testdata/workloads/targeted-perf/targeted-perf_dimensions.csv index 705d48e..1de34aa 100644 --- a/testdata/workloads/targeted-perf/targeted-perf_dimensions.csv +++ b/testdata/workloads/targeted-perf/targeted-perf_dimensions.csv @@ -1,4 +1,4 @@ -file_format: text,seq +file_format: text,seq,rc,avro,parquet,kudu dataset: tpch compression_codec: none,def,gzip,bzip,snap,lzo compression_type: none,block,record http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/testdata/workloads/targeted-perf/targeted-perf_exhaustive.csv ---------------------------------------------------------------------- diff --git a/testdata/workloads/targeted-perf/targeted-perf_exhaustive.csv b/testdata/workloads/targeted-perf/targeted-perf_exhaustive.csv index 8ef6907..098bb65 100644 --- a/testdata/workloads/targeted-perf/targeted-perf_exhaustive.csv +++ b/testdata/workloads/targeted-perf/targeted-perf_exhaustive.csv @@ -10,3 +10,4 @@ file_format: seq, 
dataset: tpch, compression_codec: bzip, compression_type: bloc file_format: seq, dataset: tpch, compression_codec: bzip, compression_type: record file_format: seq, dataset: tpch, compression_codec: snap, compression_type: block file_format: seq, dataset: tpch, compression_codec: snap, compression_type: record +file_format: parquet, dataset: tpch, compression_codec: none, compression_type: none http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/tests/custom_cluster/test_admission_controller.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py index b9ae427..250298b 100644 --- a/tests/custom_cluster/test_admission_controller.py +++ b/tests/custom_cluster/test_admission_controller.py @@ -297,8 +297,8 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( - impalad_args=impalad_admission_ctrl_flags(1, 1, 10 * 1024 * 1024, - 1024 * 1024 * 1024), + impalad_args=impalad_admission_ctrl_flags(max_requests=1, max_queued=1, + pool_max_mem=10 * 1024 * 1024, proc_mem_limit=1024 * 1024 * 1024), statestored_args=_STATESTORED_ARGS) def test_trivial_coord_query_limits(self): """Tests that trivial coordinator only queries have negligible resource requirements. 
@@ -316,13 +316,28 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): assert re.search("Rejected query from pool default-pool : request memory needed " ".* is greater than pool max mem resources 10.00 MB", str(ex)) + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args( + impalad_args=impalad_admission_ctrl_flags(max_requests=1, max_queued=1, + pool_max_mem=10 * 1024 * 1024, proc_mem_limit=1024 * 1024 * 1024), + statestored_args=_STATESTORED_ARGS) + def test_initial_reservation(self): + """Test behaviour with admission control enabled if the initial reservation cannot be + acquired. The query options are set so that the query will be admitted, but acquiring + the initial reservation will fail because it is larger than mem_limit. + """ + query = "select distinct * from functional_parquet.alltypesagg" + opts = {'mem_limit': '10MB', 'num_nodes': '1'} + ex = self.execute_query_expect_failure(self.client, query, opts) + assert "Failed to get minimum memory reservation" in str(ex) + # Process mem_limit used in test_mem_limit_upper_bound PROC_MEM_TEST_LIMIT = 1024 * 1024 * 1024 @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( - impalad_args=impalad_admission_ctrl_flags(1, 1, 10 * PROC_MEM_TEST_LIMIT, - PROC_MEM_TEST_LIMIT)) + impalad_args=impalad_admission_ctrl_flags(max_requests=1, max_queued=1, + pool_max_mem=10 * PROC_MEM_TEST_LIMIT, proc_mem_limit=PROC_MEM_TEST_LIMIT)) def test_mem_limit_upper_bound(self, vector): """ Test to ensure that a query is admitted if the requested memory is equal to the process mem limit""" @@ -754,8 +769,8 @@ class TestAdmissionControllerStress(TestAdmissionControllerBase): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( - impalad_args=impalad_admission_ctrl_flags(MAX_NUM_CONCURRENT_QUERIES, - MAX_NUM_QUEUED_QUERIES, -1), + impalad_args=impalad_admission_ctrl_flags(max_requests=MAX_NUM_CONCURRENT_QUERIES, + max_queued=MAX_NUM_QUEUED_QUERIES, pool_max_mem=-1), 
statestored_args=_STATESTORED_ARGS) def test_admission_controller_with_flags(self, vector): self.pool_name = 'default-pool' @@ -786,8 +801,9 @@ class TestAdmissionControllerStress(TestAdmissionControllerBase): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( - impalad_args=impalad_admission_ctrl_flags(MAX_NUM_CONCURRENT_QUERIES * 30, - MAX_NUM_QUEUED_QUERIES, MEM_TEST_LIMIT, MEM_TEST_LIMIT), + impalad_args=impalad_admission_ctrl_flags( + max_requests=MAX_NUM_CONCURRENT_QUERIES * 30, max_queued=MAX_NUM_QUEUED_QUERIES, + pool_max_mem=MEM_TEST_LIMIT, proc_mem_limit=MEM_TEST_LIMIT), statestored_args=_STATESTORED_ARGS) def test_mem_limit(self, vector): # Impala may set the proc mem limit lower than we think depending on the overcommit http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/tests/custom_cluster/test_disk_spill_encryption.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_disk_spill_encryption.py b/tests/custom_cluster/test_disk_spill_encryption.py new file mode 100644 index 0000000..c9a5aeb --- /dev/null +++ b/tests/custom_cluster/test_disk_spill_encryption.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest + +from tests.common.custom_cluster_test_suite import CustomClusterTestSuite + +class TestDiskSpillEncryption(CustomClusterTestSuite): + """ Tests to exercise disk spill encryption end-to-end. """ + + @classmethod + def get_workload(self): + return 'functional-query' + + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args("--disk_spill_encryption=true") + def test_spilling_query(self, vector): + self.run_test_case('QueryTest/disk-spill-encryption', vector) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/tests/query_test/test_mem_usage_scaling.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py index bbdc771..eac95e6 100644 --- a/tests/query_test/test_mem_usage_scaling.py +++ b/tests/query_test/test_mem_usage_scaling.py @@ -24,6 +24,11 @@ from tests.common.skip import SkipIfLocal from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension +# Substrings of the expected error messages when the mem limit is too low +MEM_LIMIT_EXCEEDED_MSG = "Memory limit exceeded" +INITIAL_RESERVATION_MSG = "Failed to get minimum memory reservation" +MEM_LIMIT_ERROR_MSGS = [MEM_LIMIT_EXCEEDED_MSG, INITIAL_RESERVATION_MSG] + class TestQueryMemLimitScaling(ImpalaTestSuite): """Test class to do functional validation of per query memory limits. """ QUERY = ["select * from lineitem where l_orderkey = -1", @@ -35,6 +40,8 @@ class TestQueryMemLimitScaling(ImpalaTestSuite): @classmethod def get_workload(self): + # Note: this workload doesn't run exhaustively. See IMPALA-3947 before trying to move + # this test to exhaustive. 
return 'tpch' @classmethod @@ -58,9 +65,12 @@ class TestQueryMemLimitScaling(ImpalaTestSuite): for query in self.QUERY: self.execute_query(query, exec_options, table_format=table_format) + class TestExprMemUsage(ImpalaTestSuite): @classmethod def get_workload(cls): + # Note: this workload doesn't run exhaustively. See IMPALA-3947 before trying to move + # this test to exhaustive. return 'tpch' @classmethod @@ -80,10 +90,32 @@ class TestExprMemUsage(ImpalaTestSuite): table_format=vector.get_value('table_format')) +class TestInitialReservation(ImpalaTestSuite): + @classmethod + def get_workload(self): + # Note: this workload doesn't run exhaustively. See IMPALA-3947 before trying to move + # this test to exhaustive. + return 'tpch' + + @classmethod + def add_test_dimensions(cls): + super(TestInitialReservation, cls).add_test_dimensions() + cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension()) + cls.ImpalaTestMatrix.add_constraint(lambda v:\ + v.get_value('table_format').file_format in ['parquet']) + + def test_initial_reservation(self, vector): + """Test failure to get the initial reservation.""" + exec_options = copy(vector.get_value('exec_option')) + exec_options['mem_limit'] = '20m' + query = """select * from tpch_parquet.lineitem l1 + join tpch_parquet.lineitem l2 on l1.l_orderkey = l2.l_orderkey""" + result = self.execute_query_expect_failure(self.client, query, exec_options) + assert (INITIAL_RESERVATION_MSG in str(result)) + + class TestLowMemoryLimits(ImpalaTestSuite): '''Super class for the memory limit tests with the TPC-H and TPC-DS queries''' - EXPECTED_ERROR_MSGS = ["Memory limit exceeded", - "Failed to get minimum memory reservation"] def low_memory_limit_test(self, vector, tpch_query, limit, xfail_mem_limit=None): mem = vector.get_value('mem_limit') @@ -104,7 +136,7 @@ class TestLowMemoryLimits(ImpalaTestSuite): except ImpalaBeeswaxException as e: if not expects_error and not xfail_mem_limit: raise found_expected_error = False - for 
error_msg in TestLowMemoryLimits.EXPECTED_ERROR_MSGS: + for error_msg in MEM_LIMIT_ERROR_MSGS: if error_msg in str(e): found_expected_error = True assert found_expected_error, str(e) if not expects_error and xfail_mem_limit: @@ -112,7 +144,6 @@ class TestLowMemoryLimits(ImpalaTestSuite): class TestTpchMemLimitError(TestLowMemoryLimits): - # TODO: consider moving this test to exhaustive. # The mem limits that will be used. MEM_IN_MB = [20, 140, 180, 220, 275, 450, 700] @@ -127,6 +158,8 @@ class TestTpchMemLimitError(TestLowMemoryLimits): @classmethod def get_workload(self): + # Note: this workload doesn't run exhaustively. See IMPALA-3947 before trying to move + # this test to exhaustive. return 'tpch' @classmethod @@ -209,6 +242,51 @@ class TestTpchMemLimitError(TestLowMemoryLimits): self.low_memory_limit_test(vector, 'tpch-q22', self.MIN_MEM_FOR_TPCH['Q22']) +class TestTpchPrimitivesMemLimitError(TestLowMemoryLimits): + """ + Memory usage tests using targeted-perf queries to exercise specific operators. + """ + + # The mem limits that will be used. + MEM_IN_MB = [20, 100, 120, 200] + + # Different values of mem limits and minimum mem limit (in MBs) each query is expected + # to run without problem. Determined by manual binary search. + MIN_MEM = { 'primitive_broadcast_join_3': 115, 'primitive_groupby_bigint_highndv': 110, + 'primitive_orderby_all': 120} + + @classmethod + def get_workload(self): + # Note: this workload doesn't run exhaustively. See IMPALA-3947 before trying to move + # this test to exhaustive. 
+ return 'targeted-perf' + + @classmethod + def add_test_dimensions(cls): + super(TestTpchPrimitivesMemLimitError, cls).add_test_dimensions() + + cls.ImpalaTestMatrix.add_dimension( + ImpalaTestDimension('mem_limit', *cls.MEM_IN_MB)) + + cls.ImpalaTestMatrix.add_constraint(lambda v:\ + v.get_value('table_format').file_format in ['parquet']) + + def run_primitive_query(self, vector, query_name): + self.low_memory_limit_test(vector, query_name, self.MIN_MEM[query_name]) + + def test_low_mem_limit_broadcast_join_3(self, vector): + """Test hash join memory requirements.""" + self.run_primitive_query(vector, 'primitive_broadcast_join_3') + + def test_low_mem_limit_groupby_bigint_highndv(self, vector): + """Test grouping aggregation memory requirements.""" + self.run_primitive_query(vector, 'primitive_groupby_bigint_highndv') + + def test_low_mem_limit_orderby_all(self, vector): + """Test sort and analytic memory requirements.""" + self.run_primitive_query(vector, 'primitive_orderby_all') + + class TestTpcdsMemLimitError(TestLowMemoryLimits): # The mem limits that will be used. MEM_IN_MB = [20, 100, 116, 150] @@ -219,6 +297,8 @@ class TestTpcdsMemLimitError(TestLowMemoryLimits): @classmethod def get_workload(self): + # Note: this workload doesn't run exhaustively. See IMPALA-3947 before trying to move + # this test to exhaustive. 
return 'tpcds' @classmethod http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/tests/query_test/test_scratch_limit.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_scratch_limit.py b/tests/query_test/test_scratch_limit.py index 6e19bb5..a2ccad9 100644 --- a/tests/query_test/test_scratch_limit.py +++ b/tests/query_test/test_scratch_limit.py @@ -20,14 +20,35 @@ class TestScratchLimit(ImpalaTestSuite): This class tests the functionality of setting the scratch limit as a query option """ - spill_query = """ + spilling_sort_query = """ select o_orderdate, o_custkey, o_comment from tpch.orders order by o_orderdate """ + spilling_agg_query = """ + select count(*) from ( + select distinct o_orderdate, o_custkey, o_comment + from tpch_parquet.orders) v; + """ + spilling_join_query = """ + select count(*) + from tpch_parquet.lineitem join tpch_parquet.orders on l_orderkey = o_orderkey + """ + # The analytic function in this query spills, but the sort generally spills first. + # Ideally we would have a spilling analytic without a sort to exercise it in isolation. 
+ spilling_analytic_query = """ + SELECT i_item_sk, i_current_price, + SUM (i_current_price) + OVER (ORDER BY i_item_sk rows between 500000 preceding and 500000 following) running_total + FROM tpcds_parquet.item + ORDER BY i_brand, i_item_sk; + """ + + spilling_queries = [spilling_sort_query, spilling_agg_query, spilling_join_query, + spilling_analytic_query] # Block manager memory limit that is low enough to - # force Impala to spill to disk when executing 'spill_query' + # force Impala to spill to disk when executing 'spilling_sort_query' buffer_pool_limit = "64m" @classmethod @@ -44,17 +65,17 @@ class TestScratchLimit(ImpalaTestSuite): def test_with_high_scratch_limit(self, vector): """ - Query runs to completion with a scratch limit well above + Sort query runs to completion with a scratch limit well above its required scratch space which in this case is 128m. """ exec_option = vector.get_value('exec_option') exec_option['buffer_pool_limit'] = self.buffer_pool_limit exec_option['scratch_limit'] = '500m' - self.execute_query_expect_success(self.client, self.spill_query, exec_option) + self.execute_query_expect_success(self.client, self.spilling_sort_query, exec_option) def test_with_low_scratch_limit(self, vector): """ - Query throws the appropriate exception with a scratch limit well below + Sort query throws the appropriate exception with a scratch limit well below its required scratch space which in this case is 128m. 
""" exec_option = vector.get_value('exec_option') @@ -63,43 +84,45 @@ class TestScratchLimit(ImpalaTestSuite): expected_error = 'Scratch space limit of %s bytes exceeded' scratch_limit_in_bytes = 24 * 1024 * 1024 try: - self.execute_query(self.spill_query, exec_option) + self.execute_query(self.spilling_sort_query, exec_option) assert False, "Query was expected to fail" except ImpalaBeeswaxException as e: assert expected_error % scratch_limit_in_bytes in str(e) def test_with_zero_scratch_limit(self, vector): """ - Query throws the appropriate exception with a scratch limit of + Queries throw the appropriate exception with a scratch limit of zero which means no scratch space can be allocated. """ exec_option = vector.get_value('exec_option') exec_option['buffer_pool_limit'] = self.buffer_pool_limit exec_option['scratch_limit'] = '0' - self.execute_query_expect_failure(self.spill_query, exec_option) + for query in self.spilling_queries: + self.execute_query_expect_failure(query, exec_option) def test_with_unlimited_scratch_limit(self, vector): """ - Query runs to completion with a scratch Limit of -1 means default/no limit. + Sort query runs to completion with a scratch limit of -1, which means default/no limit. """ exec_option = vector.get_value('exec_option') exec_option['buffer_pool_limit'] = self.buffer_pool_limit exec_option['scratch_limit'] = '-1' - self.execute_query_expect_success(self.client, self.spill_query, exec_option) + self.execute_query_expect_success(self.client, self.spilling_sort_query, exec_option) def test_without_specifying_scratch_limit(self, vector): """ - Query runs to completion with the default setting of no scratch limit. + Sort query runs to completion with the default setting of no scratch limit. 
""" exec_option = vector.get_value('exec_option') exec_option['buffer_pool_limit'] = self.buffer_pool_limit - self.execute_query_expect_success(self.client, self.spill_query, exec_option) + self.execute_query_expect_success(self.client, self.spilling_sort_query, exec_option) def test_with_zero_scratch_limit_no_memory_limit(self, vector): """ - Query runs to completion without spilling as there is no limit on block memory manger. + Queries run to completion without spilling as there is no limit on block memory manager. Scratch limit of zero ensures spilling is disabled. """ exec_option = vector.get_value('exec_option') exec_option['scratch_limit'] = '0' - self.execute_query_expect_success(self.client, self.spill_query, exec_option) + for query in self.spilling_queries: + self.execute_query_expect_success(self.client, query, exec_option) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c4f90303/tests/query_test/test_spilling.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_spilling.py b/tests/query_test/test_spilling.py index d029f44..ce6f446 100644 --- a/tests/query_test/test_spilling.py +++ b/tests/query_test/test_spilling.py @@ -48,3 +48,8 @@ class TestSpilling(ImpalaTestSuite): def test_spilling(self, vector): self.run_test_case('QueryTest/spilling', vector) + + def test_spilling_sorts_exhaustive(self, vector): + if self.exploration_strategy() != 'exhaustive': + pytest.skip("only run large sorts on exhaustive") + self.run_test_case('QueryTest/spilling-sorts-exhaustive', vector)