Repository: impala Updated Branches: refs/heads/2.x 0ce9056a1 -> bee907c5f
IMPALA-6645: Enable disk spill encryption by default Perf: Targeted benchmarks with a heavily spilling query on a machine with PCLMULQDQ support show < 5% of CPU time spent in encryption and decryption. PCLMULQDQ was introduced in AMD Bulldozer (c. 2011) and Intel Westmere (c. 2010). Testing: Ran core tests with the change. Updated the custom cluster test to exercise the non-default configuration. Change-Id: Iee4be2a95d689f66c3663d99e4df0fb3968893a9 Reviewed-on: http://gerrit.cloudera.org:8080/10345 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Tim Armstrong <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e4fcc31c Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e4fcc31c Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e4fcc31c Branch: refs/heads/2.x Commit: e4fcc31cf1385fa99c06a04c0c4edecfbf5dbd95 Parents: 0ce9056 Author: Tim Armstrong <[email protected]> Authored: Wed Apr 11 11:00:35 2018 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Wed May 16 00:53:41 2018 +0000 ---------------------------------------------------------------------- be/src/runtime/tmp-file-mgr.cc | 2 +- .../queries/QueryTest/basic-spilling.test | 16 +++++++++ .../QueryTest/disk-spill-encryption.test | 15 --------- .../test_disk_spill_configurations.py | 34 ++++++++++++++++++++ .../test_disk_spill_encryption.py | 32 ------------------ 5 files changed, 51 insertions(+), 48 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/e4fcc31c/be/src/runtime/tmp-file-mgr.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/tmp-file-mgr.cc b/be/src/runtime/tmp-file-mgr.cc index 04e15d4..b995518 100644 --- a/be/src/runtime/tmp-file-mgr.cc +++ b/be/src/runtime/tmp-file-mgr.cc @@ -38,7 +38,7 @@ #include "common/names.h" -DEFINE_bool(disk_spill_encryption, false, +DEFINE_bool(disk_spill_encryption, true, "Set this to encrypt and perform an integrity " "check on all data spilled to disk during a query"); DEFINE_string(scratch_dirs, "/tmp", "Writable scratch directories"); http://git-wip-us.apache.org/repos/asf/impala/blob/e4fcc31c/testdata/workloads/functional-query/queries/QueryTest/basic-spilling.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/basic-spilling.test b/testdata/workloads/functional-query/queries/QueryTest/basic-spilling.test new file mode 100644 index 0000000..513ba2c --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/basic-spilling.test @@ -0,0 +1,16 @@ +==== +---- QUERY +# A basic spilling query to exercise spill-to-disk end-to-end. +set buffer_pool_limit=90m; +set default_spillable_buffer_size=64k; +select count(*) +from (select distinct o_orderdate, o_custkey, o_comment + from tpch_parquet.orders) v; +---- RESULTS +1500000 +---- TYPES +BIGINT +---- RUNTIME_PROFILE +# Verify that spilling was activated. +row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) +==== http://git-wip-us.apache.org/repos/asf/impala/blob/e4fcc31c/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test b/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test deleted file mode 100644 index 48649e5..0000000 --- a/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test +++ /dev/null @@ -1,15 +0,0 @@ -==== ----- QUERY -set buffer_pool_limit=90m; -set default_spillable_buffer_size=64k; -select count(*) -from (select distinct o_orderdate, o_custkey, o_comment - from tpch_parquet.orders) v; ----- RESULTS -1500000 ----- TYPES -BIGINT ----- RUNTIME_PROFILE -# Verify that spilling was activated. -row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) -==== http://git-wip-us.apache.org/repos/asf/impala/blob/e4fcc31c/tests/custom_cluster/test_disk_spill_configurations.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_disk_spill_configurations.py b/tests/custom_cluster/test_disk_spill_configurations.py new file mode 100644 index 0000000..efddd23 --- /dev/null +++ b/tests/custom_cluster/test_disk_spill_configurations.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest + +from tests.common.custom_cluster_test_suite import CustomClusterTestSuite + +class TestDiskSpillConfigurations(CustomClusterTestSuite): + """Tests to exercise non-default disk spill configurations end-to-end.""" + + @classmethod + def get_workload(self): + return 'functional-query' + + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args("--disk_spill_encryption=false") + def test_disk_spill_encryption_disabled(self, vector): + """Disk spill encryption is enabled by default. We only need a custom cluster to test + the non-default configuration.""" + self.run_test_case('QueryTest/basic-spilling', vector) http://git-wip-us.apache.org/repos/asf/impala/blob/e4fcc31c/tests/custom_cluster/test_disk_spill_encryption.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_disk_spill_encryption.py b/tests/custom_cluster/test_disk_spill_encryption.py deleted file mode 100644 index c9a5aeb..0000000 --- a/tests/custom_cluster/test_disk_spill_encryption.py +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pytest - -from tests.common.custom_cluster_test_suite import CustomClusterTestSuite - -class TestDiskSpillEncryption(CustomClusterTestSuite): - """ Tests to exercise disk spill encryption end-to-end. """ - - @classmethod - def get_workload(self): - return 'functional-query' - - @pytest.mark.execute_serially - @CustomClusterTestSuite.with_args("--disk_spill_encryption=true") - def test_spilling_query(self, vector): - self.run_test_case('QueryTest/disk-spill-encryption', vector)
