IMPALA-6713: Fix request for unneeded memory in partial sort When a Sorter::Run is initialized, if it is an initial run and has varlen data, it requests an extra buffer to have space to sort the varlen data should it need to spill to disk.
This extra buffer is not needed in the case of partial sorts, which do not spill, and because this extra buffer was not included in the calculation of the minimum required reservation, requesting it caused the partial sort to fail in cases where the partial sort only had its minimum reservation available to use. The solution is to not request the extra memory for partial sorts. Testing: - Added a test to test_sort.py that ensures the partial sort can complete successfully even if additional memory requests beyond its minimum reservation are denied. Change-Id: I2d9c0863009021340d8b684669b371a2cfb1ecad Reviewed-on: http://gerrit.cloudera.org:8080/10031 Reviewed-by: Thomas Tauber-Marshall <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1f37ca2b Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1f37ca2b Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1f37ca2b Branch: refs/heads/2.x Commit: 1f37ca2b4e6a164b59a518e2005b76cbd7b5c209 Parents: b2316be Author: Thomas Tauber-Marshall <[email protected]> Authored: Thu Apr 12 03:36:06 2018 +0000 Committer: Impala Public Jenkins <[email protected]> Committed: Wed Apr 18 21:17:47 2018 +0000 ---------------------------------------------------------------------- be/src/runtime/sorter.cc | 5 +++-- tests/query_test/test_sort.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/1f37ca2b/be/src/runtime/sorter.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc index 5ef321b..ead4065 100644 --- a/be/src/runtime/sorter.cc +++ b/be/src/runtime/sorter.cc @@ -630,7 +630,8 @@ Sorter::Run::Run(Sorter* parent, TupleDescriptor* sort_tuple_desc, bool initial_ num_tuples_(0) {} Status Sorter::Run::Init() { - int num_to_create = 1 + has_var_len_slots_ + (has_var_len_slots_ && initial_run_); + int num_to_create = 1 + has_var_len_slots_ + + (has_var_len_slots_ && initial_run_ && sorter_->enable_spilling_); int64_t required_mem = num_to_create * sorter_->page_len_; if (!sorter_->buffer_pool_client_->IncreaseReservationToFit(required_mem)) { return Status(Substitute( @@ -641,7 +642,7 @@ Status Sorter::Run::Init() { RETURN_IF_ERROR(AddPage(&fixed_len_pages_)); if (has_var_len_slots_) { RETURN_IF_ERROR(AddPage(&var_len_pages_)); - if (initial_run_) { + if (initial_run_ && sorter_->enable_spilling_) { // Need additional var len page to reorder var len data in UnpinAllPages(). RETURN_IF_ERROR(var_len_copy_page_.Init(sorter_)); } http://git-wip-us.apache.org/repos/asf/impala/blob/1f37ca2b/tests/query_test/test_sort.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_sort.py b/tests/query_test/test_sort.py index 23de9f2..e44d086 100644 --- a/tests/query_test/test_sort.py +++ b/tests/query_test/test_sort.py @@ -203,3 +203,19 @@ class TestRandomSort(ImpalaTestSuite): functional.alltypestiny""" results = transpose_results(self.execute_query(query).data, lambda x: float(x)) assert (results == sorted(results)) + + +class TestPartialSort(ImpalaTestSuite): + """Test class to do functional validation of partial sorts.""" + + def test_partial_sort_min_reservation(self, unique_database): + """Test that the partial sort node can operate if it only gets its minimum + memory reservation.""" + table_name = "%s.kudu_test" % unique_database + self.client.set_configuration_option( + "debug_action", "-1:OPEN:[email protected]") + self.execute_query("""create table %s (col0 string primary key) + partition by hash(col0) partitions 8 stored as kudu""" % table_name) + result = self.execute_query( + "insert into %s select string_col from functional.alltypessmall" % table_name) + assert "PARTIAL SORT" in result.runtime_profile, result.runtime_profile
