IMPALA-5870: Improve runtime profile for partial sort
A recent change (IMPALA-5498) added the ability to do partial sorts,
which divide their input into runs, each of which is sorted
individually, avoiding the need to spill. Some of the runtime profile
output was left unchanged from regular sorts, leading to confusion.
For partial sorts, this patch removes the counters 'SpilledRuns' and
'TotalMergesPerformed' since they will always be 0, and it renames the
'InitialRunsCreated' counter to 'RunsCreated', since 'Initial' refers to
the fact that in a regular sort those runs may later be spilled or merged.
It also adds a profile info string 'SortType' that can take the values
'Total', 'TopN', or 'Partial' to reflect the type of exec node being
used.
Example profile snippet for a partial sort:
SORT_NODE (id=2):(Total: 403.261us, non-child: 382.029us, % non-child: 94.73%)
SortType: Partial
ExecOption: Codegen Enabled
- NumRowsPerRun: (Avg: 44 (44) ; Min: 44 (44) ; Max: 44 (44) ; Number of samples: 1)
- InMemorySortTime: 34.201us
- PeakMemoryUsage: 2.02 MB (2117632)
- RowsReturned: 44 (44)
- RowsReturnedRate: 109.11 K/sec
- RunsCreated: 1 (1)
- SortDataSize: 572.00 B (572)
Testing:
- Manually ran several sorting queries and inspected their profiles
- Updated a kudu_insert test that relied on the 'SpilledRuns' counter
being 0 for a partial sort; it now checks for 'SortType: Partial'.
Change-Id: I2b15af78d8299db8edc44ff820c85db1cbe0be1b
Reviewed-on: http://gerrit.cloudera.org:8080/8123
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4d49099a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4d49099a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4d49099a
Branch: refs/heads/master
Commit: 4d49099a8bbea3f24f53272f321a19266dc932b8
Parents: 439f245
Author: Thomas Tauber-Marshall <[email protected]>
Authored: Thu Sep 21 12:04:25 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed Sep 27 18:55:26 2017 +0000
----------------------------------------------------------------------
be/src/exec/partial-sort-node.cc | 1 +
be/src/exec/sort-node.cc | 1 +
be/src/exec/topn-node.cc | 1 +
be/src/runtime/sorter.cc | 10 +++++++---
.../functional-query/queries/QueryTest/kudu_insert.test | 2 +-
5 files changed, 11 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/be/src/exec/partial-sort-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partial-sort-node.cc b/be/src/exec/partial-sort-node.cc
index 88b2f26..107c29f 100644
--- a/be/src/exec/partial-sort-node.cc
+++ b/be/src/exec/partial-sort-node.cc
@@ -50,6 +50,7 @@ Status PartialSortNode::Init(const TPlanNode& tnode,
RuntimeState* state) {
*child(0)->row_desc(), state, &sort_tuple_exprs_));
is_asc_order_ = tnode.sort_node.sort_info.is_asc_order;
nulls_first_ = tnode.sort_node.sort_info.nulls_first;
+ runtime_profile()->AddInfoString("SortType", "Partial");
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/be/src/exec/sort-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/sort-node.cc b/be/src/exec/sort-node.cc
index 80df214..33b3acb 100644
--- a/be/src/exec/sort-node.cc
+++ b/be/src/exec/sort-node.cc
@@ -45,6 +45,7 @@ Status SortNode::Init(const TPlanNode& tnode, RuntimeState*
state) {
*child(0)->row_desc(), state, &sort_tuple_exprs_));
is_asc_order_ = tnode.sort_node.sort_info.is_asc_order;
nulls_first_ = tnode.sort_node.sort_info.nulls_first;
+ runtime_profile()->AddInfoString("SortType", "Total");
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/be/src/exec/topn-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/topn-node.cc b/be/src/exec/topn-node.cc
index 9b94fe7..5bba89d 100644
--- a/be/src/exec/topn-node.cc
+++ b/be/src/exec/topn-node.cc
@@ -65,6 +65,7 @@ Status TopNNode::Init(const TPlanNode& tnode, RuntimeState*
state) {
nulls_first_ = tnode.sort_node.sort_info.nulls_first;
DCHECK_EQ(conjuncts_.size(), 0)
<< "TopNNode should never have predicates to evaluate.";
+ runtime_profile()->AddInfoString("SortType", "TopN");
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/be/src/runtime/sorter.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc
index d2e6bb2..16984ca 100644
--- a/be/src/runtime/sorter.cc
+++ b/be/src/runtime/sorter.cc
@@ -1516,9 +1516,13 @@ Status Sorter::Prepare(ObjectPool* obj_pool, MemPool*
expr_mem_pool) {
in_mem_tuple_sorter_.reset(new TupleSorter(compare_less_than_, page_len_,
sort_tuple_desc->byte_size(), state_));
- initial_runs_counter_ = ADD_COUNTER(profile_, "InitialRunsCreated",
TUnit::UNIT);
- spilled_runs_counter_ = ADD_COUNTER(profile_, "SpilledRuns", TUnit::UNIT);
- num_merges_counter_ = ADD_COUNTER(profile_, "TotalMergesPerformed",
TUnit::UNIT);
+ if (enable_spilling_) {
+ initial_runs_counter_ = ADD_COUNTER(profile_, "InitialRunsCreated",
TUnit::UNIT);
+ spilled_runs_counter_ = ADD_COUNTER(profile_, "SpilledRuns", TUnit::UNIT);
+ num_merges_counter_ = ADD_COUNTER(profile_, "TotalMergesPerformed",
TUnit::UNIT);
+ } else {
+ initial_runs_counter_ = ADD_COUNTER(profile_, "RunsCreated", TUnit::UNIT);
+ }
in_mem_sort_timer_ = ADD_TIMER(profile_, "InMemorySortTime");
sorted_data_size_ = ADD_COUNTER(profile_, "SortDataSize", TUnit::BYTES);
run_sizes_ = ADD_SUMMARY_STATS_COUNTER(profile_, "NumRowsPerRun",
TUnit::UNIT);
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
----------------------------------------------------------------------
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
index 1150898..420e42c 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
@@ -449,5 +449,5 @@ set mem_limit=400m;
create table kudu_test primary key(a, b) partition by hash(a, b) partitions 8
stored as kudu as
select l_orderkey a, concat(l_comment, l_comment, l_comment) b from
tpch.lineitem
---- RUNTIME_PROFILE
-row_regex: .*SpilledRuns: 0 \(0\)
+row_regex: .*SortType: Partial
====