This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 41e73782dd minor: SortExec measure elapsed_compute time when sorting
(#12099)
41e73782dd is described below
commit 41e73782ddd1bf72a2c18d19aca879df8a52c9b5
Author: Martin Hilton <[email protected]>
AuthorDate: Thu Aug 22 16:42:28 2024 +0100
minor: SortExec measure elapsed_compute time when sorting (#12099)
* minor: SortExec measure elapsed_compute time when sorting
Whilst investigating query execution performance I noticed that
some SortExec nodes were reporting suspiciously short elapsed_compute
times. It appears that the SortExec node wasn't running the
elapsed_compute timer when it was doing the actual sorting operation.
* fix: apply review suggestions
---
datafusion/physical-plan/src/sorts/sort.rs | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/datafusion/physical-plan/src/sorts/sort.rs
b/datafusion/physical-plan/src/sorts/sort.rs
index e7e1c5481f..a81b09948c 100644
--- a/datafusion/physical-plan/src/sorts/sort.rs
+++ b/datafusion/physical-plan/src/sorts/sort.rs
@@ -499,6 +499,12 @@ impl ExternalSorter {
metrics: BaselineMetrics,
) -> Result<SendableRecordBatchStream> {
assert_ne!(self.in_mem_batches.len(), 0);
+
+ // The elapsed compute timer is updated when the value is dropped.
+ // There is no need for an explicit call to drop.
+ let elapsed_compute = metrics.elapsed_compute().clone();
+ let _timer = elapsed_compute.timer();
+
if self.in_mem_batches.len() == 1 {
let batch = self.in_mem_batches.remove(0);
let reservation = self.reservation.take();
@@ -552,7 +558,9 @@ impl ExternalSorter {
let fetch = self.fetch;
let expressions = Arc::clone(&self.expr);
let stream = futures::stream::once(futures::future::lazy(move |_| {
+ let timer = metrics.elapsed_compute().timer();
let sorted = sort_batch(&batch, &expressions, fetch)?;
+ timer.done();
metrics.record_output(sorted.num_rows());
drop(batch);
drop(reservation);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]