This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new d93747967 perf: remove BufReader wrapper when copying spill files to shuffle output (#3861)
d93747967 is described below
commit d9374796764f6c76482bbd51f24bc141288da812
Author: Andy Grove <[email protected]>
AuthorDate: Wed Apr 1 04:23:34 2026 -0600
perf: remove BufReader wrapper when copying spill files to shuffle output (#3861)
Remove the BufReader wrapper around spill file reads in
MultiPartitionShuffleRepartitioner::shuffle_write(). std::io::copy
already buffers internally, and wrapping in BufReader defeats the
copy_file_range/sendfile zero-copy specialization on Linux.
---
native/shuffle/src/partitioners/multi_partition.rs | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/native/shuffle/src/partitioners/multi_partition.rs b/native/shuffle/src/partitioners/multi_partition.rs
index 655bee351..7de9314f5 100644
--- a/native/shuffle/src/partitioners/multi_partition.rs
+++ b/native/shuffle/src/partitioners/multi_partition.rs
@@ -35,7 +35,7 @@ use itertools::Itertools;
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::fs::{File, OpenOptions};
-use std::io::{BufReader, BufWriter, Seek, Write};
+use std::io::{BufWriter, Seek, Write};
use std::sync::Arc;
use tokio::time::Instant;
@@ -582,7 +582,9 @@ impl ShufflePartitioner for MultiPartitionShuffleRepartitioner {
// if we wrote a spill file for this partition then copy the
// contents into the shuffle file
if let Some(spill_path) = self.partition_writers[i].path() {
- let mut spill_file = BufReader::new(File::open(spill_path)?);
+ // Use raw File handle (not BufReader) so that std::io::copy
+ // can use copy_file_range/sendfile for zero-copy on Linux.
+ let mut spill_file = File::open(spill_path)?;
let mut write_timer = self.metrics.write_time.timer();
std::io::copy(&mut spill_file, &mut output_data)?;
write_timer.stop();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]