This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new d93747967 perf: remove BufReader wrapper when copying spill files to shuffle output (#3861)
d93747967 is described below

commit d9374796764f6c76482bbd51f24bc141288da812
Author: Andy Grove <[email protected]>
AuthorDate: Wed Apr 1 04:23:34 2026 -0600

    perf: remove BufReader wrapper when copying spill files to shuffle output (#3861)
    
    Remove the BufReader wrapper around spill file reads in
    MultiPartitionShuffleRepartitioner::shuffle_write(). std::io::copy
    already buffers internally, and wrapping in BufReader defeats the
    copy_file_range/sendfile zero-copy specialization on Linux.
---
 native/shuffle/src/partitioners/multi_partition.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/native/shuffle/src/partitioners/multi_partition.rs b/native/shuffle/src/partitioners/multi_partition.rs
index 655bee351..7de9314f5 100644
--- a/native/shuffle/src/partitioners/multi_partition.rs
+++ b/native/shuffle/src/partitioners/multi_partition.rs
@@ -35,7 +35,7 @@ use itertools::Itertools;
 use std::fmt;
 use std::fmt::{Debug, Formatter};
 use std::fs::{File, OpenOptions};
-use std::io::{BufReader, BufWriter, Seek, Write};
+use std::io::{BufWriter, Seek, Write};
 use std::sync::Arc;
 use tokio::time::Instant;
 
@@ -582,7 +582,9 @@ impl ShufflePartitioner for MultiPartitionShuffleRepartitioner {
                 // if we wrote a spill file for this partition then copy the
                 // contents into the shuffle file
                 if let Some(spill_path) = self.partition_writers[i].path() {
-                    let mut spill_file = BufReader::new(File::open(spill_path)?);
+                    // Use raw File handle (not BufReader) so that std::io::copy
+                    // can use copy_file_range/sendfile for zero-copy on Linux.
+                    let mut spill_file = File::open(spill_path)?;
                     let mut write_timer = self.metrics.write_time.timer();
                     std::io::copy(&mut spill_file, &mut output_data)?;
                     write_timer.stop();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to