This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new a9f316bd85 Use in-memory buffer for arrow_writer benchmark (#7823)
a9f316bd85 is described below

commit a9f316bd85362d2757ece8af1483ab6df90941c0
Author: Jörn Horstmann <[email protected]>
AuthorDate: Sun Jun 29 19:07:15 2025 +0200

    Use in-memory buffer for arrow_writer benchmark (#7823)
    
    # Which issue does this PR close?
    
    Prerequisite for investigating parquet writing performance (#7822).
    
    # Rationale for this change
    
    The benchmark should measure the CPU overhead of parquet writing, not
    the OS or filesystem parts of it. Running the benchmark showed that
    writing to the file adds nearly 50% overhead, which makes profiling more
    difficult by hiding the bottlenecks inside the parquet code itself.
    
    # What changes are included in this PR?
    
    Use a Vec instead of an unbuffered File as the sink.
    
    # Are these changes tested?
    
    Tested by running the benchmark.
    
    # Are there any user-facing changes?
    
    No
---
 parquet/benches/arrow_writer.rs | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs
index 4166d962b5..a04e0bf183 100644
--- a/parquet/benches/arrow_writer.rs
+++ b/parquet/benches/arrow_writer.rs
@@ -19,8 +19,6 @@
 extern crate criterion;
 
 use criterion::{Criterion, Throughput};
-use std::env;
-use std::fs::File;
 
 extern crate arrow;
 extern crate parquet;
@@ -349,9 +347,8 @@ fn write_batch_enable_bloom_filter(batch: &RecordBatch) -> Result<()> {
 
 #[inline]
 fn write_batch_with_option(batch: &RecordBatch, props: Option<WriterProperties>) -> Result<()> {
-    let path = env::temp_dir().join("arrow_writer.temp");
-    let file = File::create(path).unwrap();
-    let mut writer = ArrowWriter::try_new(file, batch.schema(), props)?;
+    let mut file = vec![];
+    let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), props)?;
 
     writer.write(batch)?;
     writer.close()?;

Reply via email to