This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new afdabb260a [Benchmarks] Make `partitions` default to number of cores instead of 2 (#8292)
afdabb260a is described below

commit afdabb260a32e1d3e2119b48b93e47d851cf765f
Author: Andy Grove <[email protected]>
AuthorDate: Wed Nov 22 00:24:42 2023 -0700

    [Benchmarks] Make `partitions` default to number of cores instead of 2 (#8292)
    
    * Default partitions to num cores
    
    * update test
---
 benchmarks/src/sort.rs         | 5 +++--
 benchmarks/src/tpch/run.rs     | 6 +++---
 benchmarks/src/util/options.rs | 8 ++++----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/benchmarks/src/sort.rs b/benchmarks/src/sort.rs
index 5643c85619..224f2b19c7 100644
--- a/benchmarks/src/sort.rs
+++ b/benchmarks/src/sort.rs
@@ -148,8 +148,9 @@ impl RunOpt {
             println!("Executing '{title}' (sorting by: {expr:?})");
             rundata.start_new_case(title);
             for i in 0..self.common.iterations {
-                let config =
-                    SessionConfig::new().with_target_partitions(self.common.partitions);
+                let config = SessionConfig::new().with_target_partitions(
+                    self.common.partitions.unwrap_or(num_cpus::get()),
+                );
                 let ctx = SessionContext::new_with_config(config);
                 let (rows, elapsed) =
                     exec_sort(&ctx, &expr, &test_file, self.common.debug).await?;
diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs
index 171b074d2a..5193d578fb 100644
--- a/benchmarks/src/tpch/run.rs
+++ b/benchmarks/src/tpch/run.rs
@@ -285,7 +285,7 @@ impl RunOpt {
     }
 
     fn partitions(&self) -> usize {
-        self.common.partitions
+        self.common.partitions.unwrap_or(num_cpus::get())
     }
 }
 
@@ -325,7 +325,7 @@ mod tests {
         let path = get_tpch_data_path()?;
         let common = CommonOpt {
             iterations: 1,
-            partitions: 2,
+            partitions: Some(2),
             batch_size: 8192,
             debug: false,
         };
@@ -357,7 +357,7 @@ mod tests {
         let path = get_tpch_data_path()?;
         let common = CommonOpt {
             iterations: 1,
-            partitions: 2,
+            partitions: Some(2),
             batch_size: 8192,
             debug: false,
         };
diff --git a/benchmarks/src/util/options.rs b/benchmarks/src/util/options.rs
index 1d86d10fb8..b9398e5b52 100644
--- a/benchmarks/src/util/options.rs
+++ b/benchmarks/src/util/options.rs
@@ -26,9 +26,9 @@ pub struct CommonOpt {
     #[structopt(short = "i", long = "iterations", default_value = "3")]
     pub iterations: usize,
 
-    /// Number of partitions to process in parallel
-    #[structopt(short = "n", long = "partitions", default_value = "2")]
-    pub partitions: usize,
+    /// Number of partitions to process in parallel. Defaults to number of available cores.
+    #[structopt(short = "n", long = "partitions")]
+    pub partitions: Option<usize>,
 
     /// Batch size when reading CSV or Parquet files
     #[structopt(short = "s", long = "batch-size", default_value = "8192")]
@@ -48,7 +48,7 @@ impl CommonOpt {
     /// Modify the existing config appropriately
     pub fn update_config(&self, config: SessionConfig) -> SessionConfig {
         config
-            .with_target_partitions(self.partitions)
+            .with_target_partitions(self.partitions.unwrap_or(num_cpus::get()))
             .with_batch_size(self.batch_size)
     }
 }

Reply via email to