This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new afdabb260a [Benchmarks] Make `partitions` default to number of cores instead of 2 (#8292)
afdabb260a is described below
commit afdabb260a32e1d3e2119b48b93e47d851cf765f
Author: Andy Grove <[email protected]>
AuthorDate: Wed Nov 22 00:24:42 2023 -0700
[Benchmarks] Make `partitions` default to number of cores instead of 2 (#8292)
* Default partitions to num cores
* update test
---
benchmarks/src/sort.rs | 5 +++--
benchmarks/src/tpch/run.rs | 6 +++---
benchmarks/src/util/options.rs | 8 ++++----
3 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/benchmarks/src/sort.rs b/benchmarks/src/sort.rs
index 5643c85619..224f2b19c7 100644
--- a/benchmarks/src/sort.rs
+++ b/benchmarks/src/sort.rs
@@ -148,8 +148,9 @@ impl RunOpt {
println!("Executing '{title}' (sorting by: {expr:?})");
rundata.start_new_case(title);
for i in 0..self.common.iterations {
- let config =
- SessionConfig::new().with_target_partitions(self.common.partitions);
+ let config = SessionConfig::new().with_target_partitions(
+ self.common.partitions.unwrap_or(num_cpus::get()),
+ );
let ctx = SessionContext::new_with_config(config);
let (rows, elapsed) =
exec_sort(&ctx, &expr, &test_file, self.common.debug).await?;
diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs
index 171b074d2a..5193d578fb 100644
--- a/benchmarks/src/tpch/run.rs
+++ b/benchmarks/src/tpch/run.rs
@@ -285,7 +285,7 @@ impl RunOpt {
}
fn partitions(&self) -> usize {
- self.common.partitions
+ self.common.partitions.unwrap_or(num_cpus::get())
}
}
@@ -325,7 +325,7 @@ mod tests {
let path = get_tpch_data_path()?;
let common = CommonOpt {
iterations: 1,
- partitions: 2,
+ partitions: Some(2),
batch_size: 8192,
debug: false,
};
@@ -357,7 +357,7 @@ mod tests {
let path = get_tpch_data_path()?;
let common = CommonOpt {
iterations: 1,
- partitions: 2,
+ partitions: Some(2),
batch_size: 8192,
debug: false,
};
diff --git a/benchmarks/src/util/options.rs b/benchmarks/src/util/options.rs
index 1d86d10fb8..b9398e5b52 100644
--- a/benchmarks/src/util/options.rs
+++ b/benchmarks/src/util/options.rs
@@ -26,9 +26,9 @@ pub struct CommonOpt {
#[structopt(short = "i", long = "iterations", default_value = "3")]
pub iterations: usize,
- /// Number of partitions to process in parallel
- #[structopt(short = "n", long = "partitions", default_value = "2")]
- pub partitions: usize,
+ /// Number of partitions to process in parallel. Defaults to number of available cores.
+ #[structopt(short = "n", long = "partitions")]
+ pub partitions: Option<usize>,
/// Batch size when reading CSV or Parquet files
#[structopt(short = "s", long = "batch-size", default_value = "8192")]
@@ -48,7 +48,7 @@ impl CommonOpt {
/// Modify the existing config appropriately
pub fn update_config(&self, config: SessionConfig) -> SessionConfig {
config
- .with_target_partitions(self.partitions)
+ .with_target_partitions(self.partitions.unwrap_or(num_cpus::get()))
.with_batch_size(self.batch_size)
}
}