This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 20434b0172 Reduce parquet struct projection benchmark data volume
(#21187)
20434b0172 is described below
commit 20434b01721905769c9a8f85e8b497a245ccf07c
Author: Matthew Kim <[email protected]>
AuthorDate: Fri Mar 27 00:34:17 2026 -0400
Reduce parquet struct projection benchmark data volume (#21187)
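This PR reduces the data volume in the parquet struct projection
benchmark (fewer and smaller record batches, smaller row groups, and
shorter large strings) and configures each Criterion group with a small
sample size and short warm-up and measurement times so it runs faster.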
It amends the recently introduced benchmarks in
https://github.com/apache/datafusion/pull/21180.
---------
Co-authored-by: Adrian Garcia Badaracco
<[email protected]>
Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
datafusion/core/benches/parquet_struct_projection.rs | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/datafusion/core/benches/parquet_struct_projection.rs
b/datafusion/core/benches/parquet_struct_projection.rs
index d6cf86a91c..65b3905da8 100644
--- a/datafusion/core/benches/parquet_struct_projection.rs
+++ b/datafusion/core/benches/parquet_struct_projection.rs
@@ -38,14 +38,15 @@ use parquet::file::properties::{WriterProperties,
WriterVersion};
use std::hint::black_box;
use std::path::Path;
use std::sync::Arc;
+use std::time::Duration;
use tempfile::NamedTempFile;
use tokio::runtime::Runtime;
-const NUM_BATCHES: usize = 64;
-const WRITE_RECORD_BATCH_SIZE: usize = 4096;
-const ROW_GROUP_ROW_COUNT: usize = 65536;
-const EXPECTED_ROW_GROUPS: usize = 4;
-const LARGE_STRING_LEN: usize = 128 * 1024;
+const NUM_BATCHES: usize = 2;
+const WRITE_RECORD_BATCH_SIZE: usize = 256;
+const ROW_GROUP_ROW_COUNT: usize = 256;
+const EXPECTED_ROW_GROUPS: usize = 2;
+const LARGE_STRING_LEN: usize = 16 * 1024;
fn narrow_schema() -> SchemaRef {
let struct_fields = Fields::from(vec![
@@ -209,6 +210,9 @@ fn narrow_benchmarks(c: &mut Criterion) {
let ctx = create_context(&rt, &file_path, "t");
let mut group = c.benchmark_group("narrow_struct");
+ group.sample_size(10);
+ group.warm_up_time(Duration::from_secs(1));
+ group.measurement_time(Duration::from_secs(2));
// baseline: full struct, must decode both leaves
group.bench_function("select_struct", |b| {
@@ -253,6 +257,9 @@ fn wide_benchmarks(c: &mut Criterion) {
let ctx = create_context(&rt, &file_path, "t");
let mut group = c.benchmark_group("wide_struct");
+ group.sample_size(10);
+ group.warm_up_time(Duration::from_secs(1));
+ group.measurement_time(Duration::from_secs(2));
// baseline: full struct, must decode all 5 leaves
group.bench_function("select_struct", |b| {
@@ -359,6 +366,9 @@ fn nested_benchmarks(c: &mut Criterion) {
let ctx = create_context(&rt, &file_path, "t");
let mut group = c.benchmark_group("nested_struct");
+ group.sample_size(10);
+ group.warm_up_time(Duration::from_secs(1));
+ group.measurement_time(Duration::from_secs(2));
// baseline: full outer struct, decode all 3 leaves
group.bench_function("select_struct", |b| {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]