Re: [PR] Add basic sql benchmark runner for running sql benchmarks [datafusion]

via GitHub Tue, 23 Jun 2026 11:47:31 -0700


alamb commented on code in PR #23052:
URL: https://github.com/apache/datafusion/pull/23052#discussion_r3462036810



##########
benchmarks/src/sql_benchmark_runner.rs:
##########
@@ -0,0 +1,1418 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Shared SQL benchmark runner used by `benchmark_runner` and the Criterion

Review Comment:
   ❤️ 



##########
benchmarks/src/bin/benchmark_runner.rs:
##########
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! DataFusion SQL benchmark runner.
+
+use datafusion_benchmarks::sql_benchmark_runner;
+
+#[cfg(feature = "snmalloc")]
+#[global_allocator]
+static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
+
+// `cargo clippy --all-features` enables both allocator features, so prefer
+// `snmalloc` in that case and fall back to `mimalloc` otherwise.
+#[cfg(all(not(feature = "snmalloc"), feature = "mimalloc"))]
+#[global_allocator]
+static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+#[tokio::main]
+async fn main() {
+    env_logger::init();
+    if let Err(error) = sql_benchmark_runner::run_cli().await {

Review Comment:
   Minor nit -- it might make sense to move `run_cli` and relevant code such as 
argument processing, main, etc in the actual binary rather than a function in 
the a crate. 
   
   I don't think the argument processing will be used by anything other than 
this binary



##########
benchmarks/src/sql_benchmark_runner.rs:
##########
@@ -0,0 +1,1418 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Shared SQL benchmark runner used by `benchmark_runner` and the Criterion
+//! SQL benchmark harness.
+
+use crate::sql_benchmark::SqlBenchmark;
+use crate::util::{BenchmarkRun, CommonOpt, print_memory_stats};
+use clap::{ArgAction, ArgMatches, CommandFactory, FromArgMatches, Parser};
+use criterion::{Criterion, SamplingMode};
+use datafusion::error::Result;
+use datafusion::prelude::SessionContext;
+use datafusion_common::{DataFusionError, exec_datafusion_err, 
instant::Instant};
+use datafusion_common_runtime::SpawnedTask;
+use std::any::Any;
+use std::collections::{BTreeMap, HashMap};
+use std::fs;
+use std::io::IsTerminal;
+use std::panic::{AssertUnwindSafe, catch_unwind};
+use std::path::{Path, PathBuf};
+use tokio::runtime::Runtime;
+
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct BenchmarkFilter {
+    pub name: Option<String>,
+    pub subgroup: Option<String>,
+    pub query: Option<String>,
+}
+
+#[derive(Debug, Clone)]
+pub struct SqlRunConfig {
+    pub common: CommonOpt,
+    pub filter: BenchmarkFilter,
+    pub persist_results: bool,
+    pub validate_results: bool,
+    pub output: Option<PathBuf>,
+}
+
+#[derive(Debug)]
+pub enum CliAction {
+    List,
+    Simple(SqlRunConfig),
+    Criterion {
+        config: SqlRunConfig,
+        save_baseline: Option<String>,
+    },
+}
+
+#[derive(Debug, Parser)]
+#[command(
+    name = "benchmark_runner",
+    about = "Run DataFusion SQL benchmarks",
+    styles = criterion_like_styles(),
+)]
+pub struct Cli {
+    #[arg(value_name = "BENCHMARK", help = "SQL benchmark group to run")]
+    pub benchmark: Option<String>,
+
+    #[arg(short = 'q', long = "query", env = "BENCH_QUERY")]
+    pub query: Option<String>,
+
+    #[arg(long = "subgroup", env = "BENCH_SUBGROUP")]
+    pub subgroup: Option<String>,
+
+    #[command(flatten)]
+    pub common: CommonOpt,
+
+    #[arg(
+        long = "criterion",
+        action = ArgAction::SetTrue,
+        help = "Run benchmarks with Criterion"
+    )]
+    pub criterion: bool,
+
+    #[arg(
+        short = 'o',
+        long = "output",
+        help = "Write simple runner results as JSON to this path"
+    )]
+    pub output: Option<PathBuf>,
+
+    #[arg(
+        long = "save-baseline",
+        value_name = "BASELINE",
+        help = "Save Criterion measurements to the named baseline"
+    )]
+    pub save_baseline: Option<String>,
+}
+
+/// Parses CLI arguments, runs the selected action, and prints any list output.
+pub async fn run_cli() -> Result<()> {
+    let matches = Cli::command().get_matches();
+    let action = cli_action_from_matches(&matches)?;
+    let output = run_cli_action(action, 
&default_sql_benchmark_directory()).await?;
+
+    if !output.is_empty() {
+        println!("{output}");
+    }
+
+    Ok(())
+}
+
+/// Runs the selected SQL benchmarks through a caller-provided Criterion 
instance.
+pub fn run_criterion_benchmarks_impl(

Review Comment:
   I recommend moving this into `benchmarks/benches/sql.rs` so that the 
criterion specific stuff is separated from the core benchmark running logic



##########
benchmarks/src/sql_benchmark_runner.rs:
##########
@@ -0,0 +1,1418 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Shared SQL benchmark runner used by `benchmark_runner` and the Criterion
+//! SQL benchmark harness.
+
+use crate::sql_benchmark::SqlBenchmark;
+use crate::util::{BenchmarkRun, CommonOpt, print_memory_stats};
+use clap::{ArgAction, ArgMatches, CommandFactory, FromArgMatches, Parser};
+use criterion::{Criterion, SamplingMode};
+use datafusion::error::Result;
+use datafusion::prelude::SessionContext;
+use datafusion_common::{DataFusionError, exec_datafusion_err, 
instant::Instant};
+use datafusion_common_runtime::SpawnedTask;
+use std::any::Any;
+use std::collections::{BTreeMap, HashMap};
+use std::fs;
+use std::io::IsTerminal;
+use std::panic::{AssertUnwindSafe, catch_unwind};
+use std::path::{Path, PathBuf};
+use tokio::runtime::Runtime;
+
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct BenchmarkFilter {
+    pub name: Option<String>,
+    pub subgroup: Option<String>,
+    pub query: Option<String>,
+}
+
+#[derive(Debug, Clone)]
+pub struct SqlRunConfig {
+    pub common: CommonOpt,
+    pub filter: BenchmarkFilter,
+    pub persist_results: bool,
+    pub validate_results: bool,
+    pub output: Option<PathBuf>,
+}
+
+#[derive(Debug)]
+pub enum CliAction {
+    List,
+    Simple(SqlRunConfig),
+    Criterion {
+        config: SqlRunConfig,
+        save_baseline: Option<String>,
+    },
+}
+
+#[derive(Debug, Parser)]

Review Comment:
   see comment above -- I recommend moving the criterion / arg parsing out of 
this module (so this is only the shared implementation with the criterion 
runner)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Add basic sql benchmark runner for running sql benchmarks [datafusion]

Reply via email to