This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 07ae738b4 Add sqlplanner benchmarks (#5256)
07ae738b4 is described below
commit 07ae738b48b6001498a4b10e9750422f9d8e1b7f
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Feb 15 15:11:10 2023 +0100
Add sqlplanner benchmarks (#5256)
---
datafusion/core/benches/sql_planner.rs | 76 +++++++++++++++++++++++-----------
1 file changed, 52 insertions(+), 24 deletions(-)
diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs
index 1feba8045..559de4ba3 100644
--- a/datafusion/core/benches/sql_planner.rs
+++ b/datafusion/core/benches/sql_planner.rs
@@ -24,64 +24,92 @@ mod data_utils;
use crate::criterion::Criterion;
use arrow::datatypes::{DataType, Field, Schema};
use datafusion::datasource::MemTable;
-use datafusion::error::Result;
use datafusion::execution::context::SessionContext;
-use parking_lot::Mutex;
use std::sync::Arc;
use tokio::runtime::Runtime;
-fn plan(ctx: Arc<Mutex<SessionContext>>, sql: &str) {
+/// Create a logical plan from the specified sql
+fn logical_plan(ctx: &SessionContext, sql: &str) {
let rt = Runtime::new().unwrap();
- criterion::black_box(rt.block_on(ctx.lock().sql(sql)).unwrap());
+ criterion::black_box(rt.block_on(ctx.sql(sql)).unwrap());
}
-/// Create schema representing a large table
-pub fn create_schema(column_prefix: &str) -> Schema {
- let fields = (0..200)
+/// Create a physical ExecutionPlan (by way of logical plan)
+fn physical_plan(ctx: &SessionContext, sql: &str) {
+ let rt = Runtime::new().unwrap();
+ criterion::black_box(rt.block_on(async {
+ ctx.sql(sql)
+ .await
+ .unwrap()
+ .create_physical_plan()
+ .await
+ .unwrap()
+ }));
+}
+
+/// Create schema with the specified number of columns
+pub fn create_schema(column_prefix: &str, num_columns: usize) -> Schema {
+ let fields = (0..num_columns)
.map(|i| Field::new(format!("{column_prefix}{i}"), DataType::Int32, true))
.collect();
Schema::new(fields)
}
-pub fn create_table_provider(column_prefix: &str) -> Result<Arc<MemTable>> {
- let schema = Arc::new(create_schema(column_prefix));
- MemTable::try_new(schema, vec![]).map(Arc::new)
+pub fn create_table_provider(column_prefix: &str, num_columns: usize) -> Arc<MemTable> {
+ let schema = Arc::new(create_schema(column_prefix, num_columns));
+ MemTable::try_new(schema, vec![]).map(Arc::new).unwrap()
}
-fn create_context() -> Result<Arc<Mutex<SessionContext>>> {
+fn create_context() -> SessionContext {
let ctx = SessionContext::new();
- ctx.register_table("t1", create_table_provider("a")?)?;
- ctx.register_table("t2", create_table_provider("b")?)?;
- Ok(Arc::new(Mutex::new(ctx)))
+ ctx.register_table("t1", create_table_provider("a", 200))
+ .unwrap();
+ ctx.register_table("t2", create_table_provider("b", 200))
+ .unwrap();
+ ctx.register_table("t700", create_table_provider("c", 700))
+ .unwrap();
+ ctx
}
fn criterion_benchmark(c: &mut Criterion) {
- let ctx = create_context().unwrap();
+ let ctx = create_context();
+
+ // Test simplest
+ // https://github.com/apache/arrow-datafusion/issues/5157
+ c.bench_function("logical_select_one_from_700", |b| {
+ b.iter(|| logical_plan(&ctx, "SELECT c1 FROM t700"))
+ });
+
+ // Test simplest
+ // https://github.com/apache/arrow-datafusion/issues/5157
+ c.bench_function("physical_select_one_from_700", |b| {
+ b.iter(|| physical_plan(&ctx, "SELECT c1 FROM t700"))
+ });
- c.bench_function("trivial join low numbered columns", |b| {
+ c.bench_function("logical_trivial_join_low_numbered_columns", |b| {
b.iter(|| {
- plan(
- ctx.clone(),
+ logical_plan(
+ &ctx,
"SELECT t1.a2, t2.b2 \
FROM t1, t2 WHERE a1 = b1",
)
})
});
- c.bench_function("trivial join high numbered columns", |b| {
+ c.bench_function("logical_trivial_join_high_numbered_columns", |b| {
b.iter(|| {
- plan(
- ctx.clone(),
+ logical_plan(
+ &ctx,
"SELECT t1.a99, t2.b99 \
FROM t1, t2 WHERE a199 = b199",
)
})
});
- c.bench_function("aggregate with join", |b| {
+ c.bench_function("logical_aggregate_with_join", |b| {
b.iter(|| {
- plan(
- ctx.clone(),
+ logical_plan(
+ &ctx,
"SELECT t1.a99, MIN(t2.b1), MAX(t2.b199), AVG(t2.b123), COUNT(t2.b73) \
FROM t1 JOIN t2 ON t1.a199 = t2.b199 GROUP BY t1.a99",
)