This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 545275bff3 Start setting up tpch planning benchmarks (#8665)
545275bff3 is described below
commit 545275bff316507226c68cb9d5a0739a0d90f32e
Author: Matthew Turner <[email protected]>
AuthorDate: Sat Dec 30 09:12:26 2023 -0500
Start setting up tpch planning benchmarks (#8665)
* Start setting up tpch planning benchmarks
* Add remaining tpch queries
* Fix bench function
* Clippy
---
datafusion/core/benches/sql_planner.rs | 156 +++++++++++++++++++++++++++++++++
1 file changed, 156 insertions(+)
diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs
index 7a41b6bec6..1754129a76 100644
--- a/datafusion/core/benches/sql_planner.rs
+++ b/datafusion/core/benches/sql_planner.rs
@@ -60,6 +60,104 @@ pub fn create_table_provider(column_prefix: &str, num_columns: usize) -> Arc<Mem
MemTable::try_new(schema, vec![]).map(Arc::new).unwrap()
}
+pub fn create_tpch_schemas() -> [(String, Schema); 8] {
+ let lineitem_schema = Schema::new(vec![
+ Field::new("l_orderkey", DataType::Int64, false),
+ Field::new("l_partkey", DataType::Int64, false),
+ Field::new("l_suppkey", DataType::Int64, false),
+ Field::new("l_linenumber", DataType::Int32, false),
+ Field::new("l_quantity", DataType::Decimal128(15, 2), false),
+ Field::new("l_extendedprice", DataType::Decimal128(15, 2), false),
+ Field::new("l_discount", DataType::Decimal128(15, 2), false),
+ Field::new("l_tax", DataType::Decimal128(15, 2), false),
+ Field::new("l_returnflag", DataType::Utf8, false),
+ Field::new("l_linestatus", DataType::Utf8, false),
+ Field::new("l_shipdate", DataType::Date32, false),
+ Field::new("l_commitdate", DataType::Date32, false),
+ Field::new("l_receiptdate", DataType::Date32, false),
+ Field::new("l_shipinstruct", DataType::Utf8, false),
+ Field::new("l_shipmode", DataType::Utf8, false),
+ Field::new("l_comment", DataType::Utf8, false),
+ ]);
+
+ let orders_schema = Schema::new(vec![
+ Field::new("o_orderkey", DataType::Int64, false),
+ Field::new("o_custkey", DataType::Int64, false),
+ Field::new("o_orderstatus", DataType::Utf8, false),
+ Field::new("o_totalprice", DataType::Decimal128(15, 2), false),
+ Field::new("o_orderdate", DataType::Date32, false),
+ Field::new("o_orderpriority", DataType::Utf8, false),
+ Field::new("o_clerk", DataType::Utf8, false),
+ Field::new("o_shippriority", DataType::Int32, false),
+ Field::new("o_comment", DataType::Utf8, false),
+ ]);
+
+ let part_schema = Schema::new(vec![
+ Field::new("p_partkey", DataType::Int64, false),
+ Field::new("p_name", DataType::Utf8, false),
+ Field::new("p_mfgr", DataType::Utf8, false),
+ Field::new("p_brand", DataType::Utf8, false),
+ Field::new("p_type", DataType::Utf8, false),
+ Field::new("p_size", DataType::Int32, false),
+ Field::new("p_container", DataType::Utf8, false),
+ Field::new("p_retailprice", DataType::Decimal128(15, 2), false),
+ Field::new("p_comment", DataType::Utf8, false),
+ ]);
+
+ let supplier_schema = Schema::new(vec![
+ Field::new("s_suppkey", DataType::Int64, false),
+ Field::new("s_name", DataType::Utf8, false),
+ Field::new("s_address", DataType::Utf8, false),
+ Field::new("s_nationkey", DataType::Int64, false),
+ Field::new("s_phone", DataType::Utf8, false),
+ Field::new("s_acctbal", DataType::Decimal128(15, 2), false),
+ Field::new("s_comment", DataType::Utf8, false),
+ ]);
+
+ let partsupp_schema = Schema::new(vec![
+ Field::new("ps_partkey", DataType::Int64, false),
+ Field::new("ps_suppkey", DataType::Int64, false),
+ Field::new("ps_availqty", DataType::Int32, false),
+ Field::new("ps_supplycost", DataType::Decimal128(15, 2), false),
+ Field::new("ps_comment", DataType::Utf8, false),
+ ]);
+
+ let customer_schema = Schema::new(vec![
+ Field::new("c_custkey", DataType::Int64, false),
+ Field::new("c_name", DataType::Utf8, false),
+ Field::new("c_address", DataType::Utf8, false),
+ Field::new("c_nationkey", DataType::Int64, false),
+ Field::new("c_phone", DataType::Utf8, false),
+ Field::new("c_acctbal", DataType::Decimal128(15, 2), false),
+ Field::new("c_mktsegment", DataType::Utf8, false),
+ Field::new("c_comment", DataType::Utf8, false),
+ ]);
+
+ let nation_schema = Schema::new(vec![
+ Field::new("n_nationkey", DataType::Int64, false),
+ Field::new("n_name", DataType::Utf8, false),
+ Field::new("n_regionkey", DataType::Int64, false),
+ Field::new("n_comment", DataType::Utf8, false),
+ ]);
+
+ let region_schema = Schema::new(vec![
+ Field::new("r_regionkey", DataType::Int64, false),
+ Field::new("r_name", DataType::Utf8, false),
+ Field::new("r_comment", DataType::Utf8, false),
+ ]);
+
+ [
+ ("lineitem".to_string(), lineitem_schema),
+ ("orders".to_string(), orders_schema),
+ ("part".to_string(), part_schema),
+ ("supplier".to_string(), supplier_schema),
+ ("partsupp".to_string(), partsupp_schema),
+ ("customer".to_string(), customer_schema),
+ ("nation".to_string(), nation_schema),
+ ("region".to_string(), region_schema),
+ ]
+}
+
fn create_context() -> SessionContext {
let ctx = SessionContext::new();
ctx.register_table("t1", create_table_provider("a", 200))
@@ -68,6 +166,16 @@ fn create_context() -> SessionContext {
.unwrap();
ctx.register_table("t700", create_table_provider("c", 700))
.unwrap();
+
+ let tpch_schemas = create_tpch_schemas();
+ tpch_schemas.iter().for_each(|(name, schema)| {
+ ctx.register_table(
+ name,
+ Arc::new(MemTable::try_new(Arc::new(schema.clone()), vec![]).unwrap()),
+ )
+ .unwrap();
+ });
+
ctx
}
@@ -115,6 +223,54 @@ fn criterion_benchmark(c: &mut Criterion) {
)
})
});
+
+ let q1_sql = std::fs::read_to_string("../../benchmarks/queries/q1.sql").unwrap();
+ let q2_sql = std::fs::read_to_string("../../benchmarks/queries/q2.sql").unwrap();
+ let q3_sql = std::fs::read_to_string("../../benchmarks/queries/q3.sql").unwrap();
+ let q4_sql = std::fs::read_to_string("../../benchmarks/queries/q4.sql").unwrap();
+ let q5_sql = std::fs::read_to_string("../../benchmarks/queries/q5.sql").unwrap();
+ let q6_sql = std::fs::read_to_string("../../benchmarks/queries/q6.sql").unwrap();
+ let q7_sql = std::fs::read_to_string("../../benchmarks/queries/q7.sql").unwrap();
+ let q8_sql = std::fs::read_to_string("../../benchmarks/queries/q8.sql").unwrap();
+ let q9_sql = std::fs::read_to_string("../../benchmarks/queries/q9.sql").unwrap();
+ let q10_sql = std::fs::read_to_string("../../benchmarks/queries/q10.sql").unwrap();
+ let q11_sql = std::fs::read_to_string("../../benchmarks/queries/q11.sql").unwrap();
+ let q12_sql = std::fs::read_to_string("../../benchmarks/queries/q12.sql").unwrap();
+ let q13_sql = std::fs::read_to_string("../../benchmarks/queries/q13.sql").unwrap();
+ let q14_sql = std::fs::read_to_string("../../benchmarks/queries/q14.sql").unwrap();
+ // let q15_sql = std::fs::read_to_string("../../benchmarks/queries/q15.sql").unwrap();
+ let q16_sql = std::fs::read_to_string("../../benchmarks/queries/q16.sql").unwrap();
+ let q17_sql = std::fs::read_to_string("../../benchmarks/queries/q17.sql").unwrap();
+ let q18_sql = std::fs::read_to_string("../../benchmarks/queries/q18.sql").unwrap();
+ let q19_sql = std::fs::read_to_string("../../benchmarks/queries/q19.sql").unwrap();
+ let q20_sql = std::fs::read_to_string("../../benchmarks/queries/q20.sql").unwrap();
+ let q21_sql = std::fs::read_to_string("../../benchmarks/queries/q21.sql").unwrap();
+ let q22_sql = std::fs::read_to_string("../../benchmarks/queries/q22.sql").unwrap();
+
+ c.bench_function("physical_plan_tpch", |b| {
+ b.iter(|| physical_plan(&ctx, &q1_sql));
+ b.iter(|| physical_plan(&ctx, &q2_sql));
+ b.iter(|| physical_plan(&ctx, &q3_sql));
+ b.iter(|| physical_plan(&ctx, &q4_sql));
+ b.iter(|| physical_plan(&ctx, &q5_sql));
+ b.iter(|| physical_plan(&ctx, &q6_sql));
+ b.iter(|| physical_plan(&ctx, &q7_sql));
+ b.iter(|| physical_plan(&ctx, &q8_sql));
+ b.iter(|| physical_plan(&ctx, &q9_sql));
+ b.iter(|| physical_plan(&ctx, &q10_sql));
+ b.iter(|| physical_plan(&ctx, &q11_sql));
+ b.iter(|| physical_plan(&ctx, &q12_sql));
+ b.iter(|| physical_plan(&ctx, &q13_sql));
+ b.iter(|| physical_plan(&ctx, &q14_sql));
+ // b.iter(|| physical_plan(&ctx, &q15_sql));
+ b.iter(|| physical_plan(&ctx, &q16_sql));
+ b.iter(|| physical_plan(&ctx, &q17_sql));
+ b.iter(|| physical_plan(&ctx, &q18_sql));
+ b.iter(|| physical_plan(&ctx, &q19_sql));
+ b.iter(|| physical_plan(&ctx, &q20_sql));
+ b.iter(|| physical_plan(&ctx, &q21_sql));
+ b.iter(|| physical_plan(&ctx, &q22_sql));
+ });
}
criterion_group!(benches, criterion_benchmark);