This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 38cc8e6874 test: add SMJ benchmarks from #21184 (#21188)
38cc8e6874 is described below
commit 38cc8e68745f4f955d9a458a4dfabfe5085a94ea
Author: Matt Butrovich <[email protected]>
AuthorDate: Thu Mar 26 17:03:33 2026 -0400
test: add SMJ benchmarks from #21184 (#21188)
See #21184 for the rationale behind this benchmark.
---
benchmarks/src/smj.rs | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 1 deletion(-)
diff --git a/benchmarks/src/smj.rs b/benchmarks/src/smj.rs
index f0ee1dd0cd..d782762a1b 100644
--- a/benchmarks/src/smj.rs
+++ b/benchmarks/src/smj.rs
@@ -39,7 +39,7 @@ use futures::StreamExt;
#[derive(Debug, Args, Clone)]
#[command(verbatim_doc_comment)]
pub struct RunOpt {
- /// Query number (between 1 and 20). If not specified, runs all queries
+ /// Query number (between 1 and 23). If not specified, runs all queries
#[arg(short, long)]
query: Option<usize>,
@@ -410,6 +410,52 @@ const SMJ_QUERIES: &[&str] = &[
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
GROUP BY t1_sorted.key
"#,
+ // Q21: INNER 10M x 10M | unique keys (1:1) | 50% join filter
+ r#"
+ WITH t1_sorted AS (
+ SELECT value as key, value as data
+ FROM range(10000000) ORDER BY value
+ ),
+ t2_sorted AS (
+ SELECT value as key, value as data
+ FROM range(10000000) ORDER BY value
+ )
+ SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
+ FROM t1_sorted JOIN t2_sorted
+ ON t1_sorted.key = t2_sorted.key
+ AND t1_sorted.data + t2_sorted.data < 10000000
+ "#,
+ // Q22: LEFT 10M x 10M | unique keys (1:1) | 50% join filter
+ r#"
+ WITH t1_sorted AS (
+ SELECT value as key, value as data
+ FROM range(10000000) ORDER BY value
+ ),
+ t2_sorted AS (
+ SELECT value as key, value as data
+ FROM range(10000000) ORDER BY value
+ )
+ SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
+ FROM t1_sorted LEFT JOIN t2_sorted
+ ON t1_sorted.key = t2_sorted.key
+ AND t1_sorted.data + t2_sorted.data < 10000000
+ "#,
+ // Q23: FULL 10M x 10M | unique keys (1:1) | 50% join filter
+ r#"
+ WITH t1_sorted AS (
+ SELECT value as key, value as data
+ FROM range(10000000) ORDER BY value
+ ),
+ t2_sorted AS (
+ SELECT value as key, value as data
+ FROM range(10000000) ORDER BY value
+ )
+ SELECT t1_sorted.key as k1, t1_sorted.data as d1,
+ t2_sorted.key as k2, t2_sorted.data as d2
+ FROM t1_sorted FULL JOIN t2_sorted
+ ON t1_sorted.key = t2_sorted.key
+ AND t1_sorted.data + t2_sorted.data < 10000000
+ "#,
];
impl RunOpt {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]