This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 38cc8e6874 test: add SMJ benchmarks from #21184 (#21188)
38cc8e6874 is described below

commit 38cc8e68745f4f955d9a458a4dfabfe5085a94ea
Author: Matt Butrovich <[email protected]>
AuthorDate: Thu Mar 26 17:03:33 2026 -0400

    test: add SMJ benchmarks from #21184 (#21188)
    
    See #21184 for the motivation behind these benchmarks.
---
 benchmarks/src/smj.rs | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/benchmarks/src/smj.rs b/benchmarks/src/smj.rs
index f0ee1dd0cd..d782762a1b 100644
--- a/benchmarks/src/smj.rs
+++ b/benchmarks/src/smj.rs
@@ -39,7 +39,7 @@ use futures::StreamExt;
 #[derive(Debug, Args, Clone)]
 #[command(verbatim_doc_comment)]
 pub struct RunOpt {
-    /// Query number (between 1 and 20). If not specified, runs all queries
+    /// Query number (between 1 and 23). If not specified, runs all queries
     #[arg(short, long)]
     query: Option<usize>,
 
@@ -410,6 +410,52 @@ const SMJ_QUERIES: &[&str] = &[
         FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
         GROUP BY t1_sorted.key
     "#,
+    // Q21: INNER 10M x 10M | unique keys (1:1) | 50% join filter
+    r#"
+        WITH t1_sorted AS (
+            SELECT value as key, value as data
+            FROM range(10000000) ORDER BY value
+        ),
+        t2_sorted AS (
+            SELECT value as key, value as data
+            FROM range(10000000) ORDER BY value
+        )
+        SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
+        FROM t1_sorted JOIN t2_sorted
+          ON t1_sorted.key = t2_sorted.key
+         AND t1_sorted.data + t2_sorted.data < 10000000
+    "#,
+    // Q22: LEFT 10M x 10M | unique keys (1:1) | 50% join filter
+    r#"
+        WITH t1_sorted AS (
+            SELECT value as key, value as data
+            FROM range(10000000) ORDER BY value
+        ),
+        t2_sorted AS (
+            SELECT value as key, value as data
+            FROM range(10000000) ORDER BY value
+        )
+        SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
+        FROM t1_sorted LEFT JOIN t2_sorted
+          ON t1_sorted.key = t2_sorted.key
+         AND t1_sorted.data + t2_sorted.data < 10000000
+    "#,
+    // Q23: FULL 10M x 10M | unique keys (1:1) | 50% join filter
+    r#"
+        WITH t1_sorted AS (
+            SELECT value as key, value as data
+            FROM range(10000000) ORDER BY value
+        ),
+        t2_sorted AS (
+            SELECT value as key, value as data
+            FROM range(10000000) ORDER BY value
+        )
+        SELECT t1_sorted.key as k1, t1_sorted.data as d1,
+               t2_sorted.key as k2, t2_sorted.data as d2
+        FROM t1_sorted FULL JOIN t2_sorted
+          ON t1_sorted.key = t2_sorted.key
+         AND t1_sorted.data + t2_sorted.data < 10000000
+    "#,
 ];
 
 impl RunOpt {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to