singhpk234 commented on code in PR #194:
URL: 
https://github.com/apache/arrow-datafusion-comet/pull/194#discussion_r1522271886


##########
spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala:
##########
@@ -58,6 +58,50 @@ class CometExecSuite extends CometTestBase {
     }
   }
 
+  test("HashJoin without join filter") {
+    withSQLConf(
+      SQLConf.PREFER_SORTMERGEJOIN.key -> "false",
+      SQLConf.ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+      withParquetTable((0 until 10).map(i => (i, i % 5)), "tbl_a") {
+        withParquetTable((0 until 10).map(i => (i % 10, i + 2)), "tbl_b") {
+          val df1 =
+            sql(
+              "SELECT /*+ SHUFFLE_HASH(tbl_a) */ * FROM tbl_a JOIN tbl_b ON 
tbl_a._2 = tbl_b._1")
+          checkSparkAnswerAndOperator(df1)
+
+          // TODO: Spark 3.4 returns SortMergeJoin for this query even with 
SHUFFLE_HASH hint.
+          // We need to investigate why this happens and fix it.
+          /*
+          val df2 =
+            sql("SELECT /*+ SHUFFLE_HASH(tbl_a) */ * FROM tbl_a LEFT JOIN 
tbl_b ON tbl_a._2 = tbl_b._1")
+          checkSparkAnswerAndOperator(df2)
+
+          val df3 =
+            sql("SELECT /*+ SHUFFLE_HASH(tbl_b) */ * FROM tbl_b LEFT JOIN 
tbl_a ON tbl_a._2 = tbl_b._1")
+          checkSparkAnswerAndOperator(df3)

Review Comment:
   :( This is strange. Could it be that, for some reason, `SPARK_TESTING` is 
not set in the env?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to