beliefer commented on code in PR #41860:
URL: https://github.com/apache/spark/pull/41860#discussion_r1371555182


##########
sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala:
##########
@@ -644,4 +644,76 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp
         "Missing or unexpected reused ReusedSubqueryExec in the plan")
     }
   }
+
+  test("Runtime bloom filter join: should add bf for left outer join even if left side is" +
+    " smaller than broadcast threshold") {
+    withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "600",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "300",
+      SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000",
+      SQLConf.CBO_ENABLED.key -> "true") {

Review Comment:
   +1. Please remove `SQLConf.CBO_ENABLED.key`.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala:
##########
@@ -209,15 +210,35 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J
       REGEXP_EXTRACT_FAMILY, REGEXP_REPLACE)
   }
 
-  private def isProbablyShuffleJoin(left: LogicalPlan,
-      right: LogicalPlan, hint: JoinHint): Boolean = {
-    !hintToBroadcastLeft(hint) && !hintToBroadcastRight(hint) &&
-      !canBroadcastBySize(left, conf) && !canBroadcastBySize(right, conf)
+  // Whether it is a shuffle join or not should be based on the actual left and
+  // right table. For some join like left outer join, it will be a shuffle join
+  // even if left side table size is smaller than broadcast threshold.

Review Comment:
   Could we simplify this comment?
   `Whether it is a shuffle join or not is based on the join type, the left size and the right size.`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to