beliefer commented on code in PR #41860:
URL: https://github.com/apache/spark/pull/41860#discussion_r1371555182
##########
sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala:
##########
@@ -644,4 +644,76 @@ class InjectRuntimeFilterSuite extends QueryTest with
SQLTestUtils with SharedSp
"Missing or unexpected reused ReusedSubqueryExec in the plan")
}
}
+
+ test("Runtime bloom filter join: should add bf for left outer join even if
left side is" +
+ " smaller than broadcast threshold") {
+
withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key
-> "600",
+ SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "300",
+ SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000",
+ SQLConf.CBO_ENABLED.key -> "true") {
Review Comment:
+1. Please remove `SQLConf.CBO_ENABLED.key`.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala:
##########
@@ -209,15 +210,35 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with
PredicateHelper with J
REGEXP_EXTRACT_FAMILY, REGEXP_REPLACE)
}
- private def isProbablyShuffleJoin(left: LogicalPlan,
- right: LogicalPlan, hint: JoinHint): Boolean = {
- !hintToBroadcastLeft(hint) && !hintToBroadcastRight(hint) &&
- !canBroadcastBySize(left, conf) && !canBroadcastBySize(right, conf)
+ // Whether it is a shuffle join or not should be based on the actual left and
+ // right table. For some join like left outer join, it will be a shuffle join
+ // even if left side table size is smaller than broadcast threshold.
Review Comment:
Could we simplify this comment?
`Whether it is a shuffle join depends on the join type, the left size,
and the right size.`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]