leanken commented on a change in pull request #29104:
URL: https://github.com/apache/spark/pull/29104#discussion_r456351353
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
##########
@@ -2665,6 +2665,23 @@ object SQLConf {
.checkValue(_ > 0, "The difference must be positive.")
.createWithDefault(4)
+ val NOT_IN_SUBQUERY_SINGLE_COLUMN_OPTIMIZE_ENABLED =
+ buildConf("spark.sql.notInSubquery.singleColumn.optimize.enabled")
+ .internal()
+ .doc("When true, single column not in subquery execution in
BroadcastNestedLoopJoinExec " +
+ "will be optimized from M*N calculation into M*log(N) calculation
using HashMap lookup " +
+ "instead of Looping lookup.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val NOT_IN_SUBQUERY_SINGLE_COLUMN_OPTIMIZE_ROW_COUNT_THRESHOLD =
Review comment:
Done. I removed this config; the optimization now uses spark.sql.autoBroadcastJoinThreshold instead.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala
##########
@@ -205,6 +224,108 @@ case class BroadcastNestedLoopJoinExec(
}
}
+ case class NotInSubquerySingleColumnOptimizeParams(
+ buildSideHashedRelation: HashedRelation,
+ isNullExists: Boolean,
+ isBuildRowsEmpty: Boolean)
+
+ private def notInSubquerySingleColumnOptimizeEnabled: Boolean = {
+ if (SQLConf.get.notInSubquerySingleColumnOptimizeEnabled &&
right.output.length == 1) {
Review comment:
done.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]