cloud-fan commented on code in PR #40865:
URL: https://github.com/apache/spark/pull/40865#discussion_r1177491250
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala:
##########
@@ -599,10 +599,39 @@ object RewriteCorrelatedScalarSubquery extends
Rule[LogicalPlan] with AliasHelpe
if (Utils.isTesting) {
assert(mayHaveCountBug.isDefined)
}
+
+ def queryOutputFoldable(list: Seq[NamedExpression]): Boolean = {
+ trimAliases(list.filter(p =>
p.exprId.equals(query.output.head.exprId)).head).foldable
+ }
+
+ // SPARK-43156: We can judge whether the column returned by subquery is
+ // foldable (already handled by [[NullPropagation]]). If it is, it
means that
+ // the result of this value has no substantial relationship with the
data,
+ // and the presence or absence of data will not affect this column. So
in
+ // this case, this column can be extracted from the JOIN to ensure
that this
+ // value can be obtained regardless of whether the data JOIN is
successful or not.
+ lazy val resultFoldable = {
+ query match {
+ case Project(expressions, _) =>
+ queryOutputFoldable(expressions)
+ case Aggregate(_, expressions, _) =>
+ queryOutputFoldable(expressions)
+ case _ =>
+ false
+ }
+ }
+
if (resultWithZeroTups.isEmpty) {
// CASE 1: Subquery guaranteed not to have the COUNT bug because it
evaluates to NULL
// with zero tuples.
planWithoutCountBug
+ } else if (mayHaveCountBug.getOrElse(false) && resultFoldable &&
Review Comment:
Are you using the latest Spark version? It works fine if I only fix
`l.copy(value = null)`:
```
scala> sql("SELECT (SELECT count(1) FROM t2 WHERE t1.c1 = t2.c2) FROM
t1").show
+------------------+
|scalarsubquery(c1)|
+------------------+
| 1|
| 0|
+------------------+
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]