Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/18968#discussion_r134231496
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
---
@@ -138,46 +138,56 @@ case class Not(child: Expression)
case class In(value: Expression, list: Seq[Expression]) extends Predicate {
require(list != null, "list should not be null")
+
+ lazy val valExprs = value match {
+ case cns: CreateNamedStruct => cns.valExprs
+ case expr => Seq(expr)
+ }
+
+ override lazy val resolved: Boolean = {
+ lazy val checkForInSubquery = list match {
+ case (l @ ListQuery(sub, children, _)) :: Nil =>
+ // SPARK-21759:
+ // TODO: Update this check if we combine the optimizer rules for
subquery rewriting.
+ //
+ // In `CheckAnalysis`, we already check if the size of subquery
plan output match the size
+ // of value expressions. However, we can add extra correlated
predicate references into
+ // the top of subquery plan when pulling up correlated predicates.
Thus, we add extra check
+ // here to make sure we don't mess the query plan.
+
+ // Try to find out if any extra subquery output doesn't in the
subquery condition.
+ val isAllExtraOutputInCondition =
sub.output.drop(valExprs.length).forall { attr =>
+ children.exists(_.references.contains(attr))
+ }
+ sub.output.length >= valExprs.length && isAllExtraOutputInCondition
+ case _ => true
+ }
+ // Scala doesn't allow us refer super.resolved.
+ childrenResolved && checkInputDataTypes().isSuccess &&
checkForInSubquery
+ }
+
override def checkInputDataTypes(): TypeCheckResult = {
list match {
- case ListQuery(sub, _, _) :: Nil =>
- val valExprs = value match {
- case cns: CreateNamedStruct => cns.valExprs
- case expr => Seq(expr)
+ case (l @ ListQuery(sub, children, _)) :: Nil =>
+ val mismatchedColumns =
valExprs.zip(sub.output.take(valExprs.length)).flatMap {
--- End diff --
nit: no need to call `take` here; `zip` stops at the shorter of the two sequences, so it already limits `sub.output` to `valExprs.length` elements.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]