Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/17713#discussion_r115115028
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 ---
    @@ -1196,206 +1195,6 @@ class Analyzer(
         }
     
         /**
    -     * Validates to make sure the outer references appearing inside the 
subquery
    -     * are legal. This function also returns the list of expressions
    -     * that contain outer references. These outer references would be kept 
as children
    -     * of subquery expressions by the caller of this function.
    -     */
    -    private def checkAndGetOuterReferences(sub: LogicalPlan): 
Seq[Expression] = {
    -      val outerReferences = ArrayBuffer.empty[Expression]
    -
    -      // Validate that correlated aggregate expression do not contain a 
mixture
    -      // of outer and local references.
    -      def checkMixedReferencesInsideAggregateExpr(expr: Expression): Unit 
= {
    -        expr.foreach {
    -          case a: AggregateExpression if containsOuter(a) =>
    -            val outer = a.collect { case OuterReference(e) => 
e.toAttribute }
    -            val local = a.references -- outer
    -            if (local.nonEmpty) {
    -              val msg =
    -                s"""
    -                   |Found an aggregate expression in a correlated 
predicate that has both
    -                   |outer and local references, which is not supported yet.
    -                   |Aggregate expression: 
${SubExprUtils.stripOuterReference(a).sql},
    -                   |Outer references: ${outer.map(_.sql).mkString(", ")},
    -                   |Local references: ${local.map(_.sql).mkString(", ")}.
    -                 """.stripMargin.replace("\n", " ").trim()
    -              failAnalysis(msg)
    -            }
    -          case _ =>
    -        }
    -      }
    -
    -      // Make sure a plan's subtree does not contain outer references
    -      def failOnOuterReferenceInSubTree(p: LogicalPlan): Unit = {
    -        if (hasOuterReferences(p)) {
    -          failAnalysis(s"Accessing outer query column is not allowed 
in:\n$p")
    -        }
    -      }
    -
    -      // Make sure a plan's expressions do not contain :
    -      // 1. Aggregate expressions that have mixture of outer and local 
references.
    -      // 2. Expressions containing outer references on plan nodes other 
than Filter.
    -      def failOnInvalidOuterReference(p: LogicalPlan): Unit = {
    -        p.expressions.foreach(checkMixedReferencesInsideAggregateExpr)
    -        if (!p.isInstanceOf[Filter] && 
p.expressions.exists(containsOuter)) {
    -          failAnalysis(
    -            "Expressions referencing the outer query are not supported 
outside of WHERE/HAVING " +
    -              s"clauses:\n$p")
    -        }
    -      }
    -
    -      // SPARK-17348: A potential incorrect result case.
    -      // When a correlated predicate is a non-equality predicate,
    -      // certain operators are not permitted from the operator
    -      // hosting the correlated predicate up to the operator on the outer 
table.
    -      // Otherwise, the pull up of the correlated predicate
    -      // will generate a plan with a different semantics
    -      // which could return incorrect result.
    -      // Currently we check for Aggregate and Window operators
    -      //
    -      // Below shows an example of a Logical Plan during Analyzer phase 
that
    -      // show this problem. Pulling the correlated predicate [outer(c2#77) 
>= ..]
    -      // through the Aggregate (or Window) operator could alter the result 
of
    -      // the Aggregate.
    -      //
    -      // Project [c1#76]
    -      // +- Project [c1#87, c2#88]
    -      // :  (Aggregate or Window operator)
    -      // :  +- Filter [outer(c2#77) >= c2#88)]
    -      // :     +- SubqueryAlias t2, `t2`
    -      // :        +- Project [_1#84 AS c1#87, _2#85 AS c2#88]
    -      // :           +- LocalRelation [_1#84, _2#85]
    -      // +- SubqueryAlias t1, `t1`
    -      // +- Project [_1#73 AS c1#76, _2#74 AS c2#77]
    -      // +- LocalRelation [_1#73, _2#74]
    -      def failOnNonEqualCorrelatedPredicate(found: Boolean, p: 
LogicalPlan): Unit = {
    -        if (found) {
    -          // Report a non-supported case as an exception
    -          failAnalysis(s"Correlated column is not allowed in a 
non-equality predicate:\n$p")
    -        }
    -      }
    -
    -      var foundNonEqualCorrelatedPred : Boolean = false
    -
    -      // Simplify the predicates before validating any unsupported 
correlation patterns
    -      // in the plan.
    -      BooleanSimplification(sub).foreachUp {
    -
    -        // Whitelist operators allowed in a correlated subquery
    -        // There are 4 categories:
    -        // 1. Operators that are allowed anywhere in a correlated 
subquery, and,
    -        //    by definition of the operators, they either do not contain
    -        //    any columns or cannot host outer references.
    -        // 2. Operators that are allowed anywhere in a correlated subquery
    -        //    so long as they do not host outer references.
    -        // 3. Operators that need special handlings. These operators are
    -        //    Project, Filter, Join, Aggregate, and Generate.
    -        //
    -        // Any operators that are not in the above list are allowed
    -        // in a correlated subquery only if they are not on a correlation 
path.
    -        // In other word, these operators are allowed only under a 
correlation point.
    -        //
    -        // A correlation path is defined as the sub-tree of all the 
operators that
    -        // are on the path from the operator hosting the correlated 
expressions
    -        // up to the operator producing the correlated values.
    -
    -        // Category 1:
    -        // BroadcastHint, Distinct, LeafNode, Repartition, and 
SubqueryAlias
    -        case _: BroadcastHint | _: Distinct | _: LeafNode | _: Repartition 
| _: SubqueryAlias =>
    -
    -        // Category 2:
    -        // These operators can be anywhere in a correlated subquery.
    -        // so long as they do not host outer references in the operators.
    -        case s: Sort =>
    -          failOnInvalidOuterReference(s)
    -        case r: RepartitionByExpression =>
    -          failOnInvalidOuterReference(r)
    -
    -        // Category 3:
    -        // Filter is one of the two operators allowed to host correlated 
expressions.
    -        // The other operator is Join. Filter can be anywhere in a 
correlated subquery.
    -        case f: Filter =>
    -          // Find all predicates with an outer reference.
    -          val (correlated, _) = 
splitConjunctivePredicates(f.condition).partition(containsOuter)
    -
    -          // Find any non-equality correlated predicates
    -          foundNonEqualCorrelatedPred = foundNonEqualCorrelatedPred || 
correlated.exists {
    -            case _: EqualTo | _: EqualNullSafe => false
    -            case _ => true
    -          }
    -
    -          failOnInvalidOuterReference(f)
    -          // The aggregate expressions are treated in a special way by 
getOuterReferences. If the
    -          // aggregate expression contains only outer reference attributes 
then the entire aggregate
    -          // expression is isolated as an OuterReference.
    -          // i.e min(OuterReference(b)) => OuterReference(min(b))
    -          outerReferences ++= getOuterReferences(correlated)
    --- End diff --
    
    Why does this PR remove this line?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to