maropu commented on a change in pull request #31728:
URL: https://github.com/apache/spark/pull/31728#discussion_r587213670



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -1928,33 +1925,51 @@ class Analyzer(override val catalogManager: 
CatalogManager)
    * returning the original attribute. In this case `d` will be resolved in 
subsequent passes
    * after `b` is resolved.
    */
-  protected[sql] def resolveExpressionBottomUp(
+  def resolveExpressionByPlanOutput(

Review comment:
       nit: `private`?

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -1928,33 +1925,51 @@ class Analyzer(override val catalogManager: 
CatalogManager)
    * returning the original attribute. In this case `d` will be resolved in 
subsequent passes
    * after `b` is resolved.
    */
-  protected[sql] def resolveExpressionBottomUp(
+  def resolveExpressionByPlanOutput(
       expr: Expression,
       plan: LogicalPlan,
       throws: Boolean = false): Expression = {
-    if (expr.resolved) return expr
-    // Resolve expression in one round.
-    // If throws == false or the desired attribute doesn't exist
-    // (like try to resolve `a.b` but `a` doesn't exist), fail and return the 
origin one.
-    // Else, throw exception.
-    try {
-      expr transformUp {
-        case GetColumnByOrdinal(ordinal, _) => plan.output(ordinal)
-        case u @ UnresolvedAttribute(nameParts) =>
-          val result =
-            withPosition(u) {
-              plan.resolve(nameParts, resolver)
-                .orElse(resolveLiteralFunction(nameParts, u, plan))
-                .getOrElse(u)
-            }
-          logDebug(s"Resolving $u to $result")
-          result
-        case UnresolvedExtractValue(child, fieldName) if child.resolved =>
-          ExtractValue(child, fieldName, resolver)
-      }
-    } catch {
-      case a: AnalysisException if !throws => expr
-    }
+    resolveExpression(
+      expr,
+      resolveColumnByName = u => {
+        plan.resolve(u.nameParts, 
resolver).orElse(resolveLiteralFunction(u.nameParts, u, plan))

Review comment:
       Could we move the `orElse` part into `resolveExpression `? It seems it 
is the same between `resolveExpressionByPlanOutput` and 
`resolveExpressionByPlanChildren `.

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -1914,49 +1857,116 @@ class Analyzer(override val catalogManager: 
CatalogManager)
   }
 
   /**
-   * Resolves the attribute, column value and extract value expressions(s) by 
traversing the
-   * input expression in bottom-up manner. In order to resolve the nested 
complex type fields
-   * correctly, this function makes use of `throws` parameter to control when 
to raise an
-   * AnalysisException.
+   * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value 
expressions(s) by
+   * traversing the input expression in top-down manner. It must be top-down 
because we need to
+   * skip over unbound lambda function expression. The lambda expressions are 
resolved in a
+   * different place [[ResolveLambdaVariables]].
    *
    * Example :
-   * SELECT a.b FROM t ORDER BY b[0].d
+   * SELECT transform(array(1, 2, 3), (x, i) -> x + i)"
    *
-   * In the above example, in b needs to be resolved before d can be resolved. 
Given we are
-   * doing a bottom up traversal, it will first attempt to resolve d and fail 
as b has not
-   * been resolved yet. If `throws` is false, this function will handle the 
exception by
-   * returning the original attribute. In this case `d` will be resolved in 
subsequent passes
-   * after `b` is resolved.
+   * In the case above, x and i are resolved as lambda variables in 
[[ResolveLambdaVariables]].
    */
-  protected[sql] def resolveExpressionBottomUp(
+  private def resolveExpression(
       expr: Expression,
-      plan: LogicalPlan,
-      throws: Boolean = false): Expression = {
-    if (expr.resolved) return expr
+      resolveColumnByName: UnresolvedAttribute => Option[Expression],
+      resolveColumnByOrdinal: Int => Attribute,
+      trimAlias: Boolean,
+      throws: Boolean): Expression = {
     // Resolve expression in one round.
-    // If throws == false or the desired attribute doesn't exist
-    // (like try to resolve `a.b` but `a` doesn't exist), fail and return the 
origin one.
-    // Else, throw exception.
-    try {
-      expr transformUp {
-        case GetColumnByOrdinal(ordinal, _) => plan.output(ordinal)
-        case u @ UnresolvedAttribute(nameParts) =>
-          val result =
-            withPosition(u) {
-              plan.resolve(nameParts, resolver)
-                .orElse(resolveLiteralFunction(nameParts, u, plan))
-                .getOrElse(u)
-            }
+    def innerResolve(e: Expression, isTopLevel: Boolean): Expression = {
+      if (e.resolved) return e
+      e match {
+        case f: LambdaFunction if !f.bound => f
+        case GetColumnByOrdinal(ordinal, _) => resolveColumnByOrdinal(ordinal)
+        case u: UnresolvedAttribute =>
+          val resolved = withPosition(u) {
+            resolveColumnByName(u).getOrElse(u)
+          }
+          val result = resolved match {
+            // When trimAlias = true, we will trim unnecessary alias of 
`GetStructField` and
+            // we won't trim the alias of top-level `GetStructField`. Since we 
will call
+            // CleanupAliases later in Analyzer, trim non top-level 
unnecessary alias of
+            // `GetStructField` here is safe.
+            case Alias(s: GetStructField, _) if trimAlias && !isTopLevel => s
+            case others => others
+          }
           logDebug(s"Resolving $u to $result")
           result
         case UnresolvedExtractValue(child, fieldName) if child.resolved =>
           ExtractValue(child, fieldName, resolver)
+        case _ => e.mapChildren(innerResolve(_, isTopLevel = false))
       }
+    }
+
+    try {
+      innerResolve(expr, isTopLevel = true)
     } catch {
-      case a: AnalysisException if !throws => expr
+      case _: AnalysisException if !throws => expr
     }
   }
 
+  /**
+   * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value 
expressions(s) by the
+   * input plan's output attributes. In order to resolve the nested complex 
type fields correctly,
+   * this function makes use of `throws` parameter to control when to raise an 
AnalysisException.
+   *
+   * Example :
+   * SELECT a.b FROM t ORDER BY b[0].d
+   *
+   * In the above example, in b needs to be resolved before d can be resolved. 
Given we are
+   * doing a bottom up traversal, it will first attempt to resolve d and fail 
as b has not
+   * been resolved yet. If `throws` is false, this function will handle the 
exception by
+   * returning the original attribute. In this case `d` will be resolved in 
subsequent passes
+   * after `b` is resolved.
+   */
+  def resolveExpressionByPlanOutput(
+      expr: Expression,
+      plan: LogicalPlan,
+      throws: Boolean = false): Expression = {
+    resolveExpression(
+      expr,
+      resolveColumnByName = u => {
+        plan.resolve(u.nameParts, 
resolver).orElse(resolveLiteralFunction(u.nameParts, u, plan))
+      },
+      resolveColumnByOrdinal = ordinal => {
+        assert(ordinal >= 0 && ordinal < plan.output.length)
+        plan.output(ordinal)
+      },
+      trimAlias = false,
+      throws = throws)
+  }
+
+  /**
+   * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value 
expressions(s) by the
+   * input plan's children output attributes.
+   *
+   * @param e The expression need to be resolved.
+   * @param q The LogicalPlan whose children are used to resolve expression's 
attribute.
+   * @param trimAlias When true, trim unnecessary alias of GetStructField`. 
Note that,
+   *                  we cannot trim the alias of top-level `GetStructField`, 
as we should
+   *                  resolve `UnresolvedAttribute` to a named expression. The 
caller side
+   *                  can trim the alias of top-level `GetStructField` if it's 
safe to do so.
+   * @return resolved Expression.
+   */
+  def resolveExpressionByPlanChildren(

Review comment:
       ditto: `private`?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to