cloud-fan commented on a change in pull request #31728:
URL: https://github.com/apache/spark/pull/31728#discussion_r587258728
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -1914,49 +1857,126 @@ class Analyzer(override val catalogManager:
CatalogManager)
}
/**
- * Resolves the attribute, column value and extract value expressions(s) by
traversing the
- * input expression in bottom-up manner. In order to resolve the nested
complex type fields
- * correctly, this function makes use of `throws` parameter to control when
to raise an
- * AnalysisException.
+ * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value
expressions(s) by
+ * traversing the input expression in top-down manner. It must be top-down
because we need to
+ * skip over unbound lambda function expression. The lambda expressions are
resolved in a
+ * different place [[ResolveLambdaVariables]].
*
* Example :
- * SELECT a.b FROM t ORDER BY b[0].d
+ * SELECT transform(array(1, 2, 3), (x, i) -> x + i)"
*
- * In the above example, in b needs to be resolved before d can be resolved.
Given we are
- * doing a bottom up traversal, it will first attempt to resolve d and fail
as b has not
- * been resolved yet. If `throws` is false, this function will handle the
exception by
- * returning the original attribute. In this case `d` will be resolved in
subsequent passes
- * after `b` is resolved.
+ * In the case above, x and i are resolved as lambda variables in
[[ResolveLambdaVariables]].
*/
- protected[sql] def resolveExpressionBottomUp(
+ private def resolveExpression(
expr: Expression,
plan: LogicalPlan,
- throws: Boolean = false): Expression = {
- if (expr.resolved) return expr
+ resolveColumnByName: Seq[String] => Option[Expression],
+ resolveColumnByOrdinal: Int => Attribute,
+ trimAlias: Boolean,
+ throws: Boolean): Expression = {
// Resolve expression in one round.
- // If throws == false or the desired attribute doesn't exist
- // (like try to resolve `a.b` but `a` doesn't exist), fail and return the
origin one.
- // Else, throw exception.
- try {
- expr transformUp {
- case GetColumnByOrdinal(ordinal, _) => plan.output(ordinal)
+ def innerResolve(e: Expression, isTopLevel: Boolean): Expression = {
+ if (e.resolved) return e
+ e match {
+ case f: LambdaFunction if !f.bound => f
+ case GetColumnByOrdinal(ordinal, _) => resolveColumnByOrdinal(ordinal)
case u @ UnresolvedAttribute(nameParts) =>
- val result =
- withPosition(u) {
- plan.resolve(nameParts, resolver)
- .orElse(resolveLiteralFunction(nameParts, u, plan))
- .getOrElse(u)
- }
+ val resolved = withPosition(u) {
+ resolveColumnByName(nameParts)
+ .orElse(resolveLiteralFunction(nameParts, u, plan))
+ .getOrElse(u)
+ }
+ val result = resolved match {
+ // When trimAlias = true, we will trim unnecessary alias of
`GetStructField` and
+ // we won't trim the alias of top-level `GetStructField`. Since we
will call
+ // CleanupAliases later in Analyzer, trim non top-level
unnecessary alias of
+ // `GetStructField` here is safe.
+ case Alias(s: GetStructField, _) if trimAlias && !isTopLevel => s
+ case others => others
+ }
logDebug(s"Resolving $u to $result")
result
- case UnresolvedExtractValue(child, fieldName) if child.resolved =>
- ExtractValue(child, fieldName, resolver)
+ case u @ UnresolvedExtractValue(child, fieldName) =>
+ val newChild = innerResolve(child, isTopLevel = false)
+ if (newChild.resolved) {
+ ExtractValue(newChild, fieldName, resolver)
+ } else {
+ u.copy(child = newChild)
+ }
+ case _ => e.mapChildren(innerResolve(_, isTopLevel = false))
}
+ }
+
+ try {
+ innerResolve(expr, isTopLevel = true)
} catch {
- case a: AnalysisException if !throws => expr
+ case _: AnalysisException if !throws => expr
}
}
+ /**
+ * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value
expressions(s) by the
+ * input plan's output attributes. In order to resolve the nested complex
type fields correctly,
+ * this function makes use of `throws` parameter to control when to raise an
AnalysisException.
+ *
+ * Example :
+ * SELECT a.b FROM t ORDER BY b[0].d
+ *
+ * In the above example, in b needs to be resolved before d can be resolved.
Given we are
Review comment:
This is copied verbatim from the old code. It's actually not accurate,
as `b[0].d` won't fail if `b` can't be resolved; it just returns the unresolved
expression.
Let me rewrite this doc.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]