cloud-fan commented on code in PR #44532:
URL: https://github.com/apache/spark/pull/44532#discussion_r1445746187
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala:
##########
@@ -485,80 +485,86 @@ trait ColumnResolutionHelper extends Logging with
DataTypeErrorsBase {
// 4. if more than one matching nodes are found, fail due to ambiguous
column reference;
// 5. resolve the expression with the matching node, if any error occurs
here, return the
// original expression as it is.
- private def tryResolveColumnByPlanId(
+ private def tryResolveDataFrameColumns(
e: Expression,
- q: LogicalPlan,
- idToPlan: mutable.HashMap[Long, LogicalPlan] = mutable.HashMap.empty):
Expression = e match {
+ q: Seq[LogicalPlan]): Expression = e match {
case u: UnresolvedAttribute =>
- resolveUnresolvedAttributeByPlanId(
- u, q, idToPlan: mutable.HashMap[Long, LogicalPlan]
- ).getOrElse(u)
+ resolveDataFrameColumn(u, q).getOrElse(u)
case _ if e.containsPattern(UNRESOLVED_ATTRIBUTE) =>
- e.mapChildren(c => tryResolveColumnByPlanId(c, q, idToPlan))
+ e.mapChildren(c => tryResolveDataFrameColumns(c, q))
case _ => e
}
- private def resolveUnresolvedAttributeByPlanId(
+ private def resolveDataFrameColumn(
u: UnresolvedAttribute,
- q: LogicalPlan,
- idToPlan: mutable.HashMap[Long, LogicalPlan]): Option[NamedExpression] =
{
+ q: Seq[LogicalPlan]): Option[NamedExpression] = {
val planIdOpt = u.getTagValue(LogicalPlan.PLAN_ID_TAG)
if (planIdOpt.isEmpty) return None
val planId = planIdOpt.get
logDebug(s"Extract plan_id $planId from $u")
- val plan = idToPlan.getOrElseUpdate(planId, {
- findPlanById(u, planId, q).getOrElse {
- // For example:
- // df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]])
- // df2 = spark.createDataFrame([Row(a = 1, b = 2)]])
- // df1.select(df2.a) <- illegal reference df2.a
- throw new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_3051",
- messageParameters = Map(
- "u" -> u.toString,
- "planId" -> planId.toString,
- "q" -> q.toString))
- }
- })
+ val isMetadataAccess = u.getTagValue(LogicalPlan.IS_METADATA_COL).nonEmpty
+ val (resolved, matched) = resolveDataFrameColumnByPlanId(u, planId,
isMetadataAccess, q)
+ if (!matched) {
+ // Can not find the target plan node with plan id, e.g.
+ // df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]])
+ // df2 = spark.createDataFrame([Row(a = 1, b = 2)]])
+ // df1.select(df2.a) <- illegal reference df2.a
+ throw QueryCompilationErrors.cannotResolveColumn(u)
+ }
+ resolved
+ }
- val isMetadataAccess = u.getTagValue(LogicalPlan.IS_METADATA_COL).isDefined
- try {
- if (!isMetadataAccess) {
- plan.resolve(u.nameParts, conf.resolver)
- } else if (u.nameParts.size == 1) {
- plan.getMetadataAttributeByNameOpt(u.nameParts.head)
- } else {
- None
+ private def resolveDataFrameColumnByPlanId(
+ u: UnresolvedAttribute,
+ id: Long,
+ isMetadataAccess: Boolean,
+ q: Seq[LogicalPlan]): (Option[NamedExpression], Boolean) = {
+ q.iterator.map(resolveDataFrameColumnRecursively(u, id, isMetadataAccess,
_))
+ .foldLeft((Option.empty[NamedExpression], false)) {
+ case ((r1, m1), (r2, m2)) =>
+ if (r1.nonEmpty && r2.nonEmpty) {
+ throw QueryCompilationErrors.ambiguousColumnReferences(u)
+ }
+ (if (r1.nonEmpty) r1 else r2, m1 | m2)
}
- } catch {
- case e: AnalysisException =>
- logDebug(s"Fail to resolve $u with $plan due to $e")
- None
- }
}
- private def findPlanById(
+ private def resolveDataFrameColumnRecursively(
u: UnresolvedAttribute,
id: Long,
- plan: LogicalPlan): Option[LogicalPlan] = {
- if (plan.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) {
- Some(plan)
- } else if (plan.children.length == 1) {
- findPlanById(u, id, plan.children.head)
- } else if (plan.children.length > 1) {
- val matched = plan.children.flatMap(findPlanById(u, id, _))
- if (matched.length > 1) {
- throw new AnalysisException(
- errorClass = "AMBIGUOUS_COLUMN_REFERENCE",
- messageParameters = Map("name" -> toSQLId(u.nameParts)),
- origin = u.origin
- )
- } else {
- matched.headOption
+ isMetadataAccess: Boolean,
+ p: LogicalPlan): (Option[NamedExpression], Boolean) = {
+ val (resolved, matched) = if
(p.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) {
+ var resolved = Option.empty[NamedExpression]
Review Comment:
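nit: `try` is an expression in Scala, so `resolved` can be a `val` bound
directly to the result of the `try` block instead of a `var` that is
reassigned inside it: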
```
val resolved = try {
if ...
} catch {
...
None
}
(resolved, true)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org