cloud-fan commented on code in PR #44532:
URL: https://github.com/apache/spark/pull/44532#discussion_r1440104963


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala:
##########
@@ -487,42 +487,81 @@ trait ColumnResolutionHelper extends Logging with 
DataTypeErrorsBase {
   //       original expression as it is.
   private def tryResolveColumnByPlanId(
       e: Expression,
-      q: LogicalPlan,
-      idToPlan: mutable.HashMap[Long, LogicalPlan] = mutable.HashMap.empty): 
Expression = e match {
+      q: LogicalPlan): Expression = e match {
     case u: UnresolvedAttribute =>
-      resolveUnresolvedAttributeByPlanId(
-        u, q, idToPlan: mutable.HashMap[Long, LogicalPlan]
-      ).getOrElse(u)
+      resolveUnresolvedAttributeByPlanId(u, q).getOrElse(u)
     case _ if e.containsPattern(UNRESOLVED_ATTRIBUTE) =>
-      e.mapChildren(c => tryResolveColumnByPlanId(c, q, idToPlan))
+      e.mapChildren(c => tryResolveColumnByPlanId(c, q))
     case _ => e
   }
 
   private def resolveUnresolvedAttributeByPlanId(
       u: UnresolvedAttribute,
-      q: LogicalPlan,
-      idToPlan: mutable.HashMap[Long, LogicalPlan]): Option[NamedExpression] = 
{
+      q: LogicalPlan): Option[NamedExpression] = {
     val planIdOpt = u.getTagValue(LogicalPlan.PLAN_ID_TAG)
     if (planIdOpt.isEmpty) return None
     val planId = planIdOpt.get
     logDebug(s"Extract plan_id $planId from $u")
 
-    val plan = idToPlan.getOrElseUpdate(planId, {
-      findPlanById(u, planId, q).getOrElse {
-        // For example:
-        //  df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]])
-        //  df2 = spark.createDataFrame([Row(a = 1, b = 2)]])
-        //  df1.select(df2.a)   <-   illegal reference df2.a
-        throw new AnalysisException(
-          errorClass = "_LEGACY_ERROR_TEMP_3051",
-          messageParameters = Map(
-            "u" -> u.toString,
-            "planId" -> planId.toString,
-            "q" -> q.toString))
+    val isMetadataAccess = u.getTagValue(LogicalPlan.IS_METADATA_COL).isDefined
+
+    var result = Option.empty[NamedExpression]
+    var numMatched = 0
+    resolveUnresolvedAttributeByPlanId(u, planId, isMetadataAccess, q).foreach 
{
+      case Some(resolved) =>
+        numMatched += 1
+        if (result.isEmpty) {
+          result = Some(resolved)
+        } else {
+          throw new AnalysisException(
+            errorClass = "AMBIGUOUS_COLUMN_REFERENCE",
+            messageParameters = Map("name" -> toSQLId(u.nameParts)),
+            origin = u.origin
+          )
+        }
+      case _ => numMatched += 1
+    }
+    if (numMatched == 0) {
+      // For example:
+      //  df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]])
+      //  df2 = spark.createDataFrame([Row(a = 1, b = 2)]])
+      //  df1.select(df2.a)   <-   illegal reference df2.a
+      throw new AnalysisException(
+        errorClass = "_LEGACY_ERROR_TEMP_3051",
+        messageParameters = Map(
+          "u" -> u.toString,
+          "planId" -> planId.toString,
+          "q" -> q.toString))
+    }
+    result
+  }
+
+  private def resolveUnresolvedAttributeByPlanId(
+      u: UnresolvedAttribute,
+      id: Long,
+      isMetadataAccess: Boolean,
+      p: LogicalPlan): Iterator[Option[NamedExpression]] = {
+    if (p.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) {
+      Iterator.single(
+        resolveUnresolvedAttributeByPlan(u, p, isMetadataAccess))
+    } else {
+      p.children.iterator.flatMap { child =>
+        val outputSet = if (isMetadataAccess) {

Review Comment:
   This is more efficient, as plans like `Project` can prune the resolved column 
earlier. The outer `resolveUnresolvedAttributeByPlanId` should fail if no 
column is matched.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to