zhengruifeng commented on code in PR #44532:
URL: https://github.com/apache/spark/pull/44532#discussion_r1442794622
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala:
##########
@@ -487,42 +487,69 @@ trait ColumnResolutionHelper extends Logging with
DataTypeErrorsBase {
// original expression as it is.
private def tryResolveColumnByPlanId(
e: Expression,
- q: LogicalPlan,
- idToPlan: mutable.HashMap[Long, LogicalPlan] = mutable.HashMap.empty):
Expression = e match {
+ q: Seq[LogicalPlan]): Expression = e match {
case u: UnresolvedAttribute =>
- resolveUnresolvedAttributeByPlanId(
- u, q, idToPlan: mutable.HashMap[Long, LogicalPlan]
- ).getOrElse(u)
+ u.getTagValue(LogicalPlan.PLAN_ID_TAG) match {
+ case Some(id) =>
+ resolveUnresolvedAttributeByPlanId(u, id, q)
+ case _ => u
+ }
case _ if e.containsPattern(UNRESOLVED_ATTRIBUTE) =>
- e.mapChildren(c => tryResolveColumnByPlanId(c, q, idToPlan))
+ e.mapChildren(c => tryResolveColumnByPlanId(c, q))
case _ => e
}
private def resolveUnresolvedAttributeByPlanId(
u: UnresolvedAttribute,
- q: LogicalPlan,
- idToPlan: mutable.HashMap[Long, LogicalPlan]): Option[NamedExpression] =
{
- val planIdOpt = u.getTagValue(LogicalPlan.PLAN_ID_TAG)
- if (planIdOpt.isEmpty) return None
- val planId = planIdOpt.get
- logDebug(s"Extract plan_id $planId from $u")
-
- val plan = idToPlan.getOrElseUpdate(planId, {
- findPlanById(u, planId, q).getOrElse {
- // For example:
- // df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]])
- // df2 = spark.createDataFrame([Row(a = 1, b = 2)]])
- // df1.select(df2.a) <- illegal reference df2.a
- throw new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_3051",
- messageParameters = Map(
- "u" -> u.toString,
- "planId" -> planId.toString,
- "q" -> q.toString))
+ id: Long,
+ q: Seq[LogicalPlan]): NamedExpression = {
+ val isMetadataAccess = u.getTagValue(LogicalPlan.IS_METADATA_COL).isDefined
+ // resolve at most 2 ambiguous references
+ val resolved = q.iterator
+ .flatMap(resolveUnresolvedAttributeByPlanId(u, id, isMetadataAccess, _))
+ .take(2).toSeq
+ if (resolved.isEmpty) {
+ // e.g. df1.select(df2.a) <- illegal reference df2.a
+ throw QueryCompilationErrors.cannotResolveColumn(u)
+ } else if (resolved.length > 1) {
+ throw QueryCompilationErrors.ambiguousColumnReferences(u)
+ }
+ resolved.head
+ }
+
+ private def resolveUnresolvedAttributeByPlanId(
+ u: UnresolvedAttribute,
+ id: Long,
+ isMetadataAccess: Boolean,
+ p: LogicalPlan): Option[NamedExpression] = {
+ if (p.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) {
+ resolveUnresolvedAttributeByPlan(u, p, isMetadataAccess)
+ } else {
+ val candidates = p.children.flatMap { child =>
Review Comment:
Got it, will change back.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]