cloud-fan commented on a change in pull request #29585:
URL: https://github.com/apache/spark/pull/29585#discussion_r479906329



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
##########
@@ -203,3 +203,28 @@ abstract class BinaryNode extends LogicalPlan {
 abstract class OrderPreservingUnaryNode extends UnaryNode {
   override final def outputOrdering: Seq[SortOrder] = child.outputOrdering
 }
+
+object LogicalPlanIntegrity {
+
+  private def canGetOutputAttrs(p: LogicalPlan): Boolean = {
+    p.resolved && !p.expressions.exists { e =>
+      // Some plans cannot call `output` because their expressions have `Unevaluable`,
+      // e.g., `Join` having a `ExistenceJoin` type.
+      e.collectFirst { case _: Unevaluable => true }.isDefined
+    }
+  }
+
+  /**
+   * This method checks if the same expression ID, `ExprId`, refer to an unique attribute.
+   * Some plan transformers (e.g., `RemoveNoopOperators`) rewrite logical
+   * plans based on this assumption.
+   */
+  def hasUniqueExprIdsForAttributes(plan: LogicalPlan): Boolean = {
+    val allOutputAttrs = plan.collect { case p if canGetOutputAttrs(p) =>
+      p.output.filter(_.resolved).map(_.canonicalized.asInstanceOf[Attribute])
+    }
+    val groupedAttrsByExprId = allOutputAttrs
+      .flatten.groupBy(_.exprId).values.map(_.distinct)

Review comment:
       What do we actually check here? Name, data type, and nullability?

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
##########
@@ -203,3 +203,28 @@ abstract class BinaryNode extends LogicalPlan {
 abstract class OrderPreservingUnaryNode extends UnaryNode {
   override final def outputOrdering: Seq[SortOrder] = child.outputOrdering
 }
+
+object LogicalPlanIntegrity {
+
+  private def canGetOutputAttrs(p: LogicalPlan): Boolean = {
+    p.resolved && !p.expressions.exists { e =>
+      // Some plans cannot call `output` because their expressions have `Unevaluable`,
+      // e.g., `Join` having a `ExistenceJoin` type.
+      e.collectFirst { case _: Unevaluable => true }.isDefined
+    }
+  }
+
+  /**
+   * This method checks if the same expression ID, `ExprId`, refer to an unique attribute.
+   * Some plan transformers (e.g., `RemoveNoopOperators`) rewrite logical
+   * plans based on this assumption.
+   */
+  def hasUniqueExprIdsForAttributes(plan: LogicalPlan): Boolean = {
+    val allOutputAttrs = plan.collect { case p if canGetOutputAttrs(p) =>
+      p.output.filter(_.resolved).map(_.canonicalized.asInstanceOf[Attribute])
+    }
+    val groupedAttrsByExprId = allOutputAttrs
+      .flatten.groupBy(_.exprId).values.map(_.distinct)

Review comment:
       What do we actually check here? Name, data type, and nullability?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to