cloud-fan commented on code in PR #39556:
URL: https://github.com/apache/spark/pull/39556#discussion_r1069573290


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/AliasAwareOutputExpression.scala:
##########
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.plans
+
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.sql.catalyst.SQLConfHelper
+import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, 
Empty2Null, Expression, NamedExpression, SortOrder}
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * A trait that provides functionality to handle aliases in the 
`outputExpressions`.
+ */
+trait AliasAwareOutputExpression[T <: QueryPlan[T]]
+  extends SQLConfHelper { self: QueryPlan[T] =>
+  private val aliasCandidateLimit = 
conf.getConf(SQLConf.OUTPUT_EXPRESSION_ALIAS_CANDIDATE_LIMIT)
+  protected def outputExpressions: Seq[NamedExpression]
+  /**
+   * This method can be used to strip expression which does not affect the 
result, for example:
+   * strip the expression which is ordering Agnostic for output ordering.
+   */
+  protected def strip(expr: Expression): Expression = expr
+
+  lazy val aliasMap = {
+    val attrWithAliasMap = new mutable.HashMap[Expression, 
ArrayBuffer[Attribute]]()
+
+    def updateAttrWithAliasMap(key: Expression, target: Attribute): Unit = {
+      val aliasArray = 
attrWithAliasMap.getOrElseUpdate(strip(key).canonicalized,
+        new ArrayBuffer[Attribute]())
+      // pre-filter if the number of alias has bigger than candidate limit
+      if (aliasArray.size < aliasCandidateLimit) {
+        aliasArray.append(target)
+      }
+    }
+
+    outputExpressions.foreach {
+      case a @ Alias(child, _) =>
+        updateAttrWithAliasMap(child, a.toAttribute)
+      case _ =>
+    }
+    attrWithAliasMap.toMap
+  }
+
+  protected def hasAlias: Boolean = aliasMap.nonEmpty
+
+  // Return a set of Expression which normalize the original expression to the 
aliased.
+  protected def normalizeExpression(expr: Expression): Seq[Expression] = {
+    if (aliasCandidateLimit < 1) {
+      expr :: Nil
+    } else {
+      val normalizedCandidates = new ArrayBuffer[Expression]()
+      // Stop loop if the size of candidates exceed limit
+      for ((origin, aliases) <- aliasMap if normalizedCandidates.size < 
aliasCandidateLimit) {
+        for (alias <- aliases if normalizedCandidates.size < 
aliasCandidateLimit) {
+          val newExpr = expr.transformDown {

Review Comment:
   I'm not sure this is correct. It only replaces one alias from the input 
expressions. What happens if the output ordering is `a + b` and the aliases are 
`a as x, b as y`? Will we return `x + y`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to