viirya commented on a change in pull request #27056: [SPARK-27217][SQL] Nested 
schema pruning with Aggregation
URL: https://github.com/apache/spark/pull/27056#discussion_r362931836
 
 

 ##########
 File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala
 ##########
 @@ -167,3 +169,47 @@ object NestedColumnAliasing {
     case _ => false
   }
 }
+
+object AggregateNestedColumnAliasing {
+
+  private def canPrune(child: LogicalPlan, references: AttributeSet): Boolean 
= child match {
+    case p: Project => !p.references.subsetOf(references)
+    case _ => !child.outputSet.subsetOf(references)
+  }
+
+  private def unAlias(exp: Expression): Expression = exp match {
+    case a: Alias => a.child
+    case _ => exp
+  }
+
+  def unapply(plan: LogicalPlan): Option[LogicalPlan] = plan match {
+    case a @ Aggregate(groupingExpressions, aggregateExpressions, child)
+      if canPrune(child, a.references) =>
+      val allExpressions = (aggregateExpressions ++ 
groupingExpressions).map(unAlias).distinct
+      val (nestedFieldReferences, otherRootReferences) =
+        allExpressions.flatMap(collectRootReferenceAndExtractValue).partition {
+          case _: ExtractValue => true
+          case _ => false
+        }
+
+      val aliasSub = nestedFieldReferences.asInstanceOf[Seq[ExtractValue]]
+        .filter(!_.references.subsetOf(AttributeSet(otherRootReferences)))
+        .groupBy(_.references.head).flatMap {
+        case (attr, nestedFields: Seq[ExtractValue]) =>
+          val nestedFieldToAlias = nestedFields.distinct.map { f =>
+            Alias(f, f.sql)()
+          }
+
+          if (nestedFieldToAlias.nonEmpty &&
+            nestedFieldToAlias.length < totalFieldNum(attr.dataType)) {
+            Some(nestedFieldToAlias)
+          } else {
+            None
+          }
+      }
+      val newProjectList: Seq[NamedExpression] =
 
 Review comment:
   This code seems to be copied from `NestedColumnAliasing`. I think we can reuse 
its existing methods, such as `getAliasSubMap`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to