Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1542#discussion_r153068582
--- Diff:
integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonPreAggregateRules.scala
---
@@ -797,33 +798,59 @@ object CarbonPreInsertionCasts extends
Rule[LogicalPlan] {
* @return
*/
private def transformAggregatePlan(logicalPlan: LogicalPlan):
LogicalPlan = {
+ val validExpressionsMap =
scala.collection.mutable.LinkedHashMap.empty[String, NamedExpression]
logicalPlan transform {
case aggregate@Aggregate(_, aExp, _) =>
- val newExpressions = aExp.flatMap {
- case alias@Alias(attrExpression: AggregateExpression, _) =>
- attrExpression.aggregateFunction match {
- case Average(attr: AttributeReference) =>
- Seq(Alias(attrExpression
- .copy(aggregateFunction = Sum(attr),
- resultId = NamedExpression.newExprId), attr.name +
"_sum")(),
- Alias(attrExpression
- .copy(aggregateFunction = Count(attr),
- resultId = NamedExpression.newExprId), attr.name +
"_count")())
- case Average(cast@Cast(attr: AttributeReference, _)) =>
- Seq(Alias(attrExpression
- .copy(aggregateFunction = Sum(cast),
- resultId = NamedExpression.newExprId),
- attr.name + "_sum")(),
- Alias(attrExpression
- .copy(aggregateFunction = Count(cast),
- resultId = NamedExpression.newExprId), attr.name +
"_count")())
- case _ => Seq(alias)
- }
- case namedExpr: NamedExpression => Seq(namedExpr)
+ aExp.foreach {
+ case alias: Alias =>
+ validExpressionsMap ++=
validateAggregateFunctionAndGetAlias(alias)
--- End diff --
Will duplicate columns be removed from the aggregate expressions? For example,
if the agg table is created with sum(col1) and avg(col1), then the aggregation
table should be created with sum(col1) and count(col1) only; sum(col1) should
not be duplicated. Is this handled here?
---