Github user tejasapatil commented on a diff in the pull request: https://github.com/apache/spark/pull/15484#discussion_r84585794 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala --- @@ -255,98 +265,125 @@ class Analyzer( expr transform { case e: GroupingID => if (e.groupByExprs.isEmpty || e.groupByExprs == groupByExprs) { - gid + Alias(gid, toPrettySQL(e))() } else { throw new AnalysisException( s"Columns of grouping_id (${e.groupByExprs.mkString(",")}) does not match " + s"grouping columns (${groupByExprs.mkString(",")})") } - case Grouping(col: Expression) => + case e @ Grouping(col: Expression) => val idx = groupByExprs.indexOf(col) if (idx >= 0) { - Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)), - Literal(1)), ByteType) + Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)), + Literal(1)), ByteType), toPrettySQL(e))() } else { throw new AnalysisException(s"Column of grouping ($col) can't be found " + s"in grouping columns ${groupByExprs.mkString(",")}") } } } - // This require transformUp to replace grouping()/grouping_id() in resolved Filter/Sort - def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { - case a if !a.childrenResolved => a // be sure all of the children are resolved. - case p if p.expressions.exists(hasGroupingAttribute) => - failAnalysis( - s"${VirtualColumn.hiveGroupingIdName} is deprecated; use grouping_id() instead") - - case Aggregate(Seq(c @ Cube(groupByExprs)), aggregateExpressions, child) => - GroupingSets(bitmasks(c), groupByExprs, child, aggregateExpressions) - case Aggregate(Seq(r @ Rollup(groupByExprs)), aggregateExpressions, child) => - GroupingSets(bitmasks(r), groupByExprs, child, aggregateExpressions) + /* + * Create new alias for all group by expressions for `Expand` operator. 
+ */ + private def constructGroupByAlias(groupByExprs: Seq[Expression]): Seq[Alias] = { + groupByExprs.map { + case e: NamedExpression => Alias(e, e.name)() + case other => Alias(other, other.toString)() + } + } - // Ensure all the expressions have been resolved. - case x: GroupingSets if x.expressions.forall(_.resolved) => - val gid = AttributeReference(VirtualColumn.groupingIdName, IntegerType, false)() - - // Expand works by setting grouping expressions to null as determined by the bitmasks. To - // prevent these null values from being used in an aggregate instead of the original value - // we need to create new aliases for all group by expressions that will only be used for - // the intended purpose. - val groupByAliases: Seq[Alias] = x.groupByExprs.map { - case e: NamedExpression => Alias(e, e.name)() - case other => Alias(other, other.toString)() + /* + * Construct [[Expand]] operator with grouping sets. + */ + private def constructExpand( + selectedGroupByExprs: Seq[Seq[Expression]], + child: LogicalPlan, + groupByAliases: Seq[Alias], + gid: Attribute): LogicalPlan = { + // Change the nullability of group by aliases if necessary. For example, if we have + // GROUPING SETS ((a,b), a), we do not need to change the nullability of a, but we + // should change the nullability of b to be TRUE. + // TODO: For Cube/Rollup just set nullability to be `true`. + val expandedAttributes = groupByAliases.zipWithIndex.map { case (a, idx) => --- End diff -- +1. Looking at it more, I feel `zipWithIndex` is not needed at all and the `map` would suffice.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org