AngersZhuuuu commented on a change in pull request #30212:
URL: https://github.com/apache/spark/pull/30212#discussion_r532164002
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
##########
@@ -39,45 +41,64 @@ trait GroupingSet extends Expression with CodegenFallback {
override def eval(input: InternalRow): Any = throw new
UnsupportedOperationException
}
-// scalastyle:off line.size.limit line.contains.tab
-@ExpressionDescription(
- usage = """
- _FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the
specified columns
- so that we can run aggregation on them.
- """,
- examples = """
- Examples:
- > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob')
people(age, name) GROUP BY _FUNC_(name, age);
- Bob 5 1
- Alice 2 1
- Alice NULL 1
- NULL 2 1
- NULL NULL 2
- Bob NULL 1
- NULL 5 1
- """,
- since = "2.0.0")
-// scalastyle:on line.size.limit line.contains.tab
-case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
+object GroupingSet {
+ /*
+ * GROUP BY a, b, c WITH ROLLUP
Review comment:
> (I know this is not your mistake though) could you remove the single
> leading space?
Done
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
##########
@@ -39,45 +41,64 @@ trait GroupingSet extends Expression with CodegenFallback {
override def eval(input: InternalRow): Any = throw new
UnsupportedOperationException
}
-// scalastyle:off line.size.limit line.contains.tab
-@ExpressionDescription(
- usage = """
- _FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the
specified columns
- so that we can run aggregation on them.
- """,
- examples = """
- Examples:
- > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob')
people(age, name) GROUP BY _FUNC_(name, age);
- Bob 5 1
- Alice 2 1
- Alice NULL 1
- NULL 2 1
- NULL NULL 2
- Bob NULL 1
- NULL 5 1
- """,
- since = "2.0.0")
-// scalastyle:on line.size.limit line.contains.tab
-case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
+object GroupingSet {
+ /*
+ * GROUP BY a, b, c WITH ROLLUP
+ * is equivalent to
+ * GROUP BY a, b, c GROUPING SETS ( (a, b, c), (a, b), (a), ( ) ).
+ * Group Count: N + 1 (N is the number of group expressions)
+ *
+ * We need to get all of its subsets for the rule described above, the
subset is
+ * represented as sequence of expressions.
+ */
+ def rollupExprs(exprs: Seq[Seq[Expression]]): Seq[Seq[Expression]] =
+ exprs.inits.map(_.flatten).toIndexedSeq
-// scalastyle:off line.size.limit line.contains.tab
-@ExpressionDescription(
- usage = """
- _FUNC_([col1[, col2 ..]]) - create a multi-dimensional rollup using the
specified columns
- so that we can run aggregation on them.
- """,
- examples = """
- Examples:
- > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob')
people(age, name) GROUP BY _FUNC_(name, age);
- Bob 5 1
- Alice 2 1
- Alice NULL 1
- NULL NULL 2
- Bob NULL 1
- """,
- since = "2.0.0")
-// scalastyle:on line.size.limit line.contains.tab
-case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
+ /*
+ * GROUP BY a, b, c WITH CUBE
Review comment:
Done
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
##########
@@ -39,45 +41,64 @@ trait GroupingSet extends Expression with CodegenFallback {
override def eval(input: InternalRow): Any = throw new
UnsupportedOperationException
}
-// scalastyle:off line.size.limit line.contains.tab
-@ExpressionDescription(
- usage = """
- _FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the
specified columns
- so that we can run aggregation on them.
- """,
- examples = """
- Examples:
- > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob')
people(age, name) GROUP BY _FUNC_(name, age);
- Bob 5 1
- Alice 2 1
- Alice NULL 1
- NULL 2 1
- NULL NULL 2
- Bob NULL 1
- NULL 5 1
- """,
- since = "2.0.0")
-// scalastyle:on line.size.limit line.contains.tab
-case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
+object GroupingSet {
+ /*
+ * GROUP BY a, b, c WITH ROLLUP
+ * is equivalent to
+ * GROUP BY a, b, c GROUPING SETS ( (a, b, c), (a, b), (a), ( ) ).
+ * Group Count: N + 1 (N is the number of group expressions)
+ *
+ * We need to get all of its subsets for the rule described above, the
subset is
+ * represented as sequence of expressions.
+ */
+ def rollupExprs(exprs: Seq[Seq[Expression]]): Seq[Seq[Expression]] =
+ exprs.inits.map(_.flatten).toIndexedSeq
-// scalastyle:off line.size.limit line.contains.tab
-@ExpressionDescription(
- usage = """
- _FUNC_([col1[, col2 ..]]) - create a multi-dimensional rollup using the
specified columns
- so that we can run aggregation on them.
- """,
- examples = """
- Examples:
- > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob')
people(age, name) GROUP BY _FUNC_(name, age);
- Bob 5 1
- Alice 2 1
- Alice NULL 1
- NULL NULL 2
- Bob NULL 1
- """,
- since = "2.0.0")
-// scalastyle:on line.size.limit line.contains.tab
-case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
+ /*
+ * GROUP BY a, b, c WITH CUBE
+ * is equivalent to
+ * GROUP BY a, b, c GROUPING SETS ( (a, b, c), (a, b), (b, c), (a, c), (a),
(b), (c), ( ) ).
+ * Group Count: 2 ^ N (N is the number of group expressions)
+ *
+ * We need to get all of its subsets for a given GROUPBY expression, the
subsets are
+ * represented as sequence of expressions.
+ */
+ def cubeExprs(exprs: Seq[Seq[Expression]]): Seq[Seq[Expression]] = {
+ // `cubeExprs0` is recursive and returns a lazy Stream. Here we call
`toIndexedSeq` to
+ // materialize it and avoid serialization problems later on.
+ cubeExprs0(exprs).toIndexedSeq
+ }
+
+ def cubeExprs0(exprs: Seq[Seq[Expression]]): Seq[Seq[Expression]] =
exprs.toList match {
+ case x :: xs =>
+ val initial = cubeExprs0(xs)
+ initial.map(x ++ _) ++ initial
+ case Nil =>
+ Seq(Seq.empty)
+ }
+}
+
+case class Cube(groupingSets: Seq[Seq[Expression]]) extends GroupingSet {
+ import GroupingSet._
+ override def groupByExprs: Seq[Expression] =
groupingSets.flatMap(_.distinct).distinct
+
+ override def selectedGroupByExprs: Seq[Seq[Expression]] =
cubeExprs(groupingSets)
+}
+
+case class Rollup(groupingSets: Seq[Seq[Expression]]) extends GroupingSet {
+ import GroupingSet._
+ override def groupByExprs: Seq[Expression] =
groupingSets.flatMap(_.distinct).distinct
+
+ override def selectedGroupByExprs: Seq[Seq[Expression]] =
rollupExprs(groupingSets)
Review comment:
Done
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]