spark git commit: Revert "[SPARK-24369][SQL] Correct handling for multiple distinct aggregations having the same argument set"

lixiao Fri, 01 Jun 2018 11:52:17 -0700

Repository: spark
Updated Branches:
  refs/heads/master 6039b1323 -> d2c3de7ef



Revert "[SPARK-24369][SQL] Correct handling for multiple distinct aggregations having the same argument set"

This reverts commit 1e46f92f956a00d04d47340489b6125d44dbd47b.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2c3de7e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2c3de7e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2c3de7e

Branch: refs/heads/master
Commit: d2c3de7efcfacadff20b023924d4566a5bf9ad7a
Parents: 6039b13
Author: Xiao Li <gatorsm...@gmail.com>
Authored: Fri Jun 1 11:51:10 2018 -0700
Committer: Xiao Li <gatorsm...@gmail.com>
Committed: Fri Jun 1 11:51:10 2018 -0700

----------------------------------------------------------------------
 .../catalyst/optimizer/RewriteDistinctAggregates.scala   |  7 +++----
 .../org/apache/spark/sql/execution/SparkStrategies.scala |  2 +-
 .../src/test/resources/sql-tests/inputs/group-by.sql     |  6 +-----
 .../test/resources/sql-tests/results/group-by.sql.out    | 11 +----------
 4 files changed, 6 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d2c3de7e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala
index bc898ab..4448ace 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala
@@ -115,8 +115,7 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] {
     }
 
     // Extract distinct aggregate expressions.
-    val distincgAggExpressions = aggExpressions.filter(_.isDistinct)
-    val distinctAggGroups = distincgAggExpressions.groupBy { e =>
+    val distinctAggGroups = aggExpressions.filter(_.isDistinct).groupBy { e =>
         val unfoldableChildren = 
e.aggregateFunction.children.filter(!_.foldable).toSet
         if (unfoldableChildren.nonEmpty) {
           // Only expand the unfoldable children
@@ -133,7 +132,7 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] {
     }
 
     // Aggregation strategy can handle queries with a single distinct group.
-    if (distincgAggExpressions.size > 1) {
+    if (distinctAggGroups.size > 1) {
       // Create the attributes for the grouping id and the group by clause.
       val gid = AttributeReference("gid", IntegerType, nullable = false)()
       val groupByMap = a.groupingExpressions.collect {
@@ -152,7 +151,7 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] {
       }
 
       // Setup unique distinct aggregate children.
-      val distinctAggChildren = distinctAggGroups.keySet.flatten.toSeq
+      val distinctAggChildren = distinctAggGroups.keySet.flatten.toSeq.distinct
       val distinctAggChildAttrMap = 
distinctAggChildren.map(expressionAttributePair)
       val distinctAggChildAttrs = distinctAggChildAttrMap.map(_._2)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/d2c3de7e/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index b9452b5..b97a87a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -386,7 +386,7 @@ abstract class SparkStrategies extends 
QueryPlanner[SparkPlan] {
           aggregateExpressions.partition(_.isDistinct)
         if 
(functionsWithDistinct.map(_.aggregateFunction.children).distinct.length > 1) {
           // This is a sanity check. We should not reach here when we have 
multiple distinct
-          // column sets. Our `RewriteDistinctAggregates` should take care 
this case.
+          // column sets. Our MultipleDistinctRewriter should take care this 
case.
           sys.error("You hit a query analyzer bug. Please report your query to 
" +
               "Spark user mailing list.")
         }

http://git-wip-us.apache.org/repos/asf/spark/blob/d2c3de7e/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index 2c18d6a..c5070b7 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -68,8 +68,4 @@ SELECT 1 from (
   FROM (select 1 as x) a
   WHERE false
 ) b
-where b.z != b.z;
-
--- SPARK-24369 multiple distinct aggregations having the same argument set
-SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
-  FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y);
+where b.z != b.z

http://git-wip-us.apache.org/repos/asf/spark/blob/d2c3de7e/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index 581aa17..c1abc6d 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 27
+-- Number of queries: 26
 
 
 -- !query 0
@@ -241,12 +241,3 @@ where b.z != b.z
 struct<1:int>
 -- !query 25 output
 
-
-
--- !query 26
-SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
-  FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y)
--- !query 26 schema
-struct<corr(DISTINCT CAST(x AS DOUBLE), CAST(y AS 
DOUBLE)):double,corr(DISTINCT CAST(y AS DOUBLE), CAST(x AS 
DOUBLE)):double,count(1):bigint>
--- !query 26 output
-1.0    1.0     3


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: Revert "[SPARK-24369][SQL] Correct handling for multiple distinct aggregations having the same argument set"

Reply via email to