Repository: spark Updated Branches: refs/heads/master 6039b1323 -> d2c3de7ef
Revert "[SPARK-24369][SQL] Correct handling for multiple distinct aggregations having the same argument set" This reverts commit 1e46f92f956a00d04d47340489b6125d44dbd47b. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2c3de7e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2c3de7e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2c3de7e Branch: refs/heads/master Commit: d2c3de7efcfacadff20b023924d4566a5bf9ad7a Parents: 6039b13 Author: Xiao Li <gatorsm...@gmail.com> Authored: Fri Jun 1 11:51:10 2018 -0700 Committer: Xiao Li <gatorsm...@gmail.com> Committed: Fri Jun 1 11:51:10 2018 -0700 ---------------------------------------------------------------------- .../catalyst/optimizer/RewriteDistinctAggregates.scala | 7 +++---- .../org/apache/spark/sql/execution/SparkStrategies.scala | 2 +- .../src/test/resources/sql-tests/inputs/group-by.sql | 6 +----- .../test/resources/sql-tests/results/group-by.sql.out | 11 +---------- 4 files changed, 6 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/d2c3de7e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala index bc898ab..4448ace 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala @@ -115,8 +115,7 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] { } // Extract distinct aggregate expressions. - val distincgAggExpressions = aggExpressions.filter(_.isDistinct) - val distinctAggGroups = distincgAggExpressions.groupBy { e => + val distinctAggGroups = aggExpressions.filter(_.isDistinct).groupBy { e => val unfoldableChildren = e.aggregateFunction.children.filter(!_.foldable).toSet if (unfoldableChildren.nonEmpty) { // Only expand the unfoldable children @@ -133,7 +132,7 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] { } // Aggregation strategy can handle queries with a single distinct group. - if (distincgAggExpressions.size > 1) { + if (distinctAggGroups.size > 1) { // Create the attributes for the grouping id and the group by clause. val gid = AttributeReference("gid", IntegerType, nullable = false)() val groupByMap = a.groupingExpressions.collect { @@ -152,7 +151,7 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] { } // Setup unique distinct aggregate children. - val distinctAggChildren = distinctAggGroups.keySet.flatten.toSeq + val distinctAggChildren = distinctAggGroups.keySet.flatten.toSeq.distinct val distinctAggChildAttrMap = distinctAggChildren.map(expressionAttributePair) val distinctAggChildAttrs = distinctAggChildAttrMap.map(_._2) http://git-wip-us.apache.org/repos/asf/spark/blob/d2c3de7e/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index b9452b5..b97a87a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -386,7 +386,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { aggregateExpressions.partition(_.isDistinct) if (functionsWithDistinct.map(_.aggregateFunction.children).distinct.length > 1) { // This is a sanity check. We should not reach here when we have multiple distinct - // column sets. Our `RewriteDistinctAggregates` should take care this case. + // column sets. Our MultipleDistinctRewriter should take care this case. sys.error("You hit a query analyzer bug. Please report your query to " + "Spark user mailing list.") } http://git-wip-us.apache.org/repos/asf/spark/blob/d2c3de7e/sql/core/src/test/resources/sql-tests/inputs/group-by.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 2c18d6a..c5070b7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -68,8 +68,4 @@ SELECT 1 from ( FROM (select 1 as x) a WHERE false ) b -where b.z != b.z; - --- SPARK-24369 multiple distinct aggregations having the same argument set -SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) - FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); +where b.z != b.z http://git-wip-us.apache.org/repos/asf/spark/blob/d2c3de7e/sql/core/src/test/resources/sql-tests/results/group-by.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 581aa17..c1abc6d 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 27 +-- Number of queries: 26 -- !query 0 @@ -241,12 +241,3 @@ where b.z != b.z struct<1:int> -- !query 25 output - - --- !query 26 -SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) - FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) --- !query 26 schema -struct<corr(DISTINCT CAST(x AS DOUBLE), CAST(y AS DOUBLE)):double,corr(DISTINCT CAST(y AS DOUBLE), CAST(x AS DOUBLE)):double,count(1):bigint> --- !query 26 output -1.0 1.0 3 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org