spark git commit: [SPARK-16639][SQL] The query with having condition that contains grouping by column should work

wenchen Thu, 28 Jul 2016 07:38:07 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 0fd2dfb6d -> 825c83717



[SPARK-16639][SQL] The query with having condition that contains grouping by 
column should work

## What changes were proposed in this pull request?

A query whose HAVING condition references a GROUP BY expression currently fails
during analysis. E.g.,

    create table tbl(a int, b string);
    select count(b) from tbl group by a + 1 having a + 1 = 2;

The HAVING condition should be able to reference the grouping expressions.

## How was this patch tested?

Jenkins tests.

Author: Liang-Chi Hsieh <sim...@tw.ibm.com>

Closes #14296 from viirya/having-contains-grouping-column.

(cherry picked from commit 9ade77c3fa2e1bf436b79368a97d5980c12fe215)
Signed-off-by: Wenchen Fan <wenc...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/825c8371
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/825c8371
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/825c8371

Branch: refs/heads/branch-2.0
Commit: 825c8371784468ff976526deffd97ad7df997738
Parents: 0fd2dfb
Author: Liang-Chi Hsieh <sim...@tw.ibm.com>
Authored: Thu Jul 28 22:33:33 2016 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Thu Jul 28 22:37:24 2016 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/Analyzer.scala  | 15 ++++++++++++-
 .../org/apache/spark/sql/SQLQuerySuite.scala    | 22 +++++++++++++++-----
 2 files changed, 31 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/825c8371/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 61162cc..2efa997 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -246,7 +246,7 @@ class Analyzer(
       }.isDefined
     }
 
-    private def hasGroupingFunction(e: Expression): Boolean = {
+    private[sql] def hasGroupingFunction(e: Expression): Boolean = {
       e.collectFirst {
         case g: Grouping => g
         case g: GroupingID => g
@@ -1207,6 +1207,19 @@ class Analyzer(
                 val alias = Alias(ae, ae.toString)()
                 aggregateExpressions += alias
                 alias.toAttribute
+              // Grouping functions are handled in the rule [[ResolveGroupingAnalytics]].
+              case e: Expression if grouping.exists(_.semanticEquals(e)) &&
+                  !ResolveGroupingAnalytics.hasGroupingFunction(e) &&
+                  !aggregate.output.exists(_.semanticEquals(e)) =>
+                e match {
+                  case ne: NamedExpression =>
+                    aggregateExpressions += ne
+                    ne.toAttribute
+                  case _ =>
+                    val alias = Alias(e, e.toString)()
+                    aggregateExpressions += alias
+                    alias.toAttribute
+                }
             }
 
             // Push the aggregate expressions into the aggregate (if any).

http://git-wip-us.apache.org/repos/asf/spark/blob/825c8371/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index be84dff..d965901 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -39,11 +39,23 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   setupTestData()
 
   test("having clause") {
-    Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
-      .createOrReplaceTempView("hav")
-    checkAnswer(
-      sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"),
-      Row("one", 6) :: Row("three", 3) :: Nil)
+    withTempView("hav") {
+      Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
+        .createOrReplaceTempView("hav")
+      checkAnswer(
+        sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"),
+        Row("one", 6) :: Row("three", 3) :: Nil)
+    }
+  }
+
+  test("having condition contains grouping column") {
+    withTempView("hav") {
+      Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
+        .createOrReplaceTempView("hav")
+      checkAnswer(
+        sql("SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2"),
+        Row(1) :: Nil)
+    }
   }
 
   test("SPARK-8010: promote numeric to string") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-16639][SQL] The query with having condition that contains grouping by column should work

Reply via email to