spark git commit: [SPARK-4650][SQL] Supporting multi column support in countDistinct function like count(distinct c1, c2..) in Spark SQL

marmbrus Mon, 01 Dec 2014 13:30:10 -0800

Repository: spark
Updated Branches:
  refs/heads/branch-1.2 f2bb90a29 -> 5006aab9d



[SPARK-4650][SQL] Supporting multi column support in countDistinct function 
like count(distinct c1,c2..) in Spark SQL

Adds multi-column support to the countDistinct function, so that queries like 
count(distinct c1, c2, ...) are accepted by the Spark SQL parser.

Author: ravipesala <ravindra.pes...@huawei.com>
Author: Michael Armbrust <mich...@databricks.com>

Closes #3511 from ravipesala/countdistinct and squashes the following commits:

cc4dbb1 [ravipesala] style
070e12a [ravipesala] Supporting multi column support in count(distinct c1,c2..) 
in Spark SQL

(cherry picked from commit 6a9ff19dc06745144d5b311d4f87073c81d53a8f)
Signed-off-by: Michael Armbrust <mich...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5006aab9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5006aab9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5006aab9

Branch: refs/heads/branch-1.2
Commit: 5006aab9d6f8dd4ce3dd11d388f96790c04cf25c
Parents: f2bb90a
Author: ravipesala <ravindra.pes...@huawei.com>
Authored: Mon Dec 1 13:26:44 2014 -0800
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Mon Dec 1 13:29:35 2014 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/sql/catalyst/SqlParser.scala  | 3 ++-
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala   | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5006aab9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index a9ff10f..a2bcd73 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -277,7 +277,8 @@ class SqlParser extends AbstractSparkSQLParser {
     | SUM   ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => 
SumDistinct(exp) }
     | COUNT ~  "(" ~> "*"                    <~ ")" ^^ { case _ => 
Count(Literal(1)) }
     | COUNT ~  "(" ~> expression             <~ ")" ^^ { case exp => 
Count(exp) }
-    | COUNT ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => 
CountDistinct(exp :: Nil) }
+    | COUNT ~> "(" ~> DISTINCT ~> repsep(expression, ",") <~ ")" ^^
+      { case exps => CountDistinct(exps) }
     | APPROXIMATE ~ COUNT ~ "(" ~ DISTINCT ~> expression <~ ")" ^^
       { case exp => ApproxCountDistinct(exp) }
     | APPROXIMATE ~> "(" ~> floatLit ~ ")" ~ COUNT ~ "(" ~ DISTINCT ~ 
expression <~ ")" ^^

http://git-wip-us.apache.org/repos/asf/spark/blob/5006aab9/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 84ee305..f83e647 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -992,4 +992,11 @@ class SQLQuerySuite extends QueryTest with 
BeforeAndAfterAll {
       "nulldata2 on nulldata1.value <=> nulldata2.value"),
         (1 to 2).map(i => Seq(i)))
   }
+
+  test("Multi-column COUNT(DISTINCT ...)") {
+    val data = TestData(1,"val_1") :: TestData(2,"val_2") :: Nil
+    val rdd = sparkContext.parallelize((0 to 1).map(i => data(i)))
+    rdd.registerTempTable("distinctData")
+    checkAnswer(sql("SELECT COUNT(DISTINCT key,value) FROM distinctData"), 2)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-4650][SQL] Supporting multi column support in countDistinct function like count(distinct c1, c2..) in Spark SQL

Reply via email to