git commit: SPARK-1822: Some minor cleanup work on SchemaRDD.count()

rxin Sun, 25 May 2014 01:45:20 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-1.0 291567d60 -> aeffc2004



SPARK-1822: Some minor cleanup work on SchemaRDD.count()

Minor cleanup following #841.

Author: Reynold Xin <[email protected]>

Closes #868 from rxin/schema-count and squashes the following commits:

5442651 [Reynold Xin] SPARK-1822: Some minor cleanup work on SchemaRDD.count()

(cherry picked from commit d66642e3978a76977414c2fdaedebaad35662667)
Signed-off-by: Reynold Xin <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aeffc200
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aeffc200
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aeffc200

Branch: refs/heads/branch-1.0
Commit: aeffc2004e2ee4b8f060c44575a90e1dbcf901e3
Parents: 291567d
Author: Reynold Xin <[email protected]>
Authored: Sun May 25 01:44:49 2014 -0700
Committer: Reynold Xin <[email protected]>
Committed: Sun May 25 01:45:01 2014 -0700

----------------------------------------------------------------------
 python/pyspark/sql.py                                        | 5 ++++-
 sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala | 8 ++++----
 .../src/test/scala/org/apache/spark/sql/DslQuerySuite.scala  | 2 +-
 sql/core/src/test/scala/org/apache/spark/sql/TestData.scala  | 2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/aeffc200/python/pyspark/sql.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index f2001af..fa4b9c7 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -323,7 +323,10 @@ class SchemaRDD(RDD):
 
     def count(self):
         """
-        Return the number of elements in this RDD.
+        Return the number of elements in this RDD. Unlike the base RDD
+        implementation of count, this implementation leverages the query
+        optimizer to compute the count on the SchemaRDD, which supports
+        features such as filter pushdown.
 
         >>> srdd = sqlCtx.inferSchema(rdd)
         >>> srdd.count()

http://git-wip-us.apache.org/repos/asf/spark/blob/aeffc200/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
index 452da3d..9883ebc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -276,12 +276,12 @@ class SchemaRDD(
 
   /**
    * :: Experimental ::
-   * Overriding base RDD implementation to leverage query optimizer
+   * Return the number of elements in the RDD. Unlike the base RDD implementation of count, this
+   * implementation leverages the query optimizer to compute the count on the SchemaRDD, which
+   * supports features such as filter pushdown.
    */
   @Experimental
-  override def count(): Long = {
-    groupBy()(Count(Literal(1))).collect().head.getLong(0)
-  }
+  override def count(): Long = groupBy()(Count(Literal(1))).collect().head.getLong(0)
 
   /**
    * :: Experimental ::

http://git-wip-us.apache.org/repos/asf/spark/blob/aeffc200/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
index 233132a..94ba13b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
@@ -124,7 +124,7 @@ class DslQuerySuite extends QueryTest {
   }
 
   test("zero count") {
-    assert(testData4.count() === 0)
+    assert(emptyTableData.count() === 0)
   }
 
   test("inner join where, one match per row") {

http://git-wip-us.apache.org/repos/asf/spark/blob/aeffc200/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
index b1eecb4..944f520 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
@@ -47,7 +47,7 @@ object TestData {
       (1, null) ::
       (2, 2) :: Nil)
 
-  val testData4 = logical.LocalRelation('a.int, 'b.int)
+  val emptyTableData = logical.LocalRelation('a.int, 'b.int)
 
   case class UpperCaseData(N: Int, L: String)
   val upperCaseData =

git commit: SPARK-1822: Some minor cleanup work on SchemaRDD.count()

Reply via email to