Repository: spark
Updated Branches:
  refs/heads/master 9059f1ee6 -> e29176fd7
[SPARK-23627][SQL] Provide isEmpty in Dataset

## What changes were proposed in this pull request?

This PR adds isEmpty() in DataSet

## How was this patch tested?

Unit tests added

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Goun Na <[email protected]>
Author: goungoun <[email protected]>

Closes #20800 from goungoun/SPARK-23627.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e29176fd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e29176fd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e29176fd

Branch: refs/heads/master
Commit: e29176fd7dbcef04a29c4922ba655d58144fed24
Parents: 9059f1e
Author: Goun Na <[email protected]>
Authored: Tue May 15 14:11:20 2018 +0800
Committer: hyukjinkwon <[email protected]>
Committed: Tue May 15 14:11:20 2018 +0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/Dataset.scala  | 10 ++++++++++
 .../test/scala/org/apache/spark/sql/DatasetSuite.scala |  8 ++++++++
 2 files changed, 18 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/e29176fd/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index d518e07..f001f16 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -512,6 +512,16 @@ class Dataset[T] private[sql](
   def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation]
 
   /**
+   * Returns true if the `Dataset` is empty.
+   *
+   * @group basic
+   * @since 2.4.0
+   */
+  def isEmpty: Boolean = withAction("isEmpty", limit(1).groupBy().count().queryExecution) { plan =>
+    plan.executeCollect().head.getLong(0) == 0
+  }
+
+  /**
    * Returns true if this Dataset contains one or more sources that continuously
    * return data as it arrives. A Dataset that reads data from a streaming source
    * must be executed as a `StreamingQuery` using the `start()` method in

http://git-wip-us.apache.org/repos/asf/spark/blob/e29176fd/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index e0f4d2b..d477d78 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -1425,6 +1425,14 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  test("SPARK-23627: provide isEmpty in DataSet") {
+    val ds1 = spark.emptyDataset[Int]
+    val ds2 = Seq(1, 2, 3).toDS()
+
+    assert(ds1.isEmpty == true)
+    assert(ds2.isEmpty == false)
+  }
+
   test("SPARK-22472: add null check for top-level primitive values") {
     // If the primitive values are from Option, we need to do runtime null check.
     val ds = Seq(Some(1), None).toDS().as[Int]
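For illustration only (not part of the commit), below is a minimal, self-contained Scala sketch showing how the new Dataset.isEmpty behaves against a Spark build that includes this change (2.4.0 or later). The object name IsEmptyExample, the appName, and the local[*] master are assumptions made for this sketch.

import org.apache.spark.sql.SparkSession

object IsEmptyExample {
  def main(args: Array[String]): Unit = {
    // Illustrative local session; appName and master are assumptions for this sketch.
    val spark = SparkSession.builder()
      .appName("IsEmptyExample")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val empty    = spark.emptyDataset[Int]
    val nonEmpty = Seq(1, 2, 3).toDS()

    // Per the diff above, isEmpty executes limit(1).groupBy().count(), so it only
    // needs to find a single row instead of counting the whole Dataset as
    // count() == 0 would.
    println(empty.isEmpty)     // true
    println(nonEmpty.isEmpty)  // false

    spark.stop()
  }
}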
