Repository: spark
Updated Branches:
  refs/heads/master 9059f1ee6 -> e29176fd7
[SPARK-23627][SQL] Provide isEmpty in Dataset

## What changes were proposed in this pull request?

This PR adds isEmpty() in DataSet

## How was this patch tested?

Unit tests added

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Goun Na <[email protected]>
Author: goungoun <[email protected]>

Closes #20800 from goungoun/SPARK-23627.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e29176fd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e29176fd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e29176fd

Branch: refs/heads/master
Commit: e29176fd7dbcef04a29c4922ba655d58144fed24
Parents: 9059f1e
Author: Goun Na <[email protected]>
Authored: Tue May 15 14:11:20 2018 +0800
Committer: hyukjinkwon <[email protected]>
Committed: Tue May 15 14:11:20 2018 +0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/Dataset.scala  | 10 ++++++++++
 .../test/scala/org/apache/spark/sql/DatasetSuite.scala |  8 ++++++++
 2 files changed, 18 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/e29176fd/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index d518e07..f001f16 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -512,6 +512,16 @@ class Dataset[T] private[sql](
   def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation]
 
   /**
+   * Returns true if the `Dataset` is empty.
+   *
+   * @group basic
+   * @since 2.4.0
+   */
+  def isEmpty: Boolean = withAction("isEmpty", limit(1).groupBy().count().queryExecution) { plan =>
+    plan.executeCollect().head.getLong(0) == 0
+  }
+
+  /**
    * Returns true if this Dataset contains one or more sources that continuously
    * return data as it arrives. A Dataset that reads data from a streaming source
    * must be executed as a `StreamingQuery` using the `start()` method in

http://git-wip-us.apache.org/repos/asf/spark/blob/e29176fd/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index e0f4d2b..d477d78 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -1425,6 +1425,14 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  test("SPARK-23627: provide isEmpty in DataSet") {
+    val ds1 = spark.emptyDataset[Int]
+    val ds2 = Seq(1, 2, 3).toDS()
+
+    assert(ds1.isEmpty == true)
+    assert(ds2.isEmpty == false)
+  }
+
   test("SPARK-22472: add null check for top-level primitive values") {
     // If the primitive values are from Option, we need to do runtime null check.
     val ds = Seq(Some(1), None).toDS().as[Int]
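For illustration only (not part of the commit), below is a minimal, self-contained Scala sketch showing how the new Dataset.isEmpty behaves against a Spark build that includes this change (2.4.0 or later). The object name IsEmptyExample, the appName, and the local[*] master are assumptions made for this sketch.

import org.apache.spark.sql.SparkSession

object IsEmptyExample {
  def main(args: Array[String]): Unit = {
    // Illustrative local session; appName and master are assumptions for this sketch.
    val spark = SparkSession.builder()
      .appName("IsEmptyExample")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val empty    = spark.emptyDataset[Int]
    val nonEmpty = Seq(1, 2, 3).toDS()

    // Per the diff above, isEmpty executes limit(1).groupBy().count(), so it only
    // needs to find a single row instead of counting the whole Dataset as
    // count() == 0 would.
    println(empty.isEmpty)     // true
    println(nonEmpty.isEmpty)  // false

    spark.stop()
  }
}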
