This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 16a28b1a961 [SPARK-43124][SQL] Dataset.show projects CommandResults locally
16a28b1a961 is described below
commit 16a28b1a961052a250dcf05b7c249c92156e1077
Author: Peter Toth <[email protected]>
AuthorDate: Fri Apr 21 09:33:45 2023 +0900
[SPARK-43124][SQL] Dataset.show projects CommandResults locally
### What changes were proposed in this pull request?
`Dataset.show()` currently triggers a job for a simple `show tables`
command. This is because the command output contains an `isTemporary` boolean
column that needs to be cast to string when `show()` is called on the dataset.
This PR converts the `CommandResult` to a `LocalRelation` and lets
`ConvertToLocalRelation` do the casting locally, so no job execution is
triggered.
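For context, a minimal driver program that exercises this code path might look
like the sketch below (the local session setup is illustrative and not part of
the patch): before this change the string cast of the command output ran as a
Spark job; with this change it is folded locally by `ConvertToLocalRelation`.

```scala
import org.apache.spark.sql.SparkSession

object ShowTablesNoJobExample {
  def main(args: Array[String]): Unit = {
    // Illustrative local session; any SparkSession behaves the same way here.
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("SPARK-43124-example")
      .getOrCreate()

    // `show tables` produces a CommandResult plan; show() casts its columns
    // (including the boolean `isTemporary`) to strings before printing.
    // With this patch that cast no longer launches a job.
    spark.sql("show tables").show()

    spark.stop()
  }
}
```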
### Why are the changes needed?
A simple `show tables` should not require an executor.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added a new unit test.
Closes #40779 from peter-toth/SPARK-43124-dataset-show-projects-commandresults-locally.
Authored-by: Peter Toth <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../src/main/scala/org/apache/spark/sql/Dataset.scala | 8 +++++++-
.../scala/org/apache/spark/sql/DatasetSuite.scala | 19 +++++++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index be37fdae025..d33a36a8380 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -272,7 +272,13 @@ class Dataset[T] private[sql](
   private[sql] def getRows(
       numRows: Int,
       truncate: Int): Seq[Seq[String]] = {
-    val newDf = toDF()
+    val newDf = logicalPlan match {
+      case c: CommandResult =>
+        // Convert to `LocalRelation` and let `ConvertToLocalRelation` do the casting locally to
+        // avoid triggering a job
+        Dataset.ofRows(sparkSession, LocalRelation(c.output, c.rows))
+      case _ => toDF()
+    }
     val castCols = newDf.logicalPlan.output.map { col =>
       // Since binary types in top-level schema fields have a specific format to print,
       // so we do not cast them to strings here.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 75cee407819..167aea79209 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -2474,6 +2474,25 @@ class DatasetSuite extends QueryTest
     )
     assert(result == expected)
   }
+
+  test("SPARK-43124: Show does not trigger job execution on CommandResults") {
+    withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> "") {
+      withTable("t1") {
+        sql("create table t1(c int) using parquet")
+
+        @volatile var jobCounter = 0
+        val listener = new SparkListener {
+          override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
+            jobCounter += 1
+          }
+        }
+        withListener(spark.sparkContext, listener) { _ =>
+          sql("show tables").show()
+        }
+        assert(jobCounter === 0)
+      }
+    }
+  }
 }

 class DatasetLargeResultCollectingSuite extends QueryTest