This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 16a28b1a961 [SPARK-43124][SQL] Dataset.show projects CommandResults locally
16a28b1a961 is described below
commit 16a28b1a961052a250dcf05b7c249c92156e1077
Author: Peter Toth <[email protected]>
AuthorDate: Fri Apr 21 09:33:45 2023 +0900
[SPARK-43124][SQL] Dataset.show projects CommandResults locally
### What changes were proposed in this pull request?
`Dataset.show()` currently triggers a job for a simple `show tables`
command. This is because the command output contains an `isTemporary` boolean
column that needs to be cast to string when `show()` is called on the dataset.
This PR converts the `CommandResult` to a `LocalRelation` and lets
`ConvertToLocalRelation` do the casting locally, so no job execution is
triggered.
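For context, a minimal driver program that exercises this code path might look
like the sketch below (the local session setup is illustrative and not part of
the patch): before this change the string cast of the command output ran as a
Spark job; with this change it is folded locally by `ConvertToLocalRelation`.

```scala
import org.apache.spark.sql.SparkSession

object ShowTablesNoJobExample {
  def main(args: Array[String]): Unit = {
    // Illustrative local session; any SparkSession behaves the same way here.
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("SPARK-43124-example")
      .getOrCreate()

    // `show tables` produces a CommandResult plan; show() casts its columns
    // (including the boolean `isTemporary`) to strings before printing.
    // With this patch that cast no longer launches a job.
    spark.sql("show tables").show()

    spark.stop()
  }
}
```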
### Why are the changes needed?
A simple `show tables` should not require an executor.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added a new unit test.
Closes #40779 from peter-toth/SPARK-43124-dataset-show-projects-commandresults-locally.
Authored-by: Peter Toth <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../src/main/scala/org/apache/spark/sql/Dataset.scala | 8 +++++++-
.../scala/org/apache/spark/sql/DatasetSuite.scala | 19 +++++++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index be37fdae025..d33a36a8380 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -272,7 +272,13 @@ class Dataset[T] private[sql](
   private[sql] def getRows(
       numRows: Int,
       truncate: Int): Seq[Seq[String]] = {
-    val newDf = toDF()
+    val newDf = logicalPlan match {
+      case c: CommandResult =>
+        // Convert to `LocalRelation` and let `ConvertToLocalRelation` do the casting locally to
+        // avoid triggering a job
+        Dataset.ofRows(sparkSession, LocalRelation(c.output, c.rows))
+      case _ => toDF()
+    }
     val castCols = newDf.logicalPlan.output.map { col =>
       // Since binary types in top-level schema fields have a specific format to print,
       // so we do not cast them to strings here.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 75cee407819..167aea79209 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -2474,6 +2474,25 @@ class DatasetSuite extends QueryTest
     )
     assert(result == expected)
   }
+
+  test("SPARK-43124: Show does not trigger job execution on CommandResults") {
+    withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> "") {
+      withTable("t1") {
+        sql("create table t1(c int) using parquet")
+
+        @volatile var jobCounter = 0
+        val listener = new SparkListener {
+          override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
+            jobCounter += 1
+          }
+        }
+        withListener(spark.sparkContext, listener) { _ =>
+          sql("show tables").show()
+        }
+        assert(jobCounter === 0)
+      }
+    }
+  }
 }

 class DatasetLargeResultCollectingSuite extends QueryTest