Repository: spark Updated Branches: refs/heads/master f067acefa -> c4a6519c4
[SPARK-19218][SQL] Fix SET command to show a result correctly and in a sorted order ## What changes were proposed in this pull request? This PR aims to fix the following two things. 1. `sql("SET -v").collect()` or `sql("SET -v").show()` raises the following exceptions for String configuration with default value, `null`. For the test, please see [Jenkins result](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/71539/testReport/) and https://github.com/apache/spark/commit/60953bf1f1ba144e709fdae3903a390ff9479fd0 in #16624 . ``` sbt.ForkMain$ForkError: java.lang.RuntimeException: Error while decoding: java.lang.NullPointerException createexternalrow(input[0, string, false].toString, input[1, string, false].toString, input[2, string, false].toString, StructField(key,StringType,false), StructField(value,StringType,false), StructField(meaning,StringType,false)) :- input[0, string, false].toString : +- input[0, string, false] :- input[1, string, false].toString : +- input[1, string, false] +- input[2, string, false].toString +- input[2, string, false] ``` 2. Currently, `SET` and `SET -v` commands show unsorted result. We had better show a sorted result for UX. Also, this is compatible with Hive. **BEFORE** ``` scala> sql("set").show(false) ... |spark.driver.host |10.22.16.140 | |spark.driver.port |63893 | |spark.repl.class.uri |spark://10.22.16.140:63893/classes | ... |spark.app.name |Spark shell | |spark.driver.memory |4G | |spark.executor.id |driver | |spark.submit.deployMode |client | |spark.master |local[*] | |spark.home |/Users/dhyun/spark | |spark.sql.catalogImplementation|hive | |spark.app.id |local-1484333618945 | ``` **AFTER** ``` scala> sql("set").show(false) ... |spark.app.id |local-1484333925649 | |spark.app.name |Spark shell | |spark.driver.host |10.22.16.140 | |spark.driver.memory |4G | |spark.driver.port |64994 | |spark.executor.id |driver | |spark.jars | | |spark.master |local[*] | |spark.repl.class.uri |spark://10.22.16.140:64994/classes | |spark.sql.catalogImplementation|hive | |spark.submit.deployMode |client | ``` ## How was this patch tested? Jenkins with a new test case. Author: Dongjoon Hyun <dongj...@apache.org> Closes #16579 from dongjoon-hyun/SPARK-19218. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c4a6519c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c4a6519c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c4a6519c Branch: refs/heads/master Commit: c4a6519c44f29950ef3d706a4f79e006ec8bc6b5 Parents: f067ace Author: Dongjoon Hyun <dongj...@apache.org> Authored: Mon Jan 23 01:21:44 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Mon Jan 23 01:21:44 2017 -0800 ---------------------------------------------------------------------- .../sql/execution/command/SetCommand.scala | 7 ++--- .../org/apache/spark/sql/internal/SQLConf.scala | 5 ++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 27 ++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/c4a6519c/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index dc8d975..7afa4e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -79,7 +79,7 @@ case class SetCommand(kv: Option[(String, Option[String])]) extends RunnableComm // Queries all key-value pairs that are set in the SQLConf of the sparkSession. case None => val runFunc = (sparkSession: SparkSession) => { - sparkSession.conf.getAll.map { case (k, v) => Row(k, v) }.toSeq + sparkSession.conf.getAll.toSeq.sorted.map { case (k, v) => Row(k, v) } } (keyValueOutput, runFunc) @@ -87,8 +87,9 @@ case class SetCommand(kv: Option[(String, Option[String])]) extends RunnableComm // SQLConf of the sparkSession. case Some(("-v", None)) => val runFunc = (sparkSession: SparkSession) => { - sparkSession.sessionState.conf.getAllDefinedConfs.map { case (key, defaultValue, doc) => - Row(key, defaultValue, doc) + sparkSession.sessionState.conf.getAllDefinedConfs.sorted.map { + case (key, defaultValue, doc) => + Row(key, Option(defaultValue).getOrElse("<undefined>"), doc) } } val schema = StructType( http://git-wip-us.apache.org/repos/asf/spark/blob/c4a6519c/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 645b0fa..d0c86ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -50,6 +50,11 @@ object SQLConf { sqlConfEntries.put(entry.key, entry) } + // For testing only + private[sql] def unregister(entry: ConfigEntry[_]): Unit = sqlConfEntries.synchronized { + sqlConfEntries.remove(entry.key) + } + private[sql] object SQLConfigBuilder { def apply(key: String): ConfigBuilder = new ConfigBuilder(key).onCreate(register) http://git-wip-us.apache.org/repos/asf/spark/blob/c4a6519c/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 8f1beaa..07b787a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -982,6 +982,33 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { spark.sessionState.conf.clear() } + test("SPARK-19218 SET command should show a result in a sorted order") { + val overrideConfs = sql("SET").collect() + sql(s"SET test.key3=1") + sql(s"SET test.key2=2") + sql(s"SET test.key1=3") + val result = sql("SET").collect() + assert(result === + (overrideConfs ++ Seq( + Row("test.key1", "3"), + Row("test.key2", "2"), + Row("test.key3", "1"))).sortBy(_.getString(0)) + ) + spark.sessionState.conf.clear() + } + + test("SPARK-19218 `SET -v` should not fail with null value configuration") { + import SQLConf._ + val confEntry = SQLConfigBuilder("spark.test").doc("doc").stringConf.createWithDefault(null) + + try { + val result = sql("SET -v").collect() + assert(result === result.sortBy(_.getString(0))) + } finally { + SQLConf.unregister(confEntry) + } + } + test("SET commands with illegal or inappropriate argument") { spark.sessionState.conf.clear() // Set negative mapred.reduce.tasks for automatically determining --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org