Repository: spark
Updated Branches:
  refs/heads/master 6f2fa6c54 -> db8784fea


[SPARK-17899][SQL] add a debug mode to keep raw table properties in HiveExternalCatalog

## What changes were proposed in this pull request?

Currently `HiveExternalCatalog` filters out the Spark SQL internal table properties, e.g. `spark.sql.sources.provider`, `spark.sql.sources.schema`, etc. This is reasonable for external users, as they don't want to see these internal properties in `DESC TABLE`.
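For context, a data source table created with e.g. `USING json` carries entries roughly like the sketch below in its table properties; these are the entries `HiveExternalCatalog` normally strips before returning the table metadata (a hedged illustration only, the exact keys and serialized values vary by Spark version):

```scala
// Illustrative only: the kind of internal entries a JSON data source table
// stores in its table properties. Key names mirror the constants referenced
// in the diff below (DATASOURCE_PROVIDER, DATASOURCE_SCHEMA_NUMPARTS, ...);
// the values here are made up for the example.
val internalProps = Map(
  "spark.sql.sources.provider" -> "json",
  "spark.sql.sources.schema.numParts" -> "1",
  "spark.sql.sources.schema.part.0" -> """{"type":"struct","fields":[ ... ]}"""
)
```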

However, as Spark developers, we sometimes do want to see the raw table properties. This PR adds a new internal SQL conf, `spark.sql.debug`, which enables a debug mode that keeps these raw table properties.

This config can also be used in similar places in the future where we want to retain debug information.
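Since `spark.sql.debug` is a static conf, it has to be set on the `SparkConf` before the `SparkSession` is created. A rough sketch of how a developer might turn it on, mirroring the new test below (`sessionState` is an internal API, so this is illustrative rather than a supported usage):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

// spark.sql.debug is static: set it before the session is built; it cannot
// be changed afterwards with SET.
val conf = new SparkConf().set("spark.sql.debug", "true")
val spark = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate()

spark.sql("CREATE TABLE t(i INT) USING json")

// With debug mode on, the raw data source entries survive in the properties map.
val meta = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
meta.properties.foreach { case (k, v) => println(s"$k -> $v") }
```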

## How was this patch tested?

A new test in `MetastoreDataSourcesSuite`.

Author: Wenchen Fan <wenc...@databricks.com>

Closes #15458 from cloud-fan/debug.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/db8784fe
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/db8784fe
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/db8784fe

Branch: refs/heads/master
Commit: db8784feaa605adcbd37af4bc8b7146479b631f8
Parents: 6f2fa6c
Author: Wenchen Fan <wenc...@databricks.com>
Authored: Thu Oct 13 03:26:29 2016 -0400
Committer: Andrew Or <andrewo...@gmail.com>
Committed: Thu Oct 13 03:26:29 2016 -0400

----------------------------------------------------------------------
 .../org/apache/spark/sql/internal/SQLConf.scala |  5 ++++
 .../spark/sql/internal/SQLConfSuite.scala       | 24 ++++++++++++--------
 .../spark/sql/hive/HiveExternalCatalog.scala    |  9 ++++++--
 .../sql/hive/MetastoreDataSourcesSuite.scala    | 17 +++++++++++++-
 4 files changed, 42 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/db8784fe/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 9e7c1ec..192083e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -915,4 +915,9 @@ object StaticSQLConf {
     .internal()
     .intConf
     .createWithDefault(4000)
+
+  val DEBUG_MODE = buildConf("spark.sql.debug")
+    .internal()
+    .booleanConf
+    .createWithDefault(false)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/db8784fe/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
index f545de0..df640ff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.internal
 
 import org.apache.hadoop.fs.Path
 
+import org.apache.spark.SparkContext
 import org.apache.spark.sql._
 import org.apache.spark.sql.execution.WholeStageCodegenExec
 import org.apache.spark.sql.internal.StaticSQLConf._
@@ -254,18 +255,21 @@ class SQLConfSuite extends QueryTest with SharedSQLContext {
     }
   }
 
-  test("global SQL conf comes from SparkConf") {
-    val newSession = SparkSession.builder()
-      .config(SCHEMA_STRING_LENGTH_THRESHOLD.key, "2000")
-      .getOrCreate()
-
-    assert(newSession.conf.get(SCHEMA_STRING_LENGTH_THRESHOLD.key) == "2000")
-    checkAnswer(
-      newSession.sql(s"SET ${SCHEMA_STRING_LENGTH_THRESHOLD.key}"),
-      Row(SCHEMA_STRING_LENGTH_THRESHOLD.key, "2000"))
+  test("static SQL conf comes from SparkConf") {
+    val previousValue = sparkContext.conf.get(SCHEMA_STRING_LENGTH_THRESHOLD)
+    try {
+      sparkContext.conf.set(SCHEMA_STRING_LENGTH_THRESHOLD, 2000)
+      val newSession = new SparkSession(sparkContext)
+      assert(newSession.conf.get(SCHEMA_STRING_LENGTH_THRESHOLD) == 2000)
+      checkAnswer(
+        newSession.sql(s"SET ${SCHEMA_STRING_LENGTH_THRESHOLD.key}"),
+        Row(SCHEMA_STRING_LENGTH_THRESHOLD.key, "2000"))
+    } finally {
+      sparkContext.conf.set(SCHEMA_STRING_LENGTH_THRESHOLD, previousValue)
+    }
   }
 
-  test("cannot set/unset global SQL conf") {
+  test("cannot set/unset static SQL conf") {
     val e1 = intercept[AnalysisException](sql(s"SET ${SCHEMA_STRING_LENGTH_THRESHOLD.key}=10"))
     assert(e1.message.contains("Cannot modify the value of a static config"))
     val e2 = intercept[AnalysisException](spark.conf.unset(SCHEMA_STRING_LENGTH_THRESHOLD.key))

http://git-wip-us.apache.org/repos/asf/spark/blob/db8784fe/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index e1c0cad..ed18972 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.command.{ColumnStatStruct, DDLUtils}
 import org.apache.spark.sql.execution.datasources.CaseInsensitiveMap
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.HiveSerDe
-import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD
+import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.sql.types.{DataType, StructType}
 
 
@@ -461,13 +461,18 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         } else {
           table.storage
         }
+        val tableProps = if (conf.get(DEBUG_MODE)) {
+          table.properties
+        } else {
+          getOriginalTableProperties(table)
+        }
         table.copy(
           storage = storage,
           schema = getSchemaFromTableProperties(table),
           provider = Some(provider),
           partitionColumnNames = getPartitionColumnsFromTableProperties(table),
           bucketSpec = getBucketSpecFromTableProperties(table),
-          properties = getOriginalTableProperties(table))
+          properties = tableProps)
       } getOrElse {
         table.copy(provider = Some("hive"))
       }

http://git-wip-us.apache.org/repos/asf/spark/blob/db8784fe/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 0477122..7cc6179 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -23,6 +23,7 @@ import scala.collection.mutable.ArrayBuffer
 
 import org.apache.hadoop.fs.Path
 
+import org.apache.spark.SparkContext
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
@@ -31,7 +32,7 @@ import org.apache.spark.sql.hive.HiveExternalCatalog._
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD
+import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
@@ -1324,4 +1325,18 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
       hiveClient.dropTable("default", "t", ignoreIfNotExists = true, purge = true)
     }
   }
+
+  test("should keep data source entries in table properties when debug mode is 
on") {
+    val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
+    try {
+      sparkSession.sparkContext.conf.set(DEBUG_MODE, true)
+      val newSession = sparkSession.newSession()
+      newSession.sql("CREATE TABLE abc(i int) USING json")
+      val tableMeta = newSession.sessionState.catalog.getTableMetadata(TableIdentifier("abc"))
+      assert(tableMeta.properties(DATASOURCE_SCHEMA_NUMPARTS).toInt == 1)
+      assert(tableMeta.properties(DATASOURCE_PROVIDER) == "json")
+    } finally {
+      sparkSession.sparkContext.conf.set(DEBUG_MODE, previousValue)
+    }
+  }
 }

