Repository: spark
Updated Branches:
  refs/heads/master b78cf13bf -> 2d799d080


[SPARK-21504][SQL] Add spark version info into table metadata

## What changes were proposed in this pull request?
This PR is to add the spark version info in the table metadata. When creating 
the table, this value is assigned. It can help users find which version of 
Spark was used to create the table.

## How was this patch tested?
N/A

Author: gatorsmile <gatorsm...@gmail.com>

Closes #18709 from gatorsmile/addVersion.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d799d08
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d799d08
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d799d08

Branch: refs/heads/master
Commit: 2d799d08081032828cc2c95cbf58a268653c7a05
Parents: b78cf13
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Wed Aug 9 08:46:25 2017 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Wed Aug 9 08:46:25 2017 -0700

----------------------------------------------------------------------
 .../sql/catalyst/catalog/ExternalCatalog.scala    |  4 +++-
 .../spark/sql/catalyst/catalog/interface.scala    |  7 ++++++-
 .../spark/sql/catalyst/trees/TreeNodeSuite.scala  |  4 +++-
 .../describe-table-after-alter-table.sql.out      | 15 ++++++++++-----
 .../resources/sql-tests/results/describe.sql.out  | 18 ++++++++++++------
 .../sql-tests/results/show-tables.sql.out         |  9 ++++++---
 .../org/apache/spark/sql/SQLQueryTestSuite.scala  |  3 ++-
 .../spark/sql/execution/command/DDLSuite.scala    |  1 +
 .../spark/sql/hive/HiveExternalCatalog.scala      | 12 +++++++++++-
 .../spark/sql/hive/execution/HiveDDLSuite.scala   |  1 +
 10 files changed, 55 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
index 6000d48..68644f4 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
@@ -106,8 +106,10 @@ abstract class ExternalCatalog
   final def createTable(tableDefinition: CatalogTable, ignoreIfExists: 
Boolean): Unit = {
     val db = tableDefinition.database
     val name = tableDefinition.identifier.table
+    val tableDefinitionWithVersion =
+      tableDefinition.copy(createVersion = org.apache.spark.SPARK_VERSION)
     postToAll(CreateTablePreEvent(db, name))
-    doCreateTable(tableDefinition, ignoreIfExists)
+    doCreateTable(tableDefinitionWithVersion, ignoreIfExists)
     postToAll(CreateTableEvent(db, name))
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 9531456..f865106 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -205,6 +205,9 @@ case class BucketSpec(
  *                           configured.
  * @param ignoredProperties is a list of table properties that are used by the 
underlying table
  *                          but ignored by Spark SQL yet.
+ * @param createVersion records the version of Spark that created this table 
metadata. The default
+ *                      is an empty string. We expect it will be read from the 
catalog or filled by
+ *                      ExternalCatalog.createTable. For temporary views, the 
value will be empty.
  */
 case class CatalogTable(
     identifier: TableIdentifier,
@@ -217,6 +220,7 @@ case class CatalogTable(
     owner: String = "",
     createTime: Long = System.currentTimeMillis,
     lastAccessTime: Long = -1,
+    createVersion: String = "",
     properties: Map[String, String] = Map.empty,
     stats: Option[CatalogStatistics] = None,
     viewText: Option[String] = None,
@@ -302,8 +306,9 @@ case class CatalogTable(
     identifier.database.foreach(map.put("Database", _))
     map.put("Table", identifier.table)
     if (owner.nonEmpty) map.put("Owner", owner)
-    map.put("Created", new Date(createTime).toString)
+    map.put("Created Time", new Date(createTime).toString)
     map.put("Last Access", new Date(lastAccessTime).toString)
+    map.put("Created By", "Spark " + createVersion)
     map.put("Type", tableType.name)
     provider.foreach(map.put("Provider", _))
     bucketSpec.foreach(map ++= _.toLinkedHashMap)

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
index 4fc947a..f83c637 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
@@ -480,7 +480,8 @@ class TreeNodeSuite extends SparkFunSuite {
         CatalogTableType.MANAGED,
         CatalogStorageFormat.empty,
         StructType(StructField("a", IntegerType, true) :: Nil),
-        createTime = 0L),
+        createTime = 0L,
+        createVersion = "2.x"),
 
       JObject(
         "product-class" -> classOf[CatalogTable].getName,
@@ -509,6 +510,7 @@ class TreeNodeSuite extends SparkFunSuite {
         "owner" -> "",
         "createTime" -> 0,
         "lastAccessTime" -> -1,
+        "createVersion" -> "2.x",
         "tracksPartitionsInCatalog" -> false,
         "properties" -> JNull,
         "unsupportedFeatures" -> List.empty[String],

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
index 4bf4633..7873085 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
@@ -23,8 +23,9 @@ d                     string
 # Detailed Table Information                                               
 Database               default                                     
 Table                  table_with_comment                          
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Comment                added                                       
@@ -52,8 +53,9 @@ d                     string
 # Detailed Table Information                                               
 Database               default                                     
 Table                  table_with_comment                          
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Comment                modified comment                            
@@ -88,8 +90,9 @@ b                     int
 # Detailed Table Information                                               
 Database               default                                     
 Table                  table_comment                               
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Location [not included in comparison]sql/core/spark-warehouse/table_comment
@@ -114,8 +117,9 @@ b                           int
 # Detailed Table Information                                               
 Database               default                                     
 Table                  table_comment                               
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Comment                added comment                               
@@ -141,8 +145,9 @@ b                           int
 # Detailed Table Information                                               
 Database               default                                     
 Table                  table_comment                               
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Location [not included in comparison]sql/core/spark-warehouse/table_comment

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/describe.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out 
b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index e2b79e8..b91f2c0 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -120,8 +120,9 @@ d                           string
 # Detailed Table Information                                               
 Database               default                                     
 Table                  t                                           
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Num Buckets            2                                           
@@ -151,8 +152,9 @@ d                           string
 # Detailed Table Information                                               
 Database               default                                     
 Table                  t                                           
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Num Buckets            2                                           
@@ -190,8 +192,9 @@ d                           string
 # Detailed Table Information                                               
 Database               default                                     
 Table                  t                                           
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Num Buckets            2                                           
@@ -228,8 +231,9 @@ d                           string
 # Detailed Table Information                                               
 Database               default                                     
 Table                  t                                           
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   MANAGED                                     
 Provider               parquet                                     
 Num Buckets            2                                           
@@ -458,8 +462,9 @@ d                           string
 # Detailed Table Information                                               
 Database               default                                     
 Table                  v                                           
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   VIEW                                        
 View Text              SELECT * FROM t                             
 View Default Database  default                                     
@@ -480,8 +485,9 @@ d                           string
 # Detailed Table Information                                               
 Database               default                                     
 Table                  v                                           
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type                   VIEW                                        
 View Text              SELECT * FROM t                             
 View Default Database  default                                     

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out 
b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
index 8f2a54f..da729cd 100644
--- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
@@ -119,8 +119,9 @@ SHOW TABLE EXTENDED LIKE 'show_t*'
 struct<database:string,tableName:string,isTemporary:boolean,information:string>
 -- !query 12 output
 show_t3        true    Table: show_t3
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type: VIEW
 Schema: root
  |-- e: integer (nullable = true)
@@ -128,8 +129,9 @@ Schema: root
 
 showdb show_t1 false   Database: showdb
 Table: show_t1
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type: MANAGED
 Provider: parquet
 Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t1
@@ -144,8 +146,9 @@ Schema: root
 
 showdb show_t2 false   Database: showdb
 Table: show_t2
-Created [not included in comparison]
+Created Time [not included in comparison]
 Last Access [not included in comparison]
+Created By [not included in comparison]
 Type: MANAGED
 Provider: parquet
 Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t2

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index d9130fd..aa000bd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -228,7 +228,8 @@ class SQLQueryTestSuite extends QueryTest with 
SharedSQLContext {
       // Get answer, but also get rid of the #1234 expression ids that show up 
in explain plans
       val answer = 
df.queryExecution.hiveResultString().map(_.replaceAll("#\\d+", "#x")
         .replaceAll("Location.*/sql/core/", s"Location 
${notIncludedMsg}sql/core/")
-        .replaceAll("Created.*", s"Created $notIncludedMsg")
+        .replaceAll("Created By.*", s"Created By $notIncludedMsg")
+        .replaceAll("Created Time.*", s"Created Time $notIncludedMsg")
         .replaceAll("Last Access.*", s"Last Access $notIncludedMsg"))
 
       // If the output is not pre-sorted, sort it.

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 5c0a6aa..9332f77 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -68,6 +68,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with 
SharedSQLContext with Befo
       provider = Some("parquet"),
       partitionColumnNames = Seq("a", "b"),
       createTime = 0L,
+      createVersion = org.apache.spark.SPARK_VERSION,
       tracksPartitionsInCatalog = true)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 172317c..19e5f78 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -390,6 +390,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
     val bucketSpec = table.bucketSpec
 
     val properties = new mutable.HashMap[String, String]
+
+    properties.put(CREATED_SPARK_VERSION, table.createVersion)
+
     // Serialized JSON schema string may be too long to be stored into a 
single metastore table
     // property. In this case, we split the JSON string and store each part as 
a separate table
     // property.
@@ -594,7 +597,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
       // Set the `schema`, `partitionColumnNames` and `bucketSpec` from the 
old table definition,
       // to retain the spark specific format if it is.
       val propsFromOldTable = oldTableDef.properties.filter { case (k, v) =>
-        k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX)
+        k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) ||
+          k.startsWith(CREATED_SPARK_VERSION)
       }
       val newTableProps = propsFromOldTable ++ tableDefinition.properties + 
partitionProviderProp
       val newDef = tableDefinition.copy(
@@ -700,6 +704,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
         table = restoreDataSourceTable(table, provider)
     }
 
+    // Restore version info
+    val version: String = table.properties.getOrElse(CREATED_SPARK_VERSION, 
"2.2 or prior")
+
     // Restore Spark's statistics from information in Metastore.
     val statsProps = 
table.properties.filterKeys(_.startsWith(STATISTICS_PREFIX))
 
@@ -735,6 +742,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
 
     // Get the original table properties as defined by the user.
     table.copy(
+      createVersion = version,
       properties = table.properties.filterNot { case (key, _) => 
key.startsWith(SPARK_SQL_PREFIX) })
   }
 
@@ -1208,6 +1216,8 @@ object HiveExternalCatalog {
   val TABLE_PARTITION_PROVIDER_CATALOG = "catalog"
   val TABLE_PARTITION_PROVIDER_FILESYSTEM = "filesystem"
 
+  val CREATED_SPARK_VERSION = SPARK_SQL_PREFIX + "create.version"
+
   /**
    * Returns the fully qualified name used in table properties for a 
particular column stat.
    * For example, for column "mycol", and "min" stat, this should return

http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 5b62e37..0007d25 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -90,6 +90,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with 
TestHiveSingleton with BeforeA
       provider = if (isDataSource) Some("parquet") else Some("hive"),
       partitionColumnNames = Seq("a", "b"),
       createTime = 0L,
+      createVersion = org.apache.spark.SPARK_VERSION,
       tracksPartitionsInCatalog = true)
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to