Repository: spark Updated Branches: refs/heads/master b78cf13bf -> 2d799d080
[SPARK-21504][SQL] Add spark version info into table metadata ## What changes were proposed in this pull request? This PR adds the Spark version info to the table metadata. The value is assigned when the table is created. It helps users find which version of Spark was used to create the table. ## How was this patch tested? N/A Author: gatorsmile <gatorsm...@gmail.com> Closes #18709 from gatorsmile/addVersion. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d799d08 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d799d08 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d799d08 Branch: refs/heads/master Commit: 2d799d08081032828cc2c95cbf58a268653c7a05 Parents: b78cf13 Author: gatorsmile <gatorsm...@gmail.com> Authored: Wed Aug 9 08:46:25 2017 -0700 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Wed Aug 9 08:46:25 2017 -0700 ---------------------------------------------------------------------- .../sql/catalyst/catalog/ExternalCatalog.scala | 4 +++- .../spark/sql/catalyst/catalog/interface.scala | 7 ++++++- .../spark/sql/catalyst/trees/TreeNodeSuite.scala | 4 +++- .../describe-table-after-alter-table.sql.out | 15 ++++++++++----- .../resources/sql-tests/results/describe.sql.out | 18 ++++++++++++------ .../sql-tests/results/show-tables.sql.out | 9 ++++++--- .../org/apache/spark/sql/SQLQueryTestSuite.scala | 3 ++- .../spark/sql/execution/command/DDLSuite.scala | 1 + .../spark/sql/hive/HiveExternalCatalog.scala | 12 +++++++++++- .../spark/sql/hive/execution/HiveDDLSuite.scala | 1 + 10 files changed, 55 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala ---------------------------------------------------------------------- diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index 6000d48..68644f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -106,8 +106,10 @@ abstract class ExternalCatalog final def createTable(tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = { val db = tableDefinition.database val name = tableDefinition.identifier.table + val tableDefinitionWithVersion = + tableDefinition.copy(createVersion = org.apache.spark.SPARK_VERSION) postToAll(CreateTablePreEvent(db, name)) - doCreateTable(tableDefinition, ignoreIfExists) + doCreateTable(tableDefinitionWithVersion, ignoreIfExists) postToAll(CreateTableEvent(db, name)) } http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 9531456..f865106 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -205,6 +205,9 @@ case class BucketSpec( * configured. * @param ignoredProperties is a list of table properties that are used by the underlying table * but ignored by Spark SQL yet. + * @param createVersion records the version of Spark that created this table metadata. The default + * is an empty string. We expect it will be read from the catalog or filled by + * ExternalCatalog.createTable. For temporary views, the value will be empty. 
*/ case class CatalogTable( identifier: TableIdentifier, @@ -217,6 +220,7 @@ case class CatalogTable( owner: String = "", createTime: Long = System.currentTimeMillis, lastAccessTime: Long = -1, + createVersion: String = "", properties: Map[String, String] = Map.empty, stats: Option[CatalogStatistics] = None, viewText: Option[String] = None, @@ -302,8 +306,9 @@ case class CatalogTable( identifier.database.foreach(map.put("Database", _)) map.put("Table", identifier.table) if (owner.nonEmpty) map.put("Owner", owner) - map.put("Created", new Date(createTime).toString) + map.put("Created Time", new Date(createTime).toString) map.put("Last Access", new Date(lastAccessTime).toString) + map.put("Created By", "Spark " + createVersion) map.put("Type", tableType.name) provider.foreach(map.put("Provider", _)) bucketSpec.foreach(map ++= _.toLinkedHashMap) http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 4fc947a..f83c637 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -480,7 +480,8 @@ class TreeNodeSuite extends SparkFunSuite { CatalogTableType.MANAGED, CatalogStorageFormat.empty, StructType(StructField("a", IntegerType, true) :: Nil), - createTime = 0L), + createTime = 0L, + createVersion = "2.x"), JObject( "product-class" -> classOf[CatalogTable].getName, @@ -509,6 +510,7 @@ class TreeNodeSuite extends SparkFunSuite { "owner" -> "", "createTime" -> 0, "lastAccessTime" -> -1, + "createVersion" -> "2.x", "tracksPartitionsInCatalog" -> false, "properties" -> JNull, "unsupportedFeatures" -> 
List.empty[String], http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out index 4bf4633..7873085 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out @@ -23,8 +23,9 @@ d string # Detailed Table Information Database default Table table_with_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Comment added @@ -52,8 +53,9 @@ d string # Detailed Table Information Database default Table table_with_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Comment modified comment @@ -88,8 +90,9 @@ b int # Detailed Table Information Database default Table table_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Location [not included in comparison]sql/core/spark-warehouse/table_comment @@ -114,8 +117,9 @@ b int # Detailed Table Information Database default Table table_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Comment added comment @@ -141,8 +145,9 @@ b int # Detailed Table Information Database 
default Table table_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Location [not included in comparison]sql/core/spark-warehouse/table_comment http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/describe.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index e2b79e8..b91f2c0 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -120,8 +120,9 @@ d string # Detailed Table Information Database default Table t -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Num Buckets 2 @@ -151,8 +152,9 @@ d string # Detailed Table Information Database default Table t -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Num Buckets 2 @@ -190,8 +192,9 @@ d string # Detailed Table Information Database default Table t -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Num Buckets 2 @@ -228,8 +231,9 @@ d string # Detailed Table Information Database default Table t -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Num Buckets 2 @@ -458,8 +462,9 @@ d string # Detailed 
Table Information Database default Table v -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Default Database default @@ -480,8 +485,9 @@ d string # Detailed Table Information Database default Table v -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Default Database default http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index 8f2a54f..da729cd 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -119,8 +119,9 @@ SHOW TABLE EXTENDED LIKE 'show_t*' struct<database:string,tableName:string,isTemporary:boolean,information:string> -- !query 12 output show_t3 true Table: show_t3 -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type: VIEW Schema: root |-- e: integer (nullable = true) @@ -128,8 +129,9 @@ Schema: root showdb show_t1 false Database: showdb Table: show_t1 -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type: MANAGED Provider: parquet Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t1 @@ -144,8 +146,9 @@ Schema: root showdb show_t2 false Database: showdb Table: show_t2 -Created [not included in comparison] +Created 
Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type: MANAGED Provider: parquet Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t2 http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index d9130fd..aa000bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -228,7 +228,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { // Get answer, but also get rid of the #1234 expression ids that show up in explain plans val answer = df.queryExecution.hiveResultString().map(_.replaceAll("#\\d+", "#x") .replaceAll("Location.*/sql/core/", s"Location ${notIncludedMsg}sql/core/") - .replaceAll("Created.*", s"Created $notIncludedMsg") + .replaceAll("Created By.*", s"Created By $notIncludedMsg") + .replaceAll("Created Time.*", s"Created Time $notIncludedMsg") .replaceAll("Last Access.*", s"Last Access $notIncludedMsg")) // If the output is not pre-sorted, sort it. 
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 5c0a6aa..9332f77 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -68,6 +68,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSQLContext with Befo provider = Some("parquet"), partitionColumnNames = Seq("a", "b"), createTime = 0L, + createVersion = org.apache.spark.SPARK_VERSION, tracksPartitionsInCatalog = true) } http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 172317c..19e5f78 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -390,6 +390,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val bucketSpec = table.bucketSpec val properties = new mutable.HashMap[String, String] + + properties.put(CREATED_SPARK_VERSION, table.createVersion) + // Serialized JSON schema string may be too long to be stored into a single metastore table // property. In this case, we split the JSON string and store each part as a separate table // property. 
@@ -594,7 +597,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // Set the `schema`, `partitionColumnNames` and `bucketSpec` from the old table definition, // to retain the spark specific format if it is. val propsFromOldTable = oldTableDef.properties.filter { case (k, v) => - k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) + k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) || + k.startsWith(CREATED_SPARK_VERSION) } val newTableProps = propsFromOldTable ++ tableDefinition.properties + partitionProviderProp val newDef = tableDefinition.copy( @@ -700,6 +704,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat table = restoreDataSourceTable(table, provider) } + // Restore version info + val version: String = table.properties.getOrElse(CREATED_SPARK_VERSION, "2.2 or prior") + // Restore Spark's statistics from information in Metastore. val statsProps = table.properties.filterKeys(_.startsWith(STATISTICS_PREFIX)) @@ -735,6 +742,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // Get the original table properties as defined by the user. table.copy( + createVersion = version, properties = table.properties.filterNot { case (key, _) => key.startsWith(SPARK_SQL_PREFIX) }) } @@ -1208,6 +1216,8 @@ object HiveExternalCatalog { val TABLE_PARTITION_PROVIDER_CATALOG = "catalog" val TABLE_PARTITION_PROVIDER_FILESYSTEM = "filesystem" + val CREATED_SPARK_VERSION = SPARK_SQL_PREFIX + "create.version" + /** * Returns the fully qualified name used in table properties for a particular column stat. 
* For example, for column "mycol", and "min" stat, this should return http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 5b62e37..0007d25 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -90,6 +90,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA provider = if (isDataSource) Some("parquet") else Some("hive"), partitionColumnNames = Seq("a", "b"), createTime = 0L, + createVersion = org.apache.spark.SPARK_VERSION, tracksPartitionsInCatalog = true) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org