This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d7d31d0 [SPARK-37827][SQL] Put the some built-in table properties into V1Table.propertie to adapt to V2 command d7d31d0 is described below commit d7d31d0606170847721e5c604ea7993fb202596f Author: PengLei <peng.8...@gmail.com> AuthorDate: Wed Jan 12 21:22:18 2022 +0800 [SPARK-37827][SQL] Put the some built-in table properties into V1Table.propertie to adapt to V2 command ### What changes were proposed in this pull request? Add the V2 built-in table properties (provider, comment, location, owner, external flag, and storage options) to V1Table.properties so that V2 commands can work with V1 tables. ### Why are the changes needed? [#discuss](https://github.com/apache/spark/pull/34773#issuecomment-1005857748) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new unit test; also covered by existing unit tests. Closes #35131 from Peng-Lei/SPARK-37827. Authored-by: PengLei <peng.8...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/catalog/interface.scala | 6 ++- .../spark/sql/connector/catalog/V1Table.scala | 20 +++++++++- .../sql-tests/results/charvarchar.sql.out | 9 ++--- .../resources/sql-tests/results/describe.sql.out | 6 +-- .../resources/sql-tests/results/group-by.sql.out | 2 +- .../results/postgreSQL/create_view.sql.out | 42 +++++++-------------- .../sql-tests/results/show-tables.sql.out | 1 - .../spark/sql/connector/DataSourceV2SQLSuite.scala | 33 +++++++++++++++++ .../command/ShowTblPropertiesSuiteBase.scala | 5 ++- .../datasources/v2/V2SessionCatalogSuite.scala | 43 +++++++++++++--------- 10 files changed, 103 insertions(+), 64 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 3dc5aca..70ccb06 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala 
@@ -386,7 +386,7 @@ case class CatalogTable( val tableProperties = properties .filterKeys(!_.startsWith(VIEW_PREFIX)) .toSeq.sortBy(_._1) - .map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") + .map(p => p._1 + "=" + p._2) val partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]") val lastAccess = { if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString @@ -414,7 +414,9 @@ case class CatalogTable( } } - if (properties.nonEmpty) map.put("Table Properties", tableProperties) + if (tableProperties.nonEmpty) { + map.put("Table Properties", tableProperties.mkString("[", ", ", "]")) + } stats.foreach(s => map.put("Statistics", s.simpleString)) map ++= storage.toLinkedHashMap if (tracksPartitionsInCatalog) map.put("Partition Provider", "Catalog") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index a8a1b9a..07f66a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -23,8 +23,9 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.util.quoteIfNeeded +import org.apache.spark.sql.connector.catalog.V1Table.addV2TableProperties import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} import org.apache.spark.sql.types.StructType @@ -55,7 +56,7 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { } } - override lazy val properties: util.Map[String, String] = v1Table.properties.asJava + override lazy val properties: util.Map[String, String] = addV2TableProperties(v1Table).asJava 
override lazy val schema: StructType = v1Table.schema @@ -82,6 +83,21 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { override def toString: String = s"V1Table($name)" } +private[sql] object V1Table { + def addV2TableProperties(v1Table: CatalogTable): Map[String, String] = { + val external = v1Table.tableType == CatalogTableType.EXTERNAL + + v1Table.properties ++ + v1Table.storage.properties.map { case (key, value) => + TableCatalog.OPTION_PREFIX + key -> value } ++ + v1Table.provider.map(TableCatalog.PROP_PROVIDER -> _) ++ + v1Table.comment.map(TableCatalog.PROP_COMMENT -> _) ++ + v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> _.toString) ++ + (if (external) Some(TableCatalog.PROP_EXTERNAL -> "true") else None) ++ + Some(TableCatalog.PROP_OWNER -> v1Table.owner) + } +} + /** * A V2 table with V1 fallback support. This is used to fallback to V1 table when the V2 one * doesn't implement specific capabilities but V1 already has. diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index 00180ee..fcd207c 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -193,8 +193,7 @@ Type VIEW View Text select * from char_tbl View Original Text select * from char_tbl View Catalog and Namespace spark_catalog.default -View Query Output Columns [c, v] -Table Properties [] +View Query Output Columns [c, v] -- !query @@ -342,8 +341,7 @@ Type VIEW View Text select * from char_tbl2 View Original Text select * from char_tbl2 View Catalog and Namespace spark_catalog.default -View Query Output Columns [c, v] -Table Properties [] +View Query Output Columns [c, v] -- !query @@ -459,8 +457,7 @@ Type VIEW View Text select * from char_tbl2 View Original Text select * from char_tbl2 View Catalog and Namespace spark_catalog.default -View Query Output Columns [c, 
v] -Table Properties [] +View Query Output Columns [c, v] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 8a2a59e..04259c0 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -507,8 +507,7 @@ Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default -View Query Output Columns [a, b, c, d] -Table Properties [] +View Query Output Columns [a, b, c, d] -- !query @@ -531,8 +530,7 @@ Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default -View Query Output Columns [a, b, c, d] -Table Properties [] +View Query Output Columns [a, b, c, d] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index dce5fd0..cd0fa48 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 74 +-- Number of queries: 76 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index ab65a8f..a2064a4 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -250,8 +250,7 @@ Type VIEW View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] -Table Properties [] +View Query Output Columns [a, id] -- !query @@ -306,8 +305,7 @@ Type VIEW View Text SELECT * FROM 
base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] -Table Properties [] +View Query Output Columns [a, id] -- !query @@ -352,8 +350,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a FROM base_table t1, base_table2 t2 WHERE t1.id = t2.id View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [t1_a, t2_a] -Table Properties [] +View Query Output Columns [t1_a, t2_a] -- !query @@ -406,8 +403,7 @@ Type VIEW View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] -Table Properties [] +View Query Output Columns [a, id] -- !query @@ -436,8 +432,7 @@ Type VIEW View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [id, a] -Table Properties [] +View Query Output Columns [id, a] -- !query @@ -466,8 +461,7 @@ Type VIEW View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] -Table Properties [] +View Query Output Columns [a, id] -- !query @@ -496,8 +490,7 @@ Type VIEW View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] -Table Properties [] +View Query Output Columns [a, id] -- !query @@ -526,8 +519,7 @@ Type VIEW View Text SELECT * FROM base_table WHERE EXISTS 
(SELECT 1) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] -Table Properties [] +View Query Output Columns [a, id] -- !query @@ -662,8 +654,7 @@ Type VIEW View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [num, name, num2, value] -Table Properties [] +View Query Output Columns [num, name, num2, value] -- !query @@ -703,8 +694,7 @@ Type VIEW View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [num, name, num2, value] -Table Properties [] +View Query Output Columns [num, name, num2, value] -- !query @@ -744,8 +734,7 @@ Type VIEW View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [num, name, num2, value] -Table Properties [] +View Query Output Columns [num, name, num2, value] -- !query @@ -785,8 +774,7 @@ Type VIEW View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [num, name, num2, value] -Table Properties [] +View Query Output Columns [num, name, num2, value] -- !query @@ -887,8 +875,7 @@ View Original Text SELECT * FROM tbl1 WHERE tbl1.a BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [a, b] -Table Properties [] +View Query Output Columns [a, b] -- 
!query @@ -926,8 +913,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [a, b] -Table Properties [] +View Query Output Columns [a, b] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index 276989d..1390043 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -127,7 +127,6 @@ Created Time [not included in comparison] Last Access [not included in comparison] Created By [not included in comparison] Type: VIEW -Table Properties: [] Schema: root |-- e: integer (nullable = true) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 1fcfd94..3667a10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2842,6 +2842,39 @@ class DataSourceV2SQLSuite } } + test("SPARK-37827: put build-in properties into V1Table.properties to adapt v2 command") { + val t = "tbl" + withTable(t) { + sql( + s""" + |CREATE TABLE $t ( + | a bigint, + | b bigint + |) + |using parquet + |OPTIONS ( + | from = 0, + | to = 1) + |COMMENT 'This is a comment' + |TBLPROPERTIES ('prop1' = '1', 'prop2' = '2') + |PARTITIONED BY (a) + |LOCATION '/tmp' + """.stripMargin) + + val table = spark.sessionState.catalogManager.v2SessionCatalog.asTableCatalog + .loadTable(Identifier.of(Array("default"), t)) + val properties = table.properties + assert(properties.get(TableCatalog.PROP_PROVIDER) == 
"parquet") + assert(properties.get(TableCatalog.PROP_COMMENT) == "This is a comment") + assert(properties.get(TableCatalog.PROP_LOCATION) == "file:/tmp") + assert(properties.containsKey(TableCatalog.PROP_OWNER)) + assert(properties.get(s"${TableCatalog.OPTION_PREFIX}from") == "0") + assert(properties.get(s"${TableCatalog.OPTION_PREFIX}to") == "1") + assert(properties.get("prop1") == "1") + assert(properties.get("prop2") == "2") + } + } + private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTblPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTblPropertiesSuiteBase.scala index 7f9e927..fca1c68 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTblPropertiesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTblPropertiesSuiteBase.scala @@ -42,8 +42,9 @@ trait ShowTblPropertiesSuiteBase extends QueryTest with DDLCommandTestUtils { val status = "new" spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " + s"TBLPROPERTIES ('user'='$user', 'status'='$status')") - - val properties = sql(s"SHOW TBLPROPERTIES $tbl").filter("key != 'transient_lastDdlTime'") + val properties = sql(s"SHOW TBLPROPERTIES $tbl") + .filter("key != 'transient_lastDdlTime'") + .filter("key != 'option.serialization.format'") val schema = new StructType() .add("key", StringType, nullable = false) .add("value", StringType, nullable = false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index 7465ab6..86f4dc4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -122,7 +122,7 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { val parsed = CatalystSqlParser.parseMultipartIdentifier(table.name) assert(parsed == Seq("db", "test_table")) assert(table.schema == schema) - assert(table.properties.asScala == Map()) + assert(filterV2TableProperties(table.properties) == Map()) assert(catalog.tableExists(testIdent)) } @@ -140,7 +140,7 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { val parsed = CatalystSqlParser.parseMultipartIdentifier(table.name) assert(parsed == Seq("db", "test_table")) assert(table.schema == schema) - assert(table.properties == properties) + assert(filterV2TableProperties(table.properties).asJava == properties) assert(catalog.tableExists(testIdent)) } @@ -253,15 +253,15 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { val table = catalog.createTable(testIdent, schema, Array.empty, emptyProps) - assert(table.properties.asScala == Map()) + assert(filterV2TableProperties(table.properties) == Map()) val updated = catalog.alterTable(testIdent, TableChange.setProperty("prop-1", "1")) - assert(updated.properties.asScala == Map("prop-1" -> "1")) + assert(filterV2TableProperties(updated.properties) == Map("prop-1" -> "1")) val loaded = catalog.loadTable(testIdent) - assert(loaded.properties.asScala == Map("prop-1" -> "1")) + assert(filterV2TableProperties(loaded.properties) == Map("prop-1" -> "1")) - assert(table.properties.asScala == Map()) + assert(filterV2TableProperties(table.properties) == Map()) } test("alterTable: add property to existing") { @@ -272,15 +272,15 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { val table = catalog.createTable(testIdent, schema, Array.empty, properties) - assert(table.properties.asScala == Map("prop-1" -> "1")) + assert(filterV2TableProperties(table.properties) == Map("prop-1" -> "1")) val 
updated = catalog.alterTable(testIdent, TableChange.setProperty("prop-2", "2")) - assert(updated.properties.asScala == Map("prop-1" -> "1", "prop-2" -> "2")) + assert(filterV2TableProperties(updated.properties) == Map("prop-1" -> "1", "prop-2" -> "2")) val loaded = catalog.loadTable(testIdent) - assert(loaded.properties.asScala == Map("prop-1" -> "1", "prop-2" -> "2")) + assert(filterV2TableProperties(loaded.properties) == Map("prop-1" -> "1", "prop-2" -> "2")) - assert(table.properties.asScala == Map("prop-1" -> "1")) + assert(filterV2TableProperties(table.properties) == Map("prop-1" -> "1")) } test("alterTable: remove existing property") { @@ -291,15 +291,15 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { val table = catalog.createTable(testIdent, schema, Array.empty, properties) - assert(table.properties.asScala == Map("prop-1" -> "1")) + assert(filterV2TableProperties(table.properties) == Map("prop-1" -> "1")) val updated = catalog.alterTable(testIdent, TableChange.removeProperty("prop-1")) - assert(updated.properties.asScala == Map()) + assert(filterV2TableProperties(updated.properties) == Map()) val loaded = catalog.loadTable(testIdent) - assert(loaded.properties.asScala == Map()) + assert(filterV2TableProperties(loaded.properties) == Map()) - assert(table.properties.asScala == Map("prop-1" -> "1")) + assert(filterV2TableProperties(table.properties) == Map("prop-1" -> "1")) } test("alterTable: remove missing property") { @@ -307,15 +307,15 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { val table = catalog.createTable(testIdent, schema, Array.empty, emptyProps) - assert(table.properties.asScala == Map()) + assert(filterV2TableProperties(table.properties) == Map()) val updated = catalog.alterTable(testIdent, TableChange.removeProperty("prop-1")) - assert(updated.properties.asScala == Map()) + assert(filterV2TableProperties(updated.properties) == Map()) val loaded = catalog.loadTable(testIdent) - 
assert(loaded.properties.asScala == Map()) + assert(filterV2TableProperties(loaded.properties) == Map()) - assert(table.properties.asScala == Map()) + assert(filterV2TableProperties(table.properties) == Map()) } test("alterTable: add top-level column") { @@ -781,6 +781,13 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { assert(exc.message.contains(testIdentNewOtherDb.namespace.quoted)) assert(exc.message.contains("RENAME TABLE source and destination databases do not match")) } + + private def filterV2TableProperties( + properties: util.Map[String, String]): Map[String, String] = { + properties.asScala.filter(kv => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(kv._1)) + .filter(!_._1.startsWith(TableCatalog.OPTION_PREFIX)) + .filter(_._1 != TableCatalog.PROP_EXTERNAL).toMap + } } class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org