This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 617ac1aec748 [SPARK-48031] Decompose viewSchemaMode config, add SHOW CREATE TABLE support 617ac1aec748 is described below commit 617ac1aec7481d6063af539b02980692e98beb70 Author: Serge Rielau <se...@rielau.com> AuthorDate: Mon May 20 16:01:24 2024 +0800 [SPARK-48031] Decompose viewSchemaMode config, add SHOW CREATE TABLE support ### What changes were proposed in this pull request? We separate enablement of WITH SCHEMA ... clause from the change in default from SCHEMA BINDING to SCHEMA COMPENSATION. This allows user to upgrade in two steps: 1. Enable the feature, and deal with DESCRIBE EXTENDED. 2. Get their affairs in order by ALTER VIEW to SCHEMA BINDING for those views they aim to keep in that mode 3. Switch the default. ### Why are the changes needed? It allows customers to upgrade more safely. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Added more tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #46652 from srielau/SPARK-48031-view-evolutiion-part2. 
Lead-authored-by: Serge Rielau <se...@rielau.com> Co-authored-by: Wenchen Fan <cloud0...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- docs/sql-migration-guide.md | 3 +- .../sql/catalyst/catalog/SessionCatalog.scala | 6 +- .../spark/sql/catalyst/catalog/interface.scala | 6 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 14 +- .../org/apache/spark/sql/internal/SQLConf.scala | 26 ++- .../spark/sql/execution/command/tables.scala | 7 + .../view-schema-binding-config.sql.out | 166 +++++++++++++-- .../analyzer-results/view-schema-binding.sql.out | 24 +-- .../inputs/view-schema-binding-config.sql | 52 +++-- .../sql-tests/inputs/view-schema-binding.sql | 2 +- .../sql-tests/results/charvarchar.sql.out | 1 + .../sql-tests/results/show-create-table.sql.out | 6 + .../results/view-schema-binding-config.sql.out | 231 ++++++++++++++++++--- .../sql-tests/results/view-schema-binding.sql.out | 25 +-- .../apache/spark/sql/execution/SQLViewSuite.scala | 2 +- .../spark/sql/execution/SQLViewTestSuite.scala | 7 +- 16 files changed, 453 insertions(+), 125 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 15205e9284cd..02a4fae5d262 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -54,7 +54,8 @@ license: | - Since Spark 4.0, The default value for `spark.sql.legacy.ctePrecedencePolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an error, inner CTE definitions take precedence over outer definitions. - Since Spark 4.0, The default value for `spark.sql.legacy.timeParserPolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an `INCONSISTENT_BEHAVIOR_CROSS_VERSION` error, `CANNOT_PARSE_TIMESTAMP` will be raised if ANSI mode is enable. `NULL` will be returned if ANSI mode is disabled. See [Datetime Patterns for Formatting and Parsing](sql-ref-datetime-pattern.html). 
- Since Spark 4.0, A bug falsely allowing `!` instead of `NOT` when `!` is not a prefix operator has been fixed. Clauses such as `expr ! IN (...)`, `expr ! BETWEEN ...`, or `col ! NULL` now raise syntax errors. To restore the previous behavior, set `spark.sql.legacy.bangEqualsNot` to `true`. -- Since Spark 4.0, Views allow control over how they react to underlying query changes. By default views tolerate column type changes in the query and compensate with casts. To restore the previous behavior, allowing up-casts only, set `spark.sql.viewSchemaBindingMode` to `DISABLED`. This disables the feature and also disallows the `WITH SCHEMA` clause. +- Since Spark 4.0, By default views tolerate column type changes in the query and compensate with casts. To restore the previous behavior, allowing up-casts only, set `spark.sql.legacy.viewSchemaCompensation` to `false`. +- Since Spark 4.0, Views allow control over how they react to underlying query changes. By default views tolerate column type changes in the query and compensate with casts. To disable this feature, set `spark.sql.legacy.viewSchemaBindingMode` to `false`. This also removes the clause from `DESCRIBE EXTENDED` and `SHOW CREATE TABLE`. ## Upgrading from Spark SQL 3.5.1 to 3.5.2 diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 15205e9284cd..02a4fae5d262 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -54,7 +54,8 @@ license: | - Since Spark 4.0, The default value for `spark.sql.legacy.ctePrecedencePolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an error, inner CTE definitions take precedence over outer definitions. - Since Spark 4.0, The default value for `spark.sql.legacy.timeParserPolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an `INCONSISTENT_BEHAVIOR_CROSS_VERSION` error, `CANNOT_PARSE_TIMESTAMP` will be raised if ANSI mode is enable. `NULL` will be returned if ANSI mode is disabled. See [Datetime Patterns for Formatting and Parsing](sql-ref-datetime-pattern.html).
*/ case SchemaBinding => UpCast(col, toField.dataType) - case SchemaUnsupported => UpCast(col, toField.dataType) + case SchemaUnsupported => if (conf.viewSchemaCompensation) { + Cast(col, toField.dataType, ansiEnabled = true) + } else { + UpCast(col, toField.dataType) + } case SchemaCompensation => Cast(col, toField.dataType, ansiEnabled = true) case SchemaTypeEvolution => col case other => throw SparkException.internalError("Unexpected ViewSchemaMode") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 333a84c62b4b..28f8d5cf2608 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -403,10 +403,10 @@ case class CatalogTable( /** * Return the schema binding mode. Defaults to SchemaCompensation if not a view or an older - * version, unless the viewSchemaBindingMode config is set to DISABLED + * version, unless the viewSchemaBindingMode config is set to false */ def viewSchemaMode: ViewSchemaMode = { - if (SQLConf.get.viewSchemaBindingMode == "DISABLED") { + if (!SQLConf.get.viewSchemaBindingEnabled) { SchemaUnsupported } else { val schemaMode = properties.getOrElse(VIEW_SCHEMA_MODE, SchemaCompensation.toString) @@ -510,7 +510,7 @@ case class CatalogTable( if (tableType == CatalogTableType.VIEW) { viewText.foreach(map.put("View Text", _)) viewOriginalText.foreach(map.put("View Original Text", _)) - if (SQLConf.get.viewSchemaBindingMode != "DISABLED") { + if (SQLConf.get.viewSchemaBindingEnabled) { map.put("View Schema Mode", viewSchemaMode.toString) } if (viewCatalogAndNamespace.nonEmpty) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 083ccc5f3302..b6816f5bb292 100644 
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -5023,19 +5023,23 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { override def visitSchemaBinding(ctx: SchemaBindingContext): ViewSchemaMode = { if (ctx == null) { // No schema binding specified, return the session default - if (conf.viewSchemaBindingMode == "COMPENSATION") { - SchemaCompensation + if (conf.viewSchemaBindingEnabled) { + if (conf.viewSchemaCompensation) { + SchemaCompensation + } else { + SchemaBinding + } } else { SchemaUnsupported } - } else if (conf.viewSchemaBindingMode == "DISABLED") { + } else if (!conf.viewSchemaBindingEnabled) { // If the feature is disabled, throw an exception withOrigin(ctx) { throw new ParseException( errorClass = "FEATURE_NOT_ENABLED", messageParameters = Map("featureName" -> "VIEW ... WITH SCHEMA ...", - "configKey" -> "spark.sql.viewSchemaBindingMode", - "configValue" -> "COMPENSATION"), + "configKey" -> "spark.sql.legacy.viewSchemaBindingMode", + "configValue" -> "true"), ctx) } } else if (ctx.COMPENSATION != null) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index afae4ebb5395..6df49df272ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1700,15 +1700,21 @@ object SQLConf { .booleanConf .createWithDefault(true) - val VIEW_SCHEMA_BINDING_MODE = buildConf("spark.sql.viewSchemaBindingMode") - .doc("Set to DISABLE to disable the WITH SCHEMA clause for view DDL and suppress the line in " + - " DESCRIBE EXTENDED. The default, and only other value, is COMPENSATION. 
Views without " + - " WITH SCHEMA clause are defaulted to WITH SCHEMA COMPENSATION.") + val VIEW_SCHEMA_BINDING_ENABLED = buildConf("spark.sql.legacy.viewSchemaBindingMode") + .internal() + .doc("Set to false to disable the WITH SCHEMA clause for view DDL and suppress the line in " + + "DESCRIBE EXTENDED and SHOW CREATE TABLE.") .version("4.0.0") - .stringConf - .transform(_.toUpperCase(Locale.ROOT)) - .checkValues(Set("COMPENSATION", "DISABLED")) - .createWithDefault("COMPENSATION") + .booleanConf + .createWithDefault(true) + + val VIEW_SCHEMA_COMPENSATION = buildConf("spark.sql.legacy.viewSchemaCompensation") + .internal() + .doc("Set to false to revert default view schema binding mode from WITH SCHEMA COMPENSATION " + + "to WITH SCHEMA BINDING.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) // The output committer class used by data sources. The specified class needs to be a // subclass of org.apache.hadoop.mapreduce.OutputCommitter. @@ -5524,7 +5530,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def groupByAliases: Boolean = getConf(GROUP_BY_ALIASES) - def viewSchemaBindingMode: String = getConf(VIEW_SCHEMA_BINDING_MODE) + def viewSchemaBindingEnabled: Boolean = getConf(VIEW_SCHEMA_BINDING_ENABLED) + + def viewSchemaCompensation: Boolean = getConf(VIEW_SCHEMA_COMPENSATION) def defaultCacheStorageLevel: StorageLevel = StorageLevel.fromString(getConf(DEFAULT_CACHE_STORAGE_LEVEL)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 990b7da339a2..ee0074dfe61b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -1113,6 +1113,7 @@ trait ShowCreateTableCommandBase extends SQLConfHelper { showViewDataColumns(metadata, builder) showTableComment(metadata, builder) showViewProperties(metadata, 
builder) + showViewSchemaBinding(metadata, builder) showViewText(metadata, builder) } @@ -1142,6 +1143,12 @@ trait ShowCreateTableCommandBase extends SQLConfHelper { } } + private def showViewSchemaBinding(metadata: CatalogTable, builder: StringBuilder): Unit = { + if (SQLConf.get.viewSchemaBindingEnabled) { + builder ++= s"WITH SCHEMA ${metadata.viewSchemaMode.toString}\n" + } + } + private def showViewText(metadata: CatalogTable, builder: StringBuilder): Unit = { builder ++= metadata.viewText.mkString("AS ", "", "\n") } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out index ba4f643ffdef..616eb27bdbc7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out @@ -1,21 +1,14 @@ -- Automatically generated by SQLQueryTestSuite -- !query -SET spark.sql.viewSchemaBindingMode +SET spark.sql.legacy.viewSchemaBindingMode -- !query analysis -SetCommand (spark.sql.viewSchemaBindingMode,None) +SetCommand (spark.sql.legacy.viewSchemaBindingMode,None) -- !query -SET spark.sql.viewSchemaBindingMode = EVOLUTION +SET spark.sql.legacy.viewSchemaBindingMode = false -- !query analysis -java.lang.IllegalArgumentException -The value of spark.sql.viewSchemaBindingMode should be one of COMPENSATION, DISABLED, but was EVOLUTION - - --- !query -SET spark.sql.viewSchemaBindingMode = DISABLED --- !query analysis -SetCommand (spark.sql.viewSchemaBindingMode,Some(DISABLED)) +SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(false)) -- !query @@ -26,8 +19,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "FEATURE_NOT_ENABLED", "sqlState" : "56038", "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", + "configKey" : 
"spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", "featureName" : "VIEW ... WITH SCHEMA ..." }, "queryContext" : [ { @@ -48,8 +41,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "FEATURE_NOT_ENABLED", "sqlState" : "56038", "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", "featureName" : "VIEW ... WITH SCHEMA ..." }, "queryContext" : [ { @@ -70,8 +63,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "FEATURE_NOT_ENABLED", "sqlState" : "56038", "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", "featureName" : "VIEW ... WITH SCHEMA ..." }, "queryContext" : [ { @@ -92,8 +85,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "FEATURE_NOT_ENABLED", "sqlState" : "56038", "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", "featureName" : "VIEW ... WITH SCHEMA ..." 
}, "queryContext" : [ { @@ -126,6 +119,12 @@ SHOW TABLE EXTENDED LIKE 'v' ShowTablesCommand default, v, [namespace#x, tableName#x, isTemporary#x, information#x], true +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + -- !query DROP VIEW IF EXISTS v -- !query analysis @@ -198,6 +197,12 @@ DESCRIBE EXTENDED v DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + -- !query DROP TABLE IF EXISTS t -- !query analysis @@ -211,6 +216,109 @@ CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#xL as int) AS c1#x] + +- Project [c1#xL] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = true +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(true)) + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = false +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(false)) + + +-- !query +SET spark.sql.ansi.enabled = false +-- !query analysis +SetCommand (spark.sql.ansi.enabled,Some(false)) + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- 
!query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, BINDING, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +DROP TABLE t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as bigint) AS c1#xL] + +- LocalRelation [col1#x] + + -- !query SELECT * FROM v -- !query analysis @@ -234,15 +342,15 @@ DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type -- !query -SET spark.sql.viewSchemaBindingMode = COMPENSATION +SHOW CREATE TABLE v -- !query analysis -SetCommand (spark.sql.viewSchemaBindingMode,Some(COMPENSATION)) +ShowCreateTableCommand 
`spark_catalog`.`default`.`v`, [createtab_stmt#x] -- !query -SET spark.sql.ansi.enabled = false +SET spark.sql.legacy.viewSchemaCompensation = true -- !query analysis -SetCommand (spark.sql.ansi.enabled,Some(false)) +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(true)) -- !query @@ -285,6 +393,12 @@ DESCRIBE EXTENDED v DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + -- !query DROP TABLE t -- !query analysis @@ -324,6 +438,12 @@ DESCRIBE EXTENDED v DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + -- !query DROP TABLE IF EXISTS t -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out index 5a55f4d76b64..75cae1f19d46 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out @@ -151,9 +151,9 @@ DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type -- !query -SET spark.sql.viewSchemaBindingMode=DISABLED +SET spark.sql.legacy.viewSchemaCompensation=false -- !query analysis -SetCommand (spark.sql.viewSchemaBindingMode,Some(DISABLED)) +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(false)) -- !query @@ -172,7 +172,7 @@ CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false -- !query CREATE OR REPLACE VIEW v AS SELECT * FROM t -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, UNSUPPORTED, true +CreateViewCommand 
`spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, BINDING, true +- Project [c1#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[c1#x] parquet @@ -199,23 +199,7 @@ DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type -- !query ALTER VIEW v WITH SCHEMA BINDING -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "FEATURE_NOT_ENABLED", - "sqlState" : "56038", - "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", - "featureName" : "VIEW ... WITH SCHEMA ..." - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 14, - "stopIndex" : 32, - "fragment" : "WITH SCHEMA BINDING" - } ] -} +AlterViewSchemaBindingCommand `spark_catalog`.`default`.`v`, BINDING -- !query diff --git a/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding-config.sql b/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding-config.sql index fccdcb920fd3..3a1c676cbc9c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding-config.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding-config.sql @@ -1,14 +1,11 @@ -- This test suits check the spark.sql.viewSchemaBindingMode configuration. -- It can be DISABLED and COMPENSATION --- Verify the default binding is COMPENSATION -SET spark.sql.viewSchemaBindingMode; - --- Verify which values are allowed -SET spark.sql.viewSchemaBindingMode = EVOLUTION; +-- Verify the default binding is true +SET spark.sql.legacy.viewSchemaBindingMode; -- 1. Test DISABLED mode. 
-SET spark.sql.viewSchemaBindingMode = DISABLED; +SET spark.sql.legacy.viewSchemaBindingMode = false; -- 1.a Attempts to use the SCHEMA BINDING clause fail with FEATURE_NOT_ENABLED CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT 1; @@ -16,10 +13,11 @@ CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT 1; CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT 1; CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT 1; --- 1.b Existing SHOW and DESCRIBE should behave as before Spark 4.4.0 +-- 1.b Existing SHOW and DESCRIBE should behave as before Spark 4.0.0 CREATE OR REPLACE VIEW v AS SELECT 1; DESCRIBE EXTENDED v; SHOW TABLE EXTENDED LIKE 'v'; +SHOW CREATE TABLE v; DROP VIEW IF EXISTS v; CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1; @@ -34,6 +32,7 @@ CREATE OR REPLACE VIEW v AS SELECT * FROM t; SELECT * FROM v; -- Baseline: v(c1 INT); DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; -- Widen the column c1 in t DROP TABLE IF EXISTS t; @@ -44,29 +43,56 @@ SELECT * FROM v; -- The view still describes as v(c1 INT); DESCRIBE EXTENDED v; --- 2. Test COMPENSATION mode. In this mode Spark tolerates any supported CAST, not just up cast -SET spark.sql.viewSchemaBindingMode = COMPENSATION; +-- 2. Test true mode. In this mode Spark tolerates any supported CAST, not just up cast +SET spark.sql.legacy.viewSchemaBindingMode = true; +SET spark.sql.legacy.viewSchemaCompensation = false; -- To verify ANSI_MODE is enforced even if ANSI_MODE is turned off. 
SET spark.sql.ansi.enabled = false; --- 2.a In COMPENSATION views get invalidated if the type can't cast +-- 2.a In BINDING views get invalidated if the type can't cast DROP TABLE IF EXISTS t; CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; CREATE OR REPLACE VIEW v AS SELECT * FROM t; SELECT * FROM v; -- Baseline: v(c1 INT); DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; -- Widen the column c1 in t DROP TABLE t; CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET; INSERT INTO t VALUES (1); + +-- This fails +SELECT * FROM v; +-- The view still describes as v(c1 BIGINT) +DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; + +-- 2.b Switch to default COMPENSATION +SET spark.sql.legacy.viewSchemaCompensation = true; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; +CREATE OR REPLACE VIEW v AS SELECT * FROM t; +SELECT * FROM v; +-- Baseline: v(c1 INT); +DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; + +-- Widen the column c1 in t +DROP TABLE t; +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET; +INSERT INTO t VALUES (1); + +-- This now succeeds SELECT * FROM v; -- The view still describes as v(c1 BIGINT) DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; --- 2.b In COMPENSATION views ignore added columns and change the type +-- 2.c In COMPENSATION views ignore added columns and change the type -- Expect the added column to be ignore, but the type will be tolerated, as long as it can cast DROP TABLE IF EXISTS t; CREATE TABLE t(c1 STRING NOT NULL, c2 INT) USING PARQUET; @@ -89,7 +115,7 @@ SELECT * FROM v; -- The view still describes as v(c1 INT); DESCRIBE EXTENDED v; --- 2.c Still can't drop a column, though +-- 2.d Still can't drop a column, though DROP TABLE IF EXISTS t; CREATE TABLE t(c1 INT, c2 INT) USING PARQUET; INSERT INTO t VALUES (1, 2); @@ -106,7 +132,7 @@ CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; SELECT * FROM v; DESCRIBE EXTENDED v; --- 2.d Attempt to rename a column +-- 2.e Attempt to rename a column DROP TABLE IF EXISTS t; CREATE TABLE t(c3 INT NOT NULL, 
c2 INT) USING PARQUET; SELECT * FROM v; diff --git a/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding.sql b/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding.sql index 9ccae83e7467..413322db10d2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding.sql @@ -36,7 +36,7 @@ SELECT * FROM v; DESCRIBE EXTENDED v; -- Test ALTER VIEW ... WITH SCHEMA BINDING -SET spark.sql.viewSchemaBindingMode=DISABLED; +SET spark.sql.legacy.viewSchemaCompensation=false; DROP TABLE IF EXISTS t; CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index 3e7dc1858b42..568c9f3b29e8 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -223,6 +223,7 @@ struct<createtab_stmt:string> CREATE VIEW default.char_view ( c, v) +WITH SCHEMA COMPENSATION AS select * from char_tbl diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index dcb96b9d2dce..ad96e7e106ad 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -327,6 +327,7 @@ struct<createtab_stmt:string> CREATE VIEW default.view_spark_30302 ( aaa, bbb) +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -338,6 +339,7 @@ struct<createtab_stmt:string> CREATE VIEW default.view_spark_30302 ( aaa, bbb) +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -368,6 +370,7 @@ CREATE VIEW default.view_spark_30302 ( aaa COMMENT 'comment with \'quoted text\' for aaa', bbb) COMMENT 'This is a comment with \'quoted text\' for view' +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl 
@@ -380,6 +383,7 @@ CREATE VIEW default.view_spark_30302 ( aaa COMMENT 'comment with \'quoted text\' for aaa', bbb) COMMENT 'This is a comment with \'quoted text\' for view' +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -412,6 +416,7 @@ CREATE VIEW default.view_spark_30302 ( TBLPROPERTIES ( 'a' = '1', 'b' = '2') +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -426,6 +431,7 @@ CREATE VIEW default.view_spark_30302 ( TBLPROPERTIES ( 'a' = '1', 'b' = '2') +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out index 9afee1d54c22..1ea9dd5afe88 100644 --- a/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out @@ -1,27 +1,18 @@ -- Automatically generated by SQLQueryTestSuite -- !query -SET spark.sql.viewSchemaBindingMode +SET spark.sql.legacy.viewSchemaBindingMode -- !query schema struct<key:string,value:string> -- !query output -spark.sql.viewSchemaBindingMode COMPENSATION +spark.sql.legacy.viewSchemaBindingMode true -- !query -SET spark.sql.viewSchemaBindingMode = EVOLUTION --- !query schema -struct<> --- !query output -java.lang.IllegalArgumentException -The value of spark.sql.viewSchemaBindingMode should be one of COMPENSATION, DISABLED, but was EVOLUTION - - --- !query -SET spark.sql.viewSchemaBindingMode = DISABLED +SET spark.sql.legacy.viewSchemaBindingMode = false -- !query schema struct<key:string,value:string> -- !query output -spark.sql.viewSchemaBindingMode DISABLED +spark.sql.legacy.viewSchemaBindingMode false -- !query @@ -34,8 +25,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "FEATURE_NOT_ENABLED", "sqlState" : "56038", "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", + 
"configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", "featureName" : "VIEW ... WITH SCHEMA ..." }, "queryContext" : [ { @@ -58,8 +49,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "FEATURE_NOT_ENABLED", "sqlState" : "56038", "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", "featureName" : "VIEW ... WITH SCHEMA ..." }, "queryContext" : [ { @@ -82,8 +73,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "FEATURE_NOT_ENABLED", "sqlState" : "56038", "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", "featureName" : "VIEW ... WITH SCHEMA ..." }, "queryContext" : [ { @@ -106,8 +97,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "FEATURE_NOT_ENABLED", "sqlState" : "56038", "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", "featureName" : "VIEW ... WITH SCHEMA ..." 
}, "queryContext" : [ { @@ -169,6 +160,16 @@ Schema: root |-- 1: integer (nullable = false) +-- !query +SHOW CREATE TABLE v +-- !query schema +struct<createtab_stmt:string> +-- !query output +CREATE VIEW default.v ( + `1`) +AS SELECT 1 + + -- !query DROP VIEW IF EXISTS v -- !query schema @@ -271,6 +272,85 @@ View Catalog and Namespace spark_catalog.default View Query Output Columns [c1] +-- !query +SHOW CREATE TABLE v +-- !query schema +struct<createtab_stmt:string> +-- !query output +CREATE VIEW default.v ( + c1) +AS SELECT * FROM t + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<c1:int> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = true +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.legacy.viewSchemaBindingMode true + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = false +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.legacy.viewSchemaCompensation false + + +-- !query +SET spark.sql.ansi.enabled = false +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.ansi.enabled false + + -- !query DROP TABLE IF EXISTS t -- !query schema @@ -279,6 +359,71 @@ struct<> +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query 
schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<c1:int> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct<createtab_stmt:string> +-- !query output +CREATE VIEW default.v ( + c1) +WITH SCHEMA BINDING +AS SELECT * FROM t + + +-- !query +DROP TABLE t +-- !query schema +struct<> +-- !query output + + + -- !query CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET -- !query schema @@ -287,6 +432,14 @@ struct<> +-- !query +INSERT INTO t VALUES (1) +-- !query schema +struct<> +-- !query output + + + -- !query SELECT * FROM v -- !query schema @@ -322,24 +475,28 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t +View Schema Mode BINDING View Catalog and Namespace spark_catalog.default View Query Output Columns [c1] -- !query -SET spark.sql.viewSchemaBindingMode = COMPENSATION +SHOW CREATE TABLE v -- !query schema -struct<key:string,value:string> +struct<createtab_stmt:string> -- !query output -spark.sql.viewSchemaBindingMode COMPENSATION +CREATE VIEW default.v ( + c1) +WITH SCHEMA BINDING +AS SELECT * FROM t -- !query -SET spark.sql.ansi.enabled = false +SET spark.sql.legacy.viewSchemaCompensation = true -- !query schema struct<key:string,value:string> -- !query output -spark.sql.ansi.enabled false 
+spark.sql.legacy.viewSchemaCompensation true -- !query @@ -396,6 +553,17 @@ View Catalog and Namespace spark_catalog.default View Query Output Columns [c1] +-- !query +SHOW CREATE TABLE v +-- !query schema +struct<createtab_stmt:string> +-- !query output +CREATE VIEW default.v ( + c1) +WITH SCHEMA COMPENSATION +AS SELECT * FROM t + + -- !query DROP TABLE t -- !query schema @@ -450,6 +618,17 @@ View Catalog and Namespace spark_catalog.default View Query Output Columns [c1] +-- !query +SHOW CREATE TABLE v +-- !query schema +struct<createtab_stmt:string> +-- !query output +CREATE VIEW default.v ( + c1) +WITH SCHEMA COMPENSATION +AS SELECT * FROM t + + -- !query DROP TABLE IF EXISTS t -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out index 6aafe016db28..a4e5820cb7ce 100644 --- a/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out @@ -223,11 +223,11 @@ View Query Output Columns [c1, c2] -- !query -SET spark.sql.viewSchemaBindingMode=DISABLED +SET spark.sql.legacy.viewSchemaCompensation=false -- !query schema struct<key:string,value:string> -- !query output -spark.sql.viewSchemaBindingMode DISABLED +spark.sql.legacy.viewSchemaCompensation false -- !query @@ -279,6 +279,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t +View Schema Mode BINDING View Catalog and Namespace spark_catalog.default View Query Output Columns [c1] @@ -288,23 +289,7 @@ ALTER VIEW v WITH SCHEMA BINDING -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "FEATURE_NOT_ENABLED", - "sqlState" : "56038", - "messageParameters" : { - "configKey" : "spark.sql.viewSchemaBindingMode", - "configValue" : "COMPENSATION", - "featureName" : "VIEW ... 
WITH SCHEMA ..." - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 14, - "stopIndex" : 32, - "fragment" : "WITH SCHEMA BINDING" - } ] -} + -- !query @@ -324,6 +309,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t +View Schema Mode BINDING View Catalog and Namespace spark_catalog.default View Query Output Columns [c1] @@ -379,6 +365,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t +View Schema Mode BINDING View Catalog and Namespace spark_catalog.default View Query Output Columns [c1] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index d54606e9b7ce..91031ef642a0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -899,7 +899,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { test("resolve a view when the dataTypes of referenced table columns changed") { withTable("tab1") { - withSQLConf("spark.sql.viewSchemaBindingMode" -> "DISABLED") { + withSQLConf("spark.sql.legacy.viewSchemaCompensation" -> "false") { spark.range(1, 10).selectExpr("id", "id + 1 id1").write.saveAsTable("tab1") withView("testView") { sql("CREATE VIEW testView AS SELECT * FROM tab1") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index d2740f9eac78..f15c989fc072 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -736,7 +736,8 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { Seq(true, false).foreach { 
serde => withView(viewName) { createView(viewName, "SELECT 1 AS a") - val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( a) AS SELECT 1 AS a" + val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( a) " + + "WITH SCHEMA COMPENSATION AS SELECT 1 AS a" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } @@ -748,7 +749,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { withView(viewName) { createView(viewName, "SELECT 1 AS a, 2 AS b", Seq("a", "b COMMENT 'b column'")) val expected = s"CREATE VIEW ${formattedViewName(viewName)}" + - s" ( a, b COMMENT 'b column') AS SELECT 1 AS a, 2 AS b" + s" ( a, b COMMENT 'b column') WITH SCHEMA COMPENSATION AS SELECT 1 AS a, 2 AS b" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } @@ -764,7 +765,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( c1 COMMENT 'bla', c2)" + " COMMENT 'table comment'" + " TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2')" + - " AS SELECT 1 AS c1, '2' AS c2" + " WITH SCHEMA COMPENSATION AS SELECT 1 AS c1, '2' AS c2" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org