This is an automated email from the ASF dual-hosted git repository.
cloud-fan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 27b785236f54 [SPARK-57054][SQL] Make view collation sticky across
ALTER SCHEMA DEFAULT COLLATION
27b785236f54 is described below
commit 27b785236f54dffab638514a8f92eea1a6ef31ec
Author: Marko Ilić <[email protected]>
AuthorDate: Sat May 30 02:58:22 2026 +0800
[SPARK-57054][SQL] Make view collation sticky across ALTER SCHEMA DEFAULT
COLLATION
### What changes were proposed in this pull request?
Remove the `AlterViewAs` rewrite case from
`ApplyDefaultCollation.resolveDefaultCollation`. It used to fold the
namespace's current default collation into a view whose persisted
`PROP_COLLATION` was empty, on every `ALTER VIEW AS`.
### Why are the changes needed?
A view's collation should be fixed at creation time. The removed rewrite
let a later `ALTER SCHEMA ... DEFAULT COLLATION` retroactively change an
existing view's literal types on the next `ALTER VIEW AS`:
```sql
CREATE SCHEMA s;
USE s;
CREATE VIEW v AS SELECT 'a' AS c1; -- UTF8_BINARY
ALTER SCHEMA s DEFAULT COLLATION UTF8_LCASE;
ALTER VIEW v AS SELECT 'x' AS c1; -- was: UTF8_LCASE; now: UTF8_BINARY
```
### Does this PR introduce _any_ user-facing change?
Yes. `ALTER VIEW AS` no longer picks up the namespace's current default
collation for a view that was created without one — the view's collation stays
sticky from creation.
### How was this patch tested?
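Updated the existing V1 and V2 tests to assert the new sticky behavior, and
added a V2 test checking that a view created under a namespace default
collation keeps that collation after the namespace default is changed and
`ALTER VIEW AS` is run.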
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #56100 from ilicmarkodb/marko-ilic_data/spark_view_def_collation.
Authored-by: Marko Ilić <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../catalyst/analysis/ApplyDefaultCollation.scala | 42 +--------------------
.../sql/connector/catalog/CatalogV2Util.scala | 8 ++--
.../catalyst/analysis/ResolveSessionCatalog.scala | 18 ++-------
.../apache/spark/sql/execution/command/views.scala | 17 +--------
.../view-with-default-collation.sql.out | 6 +--
.../sql/collation/DefaultCollationTestSuite.scala | 12 ++----
.../execution/command/v2/AlterViewAsSuite.scala | 43 +++++++++++++++++-----
7 files changed, 49 insertions(+), 97 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
index ebc2ac2a6b2f..ca9418a0c6ef 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.CurrentOrigin
import org.apache.spark.sql.catalyst.types.DataTypeUtils.{areSameBaseType,
isDefaultStringCharOrVarcharType, replaceDefaultStringCharAndVarcharTypes}
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util,
SupportsNamespaces, TableCatalog, V1ViewInfo, ViewInfo}
+import org.apache.spark.sql.connector.catalog.{SupportsNamespaces,
TableCatalog}
import org.apache.spark.sql.types.{DataType, StringHelper, StringType}
/**
@@ -206,46 +206,6 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] {
newCreateView.copyTagsFrom(createView)
newCreateView
- // We match against ResolvedPersistentView because temporary views
don't have a
- // schema/catalog. The rewrite covers both v1 (session-catalog,
[[V1ViewInfo]]) and
- // non-session v2 views: when the existing view has no
`PROP_COLLATION` and the
- // namespace supplies a default, fold that default into the resolved
`ViewInfo`. For
- // v1, `V1ViewInfo` is rebuilt around a `CatalogTable` whose typed
`collation` field
- // holds the new value; `V1ViewInfo.builderFrom` bridges that into the
v2
- // `properties()` bag, so downstream consumers
(`fetchDefaultCollation`,
- // `AlterV2ViewExec`'s `existingProp(PROP_COLLATION)`) see it under
either surface.
- // For v2, we rebuild the existing `ViewInfo` with `PROP_COLLATION`
set so the same
- // downstream consumers see it on the regular `info.properties` path.
- case alterViewAs @ AlterViewAs(rpv @ ResolvedPersistentView(
- catalog: SupportsNamespaces, identifier, info), _, _, _, _)
- if
Option(info.properties.get(TableCatalog.PROP_COLLATION)).isEmpty =>
- // Only rewrite when the namespace actually supplies a default.
[[ViewInfo]] /
- // [[V1ViewInfo]] are non-case classes, so a copy with
structurally-identical fields
- // still reads as a different reference -- if we rewrote
unconditionally, the
- // resolution batch would see the plan change every iteration and
never reach
- // fixed point. Looking up the namespace default takes one
`loadNamespaceMetadata`
- // round trip, so do it once here and bail out before rewriting if
the namespace has
- // no default.
- getCollationFromSchemaMetadata(catalog, identifier.namespace())
match {
- case Some(newCollation) =>
- val newInfo: ViewInfo = info match {
- case v1Info: V1ViewInfo =>
- new V1ViewInfo(v1Info.v1Table.copy(collation =
Some(newCollation)))
- case _ =>
- CatalogV2Util.viewInfoBuilderFrom(info)
- .withCollation(newCollation)
- .build()
- }
- val newRpv = rpv.copy(info = newInfo)
- val newAlterViewAs =
CurrentOrigin.withOrigin(alterViewAs.origin) {
- alterViewAs.copy(child = newRpv)
- }
- newAlterViewAs.copyTagsFrom(alterViewAs)
- newAlterViewAs
- case None =>
- alterViewAs
- }
-
case createUserDefinedFunction@CreateUserDefinedFunction(
ResolvedIdentifier(catalog: SupportsNamespaces, identifier),
_, _, _, _, _, collation, _, _, _, _, _, _) if collation.isEmpty =>
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
index a7c5cf5e5431..e42d5f3a8445 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
@@ -535,10 +535,10 @@ private[sql] object CatalogV2Util {
/**
* Construct a [[ViewInfo.Builder]] seeded from an existing view's metadata.
Used by ALTER
- * VIEW execs (SET / UNSET TBLPROPERTIES, ALTER VIEW ... WITH SCHEMA
BINDING) and by
- * `ApplyDefaultCollation` -- override the one field that changes, then
`build` to produce
- * the replacement payload for [[ViewCatalog#replaceView]]. Every other
field flows through
- * unchanged so a metadata-only mutation does not perturb the view body.
+ * VIEW execs (SET / UNSET TBLPROPERTIES, ALTER VIEW ... WITH SCHEMA
BINDING) -- override
+ * the one field that changes, then `build` to produce the replacement
payload for
+ * [[ViewCatalog#replaceView]]. Every other field flows through unchanged so
a metadata-only
+ * mutation does not perturb the view body.
*/
def viewInfoBuilderFrom(existing: ViewInfo): ViewInfo.Builder = {
val builder = new ViewInfo.Builder()
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 8774feb4d911..cfd52707bbc2 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, toPrettySQL,
CharVarcharUtils, ResolveDefaultColumns => DefaultCols}
import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._
-import org.apache.spark.sql.connector.catalog.{CatalogExtension,
CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog,
SupportsNamespaces, TableCatalog, V1Table, ViewCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogExtension,
CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog,
SupportsNamespaces, V1Table, ViewCatalog}
import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.errors.{QueryCompilationErrors,
QueryExecutionErrors}
import org.apache.spark.sql.execution.command._
@@ -541,20 +541,8 @@ class ResolveSessionCatalog(val catalogManager:
CatalogManager)
// The final `_, _` are AlterViewAs.isAnalyzed and referredTempFunctions.
We drop both:
// AlterViewAsCommand is a separate AnalysisOnlyCommand and gets its own
markAsAnalyzed pass
// from HandleSpecialCommand after this rewrite.
- case alterViewAs @ AlterViewAs(
- ResolvedViewIdentifier(ident), originalText, query, _, _) =>
- // For session-catalog persistent views, pick up the analysis-time
collation off the
- // resolved `ViewInfo` -- `ApplyDefaultCollation` rewrites that property
to fill the
- // namespace default when the existing view had none, and
`alterPermanentView` wants
- // the post-rewrite value so the persisted `CatalogTable.collation`
matches the
- // collated literal types in the analyzed plan. Temp views don't carry a
`ViewInfo`,
- // so they pass through without a collation override.
- val collation = alterViewAs.child match {
- case rpv: ResolvedPersistentView =>
- Option(rpv.info.properties.get(TableCatalog.PROP_COLLATION))
- case _ => None
- }
- AlterViewAsCommand(ident, originalText, query, collation = collation)
+ case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query, _, _)
=>
+ AlterViewAsCommand(ident, originalText, query)
case AlterViewSchemaBinding(ResolvedViewIdentifier(ident), viewSchemaMode)
=>
AlterViewSchemaBindingCommand(ident, viewSchemaMode)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 8407f20777d9..411682f35f6d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -235,14 +235,7 @@ case class AlterViewAsCommand(
originalText: String,
query: LogicalPlan,
isAnalyzed: Boolean = false,
- referredTempFunctions: Seq[String] = Seq.empty,
- // Analysis-time collation for the resolved view. `ApplyDefaultCollation`
may have folded
- // the namespace default into the resolved view's `CatalogTable.collation`
if it was empty;
- // `ResolveSessionCatalog` then reads
`ResolvedPersistentView.info.properties`'s
- // `PROP_COLLATION` and passes it here. Only `alterPermanentView` consumes
it: `Some(x)`
- // overwrites `CatalogTable.collation`, `None` falls through to the
existing typed field
- // so callers that omit this argument keep the existing view's collation
untouched.
- collation: Option[String] = None)
+ referredTempFunctions: Seq[String] = Seq.empty)
extends RunnableCommand with AnalysisOnlyCommand with CTEInChildren {
import ViewHelper._
@@ -317,13 +310,7 @@ case class AlterViewAsCommand(
schema = newSchema,
properties = newProperties,
viewOriginalText = Some(originalText),
- viewText = Some(originalText),
- // Prefer the analysis-time collation -- `ApplyDefaultCollation` may
have filled the
- // namespace's default `PROP_COLLATION` into a previously-empty value,
and we want that
- // to be persisted alongside the matching collated literal types in
`newSchema`. Fall
- // back to the existing typed field for backward compat with callers
that don't pass
- // `collation`.
- collation = collation.orElse(viewMeta.collation))
+ viewText = Some(originalText))
session.sessionState.catalog.alterTable(updatedViewMeta)
}
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/view-with-default-collation.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/view-with-default-collation.sql.out
index a1257ab482c6..02f40446a283 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/view-with-default-collation.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/view-with-default-collation.sql.out
@@ -465,7 +465,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`,
UTF8_LCASE, SELECT 1, false, fa
-- !query
ALTER VIEW v AS SELECT 'a' AS c1, 'b' AS c2
-- !query analysis
-AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT 'a' AS c1, 'b' AS c2,
true, UTF8_LCASE
+AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT 'a' AS c1, 'b' AS c2,
true
+- Project [a AS c1#x, b AS c2#x]
+- OneRowRelation
@@ -495,7 +495,7 @@ Project [collation(c2#x) AS collation(c2)#x]
-- !query
ALTER VIEW v AS SELECT 'c' AS c3 WHERE 'a' = 'A'
-- !query analysis
-AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT 'c' AS c3 WHERE 'a' =
'A', true, UTF8_LCASE
+AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT 'c' AS c3 WHERE 'a' =
'A', true
+- Project [c AS c3#x]
+- Filter (a = A)
+- OneRowRelation
@@ -556,7 +556,7 @@ ALTER VIEW v AS
-- !query analysis
AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT *, 'c' AS c4, (SELECT
(SELECT CASE 'š' = 'S' WHEN TRUE THEN 'd' ELSE 'b' END)) AS c5
FROM t
- WHERE c1 = 'A' AND 'ć' = 'Č', true, sr_CI_AI
+ WHERE c1 = 'A' AND 'ć' = 'Č', true
+- Project [c1#x, c2#x, c3#x, c AS c4#x, scalar-subquery#x [] AS c5#x]
: +- Project [scalar-subquery#x [] AS scalarsubquery()#x]
: : +- Project [CASE WHEN ((š = S) = true) THEN d ELSE b END AS
CASE WHEN (('š' collate sr_CI_AI = 'S' collate sr_CI_AI) = true) THEN 'd'
collate sr_CI_AI ELSE 'b' collate sr_CI_AI END#x]
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
index 29ee48e02919..82bb61648023 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
@@ -1556,14 +1556,8 @@ abstract class DefaultCollationTestSuiteV1 extends
DefaultCollationTestSuite {
}
}
- testString("ALTER VIEW AS picks up the namespace's default collation when
the existing " +
- "view has none") {
+ testString("ALTER SCHEMA DEFAULT COLLATION does not retroactively change a
view's collation") {
_ =>
- // The view is created in a schema with no default collation, so the stored
- // `CatalogTable.collation` is `None`. After the schema gains a default,
the next
- // `ALTER VIEW AS` must fold that default into both the analyzed plan's
literal types
- // (so `assertTableColumnCollation` sees it on read) and the persisted
CatalogTable so
- // the AnalysisContext fallback fires on every subsequent read.
withDatabase(testSchema) {
sql(s"CREATE SCHEMA $testSchema")
sql(s"USE $testSchema")
@@ -1573,8 +1567,8 @@ abstract class DefaultCollationTestSuiteV1 extends
DefaultCollationTestSuite {
sql(s"ALTER SCHEMA $testSchema DEFAULT COLLATION UTF8_LCASE")
sql(s"ALTER VIEW $testView AS SELECT 'x' AS c1, 'y' AS c2")
- assertTableColumnCollation(testView, "c1", "UTF8_LCASE")
- assertTableColumnCollation(testView, "c2", "UTF8_LCASE")
+ assertTableColumnCollation(testView, "c1", "UTF8_BINARY")
+ assertTableColumnCollation(testView, "c2", "UTF8_BINARY")
}
}
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
index 2ed112c3b22c..a3a023ec4d5c 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
@@ -25,12 +25,11 @@ import org.apache.spark.sql.types.StringType
class AlterViewAsSuite extends command.AlterViewAsSuiteBase with
ViewCommandSuiteBase {
- test("V2: ALTER VIEW AS picks up the namespace's default collation when the
existing view " +
- "has none") {
- // Create the namespace with no default collation; create a view in it
(PROP_COLLATION
- // unset). Then set the namespace default and ALTER VIEW AS -- the new
ViewInfo must end
- // up with PROP_COLLATION = UTF8_LCASE (so v1Table.toCatalogTable's
`collation` field is
- // set, and view-read time picks up UTF8_LCASE via
AnalysisContext.collation).
+ test("V2: ALTER NAMESPACE DEFAULT COLLATION does not retroactively change a
view's collation") {
+ // A view created in a namespace without a default collation keeps its
creation-time
+ // collation behavior. A later `ALTER NAMESPACE ... DEFAULT COLLATION`
followed by
+ // `ALTER VIEW AS` does not fold the new namespace default into the view
-- `PROP_COLLATION`
+ // stays at its create-time value (empty here) and the body literals stay
UTF8_BINARY.
withSQLConf(SQLConf.SCHEMA_LEVEL_COLLATIONS_ENABLED.key -> "true") {
val viewName = "v2_alter_collation_inherit"
val view = s"$catalog.$namespace.$viewName"
@@ -45,11 +44,35 @@ class AlterViewAsSuite extends command.AlterViewAsSuiteBase
with ViewCommandSuit
sql(s"ALTER VIEW $view AS SELECT 'x' AS c1, 'y' AS c2")
val stored = viewCatalog.getStoredView(Array(namespace), viewName)
- assert(stored.properties().get(TableCatalog.PROP_COLLATION) ==
"UTF8_LCASE")
- // Read-time the view body's literal types reflect the inherited
collation.
+
assert(Option(stored.properties().get(TableCatalog.PROP_COLLATION)).isEmpty)
val df = spark.table(view)
- assert(df.schema("c1").dataType === StringType("UTF8_LCASE"))
- assert(df.schema("c2").dataType === StringType("UTF8_LCASE"))
+ assert(df.schema("c1").dataType === StringType)
+ assert(df.schema("c2").dataType === StringType)
+ }
+ }
+
+ test("V2: ALTER VIEW after ALTER NAMESPACE DEFAULT COLLATION keeps the
inherited collation") {
+ withSQLConf(SQLConf.SCHEMA_LEVEL_COLLATIONS_ENABLED.key -> "true") {
+ val ns = "ns"
+ val viewName = "v2"
+ val view = s"$catalog.$ns.$viewName"
+ withNamespace(s"$catalog.$ns") {
+ sql(s"CREATE NAMESPACE $catalog.$ns DEFAULT COLLATION UTF8_LCASE")
+ sql(s"CREATE VIEW $view AS SELECT 'a' AS c1")
+ assert(viewCatalog
+ .getStoredView(Array(ns), viewName)
+ .properties()
+ .get(TableCatalog.PROP_COLLATION) == "UTF8_LCASE")
+
+ sql(s"ALTER NAMESPACE $catalog.$ns DEFAULT COLLATION UNICODE")
+ sql(s"ALTER VIEW $view AS SELECT 'x' AS c1, 'y' AS c2")
+
+ val stored = viewCatalog.getStoredView(Array(ns), viewName)
+ assert(stored.properties().get(TableCatalog.PROP_COLLATION) ==
"UTF8_LCASE")
+ val df = spark.table(view)
+ assert(df.schema("c1").dataType === StringType("UTF8_LCASE"))
+ assert(df.schema("c2").dataType === StringType("UTF8_LCASE"))
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]