This is an automated email from the ASF dual-hosted git repository.

cloud-fan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 27b785236f54 [SPARK-57054][SQL] Make view collation sticky across 
ALTER SCHEMA DEFAULT COLLATION
27b785236f54 is described below

commit 27b785236f54dffab638514a8f92eea1a6ef31ec
Author: Marko Ilić <[email protected]>
AuthorDate: Sat May 30 02:58:22 2026 +0800

    [SPARK-57054][SQL] Make view collation sticky across ALTER SCHEMA DEFAULT COLLATION
    
    ### What changes were proposed in this pull request?
    
    Remove the `AlterViewAs` rewrite case from 
`ApplyDefaultCollation.resolveDefaultCollation`. It used to fold the 
namespace's current default collation into a view whose persisted 
`PROP_COLLATION` was empty, on every `ALTER VIEW AS`.
    
    ### Why are the changes needed?
    
    A view's collation should be fixed at creation time. The removed rewrite 
let a later `ALTER SCHEMA ... DEFAULT COLLATION` retroactively change an 
existing view's literal types on the next `ALTER VIEW AS`:
    
    ```sql
    CREATE SCHEMA s;
    CREATE VIEW v AS SELECT 'a' AS c1;            -- UTF8_BINARY
    ALTER SCHEMA s DEFAULT COLLATION UTF8_LCASE;
    ALTER VIEW v AS SELECT 'x' AS c1;             -- was: UTF8_LCASE; now: UTF8_BINARY
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. `ALTER VIEW AS` no longer picks up the namespace's current default 
collation for a view that was created without one — the view's collation stays 
sticky from creation.
    
    ### How was this patch tested?
    
    Updated the existing V1 and V2 tests to assert the new sticky behavior.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #56100 from ilicmarkodb/marko-ilic_data/spark_view_def_collation.
    
    Authored-by: Marko Ilić <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../catalyst/analysis/ApplyDefaultCollation.scala  | 42 +--------------------
 .../sql/connector/catalog/CatalogV2Util.scala      |  8 ++--
 .../catalyst/analysis/ResolveSessionCatalog.scala  | 18 ++-------
 .../apache/spark/sql/execution/command/views.scala | 17 +--------
 .../view-with-default-collation.sql.out            |  6 +--
 .../sql/collation/DefaultCollationTestSuite.scala  | 12 ++----
 .../execution/command/v2/AlterViewAsSuite.scala    | 43 +++++++++++++++++-----
 7 files changed, 49 insertions(+), 97 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
index ebc2ac2a6b2f..ca9418a0c6ef 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.{areSameBaseType, 
isDefaultStringCharOrVarcharType, replaceDefaultStringCharAndVarcharTypes}
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, 
SupportsNamespaces, TableCatalog, V1ViewInfo, ViewInfo}
+import org.apache.spark.sql.connector.catalog.{SupportsNamespaces, 
TableCatalog}
 import org.apache.spark.sql.types.{DataType, StringHelper, StringType}
 
 /**
@@ -206,46 +206,6 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] {
           newCreateView.copyTagsFrom(createView)
           newCreateView
 
-        // We match against ResolvedPersistentView because temporary views 
don't have a
-        // schema/catalog. The rewrite covers both v1 (session-catalog, 
[[V1ViewInfo]]) and
-        // non-session v2 views: when the existing view has no 
`PROP_COLLATION` and the
-        // namespace supplies a default, fold that default into the resolved 
`ViewInfo`. For
-        // v1, `V1ViewInfo` is rebuilt around a `CatalogTable` whose typed 
`collation` field
-        // holds the new value; `V1ViewInfo.builderFrom` bridges that into the 
v2
-        // `properties()` bag, so downstream consumers 
(`fetchDefaultCollation`,
-        // `AlterV2ViewExec`'s `existingProp(PROP_COLLATION)`) see it under 
either surface.
-        // For v2, we rebuild the existing `ViewInfo` with `PROP_COLLATION` 
set so the same
-        // downstream consumers see it on the regular `info.properties` path.
-        case alterViewAs @ AlterViewAs(rpv @ ResolvedPersistentView(
-        catalog: SupportsNamespaces, identifier, info), _, _, _, _)
-            if 
Option(info.properties.get(TableCatalog.PROP_COLLATION)).isEmpty =>
-          // Only rewrite when the namespace actually supplies a default. 
[[ViewInfo]] /
-          // [[V1ViewInfo]] are non-case classes, so a copy with 
structurally-identical fields
-          // still reads as a different reference -- if we rewrote 
unconditionally, the
-          // resolution batch would see the plan change every iteration and 
never reach
-          // fixed point. Looking up the namespace default takes one 
`loadNamespaceMetadata`
-          // round trip, so do it once here and bail out before rewriting if 
the namespace has
-          // no default.
-          getCollationFromSchemaMetadata(catalog, identifier.namespace()) 
match {
-            case Some(newCollation) =>
-              val newInfo: ViewInfo = info match {
-                case v1Info: V1ViewInfo =>
-                  new V1ViewInfo(v1Info.v1Table.copy(collation = 
Some(newCollation)))
-                case _ =>
-                  CatalogV2Util.viewInfoBuilderFrom(info)
-                    .withCollation(newCollation)
-                    .build()
-              }
-              val newRpv = rpv.copy(info = newInfo)
-              val newAlterViewAs = 
CurrentOrigin.withOrigin(alterViewAs.origin) {
-                alterViewAs.copy(child = newRpv)
-              }
-              newAlterViewAs.copyTagsFrom(alterViewAs)
-              newAlterViewAs
-            case None =>
-              alterViewAs
-          }
-
         case createUserDefinedFunction@CreateUserDefinedFunction(
         ResolvedIdentifier(catalog: SupportsNamespaces, identifier),
         _, _, _, _, _, collation, _, _, _, _, _, _) if collation.isEmpty =>
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
index a7c5cf5e5431..e42d5f3a8445 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
@@ -535,10 +535,10 @@ private[sql] object CatalogV2Util {
 
   /**
    * Construct a [[ViewInfo.Builder]] seeded from an existing view's metadata. 
Used by ALTER
-   * VIEW execs (SET / UNSET TBLPROPERTIES, ALTER VIEW ... WITH SCHEMA 
BINDING) and by
-   * `ApplyDefaultCollation` -- override the one field that changes, then 
`build` to produce
-   * the replacement payload for [[ViewCatalog#replaceView]]. Every other 
field flows through
-   * unchanged so a metadata-only mutation does not perturb the view body.
+   * VIEW execs (SET / UNSET TBLPROPERTIES, ALTER VIEW ... WITH SCHEMA 
BINDING) -- override
+   * the one field that changes, then `build` to produce the replacement 
payload for
+   * [[ViewCatalog#replaceView]]. Every other field flows through unchanged so 
a metadata-only
+   * mutation does not perturb the view body.
    */
   def viewInfoBuilderFrom(existing: ViewInfo): ViewInfo.Builder = {
     val builder = new ViewInfo.Builder()
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 8774feb4d911..cfd52707bbc2 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, toPrettySQL, 
CharVarcharUtils, ResolveDefaultColumns => DefaultCols}
 import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._
-import org.apache.spark.sql.connector.catalog.{CatalogExtension, 
CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, 
SupportsNamespaces, TableCatalog, V1Table, ViewCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogExtension, 
CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, 
SupportsNamespaces, V1Table, ViewCatalog}
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.errors.{QueryCompilationErrors, 
QueryExecutionErrors}
 import org.apache.spark.sql.execution.command._
@@ -541,20 +541,8 @@ class ResolveSessionCatalog(val catalogManager: 
CatalogManager)
     // The final `_, _` are AlterViewAs.isAnalyzed and referredTempFunctions. 
We drop both:
     // AlterViewAsCommand is a separate AnalysisOnlyCommand and gets its own 
markAsAnalyzed pass
     // from HandleSpecialCommand after this rewrite.
-    case alterViewAs @ AlterViewAs(
-        ResolvedViewIdentifier(ident), originalText, query, _, _) =>
-      // For session-catalog persistent views, pick up the analysis-time 
collation off the
-      // resolved `ViewInfo` -- `ApplyDefaultCollation` rewrites that property 
to fill the
-      // namespace default when the existing view had none, and 
`alterPermanentView` wants
-      // the post-rewrite value so the persisted `CatalogTable.collation` 
matches the
-      // collated literal types in the analyzed plan. Temp views don't carry a 
`ViewInfo`,
-      // so they pass through without a collation override.
-      val collation = alterViewAs.child match {
-        case rpv: ResolvedPersistentView =>
-          Option(rpv.info.properties.get(TableCatalog.PROP_COLLATION))
-        case _ => None
-      }
-      AlterViewAsCommand(ident, originalText, query, collation = collation)
+    case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query, _, _) 
=>
+      AlterViewAsCommand(ident, originalText, query)
 
     case AlterViewSchemaBinding(ResolvedViewIdentifier(ident), viewSchemaMode) 
=>
       AlterViewSchemaBindingCommand(ident, viewSchemaMode)
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 8407f20777d9..411682f35f6d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -235,14 +235,7 @@ case class AlterViewAsCommand(
     originalText: String,
     query: LogicalPlan,
     isAnalyzed: Boolean = false,
-    referredTempFunctions: Seq[String] = Seq.empty,
-    // Analysis-time collation for the resolved view. `ApplyDefaultCollation` 
may have folded
-    // the namespace default into the resolved view's `CatalogTable.collation` 
if it was empty;
-    // `ResolveSessionCatalog` then reads 
`ResolvedPersistentView.info.properties`'s
-    // `PROP_COLLATION` and passes it here. Only `alterPermanentView` consumes 
it: `Some(x)`
-    // overwrites `CatalogTable.collation`, `None` falls through to the 
existing typed field
-    // so callers that omit this argument keep the existing view's collation 
untouched.
-    collation: Option[String] = None)
+    referredTempFunctions: Seq[String] = Seq.empty)
   extends RunnableCommand with AnalysisOnlyCommand with CTEInChildren {
 
   import ViewHelper._
@@ -317,13 +310,7 @@ case class AlterViewAsCommand(
       schema = newSchema,
       properties = newProperties,
       viewOriginalText = Some(originalText),
-      viewText = Some(originalText),
-      // Prefer the analysis-time collation -- `ApplyDefaultCollation` may 
have filled the
-      // namespace's default `PROP_COLLATION` into a previously-empty value, 
and we want that
-      // to be persisted alongside the matching collated literal types in 
`newSchema`. Fall
-      // back to the existing typed field for backward compat with callers 
that don't pass
-      // `collation`.
-      collation = collation.orElse(viewMeta.collation))
+      viewText = Some(originalText))
 
     session.sessionState.catalog.alterTable(updatedViewMeta)
   }
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/view-with-default-collation.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/view-with-default-collation.sql.out
index a1257ab482c6..02f40446a283 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/view-with-default-collation.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/view-with-default-collation.sql.out
@@ -465,7 +465,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, 
UTF8_LCASE, SELECT 1, false, fa
 -- !query
 ALTER VIEW v AS SELECT 'a' AS c1, 'b' AS c2
 -- !query analysis
-AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT 'a' AS c1, 'b' AS c2, 
true, UTF8_LCASE
+AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT 'a' AS c1, 'b' AS c2, 
true
    +- Project [a AS c1#x, b AS c2#x]
       +- OneRowRelation
 
@@ -495,7 +495,7 @@ Project [collation(c2#x) AS collation(c2)#x]
 -- !query
 ALTER VIEW v AS SELECT 'c' AS c3 WHERE 'a' = 'A'
 -- !query analysis
-AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT 'c' AS c3 WHERE 'a' = 
'A', true, UTF8_LCASE
+AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT 'c' AS c3 WHERE 'a' = 
'A', true
    +- Project [c AS c3#x]
       +- Filter (a = A)
          +- OneRowRelation
@@ -556,7 +556,7 @@ ALTER VIEW v AS
 -- !query analysis
 AlterViewAsCommand `spark_catalog`.`default`.`v`, SELECT *, 'c' AS c4, (SELECT 
(SELECT CASE 'š' = 'S' WHEN TRUE THEN 'd' ELSE 'b' END)) AS c5
     FROM t
-    WHERE c1 = 'A' AND 'ć' = 'Č', true, sr_CI_AI
+    WHERE c1 = 'A' AND 'ć' = 'Č', true
    +- Project [c1#x, c2#x, c3#x, c AS c4#x, scalar-subquery#x [] AS c5#x]
       :  +- Project [scalar-subquery#x [] AS scalarsubquery()#x]
       :     :  +- Project [CASE WHEN ((š = S) = true) THEN d ELSE b END AS 
CASE WHEN (('š' collate sr_CI_AI = 'S' collate sr_CI_AI) = true) THEN 'd' 
collate sr_CI_AI ELSE 'b' collate sr_CI_AI END#x]
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
index 29ee48e02919..82bb61648023 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
@@ -1556,14 +1556,8 @@ abstract class DefaultCollationTestSuiteV1 extends 
DefaultCollationTestSuite {
     }
   }
 
-  testString("ALTER VIEW AS picks up the namespace's default collation when 
the existing " +
-      "view has none") {
+  testString("ALTER SCHEMA DEFAULT COLLATION does not retroactively change a 
view's collation") {
     _ =>
-    // The view is created in a schema with no default collation, so the stored
-    // `CatalogTable.collation` is `None`. After the schema gains a default, 
the next
-    // `ALTER VIEW AS` must fold that default into both the analyzed plan's 
literal types
-    // (so `assertTableColumnCollation` sees it on read) and the persisted 
CatalogTable so
-    // the AnalysisContext fallback fires on every subsequent read.
     withDatabase(testSchema) {
       sql(s"CREATE SCHEMA $testSchema")
       sql(s"USE $testSchema")
@@ -1573,8 +1567,8 @@ abstract class DefaultCollationTestSuiteV1 extends 
DefaultCollationTestSuite {
 
         sql(s"ALTER SCHEMA $testSchema DEFAULT COLLATION UTF8_LCASE")
         sql(s"ALTER VIEW $testView AS SELECT 'x' AS c1, 'y' AS c2")
-        assertTableColumnCollation(testView, "c1", "UTF8_LCASE")
-        assertTableColumnCollation(testView, "c2", "UTF8_LCASE")
+        assertTableColumnCollation(testView, "c1", "UTF8_BINARY")
+        assertTableColumnCollation(testView, "c2", "UTF8_BINARY")
       }
     }
   }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
index 2ed112c3b22c..a3a023ec4d5c 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
@@ -25,12 +25,11 @@ import org.apache.spark.sql.types.StringType
 
 class AlterViewAsSuite extends command.AlterViewAsSuiteBase with 
ViewCommandSuiteBase {
 
-  test("V2: ALTER VIEW AS picks up the namespace's default collation when the 
existing view " +
-      "has none") {
-    // Create the namespace with no default collation; create a view in it 
(PROP_COLLATION
-    // unset). Then set the namespace default and ALTER VIEW AS -- the new 
ViewInfo must end
-    // up with PROP_COLLATION = UTF8_LCASE (so v1Table.toCatalogTable's 
`collation` field is
-    // set, and view-read time picks up UTF8_LCASE via 
AnalysisContext.collation).
+  test("V2: ALTER NAMESPACE DEFAULT COLLATION does not retroactively change a 
view's collation") {
+    // A view created in a namespace without a default collation keeps its 
creation-time
+    // collation behavior. A later `ALTER NAMESPACE ... DEFAULT COLLATION` 
followed by
+    // `ALTER VIEW AS` does not fold the new namespace default into the view 
-- `PROP_COLLATION`
+    // stays at its create-time value (empty here) and the body literals stay 
UTF8_BINARY.
     withSQLConf(SQLConf.SCHEMA_LEVEL_COLLATIONS_ENABLED.key -> "true") {
       val viewName = "v2_alter_collation_inherit"
       val view = s"$catalog.$namespace.$viewName"
@@ -45,11 +44,35 @@ class AlterViewAsSuite extends command.AlterViewAsSuiteBase 
with ViewCommandSuit
       sql(s"ALTER VIEW $view AS SELECT 'x' AS c1, 'y' AS c2")
 
       val stored = viewCatalog.getStoredView(Array(namespace), viewName)
-      assert(stored.properties().get(TableCatalog.PROP_COLLATION) == 
"UTF8_LCASE")
-      // Read-time the view body's literal types reflect the inherited 
collation.
+      
assert(Option(stored.properties().get(TableCatalog.PROP_COLLATION)).isEmpty)
       val df = spark.table(view)
-      assert(df.schema("c1").dataType === StringType("UTF8_LCASE"))
-      assert(df.schema("c2").dataType === StringType("UTF8_LCASE"))
+      assert(df.schema("c1").dataType === StringType)
+      assert(df.schema("c2").dataType === StringType)
+    }
+  }
+
+  test("V2: ALTER VIEW after ALTER NAMESPACE DEFAULT COLLATION keeps the 
inherited collation") {
+    withSQLConf(SQLConf.SCHEMA_LEVEL_COLLATIONS_ENABLED.key -> "true") {
+      val ns = "ns"
+      val viewName = "v2"
+      val view = s"$catalog.$ns.$viewName"
+      withNamespace(s"$catalog.$ns") {
+        sql(s"CREATE NAMESPACE $catalog.$ns DEFAULT COLLATION UTF8_LCASE")
+        sql(s"CREATE VIEW $view AS SELECT 'a' AS c1")
+        assert(viewCatalog
+          .getStoredView(Array(ns), viewName)
+          .properties()
+          .get(TableCatalog.PROP_COLLATION) == "UTF8_LCASE")
+
+        sql(s"ALTER NAMESPACE $catalog.$ns DEFAULT COLLATION UNICODE")
+        sql(s"ALTER VIEW $view AS SELECT 'x' AS c1, 'y' AS c2")
+
+        val stored = viewCatalog.getStoredView(Array(ns), viewName)
+        assert(stored.properties().get(TableCatalog.PROP_COLLATION) == 
"UTF8_LCASE")
+        val df = spark.table(view)
+        assert(df.schema("c1").dataType === StringType("UTF8_LCASE"))
+        assert(df.schema("c2").dataType === StringType("UTF8_LCASE"))
+      }
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to