This is an automated email from the ASF dual-hosted git repository.

pan3793 pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new b7a942e36844 [SPARK-56755][SQL] Fix SHOW CREATE TABLE for v2 table 
partitioned by bucket transform
b7a942e36844 is described below

commit b7a942e36844cf657142fd3229aa1e5ec2919902
Author: Cheng Pan <[email protected]>
AuthorDate: Sat May 9 15:13:17 2026 +0800

    [SPARK-56755][SQL] Fix SHOW CREATE TABLE for v2 table partitioned by bucket 
transform
    
    ### What changes were proposed in this pull request?
    
    In `ShowCreateTableExec`, convert `BucketTransform` to `CLUSTERED BY ... 
[SORTED BY ...] INTO n BUCKETS` only for v1 tables. For v2 tables, treat 
`BucketTransform` as a normal transform and preserve it in the 
`PARTITIONED BY ...` clause.
    
    ### Why are the changes needed?
    
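    `BucketTransform` is a special case for v1 tables, which are restricted to 
at most one bucket transform. This restriction does not apply to v2 tables, so 
the `SHOW CREATE TABLE` output is incorrect and misleading for, e.g., an 
Iceberg table that is partitioned by two bucket transforms. In the example 
below, `bucket(4, user_id)` is dropped entirely and `bucket(2, item_id)` is 
rendered as a `CLUSTERED BY` clause instead of staying in `PARTITIONED BY`.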
    
    ```
    spark-sql (default)> create table t1(id int, user_id int, item_id int, dt 
string) using iceberg partitioned by (bucket(4, user_id), bucket(2, item_id), 
dt);
    Time taken: 1.397 seconds
    spark-sql (default)> show create table t1;
    CREATE TABLE spark_catalog.default.t1 (
      id INT,
      user_id INT,
      item_id INT,
      dt STRING COLLATE UTF8_BINARY)
    USING iceberg
    PARTITIONED BY (dt)
    CLUSTERED BY (item_id)
    INTO 2 BUCKETS
    LOCATION 'hdfs://hadoop-master1.orb.local:8020/warehouse/t1'
    TBLPROPERTIES (
      'current-snapshot-id' = 'none',
      'format' = 'iceberg/parquet',
      'format-version' = '2',
      'write.parquet.compression-codec' = 'zstd')
    
    Time taken: 0.253 seconds, Fetched 1 row(s)
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, `SHOW CREATE TABLE ...` now correctly displays the `PARTITIONED BY` 
clause for v2 tables that have bucket partition transform(s).
    
    ### How was this patch tested?
    
    New UT.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #55718 from pan3793/SPARK-56755.
    
    Authored-by: Cheng Pan <[email protected]>
    Signed-off-by: Cheng Pan <[email protected]>
    (cherry picked from commit 6650e3f911f413640b5b205a336d72113d4bf4d7)
    Signed-off-by: Cheng Pan <[email protected]>
---
 .../datasources/v2/ShowCreateTableExec.scala       |  5 +++--
 .../command/v2/ShowCreateTableSuite.scala          | 23 +++++++++++++++++++---
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
index 4195560c5cc1..bbabe597151b 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.analysis.ResolvedTable
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, 
CharVarcharUtils}
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, 
TableCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, 
TableCatalog, V1Table}
 import org.apache.spark.sql.connector.expressions.BucketTransform
 import org.apache.spark.sql.execution.LeafExecNode
 import org.apache.spark.unsafe.types.UTF8String
@@ -92,7 +92,8 @@ case class ShowCreateTableExec(
       val transforms = new ArrayBuffer[String]
       var bucketSpec = Option.empty[BucketSpec]
       table.partitioning.map {
-        case BucketTransform(numBuckets, col, sortCol) =>
+        case BucketTransform(numBuckets, col, sortCol) if 
table.isInstanceOf[V1Table] =>
+          require(bucketSpec.isEmpty, "V1Table can not define multiple bucket 
transforms")
           if (sortCol.isEmpty) {
             bucketSpec = Some(BucketSpec(numBuckets, 
col.map(_.fieldNames.mkString(".")), Nil))
           } else {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
index f72127cbd1de..ba136c34a2f1 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
@@ -138,9 +138,26 @@ class ShowCreateTableSuite extends 
command.ShowCreateTableSuiteBase with Command
         "b STRING,",
         "ts TIMESTAMP)",
         defaultUsing,
-        "PARTITIONED BY (a, years(ts), months(ts), days(ts), hours(ts))",
-        "CLUSTERED BY (b)",
-        "INTO 16 BUCKETS"
+        "PARTITIONED BY (a, bucket(16, b), years(ts), months(ts), days(ts), 
hours(ts))"
+      ))
+    }
+  }
+
+  test("SPARK-56755: show create table[partitioned by multi bucket 
transforms]") {
+    withNamespaceAndTable(ns, table) { t =>
+      sql(
+        s"""
+           |CREATE TABLE $t (a INT, b STRING, ts TIMESTAMP) $defaultUsing
+           |PARTITIONED BY (bucket(4, a), bucket(8, b), years(ts))
+         """.stripMargin)
+      val showDDL = getShowCreateDDL(t, false)
+      assert(showDDL === Array(
+        s"CREATE TABLE $t (",
+        "a INT,",
+        "b STRING COLLATE UTF8_BINARY,",
+        "ts TIMESTAMP)",
+        defaultUsing,
+        "PARTITIONED BY (bucket(4, a), bucket(8, b), years(ts))"
       ))
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to