This is an automated email from the ASF dual-hosted git repository.
pan3793 pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new b7a942e36844 [SPARK-56755][SQL] Fix SHOW CREATE TABLE for v2 table
partitioned by bucket transform
b7a942e36844 is described below
commit b7a942e36844cf657142fd3229aa1e5ec2919902
Author: Cheng Pan <[email protected]>
AuthorDate: Sat May 9 15:13:17 2026 +0800
[SPARK-56755][SQL] Fix SHOW CREATE TABLE for v2 table partitioned by bucket
transform
### What changes were proposed in this pull request?
In `ShowCreateTableExec`, convert `BucketTransform` to `CLUSTERED BY ...
[SORTED BY ...] INTO n BUCKETS` only for v1 tables. For v2 tables, treat
`BucketTransform` as a normal transform and preserve it in the
`PARTITIONED BY ...` clause.
### Why are the changes needed?
`BucketTransform` is a special case for v1 tables, which are restricted to
at most one bucket transform. This restriction does not apply to v2 tables.
For example, the `SHOW CREATE TABLE` output is incorrect and misleading for an
Iceberg table that is partitioned by two bucket transforms: only the last
bucket transform survives, rendered as a `CLUSTERED BY` clause.
```
spark-sql (default)> create table t1(id int, user_id int, item_id int, dt
string) using iceberg partitioned by (bucket(4, user_id), bucket(2, item_id),
dt);
Time taken: 1.397 seconds
spark-sql (default)> show create table t1;
CREATE TABLE spark_catalog.default.t1 (
id INT,
user_id INT,
item_id INT,
dt STRING COLLATE UTF8_BINARY)
USING iceberg
PARTITIONED BY (dt)
CLUSTERED BY (item_id)
INTO 2 BUCKETS
LOCATION 'hdfs://hadoop-master1.orb.local:8020/warehouse/t1'
TBLPROPERTIES (
'current-snapshot-id' = 'none',
'format' = 'iceberg/parquet',
'format-version' = '2',
'write.parquet.compression-codec' = 'zstd')
Time taken: 0.253 seconds, Fetched 1 row(s)
```
### Does this PR introduce _any_ user-facing change?
Yes, `SHOW CREATE TABLE ...` now correctly displays the `PARTITIONED BY` clause
for a v2 table that has bucket partition transform(s).
### How was this patch tested?
New UT.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #55718 from pan3793/SPARK-56755.
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
(cherry picked from commit 6650e3f911f413640b5b205a336d72113d4bf4d7)
Signed-off-by: Cheng Pan <[email protected]>
---
.../datasources/v2/ShowCreateTableExec.scala | 5 +++--
.../command/v2/ShowCreateTableSuite.scala | 23 +++++++++++++++++++---
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
index 4195560c5cc1..bbabe597151b 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.analysis.ResolvedTable
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString,
CharVarcharUtils}
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table,
TableCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table,
TableCatalog, V1Table}
import org.apache.spark.sql.connector.expressions.BucketTransform
import org.apache.spark.sql.execution.LeafExecNode
import org.apache.spark.unsafe.types.UTF8String
@@ -92,7 +92,8 @@ case class ShowCreateTableExec(
val transforms = new ArrayBuffer[String]
var bucketSpec = Option.empty[BucketSpec]
table.partitioning.map {
- case BucketTransform(numBuckets, col, sortCol) =>
+ case BucketTransform(numBuckets, col, sortCol) if
table.isInstanceOf[V1Table] =>
+ require(bucketSpec.isEmpty, "V1Table can not define multiple bucket
transforms")
if (sortCol.isEmpty) {
bucketSpec = Some(BucketSpec(numBuckets,
col.map(_.fieldNames.mkString(".")), Nil))
} else {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
index f72127cbd1de..ba136c34a2f1 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
@@ -138,9 +138,26 @@ class ShowCreateTableSuite extends
command.ShowCreateTableSuiteBase with Command
"b STRING,",
"ts TIMESTAMP)",
defaultUsing,
- "PARTITIONED BY (a, years(ts), months(ts), days(ts), hours(ts))",
- "CLUSTERED BY (b)",
- "INTO 16 BUCKETS"
+ "PARTITIONED BY (a, bucket(16, b), years(ts), months(ts), days(ts),
hours(ts))"
+ ))
+ }
+ }
+
+ test("SPARK-56755: show create table[partitioned by multi bucket
transforms]") {
+ withNamespaceAndTable(ns, table) { t =>
+ sql(
+ s"""
+ |CREATE TABLE $t (a INT, b STRING, ts TIMESTAMP) $defaultUsing
+ |PARTITIONED BY (bucket(4, a), bucket(8, b), years(ts))
+ """.stripMargin)
+ val showDDL = getShowCreateDDL(t, false)
+ assert(showDDL === Array(
+ s"CREATE TABLE $t (",
+ "a INT,",
+ "b STRING COLLATE UTF8_BINARY,",
+ "ts TIMESTAMP)",
+ defaultUsing,
+ "PARTITIONED BY (bucket(4, a), bucket(8, b), years(ts))"
))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]