Re: [PR] Spark: Support CREATE TABLE LIKE with Spark [iceberg]

via GitHub Sun, 15 Mar 2026 21:44:26 -0700


Copilot commented on code in PR #14269:
URL: https://github.com/apache/iceberg/pull/14269#discussion_r2938116771



##########
spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestTables.java:
##########
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.extensions;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.apache.iceberg.Parameter;
+import org.apache.iceberg.ParameterizedTestExtension;
+import org.apache.iceberg.Parameters;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.spark.SparkCatalogConfig;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+@ExtendWith(ParameterizedTestExtension.class)
+public class TestTables extends ExtensionsTestBase {
+
+  @Parameter(index = 3)
+  private String srcTableName;
+
+  @BeforeEach
+  public void beforeEach() {
+    sql(
+        "CREATE TABLE %s (id bigint NOT NULL, data string) "
+            + "USING iceberg TBLPROPERTIES (p1=1, p2='a') PARTITIONED BY 
(truncate(id, 3))",
+        srcTableName);
+    sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 
'e')", srcTableName);
+  }
+
+  @AfterEach
+  public void afterEach() {
+    sql("DROP TABLE IF EXISTS %s", srcTableName);
+    sql("DROP TABLE IF EXISTS %s", tableName);
+  }
+
+  @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}, 
srcTableName = {3}")
+  protected static Object[][] parameters() {
+    return new Object[][] {
+      {
+        SparkCatalogConfig.HIVE.catalogName(),
+        SparkCatalogConfig.HIVE.implementation(),
+        SparkCatalogConfig.HIVE.properties(),
+        SparkCatalogConfig.HIVE.catalogName() + ".default.source"
+      },
+      {
+        SparkCatalogConfig.HADOOP.catalogName(),
+        SparkCatalogConfig.HADOOP.implementation(),
+        SparkCatalogConfig.HADOOP.properties(),
+        SparkCatalogConfig.HADOOP.catalogName() + ".default.source"
+      },
+      {
+        SparkCatalogConfig.SPARK_SESSION.catalogName(),
+        SparkCatalogConfig.SPARK_SESSION.implementation(),
+        SparkCatalogConfig.SPARK_SESSION.properties(),
+        "default.source"
+      }
+    };
+  }
+
+  @TestTemplate
+  public void testNotPartitionedTable() {
+    sql("CREATE OR REPLACE TABLE %s (id bigint NOT NULL, data string) USING 
iceberg", srcTableName);
+    sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 
'e')", srcTableName);
+    sql("CREATE TABLE %s LIKE %s", tableName, srcTableName);
+
+    Schema expectedSchema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.LongType.get()),
+            Types.NestedField.optional(2, "data", Types.StringType.get()));
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    assertThat(table.schema().asStruct())
+        .as("Should have expected nullable schema")
+        .isEqualTo(expectedSchema.asStruct());
+    assertThat(table.spec().fields()).as("Should not be an 
partitioned").isEmpty();
+  }

Review Comment:
   Assertion message has grammatical typos: "expected nullable schema" (schema 
includes a required field) and "Should not be an partitioned". Updating these 
strings will make test failures easier to interpret.



##########
spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestTables.java:
##########
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.extensions;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.apache.iceberg.Parameter;
+import org.apache.iceberg.ParameterizedTestExtension;
+import org.apache.iceberg.Parameters;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.spark.SparkCatalogConfig;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+@ExtendWith(ParameterizedTestExtension.class)
+public class TestTables extends ExtensionsTestBase {
+
+  @Parameter(index = 3)
+  private String srcTableName;
+
+  @BeforeEach
+  public void beforeEach() {
+    sql(
+        "CREATE TABLE %s (id bigint NOT NULL, data string) "
+            + "USING iceberg TBLPROPERTIES (p1=1, p2='a') PARTITIONED BY 
(truncate(id, 3))",
+        srcTableName);
+    sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 
'e')", srcTableName);
+  }
+
+  @AfterEach
+  public void afterEach() {
+    sql("DROP TABLE IF EXISTS %s", srcTableName);
+    sql("DROP TABLE IF EXISTS %s", tableName);
+  }
+
+  @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}, 
srcTableName = {3}")
+  protected static Object[][] parameters() {
+    return new Object[][] {
+      {
+        SparkCatalogConfig.HIVE.catalogName(),
+        SparkCatalogConfig.HIVE.implementation(),
+        SparkCatalogConfig.HIVE.properties(),
+        SparkCatalogConfig.HIVE.catalogName() + ".default.source"
+      },
+      {
+        SparkCatalogConfig.HADOOP.catalogName(),
+        SparkCatalogConfig.HADOOP.implementation(),
+        SparkCatalogConfig.HADOOP.properties(),
+        SparkCatalogConfig.HADOOP.catalogName() + ".default.source"
+      },
+      {
+        SparkCatalogConfig.SPARK_SESSION.catalogName(),
+        SparkCatalogConfig.SPARK_SESSION.implementation(),
+        SparkCatalogConfig.SPARK_SESSION.properties(),
+        "default.source"
+      }
+    };
+  }
+
+  @TestTemplate
+  public void testNotPartitionedTable() {
+    sql("CREATE OR REPLACE TABLE %s (id bigint NOT NULL, data string) USING 
iceberg", srcTableName);
+    sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 
'e')", srcTableName);
+    sql("CREATE TABLE %s LIKE %s", tableName, srcTableName);
+
+    Schema expectedSchema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.LongType.get()),
+            Types.NestedField.optional(2, "data", Types.StringType.get()));
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    assertThat(table.schema().asStruct())
+        .as("Should have expected nullable schema")
+        .isEqualTo(expectedSchema.asStruct());
+    assertThat(table.spec().fields()).as("Should not be an 
partitioned").isEmpty();
+  }
+
+  @TestTemplate
+  public void testPartitionedTable() {
+    sql("CREATE TABLE %s LIKE %s", tableName, srcTableName);
+
+    Schema expectedSchema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.LongType.get()),
+            Types.NestedField.optional(2, "data", Types.StringType.get()));
+
+    PartitionSpec expectedSpec = 
PartitionSpec.builderFor(expectedSchema).truncate("id", 3).build();
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    assertThat(table.schema().asStruct())
+        .as("Should have expected nullable schema")
+        .isEqualTo(expectedSchema.asStruct());
+    assertThat(table.spec()).as("Should be partitioned by 
id").isEqualTo(expectedSpec);
+  }
+
+  @TestTemplate
+  public void testTableExists() {
+    sql("CREATE OR REPLACE TABLE %s (id bigint NOT NULL) USING iceberg", 
tableName);
+    sql("INSERT INTO %s VALUES (1)", tableName);
+    sql("CREATE TABLE IF NOT EXISTS %s LIKE %s", tableName, srcTableName);
+
+    Schema expectedSchema = new Schema(Types.NestedField.required(1, "id", 
Types.LongType.get()));
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    assertThat(table.schema().asStruct())
+        .as("Should have expected nullable schema")
+        .isEqualTo(expectedSchema.asStruct());
+    assertThat(table.spec().fields()).as("Should not be an 
partitioned").isEmpty();

Review Comment:
   Assertion message "Should not be an partitioned" contains a grammatical 
error; consider changing it to "should not be partitioned" for clearer test 
output.
   



##########
spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2TableLikeExec.scala:
##########
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.iceberg.PartitionSpec
+import org.apache.iceberg.Schema
+import org.apache.iceberg.SortDirection
+import org.apache.iceberg.SortOrder

Review Comment:
   `SortDirection` and `SortOrder` are imported but never used. This repo 
treats unused imports as compilation errors under Scala 2.13 (see 
baseline.gradle `-Wconf:cat=unused:error`), so the build will fail. Remove the 
unused imports or use them if sort order is intended to be copied.
   



##########
spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala:
##########
@@ -140,16 +140,22 @@ class IcebergSparkSqlExtensionsParser(delegate: 
ParserInterface)
       .replaceAll("`", "")
       .trim()
 
-    normalized.startsWith("alter table") && (normalized.contains("add 
partition field") ||
-      normalized.contains("drop partition field") ||
-      normalized.contains("replace partition field") ||
-      normalized.contains("write ordered by") ||
-      normalized.contains("write locally ordered by") ||
-      normalized.contains("write distributed by") ||
-      normalized.contains("write unordered") ||
-      normalized.contains("set identifier fields") ||
-      normalized.contains("drop identifier fields") ||
-      isSnapshotRefDdl(normalized))
+      isCreateTableLike(normalized) || (
+      normalized.startsWith("alter table") && (
+          normalized.contains("add partition field") ||

Review Comment:
   `isIcebergCommand` now routes any SQL starting with `create table` that 
contains ` like ` through the Iceberg extensions parser. For non-Iceberg 
catalogs/providers, this will produce a `CreateIcebergTableLike` logical plan 
that is *not* handled by Spark’s normal planner and will fail unless both 
identifiers resolve to `SparkCatalog`/`SparkSessionCatalog`. Consider narrowing 
the predicate (e.g., require `USING iceberg` or an Iceberg catalog prefix) so 
enabling Iceberg extensions doesn’t break non-Iceberg `CREATE TABLE ... LIKE 
...` statements.



##########
spark/v4.0/spark-extensions/src/main/antlr/org.apache.spark.sql.catalyst.parser.extensions/IcebergSqlExtensions.g4:
##########
@@ -66,7 +66,8 @@ singleStatement
     ;
 
 statement
-    : ALTER TABLE multipartIdentifier ADD PARTITION FIELD transform (AS 
name=identifier)?   #addPartitionField
+    : CREATE TABLE (IF NOT EXISTS)? multipartIdentifier LIKE 
multipartIdentifier (TBLPROPERTIES '(' tableProperty (',' tableProperty)* ')')? 
#createTableLike
+    | ALTER TABLE multipartIdentifier ADD PARTITION FIELD transform (AS 
name=identifier)?   #addPartitionField

Review Comment:
   The new `CREATE TABLE ... LIKE ...` grammar does not allow an optional 
`USING iceberg` clause (the syntax shown in the linked issue). As a result, 
`CREATE TABLE t LIKE s USING iceberg` will still fail to parse. Consider 
extending the rule to accept `USING iceberg` (and/or validate the provider is 
`iceberg`) to match Spark users’ expectations and the issue reproduction.



##########
docs/docs/spark-ddl.md:
##########
@@ -104,6 +102,16 @@ TBLPROPERTIES ('key'='value')
 AS SELECT ...
 ```
 
+## `CREATE TABLE ... LIKE ...`
+
+Iceberg supports `CREATE TABLE ... LIKE ...`.
+This command is available when using Iceberg [SQL 
extensions](spark-configuration.md#sql-extensions).
+
+```sql
+CREATE TABLE prod.db.new_table

Review Comment:
   Docs show `CREATE TABLE ... LIKE ...` without mentioning whether `USING 
iceberg` is supported (as in the linked issue) and without documenting the 
supported modifiers (`IF NOT EXISTS`, `TBLPROPERTIES (...)`). Please clarify 
the exact supported syntax/limitations here so users don’t try `... USING 
iceberg` and hit a parse error.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Spark: Support CREATE TABLE LIKE with Spark [iceberg]

Reply via email to