Copilot commented on code in PR #14269: URL: https://github.com/apache/iceberg/pull/14269#discussion_r2938116771
########## spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestTables.java: ########## @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.spark.extensions; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.iceberg.Parameter; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.spark.SparkCatalogConfig; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestTables extends ExtensionsTestBase { + + @Parameter(index = 3) + private String srcTableName; + + @BeforeEach + public void beforeEach() { + sql( + "CREATE TABLE %s (id bigint NOT NULL, data string) " + + "USING iceberg TBLPROPERTIES (p1=1, p2='a') PARTITIONED BY (truncate(id, 3))", + srcTableName); + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')", srcTableName); + } + + @AfterEach + public void afterEach() { + sql("DROP TABLE IF EXISTS %s", srcTableName); + sql("DROP TABLE IF EXISTS %s", tableName); + } + + @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}, srcTableName = {3}") + protected static Object[][] parameters() { + return new Object[][] { + { + SparkCatalogConfig.HIVE.catalogName(), + SparkCatalogConfig.HIVE.implementation(), + SparkCatalogConfig.HIVE.properties(), + SparkCatalogConfig.HIVE.catalogName() + ".default.source" + }, + { + SparkCatalogConfig.HADOOP.catalogName(), + SparkCatalogConfig.HADOOP.implementation(), + SparkCatalogConfig.HADOOP.properties(), + SparkCatalogConfig.HADOOP.catalogName() + ".default.source" + }, + { + SparkCatalogConfig.SPARK_SESSION.catalogName(), + SparkCatalogConfig.SPARK_SESSION.implementation(), + SparkCatalogConfig.SPARK_SESSION.properties(), + "default.source" + } + }; + } + + @TestTemplate + public void testNotPartitionedTable() { + sql("CREATE OR REPLACE TABLE %s (id bigint NOT NULL, data string) USING iceberg", srcTableName); + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')", srcTableName); + sql("CREATE TABLE %s LIKE %s", tableName, srcTableName); + + Schema expectedSchema = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.optional(2, "data", Types.StringType.get())); + + Table table = validationCatalog.loadTable(tableIdent); + + assertThat(table.schema().asStruct()) + .as("Should have expected nullable schema") + .isEqualTo(expectedSchema.asStruct()); + assertThat(table.spec().fields()).as("Should not be an partitioned").isEmpty(); + } Review Comment: Assertion message has grammatical typos: "expected nullable schema" (schema includes a required field) and "Should not be an partitioned". Updating these strings will make test failures easier to interpret. ########## spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestTables.java: ########## @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.spark.extensions; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.iceberg.Parameter; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.spark.SparkCatalogConfig; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestTables extends ExtensionsTestBase { + + @Parameter(index = 3) + private String srcTableName; + + @BeforeEach + public void beforeEach() { + sql( + "CREATE TABLE %s (id bigint NOT NULL, data string) " + + "USING iceberg TBLPROPERTIES (p1=1, p2='a') PARTITIONED BY (truncate(id, 3))", + srcTableName); + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')", srcTableName); + } + + @AfterEach + public void afterEach() { + sql("DROP TABLE IF EXISTS %s", srcTableName); + sql("DROP TABLE IF EXISTS %s", tableName); + } + + @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}, srcTableName = {3}") + protected static Object[][] parameters() { + return new Object[][] { + { + SparkCatalogConfig.HIVE.catalogName(), + SparkCatalogConfig.HIVE.implementation(), + SparkCatalogConfig.HIVE.properties(), + SparkCatalogConfig.HIVE.catalogName() + ".default.source" + }, + { + SparkCatalogConfig.HADOOP.catalogName(), + SparkCatalogConfig.HADOOP.implementation(), + SparkCatalogConfig.HADOOP.properties(), + SparkCatalogConfig.HADOOP.catalogName() + ".default.source" + }, + { + SparkCatalogConfig.SPARK_SESSION.catalogName(), + SparkCatalogConfig.SPARK_SESSION.implementation(), + SparkCatalogConfig.SPARK_SESSION.properties(), + "default.source" + } + }; + } + + @TestTemplate + public void testNotPartitionedTable() { + sql("CREATE OR REPLACE TABLE %s (id bigint NOT NULL, data string) USING iceberg", srcTableName); + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')", srcTableName); + sql("CREATE TABLE %s LIKE %s", tableName, srcTableName); + + Schema expectedSchema = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.optional(2, "data", Types.StringType.get())); + + Table table = validationCatalog.loadTable(tableIdent); + + assertThat(table.schema().asStruct()) + .as("Should have expected nullable schema") + .isEqualTo(expectedSchema.asStruct()); + assertThat(table.spec().fields()).as("Should not be an partitioned").isEmpty(); + } + + @TestTemplate + public void testPartitionedTable() { + sql("CREATE TABLE %s LIKE %s", tableName, srcTableName); + + Schema expectedSchema = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.optional(2, "data", Types.StringType.get())); + + PartitionSpec expectedSpec = PartitionSpec.builderFor(expectedSchema).truncate("id", 3).build(); + + Table table = validationCatalog.loadTable(tableIdent); + + assertThat(table.schema().asStruct()) + .as("Should have expected nullable schema") + .isEqualTo(expectedSchema.asStruct()); + assertThat(table.spec()).as("Should be partitioned by id").isEqualTo(expectedSpec); + } + + @TestTemplate + public void testTableExists() { + sql("CREATE OR REPLACE TABLE %s (id bigint NOT NULL) USING iceberg", tableName); + sql("INSERT INTO %s VALUES (1)", tableName); + sql("CREATE TABLE IF NOT EXISTS %s LIKE %s", tableName, srcTableName); + + Schema expectedSchema = new Schema(Types.NestedField.required(1, "id", Types.LongType.get())); + + Table table = validationCatalog.loadTable(tableIdent); + + assertThat(table.schema().asStruct()) + .as("Should have expected nullable schema") + .isEqualTo(expectedSchema.asStruct()); + assertThat(table.spec().fields()).as("Should not be an partitioned").isEmpty(); Review Comment: Assertion message "Should not be an partitioned" contains a grammatical error; consider changing it to "should not be partitioned" for clearer test output. ########## spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2TableLikeExec.scala: ########## @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.iceberg.PartitionSpec +import org.apache.iceberg.Schema +import org.apache.iceberg.SortDirection +import org.apache.iceberg.SortOrder Review Comment: `SortDirection` and `SortOrder` are imported but never used. This repo treats unused imports as compilation errors under Scala 2.13 (see baseline.gradle `-Wconf:cat=unused:error`), so the build will fail. Remove the unused imports or use them if sort order is intended to be copied. ########## spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala: ########## @@ -140,16 +140,22 @@ class IcebergSparkSqlExtensionsParser(delegate: ParserInterface) .replaceAll("`", "") .trim() - normalized.startsWith("alter table") && (normalized.contains("add partition field") || - normalized.contains("drop partition field") || - normalized.contains("replace partition field") || - normalized.contains("write ordered by") || - normalized.contains("write locally ordered by") || - normalized.contains("write distributed by") || - normalized.contains("write unordered") || - normalized.contains("set identifier fields") || - normalized.contains("drop identifier fields") || - isSnapshotRefDdl(normalized)) + isCreateTableLike(normalized) || ( + normalized.startsWith("alter table") && ( + normalized.contains("add partition field") || Review Comment: `isIcebergCommand` now routes any SQL starting with `create table` that contains ` like ` through the Iceberg extensions parser. For non-Iceberg catalogs/providers, this will produce a `CreateIcebergTableLike` logical plan that is *not* handled by Spark’s normal planner and will fail unless both identifiers resolve to `SparkCatalog`/`SparkSessionCatalog`. Consider narrowing the predicate (e.g., require `USING iceberg` or an Iceberg catalog prefix) so enabling Iceberg extensions doesn’t break non-Iceberg `CREATE TABLE ... LIKE ...` statements. ########## spark/v4.0/spark-extensions/src/main/antlr/org.apache.spark.sql.catalyst.parser.extensions/IcebergSqlExtensions.g4: ########## @@ -66,7 +66,8 @@ singleStatement ; statement - : ALTER TABLE multipartIdentifier ADD PARTITION FIELD transform (AS name=identifier)? #addPartitionField + : CREATE TABLE (IF NOT EXISTS)? multipartIdentifier LIKE multipartIdentifier (TBLPROPERTIES '(' tableProperty (',' tableProperty)* ')')? #createTableLike + | ALTER TABLE multipartIdentifier ADD PARTITION FIELD transform (AS name=identifier)? #addPartitionField Review Comment: The new `CREATE TABLE ... LIKE ...` grammar does not allow an optional `USING iceberg` clause (the syntax shown in the linked issue). As a result, `CREATE TABLE t LIKE s USING iceberg` will still fail to parse. Consider extending the rule to accept `USING iceberg` (and/or validate the provider is `iceberg`) to match Spark users’ expectations and the issue reproduction. ########## docs/docs/spark-ddl.md: ########## @@ -104,6 +102,16 @@ TBLPROPERTIES ('key'='value') AS SELECT ... ``` +## `CREATE TABLE ... LIKE ...` + +Iceberg supports `CREATE TABLE ... LIKE ...`. +This command is available when using Iceberg [SQL extensions](spark-configuration.md#sql-extensions). + +```sql +CREATE TABLE prod.db.new_table Review Comment: Docs show `CREATE TABLE ... LIKE ...` without mentioning whether `USING iceberg` is supported (as in the linked issue) and without documenting the supported modifiers (`IF NOT EXISTS`, `TBLPROPERTIES (...)`). Please clarify the exact supported syntax/limitations here so users don’t try `... USING iceberg` and hit a parse error. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
