This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit a5256e8418fcac2850be63ee3f88af06a46ec2c8 Author: Zoltan Borok-Nagy <[email protected]> AuthorDate: Fri Mar 13 17:27:01 2026 +0100 IMPALA-14836: Fix cloning Iceberg V3 tables Before this patch, cloning (CREATE TABLE LIKE) Iceberg V3 tables did not work properly: * it didn't preserve the format-version * hidden row lineage columns were added as regular columns This patch fixes the above issues in CatalogOpExecutor.createTableLike() Testing * e2e tests added Change-Id: Icd55f3e6e3659004f637ceb875c240ae315fb7cb Generated-by: Claude Sonnet 4.6 Reviewed-on: http://gerrit.cloudera.org:8080/24101 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../apache/impala/service/CatalogOpExecutor.java | 8 ++++- .../queries/QueryTest/iceberg-v3-basic.test | 42 ++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java index b563a2708..197356b7f 100644 --- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java +++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java @@ -4650,6 +4650,10 @@ public class CatalogOpExecutor { tableProperties.remove(Catalogs.LOCATION); tableProperties.remove(IcebergTable.ICEBERG_CATALOG); tableProperties.remove(IcebergTable.ICEBERG_TABLE_IDENTIFIER); + // Explicitly preserve the format version, since it is a metadata-level field + // in Iceberg and may not be present in iceApiTable.properties(). + tableProperties.put(TableProperties.FORMAT_VERSION, + String.valueOf(srcIceTable.getFormatVersion())); // The table identifier of the new table will be 'database.table' TableIdentifier identifier = IcebergUtil @@ -4667,8 +4671,10 @@ public class CatalogOpExecutor { .put(IcebergTable.ICEBERG_TABLE_IDENTIFIER, identifier.toString()); tableProperties.put(IcebergTable.ICEBERG_TABLE_IDENTIFIER, identifier.toString()); } + // Use getColumnsInHiveOrder() to exclude hidden V3 row lineage columns + // from the schema of the new table. List<TColumn> columns = new ArrayList<>(); - for (Column col: srcIceTable.getColumns()) columns.add(col.toThrift()); + for (Column col: srcIceTable.getColumnsInHiveOrder()) columns.add(col.toThrift()); TIcebergPartitionSpec partitionSpec = srcIceTable.getDefaultPartitionSpec() .toThrift(); createIcebergTable(tbl, wantMinimalResult, response, catalogTimeline, diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v3-basic.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v3-basic.test index 5cf5bcb13..ff748d8f3 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v3-basic.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v3-basic.test @@ -28,4 +28,46 @@ SELECT * FROM ice_v3; 4,'hive','new_value' ---- TYPES INT,STRING,STRING +==== +---- QUERY +# CREATE TABLE LIKE an Iceberg V3 table must preserve the format version and +# must not include the hidden row lineage columns (_file_row_id, +# _file_last_updated_sequence_number) as regular user columns. +# The cloned table must report format-version=3. +CREATE TABLE ice_v3_like LIKE ice_v3; +DESCRIBE FORMATTED ice_v3_like; +---- RESULTS: VERIFY_IS_SUBSET +'','format-version ','3 ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# The cloned table must expose only the user columns, not the hidden row +# lineage metadata columns. +DESCRIBE ice_v3_like; +---- RESULTS +'i','int','','true' +'s','string','','true' +'new_col','string','','true' +---- TYPES +STRING,STRING,STRING,STRING +==== +---- QUERY +# Data written into the cloned V3 table must be readable correctly. +INSERT INTO ice_v3_like VALUES (10, 'hello', 'world'); +SELECT * FROM ice_v3_like; +---- RESULTS +10,'hello','world' +---- TYPES +INT,STRING,STRING +==== +---- QUERY +# Verify that first-row-id is assigned to the V3 data file. +SELECT _file_row_id, ICEBERG__FIRST__ROW__ID, + _file_last_updated_sequence_number, ICEBERG__DATA__SEQUENCE__NUMBER, + * FROM ice_v3_like; +---- RESULTS +NULL,0,NULL,1,10,'hello','world' +---- TYPES +BIGINT,BIGINT,BIGINT,BIGINT,INT,STRING,STRING ==== \ No newline at end of file
