This is an automated email from the ASF dual-hosted git repository. wzhou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 47c71bbb32d34d4583856af227206934b6f15136 Author: LPL <[email protected]> AuthorDate: Thu Jan 12 19:23:53 2023 +0800 IMPALA-11798: Property 'external.table.purge' should not be ignored when CREATE Iceberg tables Table property 'external.table.purge' should not be ignored when creating Iceberg tables, except that when 'iceberg.catalog' is not the Hive Catalog for managed tables, because we need to call 'org.apache.hadoop.hive.metastore.IMetaStoreClient#createTable' and HMS will override 'external.table.purge' to 'TRUE'. Testing: * existing tests * add e2e tests Change-Id: I2649dd38fbe050044817d6c425ef447245aa2829 Reviewed-on: http://gerrit.cloudera.org:8080/19416 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../apache/impala/analysis/CreateTableStmt.java | 11 +++- .../main/java/org/apache/impala/catalog/Table.java | 1 + .../impala/catalog/iceberg/IcebergCatalogs.java | 9 ++- .../impala/catalog/iceberg/IcebergHiveCatalog.java | 10 ++-- .../java/org/apache/impala/util/IcebergUtil.java | 15 +++-- .../queries/QueryTest/iceberg-create.test | 67 +++++++++++++++++++--- 6 files changed, 90 insertions(+), 23 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java index 0c36b8eb9..ad72039bd 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java @@ -37,9 +37,7 @@ import org.apache.impala.common.ImpalaRuntimeException; import org.apache.impala.common.RuntimeEnv; import org.apache.impala.service.BackendConfig; import org.apache.impala.thrift.TBucketInfo; -import org.apache.impala.thrift.TCompressionCodec; import org.apache.impala.thrift.TCreateTableParams; -import org.apache.impala.thrift.THdfsCompression; import org.apache.impala.thrift.THdfsFileFormat; import org.apache.impala.thrift.TIcebergCatalog; import org.apache.impala.thrift.TIcebergFileFormat; @@ -60,7 +58,6 @@ import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.primitives.Ints; -import com.google.common.primitives.Longs; /** * Represents a CREATE TABLE statement. @@ -768,6 +765,14 @@ public class CreateTableStmt extends StatementBase { default: throw new AnalysisException(String.format( "Unknown Iceberg catalog type: %s", catalog)); } + // HMS will override 'external.table.purge' to 'TRUE' When 'iceberg.catalog' is not + // the Hive Catalog for managed tables. + if (!isExternal() && !IcebergUtil.isHiveCatalog(getTblProperties()) + && "false".equalsIgnoreCase(getTblProperties().get( + Table.TBL_PROP_EXTERNAL_TABLE_PURGE))) { + analyzer_.addWarning("The table property 'external.table.purge' will be set " + + "to 'TRUE' on newly created managed Iceberg tables."); + } } private void validateTableInHiveCatalog() throws AnalysisException { diff --git a/fe/src/main/java/org/apache/impala/catalog/Table.java b/fe/src/main/java/org/apache/impala/catalog/Table.java index 3260e31e8..aaad451df 100644 --- a/fe/src/main/java/org/apache/impala/catalog/Table.java +++ b/fe/src/main/java/org/apache/impala/catalog/Table.java @@ -188,6 +188,7 @@ public abstract class Table extends CatalogObjectImpl implements FeTable { // Table property key to determined if HMS translated a managed table to external table public static final String TBL_PROP_EXTERNAL_TABLE_PURGE = "external.table.purge"; + public static final String TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT = "TRUE"; // this field represents the last event id in metastore upto which this table is // synced. It is used if the flag sync_to_latest_event_on_ddls is set to true. diff --git a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalogs.java b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalogs.java index 488cf348c..e4fa00284 100644 --- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalogs.java +++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalogs.java @@ -17,6 +17,9 @@ package org.apache.impala.catalog.iceberg; +import static org.apache.impala.catalog.Table.TBL_PROP_EXTERNAL_TABLE_PURGE; +import static org.apache.impala.catalog.Table.TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT; + import java.util.Map; import java.util.Properties; @@ -29,7 +32,6 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.SchemaParser; import org.apache.iceberg.Table; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.ConfigProperties; import org.apache.iceberg.mr.Catalogs; import org.apache.iceberg.mr.InputFormatConfig; @@ -55,7 +57,7 @@ public class IcebergCatalogs implements IcebergCatalog { return instance_; } - private Configuration configuration_; + private final Configuration configuration_; private IcebergCatalogs() { configuration_ = new HiveConf(IcebergCatalogs.class); @@ -99,7 +101,8 @@ public class IcebergCatalogs implements IcebergCatalog { properties.setProperty(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(schema)); properties.setProperty(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec)); - properties.setProperty("external.table.purge", "TRUE"); + properties.setProperty(TBL_PROP_EXTERNAL_TABLE_PURGE, tableProps.getOrDefault( + TBL_PROP_EXTERNAL_TABLE_PURGE, TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT)); return Catalogs.createTable(configuration_, properties); } diff --git a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergHiveCatalog.java b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergHiveCatalog.java index b4b5f4b32..d0263ca32 100644 --- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergHiveCatalog.java +++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergHiveCatalog.java @@ -17,6 +17,9 @@ package org.apache.impala.catalog.iceberg; +import static org.apache.impala.catalog.Table.TBL_PROP_EXTERNAL_TABLE_PURGE; +import static org.apache.impala.catalog.Table.TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT; + import java.util.Map; import java.util.HashMap; import org.apache.hadoop.hive.conf.HiveConf; @@ -24,12 +27,10 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.ConfigProperties; import org.apache.iceberg.hive.HiveCatalog; import org.apache.impala.catalog.FeIcebergTable; import org.apache.impala.catalog.TableLoadingException; -import org.apache.impala.common.FileSystemUtil; import org.apache.impala.thrift.TIcebergCatalog; import org.apache.impala.util.IcebergUtil; @@ -48,7 +49,7 @@ public class IcebergHiveCatalog implements IcebergCatalog { return instance_; } - private HiveCatalog hiveCatalog_; + private final HiveCatalog hiveCatalog_; private IcebergHiveCatalog() { setContextClassLoader(); @@ -67,7 +68,8 @@ public class IcebergHiveCatalog implements IcebergCatalog { PartitionSpec spec, String location, Map<String, String> properties) { - properties.put("external.table.purge", "TRUE"); + properties.put(TBL_PROP_EXTERNAL_TABLE_PURGE, properties.getOrDefault( + TBL_PROP_EXTERNAL_TABLE_PURGE, TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT)); return hiveCatalog_.createTable(identifier, schema, spec, location, properties); } diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java index 2c304b00c..5dbbf61f2 100644 --- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java +++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java @@ -239,10 +239,14 @@ public class IcebergUtil { */ public static boolean isHiveCatalog( org.apache.hadoop.hive.metastore.api.Table msTable) { - TIcebergCatalog tCat = getTIcebergCatalog(msTable); + return isHiveCatalog(msTable.getParameters()); + } + + public static boolean isHiveCatalog(Map<String, String> props) { + TIcebergCatalog tCat = getTIcebergCatalog(props); if (tCat == TIcebergCatalog.HIVE_CATALOG) return true; if (tCat == TIcebergCatalog.CATALOGS) { - String catName = msTable.getParameters().get(IcebergTable.ICEBERG_CATALOG); + String catName = props.get(IcebergTable.ICEBERG_CATALOG); tCat = IcebergCatalogs.getInstance().getUnderlyingCatalogType(catName); return tCat == TIcebergCatalog.HIVE_CATALOG; } @@ -255,8 +259,11 @@ public class IcebergUtil { */ public static TIcebergCatalog getTIcebergCatalog( org.apache.hadoop.hive.metastore.api.Table msTable) { - return getTIcebergCatalog( - msTable.getParameters().get(IcebergTable.ICEBERG_CATALOG)); + return getTIcebergCatalog(msTable.getParameters()); + } + + public static TIcebergCatalog getTIcebergCatalog(Map<String, String> props) { + return getTIcebergCatalog(props.get(IcebergTable.ICEBERG_CATALOG)); } /** diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test index dc0c86ac0..c8187fd81 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test @@ -439,6 +439,9 @@ create table ice_part_hadoop_tbl ( ==== ---- QUERY describe formatted ice_part_hadoop_tbl; +---- RESULTS: VERIFY_IS_NOT_IN +'Num Buckets: ','0 ','NULL' +'Bucket Columns: ','[] ','NULL' ---- RESULTS: VERIFY_IS_SUBSET '','NULL','NULL' '# Partition Transform Information','NULL','NULL' @@ -450,9 +453,7 @@ describe formatted ice_part_hadoop_tbl; 'col_truncate','TRUNCATE[2]','NULL' 'col_bucket','BUCKET[2]','NULL' 'col_identity','IDENTITY','NULL' ----- RESULTS: VERIFY_IS_NOT_IN -'Num Buckets: ','0 ','NULL' -'Bucket Columns: ','[] ','NULL' +'','external.table.purge','TRUE ' ---- TYPES string, string, string ==== @@ -482,6 +483,9 @@ create table ice_part_hadoop_catalog ( ==== ---- QUERY describe formatted ice_part_hadoop_catalog; +---- RESULTS: VERIFY_IS_NOT_IN +'Num Buckets: ','0 ','NULL' +'Bucket Columns: ','[] ','NULL' ---- RESULTS: VERIFY_IS_SUBSET '','NULL','NULL' '# Partition Transform Information','NULL','NULL' @@ -493,9 +497,7 @@ describe formatted ice_part_hadoop_catalog; 'col_truncate','TRUNCATE[2]','NULL' 'col_bucket','BUCKET[2]','NULL' 'col_identity','IDENTITY','NULL' ----- RESULTS: VERIFY_IS_NOT_IN -'Num Buckets: ','0 ','NULL' -'Bucket Columns: ','[] ','NULL' +'','external.table.purge','TRUE ' ---- TYPES string, string, string ==== @@ -522,6 +524,9 @@ create table ice_part_catalogs ( ==== ---- QUERY describe formatted ice_part_catalogs; +---- RESULTS: VERIFY_IS_NOT_IN +'Num Buckets: ','0 ','NULL' +'Bucket Columns: ','[] ','NULL' ---- RESULTS: VERIFY_IS_SUBSET '','NULL','NULL' '# Partition Transform Information','NULL','NULL' @@ -533,9 +538,7 @@ describe formatted ice_part_catalogs; 'col_truncate','TRUNCATE[2]','NULL' 'col_bucket','BUCKET[2]','NULL' 'col_identity','IDENTITY','NULL' ----- RESULTS: VERIFY_IS_NOT_IN -'Num Buckets: ','0 ','NULL' -'Bucket Columns: ','[] ','NULL' +'','external.table.purge','TRUE ' ---- TYPES string, string, string ==== @@ -551,6 +554,8 @@ describe formatted ice_non_partition; '# col_name ','transform_type ','NULL' 'Num Buckets: ','0 ','NULL' 'Bucket Columns: ','[] ','NULL' +---- RESULTS: VERIFY_IS_SUBSET +'','external.table.purge','TRUE ' ---- TYPES string, string, string ==== @@ -566,4 +571,48 @@ describe formatted iceberg_stored_by; '','table_type ','ICEBERG ' ---- TYPES string, string, string +==== +---- QUERY +create table ice_tbl (i int) stored as iceberg tblproperties('external.table.purge'='FALSE'); +---- RESULTS +'Table has been created.' +==== +---- QUERY +describe formatted ice_tbl; +---- RESULTS: VERIFY_IS_SUBSET +'','external.table.purge','FALSE ' +---- TYPES +string, string, string +==== +---- QUERY +create table ice_hive_cat_tbl (i int) stored as iceberg tblproperties( + 'iceberg.catalog' = 'ice_hive_cat', + 'external.table.purge' = 'FALSE' +); +---- RESULTS +'Table has been created.' +==== +---- QUERY +describe formatted ice_hive_cat_tbl; +---- RESULTS: VERIFY_IS_SUBSET +'','external.table.purge','FALSE ' +---- TYPES +string, string, string +==== +---- QUERY +create table ice_hadoop_tbl (i int) stored as iceberg tblproperties( + 'iceberg.catalog' = 'hadoop.tables', + 'external.table.purge' = 'FALSE' +); +---- ERRORS +The table property 'external.table.purge' will be set to 'TRUE' on newly created managed Iceberg tables. +---- RESULTS +'Table has been created.' +==== +---- QUERY +describe formatted ice_hadoop_tbl; +---- RESULTS: VERIFY_IS_SUBSET +'','external.table.purge','TRUE ' +---- TYPES +string, string, string ==== \ No newline at end of file
