This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 47c71bbb32d34d4583856af227206934b6f15136
Author: LPL <[email protected]>
AuthorDate: Thu Jan 12 19:23:53 2023 +0800

    IMPALA-11798: Property 'external.table.purge' should not be ignored when
creating Iceberg tables
    
    Table property 'external.table.purge' should not be ignored when
    creating Iceberg tables, except when 'iceberg.catalog' is not the
    Hive Catalog for managed tables. In that case we need to call
    'org.apache.hadoop.hive.metastore.IMetaStoreClient#createTable' and HMS
    will override 'external.table.purge' to 'TRUE'.
    
    Testing:
     * existing tests
     * add e2e tests
    
    Change-Id: I2649dd38fbe050044817d6c425ef447245aa2829
    Reviewed-on: http://gerrit.cloudera.org:8080/19416
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../apache/impala/analysis/CreateTableStmt.java    | 11 +++-
 .../main/java/org/apache/impala/catalog/Table.java |  1 +
 .../impala/catalog/iceberg/IcebergCatalogs.java    |  9 ++-
 .../impala/catalog/iceberg/IcebergHiveCatalog.java | 10 ++--
 .../java/org/apache/impala/util/IcebergUtil.java   | 15 +++--
 .../queries/QueryTest/iceberg-create.test          | 67 +++++++++++++++++++---
 6 files changed, 90 insertions(+), 23 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
index 0c36b8eb9..ad72039bd 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
@@ -37,9 +37,7 @@ import org.apache.impala.common.ImpalaRuntimeException;
 import org.apache.impala.common.RuntimeEnv;
 import org.apache.impala.service.BackendConfig;
 import org.apache.impala.thrift.TBucketInfo;
-import org.apache.impala.thrift.TCompressionCodec;
 import org.apache.impala.thrift.TCreateTableParams;
-import org.apache.impala.thrift.THdfsCompression;
 import org.apache.impala.thrift.THdfsFileFormat;
 import org.apache.impala.thrift.TIcebergCatalog;
 import org.apache.impala.thrift.TIcebergFileFormat;
@@ -60,7 +58,6 @@ import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.primitives.Ints;
-import com.google.common.primitives.Longs;
 
 /**
  * Represents a CREATE TABLE statement.
@@ -768,6 +765,14 @@ public class CreateTableStmt extends StatementBase {
       default: throw new AnalysisException(String.format(
           "Unknown Iceberg catalog type: %s", catalog));
     }
+    // HMS will override 'external.table.purge' to 'TRUE' when 
'iceberg.catalog' is not
+    // the Hive Catalog for managed tables.
+    if (!isExternal() && !IcebergUtil.isHiveCatalog(getTblProperties())
+        && "false".equalsIgnoreCase(getTblProperties().get(
+            Table.TBL_PROP_EXTERNAL_TABLE_PURGE))) {
+      analyzer_.addWarning("The table property 'external.table.purge' will be 
set "
+          + "to 'TRUE' on newly created managed Iceberg tables.");
+    }
   }
 
   private void validateTableInHiveCatalog() throws AnalysisException {
diff --git a/fe/src/main/java/org/apache/impala/catalog/Table.java 
b/fe/src/main/java/org/apache/impala/catalog/Table.java
index 3260e31e8..aaad451df 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Table.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Table.java
@@ -188,6 +188,7 @@ public abstract class Table extends CatalogObjectImpl 
implements FeTable {
 
   // Table property key to determined if HMS translated a managed table to 
external table
   public static final String TBL_PROP_EXTERNAL_TABLE_PURGE = 
"external.table.purge";
+  public static final String TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT = "TRUE";
 
   // this field represents the last event id in metastore upto which this 
table is
   // synced. It is used if the flag sync_to_latest_event_on_ddls is set to 
true.
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalogs.java 
b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalogs.java
index 488cf348c..e4fa00284 100644
--- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalogs.java
+++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalogs.java
@@ -17,6 +17,9 @@
 
 package org.apache.impala.catalog.iceberg;
 
+import static org.apache.impala.catalog.Table.TBL_PROP_EXTERNAL_TABLE_PURGE;
+import static 
org.apache.impala.catalog.Table.TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT;
+
 import java.util.Map;
 import java.util.Properties;
 
@@ -29,7 +32,6 @@ import org.apache.iceberg.Schema;
 import org.apache.iceberg.SchemaParser;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.catalog.TableIdentifier;
-import org.apache.iceberg.exceptions.NoSuchTableException;
 import org.apache.iceberg.hadoop.ConfigProperties;
 import org.apache.iceberg.mr.Catalogs;
 import org.apache.iceberg.mr.InputFormatConfig;
@@ -55,7 +57,7 @@ public class IcebergCatalogs implements IcebergCatalog {
     return instance_;
   }
 
-  private Configuration configuration_;
+  private final Configuration configuration_;
 
   private IcebergCatalogs() {
     configuration_ = new HiveConf(IcebergCatalogs.class);
@@ -99,7 +101,8 @@ public class IcebergCatalogs implements IcebergCatalog {
     properties.setProperty(InputFormatConfig.TABLE_SCHEMA, 
SchemaParser.toJson(schema));
     properties.setProperty(InputFormatConfig.PARTITION_SPEC,
         PartitionSpecParser.toJson(spec));
-    properties.setProperty("external.table.purge", "TRUE");
+    properties.setProperty(TBL_PROP_EXTERNAL_TABLE_PURGE, 
tableProps.getOrDefault(
+        TBL_PROP_EXTERNAL_TABLE_PURGE, TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT));
     return Catalogs.createTable(configuration_, properties);
   }
 
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergHiveCatalog.java 
b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergHiveCatalog.java
index b4b5f4b32..d0263ca32 100644
--- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergHiveCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergHiveCatalog.java
@@ -17,6 +17,9 @@
 
 package org.apache.impala.catalog.iceberg;
 
+import static org.apache.impala.catalog.Table.TBL_PROP_EXTERNAL_TABLE_PURGE;
+import static 
org.apache.impala.catalog.Table.TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT;
+
 import java.util.Map;
 import java.util.HashMap;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -24,12 +27,10 @@ import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.catalog.TableIdentifier;
-import org.apache.iceberg.exceptions.NoSuchTableException;
 import org.apache.iceberg.hadoop.ConfigProperties;
 import org.apache.iceberg.hive.HiveCatalog;
 import org.apache.impala.catalog.FeIcebergTable;
 import org.apache.impala.catalog.TableLoadingException;
-import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.thrift.TIcebergCatalog;
 import org.apache.impala.util.IcebergUtil;
 
@@ -48,7 +49,7 @@ public class IcebergHiveCatalog implements IcebergCatalog {
     return instance_;
   }
 
-  private HiveCatalog hiveCatalog_;
+  private final HiveCatalog hiveCatalog_;
 
   private IcebergHiveCatalog() {
     setContextClassLoader();
@@ -67,7 +68,8 @@ public class IcebergHiveCatalog implements IcebergCatalog {
       PartitionSpec spec,
       String location,
       Map<String, String> properties) {
-    properties.put("external.table.purge", "TRUE");
+    properties.put(TBL_PROP_EXTERNAL_TABLE_PURGE, properties.getOrDefault(
+        TBL_PROP_EXTERNAL_TABLE_PURGE, TBL_PROP_EXTERNAL_TABLE_PURGE_DEFAULT));
     return hiveCatalog_.createTable(identifier, schema, spec, location, 
properties);
   }
 
diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java 
b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
index 2c304b00c..5dbbf61f2 100644
--- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
@@ -239,10 +239,14 @@ public class IcebergUtil {
    */
   public static boolean isHiveCatalog(
       org.apache.hadoop.hive.metastore.api.Table msTable) {
-    TIcebergCatalog tCat = getTIcebergCatalog(msTable);
+    return isHiveCatalog(msTable.getParameters());
+  }
+
+  public static boolean isHiveCatalog(Map<String, String> props) {
+    TIcebergCatalog tCat = getTIcebergCatalog(props);
     if (tCat == TIcebergCatalog.HIVE_CATALOG) return true;
     if (tCat == TIcebergCatalog.CATALOGS) {
-      String catName = 
msTable.getParameters().get(IcebergTable.ICEBERG_CATALOG);
+      String catName = props.get(IcebergTable.ICEBERG_CATALOG);
       tCat = IcebergCatalogs.getInstance().getUnderlyingCatalogType(catName);
       return tCat == TIcebergCatalog.HIVE_CATALOG;
     }
@@ -255,8 +259,11 @@ public class IcebergUtil {
    */
   public static TIcebergCatalog getTIcebergCatalog(
       org.apache.hadoop.hive.metastore.api.Table msTable) {
-    return getTIcebergCatalog(
-        msTable.getParameters().get(IcebergTable.ICEBERG_CATALOG));
+    return getTIcebergCatalog(msTable.getParameters());
+  }
+
+  public static TIcebergCatalog getTIcebergCatalog(Map<String, String> props) {
+    return getTIcebergCatalog(props.get(IcebergTable.ICEBERG_CATALOG));
   }
 
   /**
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
index dc0c86ac0..c8187fd81 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
@@ -439,6 +439,9 @@ create table ice_part_hadoop_tbl (
 ====
 ---- QUERY
 describe formatted ice_part_hadoop_tbl;
+---- RESULTS: VERIFY_IS_NOT_IN
+'Num Buckets:        ','0                   ','NULL'
+'Bucket Columns:     ','[]                  ','NULL'
 ---- RESULTS: VERIFY_IS_SUBSET
 '','NULL','NULL'
 '# Partition Transform Information','NULL','NULL'
@@ -450,9 +453,7 @@ describe formatted ice_part_hadoop_tbl;
 'col_truncate','TRUNCATE[2]','NULL'
 'col_bucket','BUCKET[2]','NULL'
 'col_identity','IDENTITY','NULL'
----- RESULTS: VERIFY_IS_NOT_IN
-'Num Buckets:        ','0                   ','NULL'
-'Bucket Columns:     ','[]                  ','NULL'
+'','external.table.purge','TRUE                '
 ---- TYPES
 string, string, string
 ====
@@ -482,6 +483,9 @@ create table ice_part_hadoop_catalog (
 ====
 ---- QUERY
 describe formatted ice_part_hadoop_catalog;
+---- RESULTS: VERIFY_IS_NOT_IN
+'Num Buckets:        ','0                   ','NULL'
+'Bucket Columns:     ','[]                  ','NULL'
 ---- RESULTS: VERIFY_IS_SUBSET
 '','NULL','NULL'
 '# Partition Transform Information','NULL','NULL'
@@ -493,9 +497,7 @@ describe formatted ice_part_hadoop_catalog;
 'col_truncate','TRUNCATE[2]','NULL'
 'col_bucket','BUCKET[2]','NULL'
 'col_identity','IDENTITY','NULL'
----- RESULTS: VERIFY_IS_NOT_IN
-'Num Buckets:        ','0                   ','NULL'
-'Bucket Columns:     ','[]                  ','NULL'
+'','external.table.purge','TRUE                '
 ---- TYPES
 string, string, string
 ====
@@ -522,6 +524,9 @@ create table ice_part_catalogs (
 ====
 ---- QUERY
 describe formatted ice_part_catalogs;
+---- RESULTS: VERIFY_IS_NOT_IN
+'Num Buckets:        ','0                   ','NULL'
+'Bucket Columns:     ','[]                  ','NULL'
 ---- RESULTS: VERIFY_IS_SUBSET
 '','NULL','NULL'
 '# Partition Transform Information','NULL','NULL'
@@ -533,9 +538,7 @@ describe formatted ice_part_catalogs;
 'col_truncate','TRUNCATE[2]','NULL'
 'col_bucket','BUCKET[2]','NULL'
 'col_identity','IDENTITY','NULL'
----- RESULTS: VERIFY_IS_NOT_IN
-'Num Buckets:        ','0                   ','NULL'
-'Bucket Columns:     ','[]                  ','NULL'
+'','external.table.purge','TRUE                '
 ---- TYPES
 string, string, string
 ====
@@ -551,6 +554,8 @@ describe formatted ice_non_partition;
 '# col_name            ','transform_type      ','NULL'
 'Num Buckets:        ','0                   ','NULL'
 'Bucket Columns:     ','[]                  ','NULL'
+---- RESULTS: VERIFY_IS_SUBSET
+'','external.table.purge','TRUE                '
 ---- TYPES
 string, string, string
 ====
@@ -566,4 +571,48 @@ describe formatted iceberg_stored_by;
 '','table_type          ','ICEBERG             '
 ---- TYPES
 string, string, string
+====
+---- QUERY
+create table ice_tbl (i int) stored as iceberg 
tblproperties('external.table.purge'='FALSE');
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe formatted ice_tbl;
+---- RESULTS: VERIFY_IS_SUBSET
+'','external.table.purge','FALSE               '
+---- TYPES
+string, string, string
+====
+---- QUERY
+create table ice_hive_cat_tbl (i int) stored as iceberg tblproperties(
+  'iceberg.catalog' = 'ice_hive_cat',
+  'external.table.purge' = 'FALSE'
+);
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe formatted ice_hive_cat_tbl;
+---- RESULTS: VERIFY_IS_SUBSET
+'','external.table.purge','FALSE               '
+---- TYPES
+string, string, string
+====
+---- QUERY
+create table ice_hadoop_tbl (i int) stored as iceberg tblproperties(
+  'iceberg.catalog' = 'hadoop.tables',
+  'external.table.purge' = 'FALSE'
+);
+---- ERRORS
+The table property 'external.table.purge' will be set to 'TRUE' on newly 
created managed Iceberg tables.
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe formatted ice_hadoop_tbl;
+---- RESULTS: VERIFY_IS_SUBSET
+'','external.table.purge','TRUE                '
+---- TYPES
+string, string, string
 ====
\ No newline at end of file

Reply via email to