This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 579f5c67e0363e856738b160731f916e0ab24e3e
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Mon Nov 30 16:35:43 2020 +0100

    IMPALA-10364: Set the real location for external Iceberg tables stored in HadoopCatalog
    
    Impala tries to come up with the table location of external Iceberg
    tables stored in HadoopCatalog. The current method is not correct for
    tables that are nested under multiple namespaces.
    
    With this patch Impala loads the Iceberg table and retrieves the
    location from it.
    
    Testing:
     * added e2e test in iceberg-create.test
    
    Change-Id: I04b75d219e095ce00b4c48f40b8dee872ba57b78
    Reviewed-on: http://gerrit.cloudera.org:8080/16795
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../apache/impala/service/CatalogOpExecutor.java   | 12 +++++----
 .../java/org/apache/impala/util/IcebergUtil.java   |  2 +-
 .../queries/QueryTest/iceberg-create.test          | 29 ++++++++++++++++++++++
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 8efc2c7..9fa0f9b 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -2735,13 +2735,15 @@ public class CatalogOpExecutor {
             } else {
               if (location == null) {
                 if (catalog == TIcebergCatalog.HADOOP_CATALOG) {
-                  // When creating external Iceberg table with 'hadoop.catalog'
-                  // We use catalog location and table identifier as location
+                  // When creating external Iceberg table with 'hadoop.catalog' we load
+                  // the Iceberg table using catalog location and table identifier to get
+                  // the actual location of the table. This way we can also get the
+                  // correct location for tables stored in nested namespaces.
                   TableIdentifier identifier =
                       IcebergUtil.getIcebergTableIdentifier(newTable);
-                  newTable.getSd().setLocation(String.format("%s/%s/%s",
-                      IcebergUtil.getIcebergCatalogLocation(newTable),
-                      identifier.namespace().level(0), identifier.name()));
+                  newTable.getSd().setLocation(IcebergUtil.loadTable(
+                      TIcebergCatalog.HADOOP_CATALOG, identifier,
+                      IcebergUtil.getIcebergCatalogLocation(newTable)).location());
                 } else {
                   addSummary(response,
                       "Location is necessary for external iceberg table.");
diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
index aad0775..8ec8492 100644
--- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
@@ -103,7 +103,7 @@ public class IcebergUtil {
   /**
    * Helper method to load native Iceberg table.
    */
-  private static Table loadTable(TIcebergCatalog catalog, TableIdentifier tableId,
+  public static Table loadTable(TIcebergCatalog catalog, TableIdentifier tableId,
       String location) throws TableLoadingException {
     try {
       IcebergCatalog cat = getIcebergCatalog(catalog, location);
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
index 12990ee..c4b8571 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
@@ -289,6 +289,35 @@ DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident;
 string, string, string
 ====
 ---- QUERY
+INSERT INTO iceberg_hadoop_cat_with_ident values ("ice");
+====
+---- QUERY
+CREATE EXTERNAL TABLE iceberg_hadoop_cat_with_ident_ext
+STORED AS ICEBERG
+TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
+'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
+'iceberg.table_identifier'='org.db.tbl');
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident_ext;
+---- RESULTS: VERIFY_IS_SUBSET
+'Location:           ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
+'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
+'','iceberg.file_format ','parquet             '
+'','iceberg.catalog     ','hadoop.catalog      '
+---- TYPES
+string, string, string
+====
+---- QUERY
+SELECT * FROM iceberg_hadoop_cat_with_ident_ext;
+---- RESULTS
+'ice'
+---- TYPES
+STRING
+====
+---- QUERY
 CREATE TABLE iceberg_hadoop_cat_with_orc(
   level STRING,
   event_time TIMESTAMP,

Reply via email to