This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new d2f866f  IMPALA-10935: Impala crashes on old Iceberg table property
d2f866f is described below

commit d2f866f9a17c2d71fb3e3e731a2dfcce68d336d9
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Tue Sep 28 17:45:43 2021 +0200

    IMPALA-10935: Impala crashes on old Iceberg table property
    
    With IMPALA-10627 we switched to use standard Iceberg table
    properties: https://iceberg.apache.org/configuration/
    
    E.g. we switched from 'iceberg.file_format' to 'write.format.default'.
    For backward compatibility we also support 'iceberg.file_format'. Though
    the support is not perfect as it causes a crash in some cases.
    
    Impala crashes when the following conditions are met:
    * local catalog mode is being used
    * Iceberg table is being queried
    * the data file format is ORC
    * 'iceberg.file_format' is set instead of 'write.format.default' table
      property
    * Query is "select count(*) from t;"
    
    Impala wrongly assumes that PARQUET is being used and tries to apply the
    count star optimization. It is not implemented for the ORC scanner and
    causes it to crash.
    
    This patch fixes the wrong assumption. It also fixes the HdfsOrcScanner,
    so that in release mode it raises an error instead of crashing.
    
    This patch also enables UNSETting the file format table property for
    Iceberg tables. This table property was already enabled for
    modifications (changing the value via SET TBLPROPERTIES).
    
    Testing:
     * added e2e test for the above conditions
    
    Change-Id: Iafd9baef1c124d7356a14ba24c571567629a5e50
    Reviewed-on: http://gerrit.cloudera.org:8080/17877
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/hdfs-orc-scanner.cc                    |  5 ++
 .../analysis/AlterTableUnSetTblProperties.java     |  1 -
 .../org/apache/impala/catalog/FeIcebergTable.java  | 17 -------
 .../org/apache/impala/catalog/IcebergTable.java    |  4 +-
 .../impala/catalog/iceberg/IcebergCtasTarget.java  |  2 +-
 .../impala/catalog/local/LocalFsPartition.java     |  5 +-
 .../impala/catalog/local/LocalIcebergTable.java    |  2 +-
 .../java/org/apache/impala/util/IcebergUtil.java   | 17 +++++++
 .../queries/QueryTest/iceberg-query.test           | 59 ++++++++++++++++++++++
 9 files changed, 87 insertions(+), 25 deletions(-)

diff --git a/be/src/exec/hdfs-orc-scanner.cc b/be/src/exec/hdfs-orc-scanner.cc
index 99a8fb9..b02908c 100644
--- a/be/src/exec/hdfs-orc-scanner.cc
+++ b/be/src/exec/hdfs-orc-scanner.cc
@@ -234,6 +234,11 @@ Status HdfsOrcScanner::Open(ScannerContext* context) {
     row_batches_need_validation_ = rows_valid == ValidWriteIdList::SOME;
   }
 
+  if (UNLIKELY(scan_node_->optimize_parquet_count_star())) {
+    DCHECK(false);
+    return Status("Internal ERROR: ORC scanner cannot optimize count star 
slot.");
+  }
+
   // Update 'row_reader_options_' based on the tuple descriptor so the ORC lib 
can skip
   // columns we don't need.
   RETURN_IF_ERROR(SelectColumns(*scan_node_->tuple_desc()));
diff --git 
a/fe/src/main/java/org/apache/impala/analysis/AlterTableUnSetTblProperties.java 
b/fe/src/main/java/org/apache/impala/analysis/AlterTableUnSetTblProperties.java
index cfdd199..4d1d28d 100644
--- 
a/fe/src/main/java/org/apache/impala/analysis/AlterTableUnSetTblProperties.java
+++ 
b/fe/src/main/java/org/apache/impala/analysis/AlterTableUnSetTblProperties.java
@@ -126,7 +126,6 @@ public class AlterTableUnSetTblProperties extends 
AlterTableStmt {
     propertyCheck(IcebergTable.ICEBERG_CATALOG_LOCATION, "Iceberg");
     propertyCheck(IcebergTable.ICEBERG_TABLE_IDENTIFIER, "Iceberg");
     propertyCheck(IcebergTable.METADATA_LOCATION, "Iceberg");
-    propertyCheck(IcebergTable.ICEBERG_FILE_FORMAT, "Iceberg");
   }
 
   private void propertyCheck(String property, String tableType) throws 
AnalysisException {
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java 
b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
index d2fb5fd..bb06002 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
@@ -317,23 +317,6 @@ public interface FeIcebergTable extends FeFsTable {
     }
 
     /**
-     * Get iceberg table file format from hms table properties
-     */
-    public static TIcebergFileFormat getIcebergFileFormat(
-        org.apache.hadoop.hive.metastore.api.Table msTable) {
-      TIcebergFileFormat fileFormat = null;
-      Map<String, String> params = msTable.getParameters();
-      if (params.containsKey(IcebergTable.ICEBERG_FILE_FORMAT)) {
-        fileFormat = IcebergUtil.getIcebergFileFormat(
-            params.get(IcebergTable.ICEBERG_FILE_FORMAT));
-      } else {
-        // Accept "iceberg.file_format" for backward compatibility.
-        fileFormat = 
IcebergUtil.getIcebergFileFormat(params.get("iceberg.file_format"));
-      }
-      return fileFormat == null ? TIcebergFileFormat.PARQUET : fileFormat;
-    }
-
-    /**
      * Get iceberg parquet compression codec from hms table properties
      */
     public static TCompressionCodec getIcebergParquetCompressionCodec(
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java 
b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
index 541690f..851cd5c 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
@@ -169,7 +169,7 @@ public class IcebergTable extends Table implements 
FeIcebergTable {
     super(msTable, db, name, owner);
     icebergTableLocation_ = msTable.getSd().getLocation();
     icebergCatalog_ = IcebergUtil.getTIcebergCatalog(msTable);
-    icebergFileFormat_ = Utils.getIcebergFileFormat(msTable);
+    icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTable);
     icebergParquetCompressionCodec_ = 
Utils.getIcebergParquetCompressionCodec(msTable);
     icebergParquetRowGroupSize_ = Utils.getIcebergParquetRowGroupSize(msTable);
     icebergParquetPlainPageSize_ = 
Utils.getIcebergParquetPlainPageSize(msTable);
@@ -337,7 +337,7 @@ public class IcebergTable extends Table implements 
FeIcebergTable {
         loadSchemaFromIceberg(metadata);
         // Loading hdfs table after loaded schema from Iceberg,
         // in case we create external Iceberg table skipping column info in 
sql.
-        icebergFileFormat_ = Utils.getIcebergFileFormat(msTbl);
+        icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTbl);
         icebergParquetCompressionCodec_ = 
Utils.getIcebergParquetCompressionCodec(msTbl);
         icebergParquetRowGroupSize_ = 
Utils.getIcebergParquetRowGroupSize(msTbl);
         icebergParquetPlainPageSize_ = 
Utils.getIcebergParquetPlainPageSize(msTbl);
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java 
b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
index 7279975..5e122e9 100644
--- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
+++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
@@ -92,7 +92,7 @@ public class IcebergCtasTarget extends CtasTargetTable 
implements FeIcebergTable
     createPartitionSpec(partSpec);
     icebergCatalog_ = IcebergUtil.getTIcebergCatalog(msTbl);
     setLocations();
-    icebergFileFormat_ = Utils.getIcebergFileFormat(msTbl);
+    icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTbl);
     icebergParquetCompressionCodec_ = 
Utils.getIcebergParquetCompressionCodec(msTbl);
     icebergParquetRowGroupSize_ = Utils.getIcebergParquetRowGroupSize(msTbl);
     icebergParquetPlainPageSize_ = Utils.getIcebergParquetPlainPageSize(msTbl);
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsPartition.java 
b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsPartition.java
index 9ec78ec..c443455 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsPartition.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsPartition.java
@@ -202,9 +202,8 @@ public class LocalFsPartition implements FeFsPartition {
   public HdfsFileFormat getFileFormat() {
     HdfsFileFormat format = getInputFormatDescriptor().getFileFormat();
     if (format == HdfsFileFormat.ICEBERG) {
-      String format_str = table_.getMetaStoreTable().getParameters().get(
-          IcebergTable.ICEBERG_FILE_FORMAT);
-      return IcebergUtil.toHdfsFileFormat(format_str);
+      return IcebergUtil.toHdfsFileFormat(
+          IcebergUtil.getIcebergFileFormat(table_.getMetaStoreTable()));
     }
     return format;
   }
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java 
b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
index 9f7b83a..4db65d6 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
@@ -113,7 +113,7 @@ public class LocalIcebergTable extends LocalTable 
implements FeIcebergTable {
           "Failed to load table: %s.%s", msTable.getDbName(), 
msTable.getTableName()),
           (Exception)e);
     }
-    icebergFileFormat_ = Utils.getIcebergFileFormat(msTable);
+    icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTable);
     icebergParquetCompressionCodec_ = 
Utils.getIcebergParquetCompressionCodec(msTable);
     icebergParquetRowGroupSize_ = Utils.getIcebergParquetRowGroupSize(msTable);
     icebergParquetPlainPageSize_ = 
Utils.getIcebergParquetPlainPageSize(msTable);
diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java 
b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
index 27dd9ce..8bca3a6 100644
--- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
@@ -303,6 +303,23 @@ public class IcebergUtil {
   }
 
   /**
+   * Get iceberg table file format from hms table properties
+   */
+  public static TIcebergFileFormat getIcebergFileFormat(
+      org.apache.hadoop.hive.metastore.api.Table msTable) {
+    TIcebergFileFormat fileFormat = null;
+    Map<String, String> params = msTable.getParameters();
+    if (params.containsKey(IcebergTable.ICEBERG_FILE_FORMAT)) {
+      fileFormat = IcebergUtil.getIcebergFileFormat(
+          params.get(IcebergTable.ICEBERG_FILE_FORMAT));
+    } else {
+      // Accept "iceberg.file_format" for backward compatibility.
+      fileFormat = 
IcebergUtil.getIcebergFileFormat(params.get("iceberg.file_format"));
+    }
+    return fileFormat == null ? TIcebergFileFormat.PARQUET : fileFormat;
+  }
+
+  /**
    * Get TIcebergFileFormat from a string, usually from table properties.
    * Returns PARQUET when 'format' is null. Returns null for invalid formats.
    */
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
index 137904e..85645d4 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
@@ -405,6 +405,65 @@ describe formatted iceberg_partitioned_orc_external;
 string, string, string
 ====
 ---- QUERY
+CREATE EXTERNAL TABLE IF NOT EXISTS 
iceberg_partitioned_orc_external_old_fileformat
+STORED AS ICEBERG
+TBLPROPERTIES(
+  'iceberg.file_format'='orc',
+  'iceberg.catalog'='hadoop.catalog',
+  
'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
+  'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc'
+);
+ALTER TABLE iceberg_partitioned_orc_external_old_fileformat
+UNSET TBLPROPERTIES IF EXISTS ('write.format.default');
+describe formatted iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS: VERIFY_IS_SUBSET
+'Location:           
','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
+'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
+'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
+'','iceberg.file_format','orc                 '
+'','iceberg.catalog     ','hadoop.catalog      '
+---- RESULTS: VERIFY_IS_NOT_IN
+'','write.format.default','orc                 '
+---- TYPES
+string, string, string
+====
+---- QUERY
+SELECT * FROM iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS
+7,'Lisa','download'
+16,'Lisa','download'
+13,'Alan','click'
+10,'Alan','click'
+19,'Alex','view'
+1,'Alex','view'
+4,'Alex','view'
+20,'Alex','view'
+14,'Lisa','download'
+5,'Lisa','download'
+15,'Alex','view'
+18,'Alan','click'
+9,'Alan','click'
+17,'Alex','view'
+12,'Alan','click'
+2,'Lisa','download'
+8,'Lisa','download'
+11,'Alex','view'
+6,'Alex','view'
+3,'Alan','click'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+SELECT count(*) FROM iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS
+20
+---- TYPES
+BIGINT
+====
+---- QUERY
+DROP TABLE iceberg_partitioned_orc_external_old_fileformat;
+====
+---- QUERY
 SELECT count(*) from iceberg_resolution_test_external;
 ---- TYPES
 bigint

Reply via email to