This is an automated email from the ASF dual-hosted git repository.
boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new d2f866f IMPALA-10935: Impala crashes on old Iceberg table property
d2f866f is described below
commit d2f866f9a17c2d71fb3e3e731a2dfcce68d336d9
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Tue Sep 28 17:45:43 2021 +0200
IMPALA-10935: Impala crashes on old Iceberg table property
With IMPALA-10627 we switched to use standard Iceberg table
properties: https://iceberg.apache.org/configuration/
E.g. we switched from 'iceberg.file_format' to 'write.format.default'.
For backward compatibility we also support 'iceberg.file_format'. Though
the support is not perfect as it causes a crash in some cases.
Impala crashes when the following conditions met:
* local catalog mode is being used
* Iceberg table is being queried
* the data file format is ORC
* 'iceberg.file_format' is set instead of 'write.format.default' table
property
* Query is "select count(*) from t;"
Impala wrongly assumes that PARQUET is being used and tries to apply the
count star optimization. It is not implemented for the ORC scanner and
causes it to crash.
This patch fixes the wrong assumption. Also it fixes the HdfsOrcScanner,
so it won't crash in release mode but raise an error.
This patch also enables UNSETting the file format table property for
Iceberg tables. This table property was already enabled for
modifications (changing the value via SET TBLPROPERTIES).
Testing:
* added e2e test for the above conditions
Change-Id: Iafd9baef1c124d7356a14ba24c571567629a5e50
Reviewed-on: http://gerrit.cloudera.org:8080/17877
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/exec/hdfs-orc-scanner.cc | 5 ++
.../analysis/AlterTableUnSetTblProperties.java | 1 -
.../org/apache/impala/catalog/FeIcebergTable.java | 17 -------
.../org/apache/impala/catalog/IcebergTable.java | 4 +-
.../impala/catalog/iceberg/IcebergCtasTarget.java | 2 +-
.../impala/catalog/local/LocalFsPartition.java | 5 +-
.../impala/catalog/local/LocalIcebergTable.java | 2 +-
.../java/org/apache/impala/util/IcebergUtil.java | 17 +++++++
.../queries/QueryTest/iceberg-query.test | 59 ++++++++++++++++++++++
9 files changed, 87 insertions(+), 25 deletions(-)
diff --git a/be/src/exec/hdfs-orc-scanner.cc b/be/src/exec/hdfs-orc-scanner.cc
index 99a8fb9..b02908c 100644
--- a/be/src/exec/hdfs-orc-scanner.cc
+++ b/be/src/exec/hdfs-orc-scanner.cc
@@ -234,6 +234,11 @@ Status HdfsOrcScanner::Open(ScannerContext* context) {
row_batches_need_validation_ = rows_valid == ValidWriteIdList::SOME;
}
+ if (UNLIKELY(scan_node_->optimize_parquet_count_star())) {
+ DCHECK(false);
+ return Status("Internal ERROR: ORC scanner cannot optimize count star
slot.");
+ }
+
// Update 'row_reader_options_' based on the tuple descriptor so the ORC lib
can skip
// columns we don't need.
RETURN_IF_ERROR(SelectColumns(*scan_node_->tuple_desc()));
diff --git
a/fe/src/main/java/org/apache/impala/analysis/AlterTableUnSetTblProperties.java
b/fe/src/main/java/org/apache/impala/analysis/AlterTableUnSetTblProperties.java
index cfdd199..4d1d28d 100644
---
a/fe/src/main/java/org/apache/impala/analysis/AlterTableUnSetTblProperties.java
+++
b/fe/src/main/java/org/apache/impala/analysis/AlterTableUnSetTblProperties.java
@@ -126,7 +126,6 @@ public class AlterTableUnSetTblProperties extends
AlterTableStmt {
propertyCheck(IcebergTable.ICEBERG_CATALOG_LOCATION, "Iceberg");
propertyCheck(IcebergTable.ICEBERG_TABLE_IDENTIFIER, "Iceberg");
propertyCheck(IcebergTable.METADATA_LOCATION, "Iceberg");
- propertyCheck(IcebergTable.ICEBERG_FILE_FORMAT, "Iceberg");
}
private void propertyCheck(String property, String tableType) throws
AnalysisException {
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
index d2fb5fd..bb06002 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
@@ -317,23 +317,6 @@ public interface FeIcebergTable extends FeFsTable {
}
/**
- * Get iceberg table file format from hms table properties
- */
- public static TIcebergFileFormat getIcebergFileFormat(
- org.apache.hadoop.hive.metastore.api.Table msTable) {
- TIcebergFileFormat fileFormat = null;
- Map<String, String> params = msTable.getParameters();
- if (params.containsKey(IcebergTable.ICEBERG_FILE_FORMAT)) {
- fileFormat = IcebergUtil.getIcebergFileFormat(
- params.get(IcebergTable.ICEBERG_FILE_FORMAT));
- } else {
- // Accept "iceberg.file_format" for backward compatibility.
- fileFormat =
IcebergUtil.getIcebergFileFormat(params.get("iceberg.file_format"));
- }
- return fileFormat == null ? TIcebergFileFormat.PARQUET : fileFormat;
- }
-
- /**
* Get iceberg parquet compression codec from hms table properties
*/
public static TCompressionCodec getIcebergParquetCompressionCodec(
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
index 541690f..851cd5c 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
@@ -169,7 +169,7 @@ public class IcebergTable extends Table implements
FeIcebergTable {
super(msTable, db, name, owner);
icebergTableLocation_ = msTable.getSd().getLocation();
icebergCatalog_ = IcebergUtil.getTIcebergCatalog(msTable);
- icebergFileFormat_ = Utils.getIcebergFileFormat(msTable);
+ icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTable);
icebergParquetCompressionCodec_ =
Utils.getIcebergParquetCompressionCodec(msTable);
icebergParquetRowGroupSize_ = Utils.getIcebergParquetRowGroupSize(msTable);
icebergParquetPlainPageSize_ =
Utils.getIcebergParquetPlainPageSize(msTable);
@@ -337,7 +337,7 @@ public class IcebergTable extends Table implements
FeIcebergTable {
loadSchemaFromIceberg(metadata);
// Loading hdfs table after loaded schema from Iceberg,
// in case we create external Iceberg table skipping column info in
sql.
- icebergFileFormat_ = Utils.getIcebergFileFormat(msTbl);
+ icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTbl);
icebergParquetCompressionCodec_ =
Utils.getIcebergParquetCompressionCodec(msTbl);
icebergParquetRowGroupSize_ =
Utils.getIcebergParquetRowGroupSize(msTbl);
icebergParquetPlainPageSize_ =
Utils.getIcebergParquetPlainPageSize(msTbl);
diff --git
a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
index 7279975..5e122e9 100644
--- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
+++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
@@ -92,7 +92,7 @@ public class IcebergCtasTarget extends CtasTargetTable
implements FeIcebergTable
createPartitionSpec(partSpec);
icebergCatalog_ = IcebergUtil.getTIcebergCatalog(msTbl);
setLocations();
- icebergFileFormat_ = Utils.getIcebergFileFormat(msTbl);
+ icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTbl);
icebergParquetCompressionCodec_ =
Utils.getIcebergParquetCompressionCodec(msTbl);
icebergParquetRowGroupSize_ = Utils.getIcebergParquetRowGroupSize(msTbl);
icebergParquetPlainPageSize_ = Utils.getIcebergParquetPlainPageSize(msTbl);
diff --git
a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsPartition.java
b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsPartition.java
index 9ec78ec..c443455 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsPartition.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsPartition.java
@@ -202,9 +202,8 @@ public class LocalFsPartition implements FeFsPartition {
public HdfsFileFormat getFileFormat() {
HdfsFileFormat format = getInputFormatDescriptor().getFileFormat();
if (format == HdfsFileFormat.ICEBERG) {
- String format_str = table_.getMetaStoreTable().getParameters().get(
- IcebergTable.ICEBERG_FILE_FORMAT);
- return IcebergUtil.toHdfsFileFormat(format_str);
+ return IcebergUtil.toHdfsFileFormat(
+ IcebergUtil.getIcebergFileFormat(table_.getMetaStoreTable()));
}
return format;
}
diff --git
a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
index 9f7b83a..4db65d6 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
@@ -113,7 +113,7 @@ public class LocalIcebergTable extends LocalTable
implements FeIcebergTable {
"Failed to load table: %s.%s", msTable.getDbName(),
msTable.getTableName()),
(Exception)e);
}
- icebergFileFormat_ = Utils.getIcebergFileFormat(msTable);
+ icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTable);
icebergParquetCompressionCodec_ =
Utils.getIcebergParquetCompressionCodec(msTable);
icebergParquetRowGroupSize_ = Utils.getIcebergParquetRowGroupSize(msTable);
icebergParquetPlainPageSize_ =
Utils.getIcebergParquetPlainPageSize(msTable);
diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
index 27dd9ce..8bca3a6 100644
--- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
@@ -303,6 +303,23 @@ public class IcebergUtil {
}
/**
+ * Get iceberg table file format from hms table properties
+ */
+ public static TIcebergFileFormat getIcebergFileFormat(
+ org.apache.hadoop.hive.metastore.api.Table msTable) {
+ TIcebergFileFormat fileFormat = null;
+ Map<String, String> params = msTable.getParameters();
+ if (params.containsKey(IcebergTable.ICEBERG_FILE_FORMAT)) {
+ fileFormat = IcebergUtil.getIcebergFileFormat(
+ params.get(IcebergTable.ICEBERG_FILE_FORMAT));
+ } else {
+ // Accept "iceberg.file_format" for backward compatibility.
+ fileFormat =
IcebergUtil.getIcebergFileFormat(params.get("iceberg.file_format"));
+ }
+ return fileFormat == null ? TIcebergFileFormat.PARQUET : fileFormat;
+ }
+
+ /**
* Get TIcebergFileFormat from a string, usually from table properties.
* Returns PARQUET when 'format' is null. Returns null for invalid formats.
*/
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
index 137904e..85645d4 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
@@ -405,6 +405,65 @@ describe formatted iceberg_partitioned_orc_external;
string, string, string
====
---- QUERY
+CREATE EXTERNAL TABLE IF NOT EXISTS
iceberg_partitioned_orc_external_old_fileformat
+STORED AS ICEBERG
+TBLPROPERTIES(
+ 'iceberg.file_format'='orc',
+ 'iceberg.catalog'='hadoop.catalog',
+
'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
+ 'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc'
+);
+ALTER TABLE iceberg_partitioned_orc_external_old_fileformat
+UNSET TBLPROPERTIES IF EXISTS ('write.format.default');
+describe formatted iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS: VERIFY_IS_SUBSET
+'Location:
','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
+'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
+'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
+'','iceberg.file_format','orc '
+'','iceberg.catalog ','hadoop.catalog '
+---- RESULTS: VERIFY_IS_NOT_IN
+'','write.format.default','orc '
+---- TYPES
+string, string, string
+====
+---- QUERY
+SELECT * FROM iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS
+7,'Lisa','download'
+16,'Lisa','download'
+13,'Alan','click'
+10,'Alan','click'
+19,'Alex','view'
+1,'Alex','view'
+4,'Alex','view'
+20,'Alex','view'
+14,'Lisa','download'
+5,'Lisa','download'
+15,'Alex','view'
+18,'Alan','click'
+9,'Alan','click'
+17,'Alex','view'
+12,'Alan','click'
+2,'Lisa','download'
+8,'Lisa','download'
+11,'Alex','view'
+6,'Alex','view'
+3,'Alan','click'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+SELECT count(*) FROM iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS
+20
+---- TYPES
+BIGINT
+====
+---- QUERY
+DROP TABLE iceberg_partitioned_orc_external_old_fileformat;
+====
+---- QUERY
SELECT count(*) from iceberg_resolution_test_external;
---- TYPES
bigint