This is an automated email from the ASF dual-hosted git repository.
szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new aa8891ad648 HIVE-26410: Reading nested types within maps in Parquet
Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by
Laszlo Pinter)
aa8891ad648 is described below
commit aa8891ad6480b4a50c91a60a2eacd1871128482b
Author: Adam Szita <[email protected]>
AuthorDate: Wed Jul 20 08:55:16 2022 +0200
HIVE-26410: Reading nested types within maps in Parquet Iceberg is not
supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)
---
.../apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 14 ++++++++------
.../org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java | 2 --
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 5f1c9158aab..25881408a63 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import
org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -818,7 +819,7 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))
||
(tableProps.containsKey("metaTable") &&
isValidMetadataTable(tableProps.getProperty("metaTable"))) ||
hasOrcTimeInSchema(tableProps, tableSchema) ||
- !hasParquetListColumnSupport(tableProps, tableSchema)) {
+ !hasParquetNestedTypeWithinListOrMap(tableProps, tableSchema)) {
conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname,
false);
}
}
@@ -839,20 +840,21 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
}
/**
- * Vectorized reads of parquet files from columns with list type is only
supported if the element is a primitive type
- * check {@link VectorizedParquetRecordReader#checkListColumnSupport} for
details
+ * Vectorized reads of parquet files from columns with list or map type are
only supported if the nested types are of
+ * primitive type category.
+ * Check {@link VectorizedParquetRecordReader#checkListColumnSupport} for
details on nested types under lists.
* @param tableProps iceberg table properties
* @param tableSchema iceberg table schema
* @return
*/
- private static boolean hasParquetListColumnSupport(Properties tableProps,
Schema tableSchema) {
+ private static boolean hasParquetNestedTypeWithinListOrMap(Properties
tableProps, Schema tableSchema) {
if
(!FileFormat.PARQUET.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT)))
{
return true;
}
for (Types.NestedField field : tableSchema.columns()) {
- if (field.type().isListType()) {
- for (Types.NestedField nestedField :
field.type().asListType().fields()) {
+ if (field.type().isListType() || field.type().isMapType()) {
+ for (Types.NestedField nestedField :
field.type().asNestedType().fields()) {
if (!nestedField.type().isPrimitiveType()) {
return false;
}
diff --git
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
index efb08c36d95..31a589a7c96 100644
---
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
+++
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
@@ -334,8 +334,6 @@ public class TestHiveIcebergInserts extends
HiveIcebergStorageHandlerWithEngineB
@Test
public void testStructMapWithNull() throws IOException {
- Assume.assumeTrue("Vectorized parquet read throws class cast exception",
- !(fileFormat == FileFormat.PARQUET && isVectorized));
Schema schema = new Schema(required(1, "id", Types.LongType.get()),
required(2, "mapofstructs", Types.MapType.ofRequired(3, 4,
Types.StringType.get(),
Types.StructType.of(required(5, "something",
Types.StringType.get()),