This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 63326ff775 HIVE-25421: Fallback from vectorization when reading Iceberg's time columns from ORC files (#3334) (Adam Szita, reviewed by Laszlo Pinter)
63326ff775 is described below

commit 63326ff775206e59547b6b1332e25279e90ef5ee
Author: Adam Szita <[email protected]>
AuthorDate: Thu Jun 2 09:50:40 2022 +0200

    HIVE-25421: Fallback from vectorization when reading Iceberg's time columns from ORC files (#3334) (Adam Szita, reviewed by Laszlo Pinter)
---
 .../apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 15 ++++++++++++++-
 .../apache/iceberg/mr/hive/TestHiveIcebergSelects.java    |  9 ++++-----
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index ba066ed0db..c79d344272 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -781,17 +781,30 @@ public class HiveIcebergStorageHandler implements 
HiveStoragePredicateHandler, H
    *   <li>iceberg format-version is "2"</li>
    *   <li>fileformat is set to avro</li>
    *   <li>querying metadata tables</li>
+   *   <li>fileformat is set to ORC, and table schema has time type column</li>
    * </ul>
    * @param tableProps table properties, must be not null
    */
   private void fallbackToNonVectorizedModeBasedOnProperties(Properties 
tableProps) {
     if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION)) ||
         
FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))
 ||
-        (tableProps.containsKey("metaTable") && 
isValidMetadataTable(tableProps.getProperty("metaTable")))) {
+        (tableProps.containsKey("metaTable") && 
isValidMetadataTable(tableProps.getProperty("metaTable"))) ||
+        hasOrcTimeInSchema(tableProps)) {
       conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, 
false);
     }
   }
 
+  // Iceberg Time type columns are written as longs into ORC files. There is 
no Time type in Hive, so it is represented
+  // as String instead. For ORC there's no automatic conversion from long to 
string during vectorized reading such as
+  // for example in Parquet (in Parquet files Time type is an int64 with 
'time' logical annotation).
+  private static boolean hasOrcTimeInSchema(Properties tableProps) {
+    if 
(!FileFormat.ORC.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT)))
 {
+      return false;
+    }
+    Schema tableSchema = 
SchemaParser.fromJson(tableProps.getProperty(InputFormatConfig.TABLE_SCHEMA));
+    return tableSchema.columns().stream().anyMatch(f -> 
Types.TimeType.get().typeId() == f.type().typeId());
+  }
+
   /**
    * Generates a JobContext for the OutputCommitter for the specific table.
    * @param configuration The configuration used for as a base of the JobConf
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
index ff54a9b0e2..a9c692d12e 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
@@ -117,9 +117,8 @@ public class TestHiveIcebergSelects extends 
HiveIcebergStorageHandlerWithEngineB
   public void testJoinTablesSupportedTypes() throws IOException {
     for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
       Type type = SUPPORTED_TYPES.get(i);
-      if ((type == Types.TimestampType.withZone() || type == 
Types.TimeType.get()) &&
-          isVectorized && fileFormat == FileFormat.ORC) {
-        // ORC/TIMESTAMP_INSTANT and time are not supported vectorized types 
for Hive
+      if ((type == Types.TimestampType.withZone()) && isVectorized && 
fileFormat == FileFormat.ORC) {
+        // ORC/TIMESTAMP_INSTANT is not supported vectorized types for Hive
         continue;
       }
       // TODO: remove this filter when issue #1881 is resolved
@@ -145,9 +144,9 @@ public class TestHiveIcebergSelects extends 
HiveIcebergStorageHandlerWithEngineB
   public void testSelectDistinctFromTable() throws IOException {
     for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
       Type type = SUPPORTED_TYPES.get(i);
-      if ((type == Types.TimestampType.withZone() || type == 
Types.TimeType.get()) &&
+      if ((type == Types.TimestampType.withZone()) &&
           isVectorized && fileFormat == FileFormat.ORC) {
-        // ORC/TIMESTAMP_INSTANT and time are not supported vectorized types 
for Hive
+        // ORC/TIMESTAMP_INSTANT is not supported vectorized types for Hive
         continue;
       }
       // TODO: remove this filter when issue #1881 is resolved

Reply via email to