This is an automated email from the ASF dual-hosted git repository.

sbadhya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 00d0b6d7b94 HIVE-27938: Iceberg: Fix java.lang.ClassCastException 
during vectorized reads on partition columns (#5048) (Simhadri Govindappa 
reviewed by Sourabh Badhya)
00d0b6d7b94 is described below

commit 00d0b6d7b94e3db00d671542efa3c3cf1ad14714
Author: Simhadri Govindappa <[email protected]>
AuthorDate: Thu Feb 1 15:30:38 2024 +0530

    HIVE-27938: Iceberg: Fix java.lang.ClassCastException during vectorized 
reads on partition columns (#5048) (Simhadri Govindappa reviewed by Sourabh 
Badhya)
---
 .../mapreduce/HiveIdentityPartitionConverters.java |  69 ++++++++++
 .../iceberg/mr/mapreduce/IcebergInputFormat.java   |   5 +-
 .../positive/iceberg_partition_vectorized_read.q   |  24 ++++
 .../iceberg_partition_vectorized_read.q.out        | 139 +++++++++++++++++++++
 4 files changed, 235 insertions(+), 2 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/HiveIdentityPartitionConverters.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/HiveIdentityPartitionConverters.java
new file mode 100644
index 00000000000..6c51de9dabb
--- /dev/null
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/HiveIdentityPartitionConverters.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.mr.mapreduce;
+
+import java.math.BigDecimal;
+import org.apache.avro.generic.GenericData;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.DateTimeUtil;
+
+/** Converts Iceberg partition constants to the Hive value types (Date, Timestamp, HiveDecimal) for vectorized reads. */
+public class HiveIdentityPartitionConverters {
+  private HiveIdentityPartitionConverters() {
+  }
+
+  /**
+   * Converts a partition constant to its Hive representation, based on the Iceberg type id.
+   * @param type Iceberg type of the value
+   * @param value partition constant; may be {@code null}
+   * @return the converted value, {@code null} for null input, or {@code value} itself if no conversion applies
+   */
+  public static Object convertConstant(Type type, Object value) {
+    if (value == null) {
+      return null;
+    }
+    switch (type.typeId()) {
+      case STRING:
+        return value.toString();
+      case TIME:
+        return DateTimeUtil.timeFromMicros((Long) value);
+      case DATE:
+        return Date.ofEpochDay((Integer) value);
+      case TIMESTAMP:
+        if (((Types.TimestampType) type).shouldAdjustToUTC()) {
+          // NOTE(review): toOffsetTime() keeps only time-of-day and offset; confirm dropping the date is intended.
+          return DateTimeUtil.timestamptzFromMicros((Long) value).toOffsetTime();
+        } else {
+          return new Timestamp(DateTimeUtil.timestampFromMicros((Long) value));
+        }
+      case DECIMAL:
+        // instanceof (not Class#isAssignableFrom on the value's class) so only real BigDecimals are cast.
+        return value instanceof BigDecimal ? HiveDecimal.create((BigDecimal) value) : value;
+      case FIXED:
+        return value instanceof GenericData.Fixed ? ((GenericData.Fixed) value).bytes() : value;
+      default:
+        return value;
+    }
+  }
+}
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java
index 3ec1a3b3b7a..754d78e4d93 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java
@@ -391,7 +391,7 @@ public class IcebergInputFormat<T> extends 
InputFormat<Void, T> {
           "Vectorized read is unsupported for Hive 2 integration.");
 
       Path path = new Path(task.file().path().toString());
-      Map<Integer, ?> idToConstant = constantsMap(task, 
IdentityPartitionConverters::convertConstant);
+      Map<Integer, ?> idToConstant = constantsMap(task, 
HiveIdentityPartitionConverters::convertConstant);
       Expression residual = HiveIcebergInputFormat.residualForTask(task, 
context.getConfiguration());
 
       // TODO: We have to take care of the EncryptionManager when LLAP and 
vectorization is used
@@ -544,7 +544,8 @@ public class IcebergInputFormat<T> extends 
InputFormat<Void, T> {
         Types.StructType partitionType = Partitioning.partitionType(table);
         return PartitionUtil.constantsMap(task, partitionType, converter);
       } else if (projectsIdentityPartitionColumns) {
-        return PartitionUtil.constantsMap(task, converter);
+        Types.StructType partitionType = Partitioning.partitionType(table);
+        return PartitionUtil.constantsMap(task, partitionType, converter);
       } else {
         return Collections.emptyMap();
       }
diff --git 
a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_partition_vectorized_read.q
 
b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_partition_vectorized_read.q
new file mode 100644
index 00000000000..506f6948871
--- /dev/null
+++ 
b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_partition_vectorized_read.q
@@ -0,0 +1,24 @@
+set hive.vectorized.execution.enabled=true;
+
+CREATE EXTERNAL TABLE ice_date   (`col1` int, `day` date, `calday` date) 
PARTITIONED BY SPEC (calday)   stored by
+iceberg tblproperties('format-version'='2');
+insert into ice_date values(1, '2020-11-20', '2020-11-20'), (1, '2020-11-20', 
'2020-11-20');
+select * from ice_date;
+select count(calday) from ice_date;
+select distinct(calday) from ice_date;
+
+
+CREATE EXTERNAL TABLE ice_timestamp   (`col1` int, `day` date, `times` 
timestamp) PARTITIONED BY SPEC (times)   stored
+by iceberg tblproperties('format-version'='2');
+insert into ice_timestamp values(1, '2020-11-20', '2020-11-20'), (1, 
'2020-11-20', '2020-11-20');
+select * from ice_timestamp;
+select count(times) from ice_timestamp;
+select distinct(times) from ice_timestamp;
+
+
+CREATE EXTERNAL TABLE ice_decimal  (`col1` int, `decimalA` decimal(5,2), 
`decimalC` decimal(5,2)) PARTITIONED BY SPEC
+(decimalC) stored by iceberg tblproperties('format-version'='2');
+insert into ice_decimal values(1, 122.91, 102.21), (1, 12.32, 200.12);
+select * from ice_decimal;
+select distinct(decimalc) from ice_decimal;
+select count(decimala) from ice_decimal where decimala=122.91;
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_partition_vectorized_read.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/iceberg_partition_vectorized_read.q.out
new file mode 100644
index 00000000000..3cc643380fd
--- /dev/null
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/iceberg_partition_vectorized_read.q.out
@@ -0,0 +1,139 @@
+PREHOOK: query: CREATE EXTERNAL TABLE ice_date   (`col1` int, `day` date, 
`calday` date) PARTITIONED BY SPEC (calday)   stored by
+iceberg tblproperties('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_date
+POSTHOOK: query: CREATE EXTERNAL TABLE ice_date   (`col1` int, `day` date, 
`calday` date) PARTITIONED BY SPEC (calday)   stored by
+iceberg tblproperties('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_date
+PREHOOK: query: insert into ice_date values(1, '2020-11-20', '2020-11-20'), 
(1, '2020-11-20', '2020-11-20')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_date
+POSTHOOK: query: insert into ice_date values(1, '2020-11-20', '2020-11-20'), 
(1, '2020-11-20', '2020-11-20')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_date
+PREHOOK: query: select * from ice_date
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_date
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_date
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_date
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      2020-11-20      2020-11-20
+1      2020-11-20      2020-11-20
+PREHOOK: query: select count(calday) from ice_date
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_date
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(calday) from ice_date
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_date
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2
+PREHOOK: query: select distinct(calday) from ice_date
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_date
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select distinct(calday) from ice_date
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_date
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2020-11-20
+PREHOOK: query: CREATE EXTERNAL TABLE ice_timestamp   (`col1` int, `day` date, 
`times` timestamp) PARTITIONED BY SPEC (times)   stored
+by iceberg tblproperties('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_timestamp
+POSTHOOK: query: CREATE EXTERNAL TABLE ice_timestamp   (`col1` int, `day` 
date, `times` timestamp) PARTITIONED BY SPEC (times)   stored
+by iceberg tblproperties('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_timestamp
+PREHOOK: query: insert into ice_timestamp values(1, '2020-11-20', 
'2020-11-20'), (1, '2020-11-20', '2020-11-20')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_timestamp
+POSTHOOK: query: insert into ice_timestamp values(1, '2020-11-20', 
'2020-11-20'), (1, '2020-11-20', '2020-11-20')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_timestamp
+PREHOOK: query: select * from ice_timestamp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_timestamp
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_timestamp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_timestamp
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      2020-11-20      2020-11-20 00:00:00
+1      2020-11-20      2020-11-20 00:00:00
+PREHOOK: query: select count(times) from ice_timestamp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_timestamp
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(times) from ice_timestamp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_timestamp
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2
+PREHOOK: query: select distinct(times) from ice_timestamp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_timestamp
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select distinct(times) from ice_timestamp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_timestamp
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2020-11-20 00:00:00
+PREHOOK: query: CREATE EXTERNAL TABLE ice_decimal  (`col1` int, `decimalA` 
decimal(5,2), `decimalC` decimal(5,2)) PARTITIONED BY SPEC
+(decimalC) stored by iceberg tblproperties('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_decimal
+POSTHOOK: query: CREATE EXTERNAL TABLE ice_decimal  (`col1` int, `decimalA` 
decimal(5,2), `decimalC` decimal(5,2)) PARTITIONED BY SPEC
+(decimalC) stored by iceberg tblproperties('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_decimal
+PREHOOK: query: insert into ice_decimal values(1, 122.91, 102.21), (1, 12.32, 
200.12)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_decimal
+POSTHOOK: query: insert into ice_decimal values(1, 122.91, 102.21), (1, 12.32, 
200.12)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_decimal
+PREHOOK: query: select * from ice_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_decimal
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_decimal
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      122.91  102.21
+1      12.32   200.12
+PREHOOK: query: select distinct(decimalc) from ice_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_decimal
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select distinct(decimalc) from ice_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_decimal
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+102.21
+200.12
+PREHOOK: query: select count(decimala) from ice_decimal where decimala=122.91
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_decimal
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(decimala) from ice_decimal where decimala=122.91
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_decimal
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1

Reply via email to