This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d58586b451 HIVE-29372: Iceberg: [V3] Fix inconsistencies when 
vectorization is enabled in case of variant shredding. (#6245)
3d58586b451 is described below

commit 3d58586b45121f58cdcbf0ddbaa11ab08408d35b
Author: Ayush Saxena <[email protected]>
AuthorDate: Fri Dec 19 21:56:16 2025 +0530

    HIVE-29372: Iceberg: [V3] Fix inconsistencies when vectorization is 
enabled in case of variant shredding. (#6245)
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  4 +-
 .../mr/hive/writer/HiveFileWriterFactory.java      |  2 +-
 .../org/apache/iceberg/parquet/VariantUtil.java    | 11 +++--
 .../test/queries/positive/variant_type_shredding.q | 23 +++++++--
 .../results/positive/variant_type_shredding.q.out  | 54 ++++++++++++++++++++++
 5 files changed, 84 insertions(+), 10 deletions(-)

diff --git 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index b53b9af266f..2978341b6da 100644
--- 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -187,6 +187,7 @@
 import org.apache.iceberg.mr.hive.actions.HiveIcebergDeleteOrphanFiles;
 import org.apache.iceberg.mr.hive.plan.IcebergBucketFunction;
 import org.apache.iceberg.mr.hive.udf.GenericUDFIcebergZorder;
+import org.apache.iceberg.parquet.VariantUtil;
 import org.apache.iceberg.puffin.Blob;
 import org.apache.iceberg.puffin.BlobMetadata;
 import org.apache.iceberg.puffin.Puffin;
@@ -1751,7 +1752,8 @@ private void 
fallbackToNonVectorizedModeBasedOnProperties(Properties tableProps)
     if (FileFormat.AVRO == 
IcebergTableUtil.defaultFileFormat(tableProps::getProperty) ||
         
isValidMetadataTable(tableProps.getProperty(IcebergAcidUtil.META_TABLE_PROPERTY))
 ||
         hasOrcTimeInSchema(tableProps, tableSchema) ||
-        !hasParquetNestedTypeWithinListOrMap(tableProps, tableSchema)) {
+        !hasParquetNestedTypeWithinListOrMap(tableProps, tableSchema) ||
+        VariantUtil.shouldUseVariantShredding(tableProps::getProperty, 
tableSchema)) {
       // disable vectorization
       SessionStateUtil.getQueryState(conf).ifPresent(queryState ->
           queryState.getConf().setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, 
false));
diff --git 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/writer/HiveFileWriterFactory.java
 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/writer/HiveFileWriterFactory.java
index 234cf928432..049a5a0dfb4 100644
--- 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/writer/HiveFileWriterFactory.java
+++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/writer/HiveFileWriterFactory.java
@@ -86,7 +86,7 @@ protected void 
configurePositionDelete(Avro.DeleteWriteBuilder builder) {
   protected void configureDataWrite(Parquet.DataWriteBuilder builder) {
     builder.createWriterFunc(GenericParquetWriter::create);
     // Configure variant shredding if enabled and a sample record is available
-    if (VariantUtil.shouldUseVariantShredding(properties, dataSchema())) {
+    if (VariantUtil.shouldUseVariantShredding(properties::get, dataSchema())) {
       setVariantShreddingFunc(builder, 
VariantUtil.variantShreddingFunc(sampleRecord, dataSchema()));
     }
   }
diff --git 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/parquet/VariantUtil.java
 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/parquet/VariantUtil.java
index 736a39b895c..d03c4a675fa 100644
--- 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/parquet/VariantUtil.java
+++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/parquet/VariantUtil.java
@@ -21,6 +21,7 @@
 
 import java.util.List;
 import java.util.Map;
+import java.util.function.UnaryOperator;
 import org.apache.iceberg.Accessor;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.StructLike;
@@ -59,8 +60,8 @@ public record VariantField(int fieldId, Accessor<StructLike> 
accessor, String[]
   /**
    * Check if variant shredding is enabled via table properties.
    */
-  public static boolean isVariantShreddingEnabled(Map<String, String> 
properties) {
-    String shreddingEnabled = 
properties.get(InputFormatConfig.VARIANT_SHREDDING_ENABLED);
+  public static boolean isVariantShreddingEnabled(UnaryOperator<String> 
propertyLookup) {
+    String shreddingEnabled = 
propertyLookup.apply(InputFormatConfig.VARIANT_SHREDDING_ENABLED);
     return Boolean.parseBoolean(shreddingEnabled);
   }
 
@@ -73,7 +74,7 @@ public static boolean isShreddable(Object value) {
 
   public static List<VariantField> variantFieldsForShredding(
       Map<String, String> properties, Schema schema) {
-    if (!isVariantShreddingEnabled(properties)) {
+    if (!isVariantShreddingEnabled(properties::get)) {
       return List.of();
     }
     return variantFieldsForShredding(schema);
@@ -89,8 +90,8 @@ private static List<VariantField> 
variantFieldsForShredding(Schema schema) {
     return results;
   }
 
-  public static boolean shouldUseVariantShredding(Map<String, String> 
properties, Schema schema) {
-    return isVariantShreddingEnabled(properties) && hasVariantFields(schema);
+  public static boolean shouldUseVariantShredding(UnaryOperator<String> 
propertyLookup, Schema schema) {
+    return isVariantShreddingEnabled(propertyLookup) && 
hasVariantFields(schema);
   }
 
   private static boolean hasVariantFields(Schema schema) {
diff --git 
a/iceberg/iceberg-handler/src/test/queries/positive/variant_type_shredding.q 
b/iceberg/iceberg-handler/src/test/queries/positive/variant_type_shredding.q
index 25d84dd0c0a..df6794c431a 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/variant_type_shredding.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/variant_type_shredding.q
@@ -27,9 +27,6 @@ INSERT INTO tbl_shredded_variant VALUES
 (2, parse_json('{"name": "Bill", "active": false}')),
 (3, parse_json('{"name": "Henry", "age": 20}'));
 
--- Disable vectorized execution until Variant type is supported
-set hive.vectorized.execution.enabled=false;
-
 -- Retrieve and verify
 SELECT id, try_variant_get(data, '$.name') FROM tbl_shredded_variant
 WHERE variant_get(data, '$.age') > 25;
@@ -37,3 +34,23 @@ WHERE variant_get(data, '$.age') > 25;
 EXPLAIN
 SELECT id, try_variant_get(data, '$.name') FROM tbl_shredded_variant
 WHERE variant_get(data, '$.age') > 25;
+
+CREATE TABLE t (
+  id INT,
+  v VARIANT
+)
+STORED BY ICEBERG
+TBLPROPERTIES (
+  'format-version'='3',
+  'variant.shredding.enabled'='true'
+);
+
+INSERT INTO t VALUES
+(1, parse_json('{"a": 1}')),
+(2, parse_json('{"b": 2}'));
+
+SELECT
+  try_variant_get(v, '$.a'),
+  try_variant_get(v, '$.b')
+FROM t
+ORDER BY id;
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/variant_type_shredding.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/variant_type_shredding.q.out
index b51bc749525..f7c0910a9b8 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/variant_type_shredding.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/variant_type_shredding.q.out
@@ -99,3 +99,57 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: CREATE TABLE t (
+  id INT,
+  v VARIANT
+)
+STORED BY ICEBERG
+TBLPROPERTIES (
+  'format-version'='3',
+  'variant.shredding.enabled'='true'
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: CREATE TABLE t (
+  id INT,
+  v VARIANT
+)
+STORED BY ICEBERG
+TBLPROPERTIES (
+  'format-version'='3',
+  'variant.shredding.enabled'='true'
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: INSERT INTO t VALUES
+(1, parse_json('{"a": 1}')),
+(2, parse_json('{"b": 2}'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: INSERT INTO t VALUES
+(1, parse_json('{"a": 1}')),
+(2, parse_json('{"b": 2}'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+PREHOOK: query: SELECT
+  try_variant_get(v, '$.a'),
+  try_variant_get(v, '$.b')
+FROM t
+ORDER BY id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT
+  try_variant_get(v, '$.a'),
+  try_variant_get(v, '$.b')
+FROM t
+ORDER BY id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      NULL
+NULL   2

Reply via email to