This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch branch-4.2
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-4.2 by this push:
     new 00036f20068 HIVE-29310: Type casting issue in variant_get UDF (#6176)
00036f20068 is described below

commit 00036f20068a99ad739a88342262ce28840424c5
Author: Denys Kuzmenko <[email protected]>
AuthorDate: Mon Nov 17 18:14:36 2025 +0100

    HIVE-29310: Type casting issue in variant_get UDF (#6176)
    
    (cherry picked from commit e34e87faf32b0167f438e30344f0a04fe10f6575)
---
 .../test/queries/positive/variant_type_filter.q    |  40 +++++++
 .../results/positive/variant_type_filter.q.out     | 122 +++++++++++++++++++++
 .../hive/ql/udf/generic/GenericUDFVariantGet.java  |  22 ++--
 3 files changed, 171 insertions(+), 13 deletions(-)

diff --git a/iceberg/iceberg-handler/src/test/queries/positive/variant_type_filter.q b/iceberg/iceberg-handler/src/test/queries/positive/variant_type_filter.q
new file mode 100644
index 00000000000..c9907948ef2
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/variant_type_filter.q
@@ -0,0 +1,40 @@
+-- Mask random uuid
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+-- Mask random snapshot id
+--! qt:replace:/('current-snapshot-id'=')\d+/$1#SnapshotId#/
+-- Mask current-snapshot-timestamp-ms
+--! qt:replace:/('current-snapshot-timestamp-ms'=')\d+/$1#Masked#/
+
+-- SORT_QUERY_RESULTS
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
+CREATE EXTERNAL TABLE variant_filter_basic (
+    id BIGINT,
+    data VARIANT
+) STORED BY ICEBERG tblproperties('format-version'='3');
+
+INSERT INTO variant_filter_basic VALUES
+(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
+(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
+(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'));
+
+SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic;
+
+SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30;
+
+EXPLAIN SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30;
\ No newline at end of file
diff --git a/iceberg/iceberg-handler/src/test/results/positive/variant_type_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/variant_type_filter.q.out
new file mode 100644
index 00000000000..649bfab0ce6
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/variant_type_filter.q.out
@@ -0,0 +1,122 @@
+PREHOOK: query: CREATE EXTERNAL TABLE variant_filter_basic (
+    id BIGINT,
+    data VARIANT
+) STORED BY ICEBERG tblproperties('format-version'='3')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@variant_filter_basic
+POSTHOOK: query: CREATE EXTERNAL TABLE variant_filter_basic (
+    id BIGINT,
+    data VARIANT
+) STORED BY ICEBERG tblproperties('format-version'='3')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@variant_filter_basic
+PREHOOK: query: INSERT INTO variant_filter_basic VALUES
+(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
+(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
+(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@variant_filter_basic
+POSTHOOK: query: INSERT INTO variant_filter_basic VALUES
+(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
+(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
+(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@variant_filter_basic
+PREHOOK: query: SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+PREHOOK: type: QUERY
+PREHOOK: Input: default@variant_filter_basic
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@variant_filter_basic
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Alice  30      Wonderland
+Bob    40      Builderland
+Charlie        28      Dreamtown
+PREHOOK: query: SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@variant_filter_basic
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@variant_filter_basic
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Alice  30      Wonderland
+Bob    40      Builderland
+PREHOOK: query: EXPLAIN SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@variant_filter_basic
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: EXPLAIN SELECT
+  try_variant_get(data, '$.name') AS name,
+  try_variant_get(data, '$.age', 'int') AS age,
+  try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@variant_filter_basic
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: variant_filter_basic
+                  filterExpr: (try_variant_get(data, '$.age', 'int') >= 30) (type: boolean)
+                  Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (try_variant_get(data, '$.age', 'int') >= 30) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: try_variant_get(data, '$.name') (type: string), try_variant_get(data, '$.age', 'int') (type: int), try_variant_get(data, '$.address.city') (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java
index f3fb0c12897..2ca34686b8d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java
@@ -26,6 +26,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantStringObjectInspector;
 import org.apache.hadoop.hive.serde2.variant.Variant;
 import org.apache.hadoop.hive.serde2.variant.VariantUtil;
 import org.slf4j.Logger;
@@ -58,8 +60,7 @@ public class GenericUDFVariantGet extends GenericUDF {
   private StructObjectInspector variantOI;
   private PrimitiveObjectInspector pathOI;
 
-  private PrimitiveObjectInspector typeOI;
-  private boolean hasTypeArgument;
+  private String targetType;
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -77,12 +78,15 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
     }
     pathOI = (PrimitiveObjectInspector) arguments[1];
 
-    hasTypeArgument = arguments.length == 3;
+    boolean hasTypeArgument = arguments.length == 3;
     if (hasTypeArgument) {
-      if (!(arguments[2] instanceof PrimitiveObjectInspector)) {
+      if (!(arguments[2] instanceof WritableConstantStringObjectInspector typeOI)) {
         throw new UDFArgumentException("Third argument must be string type name");
       }
-      typeOI = (PrimitiveObjectInspector) arguments[2];
+      targetType = typeOI.getWritableConstantValue().toString();
+
+      return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+          PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(targetType).primitiveCategory);
     }
 
     return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
@@ -103,14 +107,6 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
       }
       String path = pathOI.getPrimitiveJavaObject(pathObj).toString();
 
-      String targetType = null;
-      if (hasTypeArgument) {
-        Object typeObj = arguments[2].get();
-        if (typeObj != null) {
-          targetType = typeOI.getPrimitiveJavaObject(typeObj).toString();
-        }
-      }
-
       Variant result = extractValueByPath(variant, path);
       // cast to target type
       return castValue(result, targetType);

Reply via email to