This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new e34e87faf32 HIVE-29310: Type casting issue in variant_get UDF (#6176)
e34e87faf32 is described below
commit e34e87faf32b0167f438e30344f0a04fe10f6575
Author: Denys Kuzmenko <[email protected]>
AuthorDate: Mon Nov 17 18:14:36 2025 +0100
HIVE-29310: Type casting issue in variant_get UDF (#6176)
---
.../test/queries/positive/variant_type_filter.q | 40 +++++++
.../results/positive/variant_type_filter.q.out | 122 +++++++++++++++++++++
.../hive/ql/udf/generic/GenericUDFVariantGet.java | 22 ++--
3 files changed, 171 insertions(+), 13 deletions(-)
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/variant_type_filter.q b/iceberg/iceberg-handler/src/test/queries/positive/variant_type_filter.q
new file mode 100644
index 00000000000..c9907948ef2
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/variant_type_filter.q
@@ -0,0 +1,40 @@
+-- Mask random uuid
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+-- Mask random snapshot id
+--! qt:replace:/('current-snapshot-id'=')\d+/$1#SnapshotId#/
+-- Mask current-snapshot-timestamp-ms
+--! qt:replace:/('current-snapshot-timestamp-ms'=')\d+/$1#Masked#/
+
+-- SORT_QUERY_RESULTS
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
+CREATE EXTERNAL TABLE variant_filter_basic (
+ id BIGINT,
+ data VARIANT
+) STORED BY ICEBERG tblproperties('format-version'='3');
+
+INSERT INTO variant_filter_basic VALUES
+(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
+(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
+(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'));
+
+SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic;
+
+SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30;
+
+EXPLAIN SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30;
\ No newline at end of file
diff --git a/iceberg/iceberg-handler/src/test/results/positive/variant_type_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/variant_type_filter.q.out
new file mode 100644
index 00000000000..649bfab0ce6
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/variant_type_filter.q.out
@@ -0,0 +1,122 @@
+PREHOOK: query: CREATE EXTERNAL TABLE variant_filter_basic (
+ id BIGINT,
+ data VARIANT
+) STORED BY ICEBERG tblproperties('format-version'='3')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@variant_filter_basic
+POSTHOOK: query: CREATE EXTERNAL TABLE variant_filter_basic (
+ id BIGINT,
+ data VARIANT
+) STORED BY ICEBERG tblproperties('format-version'='3')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@variant_filter_basic
+PREHOOK: query: INSERT INTO variant_filter_basic VALUES
+(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
+(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
+(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@variant_filter_basic
+POSTHOOK: query: INSERT INTO variant_filter_basic VALUES
+(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
+(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
+(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@variant_filter_basic
+PREHOOK: query: SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+PREHOOK: type: QUERY
+PREHOOK: Input: default@variant_filter_basic
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@variant_filter_basic
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Alice 30 Wonderland
+Bob 40 Builderland
+Charlie 28 Dreamtown
+PREHOOK: query: SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@variant_filter_basic
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@variant_filter_basic
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Alice 30 Wonderland
+Bob 40 Builderland
+PREHOOK: query: EXPLAIN SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@variant_filter_basic
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: EXPLAIN SELECT
+ try_variant_get(data, '$.name') AS name,
+ try_variant_get(data, '$.age', 'int') AS age,
+ try_variant_get(data, '$.address.city') AS city
+FROM variant_filter_basic
+WHERE try_variant_get(data, '$.age', 'int') >= 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@variant_filter_basic
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: variant_filter_basic
+ filterExpr: (try_variant_get(data, '$.age', 'int') >= 30) (type: boolean)
+ Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (try_variant_get(data, '$.age', 'int') >= 30) (type: boolean)
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: try_variant_get(data, '$.name') (type: string), try_variant_get(data, '$.age', 'int') (type: int), try_variant_get(data, '$.address.city') (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java
index f3fb0c12897..2ca34686b8d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java
@@ -26,6 +26,8 @@
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantStringObjectInspector;
import org.apache.hadoop.hive.serde2.variant.Variant;
import org.apache.hadoop.hive.serde2.variant.VariantUtil;
import org.slf4j.Logger;
@@ -58,8 +60,7 @@ public class GenericUDFVariantGet extends GenericUDF {
private StructObjectInspector variantOI;
private PrimitiveObjectInspector pathOI;
- private PrimitiveObjectInspector typeOI;
- private boolean hasTypeArgument;
+ private String targetType;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -77,12 +78,15 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
}
pathOI = (PrimitiveObjectInspector) arguments[1];
- hasTypeArgument = arguments.length == 3;
+ boolean hasTypeArgument = arguments.length == 3;
if (hasTypeArgument) {
- if (!(arguments[2] instanceof PrimitiveObjectInspector)) {
+ if (!(arguments[2] instanceof WritableConstantStringObjectInspector typeOI)) {
throw new UDFArgumentException("Third argument must be string type name");
}
- typeOI = (PrimitiveObjectInspector) arguments[2];
+ targetType = typeOI.getWritableConstantValue().toString();
+
+ return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+ PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(targetType).primitiveCategory);
}
return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
@@ -103,14 +107,6 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
}
String path = pathOI.getPrimitiveJavaObject(pathObj).toString();
- String targetType = null;
- if (hasTypeArgument) {
- Object typeObj = arguments[2].get();
- if (typeObj != null) {
- targetType = typeOI.getPrimitiveJavaObject(typeObj).toString();
- }
- }
-
Variant result = extractValueByPath(variant, path);
// cast to target type
return castValue(result, targetType);