[ https://issues.apache.org/jira/browse/HIVE-19225?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Amruth S updated HIVE-19225: ---------------------------- Description: *To reproduce : [tag - 2.3.2]* Launch hive in debug mode {code:java} hive --hiveconf hive.root.logger=DEBUG,console;{code} Run the sample sql below {code:java} SET mapreduce.framework.name=local; CREATE TABLE `test_class_cast` as select named_struct('a','a','b','b','c','c','d','d','e',true,'f','f','g',timestamp(1),'h','h'), 'i'; select `_c0`.c, `_c0`.g, `_c0`.a, rank() over (partition by `_c0`.c order by `_c0`.g desc) as rown,`_c0`.f,`_c0`.e from default.test_class_cast where `_c0`.f like '%f%' or `_c0`.f like '%f%' {code} Should fail with the exception {code:java} Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"reducesinkkey0":"c","reducesinkkey1":"1970-01-01 05:30:00.001"},"value":{"_col0":{"a":"a","b":"b","c":"c","d":"d","e":true,"f":"f","g":"1970-01-01 05:30:00.001","h":"h"}}} at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:245) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:444) ~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?] at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:392) ~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319) ~[hadoop-mapreduce-client-common-2.6.0.2.2.0.0-2041.jar:?] 
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_92] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_92] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_92] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_92] at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_92] Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to org.apache.hadoop.hive.serde2.io.TimestampWritable at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] at org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]{code} was: Certain queries with the rank function are causing a class cast exception. 
{noformat} Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to org.apache.hadoop.hive.serde2.io.TimestampWritable at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39) at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25) at org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank$GenericUDAFAbstractRankEvaluator.iterate(GenericUDAFRank.java:153) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:192) at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:407) at org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325) at org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:139) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:236) ... 7 more 2018-03-29 09:28:43,432 INFO [main] org.apache.hadoop.mapred.Task: Runnning cleanup for the task {noformat} The following changes fix this. The evaluator seems to skip the case where the primary object emitted is a struct. 
Modified the code to find the field inside the struct {code:java} diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java index 36a500790a..e7731e99d7 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -171,6 +172,10 @@ public Object getStructFieldData(Object data, StructField fieldRef) { // so we have to do differently. boolean isArray = data.getClass().isArray(); if (!isArray && !(data instanceof List)) { + if (data instanceof LazyBinaryStruct + && fieldRef.getFieldObjectInspector().getCategory() == Category.PRIMITIVE) { + return ((LazyBinaryStruct) data).getField(((MyField) fieldRef).fieldID); + } if (!warned) { LOG.warn("Invalid type for struct " + data.getClass()); LOG.warn("ignoring similar errors."); {code} Let me know your thoughts. BTW, these are the steps to reproduce. 
Launch hive in debug mode {code:java} hive --hiveconf hive.root.logger=DEBUG,console;{code} Run the sample sql below {code:java} SET mapreduce.framework.name=local; CREATE TABLE `test_class_cast` as select named_struct('a','a','b','b','c','c','d','d','e',true,'f','f','g',timestamp(1),'h','h'), 'i'; select `_c0`.c, `_c0`.g, `_c0`.a, rank() over (partition by `_c0`.c order by `_c0`.g desc) as rown,`_c0`.f,`_c0`.e from default.test_class_cast where `_c0`.f like '%f%' or `_c0`.f like '%f%' {code} Fails with the exception {code:java} Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"reducesinkkey0":"c","reducesinkkey1":"1970-01-01 05:30:00.001"},"value":{"_col0":{"a":"a","b":"b","c":"c","d":"d","e":true,"f":"f","g":"1970-01-01 05:30:00.001","h":"h"}}} at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:245) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:444) ~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?] at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:392) ~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319) ~[hadoop-mapreduce-client-common-2.6.0.2.2.0.0-2041.jar:?] 
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_92] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_92] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_92] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_92] at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_92] Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to org.apache.hadoop.hive.serde2.io.TimestampWritable at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] at org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219) ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]{code} > Class cast exception while running certain queries with UDAF like rank on > internal struct columns > ------------------------------------------------------------------------------------------------- > > Key: HIVE-19225 > URL: https://issues.apache.org/jira/browse/HIVE-19225 > Project: Hive > Issue Type: Bug > Components: Hive > Affects Versions: 2.3.2 > Reporter: Amruth S > Assignee: Amruth S > Priority: Major > Attachments: HIVE-19225.patch > > > > *To reproduce : [tag - 2.3.2]* > Launch hive in debug mode > {code:java} > hive --hiveconf hive.root.logger=DEBUG,console;{code} > Run the sample sql below > {code:java} > SET mapreduce.framework.name=local; > CREATE TABLE `test_class_cast` as select > 
named_struct('a','a','b','b','c','c','d','d','e',true,'f','f','g',timestamp(1),'h','h'), > 'i'; > select `_c0`.c, `_c0`.g, `_c0`.a, rank() over (partition by `_c0`.c order by > `_c0`.g desc) as rown,`_c0`.f,`_c0`.e from default.test_class_cast where > `_c0`.f like '%f%' or `_c0`.f like '%f%' {code} > Should fail with the exception > {code:java} > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime > Error while processing row (tag=0) > {"key":{"reducesinkkey0":"c","reducesinkkey1":"1970-01-01 > 05:30:00.001"},"value":{"_col0":{"a":"a","b":"b","c":"c","d":"d","e":true,"f":"f","g":"1970-01-01 > 05:30:00.001","h":"h"}}} > at > org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:245) > ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] > at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:444) > ~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?] > at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:392) > ~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?] > at > org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319) > ~[hadoop-mapreduce-client-common-2.6.0.2.2.0.0-2041.jar:?] 
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > ~[?:1.8.0_92] > at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_92] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > ~[?:1.8.0_92] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > ~[?:1.8.0_92] > at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_92] > Caused by: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to > org.apache.hadoop.hive.serde2.io.TimestampWritable > at > org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39) > ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] > at > org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25) > ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] > at > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412) > ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7] > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219) > ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]{code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)