[
https://issues.apache.org/jira/browse/HIVE-19225?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Amruth S updated HIVE-19225:
----------------------------
Description:
*To reproduce : [tag - 2.3.2]*
Launch hive in debug mode
{code:java}
hive --hiveconf hive.root.logger=DEBUG,console;{code}
Run the sample sql below
{code:java}
SET mapreduce.framework.name=local;
CREATE TABLE `test_class_cast` as select
named_struct('a','a','b','b','c','c','d','d','e',true,'f','f','g',timestamp(1),'h','h'),
'i';
select `_c0`.c, `_c0`.g, `_c0`.a, rank() over (partition by `_c0`.c order by
`_c0`.g desc) as rown,`_c0`.f,`_c0`.e from default.test_class_cast where
`_c0`.f like '%f%' or `_c0`.f like '%f%' {code}
The query should fail with the following exception:
{code:java}
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
while processing row (tag=0)
{"key":{"reducesinkkey0":"c","reducesinkkey1":"1970-01-01
05:30:00.001"},"value":{"_col0":{"a":"a","b":"b","c":"c","d":"d","e":true,"f":"f","g":"1970-01-01
05:30:00.001","h":"h"}}}
at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:245)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:444)
~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?]
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:392)
~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?]
at
org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319)
~[hadoop-mapreduce-client-common-2.6.0.2.2.0.0-2041.jar:?]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
~[?:1.8.0_92]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_92]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
~[?:1.8.0_92]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
~[?:1.8.0_92]
at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_92]
Caused by: java.lang.ClassCastException:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to
org.apache.hadoop.hive.serde2.io.TimestampWritable
at
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
at
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
at
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
at
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]{code}
was:
Certain queries with the rank function cause a class cast exception.
{noformat}
Caused by: java.lang.ClassCastException:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to
org.apache.hadoop.hive.serde2.io.TimestampWritable
at
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39)
at
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25)
at
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412)
at
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219)
at
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank$GenericUDAFAbstractRankEvaluator.iterate(GenericUDAFRank.java:153)
at
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:192)
at
org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:407)
at
org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325)
at
org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:139)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
at
org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:236)
... 7 more
2018-03-29 09:28:43,432 INFO [main] org.apache.hadoop.mapred.Task: Runnning
cleanup for the task
{noformat}
The following change fixes this.
The evaluator seems to skip the case where the primary object emitted is a struct.
The code is modified to look up the field inside the struct.
{code:java}
diff --git
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
index 36a500790a..e7731e99d7 100644
---
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
+++
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
@@ -22,6 +22,7 @@
import java.util.Arrays;
import java.util.List;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -171,6 +172,10 @@ public Object getStructFieldData(Object data, StructField
fieldRef) {
// so we have to do differently.
boolean isArray = data.getClass().isArray();
if (!isArray && !(data instanceof List)) {
+ if (data instanceof LazyBinaryStruct
+ && fieldRef.getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
+ return ((LazyBinaryStruct) data).getField(((MyField) fieldRef).fieldID);
+ }
if (!warned) {
LOG.warn("Invalid type for struct " + data.getClass());
LOG.warn("ignoring similar errors.");
{code}
Let me know your thoughts.
BTW, here is the setup to reproduce the issue:
Launch hive in debug mode
{code:java}
hive --hiveconf hive.root.logger=DEBUG,console;{code}
Run the sample sql below
{code:java}
SET mapreduce.framework.name=local;
CREATE TABLE `test_class_cast` as select
named_struct('a','a','b','b','c','c','d','d','e',true,'f','f','g',timestamp(1),'h','h'),
'i';
select `_c0`.c, `_c0`.g, `_c0`.a, rank() over (partition by `_c0`.c order by
`_c0`.g desc) as rown,`_c0`.f,`_c0`.e from default.test_class_cast where
`_c0`.f like '%f%' or `_c0`.f like '%f%' {code}
It fails with the following exception:
{code:java}
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
while processing row (tag=0)
{"key":{"reducesinkkey0":"c","reducesinkkey1":"1970-01-01
05:30:00.001"},"value":{"_col0":{"a":"a","b":"b","c":"c","d":"d","e":true,"f":"f","g":"1970-01-01
05:30:00.001","h":"h"}}}
at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:245)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:444)
~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?]
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:392)
~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?]
at
org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319)
~[hadoop-mapreduce-client-common-2.6.0.2.2.0.0-2041.jar:?]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
~[?:1.8.0_92]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_92]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
~[?:1.8.0_92]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
~[?:1.8.0_92]
at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_92]
Caused by: java.lang.ClassCastException:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to
org.apache.hadoop.hive.serde2.io.TimestampWritable
at
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
at
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
at
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
at
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219)
~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]{code}
> Class cast exception while running certain queries with UDAF like rank on
> internal struct columns
> -------------------------------------------------------------------------------------------------
>
> Key: HIVE-19225
> URL: https://issues.apache.org/jira/browse/HIVE-19225
> Project: Hive
> Issue Type: Bug
> Components: Hive
> Affects Versions: 2.3.2
> Reporter: Amruth S
> Assignee: Amruth S
> Priority: Major
> Attachments: HIVE-19225.patch
>
>
>
> *To reproduce : [tag - 2.3.2]*
> Launch hive in debug mode
> {code:java}
> hive --hiveconf hive.root.logger=DEBUG,console;{code}
> Run the sample sql below
> {code:java}
> SET mapreduce.framework.name=local;
> CREATE TABLE `test_class_cast` as select
> named_struct('a','a','b','b','c','c','d','d','e',true,'f','f','g',timestamp(1),'h','h'),
> 'i';
> select `_c0`.c, `_c0`.g, `_c0`.a, rank() over (partition by `_c0`.c order by
> `_c0`.g desc) as rown,`_c0`.f,`_c0`.e from default.test_class_cast where
> `_c0`.f like '%f%' or `_c0`.f like '%f%' {code}
> The query should fail with the following exception:
> {code:java}
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime
> Error while processing row (tag=0)
> {"key":{"reducesinkkey0":"c","reducesinkkey1":"1970-01-01
> 05:30:00.001"},"value":{"_col0":{"a":"a","b":"b","c":"c","d":"d","e":true,"f":"f","g":"1970-01-01
> 05:30:00.001","h":"h"}}}
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:245)
> ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
> at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:444)
> ~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?]
> at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:392)
> ~[hadoop-mapreduce-client-core-2.6.0.2.2.0.0-2041.jar:?]
> at
> org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319)
> ~[hadoop-mapreduce-client-common-2.6.0.2.2.0.0-2041.jar:?]
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> ~[?:1.8.0_92]
> at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_92]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> ~[?:1.8.0_92]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> ~[?:1.8.0_92]
> at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_92]
> Caused by: java.lang.ClassCastException:
> org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to
> org.apache.hadoop.hive.serde2.io.TimestampWritable
> at
> org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39)
> ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
> at
> org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25)
> ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
> at
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412)
> ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]
> at
> org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219)
> ~[hive-exec-2.3.2.fk.7.jar:2.3.2.fk.7]{code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)