[ 
https://issues.apache.org/jira/browse/HIVE-19225?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Amruth S updated HIVE-19225:
----------------------------
    Description: 
Certain queries that use the rank function cause a class cast exception.
{noformat}
Caused by: java.lang.ClassCastException: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to 
org.apache.hadoop.hive.serde2.io.TimestampWritable
        at 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39)
        at 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25)
        at 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412)
        at 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219)
        at 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank$GenericUDAFAbstractRankEvaluator.iterate(GenericUDAFRank.java:153)
        at 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:192)
        at 
org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:407)
        at 
org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325)
        at 
org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:139)
        at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
        at 
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
        at 
org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:236)
        ... 7 more

2018-03-29 09:28:43,432 INFO [main] org.apache.hadoop.mapred.Task: Runnning 
cleanup for the task
{noformat}
The following change fixes this.

The evaluator seems to skip the case where the primary object emitted is a struct. 
I modified the code to find the field inside the struct.
{code:java}
diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
 
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
index 36a500790a..e7731e99d7 100644
--- 
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
+++ 
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
@@ -22,6 +22,7 @@
import java.util.Arrays;
import java.util.List;

+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -171,6 +172,10 @@ public Object getStructFieldData(Object data, StructField 
fieldRef) {
// so we have to do differently.
boolean isArray = data.getClass().isArray();
if (!isArray && !(data instanceof List)) {
+ if (data instanceof LazyBinaryStruct
+ && fieldRef.getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
+ return ((LazyBinaryStruct) data).getField(((MyField) fieldRef).fieldID);
+ }
if (!warned) {
LOG.warn("Invalid type for struct " + data.getClass());
LOG.warn("ignoring similar errors.");
{code}
Let me know your thoughts

 

BTW, this is the setup to reproduce the issue. 

Launch hive in debug mode
{code:java}
hive --hiveconf hive.root.logger=DEBUG,console;{code}
Run the sample sql below
{code:java}
SET mapreduce.framework.name=local; 

CREATE TABLE `test_class_cast` as select 
named_struct('a','a','b','b','c','c','d','d','e',true,'f','f','g',timestamp(1),'h','h'),
 'i'; 

select `_c0`.c, `_c0`.g, `_c0`.a, rank() over (partition by `_c0`.c order by 
`_c0`.g desc) as rown,`_c0`.f,`_c0`.e from default.test_class_cast where 
`_c0`.f like '%f%' or `_c0`.f like '%f%' {code}
 

  was:
Certain queries that use the rank function cause a class cast exception.
{noformat}
Caused by: java.lang.ClassCastException: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to 
org.apache.hadoop.hive.serde2.io.TimestampWritable
        at 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39)
        at 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25)
        at 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412)
        at 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219)
        at 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank$GenericUDAFAbstractRankEvaluator.iterate(GenericUDAFRank.java:153)
        at 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:192)
        at 
org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:407)
        at 
org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325)
        at 
org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:139)
        at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
        at 
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
        at 
org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:236)
        ... 7 more

2018-03-29 09:28:43,432 INFO [main] org.apache.hadoop.mapred.Task: Runnning 
cleanup for the task
{noformat}
The following change fixes this.

The evaluator seems to skip the case where the primary object emitted is a struct. 
I modified the code to find the field inside the struct.
{code:java}
diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
 
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
index 36a500790a..e7731e99d7 100644
--- 
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
+++ 
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
@@ -22,6 +22,7 @@
import java.util.Arrays;
import java.util.List;

+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -171,6 +172,10 @@ public Object getStructFieldData(Object data, StructField 
fieldRef) {
// so we have to do differently.
boolean isArray = data.getClass().isArray();
if (!isArray && !(data instanceof List)) {
+ if (data instanceof LazyBinaryStruct
+ && fieldRef.getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
+ return ((LazyBinaryStruct) data).getField(((MyField) fieldRef).fieldID);
+ }
if (!warned) {
LOG.warn("Invalid type for struct " + data.getClass());
LOG.warn("ignoring similar errors.");
{code}
Let me know your thoughts


> Class cast exception while running certain queries with UDAF like rank on 
> internal struct columns
> -------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-19225
>                 URL: https://issues.apache.org/jira/browse/HIVE-19225
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive
>    Affects Versions: 2.3.2
>            Reporter: Amruth S
>            Assignee: Amruth S
>            Priority: Major
>         Attachments: HIVE-19225.patch
>
>
> Certain queries that use the rank function cause a class cast exception.
> {noformat}
> Caused by: java.lang.ClassCastException: 
> org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to 
> org.apache.hadoop.hive.serde2.io.TimestampWritable
>       at 
> org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:39)
>       at 
> org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector.getPrimitiveJavaObject(WritableTimestampObjectInspector.java:25)
>       at 
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:412)
>       at 
> org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.copyToStandardObject(GenericUDAFRank.java:219)
>       at 
> org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank$GenericUDAFAbstractRankEvaluator.iterate(GenericUDAFRank.java:153)
>       at 
> org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:192)
>       at 
> org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:407)
>       at 
> org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325)
>       at 
> org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:139)
>       at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
>       at 
> org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
>       at 
> org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:236)
>       ... 7 more
> 2018-03-29 09:28:43,432 INFO [main] org.apache.hadoop.mapred.Task: Runnning 
> cleanup for the task
> {noformat}
> The following change fixes this.
> The evaluator seems to skip the case where the primary object emitted is a struct. 
> I modified the code to find the field inside the struct.
> {code:java}
> diff --git 
> a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
>  
> b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
> index 36a500790a..e7731e99d7 100644
> --- 
> a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
> +++ 
> b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
> @@ -22,6 +22,7 @@
> import java.util.Arrays;
> import java.util.List;
> +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
> import org.slf4j.Logger;
> import org.slf4j.LoggerFactory;
> @@ -171,6 +172,10 @@ public Object getStructFieldData(Object data, 
> StructField fieldRef) {
> // so we have to do differently.
> boolean isArray = data.getClass().isArray();
> if (!isArray && !(data instanceof List)) {
> + if (data instanceof LazyBinaryStruct
> + && fieldRef.getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
> + return ((LazyBinaryStruct) data).getField(((MyField) fieldRef).fieldID);
> + }
> if (!warned) {
> LOG.warn("Invalid type for struct " + data.getClass());
> LOG.warn("ignoring similar errors.");
> {code}
> Let me know your thoughts
>  
> BTW, this is the setup to reproduce the issue. 
> Launch hive in debug mode
> {code:java}
> hive --hiveconf hive.root.logger=DEBUG,console;{code}
> Run the sample sql below
> {code:java}
> SET mapreduce.framework.name=local; 
> CREATE TABLE `test_class_cast` as select 
> named_struct('a','a','b','b','c','c','d','d','e',true,'f','f','g',timestamp(1),'h','h'),
>  'i'; 
> select `_c0`.c, `_c0`.g, `_c0`.a, rank() over (partition by `_c0`.c order by 
> `_c0`.g desc) as rown,`_c0`.f,`_c0`.e from default.test_class_cast where 
> `_c0`.f like '%f%' or `_c0`.f like '%f%' {code}
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to