[ https://issues.apache.org/jira/browse/HIVE-15638?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15827388#comment-15827388 ]

Nemon Lou commented on HIVE-15638:
----------------------------------

The following query passes (adding a 'select *' before the UDTF hwrl):
{noformat}
set hive.auto.convert.join=false;
select substring(c.start_time,1,10) create_date,
tt.data_id,tt.word_type,tt.primary_word,tt.primary_nature,tt.primary_offset,tt.related_word,tt.related_nature,tt.related_offset
from (
select * from (
select hwrl(data_dt,src,data_id,tag_id,entity_src,pos_tagging) as
(data_dt,data_src,data_id,word_type,primary_word,primary_nature,primary_offset,related_word,related_nature,related_offset)
from (
select a.data_dt,a.src,a.data_id,a.tag_id,a.entity_src,b.pos_tagging
from tb_a a, tb_b b
where a.key like 'CP%' 
and a.data_dt='20160901'
and a.data_id=b.data_id
and b.src='04'
) t
) ttt
) tt, (select key,start_time from tb_c where data_dt='20160901') c 
where tt.data_id=c.key 
;
{noformat}
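
My reading of the stack trace below: the column pruner drops some of the UDTF's output columns on the write side, while ReduceSinkOperator still evaluates columns against the full 10-field struct declared in initialize(), so LazyBinary parsing walks off the end of the serialized row. The extra 'select *' presumably keeps the pruner from rewriting the UDTF output. A minimal, self-contained Java sketch of that arity mismatch (a toy length-prefixed encoding and a hypothetical class name, not Hive's actual LazyBinary code):
{code}
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

/** Toy illustration: a row serialized with fewer fields than the reader's
 *  schema expects fails with an ArrayIndexOutOfBoundsException, the same
 *  shape of failure as LazyBinaryUtils.readVInt in the trace below. */
public class ArityMismatchDemo {

  // Writer side: suppose pruning kept only 3 of the 10 declared fields.
  static byte[] writeRow(String... fields) {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    for (String f : fields) {
      byte[] b = f.getBytes(StandardCharsets.UTF_8);
      bos.write(b.length);      // single-byte length prefix, a toy stand-in for a vint
      bos.write(b, 0, b.length);
    }
    return bos.toByteArray();
  }

  // Reader side: still expects 10 length-prefixed fields, like an object
  // inspector built from the UDTF's full initialize() schema.
  static String[] readRow(byte[] bytes, int expectedFields) {
    String[] out = new String[expectedFields];
    int pos = 0;
    for (int i = 0; i < expectedFields; i++) {
      int len = bytes[pos++];   // indexes past bytes.length once the written
                                // fields run out -> ArrayIndexOutOfBoundsException
      out[i] = new String(bytes, pos, len, StandardCharsets.UTF_8);
      pos += len;
    }
    return out;
  }

  public static void main(String[] args) {
    byte[] row = writeRow("20160901", "data_id", "word_type"); // 3 fields written
    readRow(row, 10);           // reader expects 10 -> throws
  }
}
{code}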

> ArrayIndexOutOfBoundsException when output Columns for UDTF are pruned 
> -----------------------------------------------------------------------
>
>                 Key: HIVE-15638
>                 URL: https://issues.apache.org/jira/browse/HIVE-15638
>             Project: Hive
>          Issue Type: Bug
>          Components: Query Planning
>    Affects Versions: 1.3.0, 2.1.0
>            Reporter: Nemon Lou
>
> {noformat}
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ArrayIndexOutOfBoundsException: 151
>     at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314)
>     at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183)
>     at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142)
>     at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202)
>     at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64)
>     at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:364)
>     at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:200)
>     at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:186)
>     at org.apache.hadoop.hive.ql.exec.MapOperator.toErrorMessage(MapOperator.java:525)
>     at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:494)
>     at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160)
>     at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
>     at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
>     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
>     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:180)
>     at java.security.AccessController.doPrivileged(Native Method)
>     at javax.security.auth.Subject.doAs(Subject.java:422)
>     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1710)
>     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:174)
>  ]
>     at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:499)
>     at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160)
>     ... 8 more
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ArrayIndexOutOfBoundsException: 151
>     at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:416)
>     at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:878)
>     at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
>     at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:149)
>     at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:489)
>     ... 9 more
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 151
>     at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314)
>     at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183)
>     at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142)
>     at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202)
>     at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64)
>     at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:94)
>     at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:77)
>     at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:65)
>     at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.populateCachedDistributionKeys(ReduceSinkOperator.java:443)
>     at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:350)
>     ... 13 more
> {noformat}
> The way to reproduce :
> DDL:
> {noformat}
> create table tb_a(data_dt string,key string,src string,data_id string,tag_id string,entity_src string);
> create table tb_b(pos_tagging string,src string,data_id string);
> create table tb_c(key string,start_time string,data_dt string);
> insert into tb_a values('20160901','CPI','04','data_id','tag_id','entity_src');
> insert into tb_b values('pos_tagging','04','data_id');
> insert into tb_c values('data_id','start_time_0000','20160901');
> create function hwrl as 'HotwordRelationUDTF' using jar 'hdfs:///tmp/nemon/udf/hotword.jar';
> {noformat}
> UDF File :
> {code}
> import java.util.ArrayList;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
> import org.apache.hadoop.hive.ql.metadata.HiveException;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
> import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
> 
> /**
>  * Takes 6 primitive arguments and emits rows of a 10-field struct.
>  */
> public class HotwordRelationUDTF extends GenericUDTF {
>   private int argsNumber = 6;
> 
>   @Override
>   public StructObjectInspector initialize(ObjectInspector[] args)
>           throws UDFArgumentException {
>     if (args.length != argsNumber) {
>       StringBuilder log = new StringBuilder();
>       for (int i = 0; i < args.length; i++) {
>         log.append(args[i].toString()).append(",");
>       }
>       throw new UDFArgumentLengthException(
>               " HotwordRelationUDTF (" + log
>                       + ") has wrong arguments. "
>                       + "The function HotwordRelationUDTF(data_dt,data_src,data_id,word_type,primary_word,txt_For_Handle)"
>                       + " must have exactly " + argsNumber + " arguments.");
>     }
>     for (int i = 0; i < argsNumber; i++) {
>       if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
>         throw new UDFArgumentTypeException(i,
>                 "Only primitive type arguments are accepted but "
>                         + args[i].getTypeName() + " is passed");
>       }
>     }
>     // Declare the 10 output columns; process() must forward exactly this many fields.
>     ArrayList<String> fieldNames = new ArrayList<String>();
>     ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
>     fieldNames.add("data_dt");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
>     fieldNames.add("data_src");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
>     fieldNames.add("data_id");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
>     fieldNames.add("word_type");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
>     fieldNames.add("primary_word");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
>     fieldNames.add("primary_nature");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
>     fieldNames.add("primary_offset");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
>     fieldNames.add("related_word");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
>     fieldNames.add("related_nature");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
>     fieldNames.add("related_offset");
>     fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
>     return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
>   }
> 
>   @Override
>   public void process(Object[] args) throws HiveException {
>     // Forward one constant 10-field row, matching the schema declared above.
>     ArrayList<Object> result = new ArrayList<Object>();
>     result.add("20160901");
>     result.add("data_src");
>     result.add("data_id");
>     result.add("word_type");
>     result.add("primary_word");
>     result.add("primary_nature");
>     result.add(6);
>     result.add("related_word");
>     result.add("related_nature");
>     result.add(0);
>     forward(result.toArray(new Object[0]));
>   }
> 
>   @Override
>   public void close() throws HiveException {
>   }
> }
> {code}
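> For reference, the Object[] passed to forward() has to line up one-for-one with the struct declared in initialize() (ten fields here). A hypothetical defensive variant of process() (not part of this repro) would fail fast on any arity drift instead of letting it surface later inside the serde:
> {code}
>   // Hypothetical: a constant shared with initialize()'s 10-field schema.
>   private static final int NUM_OUTPUT_FIELDS = 10;
> 
>   @Override
>   public void process(Object[] args) throws HiveException {
>     Object[] row = {"20160901", "data_src", "data_id", "word_type",
>         "primary_word", "primary_nature", 6,
>         "related_word", "related_nature", 0};
>     // A mismatch would otherwise only show up downstream as a serde-level
>     // ArrayIndexOutOfBoundsException during LazyBinary parsing.
>     if (row.length != NUM_OUTPUT_FIELDS) {
>       throw new HiveException("expected " + NUM_OUTPUT_FIELDS
>           + " output fields, got " + row.length);
>     }
>     forward(row);
>   }
> {code}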
> query:
> {noformat}
> set hive.auto.convert.join=false;
> select substring(c.start_time,1,10) create_date,
> tt.data_id,tt.word_type,tt.primary_word,tt.primary_nature,tt.primary_offset,tt.related_word,tt.related_nature,tt.related_offset
> from (
> select hwrl(data_dt,src,data_id,tag_id,entity_src,pos_tagging) as
> (data_dt,data_src,data_id,word_type,primary_word,primary_nature,primary_offset,related_word,related_nature,related_offset)
> from (
> select a.data_dt,a.src,a.data_id,a.tag_id,a.entity_src,b.pos_tagging
> from tb_a a, tb_b b
> where a.key like 'CP%' 
> and a.data_dt='20160901'
> and a.data_id=b.data_id
> and b.src='04'
> ) t
> ) tt, (select key,start_time from tb_c where data_dt='20160901') c 
> where tt.data_id=c.key 
> ;
> {noformat}


