[ https://issues.apache.org/jira/browse/HIVE-15638?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15827388#comment-15827388 ]
Nemon Lou commented on HIVE-15638: ---------------------------------- The following query will pass (adding a 'select *' before the UDTF hwrl): {noformat} set hive.auto.convert.join=false; select substring(c.start_time,1,10) create_date, tt.data_id,tt.word_type,tt.primary_word,tt.primary_nature,tt.primary_offset,tt.related_word,tt.related_nature,tt.related_offset from ( select * from ( select hwrl(data_dt,src,data_id,tag_id,entity_src,pos_tagging) as (data_dt,data_src,data_id,word_type,primary_word,primary_nature,primary_offset,related_word,related_nature,related_offset) from ( select a.data_dt,a.src,a.data_id,a.tag_id,a.entity_src,b.pos_tagging from tb_a a, tb_b b where a.key like 'CP%' and a.data_dt='20160901' and a.data_id=b.data_id and b.src='04' ) t ) ttt ) tt, (select key,start_time from tb_c where data_dt='20160901') c where tt.data_id=c.key ; {noformat} > ArrayIndexOutOfBoundsException when output Columns for UDTF are pruned > ----------------------------------------------------------------------- > > Key: HIVE-15638 > URL: https://issues.apache.org/jira/browse/HIVE-15638 > Project: Hive > Issue Type: Bug > Components: Query Planning > Affects Versions: 1.3.0, 2.1.0 > Reporter: Nemon Lou > > {noformat} > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime > Error while processing row [Error getting row data with exception > java.lang.ArrayIndexOutOfBoundsException: 151 > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202) > at > org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64) > at > 
org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:364) > at > org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:200) > at > org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:186) > at > org.apache.hadoop.hive.ql.exec.MapOperator.toErrorMessage(MapOperator.java:525) > at > org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:494) > at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160) > at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) > at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) > at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:180) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1710) > at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:174) > ] > at > org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:499) > at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160) > ... 8 more > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: > java.lang.ArrayIndexOutOfBoundsException: 151 > at > org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:416) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:878) > at > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) > at > org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:149) > at > org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:489) > ... 
9 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 151 > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202) > at > org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:94) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:77) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:65) > at > org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.populateCachedDistributionKeys(ReduceSinkOperator.java:443) > at > org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:350) > ... 
13 more > {noformat} > The way to reproduce : > DDL: > {noformat} > create table tb_a(data_dt string,key string,src string,data_id string,tag_id > string, entity_src string); > create table tb_b(pos_tagging string,src string,data_id string); > create table tb_c(key string,start_time string,data_dt string); > insert into tb_a > values('20160901','CPI','04','data_id','tag_id','entity_src'); > insert into tb_b values('pos_tagging','04','data_id'); > insert into tb_c values('data_id','start_time_0000','20160901'); > create function hwrl as 'HotwordRelationUDTF' using jar > 'hdfs:///tmp/nemon/udf/hotword.jar'; > {noformat} > UDF File : > {code} > import java.util.ArrayList; > import org.apache.hadoop.hive.ql.exec.UDFArgumentException; > import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; > import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; > import org.apache.hadoop.hive.ql.metadata.HiveException; > import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; > import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; > import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; > import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; > import > org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; > /** > */ > public class HotwordRelationUDTF extends GenericUDTF { > private int argsNumber = 6; > @Override > public StructObjectInspector initialize(ObjectInspector[] args) > throws UDFArgumentException{ > if (args.length != argsNumber) { > String log = ""; > { > for (int i = 0; i < args.length; i++) > log += args[i].toString() + ","; > } > throw new UDFArgumentLengthException( > " OrgIdentifyUDTF (" > + log > + ") has wrong arguments. 
" > + "The function > ProductHotWordUDTF(data_dt,data_src,data_id,word_type,primary_word,txt_For_Handle)" > + " have and only have " + argsNumber > + " arguments."); > } > ArrayList<String> fieldNames = new ArrayList<String>(); > ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(); > for (int i = 0; i < argsNumber; i++){ > if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE) { > throw new UDFArgumentTypeException(1, > "Only primitive type arguments are accepted but " > + args[i].getTypeName() + " is passed"); > } > } > fieldNames.add("data_dt"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); > fieldNames.add("data_src"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); > fieldNames.add("data_id"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); > fieldNames.add("word_type"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); > fieldNames.add("primary_word"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); > fieldNames.add("primary_nature"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); > fieldNames.add("primary_offset"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector); > fieldNames.add("related_word"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); > fieldNames.add("related_nature"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); > fieldNames.add("related_offset"); > fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector); > return ObjectInspectorFactory.getStandardStructObjectInspector( > fieldNames, fieldOIs); > } > @Override > public void process(Object[] args) throws HiveException { > ArrayList<Object> result = new ArrayList<Object>(); > result.add("20160901"); > result.add("data_src"); > result.add("data_id"); > result.add("word_type"); > result.add("primary_word"); > 
result.add("primary_nature"); > result.add(6); > result.add("related_word"); > result.add("related_nature"); > result.add(0); > Object[] ret = result.toArray(new Object[] {}); > forward(ret); > } > @Override > public void close() throws HiveException { > } > } > {code} > query: > {noformat} > set hive.auto.convert.join=false; > select substring(c.start_time,1,10) create_date, > tt.data_id,tt.word_type,tt.primary_word,tt.primary_nature,tt.primary_offset,tt.related_word,tt.related_nature,tt.related_offset > > from ( > select hwrl(data_dt,src,data_id,tag_id,entity_src,pos_tagging) > as > (data_dt,data_src,data_id,word_type,primary_word,primary_nature,primary_offset,related_word,related_nature,related_offset) > from ( > select a.data_dt,a.src,a.data_id,a.tag_id,a.entity_src,b.pos_tagging > from tb_a a, tb_b b > where a.key like 'CP%' > and a.data_dt='20160901' > and a.data_id=b.data_id > and b.src='04' > ) t > ) tt, (select key,start_time from tb_c where data_dt='20160901') c > where tt.data_id=c.key > ; > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)