[ 
https://issues.apache.org/jira/browse/HIVE-11396?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Gopal V reassigned HIVE-11396:
------------------------------

    Assignee: Gopal V  (was: Hitesh Shah)

> Hive on Tez does not work well with Sequence Files with different keys
> ----------------------------------------------------------------------
>
>                 Key: HIVE-11396
>                 URL: https://issues.apache.org/jira/browse/HIVE-11396
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Rajat Jain
>            Assignee: Gopal V
>
> {code}
> hive> create external table foo (a string) partitioned by (p string) stored 
> as sequencefile location 'hdfs:///user/hive/foo'
> # A useless file with some text in hdfs
> hive> create external table tmp_foo (a string) location 
> 'hdfs:///tmp/random_data'
> hive> insert overwrite table foo partition (p = '1') select * from tmp_foo
> {code}
> After this step, {{foo}} contains one partition with a text file.
> Now use this Java program to generate a second sequence file (but with a 
> different key class):
> {code}
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.BytesWritable;
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapreduce.Job;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.apache.hadoop.mapreduce.Reducer;
> import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
> import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
> import java.io.IOException;
> public class SequenceFileWriter {
>   public static void main(String[] args) throws IOException,
>       InterruptedException, ClassNotFoundException {
>     Configuration conf = new Configuration();
>     Job job = new Job(conf);
>     job.setJobName("Convert Text");
>     job.setJarByClass(Mapper.class);
>     job.setMapperClass(Mapper.class);
>     job.setReducerClass(Reducer.class);
>     // increase if you need sorting or a special number of files
>     job.setNumReduceTasks(0);
>     job.setOutputKeyClass(LongWritable.class);
>     job.setOutputValueClass(Text.class);
>     job.setOutputFormatClass(SequenceFileOutputFormat.class);
>     job.setInputFormatClass(TextInputFormat.class);
>     TextInputFormat.addInputPath(job, new Path("/tmp/random_data"));
>     SequenceFileOutputFormat.setOutputPath(job, new 
> Path("/user/hive/foo/p=2/"));
>     // submit and wait for completion
>     job.waitForCompletion(true);
>   }
> }
> {code}
> Now run {{select count(*) from foo;}}. It passes with MapReduce, but fails 
> with Tez with the following error:
> {code}
> hive> set hive.execution.engine=tez;
> hive> select count(*) from foo;
> Status: Failed
> Vertex failed, vertexName=Map 1, vertexId=vertex_1438013895843_0007_1_00, 
> diagnostics=[Task failed, taskId=task_1438013895843_0007_1_00_000000, 
> diagnostics=[TaskAttempt 0 failed, info=[Error: Failure while running 
> task:java.lang.RuntimeException: 
> org.apache.hadoop.hive.ql.metadata.HiveException: java.io.IOException: 
> java.io.IOException: While processing file 
> hdfs://localhost:9000/user/hive/foo/p=2/part-m-00000. wrong key class: 
> org.apache.hadoop.io.BytesWritable is not class 
> org.apache.hadoop.io.LongWritable
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:171)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:137)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:337)
>       at 
> org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:179)
>       at 
> org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:171)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:415)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1635)
>       at 
> org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:171)
>       at 
> org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:167)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:262)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: 
> java.io.IOException: java.io.IOException: While processing file 
> hdfs://localhost:9000/user/hive/foo/p=2/part-m-00000. wrong key class: 
> org.apache.hadoop.io.BytesWritable is not class 
> org.apache.hadoop.io.LongWritable
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:71)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:290)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:148)
>       ... 14 more
> Caused by: java.io.IOException: java.io.IOException: While processing file 
> hdfs://localhost:9000/user/hive/foo/p=2/part-m-00000. wrong key class: 
> org.apache.hadoop.io.BytesWritable is not class 
> org.apache.hadoop.io.LongWritable
>       at 
> org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderNextException(HiveIOExceptionHandlerChain.java:121)
>       at 
> org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderNextException(HiveIOExceptionHandlerUtil.java:77)
>       at 
> org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:372)
>       at 
> org.apache.hadoop.hive.ql.io.HiveRecordReader.doNext(HiveRecordReader.java:79)
>       at 
> org.apache.hadoop.hive.ql.io.HiveRecordReader.doNext(HiveRecordReader.java:33)
>       at 
> org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:118)
>       at 
> org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.next(TezGroupedSplitsInputFormat.java:137)
>       at 
> org.apache.tez.mapreduce.lib.MRReaderMapred.next(MRReaderMapred.java:113)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:61)
>       ... 16 more
> Caused by: java.io.IOException: While processing file 
> hdfs://localhost:9000/user/hive/foo/p=2/part-m-00000. wrong key class: 
> org.apache.hadoop.io.BytesWritable is not class 
> org.apache.hadoop.io.LongWritable
>       at 
> org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.handleExceptionWhenReadNext(HiveContextAwareRecordReader.java:386)
>       at 
> org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:368)
>       ... 22 more
> Caused by: java.io.IOException: wrong key class: 
> org.apache.hadoop.io.BytesWritable is not class 
> org.apache.hadoop.io.LongWritable
>       at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:2484)
>       at 
> org.apache.hadoop.mapred.SequenceFileRecordReader.next(SequenceFileRecordReader.java:82)
>       at 
> org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:359)
>       ... 22 more
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to