Thank you for the answer. I tried it, but the exception *Error: Found interface org.apache.hadoop.mapreduce.JobContext, but class was expected* persists.
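For reference: this particular error almost always means that a jar on the classpath (most often avro-mapred, which AvroMultipleOutputs lives in) was compiled against Hadoop 1, where org.apache.hadoop.mapreduce.JobContext is a class, while the job runs on Hadoop 2, where it became an interface. Changing how the Job object is constructed cannot fix a binary incompatibility; the Hadoop-2 build of the Avro artifact is needed. A minimal sketch of the usual Maven fix, assuming an Avro 1.7.x build (the coordinates and classifier are the published ones; the exact version shown is only an example):

    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro-mapred</artifactId>
      <version>1.7.7</version>
      <!-- without a classifier, avro-mapred 1.7.x is the Hadoop 1 build;
           the hadoop2 classifier selects the build compiled against the
           Hadoop 2 (interface-based) mapreduce API -->
      <classifier>hadoop2</classifier>
    </dependency>

The same applies to any uber-jar shipped to EMR: the hadoop1 build of avro-mapred must not leak into it via transitive dependencies.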
On Tue, Feb 24, 2015 at 3:12 PM, Artem Ervits <[email protected]> wrote:

> try this
>
>     Job job = Job.getInstance(conf);
>     job.setJobName(name);
>
> Artem Ervits
>
> On Feb 21, 2015 10:57 PM, "David Ginzburg" <[email protected]> wrote:
>
>> Hi,
>>
>> I am trying to run an MR job on EMR with AvroMultipleOutputs.
>>
>> When running with an AMI with Hadoop 2.2 or 2.5, I get the following
>> exception:
>>
>>     Found interface org.apache.hadoop.mapreduce.JobContext, but class was expected
>>
>> I read that it is related to incompatible Hadoop versions, so I modified
>> the setup. When running with an AMI with Hadoop 1.0.3, I get the following
>> exception:
>>
>>     java.lang.NullPointerException
>>         at org.apache.hadoop.io.serializer.SerializationFactory.getSerializer(SerializationFactory.java:73)
>>         at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:981)
>>         at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
>>         at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
>>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
>>         at org.apache.hadoop.mapred.Child$4.run(Child.java:259)
>>         at java.security.AccessController.doPrivileged(Native Method)
>>         at javax.security.auth.Subject.doAs(Subject.java:415)
>>         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1140)
>>         at org.apache.hadoop.mapred.Child.main(Child.java:253)
>>
>> The driver code is:
>>
>>     Job job = new Job(getConf(), "myad");
>>
>>     job.setJarByClass(myAdTextLineMapper.class);
>>     Path inputOflineFiles = new Path(args[0]);
>>     Path inputOfUbberFiles = new Path(args[1]);
>>     FileInputFormat.setInputPaths(job, inputOflineFiles);
>>
>>     job.setMapperClass(myAdTextLineMapper.class);
>>     job.setMapOutputKeyClass(Text.class);
>>     job.setMapOutputValueClass(UberRecord.class);
>>
>>     job.setOutputFormatClass(AvroSequenceFileOutputFormat.class);
>>     AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
>>     AvroJob.setOutputValueSchema(job, UberRecord.SCHEMA$);
>>
>>     job.setReducerClass(myAdReducer.class);
>>     job.setOutputKeyClass(Text.class);
>>     job.setOutputValueClass(UberRecord.class);
>>     job.setNumReduceTasks(2);
>>
>>     String baseoutputFolder = args[2];
>>     job.getConfiguration().set(myAdReducer.BASE_OUTPUT_FOLDER, baseoutputFolder);
>>
>>     LazyOutputFormat.setOutputFormatClass(job, AvroSequenceFileOutputFormat.class);
>>     FileOutputFormat.setOutputPath(job, new Path(baseoutputFolder));
>>     return job.waitForCompletion(true) ? 0 : 1;
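On the Hadoop 1.0.3 attempt: the NullPointerException inside SerializationFactory.getSerializer typically means the shuffle found no serializer for the map output value class. UberRecord is an Avro record, not a Writable, so job.setMapOutputValueClass(UberRecord.class) leaves Hadoop with nothing registered that can serialize it. A minimal sketch of one common remedy, assuming the Avro 1.7.x mapreduce API already used in the driver above (untested against this job, and it changes the mapper and reducer signatures; AvroValue and AvroJob are the classes already imported in the post):

    // Driver: keep Text as the map output key, but declare the value's Avro
    // schema. AvroJob.setMapOutputValueSchema sets the map output value class
    // to AvroValue.class and registers AvroSerialization in io.serializations,
    // which is exactly the lookup that was returning null.
    job.setMapOutputKeyClass(Text.class); // Text still uses WritableSerialization
    AvroJob.setMapOutputValueSchema(job, UberRecord.SCHEMA$);

    // Mapper: wrap each record before writing it.
    context.write(new Text(ub.getAuctionId().toString()),
            new AvroValue<UberRecord>(ub));

    // Reducer: values now arrive wrapped, so the class declares
    //   Reducer<Text, AvroValue<UberRecord>, AvroKey<CharSequence>, AvroValue<UberRecord>>
    // and each record is unwrapped with ubi.datum() inside the loop.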
>>
>> The mapper and reducer:
>>
>>     @Override
>>     public void setup(Context ctx) {
>>         ubp = new UberRecordProcessor();
>>     }
>>
>>     @Override
>>     protected void map(LongWritable key, Text value, Context context)
>>             throws IOException, InterruptedException {
>>         try {
>>             handleLineinMap(value);
>>             if (ub != null) {
>>                 context.write(new Text(ub.getAuctionId().toString()), ub);
>>                 context.getCounter("myAd", "myAdTextLineMapper").increment(1);
>>             } else {
>>                 context.getCounter("myAd", "myAdTextLineMapperNull").increment(1);
>>             }
>>         } catch (Exception e) {
>>             context.getCounter("myAd", "myAdTextLineMapperError").increment(1);
>>             logger.warn("could not parse line " + value.toString(), e);
>>         }
>>     }
>>
>>     public class myAdReducer extends
>>             Reducer<Text, UberRecord, AvroKey<CharSequence>, AvroValue<UberRecord>> {
>>
>>         private static Logger logger = Logger.getLogger(myAdReducer.class);
>>         public static final String BASE_OUTPUT_FOLDER = "base.output.folder";
>>
>>         AvroMultipleOutputs amos;
>>         MultipleOutputs<Text, UberRecord> outputs;
>>         UberRecordProcessor ubp = new UberRecordProcessor();
>>
>>         // "year=%s/month=%s/day=%s/hour=%s"
>>         private String baseOutputPath;
>>         private long reduceAttemptUniqueIdentifier = System.currentTimeMillis();
>>
>>         // 2015-02-01T18:00:25.673Z
>>         static DateTimeFormatter dateformat =
>>                 DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZZ");
>>
>>         @Override
>>         protected void setup(Context context)
>>                 throws IOException, InterruptedException {
>>             amos = new AvroMultipleOutputs(context);
>>             baseOutputPath = context.getConfiguration().get(BASE_OUTPUT_FOLDER);
>>         }
>>
>>         @Override
>>         protected void reduce(Text key, Iterable<UberRecord> values, Context context)
>>                 throws IOException, InterruptedException {
>>             try {
>>                 UberRecord ub = new UberRecord();
>>                 for (UberRecord ubi : values) {
>>                     // enrich
>>                     if (ubi.getExchange() == null) {
>>                         continue;
>>                     }
>>                     BaseBidRequestEnricher enc = BaseBidRequestEnricher
>>                             .getEnricher(ubi.getExchange().toString());
>>                     enc.enrich(ubi);
>>                     ub = mergeUB(ub, ubi);
>>                 }
>>                 logger.info("Writing UberRecord [" + ub.toString() + "]");
>>                 String partition = getPartition(ub);
>>
>>                 amos.write(new AvroKey<CharSequence>(key.toString()),
>>                         new AvroValue<UberRecord>(ub),
>>                         baseOutputPath + "/" + partition + "/p"
>>                                 + reduceAttemptUniqueIdentifier);
>>             } catch (Exception e) {
>>                 logger.error("failed to reduce key " + key, e);
>>             }
>>         }
>>
>>         public UberRecord mergeUB(UberRecord dest, UberRecord src) {
>>             List<Field> fields = UberRecord.getClassSchema().getFields();
>>             List<Field> engFields = EngageData.getClassSchema().getFields();
>>             for (Field field : fields) {
>>                 if (field.name().equals("engageData")
>>                         && dest.getEngageData() != null) {
>>                     EngageData mergedEng = dest.getEngageData();
>>                     for (Field engField : engFields) {
>>                         if (dest.getEngageData().get(engField.name()) == null) {
>>                             mergedEng.put(engField.name(),
>>                                     src.getEngageData().get(engField.name()));
>>                         }
>>                     }
>>                     dest.setEngageData(mergedEng);
>>                 } else {
>>                     if (dest.get(field.name()) == null) {
>>                         dest.put(field.name(), src.get(field.name()));
>>                     }
>>                 }
>>             }
>>             return dest;
>>         }
>>     }
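One more thing worth checking in the reducer: AvroMultipleOutputs keeps per-path record writers open, and nothing in the snippet closes them. If the real class has no cleanup() override (none is shown above), files written through amos.write(...) can come out empty or truncated. A minimal sketch, assuming the amos field from the post:

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        // Flushes and closes every writer AvroMultipleOutputs opened for
        // the baseOutputPath + partition paths used in reduce().
        amos.close();
    }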
