Eh
So here is my attempt:
But it's not working :(
Exception in thread "main" org.apache.hadoop.mapred.InvalidJobConfException: Output directory not set.
        at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:128)
        at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:889)
        at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:850)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:416)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1093)
        at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:850)
        at org.apache.hadoop.mapreduce.Job.submit(Job.java:500)
        at org.rdf.RdfFormatter.run(RdfFormatter.java:142)
        at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
        at org.rdf.RdfFormatter.main(RdfFormatter.java:177)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:616)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
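
If I am reading the trace right, the check that fails is FileOutputFormat.checkOutputSpecs, which as far as I can tell only applies when the job's output format is a FileOutputFormat subclass such as TextOutputFormat, and then insists on an output directory being set. Just as a sketch, the two lines inside run() that the check seems to care about would be something like the following (args[1] standing in for an output directory that does not exist yet), though I am not sure a file output path even makes sense when the reducer is supposed to write to HBase:

    // Sketch only: what FileOutputFormat.checkOutputSpecs appears to require
    // when a file-based output format such as TextOutputFormat is configured.
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
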
package org.random_scripts;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
//import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class RdfFormatter extends Configured implements Tool {

    private static final String OUTPUT_TABLE = "edges";

    public static class RDFMapper extends Mapper<LongWritable, Text, Text, Text> {

        private static String edge = "likes";

        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String token = "";
            if (value.toString().contains(edge)) {
                token = "@EDGE";
            } else {
                token = "@VERTEX";
            }
            String[] chunks = value.toString().split("\\s+");
            Text valued = new Text(token + " " + chunks[1] + " " + chunks[2]);
            context.write(new Text(chunks[0]), valued);
        }
    }
    public static class RDFReducer extends TableReducer<Text, Text, Text> {
        /**
         * The reduce method fills the TestCars table with all csv data,
         * computes some counters and saves those counters into the
         * TestBrandsSizes table. So we use two different HBase tables
         * as output for the reduce method.
         */
        private static String edge = "http://www.franz.com/simple#has-category";

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Map<String, Integer> statsSizeCounters = new HashMap<String, Integer>();
            String vertex = key.toString();
            // We are receiving all models,size grouped by brand.
            for (Text value : values) {
                String[] valueSplitted = value.toString().split("@EDGE");
                if (valueSplitted.length == 2) {
                    // String model = valueSplitted[0];
                    String edgeInfo = valueSplitted[1];
                    String[] edgeChunks = edgeInfo.split("\\s+");
                    // Fill the TestCars table
                    ImmutableBytesWritable putTable = new ImmutableBytesWritable(Bytes.toBytes("Edges"));
                    byte[] putKey = Bytes.toBytes(vertex);
                    byte[] putFamily = Bytes.toBytes("edge");
                    Put put = new Put(putKey);
                    put.add(putFamily, Bytes.toBytes(edgeChunks[0]), Bytes.toBytes(edgeChunks[1]));
                    context.write(null, put);
                }
            }
        }
    }
    @Override
    public int run(String[] args) throws Exception {
        // Configuration conf = new Configuration();
        Job job = new Job(getConf());
        // job.setOutputValueClass(Text.class);
        // conf.set("delimiter", "\\s+");
        job.setJarByClass(RdfFormatter.class);
        job.setMapperClass(RDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // job.setReducerClass(RDFReducer.class);
        TableMapReduceUtil.initTableReducerJob(OUTPUT_TABLE, RDFReducer.class, job);
        job.setOutputFormatClass(TextOutputFormat.class);
        // MultipleOutputs.addNamedOutput(job, "vertext", TextOutputFormat.class, keyClass, valueClass);
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.submit();

        long start = new Date().getTime();
        job.waitForCompletion(true);
        long end = new Date().getTime();
        System.out.println("Job took " + ((end - start) / 1000) + " seconds");
        return 0;
    }
    public static void main(String[] args) throws Exception {
        // TODO Auto-generated method stub
        /*
        Configuration conf = new Configuration();
        Job job = new Job(conf, "RDF input format");
        //job.setMapOutputKeyClass(Pair.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        conf.set("delimiter", "\\s+");
        job.setJarByClass(RdfFormatter.class);
        job.setMapperClass(RDFMapper.class);
        //job.setReducerClass(MeanReducer.class);
        job.setNumReduceTasks(0);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/user/user/mohit/wiki-rdf/output/data"));
        FileOutputFormat.setOutputPath(job, new Path("/user/user/mohit/wiki-rdf/outputter"));
        // set timer
        */
        Configuration conf = new Configuration();
        int ret = ToolRunner.run(conf, new RdfFormatter(), args); // calls your run() method
        System.exit(ret);
    }
}
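
For what it's worth, my (possibly wrong) understanding of the pieces involved: TableMapReduceUtil.initTableReducerJob already sets the reducer class and configures TableOutputFormat against the "edges" table, and the job.setOutputFormatClass(TextOutputFormat.class) call right after it switches the job back to a file-based output format, whose checkOutputSpecs is what demands an output directory. A minimal sketch of run() the way I think it was meant, writing only to HBase and with no file output at all:

    @Override
    public int run(String[] args) throws Exception {
        // Sketch only: output goes solely to the "edges" HBase table.
        Job job = new Job(getConf());
        job.setJarByClass(RdfFormatter.class);
        job.setMapperClass(RDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Sets the reducer class and TableOutputFormat for OUTPUT_TABLE, so no
        // setOutputFormatClass(...) or FileOutputFormat.setOutputPath(...) afterwards.
        TableMapReduceUtil.initTableReducerJob(OUTPUT_TABLE, RDFReducer.class, job);
        return job.waitForCompletion(true) ? 0 : 1;
    }
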
On Wed, Aug 28, 2013 at 10:03 AM, Shahab Yunus <[email protected]> wrote:
> Just google it.
>
> For HBaseStorage
> http://blog.whitepages.com/2011/10/27/hbase-storage-and-pig/
>
> For M/R:
> http://wiki.apache.org/hadoop/Hbase/MapReduce
>
> Regards,
> Shahab
>
>
> On Wed, Aug 28, 2013 at 12:59 PM, jamal sasha <[email protected]> wrote:
>
>> So, I am trying my hand with map reduce code.
>> Where can I find some examples using M/R code?
>>
>>
>>
>> On Wed, Aug 28, 2013 at 9:53 AM, Shahab Yunus <[email protected]> wrote:
>>
>>> There are many ways to do it.
>>>
>>> You can write your own M/R job in Java using the provided
>>> output formatters and input formatters.
>>>
>>> Or you can use Pig to store it in HBase using HBaseStorage.
>>>
>>> There are many ways (and resources available on the web), and the question
>>> that you have asked is very high level.
>>>
>>> Regards,
>>> Shahab
>>>
>>>
>>> On Wed, Aug 28, 2013 at 12:49 PM, jamal sasha <[email protected]> wrote:
>>>
>>>> Hi,
>>>> I have data in form:
>>>>
>>>> source, destination, connection
>>>> This data is saved in hdfs
>>>>
>>>> I want to read this data and put it in an HBase table, something like:
>>>>
>>>>       Column1 (Source) | Column2 (Destination) | Column3 (Connection Type)
>>>> Row:  vertex A         | vertex B              | connection
>>>>
>>>> How do I do this?
>>>> Thanks
>>>>
>>>
>>>
>>
>