Eh
So here is my attempt:
But it's not working :(
Exception in thread "main" org.apache.hadoop.mapred.InvalidJobConfException: Output directory not set.
        at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:128)
        at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:889)
        at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:850)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:416)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1093)
        at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:850)
        at org.apache.hadoop.mapreduce.Job.submit(Job.java:500)
        at org.rdf.RdfFormatter.run(RdfFormatter.java:142)
        at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
        at org.rdf.RdfFormatter.main(RdfFormatter.java:177)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:616)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
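
If I am reading the trace right, the check that fails is FileOutputFormat.checkOutputSpecs, which as far as I can tell only applies when the job's output format is a FileOutputFormat subclass such as TextOutputFormat, and then insists on an output directory being set. Just as a sketch, the two lines inside run() that the check seems to care about would be something like the following (args[1] standing in for an output directory that does not exist yet), though I am not sure a file output path even makes sense when the reducer is supposed to write to HBase:

    // Sketch only: what FileOutputFormat.checkOutputSpecs appears to require
    // when a file-based output format such as TextOutputFormat is configured.
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
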
package org.random_scripts;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
//import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class RdfFormatter extends Configured implements Tool {

    private static final String OUTPUT_TABLE = "edges";

    public static class RDFMapper extends Mapper<LongWritable, Text, Text, Text> {

        private static String edge = "likes";

        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String token = "";
            if (value.toString().contains(edge)) {
                token = "@EDGE";
            } else {
                token = "@VERTEX";
            }
            String[] chunks = value.toString().split("\\s+");
            Text valued = new Text(token + " " + chunks[1] + " " + chunks[2]);
            context.write(new Text(chunks[0]), valued);
        }
    }
    public static class RDFReducer extends TableReducer<Text, Text, Text> {
        /**
         * The reduce method fills the TestCars table with all csv data,
         * computes some counters and saves those counters into the
         * TestBrandsSizes table. So we use two different HBase tables
         * as output for the reduce method.
         */
        private static String edge = "http://www.franz.com/simple#has-category";

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Map<String, Integer> statsSizeCounters = new HashMap<String, Integer>();
            String vertex = key.toString();
            // We are receiving all models,size grouped by brand.
            for (Text value : values) {
                String[] valueSplitted = value.toString().split("@EDGE");
                if (valueSplitted.length == 2) {
                    // String model = valueSplitted[0];
                    String edgeInfo = valueSplitted[1];
                    String[] edgeChunks = edgeInfo.split("\\s+");
                    // Fill the TestCars table
                    ImmutableBytesWritable putTable = new ImmutableBytesWritable(Bytes.toBytes("Edges"));
                    byte[] putKey = Bytes.toBytes(vertex);
                    byte[] putFamily = Bytes.toBytes("edge");
                    Put put = new Put(putKey);
                    put.add(putFamily, Bytes.toBytes(edgeChunks[0]), Bytes.toBytes(edgeChunks[1]));
                    context.write(null, put);
                }
            }
        }
    }
    @Override
    public int run(String[] args) throws Exception {
        // Configuration conf = new Configuration();
        Job job = new Job(getConf());
        // job.setOutputValueClass(Text.class);
        // conf.set("delimiter", "\\s+");
        job.setJarByClass(RdfFormatter.class);
        job.setMapperClass(RDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // job.setReducerClass(RDFReducer.class);
        TableMapReduceUtil.initTableReducerJob(OUTPUT_TABLE, RDFReducer.class, job);
        job.setOutputFormatClass(TextOutputFormat.class);
        // MultipleOutputs.addNamedOutput(job, "vertext", TextOutputFormat.class, keyClass, valueClass);
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.submit();

        long start = new Date().getTime();
        job.waitForCompletion(true);
        long end = new Date().getTime();
        System.out.println("Job took " + ((end - start) / 1000) + " seconds");
        return 0;
    }
    public static void main(String[] args) throws Exception {
        // TODO Auto-generated method stub
        /*
        Configuration conf = new Configuration();
        Job job = new Job(conf, "RDF input format");
        //job.setMapOutputKeyClass(Pair.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        conf.set("delimiter", "\\s+");
        job.setJarByClass(RdfFormatter.class);
        job.setMapperClass(RDFMapper.class);
        //job.setReducerClass(MeanReducer.class);
        job.setNumReduceTasks(0);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/user/user/mohit/wiki-rdf/output/data"));
        FileOutputFormat.setOutputPath(job, new Path("/user/user/mohit/wiki-rdf/outputter"));
        // set timer
        */
        Configuration conf = new Configuration();
        int ret = ToolRunner.run(conf, new RdfFormatter(), args); // calls your run() method
        System.exit(ret);
    }
}
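
For what it's worth, my (possibly wrong) understanding of the pieces involved: TableMapReduceUtil.initTableReducerJob already sets the reducer class and configures TableOutputFormat against the "edges" table, and the job.setOutputFormatClass(TextOutputFormat.class) call right after it switches the job back to a file-based output format, whose checkOutputSpecs is what demands an output directory. A minimal sketch of run() the way I think it was meant, writing only to HBase and with no file output at all:

    @Override
    public int run(String[] args) throws Exception {
        // Sketch only: output goes solely to the "edges" HBase table.
        Job job = new Job(getConf());
        job.setJarByClass(RdfFormatter.class);
        job.setMapperClass(RDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Sets the reducer class and TableOutputFormat for OUTPUT_TABLE, so no
        // setOutputFormatClass(...) or FileOutputFormat.setOutputPath(...) afterwards.
        TableMapReduceUtil.initTableReducerJob(OUTPUT_TABLE, RDFReducer.class, job);
        return job.waitForCompletion(true) ? 0 : 1;
    }
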
On Wed, Aug 28, 2013 at 10:03 AM, Shahab Yunus <[email protected]> wrote:
> Just google it.
>
> For HBaseStorage
> http://blog.whitepages.com/2011/10/27/hbase-storage-and-pig/
>
> For M/R:
> http://wiki.apache.org/hadoop/Hbase/MapReduce
>
> Regards,
> Shahab
>
>
> On Wed, Aug 28, 2013 at 12:59 PM, jamal sasha <[email protected]> wrote:
>
>> So, I am trying my hand with map reduce code.
>> Where can I find some examples using M/R code?
>>
>>
>>
>> On Wed, Aug 28, 2013 at 9:53 AM, Shahab Yunus <[email protected]> wrote:
>>
>>> There are many ways to do it.
>>>
>>> You can write your own M/R job in Java using the provided
>>> output formatters and input formatters.
>>>
>>> Or you can use Pig to store it in HBase using HBaseStorage.
>>>
>>> There are many ways (and resources available on the web), and the question
>>> that you have asked is very high level.
>>>
>>> Regards,
>>> Shahab
>>>
>>>
>>> On Wed, Aug 28, 2013 at 12:49 PM, jamal sasha <[email protected]> wrote:
>>>
>>>> Hi,
>>>> I have data in form:
>>>>
>>>> source, destination, connection
>>>> This data is saved in hdfs
>>>>
>>>> I want to read this data and put it in an HBase table, something like:
>>>>
>>>>       Column1 (Source) | Column2 (Destination) | Column3 (Connection Type)
>>>> Row:  vertex A         | vertex B              | connection
>>>>
>>>> How do I do this?
>>>> Thanks
>>>>
>>>
>>>
>>
>