Dear Wiki user, You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change notification.
The following page has been changed by SteveSeverance: http://wiki.apache.org/nutch/CompleteSourceListing ------------------------------------------------------------------------------ + deleted - {{{ - package com.ivirtuoso.linkcounter; - import java.io.IOException; - import java.util.ArrayList; - import java.util.Iterator; - import java.util.List; - - import org.apache.hadoop.conf.*; - import org.apache.hadoop.fs.*; - import org.apache.hadoop.io.*; - import org.apache.hadoop.mapred.*; - - import org.apache.nutch.parse.*; - import org.apache.nutch.util.*; - - public class LinkCounter { - - public static class CounterMapper extends MapReduceBase implements Mapper - { - public void map(WritableComparable key, Writable value, OutputCollector collector, Reporter reporter) throws IOException { - // TODO Auto-generated method stub - ParseData data = (ParseData)value; - - IntWritable outboundLinkCount = new IntWritable(data.getOutlinks().length); - - collector.collect(key, outboundLinkCount); - } - - public void close() throws IOException { - // TODO Auto-generated method stub - super.close(); - } - - public void configure(JobConf arg0) { - // TODO Auto-generated method stub - super.configure(arg0); - } - - } - - public static class CounterReducer extends MapReduceBase implements Reducer - { - - public void reduce(WritableComparable url, Iterator iterator, OutputCollector output, Reporter reporter) throws IOException { - IntWritable linkCount = (IntWritable)iterator.next(); - output.collect(url, linkCount); - } - - public void close() throws IOException { - // TODO Auto-generated method stub - super.close(); - } - - public void configure(JobConf arg0) { - // TODO Auto-generated method stub - super.configure(arg0); - } - - - } - - public static void main(String[] args) throws IOException{ - Configuration config = NutchConfiguration.create(); - - JobConf jobConfig = new NutchJob(config); - jobConfig.setJobName("countlinks"); - - jobConfig.setInputFormat(SequenceFileInputFormat.class); - - jobConfig.setOutputFormat(MapFileOutputFormat.class); - - // the keys are words (strings) - jobConfig.setOutputKeyClass(Text.class); - // the values are counts (ints) - jobConfig.setOutputValueClass(IntWritable.class); - - jobConfig.setMapperClass(CounterMapper.class); - jobConfig.setCombinerClass(CounterReducer.class); - jobConfig.setReducerClass(CounterReducer.class); - - jobConfig.setInputPath(new Path((String) args[0], ParseData.DIR_NAME)); - jobConfig.setOutputPath(new Path((String) args[1])); - - JobClient.runJob(jobConfig); - } - - } - }}} - ------------------------------------------------------------------------- Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys-and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs