Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change 
notification.

The following page has been changed by SteveSeverance:
http://wiki.apache.org/nutch/CompleteSourceListing

------------------------------------------------------------------------------
+ deleted
- {{{
- package com.ivirtuoso.linkcounter;
  
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- 
- import org.apache.hadoop.conf.*;
- import org.apache.hadoop.fs.*;
- import org.apache.hadoop.io.*;
- import org.apache.hadoop.mapred.*;
- 
- import org.apache.nutch.parse.*;
- import org.apache.nutch.util.*;
- 
- public class LinkCounter {
- 
-       public static class CounterMapper extends MapReduceBase implements 
Mapper
-       {
-               public void map(WritableComparable key, Writable value, 
OutputCollector collector, Reporter reporter) throws IOException {
-                       // TODO Auto-generated method stub
-                       ParseData data = (ParseData)value;
-                       
-                       IntWritable outboundLinkCount = new 
IntWritable(data.getOutlinks().length);                                     
-                       
-                       collector.collect(key, outboundLinkCount);
-               }
- 
-               public void close() throws IOException {
-                       // TODO Auto-generated method stub
-                       super.close();
-               }
- 
-               public void configure(JobConf arg0) {
-                       // TODO Auto-generated method stub
-                       super.configure(arg0);
-               }
-               
-       }
-       
-       public static class CounterReducer extends MapReduceBase implements 
Reducer
-       {
- 
-               public void reduce(WritableComparable url, Iterator iterator, 
OutputCollector output, Reporter reporter) throws IOException {
-                       IntWritable linkCount = (IntWritable)iterator.next();
-                       output.collect(url, linkCount);
-               }
- 
-               public void close() throws IOException {
-                       // TODO Auto-generated method stub
-                       super.close();
-               }
- 
-               public void configure(JobConf arg0) {
-                       // TODO Auto-generated method stub
-                       super.configure(arg0);
-               }
-               
-       
-       }
-       
-       public static void main(String[] args) throws IOException{
-               Configuration config = NutchConfiguration.create();
-               
-           JobConf jobConfig = new NutchJob(config);
-           jobConfig.setJobName("countlinks");
-        
-           jobConfig.setInputFormat(SequenceFileInputFormat.class);
-           
-           jobConfig.setOutputFormat(MapFileOutputFormat.class);
-           
-           // the keys are words (strings)
-           jobConfig.setOutputKeyClass(Text.class);
-           // the values are counts (ints)
-           jobConfig.setOutputValueClass(IntWritable.class);
-           
-           jobConfig.setMapperClass(CounterMapper.class);        
-           jobConfig.setCombinerClass(CounterReducer.class);
-           jobConfig.setReducerClass(CounterReducer.class);
-           
-           jobConfig.setInputPath(new Path((String) args[0], 
ParseData.DIR_NAME));
-           jobConfig.setOutputPath(new Path((String) args[1]));
-           
-           JobClient.runJob(jobConfig);
-       }
- 
- }
- }}}
- 

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash.
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to