[ 
https://issues.apache.org/jira/browse/NUTCH-2375?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16166486#comment-16166486
 ] 

ASF GitHub Bot commented on NUTCH-2375:
---------------------------------------

sebastian-nagel commented on a change in pull request #221: NUTCH-2375 
Upgrading nutch to use org.apache.hadoop.mapreduce
URL: https://github.com/apache/nutch/pull/221#discussion_r138928833
 
 

 ##########
 File path: src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
 ##########
 @@ -350,186 +367,221 @@ public void close() {
    * WebGraph. The link analysis process consists of inverting, analyzing and
    * scoring, in a loop for a given number of iterations.
    */
-  private static class Inverter implements
-      Mapper<Text, Writable, Text, ObjectWritable>,
-      Reducer<Text, ObjectWritable, Text, LinkDatum> {
+  private static class Inverter {
 
-    private JobConf conf;
-
-    public void configure(JobConf conf) {
-      this.conf = conf;
-    }
+    private static Configuration conf;
 
     /**
      * Convert values to ObjectWritable
      */
-    public void map(Text key, Writable value,
-        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-        throws IOException {
+    public static class InvertMapper extends 
+        Mapper<Text, Writable, Text, ObjectWritable> {
+      public void setup(Mapper<Text, Writable, Text, ObjectWritable>.Context 
context) {
+        conf = context.getConfiguration();
+      }
+
+      public void cleanup(){
+      }
+
+      public void map(Text key, Writable value,
+          Context context)
+          throws IOException, InterruptedException {
 
-      ObjectWritable objWrite = new ObjectWritable();
-      objWrite.set(value);
-      output.collect(key, objWrite);
+        ObjectWritable objWrite = new ObjectWritable();
+        objWrite.set(value);
+        context.write(key, objWrite);
+      }
     }
 
     /**
      * Inverts outlinks to inlinks, attaches current score for the outlink from
      * the NodeDb of the WebGraph.
      */
-    public void reduce(Text key, Iterator<ObjectWritable> values,
-        OutputCollector<Text, LinkDatum> output, Reporter reporter)
-        throws IOException {
-
-      String fromUrl = key.toString();
-      List<LinkDatum> outlinks = new ArrayList<>();
-      Node node = null;
-
-      // aggregate outlinks, assign other values
-      while (values.hasNext()) {
-        ObjectWritable write = values.next();
-        Object obj = write.get();
-        if (obj instanceof Node) {
-          node = (Node) obj;
-        } else if (obj instanceof LinkDatum) {
-          outlinks.add(WritableUtils.clone((LinkDatum) obj, conf));
-        }
+    public static class InvertReducer extends
+        Reducer<Text, ObjectWritable, Text, LinkDatum> {
+      public void setup(Reducer<Text, ObjectWritable, Text, LinkDatum>.Context 
context) {
+        conf = context.getConfiguration();
       }
 
-      // get the number of outlinks and the current inlink and outlink scores
-      // from the node of the url
-      int numOutlinks = node.getNumOutlinks();
-      float inlinkScore = node.getInlinkScore();
-      float outlinkScore = node.getOutlinkScore();
-      LOG.debug(fromUrl + ": num outlinks " + numOutlinks);
-
-      // can't invert if no outlinks
-      if (numOutlinks > 0) {
-        for (int i = 0; i < outlinks.size(); i++) {
-          LinkDatum outlink = outlinks.get(i);
-          String toUrl = outlink.getUrl();
-
-          outlink.setUrl(fromUrl);
-          outlink.setScore(outlinkScore);
-
-          // collect the inverted outlink
-          output.collect(new Text(toUrl), outlink);
-          LOG.debug(toUrl + ": inverting inlink from " + fromUrl
-              + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks
-              + " inlinkscore: " + outlinkScore);
-        }
+      public void cleanup(){
       }
-    }
 
-    public void close() {
+      public void reduce(Text key, Iterable<ObjectWritable> values,
+          Context context)
+          throws IOException, InterruptedException {
+
+        String fromUrl = key.toString();
+        List<LinkDatum> outlinks = new ArrayList<>();
+        Node node = null;
+
+        // aggregate outlinks, assign other values
+        for (ObjectWritable write : values) {
+          Object obj = write.get();
+          if (obj instanceof Node) {
+            node = (Node) obj;
+          } else if (obj instanceof LinkDatum) {
+            outlinks.add(WritableUtils.clone((LinkDatum) obj, conf));
+          }
+        }
+
+        // get the number of outlinks and the current inlink and outlink scores
+        // from the node of the url
+        int numOutlinks = node.getNumOutlinks();
+        float inlinkScore = node.getInlinkScore();
+        float outlinkScore = node.getOutlinkScore();
+        LOG.debug(fromUrl + ": num outlinks " + numOutlinks);
+
+        // can't invert if no outlinks
+        if (numOutlinks > 0) {
+          for (int i = 0; i < outlinks.size(); i++) {
+            LinkDatum outlink = outlinks.get(i);
+            String toUrl = outlink.getUrl();
+
+            outlink.setUrl(fromUrl);
+            outlink.setScore(outlinkScore);
+
+            // collect the inverted outlink
+            context.write(new Text(toUrl), outlink);
+            LOG.debug(toUrl + ": inverting inlink from " + fromUrl
+                + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks
+                + " inlinkscore: " + outlinkScore);
+          }
+        }
+      }
     }
   }
 
   /**
    * Runs a single link analysis iteration.
    */
-  private static class Analyzer implements
-      Mapper<Text, Writable, Text, ObjectWritable>,
-      Reducer<Text, ObjectWritable, Text, Node> {
+  private static class Analyzer {
 
-    private JobConf conf;
-    private float dampingFactor = 0.85f;
-    private float rankOne = 0.0f;
-    private int itNum = 0;
-    private boolean limitPages = true;
-    private boolean limitDomains = true;
+    private static Configuration conf;
+    private static float dampingFactor = 0.85f;
+    private static float rankOne = 0.0f;
+    private static int itNum = 0;
+    private static boolean limitPages = true;
+    private static boolean limitDomains = true;
 
 Review comment:
   See comments in Generator etc. regarding static variables shared between 
mapper and reducer classes.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> Upgrade the code base from org.apache.hadoop.mapred to 
> org.apache.hadoop.mapreduce
> ----------------------------------------------------------------------------------
>
>                 Key: NUTCH-2375
>                 URL: https://issues.apache.org/jira/browse/NUTCH-2375
>             Project: Nutch
>          Issue Type: Improvement
>          Components: deployment
>            Reporter: Omkar Reddy
>
> Nutch is still using the deprecated org.apache.hadoop.mapred dependency. 
> It needs to be updated to the org.apache.hadoop.mapreduce 
> dependency. 



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to