Hi there. Following the instructions at this link:
http://wiki.apache.org/nutch/RunNutchInEclipse0.9

I checked out Nutch and configured it within Eclipse. Then I noticed there were
some compilation errors. They were mainly caused by methods whose signatures
had changed. I believe they are now fixed, and I'm attaching a patch that
fixes them.
Index: /home/data/software/java/nutch/nutch-svn/contrib/web2/src/main/java/org/apache/nutch/webapp/common/WebAppModule.java
===================================================================
--- /home/data/software/java/nutch/nutch-svn/contrib/web2/src/main/java/org/apache/nutch/webapp/common/WebAppModule.java	(revision 638548)
+++ /home/data/software/java/nutch/nutch-svn/contrib/web2/src/main/java/org/apache/nutch/webapp/common/WebAppModule.java	(working copy)
@@ -158,8 +158,8 @@
       Element pattern = (Element) mapping.getElementsByTagName("url-pattern")
           .item(0);
 
-      String servletName = servlet.getTextContent().trim();
-      String urlPattern = pattern.getTextContent().trim();
+      String servletName = servlet.getNodeValue().trim();
+      String urlPattern = pattern.getNodeValue().trim();
 
       servlets.put(urlPattern, servletName);
       urlPatterns.add(urlPattern);
Index: /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java
===================================================================
--- /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java	(revision 638548)
+++ /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java	(working copy)
@@ -51,7 +51,7 @@
       setArtist(value);
 
     if (name.indexOf("URL Link") > -1) {
-      links.add(new Outlink(value, "", this.conf));
+      links.add(new Outlink(value, ""));
     } else if (name.indexOf("Text") > -1) {
       text += value + "\n";
     }
Index: /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java
===================================================================
--- /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java	(revision 638548)
+++ /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java	(working copy)
@@ -24,26 +24,21 @@
 import java.net.MalformedURLException;
 import java.util.Iterator;
 
-// Java ID3 Tag imports
-import org.farng.mp3.MP3File;
-import org.farng.mp3.TagException;
-import org.farng.mp3.id3.AbstractID3v2;
-import org.farng.mp3.id3.AbstractID3v2Frame;
-import org.farng.mp3.id3.ID3v1;
-import org.farng.mp3.object.AbstractMP3Object;
-
-// Hadoop imports
 import org.apache.hadoop.conf.Configuration;
-
-// Nutch imports
 import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.ParseImpl;
+import org.apache.nutch.parse.ParseResult;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
+import org.farng.mp3.MP3File;
+import org.farng.mp3.TagException;
+import org.farng.mp3.id3.AbstractID3v2;
+import org.farng.mp3.id3.AbstractID3v2Frame;
+import org.farng.mp3.id3.ID3v1;
+import org.farng.mp3.object.AbstractMP3Object;
 
 
 /**
@@ -55,7 +50,7 @@
   private MetadataCollector metadataCollector;
   private Configuration conf;
 
-  public Parse getParse(Content content) {
+  public ParseResult getParse(Content content) {
 
     Parse parse = null;
     byte[] raw = content.getContent();
@@ -73,22 +68,25 @@
       } else if (mp3.hasID3v1Tag()) {
         parse = getID3v1Parse(mp3, content.getMetadata());
       } else {
-        return new ParseStatus(ParseStatus.FAILED,
+        parse = new ParseStatus(ParseStatus.FAILED,
                                ParseStatus.FAILED_MISSING_CONTENT,
                                "No textual content available").getEmptyParse(conf);
+        return ParseResult.createParseResult(content.getUrl(), parse);
       }
     } catch (IOException e) {
-      return new ParseStatus(ParseStatus.FAILED,
+      parse = new ParseStatus(ParseStatus.FAILED,
                              ParseStatus.FAILED_EXCEPTION,
                              "Couldn't create temporary file:" + e).getEmptyParse(conf);
+      return ParseResult.createParseResult(content.getUrl(), parse);
     } catch (TagException e) {
-      return new ParseStatus(ParseStatus.FAILED,
+      parse = new ParseStatus(ParseStatus.FAILED,
                              ParseStatus.FAILED_EXCEPTION,
                              "ID3 Tags could not be parsed:" + e).getEmptyParse(conf);
+      return ParseResult.createParseResult(content.getUrl(), parse);
     } finally{
       tmp.delete();
     }
-    return parse;
+    return ParseResult.createParseResult(content.getUrl(), parse);
   }
 
   private Parse getID3v1Parse(MP3File mp3, Metadata contentMeta)
Index: /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java
===================================================================
--- /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java	(revision 638548)
+++ /home/data/software/java/nutch/nutch-svn/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java	(working copy)
@@ -17,6 +17,8 @@
 
 package org.apache.nutch.parse.mp3;
 
+import junit.framework.TestCase;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.nutch.crawl.CrawlDatum;
@@ -23,6 +25,7 @@
 import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.parse.ParseResult;
 import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
@@ -30,8 +33,6 @@
 import org.apache.nutch.protocol.ProtocolFactory;
 import org.apache.nutch.util.NutchConfiguration;
 
-import junit.framework.TestCase;
-
 /**
  * Unit tests for TestMP3Parser.  (Adapted from John Xing msword unit tests).
  *
@@ -65,6 +66,8 @@
     Protocol protocol;
     Content content;
     Parse parse;
+    ParseResult parseResult;
+    
 
     Configuration conf = NutchConfiguration.create();
     urlString = "file:" + sampleDir + fileSeparator + id3v2;
@@ -71,7 +74,9 @@
     protocol = new ProtocolFactory(conf).getProtocol(urlString);
     content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                       .getContent();
-    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parseResult = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parse = parseResult.get(content.getUrl());
+    
     Metadata metadata = parse.getData().getParseMeta();
     assertEquals("postgresql comment id3v2", metadata.get("COMM-Text"));
     assertEquals("postgresql composer id3v2", metadata.get("TCOM-Text"));
@@ -96,6 +101,7 @@
     String urlString;
     Protocol protocol;
     Content content;
+    ParseResult parseResult;
     Parse parse;
 
     Configuration conf = NutchConfiguration.create();
@@ -103,7 +109,8 @@
     protocol = new ProtocolFactory(conf).getProtocol(urlString);
     content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                       .getContent();
-    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parseResult = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parse = parseResult.get(content.getUrl());
 
     Metadata metadata = parse.getData().getParseMeta();
     assertEquals("postgresql comment id3v1", metadata.get("COMM-Text"));
@@ -123,6 +130,7 @@
     String urlString;
     Protocol protocol;
     Content content;
+    ParseResult parseResult;
     Parse parse;
 
     Configuration conf = NutchConfiguration.create();
@@ -130,7 +138,9 @@
     protocol = new ProtocolFactory(conf).getProtocol(urlString);
     content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                       .getContent();
-    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parseResult = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parse = parseResult.get(content.getUrl());
+    
 //    Metadata metadata = parse.getData().getParseMeta();
     if (parse.getData().getStatus().isSuccess()) {
       fail("Expected ParseException");
Index: /home/data/software/java/nutch/nutch-svn/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
===================================================================
--- /home/data/software/java/nutch/nutch-svn/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java	(revision 638548)
+++ /home/data/software/java/nutch/nutch-svn/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java	(working copy)
@@ -31,6 +31,7 @@
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseImpl;
+import org.apache.nutch.parse.ParseResult;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
@@ -49,7 +50,7 @@
 
   private Configuration conf;
 
-  public Parse getParse(Content content) {
+  public ParseResult getParse(Content content) {
     byte[] raw = content.getContent();
     Reader reader = new InputStreamReader(new ByteArrayInputStream(raw));
     RTFParserDelegateImpl delegate = new RTFParserDelegateImpl();
@@ -57,6 +58,8 @@
     rtfParser = RTFParser.createParser(reader);
     rtfParser.setNewLine("\n");
     rtfParser.setDelegate(delegate);
+    
+    Parse parse = null;
 
     try {
       rtfParser.parse();
@@ -61,9 +64,10 @@
     try {
       rtfParser.parse();
     } catch (ParseException e) {
-        return new ParseStatus(ParseStatus.FAILED,
+    	parse = new ParseStatus(ParseStatus.FAILED,
                                ParseStatus.FAILED_EXCEPTION,
                                e.toString()).getEmptyParse(conf);
+    	return ParseResult.createParseResult(content.getUrl(), parse);
     }
 
     Metadata metadata = new Metadata();
@@ -78,7 +82,7 @@
 
     String text = delegate.getText();
 
-    return new ParseImpl(text,
+    parse = new ParseImpl(text,
                          new ParseData(ParseStatus.STATUS_SUCCESS,
                                        title,
                                        OutlinkExtractor
@@ -85,6 +89,7 @@
         .                              getOutlinks(text, this.conf),
                                        content.getMetadata(),
                                        metadata));
+    return ParseResult.createParseResult(content.getUrl(), parse);
   }
 
   public void setConf(Configuration conf) {
Index: /home/data/software/java/nutch/nutch-svn/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
===================================================================
--- /home/data/software/java/nutch/nutch-svn/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java	(revision 638548)
+++ /home/data/software/java/nutch/nutch-svn/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java	(working copy)
@@ -25,6 +25,7 @@
 import org.apache.nutch.metadata.DublinCore;
 import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseResult;
 import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.protocol.Content;
@@ -69,6 +70,7 @@
     Protocol protocol;
     Content content;
     Parse parse;
+    ParseResult parseResult;
 
     Configuration conf = NutchConfiguration.create();
     urlString = "file:" + sampleDir + fileSeparator + rtfFile;
@@ -75,7 +77,10 @@
     protocol = new ProtocolFactory(conf).getProtocol(urlString);
     content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                       .getContent();
-    parse = new ParseUtil(conf).parseByExtensionId("parse-rtf", content);
+    
+    parseResult = new ParseUtil(conf).parseByExtensionId("parse-rtf", content);
+    parse = parseResult.get(content.getUrl());
+    
     String text = parse.getText();
     assertEquals("The quick brown fox jumps over the lazy dog", text.trim());
 

Reply via email to