Author: mattmann
Date: Sun Oct 18 19:32:22 2015
New Revision: 1709306

URL: http://svn.apache.org/viewvc?rev=1709306&view=rev
Log:
Fix for NUTCH-2129 - Add protocol status tracking to crawl datum, contributed by
Michael Joyce <[email protected]>. This closes #68.

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java
    nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
    nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1709306&r1=1709305&r2=1709306&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sun Oct 18 19:32:22 2015
@@ -2,6 +2,8 @@ Nutch Change Log
    
 Nutch Current Development 1.11-SNAPSHOT
 
+* NUTCH-2129 Add protocol status tracking to crawl datum (Michael Joyce via mattmann)
+
 * NUTCH-2142 Nutch File Dump - FileNotFoundException (Invalid Argument) Error (Karanjeet Singh via mattmann)
 
 * NUTCH-2136 Implement a different version of Naive Bayes Parse Filter (Asitang Mishra)

Modified: nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java?rev=1709306&r1=1709305&r2=1709306&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java Sun Oct 18 19:32:22 2015
@@ -41,6 +41,8 @@ public interface Nutch {
        public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(
                        GENERATE_TIME_KEY);
 
+       public static final Text PROTOCOL_STATUS_CODE_KEY = new Text("nutch.protocol.code");
+
        public static final String PROTO_STATUS_KEY = "_pst_";
 
        public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(

Modified: nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1709306&r1=1709305&r2=1709306&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Sun Oct 18 19:32:22 2015
@@ -33,6 +33,7 @@ import org.slf4j.LoggerFactory;
 
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.metadata.Nutch;
 import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
@@ -265,6 +266,9 @@ public abstract class HttpBase implement
       }
 
       int code = response.getCode();
+      datum.getMetaData().put(Nutch.PROTOCOL_STATUS_CODE_KEY,
+        new Text(Integer.toString(code)));
+
       byte[] content = response.getContent();
       Content c = new Content(u.toString(), u.toString(),
           (content == null ? EMPTY_CONTENT : content),

Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=1709306&r1=1709305&r2=1709306&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java (original)
+++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java Sun Oct 18 19:32:22 2015
@@ -29,6 +29,7 @@ import org.apache.nutch.net.protocols.Re
 import org.apache.hadoop.conf.Configuration;
 
 import org.apache.nutch.protocol.Content;
+import org.apache.nutch.metadata.Nutch;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolOutput;
 import org.apache.nutch.protocol.ProtocolStatus;
@@ -129,6 +130,9 @@ public class Ftp implements Protocol {
         response = new FtpResponse(u, datum, this, getConf()); // make a request
 
         int code = response.getCode();
+        datum.getMetaData().put(Nutch.PROTOCOL_STATUS_CODE_KEY,
+          new Text(Integer.toString(code)));
+        
 
         if (code == 200) { // got a good response
           return new ProtocolOutput(response.toContent()); // return it


Reply via email to