Author: lewismc
Date: Tue Mar 26 19:22:45 2013
New Revision: 1461276

URL: http://svn.apache.org/r1461276
Log:
NUTCH-1419 parsechecker and indexchecker to report protocol status

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
    nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1461276&r1=1461275&r2=1461276&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Mar 26 19:22:45 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk): Current Development
 
+* NUTCH-1419 parsechecker and indexchecker to report protocol status (snagel + lewismc)
+
 * NUTCH-1047 Pluggable indexing backends (jnioche)
 
 * NUTCH-1536 Ant build file has hardcoded conf dir location (zm via lewismc)

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java?rev=1461276&r1=1461275&r2=1461276&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java Tue Mar 26 19:22:45 2013
@@ -39,6 +39,7 @@ import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolFactory;
+import org.apache.nutch.protocol.ProtocolOutput;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.URLUtil;
 
@@ -81,8 +82,14 @@ public class IndexingFiltersChecker exte
     Protocol protocol = factory.getProtocol(url);
     CrawlDatum datum = new CrawlDatum();
 
-    Content content = protocol.getProtocolOutput(new Text(url), datum)
-        .getContent();
+    ProtocolOutput output = protocol.getProtocolOutput(new Text(url), datum);
+    
+    if (!output.getStatus().isSuccess()) {
+      System.out.println("Fetch failed with protocol status: " + output.getStatus());
+      return 0;
+    }
+         
+    Content content = output.getContent();
 
     if (content == null) {
       System.out.println("No content for " + url);

Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java?rev=1461276&r1=1461275&r2=1461276&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java Tue Mar 26 19:22:45 2013
@@ -28,6 +28,7 @@ import org.apache.nutch.crawl.SignatureF
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolFactory;
+import org.apache.nutch.protocol.ProtocolOutput;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.URLUtil;
 import org.apache.nutch.util.StringUtil;
@@ -80,11 +81,17 @@ public class ParserChecker implements To
 
     ProtocolFactory factory = new ProtocolFactory(conf);
     Protocol protocol = factory.getProtocol(url);
-    Content content = protocol.getProtocolOutput(new Text(url),
-        new CrawlDatum()).getContent();
+    ProtocolOutput output = protocol.getProtocolOutput(new Text(url), new CrawlDatum());
+    
+    if (!output.getStatus().isSuccess()) {
+      System.err.println("Fetch failed with protocol status: " + output.getStatus());
+      return (-1);
+    }
+    
+    Content content = output.getContent();
 
     if (content == null) {
-      System.err.println("Can't fetch URL successfully");
+      LOG.error("No content for " + url);
       return (-1);
     }
 


Reply via email to