Author: snagel
Date: Wed Aug 7 20:44:01 2013
New Revision: 1511479
URL: http://svn.apache.org/r1511479
Log:
NUTCH-911 protocol-file to return proper protocol status for notmodified, gone,
access_denied
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1511479&r1=1511478&r2=1511479&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Aug 7 20:44:01 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Development Trunk
+* NUTCH-911 protocol-file to return proper protocol status (Peter Lundberg via snagel)
+
* NUTCH-806 Merge CrawlDBScanner with CrawlDBReader (jnioche)
* NUTCH-1587 misspelled property "threshold" in conf/log4j.properties (snagel)
Modified:
nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java?rev=1511479&r1=1511478&r2=1511479&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java (original)
+++ nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java Wed Aug 7 20:44:01 2013
@@ -105,6 +105,15 @@ public class File implements Protocol {
if (code == 200) { // got a good response
return new ProtocolOutput(response.toContent()); // return it
+ } else if (code == 304) { // got not modified
+ return new ProtocolOutput(response.toContent(), ProtocolStatus.STATUS_NOTMODIFIED);
+
+ } else if (code == 401) { // access denied / no read permissions
+ return new ProtocolOutput(response.toContent(), new ProtocolStatus(ProtocolStatus.ACCESS_DENIED));
+
+ } else if (code == 404) { // no such file
+ return new ProtocolOutput(response.toContent(), ProtocolStatus.STATUS_NOTFOUND);
+
} else if (code >= 300 && code < 400) { // handle redirect
if (redirects == MAX_REDIRECTS)
throw new FileException("Too many redirects: " + url);