Author: snagel
Date: Wed Aug 7 20:44:01 2013
New Revision: 1511479

URL: http://svn.apache.org/r1511479
Log:
NUTCH-911 protocol-file to return proper protocol status for notmodified, gone, access_denied

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1511479&r1=1511478&r2=1511479&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Aug 7 20:44:01 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Development Trunk
 
+* NUTCH-911 protocol-file to return proper protocol status (Peter Lundberg via snagel)
+
 * NUTCH-806 Merge CrawlDBScanner with CrawlDBReader (jnioche)
 
 * NUTCH-1587 misspelled property "threshold" in conf/log4j.properties (snagel)

Modified: nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java?rev=1511479&r1=1511478&r2=1511479&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java (original)
+++ nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java Wed Aug 7 20:44:01 2013
@@ -105,6 +105,15 @@ public class File implements Protocol {
         if (code == 200) { // got a good response
           return new ProtocolOutput(response.toContent()); // return it
 
+        } else if (code == 304) { // got not modified
+          return new ProtocolOutput(response.toContent(), ProtocolStatus.STATUS_NOTMODIFIED);
+
+        } else if (code == 401) { // access denied / no read permissions
+          return new ProtocolOutput(response.toContent(), new ProtocolStatus(ProtocolStatus.ACCESS_DENIED));
+
+        } else if (code == 404) { // no such file
+          return new ProtocolOutput(response.toContent(), ProtocolStatus.STATUS_NOTFOUND);
+
         } else if (code >= 300 && code < 400) { // handle redirect
           if (redirects == MAX_REDIRECTS)
             throw new FileException("Too many redirects: " + url);