Author: lewismc
Date: Thu Apr 23 23:55:09 2015
New Revision: 1675735
URL: http://svn.apache.org/r1675735
Log:
Add the NUTCH-1927 property back in to nutch-default, as it was removed during commit
@1675022
Modified:
nutch/trunk/conf/nutch-default.xml
Modified: nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1675735&r1=1675734&r2=1675735&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Thu Apr 23 23:55:09 2015
@@ -118,6 +118,15 @@
</property>
<property>
+ <name>http.robot.rules.whitelist</name>
+ <value></value>
+ <description>Comma separated list of hostnames or IP addresses to ignore
+ robot rules parsing for. Use with care and only if you are explicitly
+ allowed by the site owner to ignore the site's robots.txt!
+ </description>
+</property>
+
+<property>
<name>http.robots.403.allow</name>
<value>true</value>
<description>Some servers return HTTP status 403 (Forbidden) if