Author: markus
Date: Tue Jan 28 13:07:09 2014
New Revision: 1562058
URL: http://svn.apache.org/r1562058
Log:
NUTCH-1717 HostDB not to complain if filters/normalizers are disabled
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/util/hostdb/HostDb.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1562058&r1=1562057&r2=1562058&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jan 28 13:07:09 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Development Trunk
+* NUTCH-1717 HostDB not to complain if filters/normalizers are disabled (markus)
+
* NUTCH-1715 RobotRulesParser adds additional '*' to the robots name (tejasp)
* NUTCH-356 Plugin repository cache can lead to memory leak (Enrico Triolo,
Doğacan Güney via markus)
Modified: nutch/trunk/src/java/org/apache/nutch/util/hostdb/HostDb.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/hostdb/HostDb.java?rev=1562058&r1=1562057&r2=1562058&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/hostdb/HostDb.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/hostdb/HostDb.java Tue Jan 28 13:07:09 2014
@@ -505,12 +505,12 @@ public class HostDb extends Configured i
conf.setBoolean(HOSTDB_URL_NORMALIZING, normalize);
// Check whether the urlfilter-domainblacklist plugin is loaded
- if ("urlfilter-domainblacklist".matches(conf.get("plugin.includes"))) {
+ if (filter &&
"urlfilter-domainblacklist".matches(conf.get("plugin.includes"))) {
throw new Exception("domainblacklist-urlfilter must not be enabled");
}
// Check whether the urlnormalizer-host plugin is loaded
- if ("urlnormalizer-host".matches(conf.get("plugin.includes"))) {
+ if (normalize &&
"urlnormalizer-host".matches(conf.get("plugin.includes"))) {
throw new Exception("urlnormalizer-host must not be enabled");
}