This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 4cc60483266c3d74f648e083ec731349b22bcc8d Author: Shashanka Balakuntala Srinivasa <shbalakunt...@gmail.com> AuthorDate: Wed Jul 29 20:05:04 2020 +0530 NUTCH-2805: Rename plugin urlfilter-domainblacklist (#540) NUTCH-2805: Rename plugin urlfilter-domainblacklist --- CHANGES.txt | 2 ++ build.xml | 8 +++--- ...plate => domaindenylist-urlfilter.txt.template} | 2 +- default.properties | 2 +- src/plugin/build.xml | 6 ++-- .../nutch/urlfilter/domain/package-info.java | 2 +- .../build.xml | 2 +- .../data/hosts.txt | 0 .../ivy.xml | 0 .../plugin.xml | 16 +++++------ .../domaindenylist/DomainDenylistURLFilter.java} | 18 ++++++------ .../urlfilter/domaindenylist}/package-info.java | 2 +- .../TestDomainDenylistURLFilter.java} | 32 +++++++++++----------- .../nutch/urlfilter/prefix/PrefixURLFilter.java | 4 +-- .../nutch/urlfilter/suffix/SuffixURLFilter.java | 4 +-- 15 files changed, 51 insertions(+), 49 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 76c9fc6..e5c5984 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,7 +2,9 @@ Nutch 1.18 Development +Breaking Changes + - As part of NUTCH-2805, the plugin urlfilter-domainblacklist has been renamed to urlfilter-domaindenylist. The configuration properties used by the plugin, urlfilter.domainblacklist.rules and urlfilter.domainblacklist.file, have been replaced with urlfilter.domaindenylist.rules and urlfilter.domaindenylist.file respectively. See NUTCH-2802 for more details. 
Nutch 1.17 Release 18/06/2020 (dd/mm/yyyy) Release Report: https://s.apache.org/ovhry diff --git a/build.xml b/build.xml index bc8d8fb..0f6807a 100644 --- a/build.xml +++ b/build.xml @@ -230,7 +230,7 @@ <packageset dir="${plugins.dir}/tld/src/java"/> <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/> <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/> - <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/> + <packageset dir="${plugins.dir}/urlfilter-domaindenylist/src/java"/> <packageset dir="${plugins.dir}/urlfilter-fast/src/java"/> <packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/> <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/> @@ -739,7 +739,7 @@ <packageset dir="${plugins.dir}/tld/src/java"/> <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/> <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/> - <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/> + <packageset dir="${plugins.dir}/urlfilter-domaindenylist/src/java"/> <packageset dir="${plugins.dir}/urlfilter-fast/src/java"/> <packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/> <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/> @@ -1164,8 +1164,8 @@ <source path="${plugins.dir}/urlfilter-automaton/src/test/" /> <source path="${plugins.dir}/urlfilter-domain/src/java/" /> <source path="${plugins.dir}/urlfilter-domain/src/test/" /> - <source path="${plugins.dir}/urlfilter-domainblacklist/src/java/" /> - <source path="${plugins.dir}/urlfilter-domainblacklist/src/test/" /> + <source path="${plugins.dir}/urlfilter-domaindenylist/src/java/" /> + <source path="${plugins.dir}/urlfilter-domaindenylist/src/test/" /> <source path="${plugins.dir}/urlfilter-fast/src/java/"/> <source path="${plugins.dir}/urlfilter-fast/src/test/"/> <source path="${plugins.dir}/urlfilter-ignoreexempt/src/java/" /> diff --git a/conf/domainblacklist-urlfilter.txt.template b/conf/domaindenylist-urlfilter.txt.template 
similarity index 93% rename from conf/domainblacklist-urlfilter.txt.template rename to conf/domaindenylist-urlfilter.txt.template index ca79a20..ff52043 100644 --- a/conf/domainblacklist-urlfilter.txt.template +++ b/conf/domaindenylist-urlfilter.txt.template @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -# config file for urlfilter-domainblacklist plugin +# config file for urlfilter-domaindenylist plugin diff --git a/default.properties b/default.properties index 7884cd5..0e406f7 100644 --- a/default.properties +++ b/default.properties @@ -103,7 +103,7 @@ plugins.protocol=\ plugins.urlfilter=\ org.apache.nutch.urlfilter.automaton*:\ org.apache.nutch.urlfilter.domain*:\ - org.apache.nutch.urlfilter.domainblacklist*:\ + org.apache.nutch.urlfilter.domaindenylist*:\ org.apache.nutch.urlfilter.fast*:\ org.apache.nutch.urlfilter.ignoreexempt*:\ org.apache.nutch.urlfilter.prefix*:\ diff --git a/src/plugin/build.xml b/src/plugin/build.xml index a2a0dd7..dd2a507 100755 --- a/src/plugin/build.xml +++ b/src/plugin/build.xml @@ -88,7 +88,7 @@ <ant dir="tld" target="deploy"/> <ant dir="urlfilter-automaton" target="deploy"/> <ant dir="urlfilter-domain" target="deploy" /> - <ant dir="urlfilter-domainblacklist" target="deploy" /> + <ant dir="urlfilter-domaindenylist" target="deploy" /> <ant dir="urlfilter-fast" target="deploy"/> <ant dir="urlfilter-prefix" target="deploy"/> <ant dir="urlfilter-regex" target="deploy"/> @@ -145,7 +145,7 @@ <ant dir="subcollection" target="test"/> <ant dir="urlfilter-automaton" target="test"/> <ant dir="urlfilter-domain" target="test"/> - <ant dir="urlfilter-domainblacklist" target="test"/> + <ant dir="urlfilter-domaindenylist" target="test"/> <ant dir="urlfilter-fast" target="test"/> <!--ant dir="urlfilter-ignoreexempt" target="test"/--> <ant dir="urlfilter-prefix" target="test"/> @@ -234,7 +234,7 @@ <ant dir="tld" target="clean"/> <ant dir="urlfilter-automaton" 
target="clean"/> <ant dir="urlfilter-domain" target="clean" /> - <ant dir="urlfilter-domainblacklist" target="clean" /> + <ant dir="urlfilter-domaindenylist" target="clean" /> <ant dir="urlfilter-fast" target="clean"/> <ant dir="urlfilter-ignoreexempt" target="clean"/> <ant dir="urlfilter-prefix" target="clean"/> diff --git a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java index d2eba1f..5388cec 100644 --- a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java +++ b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java @@ -18,7 +18,7 @@ /** * URL filter plugin to include only URLs which match an element in a given list of * domain suffixes, domain names, and/or host names. - * See {@link org.apache.nutch.urlfilter.domainblacklist} for the counterpart + * See {@link org.apache.nutch.urlfilter.domaindenylist} for the counterpart * (exclude URLs by host or domain). */ package org.apache.nutch.urlfilter.domain; diff --git a/src/plugin/urlfilter-domainblacklist/build.xml b/src/plugin/urlfilter-domaindenylist/build.xml similarity index 94% rename from src/plugin/urlfilter-domainblacklist/build.xml rename to src/plugin/urlfilter-domaindenylist/build.xml index 19ea483..f06dfc5 100644 --- a/src/plugin/urlfilter-domainblacklist/build.xml +++ b/src/plugin/urlfilter-domaindenylist/build.xml @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. 
--> -<project name="urlfilter-domainblacklist" default="jar-core"> +<project name="urlfilter-domaindenylist" default="jar-core"> <import file="../build-plugin.xml"/> diff --git a/src/plugin/urlfilter-domainblacklist/data/hosts.txt b/src/plugin/urlfilter-domaindenylist/data/hosts.txt similarity index 100% rename from src/plugin/urlfilter-domainblacklist/data/hosts.txt rename to src/plugin/urlfilter-domaindenylist/data/hosts.txt diff --git a/src/plugin/urlfilter-domainblacklist/ivy.xml b/src/plugin/urlfilter-domaindenylist/ivy.xml similarity index 100% rename from src/plugin/urlfilter-domainblacklist/ivy.xml rename to src/plugin/urlfilter-domaindenylist/ivy.xml diff --git a/src/plugin/urlfilter-domainblacklist/plugin.xml b/src/plugin/urlfilter-domaindenylist/plugin.xml similarity index 71% rename from src/plugin/urlfilter-domainblacklist/plugin.xml rename to src/plugin/urlfilter-domaindenylist/plugin.xml index 04eee6e..d1a35f5 100644 --- a/src/plugin/urlfilter-domainblacklist/plugin.xml +++ b/src/plugin/urlfilter-domaindenylist/plugin.xml @@ -16,13 +16,13 @@ limitations under the License. 
--> <plugin - id="urlfilter-domainblacklist" - name="Domain Blacklist URL Filter" + id="urlfilter-domaindenylist" + name="Domain Denylist URL Filter" version="1.0.0" provider-name="nutch.org"> <runtime> - <library name="urlfilter-domainblacklist.jar"> + <library name="urlfilter-domaindenylist.jar"> <export name="*"/> </library> </runtime> @@ -31,12 +31,12 @@ <import plugin="nutch-extensionpoints"/> </requires> - <extension id="org.apache.nutch.net.urlfilter.domainblacklist" - name="Nutch Domain Blacklist URL Filter" + <extension id="org.apache.nutch.net.urlfilter.domaindenylist" + name="Nutch Domain Denylist URL Filter" point="org.apache.nutch.net.URLFilter"> - <implementation id="DomainBlacklistURLFilter" - class="org.apache.nutch.urlfilter.domainblacklist.DomainBlacklistURLFilter"> - <parameter name="file" value="domainblacklist-urlfilter.txt"/> + <implementation id="DomainDenylistURLFilter" + class="org.apache.nutch.urlfilter.domaindenylist.DomainDenylistURLFilter"> + <parameter name="file" value="domaindenylist-urlfilter.txt"/> </implementation> </extension> diff --git a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java similarity index 90% rename from src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java rename to src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java index 77c238b..58e3754 100644 --- a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java +++ b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the 
License. */ -package org.apache.nutch.urlfilter.domainblacklist; +package org.apache.nutch.urlfilter.domaindenylist; import java.lang.invoke.MethodHandles; import java.io.BufferedReader; @@ -62,12 +62,12 @@ import org.apache.nutch.util.domain.DomainSuffix; * overriding the more specific. * </p> * - * The domain file defaults to domainblacklist-urlfilter.txt in the classpath + * The domain file defaults to domaindenylist-urlfilter.txt in the classpath * but can be overridden using the: * * <ul> * <li> - * property "urlfilter.domainblacklist.file" in ./conf/nutch-*.xml, and + * property "urlfilter.domaindenylist.file" in ./conf/nutch-*.xml, and * </li> * <li> * attribute "file" in plugin.xml of this plugin @@ -75,7 +75,7 @@ import org.apache.nutch.util.domain.DomainSuffix; * </ul> * */ -public class DomainBlacklistURLFilter implements URLFilter { +public class DomainDenylistURLFilter implements URLFilter { private static final Logger LOG = LoggerFactory .getLogger(MethodHandles.lookup().lookupClass()); @@ -105,7 +105,7 @@ public class DomainBlacklistURLFilter implements URLFilter { this.conf = conf; // get the extensions for domain urlfilter - String pluginName = "urlfilter-domainblacklist"; + String pluginName = "urlfilter-domaindenylist"; Extension[] extensions = PluginRepository.get(conf) .getExtensionPoint(URLFilter.class.getName()).getExtensions(); for (int i = 0; i < extensions.length; i++) { @@ -127,11 +127,11 @@ public class DomainBlacklistURLFilter implements URLFilter { // precedence hierarchy for definition of filter rules // (first non-empty definition takes precedence): - // 1. string rules defined by `urlfilter.domainblacklist.rules` - // 2. rule file name defined by `urlfilter.domainblacklist.file` + // 1. string rules defined by `urlfilter.domaindenylist.rules` + // 2. rule file name defined by `urlfilter.domaindenylist.file` // 3. 
rule file name defined in plugin.xml (`attributeFile`) - String stringRules = conf.get("urlfilter.domainblacklist.rules"); - String file = conf.get("urlfilter.domainblacklist.file", attributeFile); + String stringRules = conf.get("urlfilter.domaindenylist.rules"); + String file = conf.get("urlfilter.domaindenylist.file", attributeFile); Reader reader = null; if (stringRules != null) { // takes precedence over files reader = new StringReader(stringRules); diff --git a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java similarity index 95% rename from src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java rename to src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java index 1f0022c..401d12f 100644 --- a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java +++ b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java @@ -20,5 +20,5 @@ * See {@link org.apache.nutch.urlfilter.domain} for the counterpart (include only URLs * matching host or domain). 
*/ -package org.apache.nutch.urlfilter.domainblacklist; +package org.apache.nutch.urlfilter.domaindenylist; diff --git a/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java b/src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java similarity index 51% rename from src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java rename to src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java index 9ab207a..0dde234 100644 --- a/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java +++ b/src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java @@ -14,14 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.nutch.urlfilter.domainblacklist; +package org.apache.nutch.urlfilter.domaindenylist; import org.junit.Assert; import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.util.NutchConfiguration; -public class TestDomainBlacklistURLFilter { +public class TestDomainDenylistURLFilter { private final static String SEPARATOR = System.getProperty("file.separator"); private final static String SAMPLES = System.getProperty("test.data", "."); @@ -29,21 +29,21 @@ public class TestDomainBlacklistURLFilter { @Test public void testFilter() throws Exception { - String domainBlacklistFile = SAMPLES + SEPARATOR + "hosts.txt"; + String domainDenylistFile = SAMPLES + SEPARATOR + "hosts.txt"; Configuration conf = NutchConfiguration.create(); - conf.set("urlfilter.domainblacklist.file", domainBlacklistFile); - DomainBlacklistURLFilter domainBlacklistFilter = new DomainBlacklistURLFilter(); - domainBlacklistFilter.setConf(conf); - Assert.assertNull(domainBlacklistFilter.filter("http://lucene.apache.org")); - Assert.assertNull(domainBlacklistFilter.filter("http://hadoop.apache.org")); - Assert.assertNull(domainBlacklistFilter.filter("http://www.apache.org")); - Assert.assertNotNull(domainBlacklistFilter.filter("http://www.google.com")); - Assert.assertNotNull(domainBlacklistFilter.filter("http://mail.yahoo.com")); - Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.net")); - Assert.assertNull(domainBlacklistFilter.filter("http://www.foobas.net")); - Assert.assertNull(domainBlacklistFilter.filter("http://www.yahoo.com")); - Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.be")); - Assert.assertNotNull(domainBlacklistFilter.filter("http://www.adobe.com")); + conf.set("urlfilter.domaindenylist.file", domainDenylistFile); + DomainDenylistURLFilter domainDenylistFilter = new DomainDenylistURLFilter(); + domainDenylistFilter.setConf(conf); + 
Assert.assertNull(domainDenylistFilter.filter("http://lucene.apache.org")); + Assert.assertNull(domainDenylistFilter.filter("http://hadoop.apache.org")); + Assert.assertNull(domainDenylistFilter.filter("http://www.apache.org")); + Assert.assertNotNull(domainDenylistFilter.filter("http://www.google.com")); + Assert.assertNotNull(domainDenylistFilter.filter("http://mail.yahoo.com")); + Assert.assertNull(domainDenylistFilter.filter("http://www.foobar.net")); + Assert.assertNull(domainDenylistFilter.filter("http://www.foobas.net")); + Assert.assertNull(domainDenylistFilter.filter("http://www.yahoo.com")); + Assert.assertNull(domainDenylistFilter.filter("http://www.foobar.be")); + Assert.assertNotNull(domainDenylistFilter.filter("http://www.adobe.com")); } } diff --git a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java index 61c6f17..c54740a 100644 --- a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java +++ b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java @@ -139,8 +139,8 @@ public class PrefixURLFilter implements URLFilter { // precedence hierarchy for definition of filter rules // (first non-empty definition takes precedence): - // 1. string rules defined by `urlfilter.domainblacklist.rules` - // 2. rule file name defined by `urlfilter.domainblacklist.file` + // 1. string rules defined by `urlfilter.prefix.rules` + // 2. rule file name defined by `urlfilter.prefix.file` // 3. 
rule file name defined in plugin.xml (`attributeFile`) String file = conf.get("urlfilter.prefix.file", attributeFile); String stringRules = conf.get("urlfilter.prefix.rules"); diff --git a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java index 3833f3c..a9c2023 100644 --- a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java +++ b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java @@ -273,8 +273,8 @@ public class SuffixURLFilter implements URLFilter { // precedence hierarchy for definition of filter rules // (first non-empty definition takes precedence): - // 1. string rules defined by `urlfilter.domainblacklist.rules` - // 2. rule file name defined by `urlfilter.domainblacklist.file` + // 1. string rules defined by `urlfilter.suffix.rules` + // 2. rule file name defined by `urlfilter.suffix.file` // 3. rule file name defined in plugin.xml (`attributeFile`) String file = conf.get("urlfilter.suffix.file", attributeFile); String stringRules = conf.get("urlfilter.suffix.rules");