I didn't? I explicitly added it. I'll check again and commit if I have to.
Thanks


On Thursday 23 February 2012 13:36:35 Lewis John Mcgibbney wrote:
> Hey Markus,
> 
> Great work with this one.
> 
> I notice that you did not add
> 
> <ant dir="urlfilter-domainblacklist" target="test" />
> 
> to nutch/trunk/src/plugin/build.xml
> 
> Lewis
> 
> On Thu, Feb 23, 2012 at 12:32 PM, <[email protected]> wrote:
> > Author: markus
> > Date: Thu Feb 23 12:32:49 2012
> > New Revision: 1292764
> > 
> > URL: http://svn.apache.org/viewvc?rev=1292764&view=rev
> > Log:
> > NUTCH-1210 Domain Blacklist Filter
> > 
> > Added:
> >    nutch/trunk/conf/domainblacklist-urlfilter.txt
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/build.xml
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/data/
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/data/hosts.txt
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/ivy.xml
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/plugin.xml
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/src/
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/
> >  
> >  nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nut
> >  ch/
> >  
> >  nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nut
> >  ch/urlfilter/
> >  
> >  nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nut
> >  ch/urlfilter/domainblacklist/
> >  
> >  nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nut
> >  ch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java
> >  
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/
> >    nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/
> >  
> >  nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nut
> >  ch/
> >  
> >  nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nut
> >  ch/urlfilter/
> >  
> >  nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nut
> >  ch/urlfilter/domainblacklist/
> >  
> >  nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nut
> >  ch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
> > 
> > Modified:
> >    nutch/trunk/CHANGES.txt
> >    nutch/trunk/src/plugin/build.xml
> > 
> > Modified: nutch/trunk/CHANGES.txt
> > URL:
> > http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1292764&r1=12927
> > 63&r2=1292764&view=diff
> > 
> > =========================================================================
> > ===== --- nutch/trunk/CHANGES.txt (original)
> > +++ nutch/trunk/CHANGES.txt Thu Feb 23 12:32:49 2012
> > @@ -1,5 +1,7 @@
> > 
> >  Nutch Change Log
> > 
> > +* NUTCH-1210 DomainBlacklistFilter (markus)
> > +
> > 
> >  * NUTCH-965 Skip parsing for truncated documents (alexis, lewismc,
> >  ferdy)
> >  
> >  * NUTCH-1193 Incorrect url transform to lowercase: parameter solr
> > 
> > (Eduardo dos Santos Leggiero via lewismc)
> > 
> > Added: nutch/trunk/conf/domainblacklist-urlfilter.txt
> > URL:
> > http://svn.apache.org/viewvc/nutch/trunk/conf/domainblacklist-urlfilter.t
> > xt?rev=1292764&view=auto
> > 
> > =========================================================================
> > ===== --- nutch/trunk/conf/domainblacklist-urlfilter.txt (added)
> > +++ nutch/trunk/conf/domainblacklist-urlfilter.txt Thu Feb 23 12:32:49
> > 2012 @@ -0,0 +1,16 @@
> > +# Licensed to the Apache Software Foundation (ASF) under one or more
> > +# contributor license agreements.  See the NOTICE file distributed with
> > +# this work for additional information regarding copyright ownership.
> > +# The ASF licenses this file to You under the Apache License, Version
> > 2.0 +# (the "License"); you may not use this file except in compliance
> > with +# the License.  You may obtain a copy of the License at
> > +#
> > +#     http://www.apache.org/licenses/LICENSE-2.0
> > +#
> > +# Unless required by applicable law or agreed to in writing, software
> > +# distributed under the License is distributed on an "AS IS" BASIS,
> > +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> > implied. +# See the License for the specific language governing
> > permissions and +# limitations under the License.
> > +
> > +# config file for urlfilter-domainblacklist plugin
> > 
> > Modified: nutch/trunk/src/plugin/build.xml
> > URL:
> > http://svn.apache.org/viewvc/nutch/trunk/src/plugin/build.xml?rev=1292764
> > &r1=1292763&r2=1292764&view=diff
> > 
> > =========================================================================
> > ===== --- nutch/trunk/src/plugin/build.xml (original)
> > +++ nutch/trunk/src/plugin/build.xml Thu Feb 23 12:32:49 2012
> > @@ -57,6 +57,7 @@
> > 
> >      <ant dir="tld" target="deploy"/>
> >      <ant dir="urlfilter-automaton" target="deploy"/>
> >      <ant dir="urlfilter-domain" target="deploy" />
> > 
> > +     <ant dir="urlfilter-domainblacklist" target="deploy" />
> > 
> >      <ant dir="urlfilter-prefix" target="deploy"/>
> >      <ant dir="urlfilter-regex" target="deploy"/>
> >      <ant dir="urlfilter-suffix" target="deploy"/>
> > 
> > @@ -132,6 +133,7 @@
> > 
> >     <ant dir="tld" target="clean"/>
> >     <ant dir="urlfilter-automaton" target="clean"/>
> >     <ant dir="urlfilter-domain" target="clean" />
> > 
> > +    <ant dir="urlfilter-domainblacklist" target="clean" />
> > 
> >     <ant dir="urlfilter-prefix" target="clean"/>
> >     <ant dir="urlfilter-regex" target="clean"/>
> >     <ant dir="urlfilter-suffix" target="clean"/>
> > 
> > Added: nutch/trunk/src/plugin/urlfilter-domainblacklist/build.xml
> > URL:
> > http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domainblack
> > list/build.xml?rev=1292764&view=auto
> > 
> > =========================================================================
> > ===== --- nutch/trunk/src/plugin/urlfilter-domainblacklist/build.xml
> > (added) +++ nutch/trunk/src/plugin/urlfilter-domainblacklist/build.xml
> > Thu Feb 23 12:32:49 2012
> > @@ -0,0 +1,28 @@
> > +<?xml version="1.0"?>
> > +<!--
> > + Licensed to the Apache Software Foundation (ASF) under one or more
> > + contributor license agreements.  See the NOTICE file distributed with
> > + this work for additional information regarding copyright ownership.
> > + The ASF licenses this file to You under the Apache License, Version 2.0
> > + (the "License"); you may not use this file except in compliance with
> > + the License.  You may obtain a copy of the License at
> > +
> > +     http://www.apache.org/licenses/LICENSE-2.0
> > +
> > + Unless required by applicable law or agreed to in writing, software
> > + distributed under the License is distributed on an "AS IS" BASIS,
> > + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> > implied. + See the License for the specific language governing
> > permissions and + limitations under the License.
> > +-->
> > +<project name="urlfilter-domainblacklist" default="jar-core">
> > +
> > +  <import file="../build-plugin.xml"/>
> > +
> > +  <!-- for junit test -->
> > +  <mkdir dir="${build.test}/data"/>
> > +  <copy todir="${build.test}/data">
> > +    <fileset dir="data" />
> > +  </copy>
> > +
> > +</project>
> > 
> > Added: nutch/trunk/src/plugin/urlfilter-domainblacklist/data/hosts.txt
> > URL:
> > http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domainblack
> > list/data/hosts.txt?rev=1292764&view=auto
> > 
> > =========================================================================
> > ===== --- nutch/trunk/src/plugin/urlfilter-domainblacklist/data/hosts.txt
> > (added) +++
> > nutch/trunk/src/plugin/urlfilter-domainblacklist/data/hosts.txt Thu Feb
> > 23 12:32:49 2012
> > @@ -0,0 +1,5 @@
> > +# comments start with the pound sign
> > +net
> > +apache.org
> > +be
> > +www.yahoo.com
> > \ No newline at end of file
> > 
> > Added: nutch/trunk/src/plugin/urlfilter-domainblacklist/ivy.xml
> > URL:
> > http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domainblack
> > list/ivy.xml?rev=1292764&view=auto
> > 
> > =========================================================================
> > ===== --- nutch/trunk/src/plugin/urlfilter-domainblacklist/ivy.xml
> > (added) +++ nutch/trunk/src/plugin/urlfilter-domainblacklist/ivy.xml Thu
> > Feb 23 12:32:49 2012
> > @@ -0,0 +1,41 @@
> > +<?xml version="1.0" ?>
> > +
> > +<!--
> > +   Licensed to the Apache Software Foundation (ASF) under one or more
> > +   contributor license agreements.  See the NOTICE file distributed with
> > +   this work for additional information regarding copyright ownership.
> > +   The ASF licenses this file to You under the Apache License, Version
> > 2.0 +   (the "License"); you may not use this file except in compliance
> > with +   the License.  You may obtain a copy of the License at
> > +
> > +       http://www.apache.org/licenses/LICENSE-2.0
> > +
> > +   Unless required by applicable law or agreed to in writing, software
> > +   distributed under the License is distributed on an "AS IS" BASIS,
> > +   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> > implied.
> > +   See the License for the specific language governing permissions and
> > +   limitations under the License.
> > +-->
> > +
> > +<ivy-module version="1.0">
> > +  <info organisation="org.apache.nutch" module="${ant.project.name}">
> > +    <license name="Apache 2.0"/>
> > +    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
> > +    <description>
> > +        Apache Nutch
> > +    </description>
> > +  </info>
> > +
> > +  <configurations>
> > +    <include file="../../../ivy/ivy-configurations.xml"/>
> > +  </configurations>
> > +
> > +  <publications>
> > +    <!--get the artifact from our module name-->
> > +    <artifact conf="master"/>
> > +  </publications>
> > +
> > +  <dependencies>
> > +  </dependencies>
> > +
> > +</ivy-module>
> > 
> > Added: nutch/trunk/src/plugin/urlfilter-domainblacklist/plugin.xml
> > URL:
> > http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domainblack
> > list/plugin.xml?rev=1292764&view=auto
> > 
> > =========================================================================
> > ===== --- nutch/trunk/src/plugin/urlfilter-domainblacklist/plugin.xml
> > (added) +++ nutch/trunk/src/plugin/urlfilter-domainblacklist/plugin.xml
> > Thu Feb 23 12:32:49 2012
> > @@ -0,0 +1,43 @@
> > +<?xml version="1.0" encoding="UTF-8"?>
> > +<!--
> > + Licensed to the Apache Software Foundation (ASF) under one or more
> > + contributor license agreements.  See the NOTICE file distributed with
> > + this work for additional information regarding copyright ownership.
> > + The ASF licenses this file to You under the Apache License, Version 2.0
> > + (the "License"); you may not use this file except in compliance with
> > + the License.  You may obtain a copy of the License at
> > +
> > +     http://www.apache.org/licenses/LICENSE-2.0
> > +
> > + Unless required by applicable law or agreed to in writing, software
> > + distributed under the License is distributed on an "AS IS" BASIS,
> > + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> > implied. + See the License for the specific language governing
> > permissions and + limitations under the License.
> > +-->
> > +<plugin
> > +   id="urlfilter-domainblacklist"
> > +   name="Domain Blacklist URL Filter"
> > +   version="1.0.0"
> > +   provider-name="nutch.org">
> > +
> > +   <runtime>
> > +      <library name="urlfilter-domainblacklist.jar">
> > +         <export name="*"/>
> > +      </library>
> > +   </runtime>
> > +
> > +   <requires>
> > +      <import plugin="nutch-extensionpoints"/>
> > +   </requires>
> > +
> > +   <extension id="org.apache.nutch.net.urlfilter.domainblacklist"
> > +              name="Nutch Domain Blacklist URL Filter"
> > +              point="org.apache.nutch.net.URLFilter">
> > +      <implementation id="DomainBlacklistURLFilter"
> > +
> > 
> >  class="org.apache.nutch.urlfilter.domainblacklist.DomainBlacklistURLFilt
> >  er">
> > 
> > +        <parameter name="file" value="domainblacklist-urlfilter.txt"/>
> > +      </implementation>
> > +   </extension>
> > +
> > +</plugin>
> > 
> > Added:
> > nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutc
> > h/urlfilter/domainblacklist/DomainBlacklistURLFilter.java URL:
> > http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domainblack
> > list/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistU
> > RLFilter.java?rev=1292764&view=auto
> > 
> > =========================================================================
> > ===== ---
> > nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutc
> > h/urlfilter/domainblacklist/DomainBlacklistURLFilter.java (added)
> > +++
> > nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutc
> > h/urlfilter/domainblacklist/DomainBlacklistURLFilter.java Thu Feb 23
> > 12:32:49 2012
> > @@ -0,0 +1,203 @@
> > +/*
> > + * Licensed to the Apache Software Foundation (ASF) under one or more
> > + * contributor license agreements.  See the NOTICE file distributed with
> > + * this work for additional information regarding copyright ownership.
> > + * The ASF licenses this file to You under the Apache License, Version
> > 2.0 + * (the "License"); you may not use this file except in compliance
> > with + * the License.  You may obtain a copy of the License at
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> > implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +package org.apache.nutch.urlfilter.domainblacklist;
> > +
> > +import java.io.BufferedReader;
> > +import java.io.FileReader;
> > +import java.io.IOException;
> > +import java.io.Reader;
> > +import java.io.StringReader;
> > +import java.util.LinkedHashSet;
> > +import java.util.Set;
> > +
> > +import org.apache.commons.lang.StringUtils;
> > +import org.slf4j.Logger;
> > +import org.slf4j.LoggerFactory;
> > +import org.apache.hadoop.conf.Configuration;
> > +import org.apache.nutch.net.URLFilter;
> > +import org.apache.nutch.plugin.Extension;
> > +import org.apache.nutch.plugin.PluginRepository;
> > +import org.apache.nutch.util.URLUtil;
> > +import org.apache.nutch.util.domain.DomainSuffix;
> > +
> > +/**
> > + * <p>Filters URLs based on a file containing domain suffixes, domain
> > names, and
> > + * hostnames. A url that matches one of the suffixes, domains, or hosts
> > + * present in the file is filtered out.</p>
> > + *
> > + * <p>Urls are checked in order of domain suffix, domain name, and
> > hostname
> > + * against entries in the domain file. The domain file would be setup as
> > follows
> > + * with one entry per line:
> > + *
> > + * <pre> com apache.org www.apache.org </pre>
> > + *
> > + * <p>The first line is an example of a filter that would allow all .com
> > + * domains. The second line allows all urls from apache.org and all of
> > its
> > + * subdomains such as lucene.apache.org and hadoop.apache.org. The third
> > line
> > + * would allow only urls from www.apache.org. There is no specific
> > ordering to
> > + * entries. The entries are from more general to more specific with the
> > more
> > + * general overridding the more specific.</p>
> > + *
> > + * The domain file defaults to domainblacklist-urlfilter.txt in the
> > classpath but can be
> > + * overridden using the:
> > + *
> > + * <ul> <ol>property "urlfilter.domainblacklist.file" in
> > ./conf/nutch-*.xml, and</ol>
> > + * <ol>attribute "file" in plugin.xml of this plugin</ol> </ul>
> > + *
> > + * the attribute "file" has higher precedence if defined.
> > + */
> > +public class DomainBlacklistURLFilter
> > +  implements URLFilter {
> > +
> > +  private static final Logger LOG =
> > LoggerFactory.getLogger(DomainBlacklistURLFilter.class);
> > +
> > +  // read in attribute "file" of this plugin.
> > +  private static String attributeFile = null;
> > +  private Configuration conf;
> > +  private String domainFile = null;
> > +  private Set<String> domainSet = new LinkedHashSet<String>();
> > +
> > +  private void readConfiguration(Reader configReader)
> > +    throws IOException {
> > +
> > +    // read the configuration file, line by line
> > +    BufferedReader reader = new BufferedReader(configReader);
> > +    String line = null;
> > +    while ((line = reader.readLine()) != null) {
> > +      if (StringUtils.isNotBlank(line) && !line.startsWith("#")) {
> > +        // add non-blank lines and non-commented lines
> > +        domainSet.add(StringUtils.lowerCase(line));
> > +      }
> > +    }
> > +  }
> > +
> > +  /**
> > +   * Default constructor.
> > +   */
> > +  public DomainBlacklistURLFilter() {
> > +
> > +  }
> > +
> > +  /**
> > +   * Constructor that specifies the domain file to use.
> > +   *
> > +   * @param domainFile The domain file, overrides
> > domainblacklist-urlfilter.text default.
> > +   *
> > +   * @throws IOException
> > +   */
> > +  public DomainBlacklistURLFilter(String domainFile) {
> > +    this.domainFile = domainFile;
> > +  }
> > +
> > +  /**
> > +   * Sets the configuration.
> > +   */
> > +  public void setConf(Configuration conf) {
> > +    this.conf = conf;
> > +
> > +    // get the extensions for domain urlfilter
> > +    String pluginName = "urlfilter-domainblacklist";
> > +    Extension[] extensions =
> > PluginRepository.get(conf).getExtensionPoint( +     
> > URLFilter.class.getName()).getExtensions();
> > +    for (int i = 0; i < extensions.length; i++) {
> > +      Extension extension = extensions[i];
> > +      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
> > +        attributeFile = extension.getAttribute("file");
> > +        break;
> > +      }
> > +    }
> > +
> > +    // handle blank non empty input
> > +    if (attributeFile != null && attributeFile.trim().equals("")) {
> > +      attributeFile = null;
> > +    }
> > +
> > +    if (attributeFile != null) {
> > +      if (LOG.isInfoEnabled()) {
> > +        LOG.info("Attribute \"file\" is defined for plugin " +
> > pluginName +          + " as " + attributeFile);
> > +      }
> > +    }
> > +    else {
> > +      if (LOG.isWarnEnabled()) {
> > +        LOG.warn("Attribute \"file\" is not defined in plugin.xml for
> > plugin "
> > +          + pluginName);
> > +      }
> > +    }
> > +
> > +    // domain file and attribute "file" take precedence if defined
> > +    String file = conf.get("urlfilter.domainblacklist.file");
> > +    String stringRules = conf.get("urlfilter.domainblacklist.rules");
> > +    if (domainFile != null) {
> > +      file = domainFile;
> > +    }
> > +    else if (attributeFile != null) {
> > +      file = attributeFile;
> > +    }
> > +    Reader reader = null;
> > +    if (stringRules != null) { // takes precedence over files
> > +      reader = new StringReader(stringRules);
> > +    } else {
> > +      reader = conf.getConfResourceAsReader(file);
> > +    }
> > +    try {
> > +      if (reader == null) {
> > +        reader = new FileReader(file);
> > +      }
> > +      readConfiguration(reader);
> > +    }
> > +    catch (IOException e) {
> > +     
> > LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); +  
> >  }
> > +  }
> > +
> > +  public Configuration getConf() {
> > +    return this.conf;
> > +  }
> > +
> > +  public String filter(String url) {
> > +
> > +    try {
> > +
> > +      // match for suffix, domain, and host in that order.  more general
> > will
> > +      // override more specific
> > +      String domain = URLUtil.getDomainName(url).toLowerCase().trim();
> > +      String host = URLUtil.getHost(url);
> > +      String suffix = null;
> > +      DomainSuffix domainSuffix = URLUtil.getDomainSuffix(url);
> > +      if (domainSuffix != null) {
> > +        suffix = domainSuffix.getDomain();
> > +      }
> > +
> > +      if (domainSet.contains(suffix) || domainSet.contains(domain)
> > +        || domainSet.contains(host)) {
> > +        // Matches, filter!
> > +        return null;
> > +      }
> > +
> > +      // doesn't match, allow
> > +      return url;
> > +    }
> > +    catch (Exception e) {
> > +
> > +      // if an error happens, allow the url to pass
> > +      LOG.error("Could not apply filter on url: " + url + "\n"
> > +        + org.apache.hadoop.util.StringUtils.stringifyException(e));
> > +      return null;
> > +    }
> > +  }
> > +}
> > 
> > Added:
> > nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutc
> > h/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java URL:
> > http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domainblack
> > list/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlackl
> > istURLFilter.java?rev=1292764&view=auto
> > 
> > =========================================================================
> > ===== ---
> > nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutc
> > h/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java (added)
> > +++
> > nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutc
> > h/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java Thu Feb 23
> > 12:32:49 2012
> > @@ -0,0 +1,57 @@
> > +/*
> > + * Licensed to the Apache Software Foundation (ASF) under one or more
> > + * contributor license agreements.  See the NOTICE file distributed with
> > + * this work for additional information regarding copyright ownership.
> > + * The ASF licenses this file to You under the Apache License, Version
> > 2.0 + * (the "License"); you may not use this file except in compliance
> > with + * the License.  You may obtain a copy of the License at
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> > implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +package org.apache.nutch.urlfilter.domainblacklist;
> > +
> > +import junit.framework.TestCase;
> > +
> > +import org.slf4j.Logger;
> > +import org.slf4j.LoggerFactory;
> > +import org.apache.hadoop.conf.Configuration;
> > +import org.apache.nutch.util.NutchConfiguration;
> > +
> > +public class TestDomainBlacklistURLFilter
> > +  extends TestCase {
> > +
> > +  protected static final Logger LOG =
> > LoggerFactory.getLogger(TestDomainBlacklistURLFilter.class);
> > +
> > +  private final static String SEPARATOR =
> > System.getProperty("file.separator");
> > +  private final static String SAMPLES = System.getProperty("test.data",
> > ".");
> > +
> > +  public TestDomainBlacklistURLFilter(String testName) {
> > +    super(testName);
> > +  }
> > +
> > +  public void testFilter()
> > +    throws Exception {
> > +
> > +    String domainBlacklistFile = SAMPLES + SEPARATOR + "hosts.txt";
> > +    Configuration conf = NutchConfiguration.create();
> > +    DomainBlacklistURLFilter domainBlacklistFilter = new
> > DomainBlacklistURLFilter(domainBlacklistFile);
> > +    domainBlacklistFilter.setConf(conf);
> > +   
> > assertNull(domainBlacklistFilter.filter("http://lucene.apache.org";)); + 
> >   assertNull(domainBlacklistFilter.filter("http://hadoop.apache.org";));
> > +    assertNull(domainBlacklistFilter.filter("http://www.apache.org";));
> > +   
> > assertNotNull(domainBlacklistFilter.filter("http://www.google.com";)); + 
> >   assertNotNull(domainBlacklistFilter.filter("http://mail.yahoo.com";));
> > +    assertNull(domainBlacklistFilter.filter("http://www.foobar.net";));
> > +    assertNull(domainBlacklistFilter.filter("http://www.foobas.net";));
> > +    assertNull(domainBlacklistFilter.filter("http://www.yahoo.com";)); +
> >    assertNull(domainBlacklistFilter.filter("http://www.foobar.be";)); +  
> >  assertNotNull(domainBlacklistFilter.filter("http://www.adobe.com";)); + 
> > }
> > +
> > +}

-- 
Markus Jelsma - CTO - Openindex

Reply via email to