Author: ab
Date: Mon May 15 05:14:36 2006
New Revision: 406625
URL: http://svn.apache.org/viewcvs?rev=406625&view=rev
Log:
Add a suffix-based URLFilter. Also correct the extension IDs of the other
urlfilter plugins, so that they can all be active at the same time.
Added:
lucene/nutch/trunk/conf/suffix-urlfilter.txt (with props)
lucene/nutch/trunk/src/plugin/urlfilter-suffix/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/build.xml (with props)
lucene/nutch/trunk/src/plugin/urlfilter-suffix/plugin.xml (with props)
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
(with props)
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
(with props)
Modified:
lucene/nutch/trunk/conf/nutch-default.xml
lucene/nutch/trunk/src/plugin/build.xml
lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml
lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml
lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml
Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=406625&r1=406624&r2=406625&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Mon May 15 05:14:36 2006
@@ -628,6 +628,13 @@
</property>
<property>
+ <name>urlfilter.suffix.file</name>
+ <value>suffix-urlfilter.txt</value>
+ <description>Name of file on CLASSPATH containing url suffixes
+ used by urlfilter-suffix (SuffixURLFilter) plugin.</description>
+</property>
+
+<property>
<name>urlfilter.order</name>
<value></value>
<description>The order by which url filters are applied.
Added: lucene/nutch/trunk/conf/suffix-urlfilter.txt
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/suffix-urlfilter.txt?rev=406625&view=auto
==============================================================================
--- lucene/nutch/trunk/conf/suffix-urlfilter.txt (added)
+++ lucene/nutch/trunk/conf/suffix-urlfilter.txt Mon May 15 05:14:36 2006
@@ -0,0 +1,11 @@
+# config file for urlfilter-suffix plugin
+
+# case-insensitive, allow unknown suffixes
++I
+
+# prohibit these
+.gif
+.jpg
+.jpeg
+.bmp
+.png
Propchange: lucene/nutch/trunk/conf/suffix-urlfilter.txt
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/nutch/trunk/src/plugin/build.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/build.xml?rev=406625&r1=406624&r2=406625&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/build.xml Mon May 15 05:14:36 2006
@@ -56,6 +56,7 @@
<ant dir="urlfilter-automaton" target="deploy"/>
<ant dir="urlfilter-prefix" target="deploy"/>
<ant dir="urlfilter-regex" target="deploy"/>
+ <ant dir="urlfilter-suffix" target="deploy"/>
</target>
<!-- ====================================================== -->
@@ -81,6 +82,7 @@
<ant dir="parse-zip" target="test"/>
<ant dir="urlfilter-automaton" target="test"/>
<ant dir="urlfilter-regex" target="test"/>
+ <ant dir="urlfilter-suffix" target="test"/>
</parallel>
</target>
@@ -135,6 +137,7 @@
<ant dir="urlfilter-automaton" target="clean"/>
<ant dir="urlfilter-prefix" target="clean"/>
<ant dir="urlfilter-regex" target="clean"/>
+ <ant dir="urlfilter-suffix" target="clean"/>
</target>
</project>
Modified: lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml?rev=406625&r1=406624&r2=406625&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml Mon May 15
05:14:36 2006
@@ -18,7 +18,7 @@
<import plugin="lib-regex-filter"/>
</requires>
- <extension id="org.apache.nutch.net.urlfilter"
+ <extension id="org.apache.nutch.net.urlfilter.automaton"
name="Nutch Automaton URL Filter"
point="org.apache.nutch.net.URLFilter">
<implementation id="AutomatonURLFilter"
Modified: lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml?rev=406625&r1=406624&r2=406625&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml Mon May 15
05:14:36 2006
@@ -16,7 +16,7 @@
<import plugin="nutch-extensionpoints"/>
</requires>
- <extension id="org.apache.nutch.net.urlfilter"
+ <extension id="org.apache.nutch.net.urlfilter.prefix"
name="Nutch Prefix URL Filter"
point="org.apache.nutch.net.URLFilter">
<implementation id="PrefixURLFilter"
Modified: lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml?rev=406625&r1=406624&r2=406625&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml Mon May 15
05:14:36 2006
@@ -17,7 +17,7 @@
<import plugin="lib-regex-filter"/>
</requires>
- <extension id="org.apache.nutch.net.urlfilter"
+ <extension id="org.apache.nutch.net.urlfilter.regex"
name="Nutch Regex URL Filter"
point="org.apache.nutch.net.URLFilter">
<implementation id="RegexURLFilter"
Added: lucene/nutch/trunk/src/plugin/urlfilter-suffix/build.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-suffix/build.xml?rev=406625&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-suffix/build.xml (added)
+++ lucene/nutch/trunk/src/plugin/urlfilter-suffix/build.xml Mon May 15
05:14:36 2006
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+
+<project name="urlfilter-suffix" default="jar-core">
+
+ <import file="../build-plugin.xml"/>
+
+</project>
Propchange: lucene/nutch/trunk/src/plugin/urlfilter-suffix/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/nutch/trunk/src/plugin/urlfilter-suffix/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-suffix/plugin.xml?rev=406625&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-suffix/plugin.xml (added)
+++ lucene/nutch/trunk/src/plugin/urlfilter-suffix/plugin.xml Mon May 15
05:14:36 2006
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<plugin
+ id="urlfilter-suffix"
+ name="Suffix URL Filter"
+ version="1.0.0"
+ provider-name="nutch.org">
+
+ <runtime>
+ <library name="urlfilter-suffix.jar">
+ <export name="*"/>
+ </library>
+ </runtime>
+
+ <requires>
+ <import plugin="nutch-extensionpoints"/>
+ </requires>
+
+ <extension id="org.apache.nutch.net.urlfilter.suffix"
+ name="Nutch Suffix URL Filter"
+ point="org.apache.nutch.net.URLFilter">
+ <implementation id="SuffixURLFilter"
+
class="org.apache.nutch.urlfilter.suffix.SuffixURLFilter"/>
+ <!-- by default, attribute "file" is undefined, to keep classic behavior.
+ <implementation id="SuffixURLFilter"
+ class="org.apache.nutch.urlfilter.suffix.SuffixURLFilter">
+ <parameter name="file" value="suffix-urlfilter.txt"/>
+ </implementation>
+ -->
+ </extension>
+
+</plugin>
Propchange: lucene/nutch/trunk/src/plugin/urlfilter-suffix/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added:
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java?rev=406625&view=auto
==============================================================================
---
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
(added)
+++
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
Mon May 15 05:14:36 2006
@@ -0,0 +1,280 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.urlfilter.suffix;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.nutch.net.*;
+
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.SuffixStringMatcher;
+import org.apache.nutch.util.TrieStringMatcher;
+
+import org.apache.nutch.plugin.Extension;
+import org.apache.nutch.plugin.PluginRepository;
+
+import java.io.Reader;
+import java.io.FileReader;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.IOException;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.logging.Logger;
+
+/**
+ * Filters URLs based on a file of URL suffixes. The file is named by
+ * <ol>
+ * <li>property "urlfilter.suffix.file" in ./conf/nutch-default.xml, and</li>
+ * <li>attribute "file" in plugin.xml of this plugin</li>
+ * </ol>
+ * Attribute "file" has higher precedence if defined. If the config file is
+ * missing, all URLs will be rejected.
+ *
+ * <p>This filter can be configured to work in one of two modes:
+ * <ul>
+ * <li><b>default to reject</b> ('-'): in this mode, only URLs that match
+ * suffixes specified in the config file will be accepted; all other URLs
+ * will be rejected.</li>
+ * <li><b>default to accept</b> ('+'): in this mode, only URLs that match
+ * suffixes specified in the config file will be rejected; all other URLs
+ * will be accepted.</li>
+ * </ul>
+ * <p>
+ * The format of this config file is one URL suffix per line, with no preceding
+ * whitespace. Order, in which suffixes are specified, doesn't matter. Blank
+ * lines and comments (#) are allowed.
+ * </p>
+ * <p>
+ * A single '+' or '-' sign not followed by any suffix must be used once, to
+ * signify the mode this plugin operates in. An optional single 'I' can be
+ * appended to it, to signify that suffix matches should be case-insensitive.
+ * The default, if not specified, is to use case-sensitive matches, i.e.
+ * suffix '.JPG' does not match '.jpg'.
+ * </p>
+ * <p>
+ * NOTE: the format of this file is different from urlfilter-prefix, because
+ * that plugin doesn't support allowed/prohibited prefixes (only supports
+ * allowed prefixes). Please note that this plugin does not support regular
+ * expressions; it only accepts literal suffixes. I.e. a suffix "*.jpg" is
+ * most probably wrong, you should use ".jpg" instead.
+ * </p>
+ * <h4>Example 1</h4>
+ * <p>
+ * The configuration shown below will accept all URLs with '.html' or '.htm'
+ * suffixes (case-sensitive - '.HTML' or '.HTM' will be rejected),
+ * and prohibit all other suffixes.
+ * </p>
+ *
+ * <pre>
+ * # this is a comment
+ *
+ * # prohibit all unknown, case-sensitive matching
+ * -
+ *
+ * # collect only HTML files.
+ * .html
+ * .htm
+ * </pre>
+ *
+ *
+ * <h4>Example 2</h4>
+ * <p>
+ * The configuration shown below will accept all URLs except common graphical
+ * formats.
+ * </p>
+ *
+ * <pre>
+ * # this is a comment
+ *
+ * # allow all unknown, case-insensitive matching
+ * +I
+ *
+ * # prohibited suffixes
+ * .gif
+ * .png
+ * .jpg
+ * .jpeg
+ * .bmp
+ * </pre>
+ *
+ *
+ * @author Andrzej Bialecki
+ */
+public class SuffixURLFilter implements URLFilter {
+
+ private static final Logger LOG =
LogFormatter.getLogger(SuffixURLFilter.class.getName());
+
+ // read in attribute "file" of this plugin.
+ private String attributeFile = null;
+
+ private SuffixStringMatcher suffixes;
+ private boolean modeAccept = false;
+
+ private boolean ignoreCase = false;
+
+ private Configuration conf;
+
+ public SuffixURLFilter() throws IOException {
+
+ }
+
+ public SuffixURLFilter(Reader reader) throws IOException {
+ readConfigurationFile(reader);
+ }
+
+ public String filter(String url) {
+ if (url == null) return null;
+ String _url;
+ if (ignoreCase)
+ _url = url.toLowerCase();
+ else _url = url;
+ String a = suffixes.shortestMatch(_url);
+ if (a == null) {
+ if (modeAccept) return url;
+ else return null;
+ } else {
+ if (modeAccept) return null;
+ else return url;
+ }
+ }
+
+ public void readConfigurationFile(Reader reader) throws IOException {
+
+ // handle missing config file
+ if (reader == null) {
+ LOG.warning("Missing urlfilter.suffix.file, all URLs will be rejected!");
+ suffixes = new SuffixStringMatcher(new String[0]);
+ modeAccept = false;
+ ignoreCase = false;
+ return;
+ }
+ BufferedReader in = new BufferedReader(reader);
+ List aSuffixes = new ArrayList();
+ boolean allow = false;
+ boolean ignore = false;
+ String line;
+
+ while ((line = in.readLine()) != null) {
+ if (line.length() == 0) continue;
+
+ char first = line.charAt(0);
+ switch (first) {
+ case ' ':
+ case '\n':
+ case '#': // skip blank & comment lines
+ break;
+ case '-':
+ allow = false;
+ if (line.length() > 1 && line.charAt(1) == 'I')
+ ignore = true;
+ break;
+ case '+':
+ allow = true;
+ if (line.length() > 1 && line.charAt(1) == 'I')
+ ignore = true;
+ break;
+ default:
+ aSuffixes.add(line);
+ }
+ }
+ if (ignore) {
+ for (int i = 0; i < aSuffixes.size(); i++) {
+ aSuffixes.set(i, ((String) aSuffixes.get(i)).toLowerCase());
+ }
+ }
+ suffixes = new SuffixStringMatcher(aSuffixes);
+ modeAccept = allow;
+ ignoreCase = ignore;
+ }
+
+ public static void main(String args[]) throws IOException {
+
+ SuffixURLFilter filter;
+ if (args.length >= 1)
+ filter = new SuffixURLFilter(new FileReader(args[0]));
+ else {
+ filter = new SuffixURLFilter();
+ filter.setConf(NutchConfiguration.create());
+ }
+
+ BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+ String line;
+ while ((line = in.readLine()) != null) {
+ String out = filter.filter(line);
+ if (out != null) {
+ System.out.println("ACCEPTED " + out);
+ } else {
+ System.out.println("REJECTED " + out);
+ }
+ }
+ }
+
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+
+ String pluginName = "urlfilter-suffix";
+ Extension[] extensions =
PluginRepository.get(conf).getExtensionPoint(URLFilter.class.getName()).getExtensions();
+ for (int i = 0; i < extensions.length; i++) {
+ Extension extension = extensions[i];
+ if (extension.getDescriptor().getPluginId().equals(pluginName)) {
+ attributeFile = extension.getAttribute("file");
+ break;
+ }
+ }
+ if (attributeFile != null && attributeFile.trim().equals(""))
attributeFile = null;
+ if (attributeFile != null) {
+ LOG.info("Attribute \"file\" is defined for plugin " + pluginName + " as
" + attributeFile);
+ } else {
+ // LOG.warning("Attribute \"file\" is not defined in plugin.xml for
+ // plugin "+pluginName);
+ }
+
+ String file = conf.get("urlfilter.suffix.file");
+ // attribute "file" takes precedence if defined
+ if (attributeFile != null) file = attributeFile;
+ Reader reader = conf.getConfResourceAsReader(file);
+
+ try {
+ readConfigurationFile(reader);
+ } catch (IOException e) {
+ LOG.severe(e.getMessage());
+ throw new RuntimeException(e.getMessage(), e);
+ }
+ }
+
+ public Configuration getConf() {
+ return this.conf;
+ }
+
+ public boolean isModeAccept() {
+ return modeAccept;
+ }
+
+ public void setModeAccept(boolean modeAccept) {
+ this.modeAccept = modeAccept;
+ }
+
+ public boolean isIgnoreCase() {
+ return ignoreCase;
+ }
+
+ public void setIgnoreCase(boolean ignoreCase) {
+ this.ignoreCase = ignoreCase;
+ }
+}
\ No newline at end of file
Propchange:
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added:
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java?rev=406625&view=auto
==============================================================================
---
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
(added)
+++
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
Mon May 15 05:14:36 2006
@@ -0,0 +1,134 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.suffix;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+
+/**
+ * JUnit test for <code>SuffixURLFilter</code>.
+ *
+ * @author Andrzej Bialecki
+ */
+public class TestSuffixURLFilter extends TestCase {
+ private static final String suffixes =
+ "# this is a comment\n" +
+ "\n" +
+ ".gif\n" +
+ ".jpg\n";
+
+ private static final String[] urls = new String[] {
+ "http://www.example.com/test.gif",
+ "http://www.example.com/TEST.GIF",
+ "http://www.example.com/test.jpg",
+ "http://www.example.com/test.JPG",
+ "http://www.example.com/test.html",
+ "http://www.example.com/test.HTML",
+ };
+
+ private static String[] urlsModeAccept = new String[] {
+ null,
+ urls[1],
+ null,
+ urls[3],
+ urls[4],
+ urls[5]
+ };
+
+ private static String[] urlsModeReject = new String[] {
+ urls[0],
+ null,
+ urls[2],
+ null,
+ null,
+ null
+ };
+
+ private static String[] urlsModeAcceptIgnoreCase = new String[] {
+ null,
+ null,
+ null,
+ null,
+ urls[4],
+ urls[5]
+ };
+
+ private static String[] urlsModeRejectIgnoreCase = new String[] {
+ urls[0],
+ urls[1],
+ urls[2],
+ urls[3],
+ null,
+ null
+ };
+
+ private SuffixURLFilter filter = null;
+
+ public TestSuffixURLFilter(String testName) {
+ super(testName);
+ }
+
+ public static Test suite() {
+ return new TestSuite(TestSuffixURLFilter.class);
+ }
+
+ public static void main(String[] args) {
+ TestRunner.run(suite());
+ }
+
+ public void setUp() throws IOException {
+ filter = new SuffixURLFilter(new StringReader(suffixes));
+ }
+
+ public void testModeAccept() {
+ filter.setIgnoreCase(false);
+ filter.setModeAccept(true);
+ for (int i = 0; i < urls.length; i++) {
+ assertTrue(urlsModeAccept[i] == filter.filter(urls[i]));
+ }
+ }
+
+ public void testModeReject() {
+ filter.setIgnoreCase(false);
+ filter.setModeAccept(false);
+ for (int i = 0; i < urls.length; i++) {
+ assertTrue(urlsModeReject[i] == filter.filter(urls[i]));
+ }
+ }
+
+ public void testModeAcceptIgnoreCase() {
+ filter.setIgnoreCase(true);
+ filter.setModeAccept(true);
+ for (int i = 0; i < urls.length; i++) {
+ assertTrue(urlsModeAcceptIgnoreCase[i] == filter.filter(urls[i]));
+ }
+ }
+
+ public void testModeRejectIgnoreCase() {
+ filter.setIgnoreCase(true);
+ filter.setModeAccept(false);
+ for (int i = 0; i < urls.length; i++) {
+ assertTrue(urlsModeRejectIgnoreCase[i] == filter.filter(urls[i]));
+ }
+ }
+
+}
Propchange:
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
------------------------------------------------------------------------------
svn:eol-style = native