http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/parse-tika/src/test/org/apache/nutch/tika/TestRobotsMetaProcessor.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/parse-tika/src/test/org/apache/nutch/tika/TestRobotsMetaProcessor.java b/nutch-plugins/parse-tika/src/test/org/apache/nutch/tika/TestRobotsMetaProcessor.java
deleted file mode 100644
index 4224f93..0000000
--- a/nutch-plugins/parse-tika/src/test/org/apache/nutch/tika/TestRobotsMetaProcessor.java
+++ /dev/null
@@ -1,156 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.tika;
-
-import org.apache.nutch.parse.HTMLMetaTags;
-import org.apache.nutch.parse.tika.HTMLMetaProcessor;
-
-import java.io.ByteArrayInputStream;
-import java.net.URL;
-
-import org.xml.sax.*;
-import org.w3c.dom.*;
-import org.apache.html.dom.*;
-import org.cyberneko.html.parsers.DOMFragmentParser;
-import org.junit.Assert;
-import org.junit.Test;
-
-/** Unit tests for HTMLMetaProcessor. */
-public class TestRobotsMetaProcessor {
-
-  /*
-   * 
-   * some sample tags:
-   * 
-   * <meta name="robots" content="index,follow"> <meta name="robots"
-   * content="noindex,follow"> <meta name="robots" content="index,nofollow">
-   * <meta name="robots" content="noindex,nofollow">
-   * 
-   * <META HTTP-EQUIV="Pragma" CONTENT="no-cache">
-   */
-
-  public static String[] tests = {
-      "<html><head><title>test page</title>"
-          + "<META NAME=\"ROBOTS\" CONTENT=\"NONE\"> "
-          + "<META HTTP-EQUIV=\"PRAGMA\" CONTENT=\"NO-CACHE\"> "
-          + "</head><body>" + " some text" + "</body></html>",
-
-      "<html><head><title>test page</title>"
-          + "<meta name=\"robots\" content=\"all\"> "
-          + "<meta http-equiv=\"pragma\" content=\"no-cache\"> "
-          + "</head><body>" + " some text" + "</body></html>",
-
-      "<html><head><title>test page</title>"
-          + "<MeTa NaMe=\"RoBoTs\" CoNtEnT=\"nOnE\"> "
-          + "<MeTa HtTp-EqUiV=\"pRaGmA\" cOnTeNt=\"No-CaChE\"> "
-          + "</head><body>" + " some text" + "</body></html>",
-
-      "<html><head><title>test page</title>"
-          + "<meta name=\"robots\" content=\"none\"> " + "</head><body>"
-          + " some text" + "</body></html>",
-
-      "<html><head><title>test page</title>"
-          + "<meta name=\"robots\" content=\"noindex,nofollow\"> "
-          + "</head><body>" + " some text" + "</body></html>",
-
-      "<html><head><title>test page</title>"
-          + "<meta name=\"robots\" content=\"noindex,follow\"> "
-          + "</head><body>" + " some text" + "</body></html>",
-
-      "<html><head><title>test page</title>"
-          + "<meta name=\"robots\" content=\"index,nofollow\"> "
-          + "</head><body>" + " some text" + "</body></html>",
-
-      "<html><head><title>test page</title>"
-          + "<meta name=\"robots\" content=\"index,follow\"> "
-          + "<base href=\"http://www.nutch.org/\">" + "</head><body>"
-          + " some text" + "</body></html>",
-
-      "<html><head><title>test page</title>" + "<meta name=\"robots\"> "
-          + "<base href=\"http://www.nutch.org/base/\">" + "</head><body>"
-          + " some text" + "</body></html>",
-
-  };
-
-  public static final boolean[][] answers = { { true, true, true }, // NONE
-      { false, false, true }, // all
-      { true, true, true }, // nOnE
-      { true, true, false }, // none
-      { true, true, false }, // noindex,nofollow
-      { true, false, false }, // noindex,follow
-      { false, true, false }, // index,nofollow
-      { false, false, false }, // index,follow
-      { false, false, false }, // missing!
-  };
-
-  private URL[][] currURLsAndAnswers;
-
-  @Test
-  public void testRobotsMetaProcessor() {
-    DOMFragmentParser parser = new DOMFragmentParser();
-    ;
-
-    try {
-      currURLsAndAnswers = new URL[][] {
-          { new URL("http://www.nutch.org"), null },
-          { new URL("http://www.nutch.org"), null },
-          { new URL("http://www.nutch.org"), null },
-          { new URL("http://www.nutch.org"), null },
-          { new URL("http://www.nutch.org"), null },
-          { new URL("http://www.nutch.org"), null },
-          { new URL("http://www.nutch.org"), null },
-          { new URL("http://www.nutch.org/foo/"),
-              new URL("http://www.nutch.org/") },
-          { new URL("http://www.nutch.org"),
-              new URL("http://www.nutch.org/base/") } };
-    } catch (Exception e) {
-      Assert.assertTrue("couldn't make test URLs!", false);
-    }
-
-    for (int i = 0; i < tests.length; i++) {
-      byte[] bytes = tests[i].getBytes();
-
-      DocumentFragment node = new HTMLDocumentImpl().createDocumentFragment();
-
-      try {
-        parser.parse(new InputSource(new ByteArrayInputStream(bytes)), node);
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
-
-      HTMLMetaTags robotsMeta = new HTMLMetaTags();
-      HTMLMetaProcessor.getMetaTags(robotsMeta, node, currURLsAndAnswers[i][0]);
-
-      Assert.assertTrue("got index wrong on test " + i,
-          robotsMeta.getNoIndex() == answers[i][0]);
-      Assert.assertTrue("got follow wrong on test " + i,
-          robotsMeta.getNoFollow() == answers[i][1]);
-      Assert.assertTrue("got cache wrong on test " + i,
-          robotsMeta.getNoCache() == answers[i][2]);
-      Assert
-          .assertTrue(
-              "got base href wrong on test " + i + " (got "
-                  + robotsMeta.getBaseHref() + ")",
-              ((robotsMeta.getBaseHref() == null) && (currURLsAndAnswers[i][1] == null))
-                  || ((robotsMeta.getBaseHref() != null) && robotsMeta
-                      .getBaseHref().equals(currURLsAndAnswers[i][1])));
-
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/parse-zip/src/test/java/org/apache/nutch/parse/zip/TestZipParser.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/parse-zip/src/test/java/org/apache/nutch/parse/zip/TestZipParser.java b/nutch-plugins/parse-zip/src/test/java/org/apache/nutch/parse/zip/TestZipParser.java
new file mode 100644
index 0000000..17e386a
--- /dev/null
+++ b/nutch-plugins/parse-zip/src/test/java/org/apache/nutch/parse/zip/TestZipParser.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.parse.zip;
+
+import org.apache.nutch.protocol.ProtocolFactory;
+import org.apache.nutch.protocol.Protocol;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.ProtocolException;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseUtil;
+import org.apache.nutch.parse.ParseException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Based on Unit tests for MSWordParser by John Xing
+ * 
+ * @author Rohit Kulkarni & Ashish Vaidya
+ */
+public class TestZipParser {
+
+  private String fileSeparator = System.getProperty("file.separator");
+  // This system property is defined in ./src/plugin/build-plugin.xml
+  private String sampleDir = System.getProperty("test.data", ".");
+
+  // Make sure sample files are copied to "test.data"
+
+  private String[] sampleFiles = { "test.zip" };
+
+  private String expectedText = "textfile.txt This is text file number 1 ";
+
+  @Test
+  public void testIt() throws ProtocolException, ParseException {
+    String urlString;
+    Protocol protocol;
+    Content content;
+    Parse parse;
+
+    Configuration conf = NutchConfiguration.create();
+    for (int i = 0; i < sampleFiles.length; i++) {
+      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
+
+      protocol = new ProtocolFactory(conf).getProtocol(urlString);
+      content = protocol.getProtocolOutput(new Text(urlString),
+          new CrawlDatum()).getContent();
+      parse = new ParseUtil(conf).parseByExtensionId("parse-zip", content).get(
+          content.getUrl());
+      Assert.assertTrue(parse.getText().equals(expectedText));
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java b/nutch-plugins/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
deleted file mode 100644
index 17e386a..0000000
--- a/nutch-plugins/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.parse.zip;
-
-import org.apache.nutch.protocol.ProtocolFactory;
-import org.apache.nutch.protocol.Protocol;
-import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ProtocolException;
-import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseUtil;
-import org.apache.nutch.parse.ParseException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.util.NutchConfiguration;
-import org.apache.hadoop.io.Text;
-import org.apache.nutch.crawl.CrawlDatum;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Based on Unit tests for MSWordParser by John Xing
- * 
- * @author Rohit Kulkarni & Ashish Vaidya
- */
-public class TestZipParser {
-
-  private String fileSeparator = System.getProperty("file.separator");
-  // This system property is defined in ./src/plugin/build-plugin.xml
-  private String sampleDir = System.getProperty("test.data", ".");
-
-  // Make sure sample files are copied to "test.data"
-
-  private String[] sampleFiles = { "test.zip" };
-
-  private String expectedText = "textfile.txt This is text file number 1 ";
-
-  @Test
-  public void testIt() throws ProtocolException, ParseException {
-    String urlString;
-    Protocol protocol;
-    Content content;
-    Parse parse;
-
-    Configuration conf = NutchConfiguration.create();
-    for (int i = 0; i < sampleFiles.length; i++) {
-      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
-
-      protocol = new ProtocolFactory(conf).getProtocol(urlString);
-      content = protocol.getProtocolOutput(new Text(urlString),
-          new CrawlDatum()).getContent();
-      parse = new ParseUtil(conf).parseByExtensionId("parse-zip", content).get(
-          content.getUrl());
-      Assert.assertTrue(parse.getText().equals(expectedText));
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/parsefilter-regex/src/test/java/org/apache/nutch/parsefilter/regex/TestRegexParseFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/parsefilter-regex/src/test/java/org/apache/nutch/parsefilter/regex/TestRegexParseFilter.java b/nutch-plugins/parsefilter-regex/src/test/java/org/apache/nutch/parsefilter/regex/TestRegexParseFilter.java
new file mode 100644
index 0000000..9bd7149
--- /dev/null
+++ b/nutch-plugins/parsefilter-regex/src/test/java/org/apache/nutch/parsefilter/regex/TestRegexParseFilter.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.parsefilter.regex;
+
+import java.net.MalformedURLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.parse.ParseImpl;
+import org.apache.nutch.parse.ParseResult;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.NutchConfiguration;
+import junit.framework.TestCase;
+
+public class TestRegexParseFilter extends TestCase {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  public void testPositiveFilter() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+
+    String file = SAMPLES + SEPARATOR + "regex-parsefilter.txt";
+    RegexParseFilter filter = new RegexParseFilter(file);
+    filter.setConf(conf);
+
+    String url = "http://nutch.apache.org/";
+    String html = "<body><html><h1>nutch</h1><p>this is the extracted text blablabla</p></body></html>";
+    Content content = new Content(url, url, html.getBytes("UTF-8"), "text/html", new Metadata(), conf);
+    Parse parse = new ParseImpl("nutch this is the extracted text blablabla", new ParseData());
+    
+    ParseResult result = ParseResult.createParseResult(url, parse);
+    result = filter.filter(content, result, null, null);
+
+    Metadata meta = parse.getData().getParseMeta();
+    
+    assertEquals("true", meta.get("first"));
+    assertEquals("true", meta.get("second"));
+  }
+  
+  public void testNegativeFilter() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+
+    String file = SAMPLES + SEPARATOR + "regex-parsefilter.txt";
+    RegexParseFilter filter = new RegexParseFilter(file);
+    filter.setConf(conf);
+
+    String url = "http://nutch.apache.org/";
+    String html = "<body><html><h2>nutch</h2><p>this is the extracted text no bla</p></body></html>";
+    Content content = new Content(url, url, html.getBytes("UTF-8"), "text/html", new Metadata(), conf);
+    Parse parse = new ParseImpl("nutch this is the extracted text bla", new ParseData());
+    
+    ParseResult result = ParseResult.createParseResult(url, parse);
+    result = filter.filter(content, result, null, null);
+
+    Metadata meta = parse.getData().getParseMeta();
+    
+    assertEquals("false", meta.get("first"));
+    assertEquals("false", meta.get("second"));
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/parsefilter-regex/src/test/org/apache/nutch/parsefilter/regex/TestRegexParseFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/parsefilter-regex/src/test/org/apache/nutch/parsefilter/regex/TestRegexParseFilter.java b/nutch-plugins/parsefilter-regex/src/test/org/apache/nutch/parsefilter/regex/TestRegexParseFilter.java
deleted file mode 100644
index 9bd7149..0000000
--- a/nutch-plugins/parsefilter-regex/src/test/org/apache/nutch/parsefilter/regex/TestRegexParseFilter.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.parsefilter.regex;
-
-import java.net.MalformedURLException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.metadata.Metadata;
-import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseImpl;
-import org.apache.nutch.parse.ParseResult;
-import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.NutchConfiguration;
-import junit.framework.TestCase;
-
-public class TestRegexParseFilter extends TestCase {
-
-  private final static String SEPARATOR = System.getProperty("file.separator");
-  private final static String SAMPLES = System.getProperty("test.data", ".");
-
-  public void testPositiveFilter() throws Exception {
-    Configuration conf = NutchConfiguration.create();
-
-    String file = SAMPLES + SEPARATOR + "regex-parsefilter.txt";
-    RegexParseFilter filter = new RegexParseFilter(file);
-    filter.setConf(conf);
-
-    String url = "http://nutch.apache.org/";
-    String html = "<body><html><h1>nutch</h1><p>this is the extracted text blablabla</p></body></html>";
-    Content content = new Content(url, url, html.getBytes("UTF-8"), "text/html", new Metadata(), conf);
-    Parse parse = new ParseImpl("nutch this is the extracted text blablabla", new ParseData());
-    
-    ParseResult result = ParseResult.createParseResult(url, parse);
-    result = filter.filter(content, result, null, null);
-
-    Metadata meta = parse.getData().getParseMeta();
-    
-    assertEquals("true", meta.get("first"));
-    assertEquals("true", meta.get("second"));
-  }
-  
-  public void testNegativeFilter() throws Exception {
-    Configuration conf = NutchConfiguration.create();
-
-    String file = SAMPLES + SEPARATOR + "regex-parsefilter.txt";
-    RegexParseFilter filter = new RegexParseFilter(file);
-    filter.setConf(conf);
-
-    String url = "http://nutch.apache.org/";
-    String html = "<body><html><h2>nutch</h2><p>this is the extracted text no bla</p></body></html>";
-    Content content = new Content(url, url, html.getBytes("UTF-8"), "text/html", new Metadata(), conf);
-    Parse parse = new ParseImpl("nutch this is the extracted text bla", new ParseData());
-    
-    ParseResult result = ParseResult.createParseResult(url, parse);
-    result = filter.filter(content, result, null, null);
-
-    Metadata meta = parse.getData().getParseMeta();
-    
-    assertEquals("false", meta.get("first"));
-    assertEquals("false", meta.get("second"));
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/protocol-file/src/test/java/org/apache/nutch/protocol/file/TestProtocolFile.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/protocol-file/src/test/java/org/apache/nutch/protocol/file/TestProtocolFile.java b/nutch-plugins/protocol-file/src/test/java/org/apache/nutch/protocol/file/TestProtocolFile.java
new file mode 100644
index 0000000..5f95377
--- /dev/null
+++ b/nutch-plugins/protocol-file/src/test/java/org/apache/nutch/protocol/file/TestProtocolFile.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.file;
+
+// Hadoop imports
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.Protocol;
+import org.apache.nutch.protocol.ProtocolException;
+import org.apache.nutch.protocol.ProtocolFactory;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.apache.nutch.protocol.ProtocolStatus;
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * @author mattmann
+ * @version $Revision$
+ * 
+ *          <p>
+ *          Unit tests for the {@link File}Protocol.
+ *          </p>
+ *          .
+ */
+public class TestProtocolFile {
+
+  private String fileSeparator = System.getProperty("file.separator");
+  private String sampleDir = System.getProperty("test.data", ".");
+
+  private static final String[] testTextFiles = new String[] {
+      "testprotocolfile.txt", "testprotocolfile_(encoded).txt",
+      "testprotocolfile_%28encoded%29.txt" };
+
+  private static final CrawlDatum datum = new CrawlDatum();
+
+  private static final String expectedMimeType = "text/plain";
+
+  private Configuration conf;
+
+  @Before
+  public void setUp() {
+    conf = NutchConfiguration.create();
+  }
+
+  @Test
+  public void testSetContentType() throws ProtocolException {
+    for (String testTextFile : testTextFiles) {
+      setContentType(testTextFile);
+    }
+  }
+
+  /**
+   * Tests the setting of the <code>Response.CONTENT_TYPE</code> metadata field.
+   * 
+   * @since NUTCH-384
+   * 
+   */
+  public void setContentType(String testTextFile) throws ProtocolException {
+    String urlString = "file:" + sampleDir + fileSeparator + testTextFile;
+    Assert.assertNotNull(urlString);
+    Protocol protocol = new ProtocolFactory(conf).getProtocol(urlString);
+    ProtocolOutput output = protocol.getProtocolOutput(new Text(urlString),
+        datum);
+    Assert.assertNotNull(output);
+    Assert.assertEquals("Status code: [" + output.getStatus().getCode()
+        + "], not equal to: [" + ProtocolStatus.SUCCESS + "]: args: ["
+        + output.getStatus().getArgs() + "]", ProtocolStatus.SUCCESS, output
+        .getStatus().getCode());
+    Assert.assertNotNull(output.getContent());
+    Assert.assertNotNull(output.getContent().getContentType());
+    Assert.assertEquals(expectedMimeType, output.getContent().getContentType());
+    Assert.assertNotNull(output.getContent().getMetadata());
+    Assert.assertEquals(expectedMimeType, output.getContent().getMetadata()
+        .get(Response.CONTENT_TYPE));
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java b/nutch-plugins/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
deleted file mode 100644
index 5f95377..0000000
--- a/nutch-plugins/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.file;
-
-// Hadoop imports
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-
-// Nutch imports
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.Protocol;
-import org.apache.nutch.protocol.ProtocolException;
-import org.apache.nutch.protocol.ProtocolFactory;
-import org.apache.nutch.protocol.ProtocolOutput;
-import org.apache.nutch.protocol.ProtocolStatus;
-import org.apache.nutch.util.NutchConfiguration;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * @author mattmann
- * @version $Revision$
- * 
- *          <p>
- *          Unit tests for the {@link File}Protocol.
- *          </p>
- *          .
- */
-public class TestProtocolFile {
-
-  private String fileSeparator = System.getProperty("file.separator");
-  private String sampleDir = System.getProperty("test.data", ".");
-
-  private static final String[] testTextFiles = new String[] {
-      "testprotocolfile.txt", "testprotocolfile_(encoded).txt",
-      "testprotocolfile_%28encoded%29.txt" };
-
-  private static final CrawlDatum datum = new CrawlDatum();
-
-  private static final String expectedMimeType = "text/plain";
-
-  private Configuration conf;
-
-  @Before
-  public void setUp() {
-    conf = NutchConfiguration.create();
-  }
-
-  @Test
-  public void testSetContentType() throws ProtocolException {
-    for (String testTextFile : testTextFiles) {
-      setContentType(testTextFile);
-    }
-  }
-
-  /**
-   * Tests the setting of the <code>Response.CONTENT_TYPE</code> metadata field.
-   * 
-   * @since NUTCH-384
-   * 
-   */
-  public void setContentType(String testTextFile) throws ProtocolException {
-    String urlString = "file:" + sampleDir + fileSeparator + testTextFile;
-    Assert.assertNotNull(urlString);
-    Protocol protocol = new ProtocolFactory(conf).getProtocol(urlString);
-    ProtocolOutput output = protocol.getProtocolOutput(new Text(urlString),
-        datum);
-    Assert.assertNotNull(output);
-    Assert.assertEquals("Status code: [" + output.getStatus().getCode()
-        + "], not equal to: [" + ProtocolStatus.SUCCESS + "]: args: ["
-        + output.getStatus().getArgs() + "]", ProtocolStatus.SUCCESS, output
-        .getStatus().getCode());
-    Assert.assertNotNull(output.getContent());
-    Assert.assertNotNull(output.getContent().getContentType());
-    Assert.assertEquals(expectedMimeType, output.getContent().getContentType());
-    Assert.assertNotNull(output.getContent().getMetadata());
-    Assert.assertEquals(expectedMimeType, output.getContent().getMetadata()
-        .get(Response.CONTENT_TYPE));
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/protocol-http/src/test/java/org/apache/nutch/protocol/http/TestProtocolHttp.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/protocol-http/src/test/java/org/apache/nutch/protocol/http/TestProtocolHttp.java b/nutch-plugins/protocol-http/src/test/java/org/apache/nutch/protocol/http/TestProtocolHttp.java
new file mode 100644
index 0000000..7dd9e9b
--- /dev/null
+++ b/nutch-plugins/protocol-http/src/test/java/org/apache/nutch/protocol/http/TestProtocolHttp.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.http;
+
+import static org.junit.Assert.assertEquals;
+
+import java.net.URL;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.junit.After;
+import org.junit.Test;
+import org.mortbay.jetty.Server;
+import org.mortbay.jetty.nio.SelectChannelConnector;
+import org.mortbay.jetty.servlet.Context;
+import org.mortbay.jetty.servlet.ServletHolder;
+
+/**
+ * Test cases for protocol-http
+ */
+public class TestProtocolHttp {
+  private static final String RES_DIR = System.getProperty("test.data", ".");
+
+  private Http http;
+  private Server server;
+  private Context root;
+  private Configuration conf;
+  private int port;
+
+  public void setUp(boolean redirection) throws Exception {
+    conf = new Configuration();
+    conf.addResource("nutch-default.xml");
+    conf.addResource("nutch-site-test.xml");
+
+    http = new Http();
+    http.setConf(conf);
+
+    server = new Server();
+
+    if (redirection) {
+      root = new Context(server, "/redirection", Context.SESSIONS);
+      root.setAttribute("newContextURL", "/redirect");
+    } else {
+      root = new Context(server, "/", Context.SESSIONS);
+    }
+
+    ServletHolder sh = new ServletHolder(
+        org.apache.jasper.servlet.JspServlet.class);
+    root.addServlet(sh, "*.jsp");
+    root.setResourceBase(RES_DIR);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    server.stop();
+  }
+
+  @Test
+  public void testStatusCode() throws Exception {
+    startServer(47504, false);
+    fetchPage("/basic-http.jsp", 200);
+    fetchPage("/redirect301.jsp", 301);
+    fetchPage("/redirect302.jsp", 302);
+    fetchPage("/nonexists.html", 404);
+    fetchPage("/brokenpage.jsp", 500);
+  }
+
+  @Test
+  public void testRedirectionJetty() throws Exception {
+    // Redirection via Jetty
+    startServer(47503, true);
+    fetchPage("/redirection", 302);
+  }
+
+  /**
+   * Starts the Jetty server at a specified port and redirection parameter.
+   * 
+   * @param portno
+   *          Port number.
+   * @param redirection
+   *          whether redirection
+   */
+  private void startServer(int portno, boolean redirection) throws Exception {
+    port = portno;
+    setUp(redirection);
+    SelectChannelConnector connector = new SelectChannelConnector();
+    connector.setHost("127.0.0.1");
+    connector.setPort(port);
+
+    server.addConnector(connector);
+    server.start();
+  }
+
+  /**
+   * Fetches the specified <code>page</code> from the local Jetty server and
+   * checks whether the HTTP response status code matches with the expected
+   * code. Also use jsp pages for redirection.
+   * 
+   * @param page
+   *          Page to be fetched.
+   * @param expectedCode
+   *          HTTP response status code expected while fetching the page.
+   */
+  private void fetchPage(String page, int expectedCode) throws Exception {
+    URL url = new URL("http", "127.0.0.1", port, page);
+    CrawlDatum crawlDatum = new CrawlDatum();
+    Response response = http.getResponse(url, crawlDatum, true);
+    ProtocolOutput out = http.getProtocolOutput(new Text(url.toString()),
+        crawlDatum);
+    Content content = out.getContent();
+    assertEquals("HTTP Status Code for " + url, expectedCode,
+        response.getCode());
+
+    if (page.compareTo("/nonexists.html") != 0
+        && page.compareTo("/brokenpage.jsp") != 0
+        && page.compareTo("/redirection") != 0) {
+      assertEquals("ContentType " + url, "text/html",
+          content.getContentType());
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java b/nutch-plugins/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
deleted file mode 100644
index 7dd9e9b..0000000
--- a/nutch-plugins/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.http;
-
-import static org.junit.Assert.assertEquals;
-
-import java.net.URL;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ProtocolOutput;
-import org.junit.After;
-import org.junit.Test;
-import org.mortbay.jetty.Server;
-import org.mortbay.jetty.nio.SelectChannelConnector;
-import org.mortbay.jetty.servlet.Context;
-import org.mortbay.jetty.servlet.ServletHolder;
-
-/**
- * Test cases for protocol-http
- */
-public class TestProtocolHttp {
-  private static final String RES_DIR = System.getProperty("test.data", ".");
-
-  private Http http;
-  private Server server;
-  private Context root;
-  private Configuration conf;
-  private int port;
-
-  public void setUp(boolean redirection) throws Exception {
-    conf = new Configuration();
-    conf.addResource("nutch-default.xml");
-    conf.addResource("nutch-site-test.xml");
-
-    http = new Http();
-    http.setConf(conf);
-
-    server = new Server();
-
-    if (redirection) {
-      root = new Context(server, "/redirection", Context.SESSIONS);
-      root.setAttribute("newContextURL", "/redirect");
-    } else {
-      root = new Context(server, "/", Context.SESSIONS);
-    }
-
-    ServletHolder sh = new ServletHolder(
-        org.apache.jasper.servlet.JspServlet.class);
-    root.addServlet(sh, "*.jsp");
-    root.setResourceBase(RES_DIR);
-  }
-
-  @After
-  public void tearDown() throws Exception {
-    server.stop();
-  }
-
-  @Test
-  public void testStatusCode() throws Exception {
-    startServer(47504, false);
-    fetchPage("/basic-http.jsp", 200);
-    fetchPage("/redirect301.jsp", 301);
-    fetchPage("/redirect302.jsp", 302);
-    fetchPage("/nonexists.html", 404);
-    fetchPage("/brokenpage.jsp", 500);
-  }
-
-  @Test
-  public void testRedirectionJetty() throws Exception {
-    // Redirection via Jetty
-    startServer(47503, true);
-    fetchPage("/redirection", 302);
-  }
-
-  /**
-   * Starts the Jetty server at a specified port and redirection parameter.
-   * 
-   * @param portno
-   *          Port number.
-   * @param redirection
-   *          whether redirection
-   */
-  private void startServer(int portno, boolean redirection) throws Exception {
-    port = portno;
-    setUp(redirection);
-    SelectChannelConnector connector = new SelectChannelConnector();
-    connector.setHost("127.0.0.1");
-    connector.setPort(port);
-
-    server.addConnector(connector);
-    server.start();
-  }
-
-  /**
-   * Fetches the specified <code>page</code> from the local Jetty server and
-   * checks whether the HTTP response status code matches with the expected
-   * code. Also use jsp pages for redirection.
-   * 
-   * @param page
-   *          Page to be fetched.
-   * @param expectedCode
-   *          HTTP response status code expected while fetching the page.
-   */
-  private void fetchPage(String page, int expectedCode) throws Exception {
-    URL url = new URL("http", "127.0.0.1", port, page);
-    CrawlDatum crawlDatum = new CrawlDatum();
-    Response response = http.getResponse(url, crawlDatum, true);
-    ProtocolOutput out = http.getProtocolOutput(new Text(url.toString()),
-        crawlDatum);
-    Content content = out.getContent();
-    assertEquals("HTTP Status Code for " + url, expectedCode,
-        response.getCode());
-
-    if (page.compareTo("/nonexists.html") != 0
-        && page.compareTo("/brokenpage.jsp") != 0
-        && page.compareTo("/redirection") != 0) {
-      assertEquals("ContentType " + url, "text/html",
-          content.getContentType());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/protocol-httpclient/src/test/java/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/protocol-httpclient/src/test/java/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java b/nutch-plugins/protocol-httpclient/src/test/java/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
new file mode 100644
index 0000000..783e5af
--- /dev/null
+++ b/nutch-plugins/protocol-httpclient/src/test/java/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.httpclient;
+
+import java.net.URL;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mortbay.jetty.Server;
+import org.mortbay.jetty.bio.SocketConnector;
+import org.mortbay.jetty.handler.ContextHandler;
+import org.mortbay.jetty.servlet.ServletHandler;
+import org.mortbay.jetty.servlet.SessionHandler;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.protocols.Response;
+
+/**
+ * Test cases for protocol-httpclient.
+ */
+public class TestProtocolHttpClient {
+
+  private Server server;
+  private Configuration conf;
+  private static final String RES_DIR = System.getProperty("test.data", ".");
+  private int port;
+  private Http http = new Http();
+
+  @Before
+  public void setUp() throws Exception {
+
+    ContextHandler context = new ContextHandler();
+    context.setContextPath("/");
+    context.setResourceBase(RES_DIR);
+    ServletHandler sh = new ServletHandler();
+    sh.addServletWithMapping("org.apache.jasper.servlet.JspServlet", "*.jsp");
+    context.addHandler(sh);
+    context.addHandler(new SessionHandler());
+
+    server = new Server();
+    server.addHandler(context);
+
+    conf = new Configuration();
+    conf.addResource("nutch-default.xml");
+    conf.addResource("nutch-site-test.xml");
+
+    http = new Http();
+    http.setConf(conf);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    server.stop();
+    for (int i = 0; i < 5; i++) {
+      if (!server.isStopped()) {
+       Thread.sleep(1000);
+      }
+    }
+  }
+
+  /**
+   * Tests whether the client can remember cookies.
+   * 
+   * @throws Exception
+   *           If an error occurs or the test case fails.
+   */
+  @Test
+  public void testCookies() throws Exception {
+    startServer(47500);
+    fetchPage("/cookies.jsp", 200);
+    fetchPage("/cookies.jsp?cookie=yes", 200);
+  }
+
+  /**
+   * Tests that no pre-emptive authorization headers are sent by the client.
+   * 
+   * @throws Exception
+   *           If an error occurs or the test case fails.
+   */
+  @Test
+  public void testNoPreemptiveAuth() throws Exception {
+    startServer(47500);
+    fetchPage("/noauth.jsp", 200);
+  }
+
+  /**
+   * Tests default credentials.
+   * 
+   * @throws Exception
+   *           If an error occurs or the test case fails.
+   */
+  @Test
+  public void testDefaultCredentials() throws Exception {
+    startServer(47502);
+    fetchPage("/basic.jsp", 200);
+  }
+
+  /**
+   * Tests basic authentication scheme for various realms.
+   * 
+   * @throws Exception
+   *           If an error occurs or the test case fails.
+   */
+  @Test
+  public void testBasicAuth() throws Exception {
+    startServer(47500);
+    fetchPage("/basic.jsp", 200);
+    fetchPage("/basic.jsp?case=1", 200);
+    fetchPage("/basic.jsp?case=2", 200);
+    server.start();
+  }
+
+  /**
+   * Tests that authentication happens for a defined realm and not for other
+   * realms for a host:port when an extra <code>authscope</code> tag is not
+   * defined to match all other realms.
+   * 
+   * @throws Exception
+   *           If an error occurs or the test case fails.
+   */
+  @Test
+  public void testOtherRealmsNoAuth() throws Exception {
+    startServer(47501);
+    fetchPage("/basic.jsp", 200);
+    fetchPage("/basic.jsp?case=1", 401);
+    fetchPage("/basic.jsp?case=2", 401);
+  }
+
+  /**
+   * Tests Digest authentication scheme.
+   * 
+   * @throws Exception
+   *           If an error occurs or the test case fails.
+   */
+  @Test
+  public void testDigestAuth() throws Exception {
+    startServer(47500);
+    fetchPage("/digest.jsp", 200);
+  }
+
+  /**
+   * Tests NTLM authentication scheme.
+   * 
+   * @throws Exception
+   *           If an error occurs or the test case fails.
+   */
+  @Test
+  public void testNtlmAuth() throws Exception {
+    startServer(47501);
+    fetchPage("/ntlm.jsp", 200);
+  }
+
+  /**
+   * Starts the Jetty server on the first usable port, trying up to 10
+   * consecutive ports beginning at <code>portno</code>. The port that is
+   * tried last (the one in use on success) is stored in <code>port</code>.
+   *
+   * @param portno
+   *          First port number to try.
+   * @throws Exception
+   *           If starting the server fails on the last candidate port.
+   */
+  private void startServer(int portno) throws Exception {
+    SocketConnector listener = new SocketConnector();
+    listener.setHost("127.0.0.1");
+    server.addConnector(listener);
+    for (int p = portno; p < portno + 10; p++) {
+      port = p; // was "portno": every retry re-tried the same port
+      listener.setPort(port);
+      try {
+        server.start();
+        break;
+      } catch (Exception e) {
+        if (p == portno + 9) { // no candidates left: surface the failure
+          throw e;
+        }
+      }
+    }
+  }
+
+  /**
+   * Fetches the specified <code>page</code> from the local Jetty server and
+   * checks whether the HTTP response status code matches with the expected
+   * code.
+   * 
+   * @param page
+   *          Page to be fetched.
+   * @param expectedCode
+   *          HTTP response status code expected while fetching the page.
+   * @throws Exception
+   *           When an error occurs or test case fails.
+   */
+  private void fetchPage(String page, int expectedCode) throws Exception {
+    URL url = new URL("http", "127.0.0.1", port, page);
+    Response response = null;
+    response = http.getResponse(url, new CrawlDatum(), true);
+
+    int code = response.getCode();
+    Assert.assertEquals("HTTP Status Code for " + url, expectedCode, code);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java b/nutch-plugins/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
deleted file mode 100644
index 783e5af..0000000
--- a/nutch-plugins/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.httpclient;
-
-import java.net.URL;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-import org.mortbay.jetty.Server;
-import org.mortbay.jetty.bio.SocketConnector;
-import org.mortbay.jetty.handler.ContextHandler;
-import org.mortbay.jetty.servlet.ServletHandler;
-import org.mortbay.jetty.servlet.SessionHandler;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.net.protocols.Response;
-
-/**
- * Test cases for protocol-httpclient.
- */
-public class TestProtocolHttpClient {
-
-  private Server server;
-  private Configuration conf;
-  private static final String RES_DIR = System.getProperty("test.data", ".");
-  private int port;
-  private Http http = new Http();
-
-  @Before
-  public void setUp() throws Exception {
-
-    ContextHandler context = new ContextHandler();
-    context.setContextPath("/");
-    context.setResourceBase(RES_DIR);
-    ServletHandler sh = new ServletHandler();
-    sh.addServletWithMapping("org.apache.jasper.servlet.JspServlet", "*.jsp");
-    context.addHandler(sh);
-    context.addHandler(new SessionHandler());
-
-    server = new Server();
-    server.addHandler(context);
-
-    conf = new Configuration();
-    conf.addResource("nutch-default.xml");
-    conf.addResource("nutch-site-test.xml");
-
-    http = new Http();
-    http.setConf(conf);
-  }
-
-  @After
-  public void tearDown() throws Exception {
-    server.stop();
-    for (int i = 0; i < 5; i++) {
-      if (!server.isStopped()) {
-       Thread.sleep(1000);
-      }
-    }
-  }
-
-  /**
-   * Tests whether the client can remember cookies.
-   * 
-   * @throws Exception
-   *           If an error occurs or the test case fails.
-   */
-  @Test
-  public void testCookies() throws Exception {
-    startServer(47500);
-    fetchPage("/cookies.jsp", 200);
-    fetchPage("/cookies.jsp?cookie=yes", 200);
-  }
-
-  /**
-   * Tests that no pre-emptive authorization headers are sent by the client.
-   * 
-   * @throws Exception
-   *           If an error occurs or the test case fails.
-   */
-  @Test
-  public void testNoPreemptiveAuth() throws Exception {
-    startServer(47500);
-    fetchPage("/noauth.jsp", 200);
-  }
-
-  /**
-   * Tests default credentials.
-   * 
-   * @throws Exception
-   *           If an error occurs or the test case fails.
-   */
-  @Test
-  public void testDefaultCredentials() throws Exception {
-    startServer(47502);
-    fetchPage("/basic.jsp", 200);
-  }
-
-  /**
-   * Tests basic authentication scheme for various realms.
-   * 
-   * @throws Exception
-   *           If an error occurs or the test case fails.
-   */
-  @Test
-  public void testBasicAuth() throws Exception {
-    startServer(47500);
-    fetchPage("/basic.jsp", 200);
-    fetchPage("/basic.jsp?case=1", 200);
-    fetchPage("/basic.jsp?case=2", 200);
-    server.start();
-  }
-
-  /**
-   * Tests that authentication happens for a defined realm and not for other
-   * realms for a host:port when an extra <code>authscope</code> tag is not
-   * defined to match all other realms.
-   * 
-   * @throws Exception
-   *           If an error occurs or the test case fails.
-   */
-  @Test
-  public void testOtherRealmsNoAuth() throws Exception {
-    startServer(47501);
-    fetchPage("/basic.jsp", 200);
-    fetchPage("/basic.jsp?case=1", 401);
-    fetchPage("/basic.jsp?case=2", 401);
-  }
-
-  /**
-   * Tests Digest authentication scheme.
-   * 
-   * @throws Exception
-   *           If an error occurs or the test case fails.
-   */
-  @Test
-  public void testDigestAuth() throws Exception {
-    startServer(47500);
-    fetchPage("/digest.jsp", 200);
-  }
-
-  /**
-   * Tests NTLM authentication scheme.
-   * 
-   * @throws Exception
-   *           If an error occurs or the test case fails.
-   */
-  @Test
-  public void testNtlmAuth() throws Exception {
-    startServer(47501);
-    fetchPage("/ntlm.jsp", 200);
-  }
-
-  /**
-   * Starts the Jetty server at a specified port.
-   *
-   * Will try up to 10 ports to find an available port to use.
-   *
-   * @param portno
-   *          Port number.
-   * @throws Exception
-   *           When an error occurs.
-   */
-  private void startServer(int portno) throws Exception {
-    SocketConnector listener = new SocketConnector();
-    listener.setHost("127.0.0.1");
-    server.addConnector(listener);
-    for (int p = portno; p < portno + 10; p++) {
-      port = portno;
-      listener.setPort(port);
-      try {
-        server.start();
-        break;
-      } catch (Exception e) {
-        if (p == portno + 9) {
-          throw e;
-        }
-      }
-    }
-  }
-
-  /**
-   * Fetches the specified <code>page</code> from the local Jetty server and
-   * checks whether the HTTP response status code matches with the expected
-   * code.
-   * 
-   * @param page
-   *          Page to be fetched.
-   * @param expectedCode
-   *          HTTP response status code expected while fetching the page.
-   * @throws Exception
-   *           When an error occurs or test case fails.
-   */
-  private void fetchPage(String page, int expectedCode) throws Exception {
-    URL url = new URL("http", "127.0.0.1", port, page);
-    Response response = null;
-    response = http.getResponse(url, new CrawlDatum(), true);
-
-    int code = response.getCode();
-    Assert.assertEquals("HTTP Status Code for " + url, expectedCode, code);
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/subcollection/src/test/java/org/apache/nutch/collection/TestSubcollection.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/subcollection/src/test/java/org/apache/nutch/collection/TestSubcollection.java b/nutch-plugins/subcollection/src/test/java/org/apache/nutch/collection/TestSubcollection.java
new file mode 100644
index 0000000..a2d2772
--- /dev/null
+++ b/nutch-plugins/subcollection/src/test/java/org/apache/nutch/collection/TestSubcollection.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.collection;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.Collection;
+
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestSubcollection {
+
+  /**
+   * Test filtering logic
+   * 
+   * @throws Exception
+   */
+  @Test
+  public void testFilter() throws Exception {
+    Subcollection sc = new Subcollection(NutchConfiguration.create());
+    sc.setWhiteList("www.nutch.org\nwww.apache.org");
+    sc.setBlackList("jpg\nwww.apache.org/zecret/");
+
+    // matches whitelist
+    Assert.assertEquals("http://www.apache.org/index.html",
+        sc.filter("http://www.apache.org/index.html"));
+
+    // matches blacklist
+    Assert.assertEquals(null,
+        sc.filter("http://www.apache.org/zecret/index.html"));
+    Assert.assertEquals(null, sc.filter("http://www.apache.org/img/image.jpg"));
+
+    // no match
+    Assert.assertEquals(null, sc.filter("http://www.google.com/"));
+  }
+
+  @Test
+  public void testInput() {
+    StringBuffer xml = new StringBuffer();
+    xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+    xml.append("<!-- just a comment -->");
+    xml.append("<subcollections>");
+    xml.append("<subcollection>");
+    xml.append("<name>nutch collection</name>");
+    xml.append("<id>nutch</id>");
+    xml.append("<whitelist>");
+    xml.append("http://lucene.apache.org/nutch/\n");
+    xml.append("http://wiki.apache.org/nutch/\n");
+    xml.append("</whitelist>");
+    xml.append("<blacklist>");
+    xml.append("http://www.xxx.yyy\n");
+    xml.append("</blacklist>");
+    xml.append("</subcollection>");
+    xml.append("</subcollections>");
+
+    InputStream is = new ByteArrayInputStream(xml.toString().getBytes());
+
+    CollectionManager cm = new CollectionManager();
+    cm.parse(is);
+
+    Collection<?> c = cm.getAll();
+
+    // test that size matches
+    Assert.assertEquals(1, c.size());
+
+    Subcollection collection = (Subcollection) c.toArray()[0];
+
+    // test collection id
+    Assert.assertEquals("nutch", collection.getId());
+
+    // test collection name
+    Assert.assertEquals("nutch collection", collection.getName());
+
+    // test whitelist
+    Assert.assertEquals(2, collection.whiteList.size());
+
+    String wlUrl = (String) collection.whiteList.get(0);
+    Assert.assertEquals("http://lucene.apache.org/nutch/", wlUrl);
+
+    wlUrl = (String) collection.whiteList.get(1);
+    Assert.assertEquals("http://wiki.apache.org/nutch/", wlUrl);
+
+    // matches whitelist
+    Assert.assertEquals("http://lucene.apache.org/nutch/",
+        collection.filter("http://lucene.apache.org/nutch/"));
+
+    // test blacklist
+    Assert.assertEquals(1, collection.blackList.size());
+
+    String blUrl = (String) collection.blackList.get(0);
+    Assert.assertEquals("http://www.xxx.yyy", blUrl);
+
+    // no match
+    Assert.assertEquals(null, collection.filter("http://www.google.com/"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java b/nutch-plugins/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
deleted file mode 100644
index a2d2772..0000000
--- a/nutch-plugins/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.collection;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.util.Collection;
-
-import org.apache.nutch.util.NutchConfiguration;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class TestSubcollection {
-
-  /**
-   * Test filtering logic
-   * 
-   * @throws Exception
-   */
-  @Test
-  public void testFilter() throws Exception {
-    Subcollection sc = new Subcollection(NutchConfiguration.create());
-    sc.setWhiteList("www.nutch.org\nwww.apache.org");
-    sc.setBlackList("jpg\nwww.apache.org/zecret/");
-
-    // matches whitelist
-    Assert.assertEquals("http://www.apache.org/index.html",
-        sc.filter("http://www.apache.org/index.html"));
-
-    // matches blacklist
-    Assert.assertEquals(null,
-        sc.filter("http://www.apache.org/zecret/index.html"));
-    Assert.assertEquals(null, sc.filter("http://www.apache.org/img/image.jpg"));
-
-    // no match
-    Assert.assertEquals(null, sc.filter("http://www.google.com/"));
-  }
-
-  @Test
-  public void testInput() {
-    StringBuffer xml = new StringBuffer();
-    xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
-    xml.append("<!-- just a comment -->");
-    xml.append("<subcollections>");
-    xml.append("<subcollection>");
-    xml.append("<name>nutch collection</name>");
-    xml.append("<id>nutch</id>");
-    xml.append("<whitelist>");
-    xml.append("http://lucene.apache.org/nutch/\n");
-    xml.append("http://wiki.apache.org/nutch/\n");
-    xml.append("</whitelist>");
-    xml.append("<blacklist>");
-    xml.append("http://www.xxx.yyy\n");
-    xml.append("</blacklist>");
-    xml.append("</subcollection>");
-    xml.append("</subcollections>");
-
-    InputStream is = new ByteArrayInputStream(xml.toString().getBytes());
-
-    CollectionManager cm = new CollectionManager();
-    cm.parse(is);
-
-    Collection<?> c = cm.getAll();
-
-    // test that size matches
-    Assert.assertEquals(1, c.size());
-
-    Subcollection collection = (Subcollection) c.toArray()[0];
-
-    // test collection id
-    Assert.assertEquals("nutch", collection.getId());
-
-    // test collection name
-    Assert.assertEquals("nutch collection", collection.getName());
-
-    // test whitelist
-    Assert.assertEquals(2, collection.whiteList.size());
-
-    String wlUrl = (String) collection.whiteList.get(0);
-    Assert.assertEquals("http://lucene.apache.org/nutch/", wlUrl);
-
-    wlUrl = (String) collection.whiteList.get(1);
-    Assert.assertEquals("http://wiki.apache.org/nutch/", wlUrl);
-
-    // matches whitelist
-    Assert.assertEquals("http://lucene.apache.org/nutch/",
-        collection.filter("http://lucene.apache.org/nutch/"));
-
-    // test blacklist
-    Assert.assertEquals(1, collection.blackList.size());
-
-    String blUrl = (String) collection.blackList.get(0);
-    Assert.assertEquals("http://www.xxx.yyy", blUrl);
-
-    // no match
-    Assert.assertEquals(null, collection.filter("http://www.google.com/"));
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-automaton/src/test/java/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-automaton/src/test/java/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java b/nutch-plugins/urlfilter-automaton/src/test/java/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java
new file mode 100644
index 0000000..a70a6b6
--- /dev/null
+++ b/nutch-plugins/urlfilter-automaton/src/test/java/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.automaton;
+
+// JDK imports
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.nutch.net.*;
+// Nutch imports
+import org.apache.nutch.urlfilter.api.RegexURLFilterBaseTest;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * JUnit based test of class <code>AutomatonURLFilter</code>.
+ * 
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public class TestAutomatonURLFilter extends RegexURLFilterBaseTest {
+
+  protected URLFilter getURLFilter(Reader rules) {
+    try {
+      return new AutomatonURLFilter(rules);
+    } catch (IOException e) {
+      Assert.fail(e.toString());
+      return null;
+    }
+  }
+
+  @Test
+  public void test() {
+    test("WholeWebCrawling");
+    test("IntranetCrawling");
+    bench(50, "Benchmarks");
+    bench(100, "Benchmarks");
+    bench(200, "Benchmarks");
+    bench(400, "Benchmarks");
+    bench(800, "Benchmarks");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java b/nutch-plugins/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java
deleted file mode 100644
index a70a6b6..0000000
--- a/nutch-plugins/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.automaton;
-
-// JDK imports
-import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.nutch.net.*;
-// Nutch imports
-import org.apache.nutch.urlfilter.api.RegexURLFilterBaseTest;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * JUnit based test of class <code>AutomatonURLFilter</code>.
- * 
- * @author J&eacute;r&ocirc;me Charron
- */
-public class TestAutomatonURLFilter extends RegexURLFilterBaseTest {
-
-  protected URLFilter getURLFilter(Reader rules) {
-    try {
-      return new AutomatonURLFilter(rules);
-    } catch (IOException e) {
-      Assert.fail(e.toString());
-      return null;
-    }
-  }
-
-  @Test
-  public void test() {
-    test("WholeWebCrawling");
-    test("IntranetCrawling");
-    bench(50, "Benchmarks");
-    bench(100, "Benchmarks");
-    bench(200, "Benchmarks");
-    bench(400, "Benchmarks");
-    bench(800, "Benchmarks");
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-domain/src/test/java/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-domain/src/test/java/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java b/nutch-plugins/urlfilter-domain/src/test/java/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java
new file mode 100644
index 0000000..0be1e31
--- /dev/null
+++ b/nutch-plugins/urlfilter-domain/src/test/java/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.domain;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestDomainURLFilter {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  @Test
+  public void testFilter() throws Exception {
+
+    String domainFile = SAMPLES + SEPARATOR + "hosts.txt";
+    Configuration conf = NutchConfiguration.create();
+    DomainURLFilter domainFilter = new DomainURLFilter(domainFile);
+    domainFilter.setConf(conf);
+    Assert.assertNotNull(domainFilter.filter("http://lucene.apache.org"));
+    Assert.assertNotNull(domainFilter.filter("http://hadoop.apache.org"));
+    Assert.assertNotNull(domainFilter.filter("http://www.apache.org"));
+    Assert.assertNull(domainFilter.filter("http://www.google.com"));
+    Assert.assertNull(domainFilter.filter("http://mail.yahoo.com"));
+    Assert.assertNotNull(domainFilter.filter("http://www.foobar.net"));
+    Assert.assertNotNull(domainFilter.filter("http://www.foobas.net"));
+    Assert.assertNotNull(domainFilter.filter("http://www.yahoo.com"));
+    Assert.assertNotNull(domainFilter.filter("http://www.foobar.be"));
+    Assert.assertNull(domainFilter.filter("http://www.adobe.com"));
+  }
+  
+  @Test
+  public void testNoFilter() throws Exception {
+    // https://issues.apache.org/jira/browse/NUTCH-2189
+    String domainFile = SAMPLES + SEPARATOR + "this-file-does-not-exist.txt";
+    Configuration conf = NutchConfiguration.create();
+    DomainURLFilter domainFilter = new DomainURLFilter(domainFile);
+    domainFilter.setConf(conf);
+    Assert.assertNotNull(domainFilter.filter("http://lucene.apache.org"));
+    Assert.assertNotNull(domainFilter.filter("http://hadoop.apache.org"));
+    Assert.assertNotNull(domainFilter.filter("http://www.apache.org"));
+    Assert.assertNotNull(domainFilter.filter("http://www.google.com"));
+    Assert.assertNotNull(domainFilter.filter("http://mail.yahoo.com"));
+    Assert.assertNotNull(domainFilter.filter("http://www.foobar.net"));
+    Assert.assertNotNull(domainFilter.filter("http://www.foobas.net"));
+    Assert.assertNotNull(domainFilter.filter("http://www.yahoo.com"));
+    Assert.assertNotNull(domainFilter.filter("http://www.foobar.be"));
+    Assert.assertNotNull(domainFilter.filter("http://www.adobe.com"));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-domain/src/test/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-domain/src/test/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java b/nutch-plugins/urlfilter-domain/src/test/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java
deleted file mode 100644
index 0be1e31..0000000
--- a/nutch-plugins/urlfilter-domain/src/test/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.domain;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.util.NutchConfiguration;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class TestDomainURLFilter {
-
-  private final static String SEPARATOR = System.getProperty("file.separator");
-  private final static String SAMPLES = System.getProperty("test.data", ".");
-
-  @Test
-  public void testFilter() throws Exception {
-
-    String domainFile = SAMPLES + SEPARATOR + "hosts.txt";
-    Configuration conf = NutchConfiguration.create();
-    DomainURLFilter domainFilter = new DomainURLFilter(domainFile);
-    domainFilter.setConf(conf);
-    Assert.assertNotNull(domainFilter.filter("http://lucene.apache.org"));
-    Assert.assertNotNull(domainFilter.filter("http://hadoop.apache.org"));
-    Assert.assertNotNull(domainFilter.filter("http://www.apache.org"));
-    Assert.assertNull(domainFilter.filter("http://www.google.com"));
-    Assert.assertNull(domainFilter.filter("http://mail.yahoo.com"));
-    Assert.assertNotNull(domainFilter.filter("http://www.foobar.net"));
-    Assert.assertNotNull(domainFilter.filter("http://www.foobas.net"));
-    Assert.assertNotNull(domainFilter.filter("http://www.yahoo.com"));
-    Assert.assertNotNull(domainFilter.filter("http://www.foobar.be"));
-    Assert.assertNull(domainFilter.filter("http://www.adobe.com"));
-  }
-  
-  @Test
-  public void testNoFilter() throws Exception {
-    // https://issues.apache.org/jira/browse/NUTCH-2189
-    String domainFile = SAMPLES + SEPARATOR + "this-file-does-not-exist.txt";
-    Configuration conf = NutchConfiguration.create();
-    DomainURLFilter domainFilter = new DomainURLFilter(domainFile);
-    domainFilter.setConf(conf);
-    Assert.assertNotNull(domainFilter.filter("http://lucene.apache.org"));
-    Assert.assertNotNull(domainFilter.filter("http://hadoop.apache.org"));
-    Assert.assertNotNull(domainFilter.filter("http://www.apache.org"));
-    Assert.assertNotNull(domainFilter.filter("http://www.google.com"));
-    Assert.assertNotNull(domainFilter.filter("http://mail.yahoo.com"));
-    Assert.assertNotNull(domainFilter.filter("http://www.foobar.net"));
-    Assert.assertNotNull(domainFilter.filter("http://www.foobas.net"));
-    Assert.assertNotNull(domainFilter.filter("http://www.yahoo.com"));
-    Assert.assertNotNull(domainFilter.filter("http://www.foobar.be"));
-    Assert.assertNotNull(domainFilter.filter("http://www.adobe.com"));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-domainblacklist/src/test/java/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-domainblacklist/src/test/java/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java b/nutch-plugins/urlfilter-domainblacklist/src/test/java/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
new file mode 100644
index 0000000..d253867
--- /dev/null
+++ b/nutch-plugins/urlfilter-domainblacklist/src/test/java/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.domainblacklist;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+
+public class TestDomainBlacklistURLFilter {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  @Test
+  public void testFilter() throws Exception {
+
+    String domainBlacklistFile = SAMPLES + SEPARATOR + "hosts.txt";
+    Configuration conf = NutchConfiguration.create();
+    DomainBlacklistURLFilter domainBlacklistFilter = new DomainBlacklistURLFilter(
+        domainBlacklistFile);
+    domainBlacklistFilter.setConf(conf);
+    Assert.assertNull(domainBlacklistFilter.filter("http://lucene.apache.org"));
+    Assert.assertNull(domainBlacklistFilter.filter("http://hadoop.apache.org"));
+    Assert.assertNull(domainBlacklistFilter.filter("http://www.apache.org"));
+    Assert.assertNotNull(domainBlacklistFilter.filter("http://www.google.com"));
+    Assert.assertNotNull(domainBlacklistFilter.filter("http://mail.yahoo.com"));
+    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.net"));
+    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobas.net"));
+    Assert.assertNull(domainBlacklistFilter.filter("http://www.yahoo.com"));
+    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.be"));
+    Assert.assertNotNull(domainBlacklistFilter.filter("http://www.adobe.com"));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java b/nutch-plugins/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
deleted file mode 100644
index d253867..0000000
--- a/nutch-plugins/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.domainblacklist;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.util.NutchConfiguration;
-
-public class TestDomainBlacklistURLFilter {
-
-  private final static String SEPARATOR = System.getProperty("file.separator");
-  private final static String SAMPLES = System.getProperty("test.data", ".");
-
-  @Test
-  public void testFilter() throws Exception {
-
-    String domainBlacklistFile = SAMPLES + SEPARATOR + "hosts.txt";
-    Configuration conf = NutchConfiguration.create();
-    DomainBlacklistURLFilter domainBlacklistFilter = new DomainBlacklistURLFilter(
-        domainBlacklistFile);
-    domainBlacklistFilter.setConf(conf);
-    Assert.assertNull(domainBlacklistFilter.filter("http://lucene.apache.org"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://hadoop.apache.org"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.apache.org"));
-    Assert.assertNotNull(domainBlacklistFilter.filter("http://www.google.com"));
-    Assert.assertNotNull(domainBlacklistFilter.filter("http://mail.yahoo.com"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.net"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobas.net"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.yahoo.com"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.be"));
-    Assert.assertNotNull(domainBlacklistFilter.filter("http://www.adobe.com"));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-prefix/src/test/java/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-prefix/src/test/java/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java b/nutch-plugins/urlfilter-prefix/src/test/java/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java
new file mode 100644
index 0000000..b7a7ce4
--- /dev/null
+++ b/nutch-plugins/urlfilter-prefix/src/test/java/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.prefix;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+import java.io.IOException;
+
+
+/**
+ * JUnit test for <code>PrefixURLFilter</code>.
+ *
+ * @author Talat Uyarer
+ * @author Cihad Guzel
+ */
+public class TestPrefixURLFilter extends TestCase {
+  private static final String prefixes =
+    "# this is a comment\n" +
+    "\n" +
+    "http://\n" +
+    "https://\n" +
+    "file://\n" +
+    "ftp://\n";
+
+  private static final String[] urls = new String[] {
+    "http://www.example.com/",
+    "https://www.example.com/",
+    "ftp://www.example.com/",
+    "file://www.example.com/",
+    "abcd://www.example.com/",
+    "www.example.com/",
+  };
+
+  private static String[] urlsModeAccept = new String[] {
+    urls[0],
+    urls[1],
+    urls[2],
+    urls[3],
+    null,
+    null
+  };
+
+  private PrefixURLFilter filter = null;
+
+  public static Test suite() {
+    return new TestSuite(TestPrefixURLFilter.class);
+  }
+
+  public static void main(String[] args) {
+    TestRunner.run(suite());
+  }
+
+  public void setUp() throws IOException {
+    filter = new PrefixURLFilter(prefixes);
+  }
+
+  public void testModeAccept() {
+    for (int i = 0; i < urls.length; i++) {
+      assertTrue(urlsModeAccept[i] == filter.filter(urls[i]));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-prefix/src/test/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-prefix/src/test/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java b/nutch-plugins/urlfilter-prefix/src/test/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java
deleted file mode 100644
index b7a7ce4..0000000
--- a/nutch-plugins/urlfilter-prefix/src/test/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.prefix;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-import junit.textui.TestRunner;
-
-import java.io.IOException;
-
-
-/**
- * JUnit test for <code>PrefixURLFilter</code>.
- *
- * @author Talat Uyarer
- * @author Cihad Guzel
- */
-public class TestPrefixURLFilter extends TestCase {
-  private static final String prefixes =
-    "# this is a comment\n" +
-    "\n" +
-    "http://\n" +
-    "https://\n" +
-    "file://\n" +
-    "ftp://\n";
-
-  private static final String[] urls = new String[] {
-    "http://www.example.com/",
-    "https://www.example.com/",
-    "ftp://www.example.com/",
-    "file://www.example.com/",
-    "abcd://www.example.com/",
-    "www.example.com/",
-  };
-
-  private static String[] urlsModeAccept = new String[] {
-    urls[0],
-    urls[1],
-    urls[2],
-    urls[3],
-    null,
-    null
-  };
-
-  private PrefixURLFilter filter = null;
-
-  public static Test suite() {
-    return new TestSuite(TestPrefixURLFilter.class);
-  }
-
-  public static void main(String[] args) {
-    TestRunner.run(suite());
-  }
-
-  public void setUp() throws IOException {
-    filter = new PrefixURLFilter(prefixes);
-  }
-
-  public void testModeAccept() {
-    for (int i = 0; i < urls.length; i++) {
-      assertTrue(urlsModeAccept[i] == filter.filter(urls[i]));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-regex/src/test/java/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-regex/src/test/java/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java b/nutch-plugins/urlfilter-regex/src/test/java/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java
new file mode 100644
index 0000000..b86181e
--- /dev/null
+++ b/nutch-plugins/urlfilter-regex/src/test/java/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.regex;
+
+// JDK imports
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.nutch.net.*;
+// Nutch imports
+import org.apache.nutch.urlfilter.api.RegexURLFilterBaseTest;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * JUnit based test of class <code>RegexURLFilter</code>.
+ * 
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public class TestRegexURLFilter extends RegexURLFilterBaseTest {
+
+  protected URLFilter getURLFilter(Reader rules) {
+    try {
+      return new RegexURLFilter(rules);
+    } catch (IOException e) {
+      Assert.fail(e.toString());
+      return null;
+    }
+  }
+
+  @Test
+  public void test() {
+    test("WholeWebCrawling");
+    test("IntranetCrawling");
+    bench(50, "Benchmarks");
+    bench(100, "Benchmarks");
+    bench(200, "Benchmarks");
+    bench(400, "Benchmarks");
+    bench(800, "Benchmarks");
+  }
+  
+  @Test
+  public void test1838() {
+    test("nutch1838");
+  }
+
+}

Reply via email to