Author: lewismc
Date: Tue Jan 8 03:56:13 2013
New Revision: 1430135
URL: http://svn.apache.org/viewvc?rev=1430135&view=rev
Log:
NUTCH-1127 JUnit test for urlfilter-validator
Added:
nutch/trunk/src/plugin/urlfilter-validator/src/test/
nutch/trunk/src/plugin/urlfilter-validator/src/test/org/
nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/
nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/
nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/
nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/
nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/build.xml
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1430135&r1=1430134&r2=1430135&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jan 8 03:56:13 2013
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk): Current Development
+* NUTCH-1127 JUnit test for urlfilter-validator (Tejas Patil via lewismc)
+
* NUTCH-1119 JUnit test for index-static (Tejas Patil via lewismc)
* NUTCH-1510 Upgrade to Hadoop 1.1.1 (markus)
Modified: nutch/trunk/src/plugin/build.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/build.xml?rev=1430135&r1=1430134&r2=1430135&view=diff
==============================================================================
--- nutch/trunk/src/plugin/build.xml (original)
+++ nutch/trunk/src/plugin/build.xml Tue Jan 8 03:56:13 2013
@@ -98,6 +98,7 @@
<ant dir="urlfilter-domainblacklist" target="test"/>
<ant dir="urlfilter-regex" target="test"/>
<ant dir="urlfilter-suffix" target="test"/>
+ <ant dir="urlfilter-validator" target="test"/>
<ant dir="urlnormalizer-basic" target="test"/>
<ant dir="urlnormalizer-host" target="test"/>
<ant dir="urlnormalizer-pass" target="test"/>
Added:
nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java?rev=1430135&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java (added)
+++ nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java Tue Jan 8 03:56:13 2013
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.validator;
+
+import org.apache.nutch.urlfilter.validator.UrlValidator;
+import junit.framework.TestCase;
+
+/**
+ * JUnit test case which tests
+ * 1. that valid urls are not filtered while invalid ones are filtered.
+ * 2. that Urls' scheme, authority, path and query are validated.
+ *
+ * @author tejasp
+ *
+ */
+
+public class TestUrlValidator extends TestCase {
+  /**
+   * Test method for {@link org.apache.nutch.urlfilter.validator.UrlValidator#filter(java.lang.String)}: expects null for rejected urls and a non-null result for accepted ones.
+   */
+  public void testFilter() {
+    UrlValidator urlValidator = new UrlValidator();
+    assertNotNull(urlValidator);
+
+    // Null input and every url this test treats as invalid must be filtered out (null result).
+    assertNull("Filtering on a null object should return null", urlValidator.filter(null));
+    assertNull("Invalid url: example.com/file[/].html", urlValidator.filter("example.com/file[/].html"));
+    assertNull("Invalid url: http://www.example.com/space here.html", urlValidator.filter("http://www.example.com/space here.html"));
+    assertNull("Invalid url: /main.html", urlValidator.filter("/main.html"));
+    assertNull("Invalid url: www.example.com/main.html", urlValidator.filter("www.example.com/main.html"));
+    assertNull("Invalid url: ftp:www.example.com/main.html", urlValidator.filter("ftp:www.example.com/main.html"));
+    assertNull("Invalid url: http://999.000.456.32/nutch/trunk/README.txt",
+        urlValidator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
+    assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", urlValidator.filter("http://www.example.com/ma|in\\toc.html"));
+
+    // Urls this test treats as valid (https, http with query strings, ftp) must pass the filter (non-null result).
+    assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", urlValidator.filter("https://issues.apache.org/jira/NUTCH-1127"));
+    assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
+        urlValidator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
+    assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
+        urlValidator.filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
+    assertNotNull("Valid url: ftp://alfa.bravo.pi/mike/check/plan.pdf", urlValidator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));
+  }
+}