Author: kwright
Date: Tue Sep 16 17:12:31 2014
New Revision: 1625337
URL: http://svn.apache.org/r1625337
Log:
Add robots evaluation tests
Added:
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/RobotsTest.java
(with props)
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/URLTest.java
- copied, changed from r1624603,
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/tests/URLTest.java
Removed:
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/tests/URLTest.java
Added:
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/RobotsTest.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/RobotsTest.java?rev=1625337&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/RobotsTest.java
(added)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/RobotsTest.java
Tue Sep 16 17:12:31 2014
@@ -0,0 +1,56 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.connectors.webcrawler;
+
+import org.apache.manifoldcf.crawler.connectors.webcrawler.RobotsManager;
+import org.junit.*;
+import static org.junit.Assert.*;
+
+public class RobotsTest
+{
+
+ @Test
+ public void doesPathMatch()
+ throws Exception
+ {
+ // Exercises RobotsManager.doesPathMatch() with the first argument fixed at "/folder/doc1.pdf": a match is expected when the second argument is the identical path, "/folder/*", "/" or "/folder/", and no match when it lacks the leading slash.
+ assertTrue(RobotsManager.doesPathMatch("/folder/doc1.pdf","/folder/doc1.pdf"));
+ assertTrue(RobotsManager.doesPathMatch("/folder/doc1.pdf","/folder/*"));
+ assertTrue(RobotsManager.doesPathMatch("/folder/doc1.pdf","/"));
+ assertTrue(RobotsManager.doesPathMatch("/folder/doc1.pdf","/folder/"));
+ assertFalse(RobotsManager.doesPathMatch("/folder/doc1.pdf","folder/doc1.pdf"));
+ }
+
+ @Test
+ public void testRecord()
+ throws Exception
+ {
+ // Builds a Record for agent "*" that disallows "/" and adds three allow entries written without a leading slash; "/folder/doc1.pdf" is then expected to be reported as disallowed and not as allowed.
+ RobotsManager.Record record = new RobotsManager.Record();
+ record.addAgent("*");
+ record.addDisallow("/");
+ record.addAllow("folder/doc1.pdf");
+ record.addAllow("folder/doc2.pdf");
+ record.addAllow("folder/doc3.pdf");
+ assertTrue(record.isAgentMatch("*",true));
+ assertTrue(record.isDisallowed("/folder/doc1.pdf"));
+ assertFalse(record.isAllowed("/folder/doc1.pdf"));
+ }
+
+}
Propchange:
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/RobotsTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange:
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/RobotsTest.java
------------------------------------------------------------------------------
svn:keywords = Id
Copied:
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/URLTest.java
(from r1624603,
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/tests/URLTest.java)
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/URLTest.java?p2=manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/URLTest.java&p1=manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/tests/URLTest.java&r1=1624603&r2=1625337&rev=1625337&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/tests/URLTest.java
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/webcrawler/URLTest.java
Tue Sep 16 17:12:31 2014
@@ -16,7 +16,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.manifoldcf.crawler.connectors.webcrawler.tests;
+package org.apache.manifoldcf.crawler.connectors.webcrawler;
import org.apache.manifoldcf.crawler.connectors.webcrawler.WebURL;
import org.junit.*;