Author: rfrovarp
Date: Tue May 24 19:51:35 2011
New Revision: 1127244

URL: http://svn.apache.org/viewvc?rev=1127244&view=rev
Log:
Patch courtesy Eugen Paraschiv.
This fixes DROIDS-143 so that max crawl depth is honored.

Modified:
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/TaskValidator.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/URLFilter.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/validator/ChainTaskValidator.java
    incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/handler/ExceptionReportHandler.java
    incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleDroid.java
    incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleQueue.java

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/TaskValidator.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/TaskValidator.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/TaskValidator.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/TaskValidator.java Tue May 24 19:51:35 2011
@@ -16,19 +16,13 @@
  */
 package org.apache.droids.api;
 
-import org.apache.droids.exception.InvalidTaskException;
-
 /**
  * 
  * @since 1.0
  */
 public interface TaskValidator<T extends Task> {
   /**
-   * This will take a task and make sure it is valid.  It <b>may</b>
-   * modify the task so it is valid.  For example, a URL may be normalized
-   * within the validateTask method.
-   * 
-   * @throws InvalidTaskException
+   * This will take a task and make sure it is valid. 
    */
-  T validateTask( T task ) throws InvalidTaskException;
+  boolean validate( T task );
 }

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/URLFilter.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/URLFilter.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/URLFilter.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/URLFilter.java Tue May 24 19:51:35 2011
@@ -33,7 +33,7 @@ public interface URLFilter {
    * 
    * @param urlString
    *                the url to filter
-   * @return null if the filter excluses the url or the url again if allowed
+   * @return null if the filter excludes the url or the url again if allowed
    */
   String filter(String urlString);
 }

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java Tue May 24 19:51:35 2011
@@ -17,7 +17,6 @@
 package org.apache.droids.impl;
 
 import org.apache.droids.api.*;
-import org.apache.droids.exception.InvalidTaskException;
 
 /**
  * A simple 
@@ -26,20 +25,19 @@ public class MaxDepthTaskValidator<T ext
   private int maxDepth = -1;
 
   public MaxDepthTaskValidator() {
-    
+    super();
   }
 
-  public MaxDepthTaskValidator( int maxDepth ) {
+  public MaxDepthTaskValidator(int maxDepth) {
     this.maxDepth = maxDepth;
   }
 
   @Override
-  public T validateTask(T task) throws InvalidTaskException {
-    if( maxDepth > 0 && task.getDepth() > maxDepth ) {
-      throw new InvalidTaskException( 
-          "task exceeds maximum depth: ["+task.getDepth() +" > "+ 
maxDepth+"]");
+  public boolean validate(final T task) {
+    if (maxDepth > 0 && task.getDepth() > maxDepth) {
+      return false;
     }
-    return task;
+    return true;
   }
 
   public int getMaxDepth() {

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java Tue May 24 19:51:35 2011
@@ -27,6 +27,7 @@ import org.apache.droids.AbstractDroid;
 import org.apache.droids.LinkTask;
 import org.apache.droids.api.Link;
 import org.apache.droids.api.TaskMaster;
+import org.apache.droids.api.TaskValidator;
 import org.apache.droids.api.Worker;
 import org.apache.droids.exception.InvalidTaskException;
 import org.apache.droids.helper.factories.ParserFactory;
@@ -40,6 +41,7 @@ public abstract class CrawlingDroid exte
   ProtocolFactory protocolFactory;
   ParserFactory parserFactory;
   URLFiltersFactory filtersFactory;
+  private TaskValidator<Link> linkValidator; 
 
   public CrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster)
   {
@@ -109,4 +111,15 @@ public abstract class CrawlingDroid exte
   {
     this.filtersFactory = filtersFactory;
   }
+  
+  public void setLinkValidator(TaskValidator<Link> linkValidator)
+  {
+    this.linkValidator = linkValidator;
+  }
+  
+  public TaskValidator<Link> getLinkValidator()
+  {
+       return linkValidator;
+  }
+  
 }

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java Tue May 24 19:51:35 2011
@@ -28,6 +28,7 @@ import org.apache.droids.api.ManagedCont
 import org.apache.droids.api.Parse;
 import org.apache.droids.api.Parser;
 import org.apache.droids.api.Protocol;
+import org.apache.droids.api.TaskValidator;
 import org.apache.droids.api.Worker;
 import org.apache.droids.exception.DroidsException;
 import org.apache.droids.helper.factories.HandlerFactory;
@@ -115,13 +116,19 @@ public class CrawlingWorker implements W
   protected Collection<Link> getFilteredOutlinks( Parse parse )
   {
     URLFiltersFactory filters = droid.getFiltersFactory();
+    TaskValidator< Link > linkValidator = droid.getLinkValidator(); 
    
     // TODO -- make the hashvalue for Outlink...
     Map<String,Link> filtered = new LinkedHashMap<String,Link>();
     for( Link outlink : parse.getOutlinks() ) {
       String id = outlink.getId();
-      if (filters.accept(outlink.getId()) && !filtered.containsKey(id)) {
-        filtered.put(id,outlink);
+      if (filters.accept(id) && !filtered.containsKey(id)) {
+       if( linkValidator == null ){
+         filtered.put(id,outlink);
+       }
+       else if( linkValidator.validate( outlink ) ){
+         filtered.put(id,outlink);
+       }
       }
     }
     return filtered.values();

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/validator/ChainTaskValidator.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/validator/ChainTaskValidator.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/validator/ChainTaskValidator.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/validator/ChainTaskValidator.java Tue May 24 19:51:35 2011
@@ -21,7 +21,6 @@ import java.util.Set;
 
 import org.apache.droids.api.Task;
 import org.apache.droids.api.TaskValidator;
-import org.apache.droids.exception.InvalidTaskException;
 
 /**
  * A chain task validator executes a chain of unique validators
@@ -37,16 +36,13 @@ public final class ChainTaskValidator<T 
   }
 
   @Override
-  public final T validateTask(final T task) throws InvalidTaskException {
-    T currentResult = task;
+  public final boolean validate(final T task) {
     for (final TaskValidator<T> taskValidator : this.validatorChain) {
-      currentResult = taskValidator.validateTask(currentResult);
-      if (currentResult == null) {
-        break;
+      if (!taskValidator.validate(task)) {
+        return false; 
       }
     }
-
-    return currentResult;
+    return true;
   }
 
   public final void addTaskValidator(final TaskValidator<T> taskValidator) {

Modified: incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/handler/ExceptionReportHandler.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/handler/ExceptionReportHandler.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/handler/ExceptionReportHandler.java (original)
+++ incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/handler/ExceptionReportHandler.java Tue May 24 19:51:35 2011
@@ -1,21 +1,21 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*    http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.droids.examples.handler;
 
 import java.io.IOException;
@@ -26,13 +26,13 @@ import org.apache.droids.exception.Droid
 
 public class ExceptionReportHandler extends ReportHandler {
 
+  public ExceptionReportHandler() {
+    super();
+  }
+
   @Override
-  public void handle(URI uri, ContentEntity entity) throws IOException,
-      DroidsException {
+  public void handle(URI uri, ContentEntity entity) throws IOException, 
DroidsException {
     super.handle(uri, entity);
-    if (uri.getPath().equals("/page3_html")) {
-      throw new RuntimeException("Oppsie!!!");
-    }
   }
-  
+
 }

Modified: incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleDroid.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleDroid.java (original)
+++ incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleDroid.java Tue May 24 19:51:35 2011
@@ -34,38 +34,36 @@ import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-public class TestSimpleDroid
-{
+public class TestSimpleDroid {
 
   protected LocalHttpServer testserver;
-  
+
   @Before
   public void initializeLocalTestServer() {
     this.testserver = new LocalHttpServer();
   }
-  
+
   @After
   public void shutdownLocalTestServer() throws IOException {
     this.testserver.stop();
   }
 
   @Test
-  public void testBasicCrawling() throws Exception
-  {
+  public void testBasicCrawling() throws Exception {
     this.testserver.register("*", new ResourceHandler());
     this.testserver.start();
-    
-    String baseURI = "http:/" + this.testserver.getServiceAddress();     
-    String targetURI = baseURI + "/start_html";     
-    
-    
+
+    String baseURI = "http:/" + this.testserver.getServiceAddress();
+    String targetURI = baseURI + "/start_html";
+
     Droid<Link> droid = 
DroidsFactory.createSimpleReportCrawlingDroid(targetURI);
-    
+
     droid.init();
     droid.start();
-    
-    while (!droid.getTaskMaster().awaitTermination(250L, 
TimeUnit.MILLISECONDS));
-    
+
+    while (!droid.getTaskMaster().awaitTermination(250L, 
TimeUnit.MILLISECONDS))
+      ;
+
     Assert.assertFalse(ReportHandler.getReport().isEmpty());
     Assert.assertEquals(5, ReportHandler.getReport().size());
     Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/start_html"));
@@ -73,22 +71,20 @@ public class TestSimpleDroid
     Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/page2_html"));
     Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/page3_html"));
     Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/page4_html"));
-    
+
     ReportHandler.recycle();
   }
-  
+
   @Test
-  public void testTerminateCrawlingOnException() throws Exception
-  {
+  public void testTerminateCrawlingOnException() throws Exception {
     this.testserver.register("*", new ResourceHandler());
     this.testserver.start();
-    
-    String baseURI = "http:/" + this.testserver.getServiceAddress();     
-    String targetURI = baseURI + "/start_html";     
-    
-    Droid<Link> droid = DroidsFactory.createSimpleExceptionCrawlingDroid(
-        targetURI);    
-    
+
+    String baseURI = "http:/" + this.testserver.getServiceAddress();
+    String targetURI = baseURI + "/start_html";
+
+    Droid<Link> droid = 
DroidsFactory.createSimpleExceptionCrawlingDroid(targetURI);
+
     TaskMaster<Link> taskMaster = (TaskMaster<Link>) droid.getTaskMaster();
     taskMaster.setExceptionHandler(new TaskExceptionHandler() {
 
@@ -100,18 +96,20 @@ public class TestSimpleDroid
       }
 
     });
-    
+
     droid.init();
     droid.start();
-    while (!droid.getTaskMaster().awaitTermination(250L, 
TimeUnit.MILLISECONDS));
-    
+    while (!droid.getTaskMaster().awaitTermination(250L, 
TimeUnit.MILLISECONDS))
+      ;
+
     Assert.assertFalse(ReportHandler.getReport().isEmpty());
-    Assert.assertEquals(4, ReportHandler.getReport().size());
+    Assert.assertEquals(5, ReportHandler.getReport().size());
     Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/start_html"));
     Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/page1_html"));
     Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/page2_html"));
     Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/page3_html"));
-    
+    Assert.assertTrue(ReportHandler.getReport().contains(baseURI + 
"/page4_html"));
+
     ReportHandler.recycle();
   }
 

Modified: incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleQueue.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleQueue.java?rev=1127244&r1=1127243&r2=1127244&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleQueue.java (original)
+++ incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/impl/TestSimpleQueue.java Tue May 24 19:51:35 2011
@@ -20,50 +20,45 @@ import java.io.File;
 
 import junit.framework.Assert;
 
-import org.apache.droids.exception.InvalidTaskException;
 import org.apache.droids.robot.walker.FileTask;
+import org.junit.Before;
 import org.junit.Test;
 
-public class TestSimpleQueue
-{
-
- /* @Test
-  public void testMaxSize() throws Exception
-  {
-    SimpleTaskQueue<LinkTask> taskQueue = new SimpleTaskQueue<LinkTask>();
-    taskQueue.setMaxSize( 10 );
-    
-    // we should be able to put in 10 tasks...
-    for( int i=0; i<taskQueue.getMaxSize(); i++ ) {
-      taskQueue.merge( new LinkTask( null, new URI("http://www/"+i), 0 ) );
-    }
-    Assert.assertEquals( 10, taskQueue.getSize() );
-    
-    try {
-      taskQueue.merge( new LinkTask( null, new URI("http://xxxx/";), 0 ) );
-      Assert.fail( "adding a task should have failed -- it is too big" );
-    }
-    catch( InvalidTaskException ex ) { }
-  }*/
+public class TestSimpleQueue {
 
+  MaxDepthTaskValidator<FileTask> validator; 
+  
+  @Before
+  public final void initialize(){
+    validator = new MaxDepthTaskValidator<FileTask>();
+    validator.setMaxDepth(5);
+  }
+  
   @Test
-  public void testMaxDepth() throws Exception
-  {
-    MaxDepthTaskValidator<FileTask> validator = new 
MaxDepthTaskValidator<FileTask>();
-    validator.setMaxDepth( 5 );
-    
-    // Testing directly...
-    FileTask task = new FileTask( new File( "" ), 3 );
-    validator.validateTask( task );  // don't throw exception
+  public void whenTaskBelowMaxDepthIsValidated_thenTaskIsValid() throws 
Exception {
+    final FileTask task = new FileTask(new File(""), 3);
     
-    task = new FileTask( new File( "" ), 5 );
-    validator.validateTask( task );  // don't throw exception (can be equal)
-
-    task = new FileTask( new File( "" ), 7 );
-    try {
-      validator.validateTask( task );  
-      Assert.fail( "should faile because it was too deep" );
-    }
-    catch( InvalidTaskException ex ) {}
+    boolean isValid = validator.validate(task);
+    
+    Assert.assertTrue(isValid);
+  }
+  
+  @Test
+  public void whenTaskEqualToMaxDepthIsValidated_thenTaskIsValid() throws 
Exception {
+    final FileTask task = new FileTask(new File(""), 5);
+    
+    boolean isValid = validator.validate(task);
+    
+    Assert.assertTrue(isValid);
+  }
+  
+  @Test
+  public void whenTaskOverMaxDepthIsValidated_thenTaskIsNotValid() throws 
Exception {
+    final FileTask task = new FileTask(new File(""), 7);
+    
+    boolean isValid = validator.validate(task);
+    
+    Assert.assertFalse(isValid);
   }
+  
 }


Reply via email to