Author: thorsten
Date: Mon Jan 14 07:55:34 2008
New Revision: 611834

URL: http://svn.apache.org/viewvc?rev=611834&view=rev
Log:
Wrapping up the first working version based on Spring. The API is mainly the same
as before; only some small extension-point-specific code has been dropped.

Added:
    labs/droids/trunk/src/core/java/org/apache/droids/api/Handler.java   (with 
props)
    labs/droids/trunk/src/core/java/org/apache/droids/handle/
    labs/droids/trunk/src/core/java/org/apache/droids/handle/Save.java   (with 
props)
    labs/droids/trunk/src/core/java/org/apache/droids/handle/Sysout.java   
(with props)
    labs/droids/trunk/src/core/java/org/apache/droids/handle/WriterHandler.java 
  (with props)
    
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/HandlerFactory.java
   (with props)
Modified:
    labs/droids/trunk/src/core/java/org/apache/droids/Core.java
    labs/droids/trunk/src/core/java/org/apache/droids/DefaultCrawler.java
    labs/droids/trunk/src/core/java/org/apache/droids/DefaultWorker.java
    labs/droids/trunk/src/core/java/org/apache/droids/api/Parser.java
    labs/droids/trunk/src/core/java/org/apache/droids/api/Task.java
    labs/droids/trunk/src/core/java/org/apache/droids/api/Worker.java
    labs/droids/trunk/src/core/java/org/apache/droids/droids-core-context.xml
    
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/GenericFactory.java
    
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/URLFiltersFactory.java
    labs/droids/trunk/src/core/java/org/apache/droids/parse/Outlink.java
    labs/droids/trunk/src/core/java/org/apache/droids/parse/html/HtmlParser.java
    labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueBean.java
    labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueLink.java
    labs/droids/trunk/src/core/java/org/apache/droids/queue/Simple.java

Modified: labs/droids/trunk/src/core/java/org/apache/droids/Core.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/Core.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/Core.java (original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/Core.java Mon Jan 14 
07:55:34 2008
@@ -18,6 +18,7 @@
 
 import org.apache.droids.api.Droid;
 import org.apache.droids.helper.factories.DroidFactory;
+import org.apache.droids.helper.factories.HandlerFactory;
 import org.apache.droids.helper.factories.ParserFactory;
 import org.apache.droids.helper.factories.ProtocolFactory;
 import org.apache.droids.helper.factories.URLFiltersFactory;
@@ -39,6 +40,8 @@
   private ProtocolFactory protocolFactory;
 
   private URLFiltersFactory filtersFactory;
+  
+  private HandlerFactory handlerFactory;
 
   public ProtocolFactory getProtocolFactory() {
     return protocolFactory;
@@ -73,6 +76,14 @@
 
   public void setFiltersFactory(URLFiltersFactory filtersFactory) {
     this.filtersFactory = filtersFactory;
+  }
+
+  public HandlerFactory getHandlerFactory() {
+    return handlerFactory;
+  }
+
+  public void setHandlerFactory(HandlerFactory handlerFactory) {
+    this.handlerFactory = handlerFactory;
   }
 
 

Modified: labs/droids/trunk/src/core/java/org/apache/droids/DefaultCrawler.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/DefaultCrawler.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/DefaultCrawler.java 
(original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/DefaultCrawler.java Mon 
Jan 14 07:55:34 2008
@@ -38,6 +38,12 @@
   
   private ConcurrentHashMap<Integer,Worker> runningWorker;
 
+  private int x=0;
+  
+  private synchronized void increment(){
+    x++;
+  }
+
   public void run() {
     runningThreads = 0;
     taskDate = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date(System
@@ -47,11 +53,14 @@
         new LinkedBlockingQueue<Runnable>());
     runningWorker=new ConcurrentHashMap<Integer,Worker>();
     initQueue();
-    int x = 0;
+    doWork(0);
+    Core.threadMessage("Finshed invocation, waiting for workers to finish.");
+  }
+
+  private void doWork(int i) {
     while (queue.hasNext()) {
-      x = startWorkers(x);
+      startWorkers(x);
     }
-    Core.threadMessage("Finshed invocation, waiting for workers to finish.");
   }
 
   private synchronized int startWorkers(int x) {
@@ -59,7 +68,7 @@
     worker.setId(x);
     runningWorker.put(x,worker);
     pool.execute(worker);
-    x++;
+    increment();
     try {
       Core.threadMessage("suspending");
       Thread.sleep(4000);
@@ -70,7 +79,7 @@
   }
 
   public synchronized void initQueue() {
-    QueueLink initialLink = new QueueLink(url, taskDate);
+    QueueLink initialLink = new QueueLink(url, taskDate,0);
     queue.init((Task[])new Task[] {initialLink});
   }
   public synchronized Worker getWorker() {
@@ -155,12 +164,16 @@
   }
   
   public synchronized void finishedWorker(int id) {
-    pool.remove(runningWorker.get(id));
+    Worker worker = runningWorker.get(id);
+    int y = worker.getDepth()+1;
+    pool.remove(worker);
     runningWorker.remove(id);
     Core.threadMessage("Worker \""+id+"\" has finished.");
     if (runningWorker.size()==0 & !queue.hasNext()){
       shutdownAndAwaitTermination();
       Core.threadMessage("All threads has finished.");
+    }else if(queue.hasNext()){
+      doWork(y);
     }
   }
 

Modified: labs/droids/trunk/src/core/java/org/apache/droids/DefaultWorker.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/DefaultWorker.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/DefaultWorker.java 
(original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/DefaultWorker.java Mon 
Jan 14 07:55:34 2008
@@ -1,5 +1,7 @@
 package org.apache.droids;
 
+import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
 
@@ -10,6 +12,7 @@
 import org.apache.droids.api.Queue;
 import org.apache.droids.api.Task;
 import org.apache.droids.api.Worker;
+import org.apache.droids.helper.factories.HandlerFactory;
 import org.apache.droids.helper.factories.ParserFactory;
 import org.apache.droids.helper.factories.ProtocolFactory;
 import org.apache.droids.helper.factories.URLFiltersFactory;
@@ -35,6 +38,10 @@
   private String uri;
 
   private URLFiltersFactory filtersFactory;
+
+  private HandlerFactory handlerFactory;
+
+  private int depth;
   
   public synchronized void run() {
     Core.threadMessage("Starting " + this.getClass().getCanonicalName());
@@ -47,8 +54,7 @@
       parser = parserFactory.getParser(contentType);
       // parse contains the outlinks and can be used later
       Parse parse = getParse();
-      // if no parser is found we do not extract links
-      
+      handle(parse);
       droid.finishedWorker(id);
     } catch (Exception e) {
       e.printStackTrace();
@@ -56,41 +62,51 @@
     
   }
 
+  private void handle(Parse parse) throws MalformedURLException, IOException {
+    if (null != parse)
+    handlerFactory.handle(protocol.openStream(uri), new URL(uri), parse);
+  }
+
   private Parse getParse() {
     Parse parse =null;
     if (null != parser) {
       try {
           // extract links
-          parse = parser.getParse(protocol.openStream(uri), new URL(uri));
+          parse = parser.getParse(protocol.openStream(uri), link);
           // all links from the page unfiltered
           filter(parse);
       } catch (Exception e) {
           Core.threadMessage(e.getMessage());
       }
   }
-    // TODO Auto-generated method stub
-    return null;
+    return parse;
   }
 
   private void filter(Parse parse) {
+    Outlink[] filterLinks = filterLinks(parse);
+    queue.merge(filterLinks);
+  }
+
+  private Outlink[] filterLinks(Parse parse) {
+ // filter the link 
     Outlink[] links = parse.getData().getOutlinks();
     // new cleaned list
     ArrayList<Outlink> filtered = new ArrayList<Outlink>();
     for (int i = 0; i < links.length; i++) {
         Outlink outlink = links[i];
-        String test = filtersFactory.filter(outlink.getToUrl());
-        if (null != test & !filtered.contains(outlink)) {
+        if (filtersFactory.accept(outlink.getToUrl()) & 
!filtered.contains(outlink)) {
             filtered.add(outlink);
         }
     }
     // this are the links we need to follow
     Outlink[] filterLinks = filtered.toArray(new Outlink[filtered.size()]);
-    queue.merge(filterLinks);
+    return filterLinks;
   }
 
   public void setQueue(Queue queue) {
     this.queue=queue;
     link = queue.next();
+    depth=link.getDepth();
   }
 
   public void setDroid(Droid droid) {
@@ -98,9 +114,18 @@
     protocolFactory = droid.getCore().getProtocolFactory();
     parserFactory=droid.getCore().getParserFactory();
     filtersFactory=droid.getCore().getFiltersFactory();
+    handlerFactory=droid.getCore().getHandlerFactory();
   }
 
   public void setId(int x) {
     id=x;
+  }
+
+  public int getDepth() {
+    return depth;
+  }
+
+  public void setDepth(int x) {
+    depth=x;
   }
 }

Added: labs/droids/trunk/src/core/java/org/apache/droids/api/Handler.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/api/Handler.java?rev=611834&view=auto
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/api/Handler.java (added)
+++ labs/droids/trunk/src/core/java/org/apache/droids/api/Handler.java Mon Jan 
14 07:55:34 2008
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.api;
+
+import java.io.InputStream;
+import java.net.URL;
+
+public interface Handler{
+    public void handle(InputStream openStream, URL url, Parse parse) throws 
Exception;
+}

Propchange: labs/droids/trunk/src/core/java/org/apache/droids/api/Handler.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: labs/droids/trunk/src/core/java/org/apache/droids/api/Parser.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/api/Parser.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/api/Parser.java (original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/api/Parser.java Mon Jan 
14 07:55:34 2008
@@ -21,5 +21,5 @@
 
 public interface Parser{
         /** Creates the parse for some content. */
-        Parse getParse(InputStream stream, URL base);
+        Parse getParse(InputStream openStream, Task link);
 }

Modified: labs/droids/trunk/src/core/java/org/apache/droids/api/Task.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/api/Task.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/api/Task.java (original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/api/Task.java Mon Jan 14 
07:55:34 2008
@@ -19,4 +19,6 @@
 public interface Task {
     public String getId();
     public String getTaskDate();
+    public int getDepth();
+    public void setDepth(int depth);
 }

Modified: labs/droids/trunk/src/core/java/org/apache/droids/api/Worker.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/api/Worker.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/api/Worker.java (original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/api/Worker.java Mon Jan 
14 07:55:34 2008
@@ -5,4 +5,6 @@
   public abstract void setQueue(Queue queue);
   public abstract void setDroid(Droid droid);
   public abstract void setId(int x);
+  public abstract void setDepth(int x);
+  public abstract int getDepth();
 }

Modified: 
labs/droids/trunk/src/core/java/org/apache/droids/droids-core-context.xml
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/droids-core-context.xml?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/droids-core-context.xml 
(original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/droids-core-context.xml 
Mon Jan 14 07:55:34 2008
@@ -3,6 +3,7 @@
 
 <beans>
   
+  <!-- Core -  factories register -->
   <bean id="org.apache.droids.Core" class="org.apache.droids.Core">
     <property name="droids"
       ref="org.apache.droids.helper.factories.DroidFactory"/>
@@ -12,8 +13,15 @@
       ref="org.apache.droids.helper.factories.ParserFactory"/>
     <property name="filtersFactory"
       ref="org.apache.droids.helper.factories.URLFiltersFactory"/>
+    <property name="handlerFactory"
+      ref="org.apache.droids.helper.factories.HandlerFactory"/>
   </bean>
   
+  <!-- Factories -->
+  <!-- FIXME: this needs to be easy overridable for custom droids
+    e.g. split into different files and using import
+    Workaround:
+    1) Using your own context (copy this one or, better, import it). -->
   <bean id="org.apache.droids.helper.factories.DroidFactory"
     class="org.apache.droids.helper.factories.DroidFactory">
     <property name="map">
@@ -50,6 +58,16 @@
     </property>
   </bean>
   
+  <bean id="org.apache.droids.helper.factories.HandlerFactory"
+    class="org.apache.droids.helper.factories.HandlerFactory">
+    <property name="map">
+      <map>
+        <entry key="save" value-ref="org.apache.droids.handle.Save"/>
+        <!--<entry key="sysout" 
value-ref="org.apache.droids.handle.Sysout"/>-->
+      </map>
+    </property>
+  </bean>
+  <!-- Droids -->
   <bean id="default" class="org.apache.droids.DefaultCrawler">
     <property name="core" ref="org.apache.droids.Core"/>
     <property name="queue" ref="org.apache.droids.queue.Simple"/>
@@ -57,13 +75,13 @@
     <property name="url"
       value="http://target-x.de/about.html"/>
   </bean>
-  
+  <!-- Queue -->
   <bean id="org.apache.droids.queue.Simple"
     class="org.apache.droids.queue.Simple">
     <property name="maxDepth" value="1"/>
-    <property name="maxSize" value="-1"/>
+    <property name="maxSize" value="5"/>
   </bean>
-  
+  <!-- Protocol -->
   <bean id="org.apache.droids.protocol.http.Http"
     class="org.apache.droids.protocol.http.Http">
     <property name="from" value="[EMAIL PROTECTED]"/>
@@ -71,14 +89,15 @@
     <property name="userAgent" value="DROIDS-crawler-x-m01y08"/>
     <property name="timeout" value="10000"/>
   </bean>
-  
+  <!-- Parser -->
   <bean id="org.apache.droids.parse.html.HtmlParser" 
class="org.apache.droids.parse.html.HtmlParser"/>
-  
+  <!-- Filter -->
   <bean id="org.apache.droids.net.RegexURLFilter" 
class="org.apache.droids.net.RegexURLFilter">
-    <property name="file" 
value="/home/thorsten/src/apache/droids/trunk/regex-urlfilter.txt">
-      
-    </property>
+    <property name="file" 
value="/home/thorsten/src/apache/droids/trunk/regex-urlfilter.txt"/>
   </bean>
-  
-  <!--<bean id="org.apache.droids.Job" class="org.apache.droids.Job">-->
+  <!-- Handler -->
+  <bean id="org.apache.droids.handle.Save" 
class="org.apache.droids.handle.Save">
+    <property name="outputDir" 
value="/home/thorsten/src/sadesi/temp/boja2/droids/"/>
+  </bean>
+  <bean id="org.apache.droids.handle.Sysout" 
class="org.apache.droids.handle.Sysout"/>
 </beans>

Added: labs/droids/trunk/src/core/java/org/apache/droids/handle/Save.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/handle/Save.java?rev=611834&view=auto
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/handle/Save.java (added)
+++ labs/droids/trunk/src/core/java/org/apache/droids/handle/Save.java Mon Jan 
14 07:55:34 2008
@@ -0,0 +1,61 @@
+package org.apache.droids.handle;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.net.URL;
+
+import org.apache.droids.api.Handler;
+import org.apache.droids.api.Parse;
+
+public class Save extends WriterHandler implements Handler {
+
+  private String outputDir;
+
+  private URL url;
+
+  public void handle(InputStream stream, URL url, Parse parse) throws 
Exception {
+    this.url = url;
+    writeOutput(stream);
+  }
+
+  private void writeOutput(InputStream stream) throws IOException {
+    if (!url.getFile().endsWith("/")) {
+      Reader reader = new InputStreamReader(stream);
+      String file = outputDir + url.getHost() + url.getFile();
+      log.info("Trying to save " + url + " to " + file);
+      File cache = new File(file);
+      createFile(cache);
+      Writer output = new OutputStreamWriter(new FileOutputStream(cache));
+      pipe(reader, output);
+    }
+  }
+
+  private void createFile(File cache) throws IOException {
+    if (!cache.isDirectory() & !cache.getAbsolutePath().endsWith("/")) {
+      try {
+        cache.createNewFile();
+      } catch (Exception e) {
+        // if we cannot create a file that means that the parent path
+        // does not exists
+        File path = new File(cache.getParent());
+        path.mkdirs();
+        cache.createNewFile();
+      }
+    }
+  }
+
+  public String getOutputDir() {
+    return outputDir;
+  }
+
+  public void setOutputDir(String outputDir) {
+    this.outputDir = outputDir;
+  }
+
+}

Propchange: labs/droids/trunk/src/core/java/org/apache/droids/handle/Save.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: labs/droids/trunk/src/core/java/org/apache/droids/handle/Sysout.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/handle/Sysout.java?rev=611834&view=auto
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/handle/Sysout.java (added)
+++ labs/droids/trunk/src/core/java/org/apache/droids/handle/Sysout.java Mon 
Jan 14 07:55:34 2008
@@ -0,0 +1,26 @@
+package org.apache.droids.handle;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.net.URL;
+
+import org.apache.droids.api.Handler;
+import org.apache.droids.api.Parse;
+
+public class Sysout extends WriterHandler implements Handler {
+
+  private void writeOutput(InputStream stream) throws IOException {
+    Reader reader = new InputStreamReader(stream);
+    Writer output = new OutputStreamWriter(System.out);
+    pipe(reader, output);
+  }
+
+  public void handle(InputStream stream, URL url, Parse parse) throws 
Exception {
+    writeOutput(stream);
+  }
+
+}

Propchange: labs/droids/trunk/src/core/java/org/apache/droids/handle/Sysout.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
labs/droids/trunk/src/core/java/org/apache/droids/handle/WriterHandler.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/handle/WriterHandler.java?rev=611834&view=auto
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/handle/WriterHandler.java 
(added)
+++ labs/droids/trunk/src/core/java/org/apache/droids/handle/WriterHandler.java 
Mon Jan 14 07:55:34 2008
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.handle;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Writer;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+public class WriterHandler {
+
+  protected final Log log = LogFactory.getLog(this.getClass()
+      .getCanonicalName());
+
+  /**
+   * Pipes everything from the reader to the writer via a buffer
+   */
+  protected static void pipe(Reader reader, Writer writer) throws IOException {
+    char[] buf = new char[1024];
+    int read = 0;
+    while ((read = reader.read(buf)) >= 0) {
+      writer.write(buf, 0, read);
+    }
+    writer.flush();
+  }
+
+  public WriterHandler() {
+    super();
+  }
+
+}
\ No newline at end of file

Propchange: 
labs/droids/trunk/src/core/java/org/apache/droids/handle/WriterHandler.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/GenericFactory.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/GenericFactory.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/GenericFactory.java
 (original)
+++ 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/GenericFactory.java
 Mon Jan 14 07:55:34 2008
@@ -2,7 +2,13 @@
 
 import java.util.LinkedHashMap;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 public class GenericFactory {
+  
+  protected final Log log = 
LogFactory.getLog(this.getClass().getCanonicalName());
+  
   private LinkedHashMap<String, Object> map;
 
   public LinkedHashMap<String, Object> getMap() {

Added: 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/HandlerFactory.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/HandlerFactory.java?rev=611834&view=auto
==============================================================================
--- 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/HandlerFactory.java
 (added)
+++ 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/HandlerFactory.java
 Mon Jan 14 07:55:34 2008
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.helper.factories;
+
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Iterator;
+
+import org.apache.droids.api.Handler;
+import org.apache.droids.api.Parse;
+
+public class HandlerFactory extends GenericFactory {
+
+  public boolean handle(InputStream stream, URL url, Parse parse) {
+    for (Iterator<String> iterator = getMap().keySet().iterator(); iterator
+        .hasNext();) {
+      if (stream == null)
+        return false;
+      String handlerName = iterator.next();
+      Handler handler = (Handler) getMap().get(handlerName);
+      try {
+        handler.handle(stream, url, parse);
+      } catch (Exception e) {
+        log.fatal("Handler \""+handlerName + "\" has thrown an error.", e);
+      }
+    }
+
+    return true;
+  }
+
+}

Propchange: 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/HandlerFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/URLFiltersFactory.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/URLFiltersFactory.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/URLFiltersFactory.java
 (original)
+++ 
labs/droids/trunk/src/core/java/org/apache/droids/helper/factories/URLFiltersFactory.java
 Mon Jan 14 07:55:34 2008
@@ -17,23 +17,40 @@
 package org.apache.droids.helper.factories;
 
 import java.util.Iterator;
-import java.util.LinkedHashMap;
 
 import org.apache.droids.api.URLFilter;
 
 public class URLFiltersFactory extends GenericFactory {
 
-  private URLFilter[] filters;
+  /** Run all defined filters. Assume logical AND. 
+   * @param urlString - url to test
+   * @return true if filter plugin accept the url, false if excluded.
+   */
+  public boolean accept(String urlString) {
+    for (Iterator<String> iterator = getMap().keySet().iterator(); iterator
+        .hasNext();) {
+      if (urlString == null)
+        return false;
+      URLFilter filter = (URLFilter) getMap().get(iterator.next());
+      urlString = filter.filter(urlString);
+      if (urlString == null)
+        return false;
+    }
+    return true;
+  }
 
-  /** Run all defined filters. Assume logical AND. */
-  public String filter(String urlString) {
+  /** Run a specific filter class. 
+   * @param urlString - url to test
+   * @param filterName - name of the specific filter class.
+   * @return true if filter plugin accept the url, false if excluded.
+   */
+  public boolean accept(String urlString, String filterName) {
     if (urlString == null)
-      return null;
-    for (Iterator iterator = getMap().entrySet().iterator(); 
iterator.hasNext();) {
-      urlString = ((URLFilter) iterator.next()).filter(urlString);
-      
-    }
-    
-    return urlString;
+      return false;
+    URLFilter filter = (URLFilter) getMap().get(filterName);
+    urlString = filter.filter(urlString);
+    if (urlString == null)
+      return false;
+    return true;
   }
 }

Modified: labs/droids/trunk/src/core/java/org/apache/droids/parse/Outlink.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/parse/Outlink.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/parse/Outlink.java 
(original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/parse/Outlink.java Mon 
Jan 14 07:55:34 2008
@@ -24,14 +24,17 @@
 public class Outlink implements Task {
     private String toUrl;
     private String anchor;
+    private int depth;
     private String taskDate=new SimpleDateFormat("yyyyMMddHHmmss").format(new 
Date(System
             .currentTimeMillis()));
-    public Outlink(String toUrl, String anchor){
+    public Outlink(String toUrl, String anchor, int depth2){
         this.toUrl=toUrl;
         this.anchor = anchor;
+        this.depth= depth2;
     }
-    public Outlink(String toUrl){
-        this.toUrl=toUrl;
+    public Outlink(String toUrl, int depth2) {
+      this.toUrl=toUrl;
+      this.depth= depth2;
     }
     public String getToUrl() { return toUrl; }
     public String getAnchor() { return anchor; }
@@ -40,5 +43,11 @@
     }
     public String getTaskDate() {
         return taskDate;
+    }
+    public int getDepth() {
+      return depth;
+    }
+    public void setDepth(int depth) {
+      this.depth = depth;
     }
 }

Modified: 
labs/droids/trunk/src/core/java/org/apache/droids/parse/html/HtmlParser.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/parse/html/HtmlParser.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- 
labs/droids/trunk/src/core/java/org/apache/droids/parse/html/HtmlParser.java 
(original)
+++ 
labs/droids/trunk/src/core/java/org/apache/droids/parse/html/HtmlParser.java 
Mon Jan 14 07:55:34 2008
@@ -24,8 +24,11 @@
 
 import javax.xml.stream.XMLInputFactory;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.droids.api.Parse;
 import org.apache.droids.api.Parser;
+import org.apache.droids.api.Task;
 import org.apache.droids.parse.Outlink;
 import org.apache.droids.parse.ParseData;
 import org.apache.droids.parse.ParseImpl;
@@ -41,104 +44,120 @@
 import org.xml.sax.SAXNotSupportedException;
 
 public class HtmlParser implements Parser {
+  protected final Log log = LogFactory.getLog(this.getClass()
+      .getCanonicalName());
 
-    private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
-    private URL base;
+  private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
 
-    public Parse getParse(InputStream stream, URL base) {
-        this.base=base;
-        ParseData parseData = null;
-        getRemover();
-        // setup filter chain
-        XMLDocumentFilter[] filters = { getRemover()};
-        // create HTML parser
-        DOMFragmentParser  parser = getParser(filters);
-        DocumentFragment node = new 
HTMLDocumentImpl().createDocumentFragment();
-        // parse document
-        // XMLInputSource source = new XMLInputSource(null, uri, uri);
-        try {
-          parser.parse(base.toExternalForm(), node);
-          parseData=extract(node);
-        } catch (Exception e) {
-          // TODO Auto-generated catch block
-          e.printStackTrace();
-          return new ParseImpl(stream.toString(), null);
-        }
-        return new ParseImpl(stream.toString(), parseData);
+  private URL base;
+
+  private Task link;
+
+  public Parse getParse(InputStream stream, Task link) {
+    this.link = link;
+    try {
+      this.base = new URL(link.getId());
+    } catch (MalformedURLException e1) {
+      // TODO Auto-generated catch block
+      e1.printStackTrace();
+    }
+    ParseData parseData = null;
+    // setup filter chain
+    XMLDocumentFilter[] filters = { getRemover() };
+    // create HTML parser
+    DOMFragmentParser parser = getParser(filters);
+    DocumentFragment node = new HTMLDocumentImpl().createDocumentFragment();
+    // parse document
+    // XMLInputSource source = new XMLInputSource(null, uri, uri);
+    try {
+      parser.parse(base.toExternalForm(), node);
+      parseData = extract(node);
+    } catch (Exception e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+      return new ParseImpl(stream.toString(), null);
     }
+    return new ParseImpl(stream.toString(), parseData);
+  }
 
-    private ParseData extract(DocumentFragment node) {
-      ArrayList<Outlink> links = new ArrayList<Outlink>();
-        try {
-          extractLinks(node,links,new HashSet<String>());
-        } catch (MalformedURLException e) {
-          e.printStackTrace();
-        } 
-      Outlink[] outlinks = new Outlink[0];
-      outlinks = (Outlink[])links.toArray(new Outlink[links.size()]);
-      return new ParseData(outlinks);
-    }
-
-    private  DOMFragmentParser getParser(XMLDocumentFilter[] filters) {
-      DOMFragmentParser parser = new DOMFragmentParser();
-      try {
-        parser.setProperty("http://cyberneko.org/html/properties/filters", 
filters);
-        parser
-        .setFeature(
-            
"http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
-            false);
-   parser.setFeature(
-        "http://cyberneko.org/html/features/balance-tags/document-fragment",
-        true);
-   parser.setFeature("http://cyberneko.org/html/features/report-errors", 
false);
-      } catch (SAXNotRecognizedException e) {
-        // TODO Auto-generated catch block
-        e.printStackTrace();
-      } catch (SAXNotSupportedException e) {
-        // TODO Auto-generated catch block
-        e.printStackTrace();
-      }
-      return parser;
+  private ParseData extract(DocumentFragment node) {
+    ArrayList<Outlink> links = new ArrayList<Outlink>();
+    try {
+      extractLinks(node, links, new HashSet<String>());
+    } catch (MalformedURLException e) {
+      e.printStackTrace();
+    }
+    Outlink[] outlinks = new Outlink[0];
+    outlinks = (Outlink[]) links.toArray(new Outlink[links.size()]);
+    return new ParseData(outlinks);
+  }
+
+  private DOMFragmentParser getParser(XMLDocumentFilter[] filters) {
+    DOMFragmentParser parser = new DOMFragmentParser();
+    try {
+      parser.setProperty("http://cyberneko.org/html/properties/filters",
+          filters);
+      parser
+          .setFeature(
+              
"http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
+              false);
+      parser.setFeature(
+          "http://cyberneko.org/html/features/balance-tags/document-fragment",
+          true);
+      parser.setFeature("http://cyberneko.org/html/features/report-errors",
+          false);
+    } catch (SAXNotRecognizedException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+    } catch (SAXNotSupportedException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
     }
+    return parser;
+  }
 
-    private ElementRemover getRemover() {
-      // create element remover filter
-      ElementRemover remover = new ElementRemover();
-      // set which elements to accept
-      remover.acceptElement("a", new String[] { "href" });
-      // completely remove some elements
-      remover.removeElement("script");
-      remover.removeElement("head");
-      return remover;
-    }
-    
-    private void extractLinks(Node node, ArrayList<Outlink> links, 
HashSet<String> set) throws MalformedURLException {
-      if (node.getNodeType() == Node.ELEMENT_NODE) {
-        if ("a".equalsIgnoreCase(node.getNodeName())) {
-          NamedNodeMap attrs = node.getAttributes();
-          String target;
-          for (int i = 0; i < attrs.getLength(); i++) {
-            Node attr = attrs.item(i);
-            String attrName = attr.getNodeName();
-            if (attrName.equalsIgnoreCase("href")) {
-              target = attr.getNodeValue();
-              try {
-                final Outlink outlink = new 
Outlink(target.contains(":/")?target:new URL (base,target).toString());
-                if (!set.contains(outlink.getToUrl())) {
-                    set.add(outlink.getToUrl());
-                    links.add(outlink);
-                }
-              } catch (Exception e) {  }
+  private ElementRemover getRemover() {
+    // create element remover filter
+    ElementRemover remover = new ElementRemover();
+    // set which elements to accept
+    remover.acceptElement("a", new String[] { "href" });
+    // completely remove some elements
+    remover.removeElement("script");
+    remover.removeElement("head");
+    return remover;
+  }
+
+  private void extractLinks(Node node, ArrayList<Outlink> links,
+      HashSet<String> set) throws MalformedURLException {
+    if (node.getNodeType() == Node.ELEMENT_NODE) {
+      if ("a".equalsIgnoreCase(node.getNodeName())) {
+        NamedNodeMap attrs = node.getAttributes();
+        String target;
+        for (int i = 0; i < attrs.getLength(); i++) {
+          Node attr = attrs.item(i);
+          String attrName = attr.getNodeName();
+          if (attrName.equalsIgnoreCase("href")) {
+            target = attr.getNodeValue();
+            try {
+              final Outlink outlink = new Outlink(
+                  target.contains(":/") ? target : new URL(base, target)
+                      .toString(), link.getDepth() + 1);
+              if (!set.contains(outlink.getToUrl())) {
+                set.add(outlink.getToUrl());
+                links.add(outlink);
+              }
+            } catch (Exception e) {
             }
           }
         }
       }
-      NodeList children = node.getChildNodes();
-      if (children != null) {
-        int len = children.getLength();
-        for (int i = 0; i < len; i++) {
-          extractLinks(children.item(i), links, set);
-        }
+    }
+    NodeList children = node.getChildNodes();
+    if (children != null) {
+      int len = children.getLength();
+      for (int i = 0; i < len; i++) {
+        extractLinks(children.item(i), links, set);
       }
     }
+  }
 }

Modified: labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueBean.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueBean.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueBean.java 
(original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueBean.java Mon 
Jan 14 07:55:34 2008
@@ -20,7 +20,6 @@
 
 public class QueueBean {
 
-
     protected int maxSize = 0;
 
     protected int maxDepth = 0;
@@ -63,4 +62,10 @@
       this.maxDepth = maxDepth;
     }
 
+    public boolean acceptSize(int i){
+      return (maxSize==-1)?true:maxSize>=i;
+    }
+    public boolean acceptDepth(int i){
+      return (maxDepth==-1)?true:maxDepth>=i;
+    }
 }

Modified: labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueLink.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueLink.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueLink.java 
(original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/queue/QueueLink.java Mon 
Jan 14 07:55:34 2008
@@ -20,14 +20,17 @@
 import org.apache.droids.api.Task;
 
 public class QueueLink implements Link,Task {
-    public QueueLink(String uri, String taskDate) {
+    public QueueLink(String uri, String taskDate, int i) {
         id = uri;
         this.taskDate=taskDate;
+        this.depth=i;
     }
 
     private String[] from, to;
 
     private String lastModifiedDate, taskDate, id;
+    
+    private int depth;
 
     public String[] getFrom() {
         return from;
@@ -47,6 +50,14 @@
 
     public String getTaskDate() {
         return taskDate;
+    }
+
+    public int getDepth() {
+      return depth;
+    }
+
+    public void setDepth(int depth) {
+      this.depth=depth;
     }
 
 }

Modified: labs/droids/trunk/src/core/java/org/apache/droids/queue/Simple.java
URL: 
http://svn.apache.org/viewvc/labs/droids/trunk/src/core/java/org/apache/droids/queue/Simple.java?rev=611834&r1=611833&r2=611834&view=diff
==============================================================================
--- labs/droids/trunk/src/core/java/org/apache/droids/queue/Simple.java 
(original)
+++ labs/droids/trunk/src/core/java/org/apache/droids/queue/Simple.java Mon Jan 
14 07:55:34 2008
@@ -43,7 +43,7 @@
         LinkedList<Task> list = new LinkedList<Task>();
         for (int i = 0; i < initialTask.length; i++) {
             Link task = (Link) initialTask[i];
-            if (null != task) {
+            if (null != task & acceptSize(i)) {
                 allTasks.put(task.getId(), task);
                 list.add(task);
             }
@@ -62,14 +62,14 @@
         if (null != toDoLinks) {
             for (int i = 0; i < toDoLinks.length; i++) {
                 Task task = toDoLinks[i];
-                if (null != task) {
+                if (null != task & 
acceptSize(i+allTasks.size())&acceptDepth(task.getDepth())) {
                     list.add(task);
                 }
             }
         }
         for (int i = 0; i < filterLinks.length; i++) {
             Task task = filterLinks[i];
-            if (null != task & !allTasks.containsKey(task.getId())) {
+            if (null != task & acceptSize(i+allTasks.size()) & 
!allTasks.containsKey(task.getId())&acceptDepth(task.getDepth())) {
                 allTasks.put(task.getId(), task);
                 list.add(task);
             }



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to