Modified: 
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/java/org/apache/droids/dynamic/TestSimpleDroid.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/java/org/apache/droids/dynamic/TestSimpleDroid.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- 
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/java/org/apache/droids/dynamic/TestSimpleDroid.java
 (original)
+++ 
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/java/org/apache/droids/dynamic/TestSimpleDroid.java
 Tue Dec 18 08:47:39 2012
@@ -36,63 +36,63 @@ import org.springframework.context.suppo
 
 public class TestSimpleDroid {
 
-       protected LocalHttpServer testserver;
+    protected LocalHttpServer testserver;
 
-       private final static ApplicationContext context = new 
ClassPathXmlApplicationContext(
-                       "classpath:/droids-core-test-context.xml");
+    private final static ApplicationContext context = new 
ClassPathXmlApplicationContext(
+            "classpath:/droids-core-test-context.xml");
 
-       private DroidsConfig droidsConfig = null;
+    private DroidsConfig droidsConfig = null;
 
-       @Before
-       public void setUp() throws Exception {
-               this.droidsConfig = (DroidsConfig) TestSimpleDroid.context
-                               
.getBean("org.apache.droids.dynamic.DroidsConfig");
-               this.testserver = new LocalHttpServer();
-       }
-
-       @Test
-       public void testReportCrawlingDroid() throws Exception {
-               this.testserver.register("*", new ResourceHandler());
-               this.testserver.start();
-
-               String baseURI = "http:/" + this.testserver.getServiceAddress();
-               String targetURI = baseURI + "/start_html";
-
-               Droid<Link> droid = createSimpleReportCrawlingDroid(targetURI);
-
-               droid.init();
-               droid.start();
-               droid.getTaskMaster().awaitTermination(30, TimeUnit.SECONDS);
-
-               Assert.assertFalse(ReportHandler.getReport().isEmpty());
-               Assert.assertEquals(5, ReportHandler.getReport().size());
-               Assert.assertTrue(ReportHandler.getReport().contains(
-                               baseURI + "/start_html"));
-               Assert.assertTrue(ReportHandler.getReport().contains(
-                               baseURI + "/page1_html"));
-               Assert.assertTrue(ReportHandler.getReport().contains(
-                               baseURI + "/page2_html"));
-               Assert.assertTrue(ReportHandler.getReport().contains(
-                               baseURI + "/page3_html"));
-               Assert.assertTrue(ReportHandler.getReport().contains(
-                               baseURI + "/page4_html"));
-
-               ReportHandler.recycle();
-       }
-
-       private Droid<Link> createSimpleReportCrawlingDroid(final String 
targetURI) {
-               Droid<Link> droid = this.droidsConfig.getDroid("report");
-
-               Assert.assertFalse("Droid is null.", droid == null);
-               Assert.assertTrue(
-                               "The test droid must be an instance of 
ReportCrawlingDroid",
-                               droid instanceof ReportCrawlingDroid);
-
-               final List<String> locations = new ArrayList<String>();
-               locations.add(targetURI);
-               ((CrawlingDroid) droid).setInitialLocations(locations);
+    @Before
+    public void setUp() throws Exception {
+        this.droidsConfig = (DroidsConfig) TestSimpleDroid.context
+                .getBean("org.apache.droids.dynamic.DroidsConfig");
+        this.testserver = new LocalHttpServer();
+    }
+
+    @Test
+    public void testReportCrawlingDroid() throws Exception {
+        this.testserver.register("*", new ResourceHandler());
+        this.testserver.start();
+
+        String baseURI = "http:/" + this.testserver.getServiceAddress();
+        String targetURI = baseURI + "/start_html";
+
+        Droid<Link> droid = createSimpleReportCrawlingDroid(targetURI);
+
+        droid.init();
+        droid.start();
+        droid.getTaskMaster().awaitTermination(30, TimeUnit.SECONDS);
+
+        Assert.assertFalse(ReportHandler.getReport().isEmpty());
+        Assert.assertEquals(5, ReportHandler.getReport().size());
+        Assert.assertTrue(ReportHandler.getReport().contains(
+                baseURI + "/start_html"));
+        Assert.assertTrue(ReportHandler.getReport().contains(
+                baseURI + "/page1_html"));
+        Assert.assertTrue(ReportHandler.getReport().contains(
+                baseURI + "/page2_html"));
+        Assert.assertTrue(ReportHandler.getReport().contains(
+                baseURI + "/page3_html"));
+        Assert.assertTrue(ReportHandler.getReport().contains(
+                baseURI + "/page4_html"));
+
+        ReportHandler.recycle();
+    }
+
+    private Droid<Link> createSimpleReportCrawlingDroid(final String 
targetURI) {
+        Droid<Link> droid = this.droidsConfig.getDroid("report");
+
+        Assert.assertFalse("Droid is null.", droid == null);
+        Assert.assertTrue(
+                "The test droid must be an instance of ReportCrawlingDroid",
+                droid instanceof ReportCrawlingDroid);
+
+        final List<String> locations = new ArrayList<String>();
+        locations.add(targetURI);
+        ((CrawlingDroid) droid).setInitialLocations(locations);
 
-               return droid;
-       }
+        return droid;
+    }
 
 }

Modified: 
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- 
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml
 (original)
+++ 
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml
 Tue Dec 18 08:47:39 2012
@@ -15,105 +15,105 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 -->
-  <!-- 
-     Using your own context
-    +++++++++++++++++++++++++ 
-    The easiest way is to 
-    a) create a droids-your-context.xml 
-    b) add:
-       <import 
resource="classpath:/org/apache/droids/dynamic/droids-core-context.xml"/>
-    c) implement your own beans which will override the imported ones
-    d) Call the ant target like:
-       ant droids.crawl default 
-Ddroids.spring.context=PATH/droids-your-context.xml
- -->
+<!--
+   Using your own context
+  +++++++++++++++++++++++++
+  The easiest way is to
+  a) create a droids-your-context.xml
+  b) add:
+     <import 
resource="classpath:/org/apache/droids/dynamic/droids-core-context.xml"/>
+  c) implement your own beans which will override the imported ones
+  d) Call the ant target like:
+     ant droids.crawl default 
-Ddroids.spring.context=PATH/droids-your-context.xml
+-->
 <beans xmlns="http://www.springframework.org/schema/beans"
-  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-  xmlns:configurator="http://cocoon.apache.org/schema/configurator"
-  xsi:schemaLocation="http://www.springframework.org/schema/beans 
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xmlns:configurator="http://cocoon.apache.org/schema/configurator"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans 
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
        http://cocoon.apache.org/schema/configurator 
http://cocoon.apache.org/schema/configurator/cocoon-configurator-1.1.0.xsd">
-  
-  <import 
resource="classpath:/org/apache/droids/dynamic/droids-core-context.xml"/>
-  
-  <!-- configuration properties file -->
-  <bean 
class="org.springframework.beans.factory.config.PropertyPlaceholderConfigurer">
-    <property name="locations" value="classpath:/droids-core.properties"/>
-  </bean>
-  
-  <bean name="taskExceptionHandler"
-    class="org.apache.droids.impl.DefaultTaskExceptionHandler">
-  </bean>
-  
-  <bean name="taskMaster"
-    class="org.apache.droids.impl.MultiThreadedTaskMaster">
-    <property name="exceptionHandler" ref="taskExceptionHandler" />
-    <property name="delayTimer" 
ref="org.apache.droids.delay.SimpleDelayTimer"/>
-    <!--<property name="maxThreads" value="${droids.maxThreads}"/>-->
-  </bean>
-  
-  <!-- Droids -->
-  <bean name="org.apache.droids.api.Droid/report"
-    class="org.apache.droids.robot.crawler.ReportCrawlingDroid">
-    <constructor-arg ref="java.util.LinkedList" />
-    <constructor-arg ref="taskMaster" />
-    
-    
-    <property name="protocolFactory" 
ref="org.apache.droids.helper.factories.ProtocolFactory"/>
-    <property name="parserFactory" 
ref="org.apache.droids.helper.factories.ParserFactory"/>
-    <property name="filtersFactory" 
ref="org.apache.droids.helper.factories.FilterFactory"/>
-  </bean>
-  <!-- Queue -->
-  <bean id="java.util.LinkedList"
-    class="java.util.LinkedList">
-  </bean>
-  <!-- Protocol -->
-  <bean 
-    name="org.apache.droids.api.Protocol/http"
-    class="org.apache.droids.protocol.http.HttpProtocol" scope="singleton">
-    <property name="userAgent" value="DROIDS-crawler-x-m01y08"/>
-    <property name="forceAllow" value="${droids.protocol.http.force}"/>
-  </bean>
-  <bean name="org.apache.droids.api.Protocol/file"
-    class="org.apache.droids.protocol.file.FileProtocol" scope="singleton"/>
-  <!-- Parser -->
-  <bean 
-    name="text/html"
-    class="org.apache.droids.parse.html.HtmlParser">
-    <property name="elements">
-    <map>
-        <entry key="a" value="href"/>
-        <entry key="link" value="href"/>
-        <entry key="img" value="src"/>
-        <entry key="script" value="src"/>
-     </map>
-     </property>
-    </bean>
-  <!-- Filter -->
-  <bean
-    name="org.apache.droids.api.URLFilter/org.apache.droids.net.RegexURLFilter"
-    class="org.apache.droids.net.RegexURLFilter">
-    <property name="file" value="${droids.filter.regex}"/>
-  </bean>
-  <!-- Handler -->
-  <bean 
-    name="org.apache.droids.api.Handler/org.apache.droids.handle.SysoutHandler"
-    class="org.apache.droids.handle.SysoutHandler"/>
-  <bean 
-    name="org.apache.droids.api.Handler/org.apache.droids.handle.SaveHandler"
-    class="org.apache.droids.handle.SaveHandler">
-    <property name="saveContentHandlerStrategy" 
-    
ref="org.apache.droids.api.Handler/org.apache.droids.handle.DefaultSaveContentHandlerStrategy"
 />
-  </bean>
-  <bean
-    
name="org.apache.droids.api.Handler/org.apache.droids.handle.DefaultSaveContentHandlerStrategy"
-    class="org.apache.droids.handle.DefaultSaveContentHandlerStrategy">
-      <property name="includeHost" value="true" />
-      <property name="outputDir" value="tmp/" />
-  </bean>
-
-  
-  <bean 
-    name="org.apache.droids.delay.SimpleDelayTimer"
-    class="org.apache.droids.delay.SimpleDelayTimer">
-    <property name="delayMillis" value="${droids.delay.request}"/>
-  </bean>
+
+    <import 
resource="classpath:/org/apache/droids/dynamic/droids-core-context.xml"/>
+
+    <!-- configuration properties file -->
+    <bean 
class="org.springframework.beans.factory.config.PropertyPlaceholderConfigurer">
+        <property name="locations" value="classpath:/droids-core.properties"/>
+    </bean>
+
+    <bean name="taskExceptionHandler"
+          class="org.apache.droids.impl.DefaultTaskExceptionHandler">
+    </bean>
+
+    <bean name="taskMaster"
+          class="org.apache.droids.impl.MultiThreadedTaskMaster">
+        <property name="exceptionHandler" ref="taskExceptionHandler"/>
+        <property name="delayTimer" 
ref="org.apache.droids.delay.SimpleDelayTimer"/>
+        <!--<property name="maxThreads" value="${droids.maxThreads}"/>-->
+    </bean>
+
+    <!-- Droids -->
+    <bean name="org.apache.droids.api.Droid/report"
+          class="org.apache.droids.robot.crawler.ReportCrawlingDroid">
+        <constructor-arg ref="java.util.LinkedList"/>
+        <constructor-arg ref="taskMaster"/>
+
+
+        <property name="protocolFactory" 
ref="org.apache.droids.helper.factories.ProtocolFactory"/>
+        <property name="parserFactory" 
ref="org.apache.droids.helper.factories.ParserFactory"/>
+        <property name="filtersFactory" 
ref="org.apache.droids.helper.factories.FilterFactory"/>
+    </bean>
+    <!-- Queue -->
+    <bean id="java.util.LinkedList"
+          class="java.util.LinkedList">
+    </bean>
+    <!-- Protocol -->
+    <bean
+            name="org.apache.droids.api.Protocol/http"
+            class="org.apache.droids.protocol.http.HttpProtocol" 
scope="singleton">
+        <property name="userAgent" value="DROIDS-crawler-x-m01y08"/>
+        <property name="forceAllow" value="${droids.protocol.http.force}"/>
+    </bean>
+    <bean name="org.apache.droids.api.Protocol/file"
+          class="org.apache.droids.protocol.file.FileProtocol" 
scope="singleton"/>
+    <!-- Parser -->
+    <bean
+            name="text/html"
+            class="org.apache.droids.parse.html.HtmlParser">
+        <property name="elements">
+            <map>
+                <entry key="a" value="href"/>
+                <entry key="link" value="href"/>
+                <entry key="img" value="src"/>
+                <entry key="script" value="src"/>
+            </map>
+        </property>
+    </bean>
+    <!-- Filter -->
+    <bean
+            
name="org.apache.droids.api.URLFilter/org.apache.droids.net.RegexURLFilter"
+            class="org.apache.droids.net.RegexURLFilter">
+        <property name="file" value="${droids.filter.regex}"/>
+    </bean>
+    <!-- Handler -->
+    <bean
+            
name="org.apache.droids.api.Handler/org.apache.droids.handle.SysoutHandler"
+            class="org.apache.droids.handle.SysoutHandler"/>
+    <bean
+            
name="org.apache.droids.api.Handler/org.apache.droids.handle.SaveHandler"
+            class="org.apache.droids.handle.SaveHandler">
+        <property name="saveContentHandlerStrategy"
+                  
ref="org.apache.droids.api.Handler/org.apache.droids.handle.DefaultSaveContentHandlerStrategy"/>
+    </bean>
+    <bean
+            
name="org.apache.droids.api.Handler/org.apache.droids.handle.DefaultSaveContentHandlerStrategy"
+            class="org.apache.droids.handle.DefaultSaveContentHandlerStrategy">
+        <property name="includeHost" value="true"/>
+        <property name="outputDir" value="tmp/"/>
+    </bean>
+
+
+    <bean
+            name="org.apache.droids.delay.SimpleDelayTimer"
+            class="org.apache.droids.delay.SimpleDelayTimer">
+        <property name="delayMillis" value="${droids.delay.request}"/>
+    </bean>
 </beans>

Modified: incubator/droids/branches/0.2.x-cleanup/droids-tika/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-tika/pom.xml?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-tika/pom.xml (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-tika/pom.xml Tue Dec 18 
08:47:39 2012
@@ -15,53 +15,54 @@
    See the License for the specific language governing permissions and
    limitations under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
 
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <artifactId>droids</artifactId>
-    <groupId>org.apache.droids</groupId>
-    <version>0.3.0-incubating-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-  <artifactId>droids-tika</artifactId>
-  <name>Apache Droids Tika</name>
-  <inceptionYear>2007</inceptionYear>
-  <description>Apache Droids Tika Parser</description>
-  <packaging>jar</packaging>  
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>droids</artifactId>
+        <groupId>org.apache.droids</groupId>
+        <version>0.3.0-incubating-SNAPSHOT</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+    <artifactId>droids-tika</artifactId>
+    <name>Apache Droids Tika</name>
+    <inceptionYear>2007</inceptionYear>
+    <description>Apache Droids Tika Parser</description>
+    <packaging>jar</packaging>
 
-  <properties>
-    <tika-release-version>1.1</tika-release-version>
-  </properties>
+    <properties>
+        <tika-release-version>1.1</tika-release-version>
+    </properties>
 
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.droids</groupId>
-      <artifactId>droids-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-       <groupId>org.apache.tika</groupId>
-       <artifactId>tika-core</artifactId>
-       <version>${tika-release-version}</version>
-    </dependency>
-    <dependency>
-       <groupId>org.apache.tika</groupId>
-       <artifactId>tika-parsers</artifactId>
-       <version>${tika-release-version}</version>
-       <exclusions>
-               <exclusion>
-                       <artifactId>commons-logging</artifactId>
-                       <groupId>commons-logging</groupId>
-               </exclusion>
-       </exclusions>
-    </dependency>
-    <!-- test dependencies -->
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>${junit.version}</version>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.droids</groupId>
+            <artifactId>droids-core</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${tika-release-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-parsers</artifactId>
+            <version>${tika-release-version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>commons-logging</artifactId>
+                    <groupId>commons-logging</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <!-- test dependencies -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>${junit.version}</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
 </project>

Modified: 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
 (original)
+++ 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
 Tue Dec 18 08:47:39 2012
@@ -54,76 +54,75 @@ import org.xml.sax.SAXException;
 /**
  * Parses documents using Tika.
  * Any document type that Tika can handle, can be handled by this class,
- * including HTML. 
- *
+ * including HTML.
  */
 public class TikaDocumentParser implements TikaParser {
 
-  protected static final Logger LOG = 
LoggerFactory.getLogger(TikaDocumentParser.class);
-  
-  @Override
-  public TikaParse parse(ContentEntity entity, Task task) throws 
DroidsException,
-      IOException {
-    // Init Tika objects
-    org.apache.tika.parser.Parser parser = new AutoDetectParser();
-    Metadata metadata = new Metadata();
-    
-    String charset = entity.getCharset();
-    if (charset == null) {
-      charset = "UTF-8";
-    }
-    
-    StringWriter dataBuffer = new StringWriter();
-    StringWriter bodyBuffer = new StringWriter();
-    StringWriter mainContentBuffer = new StringWriter();
-     
-    SAXTransformerFactory factory = (SAXTransformerFactory) 
SAXTransformerFactory.newInstance();
-    TransformerHandler xmlHandler;
-    try {
-      xmlHandler = factory.newTransformerHandler();
-    } catch (TransformerConfigurationException e) {
-      throw new DroidsException(e);
-    }
-    xmlHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
-    xmlHandler.setResult(new StreamResult(dataBuffer));
-    
-    BoilerpipeContentHandler mainContentHandler = new 
BoilerpipeContentHandler(mainContentBuffer);
-    BodyContentHandler bodyHandler = new BodyContentHandler(bodyBuffer);
-    LinkContentHandler linkHandler = new LinkContentHandler();
-    
-    TeeContentHandler parallelHandler = new TeeContentHandler(xmlHandler, 
mainContentHandler, bodyHandler, linkHandler );
-
-    InputStream instream = entity.obtainContent();
-    try {
-      parser.parse(instream, parallelHandler, metadata, new ParseContext());
-      
-      ArrayList<Link> extractedTasks = new ArrayList<Link>();
-      int depth = task.getDepth() + 1;
-      if (task instanceof LinkTask) {
-             for(org.apache.tika.sax.Link tikaLink : linkHandler.getLinks()) {
-               try {
-                 URI uri = new URI(tikaLink.getUri());
-            // Test to see if the scheme is empty
-            // This would indicate a relative URL, so resolve it against the 
task URI
-            if(uri.getScheme() == null) {
-              uri = ((Link) task).getURI().resolve(uri);
+    protected static final Logger LOG = 
LoggerFactory.getLogger(TikaDocumentParser.class);
+
+    @Override
+    public TikaParse parse(ContentEntity entity, Task task) throws 
DroidsException,
+            IOException {
+        // Init Tika objects
+        org.apache.tika.parser.Parser parser = new AutoDetectParser();
+        Metadata metadata = new Metadata();
+
+        String charset = entity.getCharset();
+        if (charset == null) {
+            charset = "UTF-8";
+        }
+
+        StringWriter dataBuffer = new StringWriter();
+        StringWriter bodyBuffer = new StringWriter();
+        StringWriter mainContentBuffer = new StringWriter();
+
+        SAXTransformerFactory factory = (SAXTransformerFactory) 
SAXTransformerFactory.newInstance();
+        TransformerHandler xmlHandler;
+        try {
+            xmlHandler = factory.newTransformerHandler();
+        } catch (TransformerConfigurationException e) {
+            throw new DroidsException(e);
+        }
+        xmlHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, 
"xml");
+        xmlHandler.setResult(new StreamResult(dataBuffer));
+
+        BoilerpipeContentHandler mainContentHandler = new 
BoilerpipeContentHandler(mainContentBuffer);
+        BodyContentHandler bodyHandler = new BodyContentHandler(bodyBuffer);
+        LinkContentHandler linkHandler = new LinkContentHandler();
+
+        TeeContentHandler parallelHandler = new TeeContentHandler(xmlHandler, 
mainContentHandler, bodyHandler, linkHandler);
+
+        InputStream instream = entity.obtainContent();
+        try {
+            parser.parse(instream, parallelHandler, metadata, new 
ParseContext());
+
+            ArrayList<Link> extractedTasks = new ArrayList<Link>();
+            int depth = task.getDepth() + 1;
+            if (task instanceof LinkTask) {
+                for (org.apache.tika.sax.Link tikaLink : 
linkHandler.getLinks()) {
+                    try {
+                        URI uri = new URI(tikaLink.getUri());
+                        // Test to see if the scheme is empty
+                        // This would indicate a relative URL, so resolve it 
against the task URI
+                        if (uri.getScheme() == null) {
+                            uri = ((Link) task).getURI().resolve(uri);
+                        }
+                        extractedTasks.add(new LinkTask((Link) task, uri, 
depth, tikaLink.getText()));
+                    } catch (URISyntaxException e) {
+                        if (LOG.isWarnEnabled()) {
+                            LOG.warn("URI not valid: " + tikaLink.getUri());
+                        }
+                    }
+                }
             }
-            extractedTasks.add(new LinkTask((Link)task, uri, depth, 
tikaLink.getText()));
-               } catch (URISyntaxException e) {
-                 if(LOG.isWarnEnabled()) {
-                   LOG.warn("URI not valid: "+ tikaLink.getUri());
-                 }
-               }
-             }
-      }
-      return new TikaParseImpl(dataBuffer.toString(), extractedTasks, 
bodyBuffer.toString(), mainContentBuffer.toString(), metadata);
-    } catch (SAXException ex) {
-      throw new DroidsException("Failure parsing document " + task.getId(), 
ex);
-    } catch (TikaException ex) {
-      throw new DroidsException("Failure parsing document " + task.getId(), 
ex);
-    } finally {
-      instream.close();
-    } 
-  }
+            return new TikaParseImpl(dataBuffer.toString(), extractedTasks, 
bodyBuffer.toString(), mainContentBuffer.toString(), metadata);
+        } catch (SAXException ex) {
+            throw new DroidsException("Failure parsing document " + 
task.getId(), ex);
+        } catch (TikaException ex) {
+            throw new DroidsException("Failure parsing document " + 
task.getId(), ex);
+        } finally {
+            instream.close();
+        }
+    }
 
 }

Modified: 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
 (original)
+++ 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
 Tue Dec 18 08:47:39 2012
@@ -50,76 +50,74 @@ import org.apache.tika.sax.TeeContentHan
 import org.xml.sax.SAXException;
 
 /**
- * 
  * @deprecated Use TikaDocumentParser instead as it handles HTML just fine and 
performs the same operations.
- *
  */
 @Deprecated
 public class TikaHtmlParser implements TikaParser {
 
-  protected static final Log log = LogFactory.getLog(TikaHtmlParser.class);
+    protected static final Log log = LogFactory.getLog(TikaHtmlParser.class);
 
-  @Override
-  public TikaParse parse(ContentEntity entity, Task task) throws IOException, 
DroidsException {
-    // Init Tika objects
-    org.apache.tika.parser.Parser parser = new AutoDetectParser();
-    Metadata metadata = new Metadata();
-    
-    String charset = entity.getCharset();
-    if (charset == null) {
-      charset = "UTF-8";
-    }
-    
-    StringWriter dataBuffer = new StringWriter();
-    StringWriter bodyBuffer = new StringWriter();
-    StringWriter mainContentBuffer = new StringWriter();
-     
-    SAXTransformerFactory factory = (SAXTransformerFactory) 
SAXTransformerFactory.newInstance();
-    TransformerHandler xmlHandler;
-    try {
-      xmlHandler = factory.newTransformerHandler();
-    } catch (TransformerConfigurationException e) {
-      throw new DroidsException(e);
+    @Override
+    public TikaParse parse(ContentEntity entity, Task task) throws 
IOException, DroidsException {
+        // Init Tika objects
+        org.apache.tika.parser.Parser parser = new AutoDetectParser();
+        Metadata metadata = new Metadata();
+
+        String charset = entity.getCharset();
+        if (charset == null) {
+            charset = "UTF-8";
+        }
+
+        StringWriter dataBuffer = new StringWriter();
+        StringWriter bodyBuffer = new StringWriter();
+        StringWriter mainContentBuffer = new StringWriter();
+
+        SAXTransformerFactory factory = (SAXTransformerFactory) 
SAXTransformerFactory.newInstance();
+        TransformerHandler xmlHandler;
+        try {
+            xmlHandler = factory.newTransformerHandler();
+        } catch (TransformerConfigurationException e) {
+            throw new DroidsException(e);
+        }
+        xmlHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, 
"xml");
+        xmlHandler.setResult(new StreamResult(dataBuffer));
+
+        BoilerpipeContentHandler mainContentHandler = new 
BoilerpipeContentHandler(mainContentBuffer);
+        BodyContentHandler bodyHandler = new BodyContentHandler(bodyBuffer);
+        LinkContentHandler linkHandler = new LinkContentHandler();
+
+        TeeContentHandler parallelHandler = new TeeContentHandler(xmlHandler, 
mainContentHandler, bodyHandler, linkHandler);
+
+        InputStream instream = entity.obtainContent();
+        try {
+            parser.parse(instream, parallelHandler, metadata, new 
ParseContext());
+
+            ArrayList<Link> extractedTasks = new ArrayList<Link>();
+            if (task instanceof Link) {
+                int depth = task.getDepth() + 1;
+                for (org.apache.tika.sax.Link tikaLink : 
linkHandler.getLinks()) {
+                    try {
+                        URI uri = new URI(tikaLink.getUri());
+                        // Test to see if the scheme is empty
+                        // This would indicate a relative URL, so resolve it 
against the task URI
+                        if (uri.getScheme() == null) {
+                            uri = ((Link) task).getURI().resolve(uri);
+                        }
+                        extractedTasks.add(new LinkTask((Link) task, uri, 
depth, tikaLink.getText()));
+                    } catch (URISyntaxException e) {
+                        if (log.isWarnEnabled()) {
+                            log.warn("URI not valid: " + tikaLink.getUri());
+                        }
+                    }
+                }
+            }
+            return new TikaParseImpl(dataBuffer.toString(), extractedTasks, 
bodyBuffer.toString(), mainContentBuffer.toString(), metadata);
+        } catch (SAXException ex) {
+            throw new DroidsException("Failure parsing document " + 
task.getId(), ex);
+        } catch (TikaException ex) {
+            throw new DroidsException("Failure parsing document " + 
task.getId(), ex);
+        } finally {
+            instream.close();
+        }
     }
-    xmlHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
-    xmlHandler.setResult(new StreamResult(dataBuffer));
-    
-    BoilerpipeContentHandler mainContentHandler = new 
BoilerpipeContentHandler(mainContentBuffer);
-    BodyContentHandler bodyHandler = new BodyContentHandler(bodyBuffer);
-    LinkContentHandler linkHandler = new LinkContentHandler();
-    
-    TeeContentHandler parallelHandler = new TeeContentHandler(xmlHandler, 
mainContentHandler, bodyHandler, linkHandler );
-
-    InputStream instream = entity.obtainContent();
-    try {
-      parser.parse(instream, parallelHandler, metadata, new ParseContext());
-      
-      ArrayList<Link> extractedTasks = new ArrayList<Link>();
-      if (task instanceof Link) {
-             int depth = task.getDepth() + 1;
-             for(org.apache.tika.sax.Link tikaLink : linkHandler.getLinks()) {
-               try {
-                 URI uri = new URI(tikaLink.getUri());
-                 // Test to see if the scheme is empty
-                 // This would indicate a relative URL, so resolve it against 
the task URI
-                 if(uri.getScheme() == null) {
-                   uri = ((Link) task).getURI().resolve(uri);
-                 }
-                 extractedTasks.add(new LinkTask((Link)task, uri, depth, 
tikaLink.getText()));
-               } catch (URISyntaxException e) {
-                 if(log.isWarnEnabled()) {
-                   log.warn("URI not valid: "+ tikaLink.getUri());
-                 }
-               }
-             }
-      }
-      return new TikaParseImpl(dataBuffer.toString(), extractedTasks, 
bodyBuffer.toString(), mainContentBuffer.toString(), metadata);
-    } catch (SAXException ex) {
-      throw new DroidsException("Failure parsing document " + task.getId(), 
ex);
-    } catch (TikaException ex) {
-      throw new DroidsException("Failure parsing document " + task.getId(), 
ex);
-    } finally {
-      instream.close();
-    } 
-  }
 }

Modified: 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
 (original)
+++ 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
 Tue Dec 18 08:47:39 2012
@@ -21,36 +21,41 @@ import org.apache.tika.metadata.Metadata
 
 public interface TikaParse extends Parse {
 
-       /**
-        * Retrieves the main content of the parsed document.
-        * Uses Tika's plugin in for Boilerpipe.
-        * @return plain text result with boilerplate removed
-        */
-  public String getMainContent();
-  
-  /**
-   * Extracted meta data from the document. This can include
-   * meta tags from within an HTML document
-   * @return metadata object from the parse
-   */
-  public Metadata getMetadata();
-  
-  /**
-   * The HTML representation of the document.
-   * @return The HTML representation of the document.
-   */
-  public String getXml();
-  
-  /**
-   * Plain text representation of the document.
-   * @return plain text version without formatting
-   */
-  public String getPlainText();
-  
-  /**
-   * If the document should be indexed or not.
-   * This can be determined from metadata or other methods
-   * @return false if the document shouldn't be indexed, true otherwise
-   */
-  public boolean isIndexed();
+    /**
+     * Retrieves the main content of the parsed document.
+     * Uses Tika's plugin in for Boilerpipe.
+     *
+     * @return plain text result with boilerplate removed
+     */
+    public String getMainContent();
+
+    /**
+     * Extracted meta data from the document. This can include
+     * meta tags from within an HTML document
+     *
+     * @return metadata object from the parse
+     */
+    public Metadata getMetadata();
+
+    /**
+     * The HTML representation of the document.
+     *
+     * @return The HTML representation of the document.
+     */
+    public String getXml();
+
+    /**
+     * Plain text representation of the document.
+     *
+     * @return plain text version without formatting
+     */
+    public String getPlainText();
+
+    /**
+     * If the document should be indexed or not.
+     * This can be determined from metadata or other methods
+     *
+     * @return false if the document shouldn't be indexed, true otherwise
+     */
+    public boolean isIndexed();
 }

Modified: 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
 (original)
+++ 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
 Tue Dec 18 08:47:39 2012
@@ -26,60 +26,60 @@ import org.apache.tika.metadata.Metadata
 
 public class TikaParseImpl extends ParseImpl implements TikaParse {
 
-  private String plainText;
-  private String mainContent;
-  private Metadata metadata;
-  
-  public TikaParseImpl(String text, Collection<Link> outlinks) {
-    super(text,outlinks);
-  }
-  
-  public TikaParseImpl(String text, Object data, Collection<Link> outlinks) {
-    super(text,data,outlinks);
-  }
-
-  public TikaParseImpl(String xmlContent, ArrayList<Link> extractedTasks,
-      String plainText, String mainContent, Metadata metadata) {
-    this(xmlContent, extractedTasks);
-    this.plainText = plainText;
-    this.mainContent = mainContent;
-    this.metadata = metadata;
-  }
-
-  @Override
-  public String getMainContent() {
-    return mainContent;
-  }
-
-  @Override
-  public Metadata getMetadata() {
-    return metadata;
-  }
-
-  @Override
-  public String getXml() {
-    return super.text;
-  }
-
-  @Override
-  public String getPlainText() {
-    return plainText;
-  }
-
-  @Override
-  public boolean isFollowed() {
-    if(metadata.get("robots") != null && metadata.get("robots").toLowerCase().contains("nofollow")) {
-      return false;
-    }
-    return true;
-  }
-
-  @Override
-  public boolean isIndexed() {
-    if(metadata.get("robots") != null && metadata.get("robots").toLowerCase().contains("noindex")) {
-      return false;
+    private String plainText;
+    private String mainContent;
+    private Metadata metadata;
+
+    public TikaParseImpl(String text, Collection<Link> outlinks) {
+        super(text, outlinks);
+    }
+
+    public TikaParseImpl(String text, Object data, Collection<Link> outlinks) {
+        super(text, data, outlinks);
+    }
+
+    public TikaParseImpl(String xmlContent, ArrayList<Link> extractedTasks,
+                         String plainText, String mainContent, Metadata metadata) {
+        this(xmlContent, extractedTasks);
+        this.plainText = plainText;
+        this.mainContent = mainContent;
+        this.metadata = metadata;
+    }
+
+    @Override
+    public String getMainContent() {
+        return mainContent;
+    }
+
+    @Override
+    public Metadata getMetadata() {
+        return metadata;
+    }
+
+    @Override
+    public String getXml() {
+        return super.text;
+    }
+
+    @Override
+    public String getPlainText() {
+        return plainText;
+    }
+
+    @Override
+    public boolean isFollowed() {
+        if (metadata.get("robots") != null && metadata.get("robots").toLowerCase().contains("nofollow")) {
+            return false;
+        }
+        return true;
+    }
+
+    @Override
+    public boolean isIndexed() {
+        if (metadata.get("robots") != null && metadata.get("robots").toLowerCase().contains("noindex")) {
+            return false;
+        }
+        return true;
     }
-    return true;
-  }
 
 }

Modified: incubator/droids/branches/0.2.x-cleanup/droids-tika/src/site/site.xml
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-tika/src/site/site.xml?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-tika/src/site/site.xml 
(original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-tika/src/site/site.xml Tue 
Dec 18 08:47:39 2012
@@ -16,14 +16,14 @@
  limitations under the License.
 -->
 <project xmlns="http://maven.apache.org/DECORATION/1.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-    xsi:schemaLocation="http://maven.apache.org/DECORATION/1.0.0 http://maven.apache.org/xsd/decoration-1.0.0.xsd">
-  <body>
-    <menu ref="parent" />
-    
-    <menu name="JavaDocs"> 
-      <item name="JavaDocs" href="apidocs/index.html"/>
-      <item name="Test JavaDocs" href="testapidocs/index.html"/>
-    </menu>
-  </body>
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/DECORATION/1.0.0 http://maven.apache.org/xsd/decoration-1.0.0.xsd">
+    <body>
+        <menu ref="parent"/>
+
+        <menu name="JavaDocs">
+            <item name="JavaDocs" href="apidocs/index.html"/>
+            <item name="Test JavaDocs" href="testapidocs/index.html"/>
+        </menu>
+    </body>
 </project>
\ No newline at end of file

Modified: 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/test/java/org/apache/droids/tika/TikaHtmlParserTest.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-tika/src/test/java/org/apache/droids/tika/TikaHtmlParserTest.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/test/java/org/apache/droids/tika/TikaHtmlParserTest.java
 (original)
+++ 
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/test/java/org/apache/droids/tika/TikaHtmlParserTest.java
 Tue Dec 18 08:47:39 2012
@@ -19,10 +19,9 @@ package org.apache.droids.tika;
 import junit.framework.TestCase;
 
 public class TikaHtmlParserTest extends TestCase {
- 
-  public void testSomething() throws Exception
-  {
-    // TODO -- test stuff!
-    assertTrue( true );
-  }
+
+    public void testSomething() throws Exception {
+        // TODO -- test stuff!
+        assertTrue(true);
+    }
 }
\ No newline at end of file


Reply via email to