Author: thorsten
Date: Fri Apr 30 11:28:26 2010
New Revision: 939642

URL: http://svn.apache.org/viewvc?rev=939642&view=rev
Log:
DROIDS-81
Reporter: Richard Frovarp
Patch: Richard Frovarp
review: thorsten

Added:
    incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
Modified:
    incubator/droids/trunk/droids-tika/pom.xml

Modified: incubator/droids/trunk/droids-tika/pom.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/pom.xml?rev=939642&r1=939641&r2=939642&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/pom.xml (original)
+++ incubator/droids/trunk/droids-tika/pom.xml Fri Apr 30 11:28:26 2010
@@ -46,9 +46,14 @@
       <version>${pom.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.tika</groupId>
-      <artifactId>tika</artifactId>
-      <version>0.2</version>
+       <groupId>org.apache.tika</groupId>
+       <artifactId>tika-core</artifactId>
+       <version>0.6</version>
+    </dependency>
+    <dependency>
+       <groupId>org.apache.tika</groupId>
+       <artifactId>tika-parsers</artifactId>
+       <version>0.6</version>
     </dependency>
   </dependencies>
 

Added: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java?rev=939642&view=auto
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java (added)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java Fri Apr 30 11:28:26 2010
@@ -0,0 +1,45 @@
+package org.apache.droids.tika;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.droids.api.ContentEntity;
+import org.apache.droids.api.Link;
+import org.apache.droids.api.Parse;
+import org.apache.droids.api.Parser;
+import org.apache.droids.exception.DroidsException;
+import org.apache.droids.helper.Loggable;
+import org.apache.droids.parse.ParseImpl;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.SAXException;
+
+/** Droids {@link Parser} implementation that extracts document text with Apache Tika. */
+public class TikaDocumentParser extends Loggable implements Parser {
+  /**
+   * Feeds the entity content to a Tika {@link AutoDetectParser} and wraps the text gathered
+   * by a {@link BodyContentHandler} in a {@link ParseImpl} (its second argument is null).
+   * @throws DroidsException wrapping any SAXException or TikaException raised while parsing
+   * @throws IOException if an I/O error is raised while the content is read or closed
+   */
+  @Override
+  public Parse parse(ContentEntity entity, Link link) throws DroidsException, IOException {
+    final org.apache.tika.parser.Parser tika = new AutoDetectParser();
+    final Metadata meta = new Metadata();
+    final BodyContentHandler body = new BodyContentHandler();
+    final InputStream content = entity.obtainContent();
+    try {
+      tika.parse(content, body, meta, new ParseContext());
+      return new ParseImpl(body.toString(), null);
+    } catch (SAXException e) {
+      throw new DroidsException("Failure parsing document " + link.getId(), e);
+    } catch (TikaException e) {
+      throw new DroidsException("Failure parsing document " + link.getId(), e);
+    } finally {
+      content.close(); // runs on success and on failure alike
+    }
+  }
+}


Reply via email to