Author: jukka
Date: Sun Sep 27 17:36:42 2009
New Revision: 819358

URL: http://svn.apache.org/viewvc?rev=819358&view=rev
Log:
TIKA-269: Ease of use -facade for Tika

Make the facade non-static to allow different configurations to be used.

Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java Sun Sep 27 17:36:42 2009
@@ -26,6 +26,7 @@
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
@@ -44,12 +45,24 @@
  */
 public class Tika {
 
-    private static final Parser parser = new AutoDetectParser();
+    /**
+     * The parser instance used by this facade.
+     */
+    private final Parser parser;
 
     /**
-     * Private constructor to prevent this class from being instantiated.
+     * Creates a Tika facade using the given configuration.
+     * @param config
      */
-    private Tika() {
+    public Tika(TikaConfig config) {
+        this.parser = new AutoDetectParser(config);
+    }
+
+    /**
+     * Creates a Tika facade using the default configuration.
+     */
+    public Tika() {
+        this(TikaConfig.getDefaultConfig());
     }
 
     /**
@@ -62,7 +75,7 @@
      * @return extracted text content
      * @throws IOException if the document can not be read or parsed
      */
-    public static Reader parse(InputStream stream, Metadata metadata)
+    public Reader parse(InputStream stream, Metadata metadata)
             throws IOException {
         Map<String, Object> context = new HashMap<String, Object>();
         context.put(Parser.class.getName(), parser);
@@ -76,7 +89,7 @@
      * @return extracted text content
      * @throws IOException if the document can not be read or parsed
      */
-    public static Reader parse(InputStream stream) throws IOException {
+    public Reader parse(InputStream stream) throws IOException {
         return parse(stream, new Metadata());
     }
 
@@ -88,8 +101,7 @@
      * @throws FileNotFoundException if the given file does not exist
      * @throws IOException if the file can not be read or parsed
      */
-    public static Reader parse(File file)
-            throws FileNotFoundException, IOException {
+    public Reader parse(File file) throws FileNotFoundException, IOException {
         return parse(new FileInputStream(file), getFileMetadata(file));
     }
 
@@ -101,7 +113,7 @@
      * @return extracted text content
      * @throws IOException if the resource can not be read or parsed
      */
-    public static Reader parse(URL url) throws IOException {
+    public Reader parse(URL url) throws IOException {
         return parse(url.openStream(), getUrlMetadata(url));
     }
 
@@ -115,7 +127,7 @@
      * @throws IOException if the document can not be read
      * @throws TikaException if the document can not be parsed
      */
-    public static String parseToString(InputStream stream, Metadata metadata)
+    public String parseToString(InputStream stream, Metadata metadata)
             throws IOException, TikaException {
         try {
             ContentHandler handler = new BodyContentHandler();
@@ -140,7 +152,7 @@
      * @throws IOException if the document can not be read
      * @throws TikaException if the document can not be parsed
      */
-    public static String parseToString(InputStream stream)
+    public String parseToString(InputStream stream)
             throws IOException, TikaException {
         return parseToString(stream, new Metadata());
     }
@@ -154,7 +166,7 @@
      * @throws IOException if the file can not be read
      * @throws TikaException if the file can not be parsed
      */
-    public static String parseToString(File file)
+    public String parseToString(File file)
             throws FileNotFoundException, IOException, TikaException {
         return parseToString(new FileInputStream(file), getFileMetadata(file));
     }
@@ -168,8 +180,7 @@
      * @throws IOException if the resource can not be read
      * @throws TikaException if the resource can not be parsed
      */
-    public static String parseToString(URL url)
-            throws IOException, TikaException {
+    public String parseToString(URL url) throws IOException, TikaException {
         return parseToString(url.openStream(), getUrlMetadata(url));
     }
 

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Sun Sep 27 17:36:42 2009
@@ -165,17 +165,21 @@
      * return a shared instance once it is completely immutable.
      *
      * @return default configuration
-     * @throws TikaException if the default configuration is not available
      */
-    public static TikaConfig getDefaultConfig() throws TikaException {
+    public static TikaConfig getDefaultConfig() {
         try {
             InputStream stream =
                 TikaConfig.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION);
             return new TikaConfig(stream);
         } catch (IOException e) {
-            throw new TikaException("Unable to read default configuration", e);
+            throw new RuntimeException(
+                    "Unable to read default configuration", e);
         } catch (SAXException e) {
-            throw new TikaException("Unable to parse default configuration", e);
+            throw new RuntimeException(
+                    "Unable to parse default configuration", e);
+        } catch (TikaException e) {
+            throw new RuntimeException(
+                    "Unable to access default configuration", e);
         }
     }
 

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Sun Sep 27 17:36:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -45,12 +45,7 @@
      * configuration.
      */
     public AutoDetectParser() {
-        try {
-            setConfig(TikaConfig.getDefaultConfig());
-        } catch (TikaException e) {
-            // FIXME: This should never happen
-            throw new RuntimeException(e);
-        }
+        this(TikaConfig.getDefaultConfig());
     }
 
     public AutoDetectParser(TikaConfig config) {

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java Sun Sep 27 17:36:42 2009
@@ -29,7 +29,7 @@
     public void testClassParsing() throws Exception {
         String path = "/test-documents/AutoDetectParser.class";
         Metadata metadata = new Metadata();
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 ClassParserTest.class.getResourceAsStream(path), metadata);
 
         assertEquals("AutoDetectParser", metadata.get(Metadata.TITLE));

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java Sun Sep 27 17:36:42 2009
@@ -26,7 +26,7 @@
     public void testWAV() throws Exception {
         String path = "/test-documents/testWAV.wav";
         Metadata metadata = new Metadata();
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 AudioParserTest.class.getResourceAsStream(path), metadata);
 
         assertEquals("audio/x-wav", metadata.get(Metadata.CONTENT_TYPE));
@@ -41,7 +41,7 @@
     public void testAIFF() throws Exception {
         String path = "/test-documents/testAIFF.aif";
         Metadata metadata = new Metadata();
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 AudioParserTest.class.getResourceAsStream(path), metadata);
 
         assertEquals("audio/x-aiff", metadata.get(Metadata.CONTENT_TYPE));
@@ -56,7 +56,7 @@
     public void testAU() throws Exception {
         String path = "/test-documents/testAU.au";
         Metadata metadata = new Metadata();
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 AudioParserTest.class.getResourceAsStream(path), metadata);
 
         assertEquals("audio/basic", metadata.get(Metadata.CONTENT_TYPE));

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java Sun Sep 27 17:36:42 2009
@@ -26,7 +26,7 @@
     public void testMID() throws Exception {
         String path = "/test-documents/testMID.mid";
         Metadata metadata = new Metadata();
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 MidiParserTest.class.getResourceAsStream(path), metadata);
 
         assertEquals("audio/midi", metadata.get(Metadata.CONTENT_TYPE));

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Sun Sep 27 17:36:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -79,7 +79,7 @@
     public void XtestParseUTF8() throws IOException, SAXException, TikaException {
         String path = "/test-documents/testXHTML_utf8.html";
         Metadata metadata = new Metadata();
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 HtmlParserTest.class.getResourceAsStream(path), metadata);
 
         assertTrue("Did not contain expected text:"
@@ -97,7 +97,7 @@
     public void testXhtmlParsing() throws Exception {
         String path = "/test-documents/testXHTML.html";
         Metadata metadata = new Metadata();
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 HtmlParserTest.class.getResourceAsStream(path), metadata);
 
         assertEquals("application/xhtml+xml", metadata.get(Metadata.CONTENT_TYPE));
@@ -124,7 +124,7 @@
      */
     public void testCharactersDirectlyUnderBodyElement() throws Exception {
         String test = "<html><body>test</body></html>";
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 new ByteArrayInputStream(test.getBytes("UTF-8")));
         assertEquals("test", content);
     }
@@ -136,7 +136,7 @@
     public void testWhitespaceBetweenTableCells() throws Exception {
         String test =
             "<html><body><table><tr><td>a</td><td>b</td></table></body></html>";
-        String content = Tika.parseToString(
+        String content = new Tika().parseToString(
                 new ByteArrayInputStream(test.getBytes("UTF-8")));
         assertTrue(content.contains("a"));
         assertTrue(content.contains("b"));


Reply via email to