Author: jukka
Date: Wed Feb 17 23:26:13 2010
New Revision: 911225

URL: http://svn.apache.org/viewvc?rev=911225&view=rev
Log:
TIKA-317: Annotation-based Tika configuration

Use the service provider mechanism to automatically add all available parsers 
to the default Tika configuration.

Added:
    lucene/tika/trunk/tika-parsers/src/main/resources/
    lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/
    lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/
    lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
Removed:
    lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=911225&r1=911224&r2=911225&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Wed Feb 17 23:26:13 2010
@@ -21,14 +21,17 @@
 import java.io.InputStream;
 import java.net.URL;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map;
 
+import javax.imageio.spi.ServiceRegistry;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.mime.MimeTypesFactory;
 import org.apache.tika.parser.ParseContext;
@@ -49,7 +52,7 @@
 
     private final Map<String, Parser> parsers = new HashMap<String, Parser>();
 
-    private static MimeTypes mimeTypes;
+    private final MimeTypes mimeTypes;
 
     public TikaConfig(String file)
             throws TikaException, IOException, SAXException {
@@ -95,8 +98,10 @@
 
     public TikaConfig(Element element) throws TikaException, IOException {
         Element mtr = getChild(element, "mimeTypeRepository");
-        if (mtr != null) {
+        if (mtr != null && mtr.hasAttribute("resource")) {
             mimeTypes = MimeTypesFactory.create(mtr.getAttribute("resource"));
+        } else {
+            mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
         }
 
         NodeList nodes = element.getElementsByTagName("parser");
@@ -125,6 +130,19 @@
         }
     }
 
+    public TikaConfig() throws MimeTypeException, IOException {
+        ParseContext context = new ParseContext();
+        Iterator<Parser> iterator =
+            ServiceRegistry.lookupProviders(Parser.class);
+        while (iterator.hasNext()) {
+            Parser parser = iterator.next();
+            for (MediaType type : parser.getSupportedTypes(context)) {
+                parsers.put(type.toString(), parser);
+            }
+        }
+        mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
+    }
+
     /**
      * @deprecated This method will be removed in Apache Tika 1.0
      * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
@@ -177,15 +195,10 @@
      */
     public static TikaConfig getDefaultConfig() {
         try {
-            InputStream stream =
-                TikaConfig.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION);
-            return new TikaConfig(stream);
+            return new TikaConfig();
         } catch (IOException e) {
             throw new RuntimeException(
                     "Unable to read default configuration", e);
-        } catch (SAXException e) {
-            throw new RuntimeException(
-                    "Unable to parse default configuration", e);
         } catch (TikaException e) {
             throw new RuntimeException(
                     "Unable to access default configuration", e);

Added: lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=911225&view=auto
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (added)
+++ lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser Wed Feb 17 23:26:13 2010
@@ -0,0 +1,36 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.parser.asm.ClassParser
+org.apache.tika.parser.audio.AudioParser
+org.apache.tika.parser.audio.MidiParser
+org.apache.tika.parser.epub.EpubParser
+org.apache.tika.parser.html.HtmlParser
+org.apache.tika.parser.image.ImageParser
+org.apache.tika.parser.jpeg.JpegParser
+org.apache.tika.parser.mbox.MboxParser
+org.apache.tika.parser.microsoft.OfficeParser
+org.apache.tika.parser.microsoft.ooxml.OOXMLParser
+org.apache.tika.parser.mp3.Mp3Parser
+org.apache.tika.parser.odf.OpenDocumentParser
+org.apache.tika.parser.pdf.PDFParser
+org.apache.tika.parser.pkg.Bzip2Parser
+org.apache.tika.parser.pkg.GzipParser
+org.apache.tika.parser.pkg.TarParser
+org.apache.tika.parser.pkg.ZipParser
+org.apache.tika.parser.rtf.RTFParser
+org.apache.tika.parser.txt.TXTParser
+org.apache.tika.parser.video.FLVParser
+org.apache.tika.parser.xml.DcXMLParser


Reply via email to