Author: jukka
Date: Wed Feb 17 23:26:13 2010
New Revision: 911225
URL: http://svn.apache.org/viewvc?rev=911225&view=rev
Log:
TIKA-317: Annotation-based Tika configuration
Use the service provider mechanism to automatically add all available parsers
to the default Tika configuration.
Added:
lucene/tika/trunk/tika-parsers/src/main/resources/
lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/
lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/
lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
Removed:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=911225&r1=911224&r2=911225&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Wed Feb 17 23:26:13 2010
@@ -21,14 +21,17 @@
import java.io.InputStream;
import java.net.URL;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.Map;
+import javax.imageio.spi.ServiceRegistry;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MimeTypesFactory;
import org.apache.tika.parser.ParseContext;
@@ -49,7 +52,7 @@
private final Map<String, Parser> parsers = new HashMap<String, Parser>();
- private static MimeTypes mimeTypes;
+ private final MimeTypes mimeTypes;
public TikaConfig(String file)
throws TikaException, IOException, SAXException {
@@ -95,8 +98,10 @@
public TikaConfig(Element element) throws TikaException, IOException {
Element mtr = getChild(element, "mimeTypeRepository");
- if (mtr != null) {
+ if (mtr != null && mtr.hasAttribute("resource")) {
mimeTypes = MimeTypesFactory.create(mtr.getAttribute("resource"));
+ } else {
+ mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
}
NodeList nodes = element.getElementsByTagName("parser");
@@ -125,6 +130,19 @@
}
}
+ public TikaConfig() throws MimeTypeException, IOException {
+ ParseContext context = new ParseContext();
+ Iterator<Parser> iterator =
+ ServiceRegistry.lookupProviders(Parser.class);
+ while (iterator.hasNext()) {
+ Parser parser = iterator.next();
+ for (MediaType type : parser.getSupportedTypes(context)) {
+ parsers.put(type.toString(), parser);
+ }
+ }
+ mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
+ }
+
/**
* @deprecated This method will be removed in Apache Tika 1.0
* @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
@@ -177,15 +195,10 @@
*/
public static TikaConfig getDefaultConfig() {
try {
- InputStream stream =
- TikaConfig.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION);
- return new TikaConfig(stream);
+ return new TikaConfig();
} catch (IOException e) {
throw new RuntimeException(
"Unable to read default configuration", e);
- } catch (SAXException e) {
- throw new RuntimeException(
- "Unable to parse default configuration", e);
} catch (TikaException e) {
throw new RuntimeException(
"Unable to access default configuration", e);
Added:
lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=911225&view=auto
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (added)
+++ lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser Wed Feb 17 23:26:13 2010
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.tika.parser.asm.ClassParser
+org.apache.tika.parser.audio.AudioParser
+org.apache.tika.parser.audio.MidiParser
+org.apache.tika.parser.epub.EpubParser
+org.apache.tika.parser.html.HtmlParser
+org.apache.tika.parser.image.ImageParser
+org.apache.tika.parser.jpeg.JpegParser
+org.apache.tika.parser.mbox.MboxParser
+org.apache.tika.parser.microsoft.OfficeParser
+org.apache.tika.parser.microsoft.ooxml.OOXMLParser
+org.apache.tika.parser.mp3.Mp3Parser
+org.apache.tika.parser.odf.OpenDocumentParser
+org.apache.tika.parser.pdf.PDFParser
+org.apache.tika.parser.pkg.Bzip2Parser
+org.apache.tika.parser.pkg.GzipParser
+org.apache.tika.parser.pkg.TarParser
+org.apache.tika.parser.pkg.ZipParser
+org.apache.tika.parser.rtf.RTFParser
+org.apache.tika.parser.txt.TXTParser
+org.apache.tika.parser.video.FLVParser
+org.apache.tika.parser.xml.DcXMLParser