Author: nick
Date: Mon Apr 29 14:20:14 2013
New Revision: 1477097
URL: http://svn.apache.org/r1477097
Log:
Patch from Ryan McKinley for TIKA-1014 - Allow custom MimeTypesReader (with
tests)
Added:
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=1477097&r1=1477096&r2=1477097&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
(original)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
Mon Apr 29 14:20:14 2013
@@ -98,22 +98,21 @@ import org.xml.sax.helpers.DefaultHandle
*
* @see http://freedesktop.org/wiki/Standards_2fshared_2dmime_2dinfo_2dspec
*/
-class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMetKeys
{
-
- private final MimeTypes types;
+public class MimeTypesReader extends DefaultHandler implements
MimeTypesReaderMetKeys {
+ protected final MimeTypes types;
/** Current type */
- private MimeType type = null;
+ protected MimeType type = null;
- private int priority;
+ protected int priority;
- private StringBuilder characters = null;
+ protected StringBuilder characters = null;
- MimeTypesReader(MimeTypes types) {
+ protected MimeTypesReader(MimeTypes types) {
this.types = types;
}
- void read(InputStream stream) throws IOException, MimeTypeException {
+ public void read(InputStream stream) throws IOException, MimeTypeException
{
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(false);
@@ -126,7 +125,7 @@ class MimeTypesReader extends DefaultHan
}
}
- void read(Document document) throws MimeTypeException {
+ public void read(Document document) throws MimeTypeException {
try {
TransformerFactory factory = TransformerFactory.newInstance();
Transformer transformer = factory.newTransformer();
@@ -151,7 +150,7 @@ class MimeTypesReader extends DefaultHan
try {
type = types.forName(name);
} catch (MimeTypeException e) {
- throw new SAXException(e);
+ handleMimeError(name, e, qName, attributes);
}
}
} else if (ALIAS_TAG.equals(qName)) {
@@ -172,7 +171,7 @@ class MimeTypesReader extends DefaultHan
try {
types.addPattern(type, pattern, Boolean.valueOf(isRegex));
} catch (MimeTypeException e) {
- throw new SAXException(e);
+ handleGlobError(type, pattern, e, qName, attributes);
}
}
} else if (ROOT_XML_TAG.equals(qName)) {
@@ -240,6 +239,14 @@ class MimeTypesReader extends DefaultHan
}
}
+ protected void handleMimeError(String input, MimeTypeException ex, String
qName, Attributes attributes) throws SAXException {
+ throw new SAXException(ex);
+ }
+
+ protected void handleGlobError(MimeType type, String pattern,
MimeTypeException ex, String qName, Attributes attributes) throws SAXException {
+ throw new SAXException(ex);
+ }
+
private ClauseRecord current = new ClauseRecord(null);
private class ClauseRecord {
Added:
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java?rev=1477097&view=auto
==============================================================================
---
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
(added)
+++
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
Mon Apr 29 14:20:14 2013
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+
+public class CustomReaderTest extends TestCase {
+
+ static class CustomMimeTypesReader extends MimeTypesReader {
+ public Map<String, String> values = new HashMap<String, String>();
+ public List<String> ignorePatterns = new ArrayList<String>();
+
+ CustomMimeTypesReader(MimeTypes types) {
+ super(types);
+ }
+
+
+ @Override
+ public void startElement(
+ String uri, String localName, String qName,
+ Attributes attributes) throws SAXException {
+ super.startElement(uri, localName, qName, attributes);
+ if ("hello".equals(qName)) {
+ characters = new StringBuilder();
+ }
+ }
+
+ @Override
+ public void endElement(String uri, String localName, String qName) {
+ super.endElement(uri, localName, qName);
+ if (type != null) {
+ if("hello".equals(qName)) {
+ values.put(type.toString(), characters.toString().trim());
+ characters = null;
+ }
+ }
+ }
+
+ @Override
+ protected void handleGlobError(MimeType type, String pattern,
MimeTypeException ex,
+ String qName, Attributes attributes) throws SAXException {
+ ignorePatterns.add( type.toString() + ">>" + pattern);
+ }
+ }
+
+ public void testCustomReader() throws Exception {
+ MimeTypes mimeTypes = new MimeTypes();
+ CustomMimeTypesReader reader = new CustomMimeTypesReader(mimeTypes);
+ reader.read(getClass().getResourceAsStream("custom-mimetypes.xml"));
+
+ String key = "hello/world-file";
+
+ MimeType hello = mimeTypes.forName(key);
+ assertEquals("A \"Hello World\" file", hello.getDescription());
+ assertEquals("world", reader.values.get(key));
+ assertEquals(0, reader.ignorePatterns.size());
+
+ // Now add another resource with a conflicting glob pattern
+ reader.read(getClass().getResourceAsStream("custom-mimetypes2.xml"));
+
+ key = "another/world-file";
+ MimeType another = mimeTypes.forName(key);
+ assertEquals("kittens", reader.values.get(key));
+ assertEquals(1, reader.ignorePatterns.size());
+ assertEquals(another.toString()+">>*"+hello.getExtension(),
+ reader.ignorePatterns.get(0));
+
+ //System.out.println( mimeTypes.getMediaTypeRegistry().getTypes() );
+ }
+}
Modified:
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml?rev=1477097&r1=1477096&r2=1477097&view=diff
==============================================================================
---
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
(original)
+++
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
Mon Apr 29 14:20:14 2013
@@ -26,6 +26,7 @@
<!-- A more complex mimetype, with a glob and a match -->
<mime-type type="hello/world-file">
<_comment>A "Hello World" file</_comment>
+ <hello>world</hello>
<glob pattern="*.hello.world" />
<magic priority="50">
<match value="Hello, World!" type="string" offset="0:13" />
Added:
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml?rev=1477097&view=auto
==============================================================================
---
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
(added)
+++
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
Mon Apr 29 14:20:14 2013
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<mime-info>
+ <mime-type type="another/world-file">
+ <hello>kittens</hello>
+ <glob pattern="*.hello.world" /> <!-- Will collide with
'hello/world-file' -->
+ <sub-class-of type="hello/world" />
+ </mime-type>
+</mime-info>