Author: nick
Date: Mon Apr 29 14:20:14 2013
New Revision: 1477097

URL: http://svn.apache.org/r1477097
Log:
Patch from Ryan McKinley from TIKA-1014 - Allow custom MimeTypesReader (with 
tests)

Added:
    
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
    
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
    
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=1477097&r1=1477096&r2=1477097&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java 
Mon Apr 29 14:20:14 2013
@@ -98,22 +98,21 @@ import org.xml.sax.helpers.DefaultHandle
  *
  * @see http://freedesktop.org/wiki/Standards_2fshared_2dmime_2dinfo_2dspec
  */
-class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMetKeys {
-
-    private final MimeTypes types;
+public class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMetKeys {
+    protected final MimeTypes types;
 
     /** Current type */
-    private MimeType type = null;
+    protected MimeType type = null;
 
-    private int priority;
+    protected int priority;
 
-    private StringBuilder characters = null;
+    protected StringBuilder characters = null;
 
-    MimeTypesReader(MimeTypes types) {
+    protected MimeTypesReader(MimeTypes types) {
         this.types = types;
     }
 
-    void read(InputStream stream) throws IOException, MimeTypeException {
+    public void read(InputStream stream) throws IOException, MimeTypeException {
         try {
             SAXParserFactory factory = SAXParserFactory.newInstance();
             factory.setNamespaceAware(false);
@@ -126,7 +125,7 @@ class MimeTypesReader extends DefaultHan
         }
     }
 
-    void read(Document document) throws MimeTypeException {
+    public void read(Document document) throws MimeTypeException {
         try {
             TransformerFactory factory = TransformerFactory.newInstance();
             Transformer transformer = factory.newTransformer();
@@ -151,7 +150,7 @@ class MimeTypesReader extends DefaultHan
                 try {
                     type = types.forName(name);
                 } catch (MimeTypeException e) {
-                    throw new SAXException(e);
+                    handleMimeError(name, e, qName, attributes);
                 }
             }
         } else if (ALIAS_TAG.equals(qName)) {
@@ -172,7 +171,7 @@ class MimeTypesReader extends DefaultHan
                 try {
                     types.addPattern(type, pattern, Boolean.valueOf(isRegex));
                 } catch (MimeTypeException e) {
-                    throw new SAXException(e);
+                  handleGlobError(type, pattern, e, qName, attributes);
                 }
             }
         } else if (ROOT_XML_TAG.equals(qName)) {
@@ -240,6 +239,14 @@ class MimeTypesReader extends DefaultHan
         }
     }
 
+    protected void handleMimeError(String input, MimeTypeException ex, String qName, Attributes attributes) throws SAXException {
+      throw new SAXException(ex);
+    }
+    
+    protected void handleGlobError(MimeType type, String pattern, MimeTypeException ex, String qName, Attributes attributes) throws SAXException {
+      throw new SAXException(ex);
+    }
+
     private ClauseRecord current = new ClauseRecord(null);
 
     private class ClauseRecord {

Added: 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java?rev=1477097&view=auto
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java 
(added)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java 
Mon Apr 29 14:20:14 2013
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+
+public class CustomReaderTest extends TestCase {
+  
+  static class CustomMimeTypesReader extends MimeTypesReader {
+    public Map<String, String> values = new HashMap<String, String>();
+    public List<String> ignorePatterns = new ArrayList<String>();
+
+    CustomMimeTypesReader(MimeTypes types) {
+      super(types); 
+    }
+    
+
+    @Override
+    public void startElement(
+            String uri, String localName, String qName,
+            Attributes attributes) throws SAXException {
+      super.startElement(uri, localName, qName, attributes);
+      if ("hello".equals(qName)) {
+          characters = new StringBuilder();
+      }
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String qName) {
+      super.endElement(uri, localName, qName);
+        if (type != null) {
+          if("hello".equals(qName)) {
+            values.put(type.toString(), characters.toString().trim());
+            characters = null;
+          }
+        }
+    }
+
+    @Override
+    protected void handleGlobError(MimeType type, String pattern, MimeTypeException ex, 
+        String qName, Attributes attributes) throws SAXException {
+      ignorePatterns.add( type.toString() + ">>" + pattern);
+    }
+  }
+  
+  public void testCustomReader() throws Exception {
+    MimeTypes mimeTypes = new MimeTypes();
+    CustomMimeTypesReader reader = new CustomMimeTypesReader(mimeTypes);
+    reader.read(getClass().getResourceAsStream("custom-mimetypes.xml"));
+    
+    String key = "hello/world-file";
+
+    MimeType hello = mimeTypes.forName(key);
+    assertEquals("A \"Hello World\" file", hello.getDescription());    
+    assertEquals("world", reader.values.get(key));
+    assertEquals(0, reader.ignorePatterns.size());
+    
+    // Now add another resource with conflicting regex
+    reader.read(getClass().getResourceAsStream("custom-mimetypes2.xml"));
+    
+    key = "another/world-file";
+    MimeType another = mimeTypes.forName(key);
+    assertEquals("kittens", reader.values.get(key));
+    assertEquals(1, reader.ignorePatterns.size());
+    assertEquals(another.toString()+">>*"+hello.getExtension(), 
+        reader.ignorePatterns.get(0));
+    
+    //System.out.println( mimeTypes.getMediaTypeRegistry().getTypes() );
+  }
+}

Modified: 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml?rev=1477097&r1=1477096&r2=1477097&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
 (original)
+++ 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
 Mon Apr 29 14:20:14 2013
@@ -26,6 +26,7 @@
   <!-- A more complex mimetype, with a glob and a match -->
   <mime-type type="hello/world-file">
      <_comment>A "Hello World" file</_comment>
+     <hello>world</hello>
      <glob pattern="*.hello.world" />
      <magic priority="50">
         <match value="Hello, World!" type="string" offset="0:13" />

Added: 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml?rev=1477097&view=auto
==============================================================================
--- 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
 (added)
+++ 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
 Mon Apr 29 14:20:14 2013
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<mime-info>
+  <mime-type type="another/world-file">
+     <hello>kittens</hello>
+     <glob pattern="*.hello.world" /> <!-- Will collide with 'hello/world-file'  -->
+     <sub-class-of type="hello/world" />
+  </mime-type>
+</mime-info>


Reply via email to