Author: nick
Date: Wed Mar 23 23:00:13 2011
New Revision: 1084798

URL: http://svn.apache.org/viewvc?rev=1084798&view=rev
Log:
When trying to identify a parser for a media type in AutoDetect and similar,
if a Parser claims to support an alias of the media type but not the canonical
type (e.g. someone changed the mimetype file but not the parser), then accept
the parser via the alias.
Also adds AutoDetectParser tests for images (the BMP test did not work before
this change).

Modified:
    
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1084798&r1=1084797&r2=1084798&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Wed Mar 23 23:00:13 2011
@@ -25,6 +25,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.SortedSet;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TaggedInputStream;
@@ -165,10 +166,22 @@ public class CompositeParser implements 
         Map<MediaType, Parser> map = getParsers(context);
         MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
         while (type != null) {
+            // Try finding a parser for the type
             Parser parser = map.get(type);
             if (parser != null) {
                 return parser;
             }
+            
+            // Next up, look for one for its aliases
+            SortedSet<MediaType> aliases = registry.getAliases(type);
+            for (MediaType alias : aliases) {
+               parser = map.get(alias);
+               if (parser != null) {
+                   return parser;
+               }
+            }
+         
+            // Failing that, try for the parent of the type
             type = registry.getSupertype(type);
         }
         return fallback;

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=1084798&r1=1084797&r2=1084798&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Wed Mar 23 23:00:13 2011
@@ -40,13 +40,17 @@ public class AutoDetectParserTest extend
     private static final String HTML       = "text/html";
     private static final String PDF        = "application/pdf";
     private static final String POWERPOINT = "application/vnd.ms-powerpoint";
-    private static final String KEYNOTE = "application/vnd.apple.keynote";
-    private static final String PAGES = "application/vnd.apple.pages";
-    private static final String NUMBERS = "application/vnd.apple.numbers";
+    private static final String KEYNOTE    = "application/vnd.apple.keynote";
+    private static final String PAGES      = "application/vnd.apple.pages";
+    private static final String NUMBERS    = "application/vnd.apple.numbers";
     private static final String RTF        = "application/rtf";
     private static final String PLAINTEXT  = "text/plain";
     private static final String WORD       = "application/msword";
     private static final String XML        = "application/xml";
+    private static final String BMP        = "image/x-ms-bmp";
+    private static final String GIF        = "image/gif";
+    private static final String JPEG       = "image/jpeg";
+    private static final String PNG        = "image/png";
     private static final String OPENOFFICE
             = "application/vnd.oasis.opendocument.text";
 
@@ -76,8 +80,10 @@ public class AutoDetectParserTest extend
             assertEquals("Bad content type: " + tp,
                     tp.realType, metadata.get(Metadata.CONTENT_TYPE));
 
-            assertTrue("Expected content not found: " + tp,
-                    handler.toString().contains(tp.expectedContentFragment));
+            if (tp.expectedContentFragment != null) {
+               assertTrue("Expected content not found: " + tp,
+                       handler.toString().contains(tp.expectedContentFragment));
+            }
         } finally {
             input.close();
         }
@@ -192,6 +198,13 @@ public class AutoDetectParserTest extend
         assertAutoDetect("testXML.xml", XML, "Lius");
     }
 
+    public void testImages() throws Exception {
+       assertAutoDetect("testBMP.bmp", BMP, null);
+       assertAutoDetect("testGIF.gif", GIF, null);
+       assertAutoDetect("testJPEG.jpg", JPEG, null);
+       assertAutoDetect("testPNG.png", PNG, null);
+   }
+
     /**
      * Make sure that zip bomb attacks are prevented.
      *


Reply via email to