Author: nick
Date: Wed Mar 23 23:00:13 2011
New Revision: 1084798
URL: http://svn.apache.org/viewvc?rev=1084798&view=rev
Log:
When trying to identify a parser for a media type in AutoDetect and similar, if
a Parser claims to support an alias of the media type but not the canonical
type (e.g. someone changed the mimetype file but not the parser), then accept
the parser via the alias.
Also adds AutoDetectParser tests for images (of which the BMP one did not work
before this change).
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1084798&r1=1084797&r2=1084798&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Wed Mar 23 23:00:13 2011
@@ -25,6 +25,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.SortedSet;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TaggedInputStream;
@@ -165,10 +166,22 @@ public class CompositeParser implements
Map<MediaType, Parser> map = getParsers(context);
MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
while (type != null) {
+ // Try finding a parser for the type
Parser parser = map.get(type);
if (parser != null) {
return parser;
}
+
+ // Next up, look for one for its aliases
+ SortedSet<MediaType> aliases = registry.getAliases(type);
+ for (MediaType alias : aliases) {
+ parser = map.get(alias);
+ if (parser != null) {
+ return parser;
+ }
+ }
+
+ // Failing that, try for the parent of the type
type = registry.getSupertype(type);
}
return fallback;
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=1084798&r1=1084797&r2=1084798&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Wed Mar 23 23:00:13 2011
@@ -40,13 +40,17 @@ public class AutoDetectParserTest extend
private static final String HTML = "text/html";
private static final String PDF = "application/pdf";
private static final String POWERPOINT = "application/vnd.ms-powerpoint";
- private static final String KEYNOTE = "application/vnd.apple.keynote";
- private static final String PAGES = "application/vnd.apple.pages";
- private static final String NUMBERS = "application/vnd.apple.numbers";
+ private static final String KEYNOTE = "application/vnd.apple.keynote";
+ private static final String PAGES = "application/vnd.apple.pages";
+ private static final String NUMBERS = "application/vnd.apple.numbers";
private static final String RTF = "application/rtf";
private static final String PLAINTEXT = "text/plain";
private static final String WORD = "application/msword";
private static final String XML = "application/xml";
+ private static final String BMP = "image/x-ms-bmp";
+ private static final String GIF = "image/gif";
+ private static final String JPEG = "image/jpeg";
+ private static final String PNG = "image/png";
private static final String OPENOFFICE
= "application/vnd.oasis.opendocument.text";
@@ -76,8 +80,10 @@ public class AutoDetectParserTest extend
assertEquals("Bad content type: " + tp,
tp.realType, metadata.get(Metadata.CONTENT_TYPE));
- assertTrue("Expected content not found: " + tp,
- handler.toString().contains(tp.expectedContentFragment));
+ if (tp.expectedContentFragment != null) {
+ assertTrue("Expected content not found: " + tp,
+ handler.toString().contains(tp.expectedContentFragment));
+ }
} finally {
input.close();
}
@@ -192,6 +198,13 @@ public class AutoDetectParserTest extend
assertAutoDetect("testXML.xml", XML, "Lius");
}
+ public void testImages() throws Exception {
+ assertAutoDetect("testBMP.bmp", BMP, null);
+ assertAutoDetect("testGIF.gif", GIF, null);
+ assertAutoDetect("testJPEG.jpg", JPEG, null);
+ assertAutoDetect("testPNG.png", PNG, null);
+ }
+
/**
* Make sure that zip bomb attacks are prevented.
*