TIKA-2250 As of RFC 7903, the official MIME type for EMF is image/emf: an image type without the x- prefix, replacing application/x-emf
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/bd667acd Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/bd667acd Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/bd667acd Branch: refs/heads/2.x Commit: bd667acde6a48e118574129d79dfacb1c3c2db25 Parents: 6668d78 Author: Nick Burch <n...@gagravarr.org> Authored: Mon Jan 23 18:31:49 2017 +0000 Committer: Nick Burch <n...@gagravarr.org> Committed: Mon Jan 23 18:46:17 2017 +0000 ---------------------------------------------------------------------- CHANGES.txt | 3 +++ .../org/apache/tika/mime/TestMimeTypes.java | 7 +++--- .../org/apache/tika/mime/tika-mimetypes.xml | 25 +++++++++++++------- .../apache/tika/parser/pdf/PDFParserTest.java | 2 +- .../tika/parser/microsoft/HSLFExtractor.java | 2 +- .../AbstractPOIContainerExtractionTest.java | 2 +- .../apache/tika/parser/rtf/RTFParserTest.java | 2 +- 7 files changed, 28 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 1b9b213..8d099b8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -17,6 +17,9 @@ Release 2.0 - ??? Release 1.15 -??? + * Official mime types for BMP, EMF and WMF have been registered with + IANA, so switch to these (image/bmp image/emf image/wmf) (TIKA-2250) + * Be more parsimonious with BufferedInputStreams via Josh Hight (TIKA-2244). 
http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java ---------------------------------------------------------------------- diff --git a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java index 1d14b43..0a51bb9 100644 --- a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java +++ b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java @@ -598,9 +598,10 @@ public class TestMimeTypes extends TikaTest { assertTypeByData("image/wmf", "testWMF.wmf"); assertTypeByName("image/wmf", "x.WMF"); - assertTypeByName("application/x-emf", "x.emf"); - assertTypeByData("application/x-emf","testEMF.emf"); - assertTypeByName("application/x-emf", "x.EMF"); + assertTypeByName("image/emf", "x.emf"); + assertTypeByData("image/emf", "testEMF.emf"); + assertTypeByName("image/emf", "x.EMF"); + // TODO: Need a test wmz file assertTypeByName("application/x-ms-wmz", "x.wmz"); assertTypeByName("application/x-ms-wmz", "x.WMZ"); http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml ---------------------------------------------------------------------- diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml index 8c91680..1364f72 100644 --- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml +++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml @@ -3227,16 +3227,11 @@ <glob pattern="*.exe"/> </mime-type> - <mime-type type="application/x-emf"> - <acronym>EMF</acronym> - <_comment>Extended Metafile</_comment> - <tika:link>https://msdn.microsoft.com/en-us/library/cc230711.aspx</tika:link> - <glob pattern="*.emf"/> + <mime-type type="application/x-erdas-hfa"> <magic priority="50"> - <match value="0x01000000" type="string" offset="0"> 
- <match value="0x464D4520" type="little32" offset="40"/> - </match> + <match value="EHFA_HEADER_TAG" type="string" offset="0" /> </magic> + <glob pattern="*.hfa"/> </mime-type> <mime-type type="application/x-filemaker"> @@ -4808,6 +4803,20 @@ <glob pattern="*.cgm"/> </mime-type> + <mime-type type="image/emf"> + <alias type="image/x-emf"/> + <alias type="application/x-emf"/> + <acronym>EMF</acronym> + <_comment>Enhanced Metafile</_comment> + <tika:link>https://msdn.microsoft.com/en-us/library/cc230711.aspx</tika:link> + <glob pattern="*.emf"/> + <magic priority="50"> + <match value="0x01000000" type="string" offset="0"> + <match value="0x464D4520" type="little32" offset="40"/> + </match> + </magic> + </mime-type> + <mime-type type="image/example"/> <mime-type type="image/fits"> http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index 9d92971..d71b9cd 100644 --- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -72,7 +72,7 @@ import org.xml.sax.ContentHandler; public class PDFParserTest extends TikaTest { public static final MediaType TYPE_TEXT = MediaType.TEXT_PLAIN; - public static final MediaType TYPE_EMF = MediaType.application("x-emf"); + public static final MediaType TYPE_EMF = MediaType.image("emf"); public static final MediaType TYPE_PDF = MediaType.application("pdf"); public static final MediaType TYPE_DOCX = 
MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document"); public static final MediaType TYPE_DOC = MediaType.application("msword"); http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java index 174db83..2be7329 100644 --- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java +++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java @@ -319,7 +319,7 @@ public class HSLFExtractor extends AbstractPOIFSExtractor { switch (pic.getType()) { case EMF: - mediaType = "application/x-emf"; + mediaType = "image/emf"; break; case WMF: mediaType = "image/wmf"; http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java index 1a2940d..86657b1 100644 --- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java +++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java @@ -45,7 +45,7 @@ 
public abstract class AbstractPOIContainerExtractionTest extends TikaTest { public static final MediaType TYPE_JPG = MediaType.image("jpeg"); public static final MediaType TYPE_GIF = MediaType.image("gif"); public static final MediaType TYPE_PNG = MediaType.image("png"); - public static final MediaType TYPE_EMF = MediaType.application("x-emf"); + public static final MediaType TYPE_EMF = MediaType.image("emf"); public static final MediaType TYPE_WMF = MediaType.image("wmf"); protected static TikaInputStream getTestFile(String filename) throws Exception { http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java index 88ac0f9..e1b4891 100644 --- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java +++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java @@ -477,7 +477,7 @@ public class RTFParserTest extends TikaTest { @Test public void testEmbeddedLinkedDocument() throws Exception { Set<MediaType> skipTypes = new HashSet<MediaType>(); - skipTypes.add(MediaType.parse("application/x-emf")); + skipTypes.add(MediaType.parse("image/emf")); skipTypes.add(MediaType.parse("image/wmf")); TrackingHandler tracker = new TrackingHandler(skipTypes);