TIKA-2250 As of RFC7903, the official mime type for EMF is now an image one and 
without the x- prefix


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/bd667acd
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/bd667acd
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/bd667acd

Branch: refs/heads/2.x
Commit: bd667acde6a48e118574129d79dfacb1c3c2db25
Parents: 6668d78
Author: Nick Burch <n...@gagravarr.org>
Authored: Mon Jan 23 18:31:49 2017 +0000
Committer: Nick Burch <n...@gagravarr.org>
Committed: Mon Jan 23 18:46:17 2017 +0000

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 +++
 .../org/apache/tika/mime/TestMimeTypes.java     |  7 +++---
 .../org/apache/tika/mime/tika-mimetypes.xml     | 25 +++++++++++++-------
 .../apache/tika/parser/pdf/PDFParserTest.java   |  2 +-
 .../tika/parser/microsoft/HSLFExtractor.java    |  2 +-
 .../AbstractPOIContainerExtractionTest.java     |  2 +-
 .../apache/tika/parser/rtf/RTFParserTest.java   |  2 +-
 7 files changed, 28 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1b9b213..8d099b8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -17,6 +17,9 @@ Release 2.0 - ???
 
 Release 1.15 -???
 
+  * Official mime types for BMP, EMF and WMF have been registered with
+    IANA, so switch to these (image/bmp image/emf image/wmf) (TIKA-2250)
+
   * Be more parsimonious with BufferedInputStreams via Josh Hight
     (TIKA-2244).
 

http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 1d14b43..0a51bb9 100644
--- a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -598,9 +598,10 @@ public class TestMimeTypes extends TikaTest {
         assertTypeByData("image/wmf", "testWMF.wmf");
         assertTypeByName("image/wmf", "x.WMF");
 
-        assertTypeByName("application/x-emf", "x.emf");
-        assertTypeByData("application/x-emf","testEMF.emf");
-        assertTypeByName("application/x-emf", "x.EMF");
+        assertTypeByName("image/emf", "x.emf");
+        assertTypeByData("image/emf", "testEMF.emf");
+        assertTypeByName("image/emf", "x.EMF");
+
         // TODO: Need a test wmz file
         assertTypeByName("application/x-ms-wmz", "x.wmz");
         assertTypeByName("application/x-ms-wmz", "x.WMZ");

http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 8c91680..1364f72 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -3227,16 +3227,11 @@
     <glob pattern="*.exe"/>
   </mime-type>
 
-  <mime-type type="application/x-emf">
-    <acronym>EMF</acronym>
-    <_comment>Extended Metafile</_comment>
-    <tika:link>https://msdn.microsoft.com/en-us/library/cc230711.aspx</tika:link>
-    <glob pattern="*.emf"/>
+  <mime-type type="application/x-erdas-hfa">
     <magic priority="50">
-      <match value="0x01000000" type="string" offset="0">
-        <match value="0x464D4520" type="little32" offset="40"/>
-      </match>
+      <match value="EHFA_HEADER_TAG" type="string" offset="0" />
     </magic>
+    <glob pattern="*.hfa"/>
   </mime-type>
 
   <mime-type type="application/x-filemaker">
@@ -4808,6 +4803,20 @@
     <glob pattern="*.cgm"/>
   </mime-type>
 
+  <mime-type type="image/emf">
+    <alias type="image/x-emf"/>
+    <alias type="application/x-emf"/>
+    <acronym>EMF</acronym>
+    <_comment>Enhanced Metafile</_comment>
+    <tika:link>https://msdn.microsoft.com/en-us/library/cc230711.aspx</tika:link>
+    <glob pattern="*.emf"/>
+    <magic priority="50">
+      <match value="0x01000000" type="string" offset="0">
+        <match value="0x464D4520" type="little32" offset="40"/>
+      </match>
+    </magic>
+  </mime-type>
+
   <mime-type type="image/example"/>
 
   <mime-type type="image/fits">

http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 9d92971..d71b9cd 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -72,7 +72,7 @@ import org.xml.sax.ContentHandler;
 public class PDFParserTest extends TikaTest {
 
     public static final MediaType TYPE_TEXT = MediaType.TEXT_PLAIN;
-    public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+    public static final MediaType TYPE_EMF = MediaType.image("emf");
     public static final MediaType TYPE_PDF = MediaType.application("pdf");
     public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
     public static final MediaType TYPE_DOC = MediaType.application("msword");

http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index 174db83..2be7329 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -319,7 +319,7 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
 
             switch (pic.getType()) {
                 case EMF:
-                    mediaType = "application/x-emf";
+                    mediaType = "image/emf";
                     break;
                 case WMF:
                     mediaType = "image/wmf";

http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
index 1a2940d..86657b1 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
@@ -45,7 +45,7 @@ public abstract class AbstractPOIContainerExtractionTest extends TikaTest {
     public static final MediaType TYPE_JPG = MediaType.image("jpeg");
     public static final MediaType TYPE_GIF = MediaType.image("gif");
     public static final MediaType TYPE_PNG = MediaType.image("png");
-    public static final MediaType TYPE_EMF = MediaType.application("x-emf");
+    public static final MediaType TYPE_EMF = MediaType.image("emf");
     public static final MediaType TYPE_WMF = MediaType.image("wmf");
 
     protected static TikaInputStream getTestFile(String filename) throws Exception {

http://git-wip-us.apache.org/repos/asf/tika/blob/bd667acd/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index 88ac0f9..e1b4891 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -477,7 +477,7 @@ public class RTFParserTest extends TikaTest {
     @Test
     public void testEmbeddedLinkedDocument() throws Exception {
         Set<MediaType> skipTypes = new HashSet<MediaType>();
-        skipTypes.add(MediaType.parse("application/x-emf"));
+        skipTypes.add(MediaType.parse("image/emf"));
         skipTypes.add(MediaType.parse("image/wmf"));
 
         TrackingHandler tracker = new TrackingHandler(skipTypes);

Reply via email to