Author: rgauss
Date: Fri Oct  4 18:57:42 2013
New Revision: 1529260

URL: http://svn.apache.org/r1529260
Log:
TIKA-1177: Add Matroska (mkv, mka) format detection
   - Added Matroska video and audio mime-types and extensions
   - Added WebM video mime-type
   - Added mkv and webm test files (converted from existing testFLV.flv)
   - Added name detection unit tests

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testMKV.mkv   
(with props)
    tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBM.webm   
(with props)
Modified:
    
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1529260&r1=1529259&r2=1529260&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
(original)
+++ 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
Fri Oct  4 18:57:42 2013
@@ -5501,6 +5501,27 @@
     </magic>
     <glob pattern="*.movie"/>
   </mime-type>
+  
+  <mime-type type="video/x-matroska">
+    <glob pattern="*.mkv" />
+    <!-- TODO: The magic value below isn't present in all MKV files; 0x1A45DFA3 is, but it also applies to WebM
+    <magic priority="40">
+      <match value="0x1A45DFA3934282886D6174726F736B61" type="string" offset="0" />
+    </magic>
+    -->
+  </mime-type>
+  <mime-type type="audio/x-matroska">
+    <glob pattern="*.mka" />
+  </mime-type>
+  
+  <mime-type type="video/webm">
+    <glob pattern="*.webm" />
+    <!-- TODO: The magic value below would match MKV as well
+    <magic priority="40">
+      <match value="0x1A45DFA3" type="string" offset="0" />
+    </magic>
+    -->
+  </mime-type>
 
   <mime-type type="x-conference/x-cooltalk">
     <_comment>Cooltalk Audio</_comment>

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1529260&r1=1529259&r2=1529260&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
(original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
Fri Oct  4 18:57:42 2013
@@ -660,6 +660,24 @@ public class TestMimeTypes extends TestC
     public void testGroupWiseEml() throws Exception {
         assertTypeDetection("testGroupWiseEml.eml", "message/rfc822");
     }
+    
+    public void testMatroskaDetection() throws Exception {
+        assertType("video/x-matroska", "testMKV.mkv");
+        // TODO: need better magic value for data detection
+        // assertTypeByData("video/x-matroska", "testMKV.mkv");
+        assertTypeByName("video/x-matroska", "x.mkv");
+        assertTypeByName("video/x-matroska", "x.MKV");
+        assertTypeByName("audio/x-matroska", "x.mka");
+        assertTypeByName("audio/x-matroska", "x.MKA");
+    }
+    
+    public void testWebMDetection() throws Exception {
+        assertType("video/webm", "testWEBM.webm");
+        // TODO: need better magic value for data detection
+        // assertTypeByData("video/webm", "testWEBM.webm");
+        assertTypeByName("video/webm", "x.webm");
+        assertTypeByName("video/webm", "x.WEBM");
+    }
 
     /** Test getMimeType(byte[]) */
     public void testGetMimeType_byteArray() throws IOException {

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testMKV.mkv
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testMKV.mkv?rev=1529260&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testMKV.mkv
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBM.webm
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBM.webm?rev=1529260&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBM.webm
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


Reply via email to