Author: rgauss
Date: Fri Oct 4 18:57:42 2013
New Revision: 1529260
URL: http://svn.apache.org/r1529260
Log:
TIKA-1177: Add Matroska (mkv, mka) format detection
- Added Matroska video and audio mime-types and extensions
- Added WebM video mime-type
- Added mkv and webm test files (converted from existing testFLV.flv)
- Added name detection unit tests
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testMKV.mkv
(with props)
tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBM.webm
(with props)
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1529260&r1=1529259&r2=1529260&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
(original)
+++
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
Fri Oct 4 18:57:42 2013
@@ -5501,6 +5501,27 @@
</magic>
<glob pattern="*.movie"/>
</mime-type>
+
+ <mime-type type="video/x-matroska">
+ <glob pattern="*.mkv" />
+ <!-- TODO: The magic value below isn't present in all MKV files;
0x1A45DFA3 is, but it also matches WebM
+ <magic priority="40">
+ <match value="0x1A45DFA3934282886D6174726F736B61" type="string"
offset="0" />
+ </magic>
+ -->
+ </mime-type>
+ <mime-type type="audio/x-matroska">
+ <glob pattern="*.mka" />
+ </mime-type>
+
+ <mime-type type="video/webm">
+ <glob pattern="*.webm" />
+ <!-- TODO: The magic value below would match MKV as well
+ <magic priority="40">
+ <match value="0x1A45DFA3" type="string" offset="0" />
+ </magic>
+ -->
+ </mime-type>
<mime-type type="x-conference/x-cooltalk">
<_comment>Cooltalk Audio</_comment>
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1529260&r1=1529259&r2=1529260&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Fri Oct 4 18:57:42 2013
@@ -660,6 +660,24 @@ public class TestMimeTypes extends TestC
public void testGroupWiseEml() throws Exception {
assertTypeDetection("testGroupWiseEml.eml", "message/rfc822");
}
+
+ public void testMatroskaDetection() throws Exception {
+ assertType("video/x-matroska", "testMKV.mkv");
+ // TODO: need better magic value for data detection
+ // assertTypeByData("video/x-matroska", "testMKV.mkv");
+ assertTypeByName("video/x-matroska", "x.mkv");
+ assertTypeByName("video/x-matroska", "x.MKV");
+ assertTypeByName("audio/x-matroska", "x.mka");
+ assertTypeByName("audio/x-matroska", "x.MKA");
+ }
+
+ public void testWebMDetection() throws Exception {
+ assertType("video/webm", "testWEBM.webm");
+ // TODO: need better magic value for data detection
+ // assertTypeByData("video/webm", "testWEBM.webm");
+ assertTypeByName("video/webm", "x.webm");
+ assertTypeByName("video/webm", "x.WEBM");
+ }
/** Test getMimeType(byte[]) */
public void testGetMimeType_byteArray() throws IOException {
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testMKV.mkv
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testMKV.mkv?rev=1529260&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
tika/trunk/tika-parsers/src/test/resources/test-documents/testMKV.mkv
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBM.webm
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBM.webm?rev=1529260&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBM.webm
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream