Author: nick
Date: Fri Oct 16 13:46:27 2015
New Revision: 1708996
URL: http://svn.apache.org/viewvc?rev=1708996&view=rev
Log:
TIKA-1772 Test WebVTT file from Alexander Widera, mime magic for it, and
detection tests
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1708996&r1=1708995&r2=1708996&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Fri Oct 16 13:46:27 2015
@@ -5541,6 +5541,10 @@
<mime-type type="text/vtt">
<_comment>Web Video Text Tracks Format</_comment>
<acronym>WebVTT</acronym>
+ <magic priority="40">
+ <match value="WEBVTT FILE\r" type="string" offset="0"/>
+ <match value="WEBVTT FILE\n" type="string" offset="0"/>
+ </magic>
<glob pattern="*.vtt"/>
<sub-class-of type="text/plain"/>
</mime-type>
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1708996&r1=1708995&r2=1708996&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Fri Oct 16 13:46:27 2015
@@ -962,6 +962,12 @@ public class TestMimeTypes {
assertTypeByData("text/x-matlab", "testMATLAB_barcast.m");
}
+ @Test
+ public void testWebVTT() throws Exception {
+ assertType("text/vtt", "testWebVTT.vtt");
+ assertTypeByData("text/vtt", "testWebVTT.vtt");
+ }
+
private void assertText(byte[] prefix) throws IOException {
assertMagic("text/plain", prefix);
}
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt?rev=1708996&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt Fri Oct 16 13:46:27 2015
@@ -0,0 +1,33 @@
+WEBVTT FILE
+
+1
+00:00:03.500 --> 00:00:05.000 D:vertical A:start
+Everyone wants the most from life
+
+2
+00:00:06.000 --> 00:00:09.000 A:start
+Like internet experiences that are rich <b>and</b> entertaining
+
+3
+00:00:11.000 --> 00:00:14.000 A:end
+Phone conversations where people truly <c.highlight>connect</c>
+
+4
+00:00:14.500 --> 00:00:18.000
+Your favourite TV programmes ready to watch at the touch of a button
+
+5
+00:00:19.000 --> 00:00:24.000
+Which is why we are bringing TV, internet and phone together in <c.highlight>one</c> super package
+
+6
+00:00:24.500 --> 00:00:26.000
+<c.highlight>One</c> simple way to get everything
+
+7
+00:00:26.500 --> 00:00:27.500 L:12%
+UPC
+
+8
+00:00:28.000 --> 00:00:30.000 L:75%
+Simply for <u>everyone</u>
\ No newline at end of file