Author: nick
Date: Fri Oct 16 13:46:27 2015
New Revision: 1708996

URL: http://svn.apache.org/viewvc?rev=1708996&view=rev
Log:
TIKA-1772 Test WebVTT file from Alexander Widera, mime magic for it, and 
detection tests

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt
Modified:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1708996&r1=1708995&r2=1708996&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Fri Oct 16 13:46:27 2015
@@ -5541,6 +5541,10 @@
   <mime-type type="text/vtt">
     <_comment>Web Video Text Tracks Format</_comment>
     <acronym>WebVTT</acronym>
+    <magic priority="40">
+      <match value="WEBVTT FILE\r" type="string" offset="0"/>
+      <match value="WEBVTT FILE\n" type="string" offset="0"/>
+    </magic>
     <glob pattern="*.vtt"/>
     <sub-class-of type="text/plain"/>
   </mime-type>

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1708996&r1=1708995&r2=1708996&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Fri Oct 16 13:46:27 2015
@@ -962,6 +962,12 @@ public class TestMimeTypes {
         assertTypeByData("text/x-matlab", "testMATLAB_barcast.m");
     }
 
+    @Test
+    public void testWebVTT() throws Exception {
+        assertType("text/vtt", "testWebVTT.vtt");
+        assertTypeByData("text/vtt", "testWebVTT.vtt");
+    }
+    
     private void assertText(byte[] prefix) throws IOException {
         assertMagic("text/plain", prefix);
     }

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt?rev=1708996&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testWebVTT.vtt Fri Oct 16 13:46:27 2015
@@ -0,0 +1,33 @@
+WEBVTT FILE
+
+1
+00:00:03.500 --> 00:00:05.000 D:vertical A:start
+Everyone wants the most from life
+
+2
+00:00:06.000 --> 00:00:09.000 A:start
+Like internet experiences that are rich <b>and</b> entertaining
+
+3
+00:00:11.000 --> 00:00:14.000 A:end
+Phone conversations where people truly <c.highlight>connect</c>
+
+4
+00:00:14.500 --> 00:00:18.000
+Your favourite TV programmes ready to watch at the touch of a button
+
+5
+00:00:19.000 --> 00:00:24.000
+Which is why we are bringing TV, internet and phone together in <c.highlight>one</c> super package
+
+6
+00:00:24.500 --> 00:00:26.000
+<c.highlight>One</c> simple way to get everything
+
+7
+00:00:26.500 --> 00:00:27.500 L:12%
+UPC
+
+8
+00:00:28.000 --> 00:00:30.000 L:75%
+Simply for <u>everyone</u>
\ No newline at end of file


Reply via email to