Author: nick
Date: Mon Mar 14 20:26:36 2011
New Revision: 1081547

URL: http://svn.apache.org/viewvc?rev=1081547&view=rev
Log:
Fix the MIME magic detection of TNEF files, and add a unit test for it. (The 
rest of the TNEF support will be committed when POI 3.8 beta 2 is out.) 
(TIKA-615)

Added:
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
    tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat   
(with props)
Modified:
    
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java

Modified: 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1081547&r1=1081546&r2=1081547&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
(original)
+++ 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
Mon Mar 14 20:26:36 2011
@@ -1291,7 +1291,7 @@
   <mime-type type="application/vnd.ms-tnef">
     <alias type="application/ms-tnef" />
     <magic priority="50">
-      <match value="0x223e9f78" type="little16" offset="0" />
+      <match value="0x223e9f78" type="little32" offset="0" />
     </magic>
   </mime-type>
 

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=1081547&r1=1081546&r2=1081547&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
 Mon Mar 14 20:26:36 2011
@@ -49,13 +49,7 @@ public abstract class AbstractPOIContain
     public static final MediaType TYPE_EMF = 
MediaType.application("x-msmetafile");
 
     protected TrackingHandler process(String filename, ContainerExtractor 
extractor, boolean recurse) throws Exception {
-        InputStream input = 
AbstractPOIContainerExtractionTest.class.getResourceAsStream(
-             "/test-documents/" + filename);
-        assertNotNull(filename + " not found", input);
-        
-        TikaInputStream stream = TikaInputStream.get(input);
-        assertNotNull(stream);
-        
+        TikaInputStream stream = getTestFile(filename);
         assertEquals(true, extractor.isSupported(stream));
         
         // Process it
@@ -70,6 +64,17 @@ public abstract class AbstractPOIContain
         return handler;
     }
     
+    protected TikaInputStream getTestFile(String filename) throws Exception {
+       InputStream input = 
AbstractPOIContainerExtractionTest.class.getResourceAsStream(
+             "/test-documents/" + filename);
+        assertNotNull(filename + " not found", input);
+        
+        TikaInputStream stream = TikaInputStream.get(input);
+        assertNotNull(stream);
+        
+        return stream;
+    }
+    
     protected static class TrackingHandler implements EmbeddedResourceHandler {
        public List<String> filenames = new ArrayList<String>();
        public List<MediaType> mediaTypes = new ArrayList<MediaType>();

Added: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java?rev=1081547&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
 (added)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
 Mon Mar 14 20:26:36 2011
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import org.apache.tika.detect.ContainerAwareDetector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
+
+/**
+ * Tests for the TNEF (winmail.dat) parser
+ */
+public class TNEFParserTest extends AbstractPOIContainerExtractionTest {
+   private static final String file = "testWINMAIL.dat";
+   
+   public void testBasics() throws Exception {
+      TikaInputStream stream = getTestFile(file);
+      ContainerAwareDetector detector =
+         new ContainerAwareDetector(MimeTypes.getDefaultMimeTypes());
+      
+      try {
+         assertEquals(
+                 MediaType.application("vnd.ms-tnef"),
+                 detector.detect(stream, new Metadata()));
+     } finally {
+         stream.close();
+     }
+   }
+}

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat?rev=1081547&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


Reply via email to