Author: nick Date: Mon Mar 14 20:26:36 2011 New Revision: 1081547 URL: http://svn.apache.org/viewvc?rev=1081547&view=rev Log: Fix the mime magic detection of TNEF files, and add a unit test for it. (The rest of the TNEF support will be committed when POI 3.8 beta 2 is out). (TIKA-615)
Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat (with props) Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1081547&r1=1081546&r2=1081547&view=diff ============================================================================== --- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original) +++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Mon Mar 14 20:26:36 2011 @@ -1291,7 +1291,7 @@ <mime-type type="application/vnd.ms-tnef"> <alias type="application/ms-tnef" /> <magic priority="50"> - <match value="0x223e9f78" type="little16" offset="0" /> + <match value="0x223e9f78" type="little32" offset="0" /> </magic> </mime-type> Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=1081547&r1=1081546&r2=1081547&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java (original) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Mon Mar 14 20:26:36 2011 @@ -49,13 +49,7 @@ public abstract class AbstractPOIContain public static final MediaType TYPE_EMF = MediaType.application("x-msmetafile"); protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception { - InputStream input = AbstractPOIContainerExtractionTest.class.getResourceAsStream( - "/test-documents/" + filename); - assertNotNull(filename + " not found", input); - - TikaInputStream stream = TikaInputStream.get(input); - assertNotNull(stream); - + TikaInputStream stream = getTestFile(filename); assertEquals(true, extractor.isSupported(stream)); // Process it @@ -70,6 +64,17 @@ public abstract class AbstractPOIContain return handler; } + protected TikaInputStream getTestFile(String filename) throws Exception { + InputStream input = AbstractPOIContainerExtractionTest.class.getResourceAsStream( + "/test-documents/" + filename); + assertNotNull(filename + " not found", input); + + TikaInputStream stream = TikaInputStream.get(input); + assertNotNull(stream); + + return stream; + } + protected static class TrackingHandler implements EmbeddedResourceHandler { public List<String> filenames = new ArrayList<String>(); public List<MediaType> mediaTypes = new ArrayList<MediaType>(); Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java?rev=1081547&view=auto ============================================================================== --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java (added) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java Mon Mar 14 20:26:36 2011 @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.microsoft; + +import org.apache.tika.detect.ContainerAwareDetector; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.mime.MimeTypes; + +/** + * Tests for the TNEF (winmail.dat) parser + */ +public class TNEFParserTest extends AbstractPOIContainerExtractionTest { + private static final String file = "testWINMAIL.dat"; + + public void testBasics() throws Exception { + TikaInputStream stream = getTestFile(file); + ContainerAwareDetector detector = + new ContainerAwareDetector(MimeTypes.getDefaultMimeTypes()); + + try { + assertEquals( + MediaType.application("vnd.ms-tnef"), + detector.detect(stream, new Metadata())); + } finally { + stream.close(); + } + } +} Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat?rev=1081547&view=auto ============================================================================== Binary file - no diff available. Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream