Author: nick
Date: Sun Aug 16 18:00:57 2015
New Revision: 1696159

URL: http://svn.apache.org/r1696159
Log:
Outlook detection with custom config tests, based on work by Justin Palmer 
TIKA-1708

Added:
    
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-composite.xml
    
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-default.xml
Modified:
    
tika/trunk/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
    tika/trunk/tika-example/pom.xml
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java?rev=1696159&r1=1696158&r2=1696159&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
 (original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
 Sun Aug 16 18:00:57 2015
@@ -20,6 +20,7 @@ import static org.junit.Assert.assertNot
 
 import java.net.URL;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.parser.ParseContext;
 import org.junit.After;
 
@@ -29,7 +30,7 @@ import org.junit.After;
  *  that {@link TikaConfigTest} can't do, due to a need for the
  *  full set of "real" classes of parsers / detectors
  */
-public abstract class AbstractTikaConfigTest {
+public abstract class AbstractTikaConfigTest extends TikaTest {
     protected static ParseContext context = new ParseContext();
     
     protected static String getConfigPath(String config) throws Exception {

Modified: tika/trunk/tika-example/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/pom.xml?rev=1696159&r1=1696158&r2=1696159&view=diff
==============================================================================
--- tika/trunk/tika-example/pom.xml (original)
+++ tika/trunk/tika-example/pom.xml Sun Aug 16 18:00:57 2015
@@ -64,6 +64,13 @@
     </dependency>
     <dependency>
       <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
       <artifactId>tika-parsers</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java?rev=1696159&r1=1696158&r2=1696159&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
 Sun Aug 16 18:00:57 2015
@@ -46,14 +46,13 @@ import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
 
 /**
- * @author Tran Nam Quang
- * @author hong-thai.nguyen
+ * Parser for MS Outlook PST email storage files
  */
 public class OutlookPSTParser extends AbstractParser {
 
     private static final long serialVersionUID = 620998217748364063L;
 
-    private static final MediaType MS_OUTLOOK_PST_MIMETYPE = 
MediaType.application("vnd.ms-outlook-pst");
+    public static final MediaType MS_OUTLOOK_PST_MIMETYPE = 
MediaType.application("vnd.ms-outlook-pst");
     private static final Set<MediaType> SUPPORTED_TYPES = 
singleton(MS_OUTLOOK_PST_MIMETYPE);
 
     private static AttributesImpl createAttribute(String attName, String 
attValue) {

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java?rev=1696159&r1=1696158&r2=1696159&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
 Sun Aug 16 18:00:57 2015
@@ -25,8 +25,12 @@ import org.apache.tika.detect.CompositeD
 import org.apache.tika.detect.DefaultDetector;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.detect.EmptyDetector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.mbox.OutlookPSTParser;
 import org.apache.tika.parser.microsoft.POIFSContainerDetector;
 import org.apache.tika.parser.pkg.ZipContainerDetector;
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -52,33 +56,83 @@ public class TikaDetectorConfigTest exte
         
         
         // Get the DefaultDetector from the config
-        DefaultDetector confDetecotor = 
(DefaultDetector)detector.getDetectors().get(0);
+        DefaultDetector confDetector = 
(DefaultDetector)detector.getDetectors().get(0);
         
         // Get a fresh "default" DefaultDetector
         DefaultDetector normDetector = new 
DefaultDetector(config.getMimeRepository());
         
         
         // The default one will offer the Zip and POIFS detectors
+        assertDetectors(normDetector, true, true);
+        
+        
+        // The one from the config won't, as we excluded those
+        assertDetectors(confDetector, false, false);
+    }
+    
+    /**
+     * TIKA-1708 - If the Zip detector is disabled, either explicitly,
+     *  or via giving a list of detectors that it isn't part of, ensure
+     *  that detection of PST files still works
+     */
+    @Test
+    @Ignore // Currently broken as per bug report
+    public void testPSTDetectionWithoutZipDetector() throws Exception {
+        // Check the one with an exclude
+        TikaConfig config = getConfig("TIKA-1708-detector-default.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeDetector detectorWX = (CompositeDetector)config.getDetector();
+
+        // Check it has the POIFS one, but not the zip one
+        assertDetectors(detectorWX, true, false);
+        
+        
+        // Check the one with an explicit list
+        config = getConfig("TIKA-1708-detector-composite.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeDetector detectorCL = (CompositeDetector)config.getDetector();
+        assertEquals(2, detectorCL.getDetectors().size());
+        
+        // Check it also has the POIFS one, but not the zip one
+        assertDetectors(detectorCL, true, false);
+        
+        
+        // Now check they detect PST files correctly
+        TikaInputStream stream = TikaInputStream.get(
+                getResourceAsFile("/test-documents/testPST.pst"));
+        assertEquals(
+                OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE.toString(), 
+                detectorWX.detect(stream, new Metadata())
+        );
+        assertEquals(
+                OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE.toString(), 
+                detectorCL.detect(stream, new Metadata())
+        );
+    }
+    
+    private void assertDetectors(CompositeDetector detector, boolean 
shouldHavePOIFS,
+                                 boolean shouldHaveZip) {
         boolean hasZip = false;
         boolean hasPOIFS = false;
-        for (Detector d : normDetector.getDetectors()) {
+        for (Detector d : detector.getDetectors()) {
             if (d instanceof ZipContainerDetector) {
-                hasZip = true;
+                if (shouldHaveZip) {
+                    hasZip = true;
+                } else {
+                    fail("Shouldn't have the ZipContainerDetector from 
config");
+                }
             }
             if (d instanceof POIFSContainerDetector) {
-                hasPOIFS = true;
+                if (shouldHavePOIFS) {
+                    hasPOIFS = true;
+                } else {
+                    fail("Shouldn't have the POIFSContainerDetector from 
config");
+                }
             }
         }
-        assertTrue(hasZip);
-        assertTrue(hasPOIFS);
-        
-        
-        // The one from the config won't, as we excluded those
-        for (Detector d : confDetecotor.getDetectors()) {
-            if (d instanceof ZipContainerDetector)
-                fail("Shouldn't have the ZipContainerDetector from config");
-            if (d instanceof POIFSContainerDetector)
-                fail("Shouldn't have the POIFSContainerDetector from config");
-        }
+        if (shouldHavePOIFS) assertTrue("Should have the 
POIFSContainerDetector", hasPOIFS);
+        if (shouldHaveZip)   assertTrue("Should have the 
ZipContainerDetector", hasZip);
     }
 }

Added: 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-composite.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-composite.xml?rev=1696159&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-composite.xml
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-composite.xml
 Sun Aug 16 18:00:57 2015
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <parsers/>
+  <detectors>
+    <detector class="org.apache.tika.parser.microsoft.POIFSContainerDetector"/>
+    <detector class="org.apache.tika.mime.MimeTypes"/>
+  </detectors>
+  <translator class="org.apache.tika.language.translate.DefaultTranslator"/>
+</properties>

Added: 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-default.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-default.xml?rev=1696159&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-default.xml
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1708-detector-default.xml
 Sun Aug 16 18:00:57 2015
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <parsers/>
+  <detectors>
+    <detector class="org.apache.tika.detect.DefaultDetector">
+      <detector-exclude 
class="org.apache.tika.parser.pkg.ZipContainerDetector"/>
+    </detector>
+  </detectors>
+  <translator class="org.apache.tika.language.translate.DefaultTranslator"/>
+</properties>


Reply via email to