Author: nick
Date: Tue Nov 15 09:41:46 2011
New Revision: 1202109

URL: http://svn.apache.org/viewvc?rev=1202109&view=rev
Log:
TIKA-779 Works 2000 container aware detection, plus test

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps   (with props)
Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1202109&r1=1202108&r2=1202109&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java Tue Nov 15 09:41:46 2011
@@ -124,10 +124,14 @@ public class POIFSContainerDetector impl
             } else if (names.contains("VisioDocument")) {
                 return VSD;
             } else if (names.contains("CONTENTS") && names.contains("SPELLING")) {
+               // Newer Works files
+               return WPS;
+            } else if (names.contains("CONTENTS") && names.contains("\u0001CompObj")) {
+               // Normally an older Works file
                return WPS;
             } else if (names.contains("CONTENTS")) {
-               // CONTENTS without SPELLING normally means some sort of
-               //  embedded non-office file inside an OLE2 document
+               // CONTENTS without SPELLING nor CompObj normally means some sort
+               //  of embedded non-office file inside an OLE2 document
                // This is most commonly triggered on nested directories
                return OLE;
             } else if (names.contains("\u0001Ole10Native")) {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1202109&r1=1202108&r2=1202109&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Tue Nov 15 09:41:46 2011
@@ -56,6 +56,7 @@ public class TestContainerAwareDetector 
 
         // Try some ones that POI doesn't handle, that are still OLE2 based
         assertDetect("testWORKS.wps", "application/vnd.ms-works");
+        assertDetect("testWORKS2000.wps", "application/vnd.ms-works");
         assertDetect("testCOREL.shw", "application/x-corelpresentations");
         assertDetect("testQUATTRO.qpw", "application/x-quattro-pro");
         assertDetect("testQUATTRO.wb3", "application/x-quattro-pro");

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps?rev=1202109&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


Reply via email to