Author: nick
Date: Tue Nov 15 09:41:46 2011
New Revision: 1202109
URL: http://svn.apache.org/viewvc?rev=1202109&view=rev
Log:
TIKA-779 Works 2000 container aware detection, plus test
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps
(with props)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1202109&r1=1202108&r2=1202109&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
Tue Nov 15 09:41:46 2011
@@ -124,10 +124,14 @@ public class POIFSContainerDetector impl
} else if (names.contains("VisioDocument")) {
return VSD;
} else if (names.contains("CONTENTS") && names.contains("SPELLING")) {
+ // Newer Works files
+ return WPS;
+ } else if (names.contains("CONTENTS") && names.contains("\u0001CompObj")) {
+ // Normally an older Works file
return WPS;
} else if (names.contains("CONTENTS")) {
- // CONTENTS without SPELLING normally means some sort of
- // embedded non-office file inside an OLE2 document
+ // CONTENTS without SPELLING nor CompObj normally means some sort
+ // of embedded non-office file inside an OLE2 document
// This is most commonly triggered on nested directories
return OLE;
} else if (names.contains("\u0001Ole10Native")) {
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1202109&r1=1202108&r2=1202109&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Tue Nov 15 09:41:46 2011
@@ -56,6 +56,7 @@ public class TestContainerAwareDetector
// Try some ones that POI doesn't handle, that are still OLE2 based
assertDetect("testWORKS.wps", "application/vnd.ms-works");
+ assertDetect("testWORKS2000.wps", "application/vnd.ms-works");
assertDetect("testCOREL.shw", "application/x-corelpresentations");
assertDetect("testQUATTRO.qpw", "application/x-quattro-pro");
assertDetect("testQUATTRO.wb3", "application/x-quattro-pro");
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps?rev=1202109&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream