This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 0b37895ef0eae560444e3078b7fa33f6fec11eca
Author: tballison <[email protected]>
AuthorDate: Mon May 1 15:12:59 2017 -0400

    TIKA-2311 -- to handle truncated files more robustly, in ZipContainerDetector, try OPCContainer before ZipFile
---
 .../tika/parser/pkg/ZipContainerDetector.java      | 24 +++++++++++++---------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
index 411a050..495fd2d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
@@ -118,12 +118,19 @@ public class ZipContainerDetector implements Detector {
 
     private static MediaType detectZipFormat(TikaInputStream tis) {
         try {
+
+            //try opc first because opening a package
+            //will not necessarily throw an exception for
+            //truncated files.
+            MediaType type = detectOPCBased(tis);
+            if (type != null) {
+                return type;
+            }
+
             ZipFile zip = new ZipFile(tis.getFile()); // TODO: hasFile()?
             try {
-                MediaType type = detectOpenDocument(zip);
-                if (type == null) {
-                    type = detectOPCBased(zip, tis);
-                }
+                type = detectOpenDocument(zip);
+
                 if (type == null) {
                     type = detectIWork13(zip);
                 }
@@ -180,10 +187,10 @@ public class ZipContainerDetector implements Detector {
         }
     }
 
-    private static MediaType detectOPCBased(ZipFile zip, TikaInputStream stream) {
+    private static MediaType detectOPCBased(TikaInputStream stream) {
         try {
-            if (zip.getEntry("_rels/.rels") != null
-                    || zip.getEntry("[Content_Types].xml") != null) {
+//            if (zip.getEntry("_rels/.rels") != null
+  //                  || zip.getEntry("[Content_Types].xml") != null) {
                 // Use POI to open and investigate it for us
                 OPCPackage pkg = OPCPackage.open(stream.getFile().getPath(), PackageAccess.READ);
                 stream.setOpenContainer(pkg);
@@ -202,9 +209,6 @@ public class ZipContainerDetector implements Detector {
                 
                 // We don't know what it is, sorry
                 return null;
-            } else {
-                return null;
-            }
         } catch (IOException e) {
             return null;
         } catch (RuntimeException e) {

-- 
To stop receiving notification emails like this one, please contact
"[email protected]" <[email protected]>.

Reply via email to