This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 6930ff0251e9e93ee969a9f1287c902d31045b59
Author: tballison <[email protected]>
AuthorDate: Mon May 1 15:20:30 2017 -0400

    TIKA-2311 -- try OPC before ZipFile.  This can work better on some
    truncated files.
---
 .../org/apache/tika/parser/opc/OPCDetector.java    |  3 +--
 .../tika/parser/pkg/ZipContainerDetector.java      | 25 +++++++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/opc/OPCDetector.java
 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/opc/OPCDetector.java
index a8fe200..21032d1 100644
--- 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/opc/OPCDetector.java
+++ 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/opc/OPCDetector.java
@@ -76,8 +76,7 @@ public class OPCDetector implements Detector {
             
             return type;
         } catch (InvalidFormatException e) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
+            //swallow
         }finally {
             tmp.close();
         }
diff --git 
a/tika-parser-modules/tika-parser-package-module/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
 
b/tika-parser-modules/tika-parser-package-module/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
index 0a12e15..1980bd6 100644
--- 
a/tika-parser-modules/tika-parser-package-module/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
+++ 
b/tika-parser-modules/tika-parser-package-module/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
@@ -131,13 +131,20 @@ public class ZipContainerDetector extends 
AbstractDetector {
     }
 
     private MediaType detectZipFormat(TikaInputStream tis) {
+
+        //try opc first because opening a package
+        //will not necessarily throw an exception for
+        //truncated files.
+        MediaType type = detectOPCBased(tis);
+        if (type != null) {
+            return type;
+        }
+
         try {
             ZipFile zip = new ZipFile(tis.getFile()); // TODO: hasFile()?
             try {
-                MediaType type = detectOpenDocument(zip);
-                if (type == null) {
-                    type = detectOPCBased(zip, tis);
-                }
+                type = detectOpenDocument(zip);
+
                 if (type == null) {
                     type = detectIWork(zip);
                 }
@@ -191,18 +198,16 @@ public class ZipContainerDetector extends 
AbstractDetector {
         }
     }
 
-    private MediaType detectOPCBased(ZipFile zip, TikaInputStream stream) {
+    private MediaType detectOPCBased(TikaInputStream stream) {
         try {
-            if (zip.getEntry("_rels/.rels") != null
-                    || zip.getEntry("[Content_Types].xml") != null) {
+//            if (zip.getEntry("_rels/.rels") != null
+  //                  || zip.getEntry("[Content_Types].xml") != null) {
                 MediaType type = this.opcDetector.detect(stream, null);
                 if (type != null) return type;
                 
                 // We don't know what it is, sorry
                 return null;
-            } else {
-                return null;
-            }
+
         } catch (IOException e) {
             return null;
         } catch (RuntimeException e) {

-- 
To stop receiving notification emails like this one, please contact
"[email protected]" <[email protected]>.

Reply via email to