This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 6930ff0251e9e93ee969a9f1287c902d31045b59 Author: tballison <[email protected]> AuthorDate: Mon May 1 15:20:30 2017 -0400 TIKA-2311 -- try OPC before ZipFile. This can work better on some truncated files. --- .../org/apache/tika/parser/opc/OPCDetector.java | 3 +-- .../tika/parser/pkg/ZipContainerDetector.java | 25 +++++++++++++--------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/opc/OPCDetector.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/opc/OPCDetector.java index a8fe200..21032d1 100644 --- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/opc/OPCDetector.java +++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/opc/OPCDetector.java @@ -76,8 +76,7 @@ public class OPCDetector implements Detector { return type; } catch (InvalidFormatException e) { - // TODO Auto-generated catch block - e.printStackTrace(); + //swallow }finally { tmp.close(); } diff --git a/tika-parser-modules/tika-parser-package-module/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java b/tika-parser-modules/tika-parser-package-module/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java index 0a12e15..1980bd6 100644 --- a/tika-parser-modules/tika-parser-package-module/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java +++ b/tika-parser-modules/tika-parser-package-module/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java @@ -131,13 +131,20 @@ public class ZipContainerDetector extends AbstractDetector { } private MediaType detectZipFormat(TikaInputStream tis) { + + //try opc first because opening a package + //will not necessarily throw an exception for + //truncated files. + MediaType type = detectOPCBased(tis); + if (type != null) { + return type; + } + try { ZipFile zip = new ZipFile(tis.getFile()); // TODO: hasFile()? try { - MediaType type = detectOpenDocument(zip); - if (type == null) { - type = detectOPCBased(zip, tis); - } + type = detectOpenDocument(zip); + if (type == null) { type = detectIWork(zip); } @@ -191,18 +198,16 @@ public class ZipContainerDetector extends AbstractDetector { } } - private MediaType detectOPCBased(ZipFile zip, TikaInputStream stream) { + private MediaType detectOPCBased(TikaInputStream stream) { try { - if (zip.getEntry("_rels/.rels") != null - || zip.getEntry("[Content_Types].xml") != null) { +// if (zip.getEntry("_rels/.rels") != null + // || zip.getEntry("[Content_Types].xml") != null) { MediaType type = this.opcDetector.detect(stream, null); if (type != null) return type; // We don't know what it is, sorry return null; - } else { - return null; - } + } catch (IOException e) { return null; } catch (RuntimeException e) { -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
