Repository: tika
Updated Branches:
  refs/heads/master 2df68c84b -> 75fa1386b
TIKA-2157 -- handle zip exception in embedded stream

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/75fa1386
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/75fa1386
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/75fa1386

Branch: refs/heads/master
Commit: 75fa1386b95ccf1bc7fb9a9f60811636baace05e
Parents: 2df68c8
Author: tballison <[email protected]>
Authored: Fri Nov 4 11:38:20 2016 -0400
Committer: tballison <[email protected]>
Committed: Fri Nov 4 11:38:20 2016 -0400

----------------------------------------------------------------------
 .../tika/parser/microsoft/HSLFExtractor.java | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tika/blob/75fa1386/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index ed3bbeb..3be3f37 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -17,10 +17,12 @@
 package org.apache.tika.parser.microsoft;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.HashSet;
 import java.util.List;
 
 import org.apache.poi.common.usermodel.Hyperlink;
+import org.apache.poi.hslf.exceptions.HSLFException;
 import org.apache.poi.hslf.model.Comment;
 import org.apache.poi.hslf.model.HeadersFooters;
 import org.apache.poi.hslf.model.OLEShape;
@@ -337,10 +339,18 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
                         mediaType = pic.getContentType();
                         break;
                 }
-
-                handleEmbeddedResource(
-                        TikaInputStream.get(pic.getData()), null, null,
-                        mediaType, xhtml, false);
+                try (TikaInputStream picIs = TikaInputStream.get(pic.getData())){
+                    handleEmbeddedResource(
+                            picIs, null, null,
+                            mediaType, xhtml, false);
+                } catch (HSLFException e) {
+                    if (e.getMessage() != null && e.getMessage().contains("incorrect data check")) {
+                        //TIKA-2157
+                        //swallow
+                    } else {
+                        throw e;
+                    }
+                }
             }
         }
