Repository: tika
Updated Branches:
  refs/heads/2.x 6ca74bec6 -> 2d5189186

TIKA-2157 - handle zip exception in embedded file

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/2d518918
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/2d518918
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/2d518918

Branch: refs/heads/2.x
Commit: 2d5189186668166cdc7109e9096f9c6f4dcc5e6b
Parents: 6ca74be
Author: tballison <[email protected]>
Authored: Fri Nov 4 11:41:29 2016 -0400
Committer: tballison <[email protected]>
Committed: Fri Nov 4 11:41:29 2016 -0400

----------------------------------------------------------------------
 .../apache/tika/parser/microsoft/HSLFExtractor.java | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tika/blob/2d518918/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index 582a369..ce0ede7 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -22,6 +22,7 @@ import java.util.List;
 
 import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.poi.common.usermodel.Hyperlink;
+import org.apache.poi.hslf.exceptions.HSLFException;
 import org.apache.poi.hslf.model.Comment;
 import org.apache.poi.hslf.model.HeadersFooters;
 import org.apache.poi.hslf.model.OLEShape;
@@ -329,9 +330,18 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
                         break;
                 }
 
-                handleEmbeddedResource(
-                        TikaInputStream.get(pic.getData()), null, null,
-                        mediaType, xhtml, false);
+                try (TikaInputStream picIs = TikaInputStream.get(pic.getData())){
+                    handleEmbeddedResource(
+                            picIs, null, null,
+                            mediaType, xhtml, false);
+                } catch (HSLFException e) {
+                    if (e.getMessage() != null && e.getMessage().contains("incorrect data check")) {
+                        //TIKA-2157
+                        //swallow
+                    } else {
+                        throw e;
+                    }
+                }
             }
         }
