This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch TIKA-4727-fix-ioobe
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4d9919f99c398b9476cd249979fb5c5d9f76a91c
Author: tballison <[email protected]>
AuthorDate: Mon Jun 8 16:03:45 2026 +0200

    TIKA-4727 -- catch ioobe
---
 .../main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index e516979da0..d83c496a52 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -614,7 +614,13 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
             }
             for (HSLFShape shape : shapes) {
                 if (shape instanceof HSLFPictureShape) {
-                    HSLFPictureData pd = ((HSLFPictureShape) shape).getPictureData();
+                    HSLFPictureData pd;
+                    try {
+                        pd = ((HSLFPictureShape) shape).getPictureData();
+                    } catch (Exception e) {
+                        // corrupt Escher BSE record -- skip page anchoring for this shape
+                        continue;
+                    }
                     if (pd != null) {
                         picToSlides.computeIfAbsent(pd.getIndex(), k -> new HashSet<>()).add(slideNum);
                     }
