This is an automated email from the ASF dual-hosted git repository.
tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 55655ed296 TIKA-4727 -- catch ioobe (#2885)
55655ed296 is described below
commit 55655ed296657a583679bf586e650c4100cd838f
Author: Tim Allison <[email protected]>
AuthorDate: Tue Jun 9 21:49:52 2026 +0200
TIKA-4727 -- catch ioobe (#2885)
---
.../java/org/apache/tika/parser/microsoft/HSLFExtractor.java | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index e516979da0..dca24644dd 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -614,7 +614,14 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
}
for (HSLFShape shape : shapes) {
if (shape instanceof HSLFPictureShape) {
- HSLFPictureData pd = ((HSLFPictureShape) shape).getPictureData();
+ HSLFPictureData pd;
+ try {
+ pd = ((HSLFPictureShape) shape).getPictureData();
+ } catch (IndexOutOfBoundsException e) {
+ // corrupt Escher BSE record -- skip page anchoring for this shape
+ EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
+ continue;
+ }
if (pd != null) {
picToSlides.computeIfAbsent(pd.getIndex(), k -> new HashSet<>()).add(slideNum);
}