This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new bfc99c6975 TIKA-4622: Add test for PDF annotations without page content streams (#2530)
bfc99c6975 is described below
commit bfc99c69753a805bbff67291d65638bb7a7eb726
Author: Tilman Hausherr <[email protected]>
AuthorDate: Wed Jan 14 14:22:05 2026 +0100
TIKA-4622: Add test for PDF annotations without page content streams (#2530)
---
.../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index a7fb92621e..9b50edcbc9 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -371,6 +371,13 @@ public class PDFParserTest extends TikaTest {
assertContains("igalsh", r.xml);
}
+ // TIKA-4622 / PDFBOX-6145 make sure that annotations aren't missed if no page content stream
+ @Test
+ public void testAnnotationNoContents() throws Exception {
+ XMLResult r = getXML("testPDFFileEmbInAnnotation_noContents.pdf");
+ assertContains("Excel.xlsx", r.xml);
+ }
+
@Test
public void testEmbeddedPDFs() throws Exception {
List<Metadata> metadataList =
getRecursiveMetadata("testPDFPackage.pdf");