This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch branch_2x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_2x by this push:
new 5fb4e3fc73 TIKA-4622: Add test for PDF annotations without page content streams (#2530)
5fb4e3fc73 is described below
commit 5fb4e3fc73a82250236031bab7f77bcc8f72739d
Author: Tilman Hausherr <[email protected]>
AuthorDate: Wed Jan 14 14:22:05 2026 +0100
TIKA-4622: Add test for PDF annotations without page content streams (#2530)
---
.../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 6eb0b4a0ae..b08c12398e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -369,6 +369,13 @@ public class PDFParserTest extends TikaTest {
assertContains("igalsh", r.xml);
}
+ // TIKA-4622 / PDFBOX-6145 make sure that annotations aren't missed if no page content stream
+ @Test
+ public void testAnnotationNoContents() throws Exception {
+ XMLResult r = getXML("testPDFFileEmbInAnnotation_noContents.pdf");
+ assertContains("Excel.xlsx", r.xml);
+ }
+
@Test
public void testEmbeddedPDFs() throws Exception {
List<Metadata> metadataList =
getRecursiveMetadata("testPDFPackage.pdf");