Author: tilman
Date: Wed Jan 14 12:30:22 2026
New Revision: 1931313
Log:
PDFBOX-6145: revert last change because it breaks tika with pages that have no
contents but have annotations
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java	Wed Jan 14 12:30:17 2026	(r1931312)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java	Wed Jan 14 12:30:22 2026	(r1931313)
@@ -298,7 +298,10 @@ public class PDFTextStripper extends Leg
for (PDPage page : pages)
{
- processPage(page);
+ if (page.hasContents())
+ {
+ processPage(page);
+ }
currentPageNo++;
}
}
@@ -340,10 +343,6 @@ public class PDFTextStripper extends Leg
&& (startBookmarkPageNumber == -1 || currentPageNo >=
startBookmarkPageNumber)
&& (endBookmarkPageNumber == -1 || currentPageNo <=
endBookmarkPageNumber))
{
- if (!page.hasContents())
- {
- return;
- }
startPage(page);
int numberOfArticleSections = 1;