Author: tilman
Date: Wed Jan 14 12:30:12 2026
New Revision: 1931311
Log:
PDFBOX-6145: revert last change because it breaks Tika with pages that have no
contents but have annotations
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java	Wed Jan 14 10:30:56 2026	(r1931310)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java	Wed Jan 14 12:30:12 2026	(r1931311)
@@ -302,7 +302,10 @@ public class PDFTextStripper extends Leg
for (PDPage page : pages)
{
- processPage(page);
+ if (page.hasContents())
+ {
+ processPage(page);
+ }
currentPageNo++;
}
}
@@ -344,10 +347,6 @@ public class PDFTextStripper extends Leg
&& (startBookmarkPageNumber == -1 || currentPageNo >= startBookmarkPageNumber)
&& (endBookmarkPageNumber == -1 || currentPageNo <= endBookmarkPageNumber))
{
- if (!page.hasContents())
- {
- return;
- }
startPage(page);
int numberOfArticleSections = 1;