Author: tilman
Date: Wed Jan 14 12:30:17 2026
New Revision: 1931312
Log:
PDFBOX-6145: revert the last change because it breaks Tika on pages that have no
contents but do have annotations
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java	Wed Jan 14 12:30:12 2026	(r1931311)
+++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java	Wed Jan 14 12:30:17 2026	(r1931312)
@@ -299,7 +299,10 @@ public class PDFTextStripper extends Leg
for (PDPage page : pages)
{
- processPage(page);
+ if (page.hasContents())
+ {
+ processPage(page);
+ }
currentPageNo++;
}
}
@@ -341,10 +344,6 @@ public class PDFTextStripper extends Leg
&& (startBookmarkPageNumber == -1 || currentPageNo >=
startBookmarkPageNumber)
&& (endBookmarkPageNumber == -1 || currentPageNo <=
endBookmarkPageNumber))
{
- if (!page.hasContents())
- {
- return;
- }
startPage(page);
int numberOfArticleSections = 1;