Author: tilman
Date: Tue Jan 13 11:46:45 2026
New Revision: 1931288
Log:
PDFBOX-6145: move the page.hasContents() check after the page number range
check, so that pages outside the requested range are no longer checked for
content
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Tue Jan 13 11:46:41 2026 (r1931287)
+++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Tue Jan 13 11:46:45 2026 (r1931288)
@@ -299,10 +299,7 @@ public class PDFTextStripper extends Leg
for (PDPage page : pages)
{
- if (page.hasContents())
- {
- processPage(page);
- }
+ processPage(page);
currentPageNo++;
}
}
@@ -344,6 +341,10 @@ public class PDFTextStripper extends Leg
&& (startBookmarkPageNumber == -1 || currentPageNo >=
startBookmarkPageNumber)
&& (endBookmarkPageNumber == -1 || currentPageNo <=
endBookmarkPageNumber))
{
+ if (!page.hasContents())
+ {
+ return;
+ }
startPage(page);
int numberOfArticleSections = 1;