Author: tilman
Date: Wed Jan 14 12:30:22 2026
New Revision: 1931313

Log:
PDFBOX-6145: revert the last change because it breaks Tika on pages that have
no contents but do have annotations

Modified:
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java	Wed Jan 14 12:30:17 2026	(r1931312)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java	Wed Jan 14 12:30:22 2026	(r1931313)
@@ -298,7 +298,10 @@ public class PDFTextStripper extends Leg
 
         for (PDPage page : pages)
         {
-            processPage(page);
+            if (page.hasContents())
+            {
+                processPage(page);
+            }
             currentPageNo++;
         }
     }
@@ -340,10 +343,6 @@ public class PDFTextStripper extends Leg
                 && (startBookmarkPageNumber == -1 || currentPageNo >= startBookmarkPageNumber)
                 && (endBookmarkPageNumber == -1 || currentPageNo <= endBookmarkPageNumber))
         {
-            if (!page.hasContents())
-            {
-                return;
-            }
             startPage(page);
 
             int numberOfArticleSections = 1;

Reply via email to