This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 3d2692160 TIKA-4327: update pdfbox
3d2692160 is described below

commit 3d2692160b6b9576bbae7bf319015cdd48a72604
Author: Tilman Hausherr <[email protected]>
AuthorDate: Fri Jan 24 05:50:53 2025 +0100

    TIKA-4327: update pdfbox
---
 tika-parent/pom.xml                                                   | 4 +---
 .../src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java     | 3 +--
 .../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java       | 3 +--
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 2c820a2c3..124ddd572 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -423,9 +423,7 @@
     <osgi.util.version>1.2.0</osgi.util.version>
     <parso.version>2.0.14</parso.version>
     <pax.exam.version>4.14.0</pax.exam.version>
-    <!-- TODO when updating to 3.0.4, search for TIKA-2342 and achtivate the changes,
-         also update the documentation with the new option    -->
-    <pdfbox.version>3.0.3</pdfbox.version>
+    <pdfbox.version>3.0.4</pdfbox.version>
     <poi.version>5.4.0</poi.version>
     <protobuf.version>3.25.5</protobuf.version>
     <quartz.version>2.5.0</quartz.version>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index 62214455f..2df300e7d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -226,8 +226,7 @@ public class PDFParserConfig implements Serializable {
             pdf2XHTML.setDropThreshold(dropThreshold);
         }
         pdf2XHTML.setSuppressDuplicateOverlappingText(isSuppressDuplicateOverlappingText());
-        // TODO TIKA-2342 activate after PDFBox release
-        //pdf2XHTML.setIgnoreContentStreamSpaceGlyphs(isIgnoreContentStreamSpaceGlyphs());
+        pdf2XHTML.setIgnoreContentStreamSpaceGlyphs(isIgnoreContentStreamSpaceGlyphs());
     }
 
     /**
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 585ec2d82..0d85d0ed5 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -465,8 +465,7 @@ public class PDFParserTest extends TikaTest {
 
     }
 
-    // TODO TIKA-2342 activate after PDFBox release
-    // @Test
+    @Test
     public void testIgnoreContentStreamSpaceGlyphs() throws Exception {
         PDFParser parser = new PDFParser();
         // Default is false (keep spaces, don't sort):

Reply via email to