This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new 74f75cfe2 TIKA-4326: update pdfbox
74f75cfe2 is described below
commit 74f75cfe2d83fc4424a6b09832b2803de4b88a7c
Author: Tilman Hausherr <[email protected]>
AuthorDate: Fri Jan 24 05:50:09 2025 +0100
TIKA-4326: update pdfbox
---
tika-parent/pom.xml | 3 +--
.../src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java | 3 +--
.../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 3 +--
3 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 863b742d8..96e209691 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -420,8 +420,7 @@
<osgi.util.version>1.2.0</osgi.util.version>
<parso.version>2.0.14</parso.version>
<pax.exam.version>4.14.0</pax.exam.version>
- <!-- TODO when updating to 3.0.4, search for TIKA-2342 and achtivate the changes -->
- <pdfbox.version>3.0.3</pdfbox.version>
+ <pdfbox.version>3.0.4</pdfbox.version>
<poi.version>5.4.0</poi.version>
<protobuf.version>3.25.5</protobuf.version>
<quartz.version>2.5.0</quartz.version>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index af6213ba9..bcc5b739a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -226,8 +226,7 @@ public class PDFParserConfig implements Serializable {
pdf2XHTML.setDropThreshold(dropThreshold);
}
pdf2XHTML.setSuppressDuplicateOverlappingText(isSuppressDuplicateOverlappingText());
- // TODO TIKA-2342 activate after PDFBox release
- //pdf2XHTML.setIgnoreContentStreamSpaceGlyphs(isIgnoreContentStreamSpaceGlyphs());
+ pdf2XHTML.setIgnoreContentStreamSpaceGlyphs(isIgnoreContentStreamSpaceGlyphs());
}
/**
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 1c722994e..d3f4f9f28 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -465,8 +465,7 @@ public class PDFParserTest extends TikaTest {
}
- // TODO TIKA-2342 activate after PDFBox release
- // @Test
+ @Test
public void testIgnoreContentStreamSpaceGlyphs() throws Exception {
PDFParser parser = new PDFParser();
// Default is false (keep spaces, don't sort):