This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 3d2692160 TIKA-4327: update pdfbox
3d2692160 is described below
commit 3d2692160b6b9576bbae7bf319015cdd48a72604
Author: Tilman Hausherr <[email protected]>
AuthorDate: Fri Jan 24 05:50:53 2025 +0100
TIKA-4327: update pdfbox
---
tika-parent/pom.xml | 4 +---
.../src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java | 3 +--
.../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 3 +--
3 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 2c820a2c3..124ddd572 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -423,9 +423,7 @@
<osgi.util.version>1.2.0</osgi.util.version>
<parso.version>2.0.14</parso.version>
<pax.exam.version>4.14.0</pax.exam.version>
- <!-- TODO when updating to 3.0.4, search for TIKA-2342 and achtivate the changes,
- also update the documentation with the new option -->
- <pdfbox.version>3.0.3</pdfbox.version>
+ <pdfbox.version>3.0.4</pdfbox.version>
<poi.version>5.4.0</poi.version>
<protobuf.version>3.25.5</protobuf.version>
<quartz.version>2.5.0</quartz.version>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index 62214455f..2df300e7d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -226,8 +226,7 @@ public class PDFParserConfig implements Serializable {
pdf2XHTML.setDropThreshold(dropThreshold);
}
pdf2XHTML.setSuppressDuplicateOverlappingText(isSuppressDuplicateOverlappingText());
- // TODO TIKA-2342 activate after PDFBox release
- //pdf2XHTML.setIgnoreContentStreamSpaceGlyphs(isIgnoreContentStreamSpaceGlyphs());
+ pdf2XHTML.setIgnoreContentStreamSpaceGlyphs(isIgnoreContentStreamSpaceGlyphs());
}
/**
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 585ec2d82..0d85d0ed5 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -465,8 +465,7 @@ public class PDFParserTest extends TikaTest {
}
- // TODO TIKA-2342 activate after PDFBox release
- // @Test
+ @Test
public void testIgnoreContentStreamSpaceGlyphs() throws Exception {
PDFParser parser = new PDFParser();
// Default is false (keep spaces, don't sort):