This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 5f747ac TIKA-3131 -- swap default values of averageCharTolerance and
spacingTolerance to match PDFBox defaults (#325)
5f747ac is described below
commit 5f747ac3c7d19224cd9d9086346251096c1109fc
Author: Clark Perkins <[email protected]>
AuthorDate: Wed Jul 15 14:08:01 2020 -0500
TIKA-3131 -- swap default values of averageCharTolerance and
spacingTolerance to match PDFBox defaults (#325)
---
.../src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index 81d7e0f..bb588df 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -119,11 +119,11 @@ public class PDFParserConfig implements Serializable {
//The character width-based tolerance value used to estimate where spaces in text should be added
//Default taken from PDFBox.
- private Float averageCharTolerance = 0.5f;
+ private Float averageCharTolerance = 0.3f;
//The space width-based tolerance value used to estimate where spaces in text should be added
//Default taken from PDFBox.
- private Float spacingTolerance = 0.3f;
+ private Float spacingTolerance = 0.5f;
// The multiplication factor for line height to decide when a new paragraph starts.
//Default taken from PDFBox.