This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 5f747ac  TIKA-3131 -- swap default values of averageCharTolerance and spacingTolerance to match PDFBox defaults (#325)
5f747ac is described below

commit 5f747ac3c7d19224cd9d9086346251096c1109fc
Author: Clark Perkins <[email protected]>
AuthorDate: Wed Jul 15 14:08:01 2020 -0500

    TIKA-3131 -- swap default values of averageCharTolerance and spacingTolerance to match PDFBox defaults (#325)
---
 .../src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index 81d7e0f..bb588df 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -119,11 +119,11 @@ public class PDFParserConfig implements Serializable {
 
     //The character width-based tolerance value used to estimate where spaces in text should be added
     //Default taken from PDFBox.
-    private Float averageCharTolerance = 0.5f;
+    private Float averageCharTolerance = 0.3f;
 
     //The space width-based tolerance value used to estimate where spaces in text should be added
     //Default taken from PDFBox.
-    private Float spacingTolerance = 0.3f;
+    private Float spacingTolerance = 0.5f;
 
     // The multiplication factor for line height to decide when a new paragraph starts.
     //Default taken from PDFBox.

Reply via email to