This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 6fb39c9583e04edf72bd19f800b591b1f49c6497
Author: Clark Perkins <[email protected]>
AuthorDate: Wed Jul 15 14:08:01 2020 -0500

    TIKA-3131 -- swap default values of averageCharTolerance and spacingTolerance to match PDFBox defaults (#325)
---
 .../src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index 9613781..f88ff0f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -119,11 +119,11 @@ public class PDFParserConfig implements Serializable {

     //The character width-based tolerance value used to estimate where spaces in text should be added
     //Default taken from PDFBox.
-    private Float averageCharTolerance = 0.5f;
+    private Float averageCharTolerance = 0.3f;

     //The space width-based tolerance value used to estimate where spaces in text should be added
     //Default taken from PDFBox.
-    private Float spacingTolerance = 0.3f;
+    private Float spacingTolerance = 0.5f;

     // The multiplication factor for line height to decide when a new paragraph starts.
     //Default taken from PDFBox.
