This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_3x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 1fc0ac1eb0dccf4c9fe28cf2cab0159b1f4591ca
Author: tallison <[email protected]>
AuthorDate: Thu Nov 14 13:58:59 2024 -0500

    improve serialization

    (cherry picked from commit 3a8990d4d6a25f359962ce8a1a8b5e5d22486a93)
---
 .../src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index ec021643c..f802b26f5 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -199,6 +199,9 @@ public class TesseractOCRConfig implements Serializable {
      * @param pageSeparator
      */
     public void setPageSeparator(String pageSeparator) {
+        if (pageSeparator.isBlank()) {
+            return;
+        }
         Matcher m = ALLOWABLE_PAGE_SEPARATORS_PATTERN.matcher(pageSeparator);
         if (!m.find()) {
             throw new IllegalArgumentException(pageSeparator + " contains illegal characters.\n" +
