This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 3a8990d4d improve serialization
3a8990d4d is described below
commit 3a8990d4d6a25f359962ce8a1a8b5e5d22486a93
Author: tallison <[email protected]>
AuthorDate: Thu Nov 14 13:58:59 2024 -0500
improve serialization
---
.../src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index ec021643c..f802b26f5 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -199,6 +199,9 @@ public class TesseractOCRConfig implements Serializable {
* @param pageSeparator
*/
public void setPageSeparator(String pageSeparator) {
+ if (pageSeparator.isBlank()) {
+ return;
+ }
Matcher m = ALLOWABLE_PAGE_SEPARATORS_PATTERN.matcher(pageSeparator);
if (!m.find()) {
throw new IllegalArgumentException(pageSeparator + " contains
illegal characters.\n" +