This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tika.git.
The following commit(s) were added to refs/heads/main by this push: new 237e73f18 TIKA-4216 (#1673) 237e73f18 is described below commit 237e73f18f46af8322a910178fa8ed99e3710d8f Author: Tim Allison <talli...@apache.org> AuthorDate: Thu Mar 21 10:08:05 2024 -0400 TIKA-4216 (#1673) * TIKA-4216 -- Avoid checking for imagemagick if image processing is disabled --- .../apache/tika/parser/ocr/TesseractOCRParser.java | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java index a79e05b1d..a28ae8951 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java @@ -126,6 +126,8 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements }))); private static volatile boolean HAS_WARNED = false; + private static volatile boolean HAS_CHECKED_FOR_IMAGE_MAGICK = false; + //if a user specifies a custom tess path or tessdata path //load the available languages at initialization time private final Set<String> langs = new HashSet<>(); @@ -190,7 +192,10 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements return hasTesseract; } - boolean hasImageMagick() throws TikaConfigException { + synchronized boolean hasImageMagick() throws TikaConfigException { + if (HAS_CHECKED_FOR_IMAGE_MAGICK) { + return hasImageMagick; + } // Fetch where the config says to find ImageMagick Program String fullImageMagickPath = imageMagickPath + 
getImageMagickProg(); @@ -208,7 +213,7 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements LOG.debug("ImageMagick does not appear to be installed " + "(commandline: " + fullImageMagickPath + ")"); } - + HAS_CHECKED_FOR_IMAGE_MAGICK = true; return hasImageMagick; } @@ -245,6 +250,11 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements return; } + //if you haven't checked yet, and a per file config requests imagemagick + //and if the default is not to use image processing + if (! HAS_CHECKED_FOR_IMAGE_MAGICK && config.isEnableImagePreprocessing()) { + hasImageMagick = hasImageMagick(); + } try (TemporaryResources tmp = new TemporaryResources()) { TikaInputStream tikaStream = TikaInputStream.get(stream, tmp, metadata); @@ -528,7 +538,11 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements @Override public void initialize(Map<String, Param> params) throws TikaConfigException { hasTesseract = hasTesseract(); - hasImageMagick = hasImageMagick(); + if (isEnableImagePreprocessing()) { + hasImageMagick = hasImageMagick(); + } else { + hasImageMagick = false; + } if (preloadLangs) { preloadLangs(); if (!StringUtils.isBlank(defaultConfig.getLanguage())) {