This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

The following commit(s) were added to refs/heads/master by this push:
       new  ebc87ae   TIKA-2359: Alert user that tesseract is available and will be used.
ebc87ae is described below

commit ebc87aec539eef752072e95315daee65f7f42ebb
Author: tballison <[email protected]>
AuthorDate: Thu May 18 06:54:41 2017 -0400

    TIKA-2359: Alert user that tesseract is available and will be used.
---
 .../org/apache/tika/parser/ocr/TesseractOCRParser.java     | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 9728b38..121e096 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -101,16 +101,24 @@ public class TesseractOCRParser extends AbstractParser {
                     MediaType.image("jpx"), MediaType.image("x-portable-pixmap")
             })));
     private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<>();
-
+    private static volatile boolean HAS_ALERTED = false;
 
 
     @Override
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         // If Tesseract is installed, offer our supported image types
         TesseractOCRConfig config = context.get(TesseractOCRConfig.class, DEFAULT_CONFIG);
-        if (hasTesseract(config))
+        if (hasTesseract(config)) {
+            if (! HAS_ALERTED) {
+                LOG.info("Tesseract OCR is installed and will be automatically applied to image files.\n"+
+                        "This may dramatically slow down content extraction (TIKA-2359).\n"+
+                        "As of Tika 1.15 (and prior versions), Tesseract is automatically called.\n"+
+                        "In future versions of Tika, users may need to turn the TesseractOCRParser on via TikaConfig."
+                );
+                HAS_ALERTED = true;
+            }
             return SUPPORTED_TYPES;
-
+        }
         // Otherwise don't advertise anything, so the other image parsers
         //  can be selected instead
         return Collections.emptySet();

-- 
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].

Reply via email to