Make ImageMagick image processing optional (new enableProcessing setting, disabled by default)
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/b10f2506 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/b10f2506 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/b10f2506 Branch: refs/heads/master Commit: b10f2506939a1c62648e2dd7b1094e42f840541a Parents: de84d71 Author: Zarana Parekh <[email protected]> Authored: Mon Jun 27 18:22:56 2016 -0700 Committer: Zarana Parekh <[email protected]> Committed: Mon Jun 27 18:22:56 2016 -0700 ---------------------------------------------------------------------- tika-bundle/pom.xml | 2 -- tika-parsers/pom.xml | 2 -- .../tika/parser/ocr/TesseractOCRConfig.java | 18 ++++++++++++++++++ .../tika/parser/ocr/TesseractOCRParser.java | 2 +- .../tika/parser/ocr/TesseractOCRConfig.properties | 2 ++ 5 files changed, 21 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-bundle/pom.xml ---------------------------------------------------------------------- diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml index e94b43f..e993c4b 100644 --- a/tika-bundle/pom.xml +++ b/tika-bundle/pom.xml @@ -112,7 +112,6 @@ </dependencies> <build> - <pluginManagement> <plugins> <plugin> <groupId>org.apache.felix</groupId> @@ -427,7 +426,6 @@ </configuration> </plugin> </plugins> - </pluginManagement> </build> <organization> http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml index cab385e..fc1107c 100644 --- a/tika-parsers/pom.xml +++ b/tika-parsers/pom.xml @@ -509,7 +509,6 @@ </file> </activation> <build> - <pluginManagement> <plugins> <plugin> <groupId>org.codehaus.gmaven</groupId> @@ -540,7 +539,6 @@ </executions> </plugin> </plugins> - </pluginManagement> </build> </profile> </profiles> 
http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java index d660142..0e553b6 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java @@ -62,6 +62,9 @@ public class TesseractOCRConfig implements Serializable{ // Maximum time (seconds) to wait for the ocring process termination private int timeout = 120; + + // enable image processing (optional) + private int enableProcessing = 0; // Path to ImageMagick program, if not on system path. private String ImageMagickPath = ""; @@ -134,6 +137,8 @@ public class TesseractOCRConfig implements Serializable{ getProp(props, "timeout", getTimeout())); // set parameters for ImageMagick + setEnableProcessing( + getProp(props, "enableProcessing", isEnableProcessing())); setImageMagickPath( getProp(props, "ImageMagickPath", getImageMagickPath())); setDensity( @@ -256,6 +261,19 @@ public class TesseractOCRConfig implements Serializable{ return timeout; } + /** @see #setEnableProcessing(boolean) */ + public int isEnableProcessing() { + return enableProcessing; + } + + /** + * Set the value to true if processing is to be enabled. + * Default value is false. 
+ */ + public void setEnableProcessing(int enableProcessing) { + this.enableProcessing = enableProcessing; + } + /** * @return the density */ http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java index dae4a64..dbecb71 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java @@ -320,7 +320,7 @@ public class TesseractOCRParser extends AbstractParser { FileUtils.copyFile(input, tmpFile); // Process image if ImageMagick Tool is present - if(hasImageMagick(config)) { + if(config.isEnableProcessing() == 1 && hasImageMagick(config)) { processImage(tmpFile,config); } http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties b/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties index a0a0b54..e18c229 100644 --- a/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties @@ -22,6 +22,8 @@ minFileSizeToOcr=0 timeout=120 # properties for image processing +# to enable processing, set enableProcessing to 1 +enableProcessing=0 ImageMagickPath= density=300 depth=4
