Added OS check: run UNIX-specific path assertions in TesseractOCRConfigTest only on UNIX systems
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/1a46c590 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/1a46c590 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/1a46c590 Branch: refs/heads/master Commit: 1a46c590cb0e0fbaccd6f9793c307fa6458acd16 Parents: 6809282 Author: Zarana Parekh <[email protected]> Authored: Wed Jul 6 11:46:11 2016 -0700 Committer: Zarana Parekh <[email protected]> Committed: Wed Jul 6 11:46:11 2016 -0700 ---------------------------------------------------------------------- tika-parsers/pom.xml | 7 +++++++ .../org/apache/tika/parser/ocr/TesseractOCRConfigTest.java | 9 ++++++--- .../test-properties/TesseractOCRConfig-partial.properties | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/1a46c590/tika-parsers/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml index c3e22df..a658014 100644 --- a/tika-parsers/pom.xml +++ b/tika-parsers/pom.xml @@ -266,6 +266,13 @@ <scope>compile</scope> </dependency> + <!-- TIKA-2021: Tesseract OCR Parser tests --> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + <version>3.4</version> + </dependency> + <!-- Provided dependencies --> <dependency> <groupId>org.xerial</groupId> http://git-wip-us.apache.org/repos/asf/tika/blob/1a46c590/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java index 63cdc26..2b821b6 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java +++ 
b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java @@ -16,6 +16,7 @@ */ package org.apache.tika.parser.ocr; +import org.apache.commons.lang.SystemUtils; import org.apache.tika.TikaTest; import org.junit.Test; @@ -74,14 +75,16 @@ public class TesseractOCRConfigTest extends TikaTest { "/test-properties/TesseractOCRConfig-full.properties"); TesseractOCRConfig config = new TesseractOCRConfig(stream); - assertEquals("Invalid overridden tesseractPath value", "/opt/tesseract" + File.separator, config.getTesseractPath()); - assertEquals("Invalid overridden tesseractPath value", "/usr/local/share" + File.separator, config.getTessdataPath()); + if(SystemUtils.IS_OS_UNIX) { + assertEquals("Invalid overridden tesseractPath value", "/opt/tesseract" + File.separator, config.getTesseractPath()); + assertEquals("Invalid overridden tesseractPath value", "/usr/local/share" + File.separator, config.getTessdataPath()); + assertEquals("Invalid overridden ImageMagickPath value", "/usr/local/bin/", config.getImageMagickPath()); + } assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage()); assertEquals("Invalid overridden pageSegMode value", "2", config.getPageSegMode()); assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr()); assertEquals("Invalid overridden maxFileSizeToOcr value", 2000000, config.getMaxFileSizeToOcr()); assertEquals("Invalid overridden timeout value", 240, config.getTimeout()); - assertEquals("Invalid overridden ImageMagickPath value", "/usr/local/bin/", config.getImageMagickPath()); assertEquals("Invalid overridden density value", 200 , config.getDensity()); assertEquals("Invalid overridden depth value", 8 , config.getDepth()); assertEquals("Invalid overridden filter value", "box" , config.getFilter()); http://git-wip-us.apache.org/repos/asf/tika/blob/1a46c590/tika-parsers/src/test/resources/test-properties/TesseractOCRConfig-partial.properties 
---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/resources/test-properties/TesseractOCRConfig-partial.properties b/tika-parsers/src/test/resources/test-properties/TesseractOCRConfig-partial.properties index 0b2173c..c84fa01 100644 --- a/tika-parsers/src/test/resources/test-properties/TesseractOCRConfig-partial.properties +++ b/tika-parsers/src/test/resources/test-properties/TesseractOCRConfig-partial.properties @@ -17,7 +17,7 @@ language=fra+deu timeout=240 minFileSizeToOcr=1 -enableProcessing=0 +enableImageProcessing=0 density=200 depth=8 filter=box
