Author: nick
Date: Wed Jan 7 12:14:18 2015
New Revision: 1650046
URL: http://svn.apache.org/r1650046
Log:
TIKA-1445 Unit test to show that when an invalid tesseract config is given, and
tesseract cannot be found, TesseractOCRParser will return no types and will not
be selected by DefaultParser
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java?rev=1650046&r1=1650045&r2=1650046&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
Wed Jan 7 12:14:18 2015
@@ -17,6 +17,7 @@
package org.apache.tika.parser.ocr;
import static org.apache.tika.parser.ocr.TesseractOCRParser.getTesseractProg;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeTrue;
@@ -24,10 +25,14 @@ import java.io.InputStream;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.external.ExternalParser;
+import org.apache.tika.parser.image.ImageParser;
+import org.apache.tika.parser.jpeg.JpegParser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.junit.Test;
@@ -45,6 +50,38 @@ public class TesseractOCRParserTest exte
// If Tesseract is not on the path, do not run the test.
return ExternalParser.check(checkCmd);
}
+
+ @Test
+ public void offersNoTypesIfNotFound() throws Exception { // TIKA-1445: offered types depend on the Tesseract config in the ParseContext
+ TesseractOCRParser parser = new TesseractOCRParser();
+ DefaultParser defaultParser = new DefaultParser();
+ MediaType png = MediaType.image("png");
+
+ // With a made-up Tesseract path in the config, the parser should offer no types
+ TesseractOCRConfig invalidConfig = new TesseractOCRConfig();
+ invalidConfig.setTesseractPath("/made/up/path");
+
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(TesseractOCRConfig.class, invalidConfig);
+
+ // No types offered when the invalid config is supplied through the ParseContext
+ assertEquals(0, parser.getSupportedTypes(parseContext).size());
+
+ // And DefaultParser won't use us: PNG is expected to resolve to ImageParser instead
+ assertEquals(ImageParser.class,
defaultParser.getParsers(parseContext).get(png).getClass());
+
+
+ // With a default config, will offer the usual types; the rest is skipped unless canRun(normalConfig) is true
+ TesseractOCRConfig normalConfig = new TesseractOCRConfig();
+ assumeTrue(canRun(normalConfig));
+ parseContext.set(TesseractOCRConfig.class, normalConfig);
+
+ assertEquals(5, parser.getSupportedTypes(parseContext).size());
+ assertTrue(parser.getSupportedTypes(parseContext).contains(png));
+
+ // DefaultParser is now expected to pick TesseractOCRParser for PNG
+ assertEquals(TesseractOCRParser.class,
defaultParser.getParsers(parseContext).get(png).getClass());
+ }
@Test
public void testPDFOCR() throws Exception {