This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika.git
commit 95cf1a364de643828334935e9552dd6c6f84c416 Author: tallison <[email protected]> AuthorDate: Mon Oct 21 10:50:47 2019 -0400 TIKA-2969 -- use assumeTrue more often in tests to let users know when tests are skipped if external resources are not available (e.g. tesseract)...via Eric Pugh --- .../tika/dl/imagerec/DL4JInceptionV3NetTest.java | 28 +++++++++---------- .../apache/tika/dl/imagerec/DL4JVGG16NetTest.java | 31 +++++++++++----------- .../tika/parser/ocr/TesseractOCRParserTest.java | 16 ++++++----- .../org/apache/tika/parser/pdf/PDFParserTest.java | 11 +++----- 4 files changed, 44 insertions(+), 42 deletions(-) diff --git a/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java b/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java index 601b464..4d5b340 100644 --- a/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java +++ b/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java @@ -17,6 +17,7 @@ package org.apache.tika.dl.imagerec; import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeTrue; import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; @@ -29,27 +30,26 @@ public class DL4JInceptionV3NetTest { public void recognise() throws Exception { TikaConfig config = null; try { - config = new TikaConfig(getClass().getResourceAsStream("dl4j-inception3-config.xml")); + config = new TikaConfig(getClass().getResourceAsStream("dl4j-inception3-config.xml")); } catch (Exception e) { if (e.getMessage() != null && (e.getMessage().contains("Connection refused") || e.getMessage().contains("connect timed out"))) { - //skip test - return; + assumeTrue("skipping test because of connection issue", false); } + throw e; } - if (config != null) { - Tika tika = new Tika(config); - Metadata md = new Metadata(); - tika.parse(getClass().getResourceAsStream("cat.jpg"), md); - String[] objects = md.getValues("OBJECT"); - boolean found = false; - for (String object : objects) { - if (object.contains("_cat")) { - found = true; - } + assumeTrue("something went wrong loading tika config", config != null); + Tika tika = new Tika(config); + Metadata md = new Metadata(); + tika.parse(getClass().getResourceAsStream("cat.jpg"), md); + String[] objects = md.getValues("OBJECT"); + boolean found = false; + for (String object : objects) { + if (object.contains("_cat")) { + found = true; } - assertTrue(found); } + assertTrue(found); } } \ No newline at end of file diff --git a/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java b/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java index 15a4c0e..a0915ec 100644 --- a/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java +++ b/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java @@ -17,6 +17,7 @@ package org.apache.tika.dl.imagerec; import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeTrue; import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; @@ -36,24 +37,24 @@ public class DL4JVGG16NetTest { } catch (Exception e) { if (e.getMessage() != null && (e.getMessage().contains("Connection refused") - || e.getMessage().contains("connect timed out"))) { - //skip test - return; + || e.getMessage().contains("connect timed out"))) { + assumeTrue("skipping test because of connection issue", false); } + throw e; } - - if(config != null) { - Tika tika = new Tika(config); - Metadata md = new Metadata(); - tika.parse(getClass().getResourceAsStream("lion.jpg"), md); - String[] objects = md.getValues("OBJECT"); - boolean found = false; - for (String object : objects) { - if (object.contains("lion")){ - found = true; - } + + assumeTrue("something went wrong loading tika config", config != null); + Tika tika = new Tika(config); + Metadata md = new Metadata(); + tika.parse(getClass().getResourceAsStream("lion.jpg"), md); + String[] objects = md.getValues("OBJECT"); + boolean found = false; + for (String object : objects) { + if (object.contains("lion")) { + found = true; } - assertTrue(found); } + assertTrue(found); } + } \ No newline at end of file diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java index 9ebcee0..269a33f 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java @@ -44,6 +44,7 @@ import org.apache.tika.parser.image.ImageParser; import org.apache.tika.parser.pdf.PDFParserConfig; import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler; import org.apache.tika.sax.BasicContentHandlerFactory; +import org.junit.Assume; import org.junit.Test; import org.xml.sax.helpers.DefaultHandler; @@ -97,7 +98,7 @@ public class TesseractOCRParserTest extends TikaTest { MediaType png = MediaType.image("png"); // Assuming that Tesseract is on the path, we should find 5 Parsers that support PNG. - assumeTrue(canRun()); + assumeTrue("can run OCR", canRun()); assertEquals(8, parser.getSupportedTypes(parseContext).size()); assertTrue(parser.getSupportedTypes(parseContext).contains(png)); @@ -134,7 +135,7 @@ public class TesseractOCRParserTest extends TikaTest { @Test public void testOCROutputsHOCR() throws Exception { - assumeTrue(canRun()); + assumeTrue("can run OCR", canRun()); String resource = "/test-documents/testOCR.pdf"; @@ -149,7 +150,9 @@ public class TesseractOCRParserTest extends TikaTest { } private void testBasicOCR(String resource, String[] nonOCRContains, int numMetadatas) throws Exception{ - String contents = runOCR(resource, nonOCRContains, numMetadatas, + Assume.assumeTrue("can run OCR", canRun()); + + String contents = runOCR(resource, nonOCRContains, numMetadatas, BasicContentHandlerFactory.HANDLER_TYPE.TEXT, TesseractOCRConfig.OUTPUT_TYPE.TXT); if (canRun()) { if(resource.substring(resource.lastIndexOf('.'), resource.length()).equals(".jpg")) { @@ -202,7 +205,8 @@ public class TesseractOCRParserTest extends TikaTest { @Test public void testSingleImage() throws Exception { - assumeTrue(canRun()); + Assume.assumeTrue("can run OCR", canRun()); + String xml = getXML("testOCR.jpg").xml; assertContains("OCR Testing", xml); //test metadata extraction @@ -267,7 +271,7 @@ public class TesseractOCRParserTest extends TikaTest { @Test public void testInterwordSpacing() throws Exception { - assumeTrue(canRun()); + assumeTrue("can run OCR", canRun()); //default String xml = getXML("testOCR_spacing.png").xml; assertContains("The quick", xml); @@ -287,7 +291,7 @@ public class TesseractOCRParserTest extends TikaTest { @Test public void confirmMultiPageTiffHandling() throws Exception { - assumeTrue(canRun()); + assumeTrue("can run OCR", canRun()); //tesseract should handle multipage tiffs by itself //let's confirm that String xml = getXML("testTIFF_multipage.tif").xml; diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index 1337b25..63d56ba 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.junit.Assume.assumeTrue; import java.io.InputStream; import java.util.Arrays; @@ -1239,7 +1240,7 @@ public class PDFParserTest extends TikaTest { @Test public void testEmbeddedDocsWithOCROnly() throws Exception { - if (! canRunOCR()) { return; } + assumeTrue("can run OCR", canRunOCR()); for (PDFParserConfig.OCR_STRATEGY strategy : PDFParserConfig.OCR_STRATEGY.values()) { PDFParserConfig config = new PDFParserConfig(); @@ -1278,9 +1279,7 @@ public class PDFParserTest extends TikaTest { @Test public void testJBIG2OCROnly() throws Exception { - if (!canRunOCR()) { - return; - } + assumeTrue("can run OCR", canRunOCR()); PDFParserConfig config = new PDFParserConfig(); config.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.OCR_ONLY); ParseContext context = new ParseContext(); @@ -1296,9 +1295,7 @@ public class PDFParserTest extends TikaTest { //TIKA-2970 -- make sure that configurations set on the TesseractOCRParser //make it through to when the TesseractOCRParser is called via //the PDFParser - if (!canRunOCR()) { - return; - } + assumeTrue("can run OCR", canRunOCR()); //via the config, tesseract should skip this file because it is too large InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-ocr-config.xml");
