Repository: tika Updated Branches: refs/heads/master 7dda921de -> c17d1b8a6
TIKA-2174 add jp2 and jpx to file formats handled by TesseractOCRParser Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c17d1b8a Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c17d1b8a Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c17d1b8a Branch: refs/heads/master Commit: c17d1b8a6bef4409787aa2b58b96f691dfcf1170 Parents: 7dda921 Author: tballison <[email protected]> Authored: Wed Nov 9 12:46:35 2016 -0500 Committer: tballison <[email protected]> Committed: Wed Nov 9 12:46:35 2016 -0500 ---------------------------------------------------------------------- .../main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java | 3 ++- .../java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/c17d1b8a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java index 5066f3c..90fe18c 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java @@ -95,7 +95,8 @@ public class TesseractOCRParser extends AbstractParser { private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet( new HashSet<MediaType>(Arrays.asList(new MediaType[] { MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"), - MediaType.image("x-ms-bmp"), MediaType.image("gif") + MediaType.image("x-ms-bmp"), MediaType.image("gif"), MediaType.APPLICATION_XML.image("jp2"), + MediaType.image("jpx") }))); private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>(); http://git-wip-us.apache.org/repos/asf/tika/blob/c17d1b8a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java index b81ded3..92b1560 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java @@ -91,7 +91,7 @@ public class TesseractOCRParserTest extends TikaTest { // Assuming that Tesseract is on the path, we should find 5 Parsers that support PNG. assumeTrue(canRun()); - assertEquals(5, parser.getSupportedTypes(parseContext).size()); + assertEquals(7, parser.getSupportedTypes(parseContext).size()); assertTrue(parser.getSupportedTypes(parseContext).contains(png)); // DefaultParser will now select the TesseractOCRParser.
