Repository: tika
Updated Branches:
  refs/heads/master 7dda921de -> c17d1b8a6


TIKA-2174 add jp2 and jpx to file formats handled by TesseractOCRParser


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c17d1b8a
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c17d1b8a
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c17d1b8a

Branch: refs/heads/master
Commit: c17d1b8a6bef4409787aa2b58b96f691dfcf1170
Parents: 7dda921
Author: tballison <[email protected]>
Authored: Wed Nov 9 12:46:35 2016 -0500
Committer: tballison <[email protected]>
Committed: Wed Nov 9 12:46:35 2016 -0500

----------------------------------------------------------------------
 .../main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java  | 3 ++-
 .../java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c17d1b8a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 5066f3c..90fe18c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -95,7 +95,8 @@ public class TesseractOCRParser extends AbstractParser {
     private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
             new HashSet<MediaType>(Arrays.asList(new MediaType[] {
                     MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
-                    MediaType.image("x-ms-bmp"), MediaType.image("gif")
+                    MediaType.image("x-ms-bmp"), MediaType.image("gif"), MediaType.APPLICATION_XML.image("jp2"),
+                    MediaType.image("jpx")
             })));
     private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
 

http://git-wip-us.apache.org/repos/asf/tika/blob/c17d1b8a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
----------------------------------------------------------------------
diff --git 
a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
 
b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index b81ded3..92b1560 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -91,7 +91,7 @@ public class TesseractOCRParserTest extends TikaTest {
         // Assuming that Tesseract is on the path, we should find 5 Parsers that support PNG.
         assumeTrue(canRun());
 
-        assertEquals(5, parser.getSupportedTypes(parseContext).size());
+        assertEquals(7, parser.getSupportedTypes(parseContext).size());
         assertTrue(parser.getSupportedTypes(parseContext).contains(png));
 
         // DefaultParser will now select the TesseractOCRParser.

Reply via email to