Repository: tika
Updated Branches:
  refs/heads/master 91cdce43d -> 1aff6380d


TIKA-2174 -- add .ppm to tesseract


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/1aff6380
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/1aff6380
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/1aff6380

Branch: refs/heads/master
Commit: 1aff6380d46b9104835909c31e7f2f36f621eca0
Parents: 91cdce4
Author: tballison <[email protected]>
Authored: Thu Nov 10 08:03:29 2016 -0500
Committer: tballison <[email protected]>
Committed: Thu Nov 10 08:03:29 2016 -0500

----------------------------------------------------------------------
 CHANGES.txt                                                     | 5 +++++
 .../java/org/apache/tika/parser/ocr/TesseractOCRParser.java     | 2 +-
 .../java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/1aff6380/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 6cd2de8..9e62fc7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,7 +1,12 @@
 Release 1.15 - ??
 
+  * Add extraction of .jpx inline images from PDFs (TIKA-2175).
+
+  * Add .jpx, .jp2, .ppm to formats handled by Tesseract (TIKA-2174).
+
   * Upgrade SQLite "provided" dependency to 3.15.1.
 
+
 Release 1.14 - 10/19/2016
 
   * Extract all headers from MSG/RFC822 (TIKA-2122).

http://git-wip-us.apache.org/repos/asf/tika/blob/1aff6380/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 90fe18c..ff9a755 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -96,7 +96,7 @@ public class TesseractOCRParser extends AbstractParser {
             new HashSet<MediaType>(Arrays.asList(new MediaType[] {
                     MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
                     MediaType.image("x-ms-bmp"), MediaType.image("gif"), MediaType.APPLICATION_XML.image("jp2"),
-                    MediaType.image("jpx")
+                    MediaType.image("jpx"), MediaType.image("x-portable-pixmap")
             })));
     private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
 

http://git-wip-us.apache.org/repos/asf/tika/blob/1aff6380/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index 92b1560..7607427 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -244,4 +244,6 @@ public class TesseractOCRParserTest extends TikaTest {
         assertEquals("75", m.get(Metadata.IMAGE_LENGTH));
         assertEquals("72 dots per inch", m.get("Y Resolution"));
     }
+
+    //TODO: add unit tests for jp2/jpx/ppm TIKA-2174
 }

Reply via email to