Repository: tika Updated Branches: refs/heads/2.x ab009aeb7 -> 3f24e6c3e
TIKA-2174 -- add ppm and update changes.txt Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/3f24e6c3 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/3f24e6c3 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/3f24e6c3 Branch: refs/heads/2.x Commit: 3f24e6c3e2514a7be2d966305c53a3da0f397ef9 Parents: ab009ae Author: tballison <[email protected]> Authored: Thu Nov 10 08:06:13 2016 -0500 Committer: tballison <[email protected]> Committed: Thu Nov 10 08:06:13 2016 -0500 ---------------------------------------------------------------------- CHANGES.txt | 11 ++++++++--- .../org/apache/tika/parser/ocr/TesseractOCRParser.java | 2 +- .../apache/tika/parser/ocr/TesseractOCRParserTest.java | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/3f24e6c3/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 8e4fc56..e062673 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -15,10 +15,15 @@ Release 2.0 - ??? parser classes and their dependencies. To keep the old behaviour, set your LoadErrorHandler to IGNORE. (TIKA-1805) -Release 1.15 - * Upgrade provided SQLite parser to 3.15.1 (TIKA-2171). +Release 1.15 -??? -Release 1.14 - ??? + * Add extraction of .jpx inline images from PDFs (TIKA-2175). + + * Add .jpx, .jp2, .ppm to formats handled by Tesseract (TIKA-2174). + + * Upgrade provided SQLite parser to 3.15.1 (TIKA-2171). + +Release 1.14 - 11/9/2016 * Extract all headers from MSG/RFC822 (TIKA-2122). http://git-wip-us.apache.org/repos/asf/tika/blob/3f24e6c3/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java index a83d419..031e8b9 100644 --- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java +++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java @@ -97,7 +97,7 @@ public class TesseractOCRParser extends AbstractParser { MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"), MediaType.image("x-ms-bmp"), MediaType.image("gif"), MediaType.APPLICATION_XML.image("jp2"), - MediaType.image("jpx") + MediaType.image("jpx"), MediaType.image("x-portable-pixmap") }))); private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>(); http://git-wip-us.apache.org/repos/asf/tika/blob/3f24e6c3/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java index 8d7e9a9..ce5531d 100644 --- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java +++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java @@ -244,4 +244,6 @@ public class TesseractOCRParserTest extends TikaTest { assertEquals("75", m.get(Metadata.IMAGE_LENGTH)); assertEquals("72 dots per inch", m.get("Y Resolution")); } + + //TODO: add unit tests for jp2/jpx and ppm }
