Repository: tika
Updated Branches:
  refs/heads/master feac58b6f -> 5af482ef6
TIKA-2106 -- need to lowercase hocr/txt suffix; thanks to Eric Pugh. This closes #136

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/5af482ef
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/5af482ef
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/5af482ef

Branch: refs/heads/master
Commit: 5af482ef6341369d6ba30df87ca2c8be9f0d0c0f
Parents: feac58b
Author: tballison <[email protected]>
Authored: Fri Sep 30 19:39:54 2016 -0400
Committer: tballison <[email protected]>
Committed: Fri Sep 30 19:39:54 2016 -0400

----------------------------------------------------------------------
 .../main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tika/blob/5af482ef/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 36c831b..5066f3c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -357,7 +357,8 @@ public class TesseractOCRParser extends AbstractParser {
             doOCR(tmpFile, tmpImgFile, config);
 
             // Tesseract appends the output type (.txt or .hocr) to output file name
-            tmpTxtOutput = new File(tmpImgFile.getAbsolutePath() + "." + config.getOutputType());
+            tmpTxtOutput = new File(tmpImgFile.getAbsolutePath() + "." +
+                    config.getOutputType().toString().toLowerCase(Locale.US));
 
             if (tmpTxtOutput.exists()) {
                 try (InputStream is = new FileInputStream(tmpTxtOutput)) {
