Repository: tika Updated Branches: refs/heads/2.x 1ab6c81ce -> 1ec8c0947
Tesseract may see the t in haystack as a ! sometimes... Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/1ec8c094 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/1ec8c094 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/1ec8c094 Branch: refs/heads/2.x Commit: 1ec8c0947575729975601d543f9a5b08ca3c7269 Parents: 1ab6c81 Author: Nick Burch <[email protected]> Authored: Wed Jun 22 09:33:41 2016 +0100 Committer: Nick Burch <[email protected]> Committed: Wed Oct 5 12:08:25 2016 +0100 ---------------------------------------------------------------------- .../test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/1ec8c094/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index ff74e50..e99e87b 100644 --- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -1188,7 +1188,13 @@ public class PDFParserTest extends TikaTest { assertContains("Haystack", xmlResult.xml); assertContains("Needle", xmlResult.xml); if (! strategy.equals(PDFParserConfig.OCR_STRATEGY.NO_OCR)) { - assertContains("<div class=\"ocr\">pdf_haystack", xmlResult.xml); + // Tesseract may see the t in haystack as a ! some times... 
+ String div = "<div class=\"ocr\">pdf_hays"; + if (xmlResult.xml.contains(div+"!ack")) { + assertContains(div+"!ack", xmlResult.xml); + } else { + assertContains(div+"tack", xmlResult.xml); + } } else { assertNotContained("<div class=\"ocr\">pdf_haystack", xmlResult.xml); }
