Repository: tika
Updated Branches:
  refs/heads/2.x 1ab6c81ce -> 1ec8c0947


Tesseract may sometimes misread the 't' in "haystack" as a '!'...


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/1ec8c094
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/1ec8c094
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/1ec8c094

Branch: refs/heads/2.x
Commit: 1ec8c0947575729975601d543f9a5b08ca3c7269
Parents: 1ab6c81
Author: Nick Burch <[email protected]>
Authored: Wed Jun 22 09:33:41 2016 +0100
Committer: Nick Burch <[email protected]>
Committed: Wed Oct 5 12:08:25 2016 +0100

----------------------------------------------------------------------
 .../test/java/org/apache/tika/parser/pdf/PDFParserTest.java  | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/1ec8c094/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index ff74e50..e99e87b 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -1188,7 +1188,13 @@ public class PDFParserTest extends TikaTest {
             assertContains("Haystack", xmlResult.xml);
             assertContains("Needle", xmlResult.xml);
             if (! strategy.equals(PDFParserConfig.OCR_STRATEGY.NO_OCR)) {
-                assertContains("<div class=\"ocr\">pdf_haystack", 
xmlResult.xml);
+                // Tesseract may see the t in haystack as a ! some times...
+                String div = "<div class=\"ocr\">pdf_hays";
+                if (xmlResult.xml.contains(div+"!ack")) {
+                   assertContains(div+"!ack", xmlResult.xml);
+                } else {
+                   assertContains(div+"tack", xmlResult.xml);
+                }
             } else {
                 assertNotContained("<div class=\"ocr\">pdf_haystack", 
xmlResult.xml);
             }

Reply via email to