Repository: tika
Updated Branches:
  refs/heads/master ade60ed62 -> a46ffacf1


Tesseract may sometimes see the 't' in "haystack" as a '!'...


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/51010235
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/51010235
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/51010235

Branch: refs/heads/master
Commit: 5101023503c892d4af49d33c85a13b99f85bc0b9
Parents: ade60ed
Author: Nick Burch <[email protected]>
Authored: Wed Jun 22 09:33:41 2016 +0100
Committer: Nick Burch <[email protected]>
Committed: Wed Jun 22 09:33:41 2016 +0100

----------------------------------------------------------------------
 .../test/java/org/apache/tika/parser/pdf/PDFParserTest.java  | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/51010235/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index df2e27c..58c23b8 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -1203,7 +1203,13 @@ public class PDFParserTest extends TikaTest {
             assertContains("Haystack", xmlResult.xml);
             assertContains("Needle", xmlResult.xml);
             if (! strategy.equals(PDFParserConfig.OCR_STRATEGY.NO_OCR)) {
-                assertContains("<div class=\"ocr\">pdf_haystack", 
xmlResult.xml);
+                // Tesseract may see the t in haystack as a ! some times...
+                String div = "<div class=\"ocr\">pdf_hays";
+                if (xmlResult.xml.contains(div+"!ack")) {
+                   assertContains(div+"!ack", xmlResult.xml);
+                } else {
+                   assertContains(div+"tack", xmlResult.xml);
+                }
             } else {
                 assertNotContained("<div class=\"ocr\">pdf_haystack", 
xmlResult.xml);
             }

Reply via email to