Repository: tika
Updated Branches:
  refs/heads/master 308d26fb2 -> 0a4b0e80b


fix for TIKA-2098 contributed by alexshadow007


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c33ac046
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c33ac046
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c33ac046

Branch: refs/heads/master
Commit: c33ac04618f97c06fe4508b5d41465b2c11ba1b9
Parents: ce07d8a
Author: Alexander Kazakov <[email protected]>
Authored: Mon Sep 26 21:48:11 2016 +0300
Committer: Alexander Kazakov <[email protected]>
Committed: Mon Sep 26 21:48:11 2016 +0300

----------------------------------------------------------------------
 .../src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java | 9 ++++-----
 .../test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 9 +++++++++
 2 files changed, 13 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c33ac046/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
index 34a3aff..5dd0680 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
@@ -122,6 +122,10 @@ class PDF2XHTML extends AbstractPDF2XHTML {
                 }
             });
 
+            if (pdf2XHTML.exceptions.size() > 0) {
+                //throw the first
+                throw pdf2XHTML.exceptions.get(0);
+            }
         } catch (IOException e) {
             if (e.getCause() instanceof SAXException) {
                 throw (SAXException) e.getCause();
@@ -129,11 +133,6 @@ class PDF2XHTML extends AbstractPDF2XHTML {
                 throw new TikaException("Unable to extract PDF content", e);
             }
         }
-        if (pdf2XHTML.exceptions.size() > 0) {
-            //throw the first
-            throw new TikaException("Unable to extract all PDF content",
-                    pdf2XHTML.exceptions.get(0));
-        }
     }
 
 

http://git-wip-us.apache.org/repos/asf/tika/blob/c33ac046/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 61b8ba2..5276f81 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -34,6 +34,7 @@ import org.apache.commons.io.IOUtils;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.pdfbox.rendering.ImageType;
+import org.apache.tika.Tika;
 import org.apache.tika.TikaTest;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.AccessPermissionException;
@@ -1261,6 +1262,14 @@ public class PDFParserTest extends TikaTest {
         assertEquals("Sample Title", m.get(TikaCoreProperties.TITLE));
     }
 
+    @Test
+    public void testMaxLength() throws Exception {
+        InputStream is = getResourceAsStream("/test-documents/testPDF.pdf");
+        String content = new Tika().parseToString(is, new Metadata(), 100);
+
+        assertTrue(content.length() <= 100);
+    }
+
     private void assertException(String path, Parser parser, ParseContext context, Class expected) {
         boolean noEx = false;
         InputStream is = getResourceAsStream(path);

Reply via email to