Repository: tika
Updated Branches:
  refs/heads/master 308d26fb2 -> 0a4b0e80b
fix for TIKA-2098 contributed by alexshadow007 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c33ac046 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c33ac046 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c33ac046 Branch: refs/heads/master Commit: c33ac04618f97c06fe4508b5d41465b2c11ba1b9 Parents: ce07d8a Author: Alexander Kazakov <[email protected]> Authored: Mon Sep 26 21:48:11 2016 +0300 Committer: Alexander Kazakov <[email protected]> Committed: Mon Sep 26 21:48:11 2016 +0300 ---------------------------------------------------------------------- .../src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java | 9 ++++----- .../test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 9 +++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/c33ac046/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java index 34a3aff..5dd0680 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java @@ -122,6 +122,10 @@ class PDF2XHTML extends AbstractPDF2XHTML { } }); + if (pdf2XHTML.exceptions.size() > 0) { + //throw the first + throw pdf2XHTML.exceptions.get(0); + } } catch (IOException e) { if (e.getCause() instanceof SAXException) { throw (SAXException) e.getCause(); @@ -129,11 +133,6 @@ class PDF2XHTML extends AbstractPDF2XHTML { throw new TikaException("Unable to extract PDF content", e); } } - if (pdf2XHTML.exceptions.size() > 0) { - //throw the first - throw new TikaException("Unable to extract all PDF content", - 
pdf2XHTML.exceptions.get(0)); - } } http://git-wip-us.apache.org/repos/asf/tika/blob/c33ac046/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index 61b8ba2..5276f81 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -34,6 +34,7 @@ import org.apache.commons.io.IOUtils; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.pdfbox.rendering.ImageType; +import org.apache.tika.Tika; import org.apache.tika.TikaTest; import org.apache.tika.config.TikaConfig; import org.apache.tika.exception.AccessPermissionException; @@ -1261,6 +1262,14 @@ public class PDFParserTest extends TikaTest { assertEquals("Sample Title", m.get(TikaCoreProperties.TITLE)); } + @Test + public void testMaxLength() throws Exception { + InputStream is = getResourceAsStream("/test-documents/testPDF.pdf"); + String content = new Tika().parseToString(is, new Metadata(), 100); + + assertTrue(content.length() <= 100); + } + private void assertException(String path, Parser parser, ParseContext context, Class expected) { boolean noEx = false; InputStream is = getResourceAsStream(path);
