This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
commit a138a7968f42e1c70df5b3ce8e4ba50688c9ad4a Author: tallison <[email protected]> AuthorDate: Mon Dec 16 14:14:13 2019 -0500 TIKA-3012 -- prevent the RFC822Parser from calling endDocument() before the document has actually ended. --- .../java/org/apache/tika/parser/mail/MailContentHandler.java | 10 ---------- .../main/java/org/apache/tika/parser/mail/RFC822Parser.java | 3 ++- .../java/org/apache/tika/parser/mail/RFC822ParserTest.java | 8 ++++++++ 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java index 6bee05c..61607a2 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java @@ -310,20 +310,10 @@ class MailContentHandler implements ContentHandler { @Override public void startMessage() throws MimeException { - try { - handler.startDocument(); - } catch (SAXException e) { - throw new MimeException(e); - } } @Override public void endMessage() throws MimeException { - try { - handler.endDocument(); - } catch (SAXException e) { - throw new MimeException(e); - } } @Override diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java index ffc4d26..149b330 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java @@ -94,7 +94,7 @@ public class RFC822Parser extends AbstractParser { extractAllAlternatives); parser.setContentHandler(mch); parser.setContentDecoding(true); - + xhtml.startDocument(); TikaInputStream tstream = TikaInputStream.get(stream); try { parser.parse(tstream); @@ -112,6 +112,7 @@ public class RFC822Parser extends AbstractParser { throw new 
TikaException("Failed to parse an email message", e); } } + xhtml.endDocument(); } /** diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java index 66a5309..411c608 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java @@ -700,4 +700,12 @@ public class RFC822ParserTest extends TikaTest { assertEquals(1, metadataList.size()); assertContains("asked", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT)); } + + @Test + public void testGroupwise() throws Exception { + //TODO -- this should treat attachments as attachments, no? + List<Metadata> metadataList = getRecursiveMetadata("testGroupWiseEml.eml"); + assertEquals(1, metadataList.size()); + assertContains("ssssss", metadataList.get(0).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT)); + } }
