This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 8e45fb073f79f8a62c4b5dbd608869db7c2b83a3 Author: tallison <[email protected]> AuthorDate: Mon Dec 16 16:44:19 2019 -0500 TIKA-3015 -- TNEFParser should call startDocument and endDocument() --- .../main/java/org/apache/tika/parser/microsoft/TNEFParser.java | 9 ++++++--- .../java/org/apache/tika/parser/microsoft/TNEFParserTest.java | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/TNEFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/TNEFParser.java index 484f0c5..fa3120e 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/TNEFParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/TNEFParser.java @@ -38,6 +38,7 @@ import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.EmbeddedContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -80,7 +81,8 @@ public class TNEFParser extends AbstractParser { // TODO: Move to title in Tika 2.0 metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject); } - + XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); + xhtml.startDocument(); // Recurse into the message body RTF MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); if (attr != null && attr instanceof MAPIRtfAttribute) { @@ -88,7 +90,7 @@ public class TNEFParser extends AbstractParser { handleEmbedded( "message.rtf", "application/rtf", rtf.getData(), - embeddedExtractor, handler + embeddedExtractor, xhtml ); } @@ -106,9 +108,10 @@ public class TNEFParser extends AbstractParser { } handleEmbedded( name, null, attachment.getContents(), - embeddedExtractor, handler + embeddedExtractor, xhtml ); } + xhtml.endDocument(); } private void handleEmbedded(String name, String type, byte[] contents, diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java index 8062555..39e5e31 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java @@ -95,4 +95,10 @@ public class TNEFParserTest extends AbstractPOIContainerExtractionTest { assertEquals("quick.xml", handler.filenames.get(5)); assertEquals(MediaType.application("xml"), handler.mediaTypes.get(5)); } + + @Test + public void testRTF() throws Exception { + String xml = getXML("testWINMAIL.dat").xml; + assertContains("Der schnelle braune", xml); + } }
