This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 242083967621d0c2dda2dd2fed89799153f9cb8f Author: tallison <[email protected]> AuthorDate: Mon Dec 16 16:52:24 2019 -0500 TIKA-3016 -- fix OldExcelParser to work with the ToXMLHandler --- .../java/org/apache/tika/parser/microsoft/OldExcelParser.java | 7 ++----- .../java/org/apache/tika/parser/microsoft/OldExcelParserTest.java | 8 ++++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java index 446eea9..207c28d 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java @@ -55,10 +55,7 @@ public class OldExcelParser extends AbstractParser { XHTMLContentHandler xhtml) throws TikaException, IOException, SAXException { // Get the whole text, as a single string String text = extractor.getText(); - // Split and output - xhtml.startDocument(); - String line; BufferedReader reader = new BufferedReader(new StringReader(text)); while ((line = reader.readLine()) != null) { @@ -66,8 +63,6 @@ public class OldExcelParser extends AbstractParser { xhtml.characters(line); xhtml.endElement("p"); } - - xhtml.endDocument(); } public Set<MediaType> getSupportedTypes(ParseContext context) { @@ -92,6 +87,8 @@ public class OldExcelParser extends AbstractParser { // Have the text extracted and given to our Content Handler XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); + xhtml.startDocument(); parse(extractor, xhtml); + xhtml.endDocument(); } } diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java index fcf601c..36c1dfe 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java +++ 
b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java @@ -111,4 +111,12 @@ public class OldExcelParserTest extends TikaTest { assertContains("<p>(1)</p>", xml); assertContains("<p>5.0</p>", xml); } + + + @Test + public void testToXMLInOldExcelParser() throws Exception { + String xml = getXML("testEXCEL_5.xls").xml; + assertContains("Written and saved in Microsoft Excel X for Mac Service Release 1", + xml); + } }
