This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 82509f32c30a3b7d82169d4757ca2735b656b511 Author: tballison <[email protected]> AuthorDate: Mon Feb 27 21:40:02 2017 -0500 TIKA-1857 xfa fix --- .../org/apache/tika/parser/pdf/XFAExtractor.java | 28 ++++++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java index d3c34dd..d136295 100644 --- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java +++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java @@ -218,16 +218,34 @@ class XFAExtractor { private void loadData(XMLStreamReader reader, Map<String, String> pdfObjRToValues) throws XMLStreamException { //reader is at the "xfa:data" element + //scrape the contents from the text containing nodes + StringBuilder buffer = new StringBuilder(); while (reader.hasNext()) { switch (reader.next()) { case (XMLStreamConstants.START_ELEMENT) : - if ("topmostSubform".equals(reader.getLocalName())) { - continue; - } - String value = scrapeTextUntil(reader, reader.getName()); - pdfObjRToValues.put(reader.getLocalName(), value); break; + case XMLStreamConstants.CHARACTERS: + int start = reader.getTextStart(); + int length = reader.getTextLength(); + buffer.append(reader.getTextCharacters(), + start, + length); + break; + + case XMLStreamConstants.CDATA: + start = reader.getTextStart(); + length = reader.getTextLength(); + buffer.append(reader.getTextCharacters(), + start, + length); + break; + case (XMLStreamConstants.END_ELEMENT) : + if (buffer.length() > 0) { + String localName = reader.getLocalName(); + pdfObjRToValues.put(localName, buffer.toString()); + buffer.setLength(0); + } if (XFA_DATA.equals(reader.getName())) { return; } -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
