[ https://issues.apache.org/jira/browse/ARTEMIS-747?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15515833#comment-15515833 ]
ASF GitHub Bot commented on ARTEMIS-747: ---------------------------------------- Github user TomasHofman commented on a diff in the pull request: https://github.com/apache/activemq-artemis/pull/791#discussion_r80204402 --- Diff: artemis-cli/src/main/java/org/apache/activemq/artemis/cli/commands/tools/XmlDataImporter.java --- @@ -444,33 +444,59 @@ private void processMessageBody(Message message) throws XMLStreamException, IOEx } } reader.next(); + ActiveMQServerLogger.LOGGER.debug("XMLStreamReader impl: " + reader); if (isLarge) { tempFileName = UUID.randomUUID().toString() + ".tmp"; ActiveMQServerLogger.LOGGER.debug("Creating temp file " + tempFileName + " for large message."); try (OutputStream out = new FileOutputStream(tempFileName)) { - while (reader.hasNext()) { - if (reader.getEventType() == XMLStreamConstants.END_ELEMENT) { - break; - } - else { - String characters = new String(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength()); - String trimmedCharacters = characters.trim(); - if (trimmedCharacters.length() > 0) { // this will skip "indentation" characters - byte[] data = decode(trimmedCharacters); - out.write(data); - } - } - reader.next(); - } + getMessageBodyBytes(new MessageBodyBytesProcessor() { + @Override + public void processBodyBytes(byte[] bytes) throws IOException { + out.write(bytes); + } + }); } FileInputStream fileInputStream = new FileInputStream(tempFileName); BufferedInputStream bufferedInput = new BufferedInputStream(fileInputStream); ((ClientMessage) message).setBodyInputStream(bufferedInput); } else { - reader.next(); // step past the "indentation" characters to get to the CDATA with the message body - String characters = new String(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength()); - message.getBodyBuffer().writeBytes(decode(characters.trim())); + getMessageBodyBytes(new MessageBodyBytesProcessor() { + @Override + public void processBodyBytes(byte[] bytes) throws IOException { + 
message.getBodyBuffer().writeBytes(bytes); + } + }); + } + } + + /** + * Message bodies are written to XML as Base64 encoded CDATA elements. Some parser implementations won't read the + * entire CDATA element at once (e.g. Woodstox) so it's possible for multiple CDATA events to be combined into a + * single Base64 encoded string. You can't decode bits and pieces of each CDATA. Each CDATA has to be decoded in + * its entirety. + * + * @param processor used to deal with the decoded CDATA elements + * @throws IOException + * @throws XMLStreamException + */ + private void getMessageBodyBytes(MessageBodyBytesProcessor processor) throws IOException, XMLStreamException { + int currentEventType; + StringBuilder cdata = new StringBuilder(); + while (reader.hasNext()) { + currentEventType = reader.getEventType(); + if (currentEventType == XMLStreamConstants.END_ELEMENT) { + break; + } + // when we hit a CHARACTERS event we know that the entire CDATA is complete so decode and pass back to the processor + else if (currentEventType == XMLStreamConstants.CHARACTERS && cdata.length() > 0) { --- End diff -- Although the problem would only occur if JDK's StAX implementation happened to fragment the CDATA, which it (probably?) doesn't do. So it may be OK how it is. > Multiple CDATA events during import fails > ----------------------------------------- > > Key: ARTEMIS-747 > URL: https://issues.apache.org/jira/browse/ARTEMIS-747 > Project: ActiveMQ Artemis > Issue Type: Bug > Affects Versions: 1.4.0 > Reporter: Justin Bertram > Assignee: Justin Bertram > > Message bodies are written to XML as Base64 encoded CDATA elements. Some > parser implementations won't read the entire CDATA element at once (e.g. > Woodstox) so it's possible for multiple CDATA events to be combined into a > single Base64 encoded string. You can't decode bits and pieces of each > CDATA. Each CDATA has to be decoded in its entirety. The current importer > doesn't deal with this properly. 
-- This message was sent by Atlassian JIRA (v6.3.4#6332)