BROOKLYN-305: Handle invalid xml chars in attribute vals
Project: http://git-wip-us.apache.org/repos/asf/brooklyn-server/repo Commit: http://git-wip-us.apache.org/repos/asf/brooklyn-server/commit/0ef20baf Tree: http://git-wip-us.apache.org/repos/asf/brooklyn-server/tree/0ef20baf Diff: http://git-wip-us.apache.org/repos/asf/brooklyn-server/diff/0ef20baf Branch: refs/heads/master Commit: 0ef20baf2af093373b97dd0e56566089bfa53bdd Parents: 7981326 Author: Aled Sage <[email protected]> Authored: Wed Jun 22 15:09:02 2016 +0100 Committer: Aled Sage <[email protected]> Committed: Thu Jun 23 15:26:42 2016 +0100 ---------------------------------------------------------------------- .../BrooklynMementoPersisterToObjectStore.java | 4 +- .../brooklyn/util/core/xstream/XmlUtil.java | 56 +++++++ .../core/mgmt/rebind/RebindEntityTest.java | 20 +++ .../util/core/xstream/XmlSerializerTest.java | 8 +- .../brooklyn/util/core/xstream/XmlUtilTest.java | 147 +++++++++++++++++++ 5 files changed, 231 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java b/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java index e449529..cce552d 100644 --- a/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java +++ b/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java @@ -294,7 +294,7 @@ public class BrooklynMementoPersisterToObjectStore implements BrooklynMementoPer exceptionHandler.onLoadMementoFailed(type, "memento "+id+" read error", e); } - String xmlId = (String) XmlUtil.xpath(contents, "/"+type.toCamelCase()+"/id"); + String xmlId = (String) XmlUtil.xpathHandlingIllegalChars(contents, "/"+type.toCamelCase()+"/id"); String safeXmlId = Strings.makeValidFilename(xmlId); if (!Objects.equal(id, safeXmlId)) LOG.warn("ID mismatch on "+type.toCamelCase()+", "+id+" from path, "+safeXmlId+" from xml"); @@ -334,7 +334,7 @@ public class BrooklynMementoPersisterToObjectStore implements BrooklynMementoPer class XPathHelper { private String get(String innerPath) { - return (String) XmlUtil.xpath(contents, prefix+innerPath); + return (String) XmlUtil.xpathHandlingIllegalChars(contents, prefix+innerPath); } } XPathHelper x = new XPathHelper(); http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java b/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java index a969dea..9f914fa 100644 --- a/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java +++ b/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java @@ -32,6 +32,8 @@ import org.apache.brooklyn.util.exceptions.Exceptions; import org.w3c.dom.Document; import org.xml.sax.SAXException; +import com.google.common.annotations.Beta; + public class XmlUtil { /** @@ -73,4 +75,58 @@ public class XmlUtil { throw Exceptions.propagate(e); } } + + /** + * Executes the given xpath on the given xml. If this fails becaues the xml is invalid + * (e.g. contains ""), then it will attempt to escape such illegal characters + * and try again. Note that the *escaped* values may be contained in the returned result! + * The escaping used is the prefix "BR_UNICODE_"; if that string is already in the xml, + * then it will replace that with "NOT_BR_UNICODE_". + */ + @Beta + public static Object xpathHandlingIllegalChars(String xml, String xpath) { + try { + return xpath(xml, xpath); + } catch (Exception e) { + SAXException saxe = Exceptions.getFirstThrowableOfType(e, SAXException.class); + if (saxe != null && saxe.toString().contains("&#")) { + // Looks like illegal chars (e.g. xstream converts unicode char 27 to "", + // which is not valid in XML! Try again with an escaped xml. + Escaper escaper = new Escaper(); + String xmlCleaned = escaper.escape(xml); + try { + Object result = xpath(xmlCleaned, xpath); + if (result instanceof String) { + return escaper.unescape((String)result); + } else { + return result; + } + } catch (Exception e2) { + Exceptions.propagateIfFatal(e2); + } + } + throw e; + } + } + + /** + * Replaces things like "" with "BR_UNICODE_x1b". This is because xstream happily writes + * out such characters (which are not valid in xml), but xpath fails when parsing them. + */ + @Beta + protected static class Escaper { + + public String escape(String string) { + String unicodeRegex = "&#([x0-9a-fA-f]{1,5});"; + return string.replaceAll("BR_UNICODE_", "NOT_BR_UNICODE_") + .replaceAll(unicodeRegex, "BR_UNICODE_$1;"); + } + + public String unescape(String string) { + String unicodeRegex = "(?<!NOT_)BR_UNICODE_([x0-9a-fA-F]{1,5})"; + return string + .replaceAll(unicodeRegex, "&#$1") + .replaceAll("NOT_BR_UNICODE_", "BR_UNICODE_"); + } + } } http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java b/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java index 2bcebf4..13697df 100644 --- a/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java +++ b/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java @@ -73,6 +73,7 @@ import org.apache.brooklyn.test.Asserts; import org.apache.brooklyn.util.collections.MutableMap; import org.apache.brooklyn.util.collections.MutableSet; import org.apache.brooklyn.util.core.flags.SetFromFlag; +import org.apache.brooklyn.util.core.xstream.XmlSerializerTest; import org.apache.brooklyn.util.exceptions.RuntimeInterruptedException; import org.apache.brooklyn.util.time.Durations; import org.testng.Assert; @@ -622,6 +623,25 @@ public class RebindEntityTest extends RebindTestFixtureWithApp { assertNull(newApp.getAttribute(MY_DYNAMIC_SENSOR)); } + /** + * @see {@link XmlSerializerTest#testIllegalXmlCharacter()} + */ + @Test + public void testRebindAttributeWithSpecialCharacters() throws Exception { + String val = "abc\u001b"; + assertEquals((int)val.charAt(3), 27); // expect that to give us unicode character 27 + + MyEntity origE = origApp.createAndManageChild(EntitySpec.create(MyEntity.class)); + origE.sensors().set(MyEntity.MY_SENSOR, val); + + assertEquals(origE.getAttribute(MyEntity.MY_SENSOR), val); + + newApp = rebind(); + MyEntity newE = (MyEntity) Iterables.find(newApp.getChildren(), Predicates.instanceOf(MyEntity.class)); + + assertEquals(newE.getAttribute(MyEntity.MY_SENSOR), val); + } + @Test public void testRebindWhenPreviousAppDestroyedHasNoApp() throws Exception { origApp.stop(); http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java index f89e770..8bf0ab4 100644 --- a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java +++ b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java @@ -44,10 +44,14 @@ public class XmlSerializerTest { assertSerializeAndDeserialize(new StringHolder("abc")); } + /** + * See https://issues.apache.org/jira/browse/BROOKLYN-305 and http://x-stream.github.io/faq.html#XML_control_char + */ @Test public void testIllegalXmlCharacter() throws Exception { - String val = "abc\u001b"; - assertEquals((int)val.charAt(3), 27); // expect that to give us unicode character 27 + String val = "PREFIX_\u001b\u0000_SUFFIX"; + assertEquals((int)val.charAt(7), 27); // expect that to give us unicode character 27 + assertEquals((int)val.charAt(8), 0); // expect that to give us unicode character 0 assertSerializeAndDeserialize(val); assertSerializeAndDeserialize(new StringHolder(val)); } http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java index 65983e3..0b195b5 100644 --- a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java +++ b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java @@ -19,12 +19,35 @@ package org.apache.brooklyn.util.core.xstream; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import java.util.List; + +import org.apache.brooklyn.util.core.xstream.XmlUtil.Escaper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.testng.annotations.Test; +import com.google.common.annotations.Beta; +import com.google.common.base.Joiner; +import com.google.common.base.Optional; +import com.google.common.collect.Lists; public class XmlUtilTest { + /* + * For invalid unicode characters, see https://issues.apache.org/jira/browse/BROOKLYN-305 and + * https://en.wikipedia.org/wiki/Valid_characters_in_XML. + * + * If we include the xml version in the xml string, then more unicode characters are accepted. + * But if it still contains something illegal, then we can resort to calling + * xpathHandlingIllegalChars (to temporarily escape those characters + * + */ + + private static final Logger LOG = LoggerFactory.getLogger(XmlUtilTest.class); + @Test public void testXpath() throws Exception { String xml = "<a><b>myb</b></a>"; @@ -32,4 +55,128 @@ public class XmlUtilTest { assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); } } + + @Test + public void testXpathWithEscapedCharsAndXmlVersion1_1() throws Exception { + StringBuilder xml = new StringBuilder("<?xml version=\"1.1\" encoding=\"UTF-8\"?>"+"\n"+ + "<a><b>myb</b><c>"); + for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { + if (isValidUnicodeInXml1_1(i)) { + String unicode = Integer.toHexString(i); + xml.append("&#x"+unicode+";"); + } + } + xml.append("</c></a>"); + assertEquals(XmlUtil.xpath(xml.toString(), "/a/b[text()]"), "myb"); + } + + @Test + public void testXpathWithEscapedCharsAndXmlUnversioned() throws Exception { + StringBuilder xml = new StringBuilder("<a><b>myb</b><c>"); + for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { + if (isValidUnicodeInXml1_0(i)) { + String unicode = Integer.toHexString(i); + xml.append("&#x"+unicode+";"); + } + } + xml.append("</c></a>"); + assertEquals(XmlUtil.xpath(xml.toString(), "/a/b[text()]"), "myb"); + } + + @Test + public void testXpathHandlingIllegalChars() throws Exception { + StringBuilder xml = new StringBuilder("<a><b>myb</b><c>"); + for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { + String unicode = Integer.toHexString(i); + xml.append("&#x"+unicode+";"); + } + xml.append("</c></a>"); + assertEquals(XmlUtil.xpathHandlingIllegalChars(xml.toString(), "/a/b[text()]"), "myb"); + } + + @Test + public void testEscaper() throws Exception { + // Escapes unicode char, ignoring things around it + assertEscapeAndUnescape("", "BR_UNICODE_x001b;"); + assertEscapeAndUnescape("", "BR_UNICODE_00027;"); + assertEscapeAndUnescape("", "BR_UNICODE_x1b;"); + assertEscapeAndUnescape("SUFFIX", "BR_UNICODE_x1b;SUFFIX"); + assertEscapeAndUnescape("PREFIX", "PREFIXBR_UNICODE_x1b;"); + assertEscapeAndUnescape("PREFIXSUFFIX", "PREFIXBR_UNICODE_x1b;SUFFIX"); + + // Ignores invalid unicodes + assertEscapeAndUnescape("", ""); // no ";" + assertEscapeAndUnescape("g;", "g;"); // chars out of range + assertEscapeAndUnescape("ƻ", "ƻ"); // too many chars + + // Handles strings that already contain the expected escape sequence + assertEscapeAndUnescape("BR_UNICODE_x1b;", "NOT_BR_UNICODE_x1b;"); + assertEscapeAndUnescape("NOT_BR_UNICODE_x1b;", "NOT_NOT_BR_UNICODE_x1b;"); + assertEscapeAndUnescape("BR_UNICODE_x1b;THEN", "NOT_BR_UNICODE_x1b;THENBR_UNICODE_x1b;"); + } + + protected void assertEscapeAndUnescape(String val) { + assertEscapeAndUnescape(val, Optional.<String>absent()); + } + + protected void assertEscapeAndUnescape(String val, String expectedEscaped) { + assertEscapeAndUnescape(val, Optional.of(expectedEscaped)); + } + + protected void assertEscapeAndUnescape(String val, Optional<String> expectedEscaped) { + Escaper escaper = new XmlUtil.Escaper(); + String escaped = escaper.escape(val); + if (expectedEscaped.isPresent()) { + assertEquals(escaped, expectedEscaped.get()); + } + assertEquals(escaper.unescape(escaped), val); + } + + @Beta + protected boolean isValidUnicodeInXml1_1(int c) { + int min = 0; + int max = 65535; // xFFFF + return min <= c && c <= max && + c != 0 && (c <= 55295 || c >= 57344) && c != 65534; + } + + @Beta + protected boolean isValidUnicodeInXml1_0(int c) { + // see https://www.w3.org/TR/xml/#charsets + return c == 0x9 || c == 0xA || c == 0xD || (0x20 <= c && c <= 0xD7FF) + || (0xE000 <= c && c <= 0xFFFD) || (0x10000 <= c && c <= 0x10FFFF); + } + + // For checking our assumptions about what is a valid and invalid (escaped) unicode char + // in XML version 1.0 and 1.1. + @Test(groups={"WIP"}, enabled=false) + public void testXpathWithEachEscapeCharacterAndXmlVersion() throws Exception { + List<Integer> errsInXml1_1 = Lists.newArrayList(); + List<Integer> errsInXmlUnversioned = Lists.newArrayList(); + for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { + String unicode = Integer.toHexString(i); + try { + String xml = Joiner.on("\n").join( + "<?xml version=\"1.1\" encoding=\"UTF-8\"?>", + "<a><b>myb</b><c>&#x"+unicode+";</c></a>"); + assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); + assertTrue(isValidUnicodeInXml1_1(i), "i="+i+"; unicode="+unicode); + } catch (Throwable t) { + LOG.debug("Failed for code "+unicode+": "+t.getMessage()); + errsInXml1_1.add(Integer.valueOf(unicode, 16)); + assertFalse(isValidUnicodeInXml1_1(i), "i="+i+"; unicode="+unicode); + } + try { + String xml = "<a><b>myb</b><c>&#x"+unicode+";</c></a>"; + assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); + assertTrue(isValidUnicodeInXml1_0(i), "i="+i+"; unicode="+unicode); + } catch (Throwable t) { + LOG.debug("Failed for code "+unicode+": "+t.getMessage()); + errsInXmlUnversioned.add(Integer.valueOf(unicode, 16)); + assertFalse(isValidUnicodeInXml1_0(i), "i="+i+"; unicode="+unicode); + } + } + LOG.info("XML version 1.1 invalidCodes="+errsInXml1_1); + LOG.info("XML unversioned invalidCodes="+errsInXmlUnversioned); + } }
