BROOKLYN-305: Handle invalid xml chars in attribute vals

Project: http://git-wip-us.apache.org/repos/asf/brooklyn-server/repo
Commit: http://git-wip-us.apache.org/repos/asf/brooklyn-server/commit/0ef20baf
Tree: http://git-wip-us.apache.org/repos/asf/brooklyn-server/tree/0ef20baf
Diff: http://git-wip-us.apache.org/repos/asf/brooklyn-server/diff/0ef20baf

Branch: refs/heads/master
Commit: 0ef20baf2af093373b97dd0e56566089bfa53bdd
Parents: 7981326
Author: Aled Sage <[email protected]>
Authored: Wed Jun 22 15:09:02 2016 +0100
Committer: Aled Sage <[email protected]>
Committed: Thu Jun 23 15:26:42 2016 +0100

----------------------------------------------------------------------
 .../BrooklynMementoPersisterToObjectStore.java  |   4 +-
 .../brooklyn/util/core/xstream/XmlUtil.java     |  56 +++++++
 .../core/mgmt/rebind/RebindEntityTest.java      |  20 +++
 .../util/core/xstream/XmlSerializerTest.java    |   8 +-
 .../brooklyn/util/core/xstream/XmlUtilTest.java | 147 +++++++++++++++++++
 5 files changed, 231 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java
 
b/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java
index e449529..cce552d 100644
--- 
a/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java
+++ 
b/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java
@@ -294,7 +294,7 @@ public class BrooklynMementoPersisterToObjectStore 
implements BrooklynMementoPer
                     exceptionHandler.onLoadMementoFailed(type, "memento "+id+" 
read error", e);
                 }
                 
-                String xmlId = (String) XmlUtil.xpath(contents, 
"/"+type.toCamelCase()+"/id");
+                String xmlId = (String) 
XmlUtil.xpathHandlingIllegalChars(contents, "/"+type.toCamelCase()+"/id");
                 String safeXmlId = Strings.makeValidFilename(xmlId);
                 if (!Objects.equal(id, safeXmlId))
                     LOG.warn("ID mismatch on "+type.toCamelCase()+", "+id+" 
from path, "+safeXmlId+" from xml");
@@ -334,7 +334,7 @@ public class BrooklynMementoPersisterToObjectStore 
implements BrooklynMementoPer
 
                 class XPathHelper {
                     private String get(String innerPath) {
-                        return (String) XmlUtil.xpath(contents, 
prefix+innerPath);
+                        return (String) 
XmlUtil.xpathHandlingIllegalChars(contents, prefix+innerPath);
                     }
                 }
                 XPathHelper x = new XPathHelper();

http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java 
b/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java
index a969dea..9f914fa 100644
--- a/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java
+++ b/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java
@@ -32,6 +32,8 @@ import org.apache.brooklyn.util.exceptions.Exceptions;
 import org.w3c.dom.Document;
 import org.xml.sax.SAXException;
 
+import com.google.common.annotations.Beta;
+
 public class XmlUtil {
 
     /**
@@ -73,4 +75,58 @@ public class XmlUtil {
             throw Exceptions.propagate(e);
         }
     }
+    
+    /**
+     * Executes the given xpath on the given xml. If this fails becaues the 
xml is invalid
+     * (e.g. contains "&#x1b;"), then it will attempt to escape such illegal 
characters
+     * and try again. Note that the *escaped* values may be contained in the 
returned result!
+     * The escaping used is the prefix "BR_UNICODE_"; if that string is 
already in the xml,
+     * then it will replace that with "NOT_BR_UNICODE_".
+     */
+    @Beta
+    public static Object xpathHandlingIllegalChars(String xml, String xpath) {
+        try {
+            return xpath(xml, xpath);
+        } catch (Exception e) {
+            SAXException saxe = Exceptions.getFirstThrowableOfType(e, 
SAXException.class);
+            if (saxe != null && saxe.toString().contains("&#")) {
+                // Looks like illegal chars (e.g. xstream converts unicode 
char 27 to "&#x1b;", 
+                // which is not valid in XML! Try again with an escaped xml.
+                Escaper escaper = new Escaper();
+                String xmlCleaned = escaper.escape(xml);
+                try {
+                    Object result = xpath(xmlCleaned, xpath);
+                    if (result instanceof String) {
+                        return escaper.unescape((String)result);
+                    } else {
+                        return result;
+                    }
+                } catch (Exception e2) {
+                    Exceptions.propagateIfFatal(e2);
+                }
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Replaces things like "&#x1b;" with "BR_UNICODE_x1b". This is because 
xstream happily writes 
+     * out such characters (which are not valid in xml), but xpath fails when 
parsing them.
+     */
+    @Beta
+    protected static class Escaper {
+
+        public String escape(String string) {
+            String unicodeRegex = "&#([x0-9a-fA-f]{1,5});";
+            return string.replaceAll("BR_UNICODE_", "NOT_BR_UNICODE_")
+                    .replaceAll(unicodeRegex, "BR_UNICODE_$1;");
+        }
+        
+        public String unescape(String string) {
+            String unicodeRegex = "(?<!NOT_)BR_UNICODE_([x0-9a-fA-F]{1,5})";
+            return string
+                    .replaceAll(unicodeRegex, "&#$1")
+                    .replaceAll("NOT_BR_UNICODE_", "BR_UNICODE_");
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java 
b/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java
index 2bcebf4..13697df 100644
--- 
a/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java
+++ 
b/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindEntityTest.java
@@ -73,6 +73,7 @@ import org.apache.brooklyn.test.Asserts;
 import org.apache.brooklyn.util.collections.MutableMap;
 import org.apache.brooklyn.util.collections.MutableSet;
 import org.apache.brooklyn.util.core.flags.SetFromFlag;
+import org.apache.brooklyn.util.core.xstream.XmlSerializerTest;
 import org.apache.brooklyn.util.exceptions.RuntimeInterruptedException;
 import org.apache.brooklyn.util.time.Durations;
 import org.testng.Assert;
@@ -622,6 +623,25 @@ public class RebindEntityTest extends 
RebindTestFixtureWithApp {
         assertNull(newApp.getAttribute(MY_DYNAMIC_SENSOR));
     }
 
+    /**
+     * @see {@link XmlSerializerTest#testIllegalXmlCharacter()}
+     */
+    @Test
+    public void testRebindAttributeWithSpecialCharacters() throws Exception {
+        String val = "abc\u001b";
+        assertEquals((int)val.charAt(3), 27); // expect that to give us 
unicode character 27
+        
+        MyEntity origE = 
origApp.createAndManageChild(EntitySpec.create(MyEntity.class));
+        origE.sensors().set(MyEntity.MY_SENSOR, val);
+
+        assertEquals(origE.getAttribute(MyEntity.MY_SENSOR), val);
+
+        newApp = rebind();
+        MyEntity newE = (MyEntity) Iterables.find(newApp.getChildren(), 
Predicates.instanceOf(MyEntity.class));
+        
+        assertEquals(newE.getAttribute(MyEntity.MY_SENSOR), val);
+    }
+
     @Test
     public void testRebindWhenPreviousAppDestroyedHasNoApp() throws Exception {
         origApp.stop();

http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java
 
b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java
index f89e770..8bf0ab4 100644
--- 
a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java
+++ 
b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java
@@ -44,10 +44,14 @@ public class XmlSerializerTest {
         assertSerializeAndDeserialize(new StringHolder("abc"));
     }
 
+    /**
+     * See https://issues.apache.org/jira/browse/BROOKLYN-305 and 
http://x-stream.github.io/faq.html#XML_control_char
+     */
     @Test
     public void testIllegalXmlCharacter() throws Exception {
-        String val = "abc\u001b";
-        assertEquals((int)val.charAt(3), 27); // expect that to give us 
unicode character 27
+        String val = "PREFIX_\u001b\u0000_SUFFIX";
+        assertEquals((int)val.charAt(7), 27); // expect that to give us 
unicode character 27
+        assertEquals((int)val.charAt(8), 0); // expect that to give us unicode 
character 0
         assertSerializeAndDeserialize(val);
         assertSerializeAndDeserialize(new StringHolder(val));
     }

http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/0ef20baf/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java 
b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java
index 65983e3..0b195b5 100644
--- a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java
+++ b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java
@@ -19,12 +19,35 @@
 package org.apache.brooklyn.util.core.xstream;
 
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
 
+import java.util.List;
+
+import org.apache.brooklyn.util.core.xstream.XmlUtil.Escaper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.testng.annotations.Test;
 
+import com.google.common.annotations.Beta;
+import com.google.common.base.Joiner;
+import com.google.common.base.Optional;
+import com.google.common.collect.Lists;
 
 public class XmlUtilTest {
 
+    /*
+     * For invalid unicode characters, see 
https://issues.apache.org/jira/browse/BROOKLYN-305 and 
+     * https://en.wikipedia.org/wiki/Valid_characters_in_XML.
+     *
+     * If we include the xml version in the xml string, then more unicode 
characters are accepted.
+     * But if it still contains something illegal, then we can resort to 
calling
+     * xpathHandlingIllegalChars (to temporarily escape those characters
+     * 
+     */
+
+    private static final Logger LOG = 
LoggerFactory.getLogger(XmlUtilTest.class);
+    
     @Test
     public void testXpath() throws Exception {
         String xml = "<a><b>myb</b></a>";
@@ -32,4 +55,128 @@ public class XmlUtilTest {
             assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb");
         }
     }
+
+    @Test
+    public void testXpathWithEscapedCharsAndXmlVersion1_1() throws Exception {
+        StringBuilder xml = new StringBuilder("<?xml version=\"1.1\" 
encoding=\"UTF-8\"?>"+"\n"+
+                "<a><b>myb</b><c>");
+        for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) {
+            if (isValidUnicodeInXml1_1(i)) {
+                String unicode = Integer.toHexString(i);
+                xml.append("&#x"+unicode+";");
+            }
+        }
+        xml.append("</c></a>");
+        assertEquals(XmlUtil.xpath(xml.toString(), "/a/b[text()]"), "myb");
+    }
+
+    @Test
+    public void testXpathWithEscapedCharsAndXmlUnversioned() throws Exception {
+        StringBuilder xml = new StringBuilder("<a><b>myb</b><c>");
+        for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) {
+            if (isValidUnicodeInXml1_0(i)) {
+                String unicode = Integer.toHexString(i);
+                xml.append("&#x"+unicode+";");
+            }
+        }
+        xml.append("</c></a>");
+        assertEquals(XmlUtil.xpath(xml.toString(), "/a/b[text()]"), "myb");
+    }
+
+    @Test
+    public void testXpathHandlingIllegalChars() throws Exception {
+        StringBuilder xml = new StringBuilder("<a><b>myb</b><c>");
+        for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) {
+            String unicode = Integer.toHexString(i);
+            xml.append("&#x"+unicode+";");
+        }
+        xml.append("</c></a>");
+        assertEquals(XmlUtil.xpathHandlingIllegalChars(xml.toString(), 
"/a/b[text()]"), "myb");
+    }
+
+    @Test
+    public void testEscaper() throws Exception {
+        // Escapes unicode char, ignoring things around it
+        assertEscapeAndUnescape("&#x001b;", "BR_UNICODE_x001b;");
+        assertEscapeAndUnescape("&#00027;", "BR_UNICODE_00027;");
+        assertEscapeAndUnescape("&#x1b;", "BR_UNICODE_x1b;");
+        assertEscapeAndUnescape("&#x1b;SUFFIX", "BR_UNICODE_x1b;SUFFIX");
+        assertEscapeAndUnescape("PREFIX&#x1b;", "PREFIXBR_UNICODE_x1b;");
+        assertEscapeAndUnescape("PREFIX&#x1b;SUFFIX", 
"PREFIXBR_UNICODE_x1b;SUFFIX");
+
+        // Ignores invalid unicodes
+        assertEscapeAndUnescape("&#x001b", "&#x001b"); // no ";"
+        assertEscapeAndUnescape("&#x001g;", "&#x001g;"); // chars out of range
+        assertEscapeAndUnescape("&#x001bb;", "&#x001bb;"); // too many chars
+
+        // Handles strings that already contain the expected escape sequence
+        assertEscapeAndUnescape("BR_UNICODE_x1b;", "NOT_BR_UNICODE_x1b;");
+        assertEscapeAndUnescape("NOT_BR_UNICODE_x1b;", 
"NOT_NOT_BR_UNICODE_x1b;");
+        assertEscapeAndUnescape("BR_UNICODE_x1b;THEN&#x1b;", 
"NOT_BR_UNICODE_x1b;THENBR_UNICODE_x1b;");
+    }
+
+    protected void assertEscapeAndUnescape(String val) {
+        assertEscapeAndUnescape(val, Optional.<String>absent());
+    }
+
+    protected void assertEscapeAndUnescape(String val, String expectedEscaped) 
{
+        assertEscapeAndUnescape(val, Optional.of(expectedEscaped));
+    }
+    
+    protected void assertEscapeAndUnescape(String val, Optional<String> 
expectedEscaped) {
+        Escaper escaper = new XmlUtil.Escaper();
+        String escaped = escaper.escape(val);
+        if (expectedEscaped.isPresent()) {
+            assertEquals(escaped, expectedEscaped.get());
+        }
+        assertEquals(escaper.unescape(escaped), val);
+    }
+    
+    @Beta
+    protected boolean isValidUnicodeInXml1_1(int c) {
+        int min = 0;
+        int max = 65535; // xFFFF
+        return min <= c && c <= max &&
+                c != 0 && (c <= 55295 || c >= 57344) && c != 65534;
+    }
+
+    @Beta
+    protected boolean isValidUnicodeInXml1_0(int c) {
+        // see https://www.w3.org/TR/xml/#charsets
+        return c == 0x9 || c == 0xA || c == 0xD || (0x20 <= c && c <= 0xD7FF) 
+                || (0xE000 <= c && c <= 0xFFFD) || (0x10000 <= c && c <= 
0x10FFFF);
+    }
+
+    // For checking our assumptions about what is a valid and invalid 
(escaped) unicode char
+    // in XML version 1.0 and 1.1.
+    @Test(groups={"WIP"}, enabled=false)
+    public void testXpathWithEachEscapeCharacterAndXmlVersion() throws 
Exception {
+        List<Integer> errsInXml1_1 = Lists.newArrayList();
+        List<Integer> errsInXmlUnversioned = Lists.newArrayList();
+        for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) {
+            String unicode = Integer.toHexString(i);
+            try {
+                String xml = Joiner.on("\n").join(
+                        "<?xml version=\"1.1\" encoding=\"UTF-8\"?>",
+                        "<a><b>myb</b><c>&#x"+unicode+";</c></a>");
+                assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb");
+                assertTrue(isValidUnicodeInXml1_1(i), "i="+i+"; 
unicode="+unicode);
+            } catch (Throwable t) {
+                LOG.debug("Failed for code "+unicode+": "+t.getMessage());
+                errsInXml1_1.add(Integer.valueOf(unicode, 16));
+                assertFalse(isValidUnicodeInXml1_1(i), "i="+i+"; 
unicode="+unicode);
+            }
+            try {
+                String xml = "<a><b>myb</b><c>&#x"+unicode+";</c></a>";
+                assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb");
+                assertTrue(isValidUnicodeInXml1_0(i), "i="+i+"; 
unicode="+unicode);
+            } catch (Throwable t) {
+                LOG.debug("Failed for code "+unicode+": "+t.getMessage());
+                errsInXmlUnversioned.add(Integer.valueOf(unicode, 16));
+                assertFalse(isValidUnicodeInXml1_0(i), "i="+i+"; 
unicode="+unicode);
+            }
+        }
+        LOG.info("XML version 1.1 invalidCodes="+errsInXml1_1);
+        LOG.info("XML unversioned invalidCodes="+errsInXmlUnversioned);
+    }
 }

Reply via email to