There is a bug in XMLParser when parsing consecutive numeric character
references: the (.+) group in UNESCAPE_UNICODE_PATTERN matches greedily, so a
single match spans from the first "&#" to the last ";".
We found this when exchanging JSON snippets over chat. The chat client was
turning the preceding whitespace in a JSON snippet into a run of no-break-space
references (e.g. &#160;&#160;&#160;&#160;), and this causes XMLParser to throw
an exception because it tries to parse the whole run as one character instead
of as four characters.
Here is a patch. (I'm not sure if this is the best approach. Maybe it would be
better to make the whole expression lazy?):
diff --git a/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java
b/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java
index 96ca040..6be4bb4 100644
--- a/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java
+++ b/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java
@@ -54,7 +54,7 @@ public class XMLParser implements TokenListener {
public static final Pattern NAME_PREFIX_PATTERN = Pattern.compile("^xml",
Pattern.CASE_INSENSITIVE);
- public static final Pattern UNESCAPE_UNICODE_PATTERN =
Pattern.compile("\\&\\#(x?)(.+);");
+ public static final Pattern UNESCAPE_UNICODE_PATTERN =
Pattern.compile("\\&\\#(x?)([0-9a-fA-F]++);");
private ContentHandler contentHandler;
diff --git
a/nbxml/src/test/java/org/apache/vysper/xml/sax/impl/ParseTextTestCase.java
b/nbxml/src/test/java/org/apache/vysper/xml/sax/impl/ParseTextTestCase.java
index 5305a9b..2f3112e 100644
--- a/nbxml/src/test/java/org/apache/vysper/xml/sax/impl/ParseTextTestCase.java
+++ b/nbxml/src/test/java/org/apache/vysper/xml/sax/impl/ParseTextTestCase.java
@@ -88,6 +88,18 @@ public class ParseTextTestCase extends
AbstractAsyncXMLReaderTestCase {
assertFalse(events.hasNext());
}
+ public void testConsecutiveUnicodeEscape() throws Exception {
+ Iterator<TestEvent> events =
parse("<root>&#160;&#160;&#160;&#160;</root>").iterator();
+
+ assertStartDocument(events.next());
+ assertStartElement("", "root", "root", events.next());
+ assertText("\u00A0\u00A0\u00A0\u00A0", events.next());
+ assertEndElement("", "root", "root", events.next());
+ assertEndDocument(events.next());
+
+ assertFalse(events.hasNext());
+ }
+
public void testTextOnly() throws Exception {
Iterator<TestEvent> events = parse("text</root>").iterator();