Author: mrglavas
Date: Tue Nov 11 20:15:46 2014
New Revision: 1638344
URL: http://svn.apache.org/r1638344
Log:
Fixing JIRA Issue #1651: https://issues.apache.org/jira/browse/XERCESJ-1651. \w
should be processed as [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] as per the XML
Schema specification.
Modified:
xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java
Modified:
xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
URL:
http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java?rev=1638344&r1=1638343&r2=1638344&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java Tue Nov 11 20:15:46 2014
@@ -384,11 +384,16 @@ class ParserForXMLSchema extends RegexPa
ranges.put("xml:isDigit", tok);
ranges2.put("xml:isDigit", Token.complementRanges(tok));
+ /*
+ * \w is defined by the XML Schema specification to be:
+ * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
+ */
tok = Token.createRange();
- setupRange(tok, LETTERS);
- tok.mergeRanges((Token)ranges.get("xml:isDigit"));
- ranges.put("xml:isWord", tok);
- ranges2.put("xml:isWord", Token.complementRanges(tok));
+ tok.mergeRanges(Token.getRange("P", true));
+ tok.mergeRanges(Token.getRange("Z", true));
+ tok.mergeRanges(Token.getRange("C", true));
+ ranges2.put("xml:isWord", tok);
+ ranges.put("xml:isWord", Token.complementRanges(tok));
tok = Token.createRange();
setupRange(tok, NAMECHARS);
Modified: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java
URL:
http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java?rev=1638344&r1=1638343&r2=1638344&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java Tue Nov 11 20:15:46 2014
@@ -803,6 +803,7 @@ class Token implements java.io.Serializa
ranges[type].addRange(i, i);
} // for all characters
ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
+ ranges[CHAR_OTHER].addRange(0x10000, Token.UTF16_MAX);
for (int i = 0; i < ranges.length; i ++) {
if (Token.categoryNames[i] != null) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]