Author: mrglavas
Date: Tue Nov 11 20:22:20 2014
New Revision: 1638353
URL: http://svn.apache.org/r1638353
Log:
Fixing JIRA Issue #1651: https://issues.apache.org/jira/browse/XERCESJ-1651. \w
should be processed as [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] as per the XML
Schema specification.
Modified:
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java
Modified:
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java
URL:
http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java?rev=1638353&r1=1638352&r2=1638353&view=diff
==============================================================================
---
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java
(original)
+++
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java
Tue Nov 11 20:22:20 2014
@@ -807,6 +807,7 @@ class Token implements java.io.Serializa
ranges[type].addRange(i, i);
} // for all characters
ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
+ ranges[CHAR_OTHER].addRange(0x10000, Token.UTF16_MAX);
for (int i = 0; i < ranges.length; i ++) {
if (Token.categoryNames[i] != null) {
Modified:
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java
URL:
http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java?rev=1638353&r1=1638352&r2=1638353&view=diff
==============================================================================
---
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java
(original)
+++
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java
Tue Nov 11 20:22:20 2014
@@ -51,11 +51,16 @@ final class XML11TokenMap implements Ran
ranges.put("xml:isDigit", tok);
ranges2.put("xml:isDigit", Token.complementRanges(tok));
+ /*
+ * \w is defined by the XML Schema specification to be:
+ * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
+ */
tok = Token.createRange();
- REUtil.setupRange(tok, REConstants.LETTERS);
- tok.mergeRanges((Token)ranges.get("xml:isDigit"));
- ranges.put("xml:isWord", tok);
- ranges2.put("xml:isWord", Token.complementRanges(tok));
+ tok.mergeRanges(Token.getRange("P", true));
+ tok.mergeRanges(Token.getRange("Z", true));
+ tok.mergeRanges(Token.getRange("C", true));
+ ranges2.put("xml:isWord", tok);
+ ranges.put("xml:isWord", Token.complementRanges(tok));
tok = Token.createRange();
REUtil.setupRange(tok, REConstants.NAMECHARS11_INTS);
Modified:
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java
URL:
http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java?rev=1638353&r1=1638352&r2=1638353&view=diff
==============================================================================
---
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java
(original)
+++
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java
Tue Nov 11 20:22:20 2014
@@ -51,11 +51,16 @@ final class XMLTokenMap implements Range
ranges.put("xml:isDigit", tok);
ranges2.put("xml:isDigit", Token.complementRanges(tok));
+ /*
+ * \w is defined by the XML Schema specification to be:
+ * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
+ */
tok = Token.createRange();
- REUtil.setupRange(tok, REConstants.LETTERS);
- tok.mergeRanges((Token)ranges.get("xml:isDigit"));
- ranges.put("xml:isWord", tok);
- ranges2.put("xml:isWord", Token.complementRanges(tok));
+ tok.mergeRanges(Token.getRange("P", true));
+ tok.mergeRanges(Token.getRange("Z", true));
+ tok.mergeRanges(Token.getRange("C", true));
+ ranges2.put("xml:isWord", tok);
+ ranges.put("xml:isWord", Token.complementRanges(tok));
tok = Token.createRange();
REUtil.setupRange(tok, REConstants.NAMECHARS);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]