RegexParser.java

knoaman Thu, 12 Nov 2009 07:06:55 -0800

Author: knoaman
Date: Thu Nov 12 15:06:18 2009
New Revision: 835408

URL: http://svn.apache.org/viewvc?rev=835408&view=rev
Log:
Allow character class subtraction in the normal mode


Modified:
    xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegexParser.java

Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegexParser.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegexParser.java?rev=835408&r1=835407&r2=835408&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegexParser.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegexParser.java Thu Nov 12 15:06:18 2009
@@ -177,8 +177,9 @@
                 break;
 
               case '-':
-                if (this.isSet(RegularExpression.XMLSCHEMA_MODE)
-                    && this.offset < this.regexlen && 
this.regex.charAt(this.offset) == '[') {
+                // Allow character class subtraction (regardless of whether we are in
+                // XML Schema mode or not)
+                if (this.offset < this.regexlen && 
this.regex.charAt(this.offset) == '[') {
                     this.offset++;
                     ret = T_XMLSCHEMA_CC_SUBTRACTION;
                 } else
@@ -887,7 +888,6 @@
         while ((type = this.read()) != T_EOF) {
             if (type == T_CHAR && this.chardata == ']' && !firstloop)
                 break;
-            firstloop = false;
             int c = this.chardata;
             boolean end = false;
             if (type == T_BACKSOLIDUS) {
@@ -937,6 +937,24 @@
                     throw this.ex("parser.cc.1", nameend);
                 this.offset = nameend+2;
             }
+            else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
+                if (nrange) {
+                    nrange = false;
+                    if (useNrange) {
+                        tok = (RangeToken) Token.complementRanges(tok);
+                    }
+                    else {
+                        base.subtractRanges(tok);
+                        tok = base;
+                    }
+                }
+                RangeToken range2 = this.parseCharacterClass(false);
+                tok.subtractRanges(range2);
+                if (this.read() != T_CHAR || this.chardata != ']') {
+                    throw this.ex("parser.cc.5", this.offset);
+                }
+                break;                          // Exit this loop
+            }
             this.next();
             if (!end) {                         // if not shorthands...
                 if (this.read() != T_CHAR || this.chardata != '-') { // Here 
is no '-'.
@@ -946,7 +964,11 @@
                     else {
                         addCaseInsensitiveChar(tok, c);
                     }
-                } else {
+                }
+                else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
+                    throw this.ex("parser.cc.8", this.offset-1);
+                }
+                else {
                     this.next(); // Skips '-'
                     if ((type = this.read()) == T_EOF)  throw 
this.ex("parser.cc.2", this.offset);
                     if (type == T_CHAR && this.chardata == ']') {
@@ -959,9 +981,13 @@
                         tok.addRange('-', '-');
                     } else {
                         int rangeend = this.chardata;
-                        if (type == T_BACKSOLIDUS)
+                        if (type == T_BACKSOLIDUS) {
                             rangeend = this.decodeEscaped();
+                        }
                         this.next();
+                        if (c > rangeend) {
+                            throw this.ex("parser.ope.3", this.offset-1);
+                        }
                         if (!this.isSet(RegularExpression.IGNORE_CASE) ||
                                 (c > 0xffff && rangeend > 0xffff)) {
                             tok.addRange(c, rangeend);
@@ -973,22 +999,21 @@
                 }
             }
             if (this.isSet(RegularExpression.SPECIAL_COMMA)
-                && this.read() == T_CHAR && this.chardata == ',')
+                && this.read() == T_CHAR && this.chardata == ',') {
                 this.next();
+            }
+            firstloop = false;
         }
-        if (this.read() == T_EOF)
+        if (this.read() == T_EOF) {
             throw this.ex("parser.cc.2", this.offset);
+        }
+        
         if (!useNrange && nrange) {
             base.subtractRanges(tok);
             tok = base;
         }
         tok.sortRanges();
         tok.compactRanges();
-        //tok.dumpRanges();
-        /*
-        if (this.isSet(RegularExpression.IGNORE_CASE))
-            tok = RangeToken.createCaseInsensitiveToken(tok);
-        */
         this.setContext(S_NORMAL);
         this.next();                    // Skips ']'
 



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

svn commit: r835408 - /xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegexParser.java

Reply via email to