Author: amassari
Date: Tue Nov  4 12:03:14 2008
New Revision: 711369

URL: http://svn.apache.org/viewvc?rev=711369&view=rev
Log:
A regular expression with a negative character class was always stored as a 
positive character class, so a case-insensitive match of "q" against "[^Q]" 
gave the wrong result: the class was immediately expanded into the equivalent 
of [a-zA-PR-Z], which accepts "q". We now preserve the negative character 
class.

Modified:
    xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp
    xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp

Modified: xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp?rev=711369&r1=711368&r2=711369&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp Tue Nov  4 12:03:14 2008
@@ -502,7 +502,7 @@
     case REGX_T_DOLLAR:
         return processDollar();
     case REGX_T_LBRACKET:
-        return parseCharacterClass();
+        return parseCharacterClass(true);
     case REGX_T_BACKSOLIDUS:
         switch(fCharData) {
 
@@ -598,29 +598,21 @@
     return  fTokenFactory->getRange(rangeName, !(ch == chLatin_p));
 }
 
-RangeToken* RegxParser::parseCharacterClass() {
+RangeToken* RegxParser::parseCharacterClass(const bool useNRange) {
 
     setParseContext(regexParserStateInBrackets);
     processNext();
 
-    RangeToken* base = 0;
     RangeToken* tok = 0;
     bool isNRange = false;
 
     if (getState() == REGX_T_CHAR && getCharData() == chCaret) {
-
         isNRange = true;
         processNext();
-
-        base = fTokenFactory->createRange();
-        base->addRange(0, Token::UTF16_MAX);
-        tok = fTokenFactory->createRange();
-    }
-    else {
-        tok= fTokenFactory->createRange();
     }
+    tok = fTokenFactory->createRange();
 
-    int type;
+    parserState type;
     bool firstLoop = true;
     bool wasDecoded;
 
@@ -629,15 +621,8 @@
         wasDecoded = false;
 
         // single range | from-to-range | subtraction
-        if (type == REGX_T_CHAR && getCharData() == chCloseSquare && 
!firstLoop) {
-
-            if (isNRange) {
-
-                base->subtractRanges(tok);
-                tok = base;
-            }
+        if (type == REGX_T_CHAR && getCharData() == chCloseSquare && 
!firstLoop)
             break;
-        }
 
         XMLInt32 ch = getCharData();
         bool     end = false;
@@ -682,13 +667,12 @@
         } // end if REGX_T_BACKSOLIDUS
         else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION && !firstLoop) {
 
-            if (isNRange) {
-
-                base->subtractRanges(tok);
-                tok = base;
+            if (isNRange)
+            {
+                tok = RangeToken::complementRanges(tok, fTokenFactory, 
fMemoryManager);
+                isNRange=false;
             }
-
-            RangeToken* rangeTok = parseCharacterClass();
+            RangeToken* rangeTok = parseCharacterClass(false);
             tok->subtractRanges(rangeTok);
 
             if (getState() != REGX_T_CHAR || getCharData() != chCloseSquare) {
@@ -706,7 +690,7 @@
                     || ch == chCloseSquare
                     || (ch == chDash && getCharData() == chCloseSquare && 
firstLoop))) {
                 // if regex = [-] then invalid...
-                // '[', ']', '-' not allowed and should be esacaped
+                // '[', ']', '-' not allowed and should be escaped
                 XMLCh chStr[] = { ch, chNull };
                 ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, 
chStr, chStr, getMemoryManager());
             }
@@ -743,7 +727,7 @@
                         if (rangeEnd == chOpenSquare
                             || rangeEnd == chCloseSquare
                             || rangeEnd == chDash)
-                            // '[', ']', '-' not allowed and should be esacaped
+                            // '[', ']', '-' not allowed and should be escaped
                             ThrowXMLwithMemMgr2(ParseException, 
XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr, getMemoryManager());
                     }
                     else if (type == REGX_T_BACKSOLIDUS) {
@@ -767,6 +751,14 @@
     if (getState() == REGX_T_EOF)
         ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, 
getMemoryManager());
 
+    if (isNRange)
+    {
+        if(useNRange)
+            tok->setTokenType(Token::T_NRANGE);
+        else
+            tok = RangeToken::complementRanges(tok, fTokenFactory, 
fMemoryManager);
+    }
+
     tok->sortRanges();
     tok->compactRanges();
 

Modified: xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp?rev=711369&r1=711368&r2=711369&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp Tue Nov  4 12:03:14 2008
@@ -122,7 +122,7 @@
     virtual Token*      processQuestion(Token* const tok);
     virtual Token*      processParen();
 
-    RangeToken*         parseCharacterClass();
+    RangeToken*         parseCharacterClass(const bool useNRange);
     RangeToken*         processBacksolidus_pP(const XMLInt32 ch);
 
     // -----------------------------------------------------------------------



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to