Author: amassari
Date: Tue Nov 4 12:03:14 2008
New Revision: 711369
URL: http://svn.apache.org/viewvc?rev=711369&view=rev
Log:
A regular expression with a negative character class was always stored as a
positive character class, so trying to perform a case-insensitive match of "q"
against "[^Q]" would fail because the class was immediately exploded into
[a-zA-PR-Z]. We now preserve the negative character class.
Modified:
xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp
xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp
Modified: xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp?rev=711369&r1=711368&r2=711369&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/RegxParser.cpp Tue Nov 4 12:03:14 2008
@@ -502,7 +502,7 @@
case REGX_T_DOLLAR:
return processDollar();
case REGX_T_LBRACKET:
- return parseCharacterClass();
+ return parseCharacterClass(true);
case REGX_T_BACKSOLIDUS:
switch(fCharData) {
@@ -598,29 +598,21 @@
return fTokenFactory->getRange(rangeName, !(ch == chLatin_p));
}
-RangeToken* RegxParser::parseCharacterClass() {
+RangeToken* RegxParser::parseCharacterClass(const bool useNRange) {
setParseContext(regexParserStateInBrackets);
processNext();
- RangeToken* base = 0;
RangeToken* tok = 0;
bool isNRange = false;
if (getState() == REGX_T_CHAR && getCharData() == chCaret) {
-
isNRange = true;
processNext();
-
- base = fTokenFactory->createRange();
- base->addRange(0, Token::UTF16_MAX);
- tok = fTokenFactory->createRange();
- }
- else {
- tok= fTokenFactory->createRange();
}
+ tok = fTokenFactory->createRange();
- int type;
+ parserState type;
bool firstLoop = true;
bool wasDecoded;
@@ -629,15 +621,8 @@
wasDecoded = false;
// single range | from-to-range | subtraction
- if (type == REGX_T_CHAR && getCharData() == chCloseSquare &&
!firstLoop) {
-
- if (isNRange) {
-
- base->subtractRanges(tok);
- tok = base;
- }
+ if (type == REGX_T_CHAR && getCharData() == chCloseSquare &&
!firstLoop)
break;
- }
XMLInt32 ch = getCharData();
bool end = false;
@@ -682,13 +667,12 @@
} // end if REGX_T_BACKSOLIDUS
else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION && !firstLoop) {
- if (isNRange) {
-
- base->subtractRanges(tok);
- tok = base;
+ if (isNRange)
+ {
+ tok = RangeToken::complementRanges(tok, fTokenFactory,
fMemoryManager);
+ isNRange=false;
}
-
- RangeToken* rangeTok = parseCharacterClass();
+ RangeToken* rangeTok = parseCharacterClass(false);
tok->subtractRanges(rangeTok);
if (getState() != REGX_T_CHAR || getCharData() != chCloseSquare) {
@@ -706,7 +690,7 @@
|| ch == chCloseSquare
|| (ch == chDash && getCharData() == chCloseSquare &&
firstLoop))) {
// if regex = [-] then invalid...
- // '[', ']', '-' not allowed and should be esacaped
+ // '[', ']', '-' not allowed and should be escaped
XMLCh chStr[] = { ch, chNull };
ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6,
chStr, chStr, getMemoryManager());
}
@@ -743,7 +727,7 @@
if (rangeEnd == chOpenSquare
|| rangeEnd == chCloseSquare
|| rangeEnd == chDash)
- // '[', ']', '-' not allowed and should be esacaped
+ // '[', ']', '-' not allowed and should be escaped
ThrowXMLwithMemMgr2(ParseException,
XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr, getMemoryManager());
}
else if (type == REGX_T_BACKSOLIDUS) {
@@ -767,6 +751,14 @@
if (getState() == REGX_T_EOF)
ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2,
getMemoryManager());
+ if (isNRange)
+ {
+ if(useNRange)
+ tok->setTokenType(Token::T_NRANGE);
+ else
+ tok = RangeToken::complementRanges(tok, fTokenFactory,
fMemoryManager);
+ }
+
tok->sortRanges();
tok->compactRanges();
Modified: xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp?rev=711369&r1=711368&r2=711369&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/RegxParser.hpp Tue Nov 4 12:03:14 2008
@@ -122,7 +122,7 @@
virtual Token* processQuestion(Token* const tok);
virtual Token* processParen();
- RangeToken* parseCharacterClass();
+ RangeToken* parseCharacterClass(const bool useNRange);
RangeToken* processBacksolidus_pP(const XMLInt32 ch);
// -----------------------------------------------------------------------
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]