knoaman 2003/03/18 11:38:28 Modified: c/src/xercesc/util/regx ParserForXMLSchema.cpp RegxParser.cpp RegxParser.hpp Log: Schema Errata E2-18 + misc. regex fixes. Revision Changes Path 1.5 +22 -23 xml-xerces/c/src/xercesc/util/regx/ParserForXMLSchema.cpp Index: ParserForXMLSchema.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/ParserForXMLSchema.cpp,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- ParserForXMLSchema.cpp 13 Jan 2003 19:02:23 -0000 1.4 +++ ParserForXMLSchema.cpp 18 Mar 2003 19:38:28 -0000 1.5 @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.5 2003/03/18 19:38:28 knoaman + * Schema Errata E2-18 + misc. regex fixes. + * * Revision 1.4 2003/01/13 19:02:23 knoaman * [Bug 14390] C++ Indentifier collision with Python. * @@ -169,7 +172,7 @@ Token* ParserForXMLSchema::processParen() { processNext(); - Token* retTok = getTokenFactory()->createParenthesis(parseRegx(), 0); + Token* retTok = getTokenFactory()->createParenthesis(parseRegx(true), 0); if (getState() != REGX_T_RPAREN) { ThrowXML(ParseException, XMLExcepts::Parser_Factor1); @@ -283,13 +286,13 @@ if (!end) { - if (type == REGX_T_CHAR) { - - if (ch == chOpenSquare) - ThrowXML(ParseException,XMLExcepts::Parser_CC6); - - if (ch == chCloseSquare) - ThrowXML(ParseException,XMLExcepts::Parser_CC7); + if (type == REGX_T_CHAR + && (ch == chOpenSquare + || ch == chCloseSquare + || ch == chDash)) { + // '[', ']', '-' not allowed and should be esacaped + XMLCh chStr[] = { ch, chNull }; + ThrowXML2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr); } if (getState() != REGX_T_CHAR || getCharData() != chDash) { @@ -301,36 +304,32 @@ if ((type = getState()) == REGX_T_EOF) ThrowXML(ParseException,XMLExcepts::Parser_CC2); - if (type == REGX_T_CHAR && getCharData() == chCloseSquare) { + if ((type == REGX_T_CHAR && getCharData() == chCloseSquare) + || type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) { - tok->addRange(ch, ch); - tok->addRange(chDash, chDash); - } - else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) { - tok->addRange(ch, ch); - tok->addRange(chDash, chDash); + static const XMLCh dashStr[] = { chDash, chNull}; + ThrowXML2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr); } else { XMLInt32 rangeEnd = getCharData(); + XMLCh rangeEndStr[] = { rangeEnd, chNull }; if (type == REGX_T_CHAR) { - if (rangeEnd == chOpenSquare) - ThrowXML(ParseException,XMLExcepts::Parser_CC6); - - if (rangeEnd == chCloseSquare) - ThrowXML(ParseException,XMLExcepts::Parser_CC7); + if (rangeEnd == chOpenSquare + || rangeEnd == chCloseSquare + || rangeEnd == chDash) + // '[', ']', '-' not allowed and should be esacaped + ThrowXML2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr); } - - if (type == REGX_T_BACKSOLIDUS) { + else if (type == REGX_T_BACKSOLIDUS) { rangeEnd = decodeEscaped(); } processNext(); if (ch > rangeEnd) { - XMLCh rangeEndStr[] = { rangeEnd, chNull }; XMLCh chStr[] = { ch, chNull }; ThrowXML2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr); } 1.6 +64 -74 xml-xerces/c/src/xercesc/util/regx/RegxParser.cpp Index: RegxParser.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/RegxParser.cpp,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- RegxParser.cpp 4 Mar 2003 16:36:17 -0000 1.5 +++ RegxParser.cpp 18 Mar 2003 19:38:28 -0000 1.6 @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.6 2003/03/18 19:38:28 knoaman + * Schema Errata E2-18 + misc. regex fixes. + * * Revision 1.5 2003/03/04 16:36:17 knoaman * RegEx: fix for character category escape * @@ -420,9 +423,9 @@ } -Token* RegxParser::parseRegx() { +Token* RegxParser::parseRegx(const bool matchingRParen) { - Token* tok = parseTerm(); + Token* tok = parseTerm(matchingRParen); Token* parentTok = 0; while (fState == REGX_T_OR) { @@ -435,26 +438,28 @@ tok = parentTok; } - tok->addChild(parseTerm(), fTokenFactory); + tok->addChild(parseTerm(matchingRParen), fTokenFactory); } return tok; } -Token* RegxParser::parseTerm() { +Token* RegxParser::parseTerm(const bool matchingRParen) { unsigned short state = fState; - if (state == REGX_T_OR || state == REGX_T_RPAREN || state == REGX_T_EOF) { + if (state == REGX_T_OR || state == REGX_T_EOF + || (state == REGX_T_RPAREN && matchingRParen)) { return fTokenFactory->createToken(Token::T_EMPTY); } else { - Token* tok = parseFactor(); - Token* concatTok = 0; + Token* tok = parseFactor(); + Token* concatTok = 0; - while ((state = fState) != REGX_T_OR && state != REGX_T_RPAREN && state != REGX_T_EOF) + while ((state = fState) != REGX_T_OR && state != REGX_T_EOF + && (state != REGX_T_RPAREN || !matchingRParen)) { if (concatTok == 0) { @@ -605,7 +610,7 @@ processNext(); int num = fNoGroups++; - Token* tok = fTokenFactory->createParenthesis(parseRegx(),num); + Token* tok = fTokenFactory->createParenthesis(parseRegx(true),num); if (fState != REGX_T_RPAREN) ThrowXML(ParseException,XMLExcepts::Parser_Factor1); @@ -893,85 +898,74 @@ case REGX_T_QUESTION: return processQuestion(tok); case REGX_T_CHAR: - if (fCharData == chOpenCurly) { + if (fCharData == chOpenCurly && fOffset < fStringLen) { - int offset = fOffset; int min = 0; int max = -1; - bool minExist = false; - - if (offset >= fStringLen) - break; + XMLInt32 ch = fString[fOffset++]; - XMLInt32 ch = fString[offset++]; + if (ch >= chDigit_0 && ch <= chDigit_9) { - if (ch != chComma && (ch < chDigit_0 || ch > chDigit_9)) - ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString); - - if (ch != chComma) { - minExist = true; min = ch - chDigit_0; - while (offset < fStringLen - && (ch = fString[offset++]) >= chDigit_0 + while (fOffset < fStringLen + && (ch = fString[fOffset++]) >= chDigit_0 && ch <= chDigit_9) { min = min*10 + ch - chDigit_0; - ch = -1; } + + if (min < 0) + ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier5, fString); + } + else { + ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier1, fString); } max = min; - if (ch != chCloseCurly && ch != chComma) { - ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString); - } - if (ch == chComma) { - if (offset >= fStringLen) - break; - - if (((ch = fString[offset++]) < chDigit_0 || ch > chDigit_9) - && ch != chCloseCurly) - ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString); - - if (ch == chCloseCurly) { - if (minExist) - max = -1; - else - ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString); + if (fOffset >= fStringLen) { + ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier3, fString); } - else { + else if ((ch = fString[fOffset++]) >= chDigit_0 && ch <= chDigit_9) { + max = ch - chDigit_0; - while (offset < fStringLen - && (ch = fString[offset++]) >= chDigit_0 + while (fOffset < fStringLen + && (ch = fString[fOffset++]) >= chDigit_0 && ch <= chDigit_9) { max = max*10 + ch - chDigit_0; - ch = -1; } - if (ch != chCloseCurly) { - ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString); - } + if (max < 0) + ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier5, fString); + else if (min > max) + ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier4, fString); + } + else { + max = -1; } - } // end if ch = chComma + } + + if (ch != chCloseCurly) { + ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier2, fString); + } - if (checkQuestion(offset)) { + if (checkQuestion(fOffset)) { tok = fTokenFactory->createClosure(tok, true); - fOffset = offset + 1; + fOffset++; } else { - tok = fTokenFactory->createClosure(tok); - fOffset = offset; } tok->setMin(min); tok->setMax(max); processNext(); } + break; } return tok; @@ -1014,27 +1008,6 @@ tok = getTokenForShorthand(fCharData); processNext(); return tok; - case chLatin_e: - case chLatin_f: - case chLatin_n: - case chLatin_r: - case chLatin_t: - case chLatin_u: - case chLatin_v: - case chLatin_x: - { - XMLInt32 ch = decodeEscaped(); - if (ch < 0x10000) { - tok = fTokenFactory->createChar(ch); - } - else { - - XMLCh* surrogateStr = RegxUtil::decomposeToSurrogates(ch); - ArrayJanitor<XMLCh> janSurrogate(surrogateStr); - tok = fTokenFactory->createString(surrogateStr); - } - } - break; case chLatin_c: return processBacksolidus_c(); case chLatin_C: @@ -1069,12 +1042,29 @@ } break; default: - tok = fTokenFactory->createChar(fCharData); + { + XMLInt32 ch = decodeEscaped(); + if (ch < 0x10000) { + tok = fTokenFactory->createChar(ch); + } + else { + + XMLCh* surrogateStr = RegxUtil::decomposeToSurrogates(ch); + ArrayJanitor<XMLCh> janSurrogate(surrogateStr); + tok = fTokenFactory->createString(surrogateStr); + } + } + break; } // end switch processNext(); break; case REGX_T_CHAR: + if (fCharData == chOpenCurly + || fCharData == chCloseCurly + || fCharData == chCloseSquare) + ThrowXML(ParseException,XMLExcepts::Parser_Atom4); + tok = fTokenFactory->createChar(fCharData); processNext(); break; 1.4 +3 -3 xml-xerces/c/src/xercesc/util/regx/RegxParser.hpp Index: RegxParser.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/RegxParser.hpp,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- RegxParser.hpp 13 Jan 2003 19:02:23 -0000 1.3 +++ RegxParser.hpp 18 Mar 2003 19:38:28 -0000 1.4 @@ -158,7 +158,7 @@ // Protected Parsing/Processing methods // ----------------------------------------------------------------------- void processNext(); - Token* parseRegx(); + Token* parseRegx(const bool matchingRParen = false); virtual Token* processCaret(); virtual Token* processDollar(); virtual Token* processLook(const unsigned short tokType); @@ -199,7 +199,7 @@ // ----------------------------------------------------------------------- // Private parsing/processing methods // ----------------------------------------------------------------------- - Token* parseTerm(); + Token* parseTerm(const bool matchingRParen = false); Token* parseFactor(); Token* parseAtom();
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]