regx ParserForXMLSchema.cpp RegxParser.cpp RegxParser.hpp

knoaman Tue, 18 Mar 2003 11:36:58 -0800

knoaman     2003/03/18 11:38:28

  Modified:    c/src/xercesc/util/regx ParserForXMLSchema.cpp
                        RegxParser.cpp RegxParser.hpp
  Log:
  Schema Errata E2-18 + misc. regex fixes.
  
  Revision  Changes    Path
  1.5       +22 -23    xml-xerces/c/src/xercesc/util/regx/ParserForXMLSchema.cpp
  
  Index: ParserForXMLSchema.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/ParserForXMLSchema.cpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- ParserForXMLSchema.cpp    13 Jan 2003 19:02:23 -0000      1.4
  +++ ParserForXMLSchema.cpp    18 Mar 2003 19:38:28 -0000      1.5
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.5  2003/03/18 19:38:28  knoaman
  + * Schema Errata E2-18 + misc. regex fixes.
  + *
    * Revision 1.4  2003/01/13 19:02:23  knoaman
    * [Bug 14390] C++ Indentifier collision with Python.
    *
  @@ -169,7 +172,7 @@
   Token* ParserForXMLSchema::processParen() {
   
       processNext();
  -    Token* retTok = getTokenFactory()->createParenthesis(parseRegx(), 0);
  +    Token* retTok = getTokenFactory()->createParenthesis(parseRegx(true), 0);
   
       if (getState() != REGX_T_RPAREN) {
           ThrowXML(ParseException, XMLExcepts::Parser_Factor1);
  @@ -283,13 +286,13 @@
   
           if (!end) {
   
  -            if (type == REGX_T_CHAR) {
  -
  -                if (ch == chOpenSquare)
  -                    ThrowXML(ParseException,XMLExcepts::Parser_CC6);
  -
  -                if (ch == chCloseSquare)
  -                    ThrowXML(ParseException,XMLExcepts::Parser_CC7);
  +            if (type == REGX_T_CHAR
  +                && (ch == chOpenSquare
  +                    || ch == chCloseSquare
  +                    || ch == chDash)) {
  +                // '[', ']', '-' not allowed and should be escaped
  +                XMLCh chStr[] = { ch, chNull };
  +                ThrowXML2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr);
               }
   
               if (getState() != REGX_T_CHAR || getCharData() != chDash) {
  @@ -301,36 +304,32 @@
                   if ((type = getState()) == REGX_T_EOF)
                       ThrowXML(ParseException,XMLExcepts::Parser_CC2);
   
  -                if (type == REGX_T_CHAR && getCharData() == chCloseSquare) {
  +                if ((type == REGX_T_CHAR && getCharData() == chCloseSquare)
  +                    || type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {
   
  -                    tok->addRange(ch, ch);
  -                    tok->addRange(chDash, chDash);
  -                }
  -                else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {
  -                    tok->addRange(ch, ch);
  -                    tok->addRange(chDash, chDash);
  +                    static const XMLCh dashStr[] = { chDash, chNull};
  +                    ThrowXML2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr);
                   }
                   else {
   
                       XMLInt32 rangeEnd = getCharData();
  +                    XMLCh rangeEndStr[] = { rangeEnd, chNull };
   
                       if (type == REGX_T_CHAR) {
   
  -                        if (rangeEnd == chOpenSquare)
  -                            ThrowXML(ParseException,XMLExcepts::Parser_CC6);
  -
  -                        if (rangeEnd == chCloseSquare)
  -                            ThrowXML(ParseException,XMLExcepts::Parser_CC7);
  +                        if (rangeEnd == chOpenSquare
  +                            || rangeEnd == chCloseSquare
  +                            || rangeEnd == chDash)
  +                            // '[', ']', '-' not allowed and should be escaped
  +                            ThrowXML2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr);
                       }
  -
  -                    if (type == REGX_T_BACKSOLIDUS) {
  +                    else if (type == REGX_T_BACKSOLIDUS) {
                           rangeEnd = decodeEscaped();
                       }
   
                       processNext();
   
                       if (ch > rangeEnd) {
  -                        XMLCh rangeEndStr[] = { rangeEnd, chNull };
                           XMLCh chStr[] = { ch, chNull };
                           ThrowXML2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr);
                       }
  
  
  
  1.6       +64 -74    xml-xerces/c/src/xercesc/util/regx/RegxParser.cpp
  
  Index: RegxParser.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/RegxParser.cpp,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- RegxParser.cpp    4 Mar 2003 16:36:17 -0000       1.5
  +++ RegxParser.cpp    18 Mar 2003 19:38:28 -0000      1.6
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.6  2003/03/18 19:38:28  knoaman
  + * Schema Errata E2-18 + misc. regex fixes.
  + *
    * Revision 1.5  2003/03/04 16:36:17  knoaman
    * RegEx: fix for character category escape
    *
  @@ -420,9 +423,9 @@
   }
   
   
  -Token* RegxParser::parseRegx() {
  +Token* RegxParser::parseRegx(const bool matchingRParen) {
   
  -    Token* tok = parseTerm();
  +    Token* tok = parseTerm(matchingRParen);
       Token* parentTok = 0;
   
       while (fState == REGX_T_OR) {
  @@ -435,26 +438,28 @@
               tok = parentTok;
           }
   
  -        tok->addChild(parseTerm(), fTokenFactory);
  +        tok->addChild(parseTerm(matchingRParen), fTokenFactory);
       }
   
       return tok;
   }
   
   
  -Token* RegxParser::parseTerm() {
  +Token* RegxParser::parseTerm(const bool matchingRParen) {
   
       unsigned short state = fState;
   
  -    if (state == REGX_T_OR || state == REGX_T_RPAREN || state == REGX_T_EOF) {
  +    if (state == REGX_T_OR || state == REGX_T_EOF
  +        || (state == REGX_T_RPAREN && matchingRParen)) {
           return fTokenFactory->createToken(Token::T_EMPTY);
       }
       else {
   
  -             Token* tok = parseFactor();
  -             Token* concatTok = 0;
  +        Token* tok = parseFactor();
  +        Token* concatTok = 0;
   
  -             while ((state = fState) != REGX_T_OR && state != REGX_T_RPAREN && state != REGX_T_EOF)
  +        while ((state = fState) != REGX_T_OR && state != REGX_T_EOF
  +               && (state != REGX_T_RPAREN || !matchingRParen))
           {
               if (concatTok == 0) {
   
  @@ -605,7 +610,7 @@
   
       processNext();
       int num = fNoGroups++;
  -    Token* tok = fTokenFactory->createParenthesis(parseRegx(),num);
  +    Token* tok = fTokenFactory->createParenthesis(parseRegx(true),num);
   
       if (fState != REGX_T_RPAREN)
           ThrowXML(ParseException,XMLExcepts::Parser_Factor1);
  @@ -893,85 +898,74 @@
       case REGX_T_QUESTION:
           return processQuestion(tok);
       case REGX_T_CHAR:
  -             if (fCharData == chOpenCurly) {
  +        if (fCharData == chOpenCurly && fOffset < fStringLen) {
   
  -            int offset = fOffset;
               int min = 0;
               int max = -1;
  -            bool minExist = false;
  -
  -            if (offset >= fStringLen)
  -                break;
  +            XMLInt32 ch = fString[fOffset++];
   
  -            XMLInt32 ch = fString[offset++];
  +            if (ch >= chDigit_0 && ch <= chDigit_9) {
   
  -            if (ch != chComma && (ch < chDigit_0 || ch > chDigit_9))
  -                ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
  -
  -            if (ch != chComma) {
  -                minExist = true;
                   min = ch - chDigit_0;
  -                while (offset < fStringLen
  -                       && (ch = fString[offset++]) >= chDigit_0
  +                while (fOffset < fStringLen
  +                       && (ch = fString[fOffset++]) >= chDigit_0
                          && ch <= chDigit_9) {
   
                       min = min*10 + ch - chDigit_0;
  -                    ch = -1;
                   }
  +
  +                if (min < 0)
  +                    ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier5, fString);
  +            }
  +            else {
  +                ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier1, fString);
               }
   
               max = min;
   
  -            if (ch != chCloseCurly && ch != chComma)  {
  -                ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
  -            }
  -
               if (ch == chComma) {
   
  -                if (offset >= fStringLen)
  -                    break;
  -
  -                if (((ch = fString[offset++]) < chDigit_0 || ch > chDigit_9)
  -                    && ch != chCloseCurly)
  -                    ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
  -
  -                if (ch == chCloseCurly) {
  -                    if (minExist)
  -                        max = -1;
  -                    else
  -                        ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
  +                if (fOffset >= fStringLen) {
  +                    ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier3, fString);
                   }
  -                else {
  +                else if ((ch = fString[fOffset++]) >= chDigit_0 && ch <= chDigit_9) {
  +
                       max = ch - chDigit_0;
  -                    while (offset < fStringLen
  -                           && (ch = fString[offset++]) >= chDigit_0
  +                    while (fOffset < fStringLen
  +                           && (ch = fString[fOffset++]) >= chDigit_0
                              && ch <= chDigit_9) {
   
                           max = max*10 + ch - chDigit_0;
  -                        ch = -1;
                       }
   
  -                    if (ch != chCloseCurly)  {
  -                        ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
  -                    }
  +                    if (max < 0)
  +                        ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier5, fString);
  +                    else if (min > max)
  +                        ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier4, fString);
  +                }
  +                else {
  +                    max = -1;
                   }
  -            } // end if ch = chComma
  +            }
  +
  +            if (ch != chCloseCurly)  {
  +                ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier2, fString);
  +            }
   
  -            if (checkQuestion(offset)) {
  +            if (checkQuestion(fOffset)) {
   
                   tok = fTokenFactory->createClosure(tok, true);
  -                fOffset = offset + 1;
  +                fOffset++;
               }
               else {
  -
                   tok = fTokenFactory->createClosure(tok);
  -                fOffset = offset;
               }
   
               tok->setMin(min);
               tok->setMax(max);
               processNext();
                }
  +        break;
        }
   
        return tok;
  @@ -1014,27 +1008,6 @@
               tok = getTokenForShorthand(fCharData);
               processNext();
               return tok;
  -        case chLatin_e:
  -        case chLatin_f:
  -        case chLatin_n:
  -        case chLatin_r:
  -        case chLatin_t:
  -        case chLatin_u:
  -        case chLatin_v:
  -        case chLatin_x:
  -                     {
  -                XMLInt32 ch = decodeEscaped();
  -                if (ch < 0x10000) {
  -                    tok = fTokenFactory->createChar(ch);
  -                }
  -                else {
  -
  -                    XMLCh* surrogateStr = RegxUtil::decomposeToSurrogates(ch);
  -                                 ArrayJanitor<XMLCh> janSurrogate(surrogateStr);
  -                                 tok = fTokenFactory->createString(surrogateStr);
  -                }
  -            }
  -                     break;
           case chLatin_c:
               return processBacksolidus_c();
           case chLatin_C:
  @@ -1069,12 +1042,29 @@
                        }
               break;
           default:
  -            tok = fTokenFactory->createChar(fCharData);
  +            {
  +                XMLInt32 ch = decodeEscaped();
  +                if (ch < 0x10000) {
  +                    tok = fTokenFactory->createChar(ch);
  +                }
  +                else {
  +
  +                    XMLCh* surrogateStr = RegxUtil::decomposeToSurrogates(ch);
  +                                 ArrayJanitor<XMLCh> janSurrogate(surrogateStr);
  +                                 tok = fTokenFactory->createString(surrogateStr);
  +                }
  +            }
  +                     break;
                } // end switch
   
           processNext();
           break;
       case REGX_T_CHAR:
  +        if (fCharData == chOpenCurly
  +            || fCharData == chCloseCurly
  +            || fCharData == chCloseSquare)
  +            ThrowXML(ParseException,XMLExcepts::Parser_Atom4);
  +
           tok = fTokenFactory->createChar(fCharData);
           processNext();
           break;
  
  
  
  1.4       +3 -3      xml-xerces/c/src/xercesc/util/regx/RegxParser.hpp
  
  Index: RegxParser.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/RegxParser.hpp,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- RegxParser.hpp    13 Jan 2003 19:02:23 -0000      1.3
  +++ RegxParser.hpp    18 Mar 2003 19:38:28 -0000      1.4
  @@ -158,7 +158,7 @@
       //  Protected Parsing/Processing methods
       // -----------------------------------------------------------------------
        void                processNext();
  -     Token*              parseRegx();
  +     Token*              parseRegx(const bool matchingRParen = false);
        virtual Token*      processCaret();
       virtual Token*      processDollar();
        virtual Token*      processLook(const unsigned short tokType);
  @@ -199,7 +199,7 @@
       // -----------------------------------------------------------------------
       //  Private parsing/processing methods
       // -----------------------------------------------------------------------
  -    Token* parseTerm();
  +    Token* parseTerm(const bool matchingRParen = false);
        Token* parseFactor();
        Token* parseAtom();


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: xml-xerces/c/src/xercesc/util/regx ParserForXMLSchema.cpp RegxParser.cpp RegxParser.hpp

Reply via email to