tng         2002/12/24 08:11:40

  Modified:    c/src/xercesc/internal DGXMLScanner.cpp IGXMLScanner2.cpp
                        SGXMLScanner.cpp WFXMLScanner.cpp XMLScanner.cpp
  Log:
  For performance reasons, move the character check to scanCharRef.
  
  Revision  Changes    Path
  1.3       +73 -103   xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp
  
  Index: DGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DGXMLScanner.cpp  20 Dec 2002 22:09:56 -0000      1.2
  +++ DGXMLScanner.cpp  24 Dec 2002 16:11:39 -0000      1.3
  @@ -2326,23 +2326,13 @@
       bool    firstNonWS = false;
       bool    gotLeadingSurrogate = false;
       bool    escaped;
  -    bool    charref_expanded = false;
       while (true)
       {
       try
       {
           while(true)
           {
  -            // Get another char. Use second char if one is waiting
  -            if (secondCh)
  -            {
  -                nextCh = secondCh;
  -                secondCh = 0;
  -            }
  -            else
  -            {
  -                nextCh = fReaderMgr.getNextChar();
  -            }
  +            nextCh = fReaderMgr.getNextChar();
   
               if (!nextCh)
                   ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
  @@ -2372,18 +2362,16 @@
                       gotLeadingSurrogate = false;
                       continue;
                   }
  -                charref_expanded = true;
               }
  -
  -            // Deal with surrogate pairs
  -            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +            else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
               {
  +                // Deal with surrogate pairs
                   //  Its a leading surrogate. If we already got one, then
                   //  issue an error, else set leading flag to make sure that
                   //  we look for a trailing next time.
                   if (gotLeadingSurrogate)
                       emitError(XMLErrs::Expected2ndSurrogateChar);
  -                 else
  +                else
                       gotLeadingSurrogate = true;
               }
               else
  @@ -2407,22 +2395,17 @@
                       // Its got to at least be a valid XML character
                       if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                       {
  -                        // if it was a character reference and is control char, then it's ok
  -                        if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                        {
  -                            XMLCh tmpBuf[9];
  -                            XMLString::binToText
  -                            (
  -                                nextCh
  -                                , tmpBuf
  -                                , 8
  -                                , 16
  -                            );
  -                            emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
  -                        }
  +                        XMLCh tmpBuf[9];
  +                        XMLString::binToText
  +                        (
  +                            nextCh
  +                            , tmpBuf
  +                            , 8
  +                            , 16
  +                        );
  +                        emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                       }
                   }
  -                charref_expanded = false;
                   gotLeadingSurrogate = false;
               }
   
  @@ -2498,6 +2481,9 @@
   
               // Else add it to the buffer
               toFill.append(nextCh);
  +
  +            if (secondCh)
  +                toFill.append(secondCh);
           }
       }
       catch(const EndOfEntityException&)
  @@ -2704,44 +2690,36 @@
       bool    escaped = false;
       bool    gotLeadingSurrogate = false;
       bool    notDone = true;
  -    bool    charref_expanded = false;
       while (notDone)
       {
           try
           {
               while (true)
               {
  -                if (secondCh)
  +                //  Eat through as many plain content characters as possible without
  +                //  needing special handling.  Moving most content characters here,
  +                //  in this one call, rather than running the overall loop once
  +                //  per content character, is a speed optimization.
  +                if (curState == State_Waiting  &&  !gotLeadingSurrogate)
                   {
  -                    nextCh = secondCh;
  -                    secondCh = 0;
  +                     fReaderMgr.movePlainContentChars(toUse);
                   }
  -                else
  -                {
  -                    //  Eat through as many plain content characters as possible without
  -                    //  needing special handling.  Moving most content characters here,
  -                    //  in this one call, rather than running the overall loop once
  -                    //  per content character, is a speed optimization.
  -                    if (curState == State_Waiting  &&  !gotLeadingSurrogate)
  -                    {
  -                         fReaderMgr.movePlainContentChars(toUse);
  -                    }
   
  -                    // Try to get another char from the source
  -                    //   The code from here on down covers all contengencies,
  -                    if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
  -                    {
  -                        // If we were waiting for a trailing surrogate, its an error
  -                        if (gotLeadingSurrogate)
  -                            emitError(XMLErrs::Expected2ndSurrogateChar);
  +                // Try to get another char from the source
  +                //   The code from here on down covers all contengencies,
  +                if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
  +                {
  +                    // If we were waiting for a trailing surrogate, its an error
  +                    if (gotLeadingSurrogate)
  +                        emitError(XMLErrs::Expected2ndSurrogateChar);
   
  -                        notDone = false;
  -                        break;
  -                    }
  +                    notDone = false;
  +                    break;
                   }
   
                   //  Watch for a reference. Note that the escapement mechanism
                   //  is ignored in this content.
  +                escaped = false;
                   if (nextCh == chAmpersand)
                   {
                       sendCharData(toUse);
  @@ -2754,42 +2732,10 @@
                           gotLeadingSurrogate = false;
                           continue;
                       }
  -                    charref_expanded = true;
  -                }
  -                else
  -                {
  -                    escaped = false;
  -                }
  -
  -                 // Keep the state machine up to date
  -                if (!escaped)
  -                {
  -                    if (nextCh == chCloseSquare)
  -                    {
  -                        if (curState == State_Waiting)
  -                            curState = State_GotOne;
  -                        else if (curState == State_GotOne)
  -                            curState = State_GotTwo;
  -                    }
  -                    else if (nextCh == chCloseAngle)
  -                    {
  -                        if (curState == State_GotTwo)
  -                            emitError(XMLErrs::BadSequenceInCharData);
  -                        curState = State_Waiting;
  -                    }
  -                    else
  -                    {
  -                        curState = State_Waiting;
  -                    }
  -                }
  -                else
  -                {
  -                    curState = State_Waiting;
                   }
  -
  -                // Deal with surrogate pairs
  -                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                   {
  +                    // Deal with surrogate pairs
                       //  Its a leading surrogate. If we already got one, then
                       //  issue an error, else set leading flag to make sure that
                       //  we look for a trailing next time.
  @@ -2819,27 +2765,51 @@
                           // Make sure the returned char is a valid XML char
                           if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                           {
  -                            // if it was a character reference and is control char, then it's ok
  -                            if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                            {
  -                                XMLCh tmpBuf[9];
  -                                XMLString::binToText
  -                                (
  -                                    nextCh
  -                                    , tmpBuf
  -                                    , 8
  -                                    , 16
  -                                );
  -                                emitError(XMLErrs::InvalidCharacter, tmpBuf);
  -                            }
  +                            XMLCh tmpBuf[9];
  +                            XMLString::binToText
  +                            (
  +                                nextCh
  +                                , tmpBuf
  +                                , 8
  +                                , 16
  +                            );
  +                            emitError(XMLErrs::InvalidCharacter, tmpBuf);
                           }
                       }
  -                    charref_expanded = false;
                       gotLeadingSurrogate = false;
                   }
   
  +                 // Keep the state machine up to date
  +                if (!escaped)
  +                {
  +                    if (nextCh == chCloseSquare)
  +                    {
  +                        if (curState == State_Waiting)
  +                            curState = State_GotOne;
  +                        else if (curState == State_GotOne)
  +                            curState = State_GotTwo;
  +                    }
  +                    else if (nextCh == chCloseAngle)
  +                    {
  +                        if (curState == State_GotTwo)
  +                            emitError(XMLErrs::BadSequenceInCharData);
  +                        curState = State_Waiting;
  +                    }
  +                    else
  +                    {
  +                        curState = State_Waiting;
  +                    }
  +                }
  +                else
  +                {
  +                    curState = State_Waiting;
  +                }
  +
                   // Add this char to the buffer
                   toUse.append(nextCh);
  +
  +                if (secondCh)
  +                    toUse.append(secondCh);
               }
           }
           catch(const EndOfEntityException& toCatch)
  
  
  
  1.5       +87 -132   xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
  
  Index: IGXMLScanner2.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- IGXMLScanner2.cpp 20 Dec 2002 22:09:56 -0000      1.4
  +++ IGXMLScanner2.cpp 24 Dec 2002 16:11:39 -0000      1.5
  @@ -1542,23 +1542,13 @@
       XMLCh   secondCh = 0;
       bool    gotLeadingSurrogate = false;
       bool    escaped;
  -    bool    charref_expanded = false;
       while (true)
       {
           try
           {
               while(true)
               {
  -                // Get another char. Use second char if one is waiting
  -                if (secondCh)
  -                {
  -                    nextCh = secondCh;
  -                    secondCh = 0;
  -                }
  -                else
  -                {
  -                    nextCh = fReaderMgr.getNextChar();
  -                }
  +                nextCh = fReaderMgr.getNextChar();
   
                   if (!nextCh)
                       ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
  @@ -1589,12 +1579,10 @@
                           gotLeadingSurrogate = false;
                           continue;
                       }
  -                    charref_expanded = true;
                   }
  -
  -                // Deal with surrogate pairs
  -                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                   {
  +                    // Deal with surrogate pairs
                       //  Its a leading surrogate. If we already got one, then
                       //  issue an error, else set leading flag to make sure that
                       //  we look for a trailing next time.
  @@ -1626,23 +1614,17 @@
                           // Its got to at least be a valid XML character
                           else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                           {
  -                            // if it was a character reference and is control char, then it's ok
  -                            if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                            {
  -
  -                                XMLCh tmpBuf[9];
  -                                XMLString::binToText
  -                                (
  -                                    nextCh
  -                                    , tmpBuf
  -                                    , 8
  -                                    , 16
  -                                );
  -                                emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
  -                            }
  +                            XMLCh tmpBuf[9];
  +                            XMLString::binToText
  +                            (
  +                                nextCh
  +                                , tmpBuf
  +                                , 8
  +                                , 16
  +                            );
  +                            emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                           }
                       }
  -                    charref_expanded = false;
                       gotLeadingSurrogate = false;
                   }
   
  @@ -1655,6 +1637,9 @@
   
                   // Else add it to the buffer
                   toFill.append(nextCh);
  +
  +                if (secondCh)
  +                    toFill.append(secondCh);
               }
           }
           catch(const EndOfEntityException&)
  @@ -1705,23 +1690,13 @@
       bool    firstNonWS = false;
       bool    gotLeadingSurrogate = false;
       bool    escaped;
  -    bool    charref_expanded = false;
       while (true)
       {
       try
       {
           while(true)
           {
  -            // Get another char. Use second char if one is waiting
  -            if (secondCh)
  -            {
  -                nextCh = secondCh;
  -                secondCh = 0;
  -            }
  -            else
  -            {
  -                nextCh = fReaderMgr.getNextChar();
  -            }
  +            nextCh = fReaderMgr.getNextChar();
   
               if (!nextCh)
                   ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
  @@ -1751,12 +1726,10 @@
                       gotLeadingSurrogate = false;
                       continue;
                   }
  -                charref_expanded = true;
               }
  -
  -            // Deal with surrogate pairs
  -            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +            else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
               {
  +                // Deal with surrogate pairs
                   //  Its a leading surrogate. If we already got one, then
                   //  issue an error, else set leading flag to make sure that
                   //  we look for a trailing next time.
  @@ -1786,22 +1759,17 @@
                       // Its got to at least be a valid XML character
                       if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                       {
  -                        // if it was a character reference and is control char, then it's ok
  -                        if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                        {
  -                            XMLCh tmpBuf[9];
  -                            XMLString::binToText
  -                            (
  -                                nextCh
  -                                , tmpBuf
  -                                , 8
  -                                , 16
  -                            );
  -                            emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
  -                        }
  +                        XMLCh tmpBuf[9];
  +                        XMLString::binToText
  +                        (
  +                            nextCh
  +                            , tmpBuf
  +                            , 8
  +                            , 16
  +                        );
  +                        emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                       }
                   }
  -                charref_expanded = false;
                   gotLeadingSurrogate = false;
               }
   
  @@ -1877,6 +1845,9 @@
   
               // Else add it to the buffer
               toFill.append(nextCh);
  +
  +            if (secondCh)
  +               toFill.append(secondCh);
           }
       }
       catch(const EndOfEntityException&)
  @@ -2093,44 +2064,36 @@
       bool    escaped = false;
       bool    gotLeadingSurrogate = false;
       bool    notDone = true;
  -    bool    charref_expanded = false;
       while (notDone)
       {
           try
           {
               while (true)
               {
  -                if (secondCh)
  +                //  Eat through as many plain content characters as possible without
  +                //  needing special handling.  Moving most content characters here,
  +                //  in this one call, rather than running the overall loop once
  +                //  per content character, is a speed optimization.
  +                if (curState == State_Waiting  &&  !gotLeadingSurrogate)
                   {
  -                    nextCh = secondCh;
  -                    secondCh = 0;
  +                     fReaderMgr.movePlainContentChars(toUse);
                   }
  -                else
  -                {
  -                    //  Eat through as many plain content characters as possible without
  -                    //  needing special handling.  Moving most content characters here,
  -                    //  in this one call, rather than running the overall loop once
  -                    //  per content character, is a speed optimization.
  -                    if (curState == State_Waiting  &&  !gotLeadingSurrogate)
  -                    {
  -                         fReaderMgr.movePlainContentChars(toUse);
  -                    }
   
  -                    // Try to get another char from the source
  -                    //   The code from here on down covers all contengencies,
  -                    if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
  -                    {
  -                        // If we were waiting for a trailing surrogate, its an error
  -                        if (gotLeadingSurrogate)
  -                            emitError(XMLErrs::Expected2ndSurrogateChar);
  +                // Try to get another char from the source
  +                //   The code from here on down covers all contengencies,
  +                if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
  +                {
  +                    // If we were waiting for a trailing surrogate, its an error
  +                    if (gotLeadingSurrogate)
  +                        emitError(XMLErrs::Expected2ndSurrogateChar);
   
  -                        notDone = false;
  -                        break;
  -                    }
  +                    notDone = false;
  +                    break;
                   }
   
                   //  Watch for a reference. Note that the escapement mechanism
                   //  is ignored in this content.
  +                escaped = false;
                   if (nextCh == chAmpersand)
                   {
                       sendCharData(toUse);
  @@ -2143,42 +2106,10 @@
                           gotLeadingSurrogate = false;
                           continue;
                       }
  -                    charref_expanded = true;
                   }
  -                else
  -                {
  -                    escaped = false;
  -                }
  -
  -                 // Keep the state machine up to date
  -                if (!escaped)
  -                {
  -                    if (nextCh == chCloseSquare)
  -                    {
  -                        if (curState == State_Waiting)
  -                            curState = State_GotOne;
  -                        else if (curState == State_GotOne)
  -                            curState = State_GotTwo;
  -                    }
  -                    else if (nextCh == chCloseAngle)
  -                    {
  -                        if (curState == State_GotTwo)
  -                            emitError(XMLErrs::BadSequenceInCharData);
  -                        curState = State_Waiting;
  -                    }
  -                    else
  -                    {
  -                        curState = State_Waiting;
  -                    }
  -                }
  -                else
  -                {
  -                    curState = State_Waiting;
  -                }
  -
  -                // Deal with surrogate pairs
  -                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                   {
  +                    // Deal with surrogate pairs
                       //  Its a leading surrogate. If we already got one, then
                       //  issue an error, else set leading flag to make sure that
                       //  we look for a trailing next time.
  @@ -2208,27 +2139,51 @@
                           // Make sure the returned char is a valid XML char
                           if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                           {
  -                            // if it was a character reference and is control char, then it's ok
  -                            if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                            {
  -                                XMLCh tmpBuf[9];
  -                                XMLString::binToText
  -                                (
  -                                    nextCh
  -                                    , tmpBuf
  -                                    , 8
  -                                    , 16
  -                                );
  -                                emitError(XMLErrs::InvalidCharacter, tmpBuf);
  -                            }
  +                            XMLCh tmpBuf[9];
  +                            XMLString::binToText
  +                            (
  +                                nextCh
  +                                , tmpBuf
  +                                , 8
  +                                , 16
  +                            );
  +                            emitError(XMLErrs::InvalidCharacter, tmpBuf);
                           }
                       }
  -                    charref_expanded = false;
                       gotLeadingSurrogate = false;
                   }
   
  +                 // Keep the state machine up to date
  +                if (!escaped)
  +                {
  +                    if (nextCh == chCloseSquare)
  +                    {
  +                        if (curState == State_Waiting)
  +                            curState = State_GotOne;
  +                        else if (curState == State_GotOne)
  +                            curState = State_GotTwo;
  +                    }
  +                    else if (nextCh == chCloseAngle)
  +                    {
  +                        if (curState == State_GotTwo)
  +                            emitError(XMLErrs::BadSequenceInCharData);
  +                        curState = State_Waiting;
  +                    }
  +                    else
  +                    {
  +                        curState = State_Waiting;
  +                    }
  +                }
  +                else
  +                {
  +                    curState = State_Waiting;
  +                }
  +
                   // Add this char to the buffer
                   toUse.append(nextCh);
  +
  +                if (secondCh)
  +                    toUse.append(secondCh);
               }
           }
           catch(const EndOfEntityException& toCatch)
  
  
  
  1.5       +72 -102   xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp
  
  Index: SGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- SGXMLScanner.cpp  20 Dec 2002 22:09:56 -0000      1.4
  +++ SGXMLScanner.cpp  24 Dec 2002 16:11:39 -0000      1.5
  @@ -3224,23 +3224,13 @@
       XMLCh   secondCh = 0;
       bool    gotLeadingSurrogate = false;
       bool    escaped;
  -    bool    charref_expanded = false;
       while (true)
       {
           try
           {
               while(true)
               {
  -                // Get another char. Use second char if one is waiting
  -                if (secondCh)
  -                {
  -                    nextCh = secondCh;
  -                    secondCh = 0;
  -                }
  -                else
  -                {
  -                    nextCh = fReaderMgr.getNextChar();
  -                }
  +                nextCh = fReaderMgr.getNextChar();
   
                   if (!nextCh)
                       ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
  @@ -3271,12 +3261,10 @@
                           gotLeadingSurrogate = false;
                           continue;
                       }
  -                    charref_expanded = true;
                   }
  -
  -                // Deal with surrogate pairs
  -                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                   {
  +                    // Deal with surrogate pairs
                       //  Its a leading surrogate. If we already got one, then
                       //  issue an error, else set leading flag to make sure that
                       //  we look for a trailing next time.
  @@ -3308,22 +3296,17 @@
                           // Its got to at least be a valid XML character
                           else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                           {
  -                            // if it was a character reference and is control char, then it's ok
  -                            if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                            {
  -                                XMLCh tmpBuf[9];
  -                                XMLString::binToText
  -                                (
  -                                    nextCh
  -                                    , tmpBuf
  -                                    , 8
  -                                    , 16
  -                                );
  -                                emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
  -                            }
  +                            XMLCh tmpBuf[9];
  +                            XMLString::binToText
  +                            (
  +                                nextCh
  +                                , tmpBuf
  +                                , 8
  +                                , 16
  +                            );
  +                            emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                           }
                       }
  -                    charref_expanded = false;
                       gotLeadingSurrogate = false;
                   }
   
  @@ -3336,6 +3319,9 @@
   
                   // Else add it to the buffer
                   toFill.append(nextCh);
  +
  +                if (secondCh)
  +                    toFill.append(secondCh);
               }
           }
           catch(const EndOfEntityException&)
  @@ -3553,44 +3539,36 @@
       bool    escaped = false;
       bool    gotLeadingSurrogate = false;
       bool    notDone = true;
  -    bool    charref_expanded = false;
       while (notDone)
       {
           try
           {
               while (true)
               {
  -                if (secondCh)
  +                //  Eat through as many plain content characters as possible without
  +                //  needing special handling.  Moving most content characters here,
  +                //  in this one call, rather than running the overall loop once
  +                //  per content character, is a speed optimization.
  +                if (curState == State_Waiting  &&  !gotLeadingSurrogate)
                   {
  -                    nextCh = secondCh;
  -                    secondCh = 0;
  +                     fReaderMgr.movePlainContentChars(toUse);
                   }
  -                else
  -                {
  -                    //  Eat through as many plain content characters as possible without
  -                    //  needing special handling.  Moving most content characters here,
  -                    //  in this one call, rather than running the overall loop once
  -                    //  per content character, is a speed optimization.
  -                    if (curState == State_Waiting  &&  !gotLeadingSurrogate)
  -                    {
  -                         fReaderMgr.movePlainContentChars(toUse);
  -                    }
   
  -                    // Try to get another char from the source
  -                    //   The code from here on down covers all contengencies,
  -                    if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
  -                    {
  -                        // If we were waiting for a trailing surrogate, its an error
  -                        if (gotLeadingSurrogate)
  -                            emitError(XMLErrs::Expected2ndSurrogateChar);
  +                // Try to get another char from the source
  +                //   The code from here on down covers all contengencies,
  +                if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
  +                {
  +                    // If we were waiting for a trailing surrogate, its an error
  +                    if (gotLeadingSurrogate)
  +                        emitError(XMLErrs::Expected2ndSurrogateChar);
   
  -                        notDone = false;
  -                        break;
  -                    }
  +                    notDone = false;
  +                    break;
                   }
   
                   //  Watch for a reference. Note that the escapement mechanism
                   //  is ignored in this content.
  +                escaped = false;
                   if (nextCh == chAmpersand)
                   {
                       sendCharData(toUse);
  @@ -3603,42 +3581,10 @@
                           gotLeadingSurrogate = false;
                           continue;
                       }
  -                    charref_expanded = true;
  -                }
  -                else
  -                {
  -                    escaped = false;
  -                }
  -
  -                 // Keep the state machine up to date
  -                if (!escaped)
  -                {
  -                    if (nextCh == chCloseSquare)
  -                    {
  -                        if (curState == State_Waiting)
  -                            curState = State_GotOne;
  -                        else if (curState == State_GotOne)
  -                            curState = State_GotTwo;
  -                    }
  -                    else if (nextCh == chCloseAngle)
  -                    {
  -                        if (curState == State_GotTwo)
  -                            emitError(XMLErrs::BadSequenceInCharData);
  -                        curState = State_Waiting;
  -                    }
  -                    else
  -                    {
  -                        curState = State_Waiting;
  -                    }
  -                }
  -                else
  -                {
  -                    curState = State_Waiting;
                   }
  -
  -                // Deal with surrogate pairs
  -                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                   {
  +                    // Deal with surrogate pairs
                       //  Its a leading surrogate. If we already got one, then
                       //  issue an error, else set leading flag to make sure that
                       //  we look for a trailing next time.
  @@ -3668,27 +3614,51 @@
                           // Make sure the returned char is a valid XML char
                           if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                           {
  -                            // if it was a character reference and is control char, then it's ok
  -                            if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                            {
  -                                XMLCh tmpBuf[9];
  -                                XMLString::binToText
  -                                (
  -                                    nextCh
  -                                    , tmpBuf
  -                                    , 8
  -                                    , 16
  -                                );
  -                                emitError(XMLErrs::InvalidCharacter, tmpBuf);
  -                            }
  +                            XMLCh tmpBuf[9];
  +                            XMLString::binToText
  +                            (
  +                                nextCh
  +                                , tmpBuf
  +                                , 8
  +                                , 16
  +                            );
  +                            emitError(XMLErrs::InvalidCharacter, tmpBuf);
                           }
                       }
  -                    charref_expanded = false;
                       gotLeadingSurrogate = false;
                   }
   
  +                // Keep the state machine up to date
  +                if (!escaped)
  +                {
  +                    if (nextCh == chCloseSquare)
  +                    {
  +                        if (curState == State_Waiting)
  +                            curState = State_GotOne;
  +                        else if (curState == State_GotOne)
  +                            curState = State_GotTwo;
  +                    }
  +                    else if (nextCh == chCloseAngle)
  +                    {
  +                        if (curState == State_GotTwo)
  +                            emitError(XMLErrs::BadSequenceInCharData);
  +                        curState = State_Waiting;
  +                    }
  +                    else
  +                    {
  +                        curState = State_Waiting;
  +                    }
  +                }
  +                else
  +                {
  +                    curState = State_Waiting;
  +                }
  +
                   // Add this char to the buffer
                   toUse.append(nextCh);
  +
  +                if (secondCh)
  +                    toUse.append(secondCh);
               }
           }
           catch(const EndOfEntityException& toCatch)
  
  
  
  1.4       +72 -105   xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp
  
  Index: WFXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- WFXMLScanner.cpp  20 Dec 2002 22:09:56 -0000      1.3
  +++ WFXMLScanner.cpp  24 Dec 2002 16:11:39 -0000      1.4
  @@ -1522,23 +1522,13 @@
       bool    firstNonWS = false;
       bool    gotLeadingSurrogate = false;
       bool    escaped;
  -    bool    charref_expanded = false;
       while (true)
       {
       try
       {
           while(true)
           {
  -            // Get another char. Use second char if one is waiting
  -            if (secondCh)
  -            {
  -                nextCh = secondCh;
  -                secondCh = 0;
  -            }
  -            else
  -            {
  -                nextCh = fReaderMgr.getNextChar();
  -            }
  +            nextCh = fReaderMgr.getNextChar();
   
               if (!nextCh)
                   ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
  @@ -1568,12 +1558,10 @@
                       gotLeadingSurrogate = false;
                       continue;
                   }
  -                charref_expanded = true;
               }
  -
  -            // Deal with surrogate pairs
  -            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +            else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
               {
  +                // Deal with surrogate pairs
                   //  Its a leading surrogate. If we already got one, then
                   //  issue an error, else set leading flag to make sure that
                   //  we look for a trailing next time.
  @@ -1605,30 +1593,24 @@
                       // Its got to at least be a valid XML character
                       else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                       {
  -                        // if it was a character reference and is control char, then it's ok
  -                        if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                        {
  -
  -                            XMLCh tmpBuf[9];
  -                            XMLString::binToText
  -                            (
  -                                nextCh
  -                                , tmpBuf
  -                                , 8
  -                                , 16
  -                            );
  -                            emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
  -                        }
  +                        XMLCh tmpBuf[9];
  +                        XMLString::binToText
  +                        (
  +                            nextCh
  +                            , tmpBuf
  +                            , 8
  +                            , 16
  +                        );
  +                        emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                       }
                   }
  -                charref_expanded = false;
                   gotLeadingSurrogate = false;
               }
   
               //  If its not escaped, then make sure its not a < character, which
               //  is not allowed in attribute values.
               if (!escaped) {
  -                             if (nextCh == chOpenAngle)
  +                if (nextCh == chOpenAngle)
                       emitError(XMLErrs::BracketInAttrValue, attrName);
                   else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                       nextCh = chSpace;
  @@ -1636,6 +1618,9 @@
   
               // Else add it to the buffer
               toFill.append(nextCh);
  +
  +            if (secondCh)
  +               toFill.append(secondCh);
           }
       }
       catch(const EndOfEntityException&)
  @@ -1807,44 +1792,36 @@
       bool    escaped = false;
       bool    gotLeadingSurrogate = false;
       bool    notDone = true;
  -    bool    charref_expanded = false;
       while (notDone)
       {
           try
           {
               while (true)
               {
  -                if (secondCh)
  +                //  Eat through as many plain content characters as possible without
  +                //  needing special handling.  Moving most content characters here,
  +                //  in this one call, rather than running the overall loop once
  +                //  per content character, is a speed optimization.
  +                if (curState == State_Waiting  &&  !gotLeadingSurrogate)
                   {
  -                    nextCh = secondCh;
  -                    secondCh = 0;
  +                     fReaderMgr.movePlainContentChars(toUse);
                   }
  -                else
  -                {
  -                    //  Eat through as many plain content characters as possible without
  -                    //  needing special handling.  Moving most content characters here,
  -                    //  in this one call, rather than running the overall loop once
  -                    //  per content character, is a speed optimization.
  -                    if (curState == State_Waiting  &&  !gotLeadingSurrogate)
  -                    {
  -                         fReaderMgr.movePlainContentChars(toUse);
  -                    }
   
  -                    // Try to get another char from the source
  -                    //   The code from here on down covers all contengencies,
  -                    if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
  -                    {
  -                        // If we were waiting for a trailing surrogate, its an error
  -                        if (gotLeadingSurrogate)
  -                            emitError(XMLErrs::Expected2ndSurrogateChar);
  +                // Try to get another char from the source
  +                //   The code from here on down covers all contengencies,
  +                if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
  +                {
  +                    // If we were waiting for a trailing surrogate, its an error
  +                    if (gotLeadingSurrogate)
  +                        emitError(XMLErrs::Expected2ndSurrogateChar);
   
  -                        notDone = false;
  -                        break;
  -                    }
  +                    notDone = false;
  +                    break;
                   }
   
                   //  Watch for a reference. Note that the escapement mechanism
                   //  is ignored in this content.
  +                escaped = false;
                   if (nextCh == chAmpersand)
                   {
                       sendCharData(toUse);
  @@ -1857,42 +1834,10 @@
                           gotLeadingSurrogate = false;
                           continue;
                       }
  -                    charref_expanded = true;
  -                }
  -                else
  -                {
  -                    escaped = false;
  -                }
  -
  -                 // Keep the state machine up to date
  -                if (!escaped)
  -                {
  -                    if (nextCh == chCloseSquare)
  -                    {
  -                        if (curState == State_Waiting)
  -                            curState = State_GotOne;
  -                        else if (curState == State_GotOne)
  -                            curState = State_GotTwo;
  -                    }
  -                    else if (nextCh == chCloseAngle)
  -                    {
  -                        if (curState == State_GotTwo)
  -                            emitError(XMLErrs::BadSequenceInCharData);
  -                        curState = State_Waiting;
  -                    }
  -                    else
  -                    {
  -                        curState = State_Waiting;
  -                    }
                   }
  -                else
  -                {
  -                    curState = State_Waiting;
  -                }
  -
  -                // Deal with surrogate pairs
  -                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                   {
  +                    // Deal with surrogate pairs
                       //  Its a leading surrogate. If we already got one, then
                       //  issue an error, else set leading flag to make sure that
                       //  we look for a trailing next time.
  @@ -1924,29 +1869,51 @@
                           // Its got to at least be a valid XML character
                           else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                           {
  -                            // if it was a character reference and is control char, then it's ok
  -                            if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
  -                            {
  -
  -                                XMLCh tmpBuf[9];
  -                                XMLString::binToText
  -                                (
  -                                    nextCh
  -                                    , tmpBuf
  -                                    , 8
  -                                    , 16
  -                                );
  -                                emitError(XMLErrs::InvalidCharacter, tmpBuf);
  -                            }
  +                            XMLCh tmpBuf[9];
  +                            XMLString::binToText
  +                            (
  +                                nextCh
  +                                , tmpBuf
  +                                , 8
  +                                , 16
  +                            );
  +                            emitError(XMLErrs::InvalidCharacter, tmpBuf);
                           }
                       }
  -                    charref_expanded = false;
                       gotLeadingSurrogate = false;
                   }
   
  +                // Keep the state machine up to date
  +                if (!escaped)
  +                {
  +                    if (nextCh == chCloseSquare)
  +                    {
  +                        if (curState == State_Waiting)
  +                            curState = State_GotOne;
  +                        else if (curState == State_GotOne)
  +                            curState = State_GotTwo;
  +                    }
  +                    else if (nextCh == chCloseAngle)
  +                    {
  +                        if (curState == State_GotTwo)
  +                            emitError(XMLErrs::BadSequenceInCharData);
  +                        curState = State_Waiting;
  +                    }
  +                    else
  +                    {
  +                        curState = State_Waiting;
  +                    }
  +                }
  +                else
  +                {
  +                    curState = State_Waiting;
  +                }
   
                   // Add this char to the buffer
                   toUse.append(nextCh);
  +
  +                if (secondCh)
  +                    toUse.append(secondCh);
               }
           }
           catch(const EndOfEntityException& toCatch)
  
  
  
  1.32      +14 -3     xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
  
  Index: XMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
  retrieving revision 1.31
  retrieving revision 1.32
  diff -u -r1.31 -r1.32
  --- XMLScanner.cpp    23 Dec 2002 19:34:37 -0000      1.31
  +++ XMLScanner.cpp    24 Dec 2002 16:11:39 -0000      1.32
  @@ -1725,16 +1725,27 @@
       }
   
       // Return the char (or chars)
  -    if (value >= 0x10000)
  +    // And check if the character expanded is valid or not
  +    if (value >= 0x10000 && value <= 0x10FFFF)
       {
           value -= 0x10000;
           toFill = XMLCh((value >> 10) + 0xD800);
           second = XMLCh((value & 0x3FF) + 0xDC00);
       }
  -    else
  +    else if (value <= 0xFFFD)
       {
           toFill = XMLCh(value);
           second = 0;
  +        if (!fReaderMgr.getCurrentReader()->isXMLChar(toFill) && !fReaderMgr.getCurrentReader()->isControlChar(toFill)) {
  +            // Character reference was not in the valid range
  +            emitError(XMLErrs::InvalidCharacterRef);
  +            return false;
  +        }
  +    }
  +    else {
  +        // Character reference was not in the valid range
  +        emitError(XMLErrs::InvalidCharacterRef);
  +        return false;
       }
   
       return true;
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to