peiyongz    2004/06/14 08:18:53

  Modified:    c/src/xercesc/internal ReaderMgr.cpp ReaderMgr.hpp
                        XMLReader.cpp XMLReader.hpp XMLScanner.cpp
                        XMLScanner.hpp
  Log:
  Consolidated End Of Line Handling
  
  Revision  Changes    Path
  1.25      +3 -27     xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp
  
  Index: ReaderMgr.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- ReaderMgr.cpp     3 Jun 2004 15:38:27 -0000       1.24
  +++ ReaderMgr.cpp     14 Jun 2004 15:18:52 -0000      1.25
  @@ -286,7 +286,7 @@
   }
   
   
  -bool ReaderMgr::skipPastSpaces()
  +bool ReaderMgr::skipPastSpaces(bool inDecl)
   {
       bool skippedSomething = false;
       bool tmpFlag;
  @@ -297,31 +297,7 @@
           //  it hit a non-space, break out. Else we have to pop another entity
           //  and keep going.
           //
  -        if (fCurReader->skipSpaces(tmpFlag))
  -            break;
  -
  -        if (tmpFlag)
  -            skippedSomething = true;
  -
  -        // Try to pop another enitity. If we can't then we are done
  -        if (!popReader())
  -            break;
  -    }
  -    return (tmpFlag || skippedSomething);
  -}
  -
  -bool ReaderMgr::skipPastSpacesInDecl()
  -{
  -    bool skippedSomething = false;
  -    bool tmpFlag;
  -    while (true)
  -    {
  -        //
  -        //  Skip all the spaces in the current reader. If it returned because
  -        //  it hit a non-space, break out. Else we have to pop another entity
  -        //  and keep going.
  -        //
  -        if (fCurReader->skipSpacesInDecl(tmpFlag))
  +        if (fCurReader->skipSpaces(tmpFlag, inDecl))
               break;
   
           if (tmpFlag)
  
  
  
  1.13      +7 -2      xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp
  
  Index: ReaderMgr.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- ReaderMgr.hpp     3 Jun 2004 15:38:27 -0000       1.12
  +++ ReaderMgr.hpp     14 Jun 2004 15:18:52 -0000      1.13
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.13  2004/06/14 15:18:52  peiyongz
  + * Consolidated End Of Line Handling
  + *
    * Revision 1.12  2004/06/03 15:38:27  peiyongz
    * XML1.1:  The characters #x85 and #x2028 cannot be reliably recognized
    * and translated until an entity's encoding declaration (if present) has been
  @@ -223,8 +226,7 @@
       XMLCh peekNextChar();
       bool skipIfQuote(XMLCh& chGotten);
       void skipPastChar(const XMLCh toSkip);
  -    bool skipPastSpaces();
  -    bool skipPastSpacesInDecl();
  +    bool skipPastSpaces(bool inDecl = false);
       void skipToChar(const XMLCh toSkipTo);
       bool skippedChar(const XMLCh toSkip);
       bool skippedSpace();
  
  
  
  1.22      +103 -230  xml-xerces/c/src/xercesc/internal/XMLReader.cpp
  
  Index: XMLReader.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.cpp,v
  retrieving revision 1.21
  retrieving revision 1.22
  diff -u -r1.21 -r1.22
  --- XMLReader.cpp     4 Jun 2004 14:31:15 -0000       1.21
  +++ XMLReader.cpp     14 Jun 2004 15:18:52 -0000      1.22
  @@ -72,7 +72,6 @@
   #include <xercesc/util/XMLString.hpp>
   #include <xercesc/util/Janitor.hpp>
   
  -
   XERCES_CPP_NAMESPACE_BEGIN
   
   // ---------------------------------------------------------------------------
  @@ -695,42 +694,7 @@
                   // Eat this char
                   fCharIndex++;
   
  -                //
  -                //  Ok, we've got some whitespace here. So we have to store
  -                //  it. But we have to normalize it and update the line and
  -                //  column info along the way.
  -                //
  -                if (curCh == chCR)
  -                {
  -                    fCurCol = 1;
  -                    fCurLine++;
  -
  -                    //
  -                    //  If not already internalized, then convert it to an
  -                    //  LF and eat any following LF.
  -                    //
  -                    if (fSource == Source_External)
  -                    {
  -                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  -                        {
  -                            if (fCharBuf[fCharIndex] == chLF
  -                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  -                                fCharIndex++;
  -                        }
  -                        curCh = chLF;
  -                    }
  -                }
  -                 else if (curCh == chLF
  -                          || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
  -                {
  -                    curCh = chLF;
  -                    fCurCol = 1;
  -                    fCurLine++;
  -                }
  -                 else
  -                {
  -                    fCurCol++;
  -                }
  +                handleEOL(curCh, false);
   
                   // Ok we can add this guy to our buffer
                   toFill.append(curCh);
  @@ -773,42 +737,7 @@
                   // Eat this char
                   fCharIndex++;
   
  -                //
  -                //  Ok, we've got some whitespace here. So we have to store
  -                //  it. But we have to normalize it and update the line and
  -                //  column info along the way.
  -                //
  -                if (curCh == chCR)
  -                {
  -                    fCurCol = 1;
  -                    fCurLine++;
  -
  -                    //
  -                    //  If not already internalized, then convert it to an
  -                    //  LF and eat any following LF.
  -                    //
  -                    if (fSource == Source_External)
  -                    {
  -                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  -                        {
  -                            if (fCharBuf[fCharIndex] == chLF
  -                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  -                                fCharIndex++;
  -                        }
  -                        curCh = chLF;
  -                    }
  -                }
  -                 else if (curCh == chLF
  -                          || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
  -                {
  -                    curCh = chLF;
  -                    fCurCol = 1;
  -                    fCurLine++;
  -                }
  -                 else
  -                {
  -                    fCurCol++;
  -                }
  +                handleEOL(curCh, false);
   
                   // Add it to our buffer
                   toFill.append(curCh);
  @@ -853,89 +782,7 @@
   }
   
   
  -bool XMLReader::skipSpaces(bool& skippedSomething)
  -{
  -    // Remember the current line and column
  -    XMLSSize_t    orgLine = fCurLine;
  -    XMLSSize_t    orgCol  = fCurCol;
  -
  -    //  We enter a loop where we skip over spaces until we hit the end of
  -    //  this reader or a non-space value. The return indicates whether we
  -    //  hit the non-space (true) or the end (false).
  -    while (true)
  -    {
  -        // Loop through the current chars in the buffer
  -        while (fCharIndex < fCharsAvail)
  -        {
  -            //  See if its a white space char. If so, then process it. Else
  -            //  we've hit a non-space and need to return.
  -            if (isWhitespace(fCharBuf[fCharIndex]))
  -            {
  -                // Get the current char out of the buffer and eat it
  -                XMLCh curCh = fCharBuf[fCharIndex++];
  -
  -                //  Ok, we've got some whitespace here. So we have to store
  -                //  it. But we have to normalize it and update the line and
  -                //  column info along the way.
  -                if (curCh == chCR)
  -                {
  -                    fCurCol = 1;
  -                    fCurLine++;
  -
  -                    //  If not already internalized, then convert it to an
  -                    //  LF and eat any following LF.
  -                    if (fSource == Source_External)
  -                    {
  -                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  -                        {
  -                            if (fCharBuf[fCharIndex] == chLF
  -                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  -                                fCharIndex++;
  -                        }
  -                    }
  -                }
  -                else if (curCh == chLF
  -                         || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
  -                {
  -                    fCurCol = 1;
  -                    fCurLine++;
  -                }
  -                else
  -                {
  -                    fCurCol++;
  -                }
  -            }
  -            else
  -            {
  -                skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
  -                return true;
  -            }
  -        }
  -
  -        //  We've eaten up the current buffer, so lets try to reload it. If
  -        //  we don't get anything new, then break out. If we do, then we go
  -        //  back to the top to keep getting spaces.
  -        if (!refreshCharBuffer())
  -            break;
  -    }
  -
  -    // We never hit any non-space and ate up the whole reader
  -    skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
  -    return false;
  -}
  -
  -/***
  - * XML1.1
  - *
  - * 2.11 End-of-Line Handling
  - *  ...
  - *   The characters #x85 and #x2028 cannot be reliably recognized and translated 
  - *   until an entity's encoding declaration (if present) has been read. 
  - *   Therefore, it is a fatal error to use them within the XML declaration or 
  - *   text declaration. 
  - *
  -***/
  -bool XMLReader::skipSpacesInDecl(bool& skippedSomething)
  +bool XMLReader::skipSpaces(bool& skippedSomething, bool inDecl)
   {
       // Remember the current line and column
       XMLSSize_t    orgLine = fCurLine;
  @@ -956,56 +803,8 @@
                   // Get the current char out of the buffer and eat it
                   XMLCh curCh = fCharBuf[fCharIndex++];
   
  -                //  Ok, we've got some whitespace here. So we have to store
  -                //  it. But we have to normalize it and update the line and
  -                //  column info along the way.
  -                if (curCh == chCR)
  -                {
  -                    fCurCol = 1;
  -                    fCurLine++;
  +                handleEOL(curCh, inDecl);
   
  -                    //  If not already internalized, then convert it to an
  -                    //  LF and eat any following LF.
  -                    if (fSource == Source_External)
  -                    {
  -                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  -                        {
  -                            if (fCharBuf[fCharIndex] == chLF
  -                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  -                                fCharIndex++;
  -                        }
  -                    }
  -                }
  -                else if (curCh == chLF)                   
  -                {
  -                    fCurCol = 1;
  -                    fCurLine++;
  -                }
  -                else if (curCh == chNEL || curCh == chLineSeparator)
  -                {
  -                    if (fXMLVersion == XMLV1_1)
  -                    {
  -                        ThrowXMLwithMemMgr1
  -                        (
  -                            TranscodingException
  -                          , XMLExcepts::Reader_NelLsepinDecl
  -                          , fSystemId
  -                          , fMemoryManager
  -                        );
  -                    }
  -                    else //XMLV1_0
  -                    {
  -                        if (fNEL)
  -                        {
  -                            fCurCol = 1;
  -                            fCurLine++;
  -                        }
  -                    }
  -                }
  -                else
  -                {
  -                    fCurCol++;
  -                }
               }
               else
               {
  @@ -1074,31 +873,8 @@
           // Eat the character
           fCharIndex++;
   
  -        if (curCh == chCR)
  -        {
  -            fCurLine++;
  -            fCurCol = 1;
  +        handleEOL((XMLCh&)curCh, false);
   
  -            if (fSource == Source_External)
  -            {
  -                if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  -                {
  -                    if (fCharBuf[fCharIndex] == chLF
  -                        || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  -                        fCharIndex++;
  -                }
  -            }
  -        }
  -         else if (curCh == chLF
  -                  || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
  -        {
  -            fCurLine++;
  -            fCurCol = 1;
  -        }
  -         else
  -        {
  -            fCurCol++;
  -        }
           return true;
       }
       return false;
  @@ -1723,6 +1499,103 @@
       fRawBufIndex += bytesEaten;
   
       return charsDone;
  +}
  +
  +/***
  + *
  + * XML1.1
  + *
  + * 2.11 End-of-Line Handling
  + *
  + *    XML parsed entities are often stored in computer files which, for editing 
  + *    convenience, are organized into lines. These lines are typically separated 
  + *    by some combination of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
  + *
  + *    To simplify the tasks of applications, the XML processor MUST behave as if 
  + *    it normalized all line breaks in external parsed entities (including the document 
  + *    entity) on input, before parsing, by translating all of the following to a single 
  + *    #xA character:
  + *
  + *  1. the two-character sequence #xD #xA
  + *  2. the two-character sequence #xD #x85
  + *  3. the single character #x85
  + *  4. the single character #x2028
  + *  5. any #xD character that is not immediately followed by #xA or #x85.
  + *
  + *
  + ***/
  +inline void XMLReader::handleEOL(XMLCh& curCh, bool inDecl)
  +{
  +    // 1. the two-character sequence #xD #xA
  +    // 2. the two-character sequence #xD #x85
  +    // 5. any #xD character that is not immediately followed by #xA or #x85.
  +    if (curCh == chCR)
  +    {
  +        fCurCol = 1;
  +        fCurLine++;
  +
  +        //
  +        //  If not already internalized, then convert it to an
  +        //  LF and eat any following LF.
  +        //
  +        if (fSource == Source_External)
  +        {
  +            if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  +            {
  +                if ( fCharBuf[fCharIndex] == chLF              || 
  +                    ((fCharBuf[fCharIndex] == chNEL) && fNEL)  )
  +                {
  +                    fCharIndex++;
  +                }
  +            }
  +            curCh = chLF;
  +        }
  +    }
  +    else if (curCh == chLF)                   
  +    {
  +        fCurCol = 1;
  +        fCurLine++;
  +    }
  +    // 3. the single character #x85
  +    // 4. the single character #x2028
  +    else if (curCh == chNEL || curCh == chLineSeparator)
  +    {
  +        if (inDecl && fXMLVersion == XMLV1_1)
  +        {
  +
  +        /***
  +         * XML1.1
  +         *
  +         * 2.11 End-of-Line Handling
  +         *  ...
  +         *   The characters #x85 and #x2028 cannot be reliably recognized and translated 
  +         *   until an entity's encoding declaration (if present) has been read. 
  +         *   Therefore, it is a fatal error to use them within the XML declaration or 
  +         *   text declaration. 
  +         *
  +         ***/
  +            ThrowXMLwithMemMgr1
  +                (
  +                TranscodingException
  +                , XMLExcepts::Reader_NelLsepinDecl
  +                , fSystemId
  +                , fMemoryManager
  +                );
  +        }
  +
  +        if (fNEL && fSource == Source_External)
  +        {
  +            fCurCol = 1;
  +            fCurLine++;
  +            curCh = chLF;
  +        }
  +    }
  +    else
  +    {
  +        fCurCol++;
  +    }
  +
  +    return;
   }
   
   XERCES_CPP_NAMESPACE_END
  
  
  
  1.17      +16 -105   xml-xerces/c/src/xercesc/internal/XMLReader.hpp
  
  Index: XMLReader.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.hpp,v
  retrieving revision 1.16
  retrieving revision 1.17
  diff -u -r1.16 -r1.17
  --- XMLReader.hpp     3 Jun 2004 15:38:27 -0000       1.16
  +++ XMLReader.hpp     14 Jun 2004 15:18:53 -0000      1.17
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.17  2004/06/14 15:18:53  peiyongz
  + * Consolidated End Of Line Handling
  + *
    * Revision 1.16  2004/06/03 15:38:27  peiyongz
    * XML1.1:  The characters #x85 and #x2028 cannot be reliably recognized
    * and translated until an entity's encoding declaration (if present) has been
  @@ -333,8 +336,7 @@
       bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
       bool peekNextChar(XMLCh& chGotten);
       bool skipIfQuote(XMLCh& chGotten);
  -    bool skipSpaces(bool& skippedSomething);
  -    bool skipSpacesInDecl(bool& skippedSomething);
  +    bool skipSpaces(bool& skippedSomething, bool inDecl = false);
       bool skippedChar(const XMLCh toSkip);
       bool skippedSpace();
       bool skippedString(const XMLCh* const toSkip);
  @@ -428,6 +430,11 @@
           , const unsigned int            maxChars
       );
   
  +    inline void handleEOL
  +    (
  +              XMLCh&   curCh
  +            , bool     inDecl = false
  +    );
   
       // -----------------------------------------------------------------------
       //  Data members
  @@ -801,62 +808,8 @@
       chGotten = fCharBuf[fCharIndex++];
   
       // Handle end of line normalization and line/col member maintenance.
  -    if (chGotten == chCR)
  -    {
  -        //
  -        //  Do the normalization. We return chLF regardless of which was
  -        //  found. We also eat a chCR followed by an chLF.
  -        //
  -        //  We only do this if the content being spooled is not already
  -        //  internalized.
  -        //
  -        if (fSource == Source_External)
  -        {
  -            //
  -            //  See if we have another char left. If not, don't bother.
  -            //  Else, see if its an chLF to eat. If it is, bump the
  -            //  index again.
  -            //
  -            if (fCharIndex < fCharsAvail)
  -            {
  -                if (fCharBuf[fCharIndex] == chLF
  -                    || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  -                    fCharIndex++;
  -            }
  -             else
  -            {
  -                if (refreshCharBuffer())
  -                {
  -                    if (fCharBuf[fCharIndex] == chLF
  -                        || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  -                        fCharIndex++;
  -                }
  -            }
  -
  -            // And return just an chLF
  -            chGotten = chLF;
  -        }
  -
  -        // And handle the line/col stuff
  -        fCurCol = 1;
  -        fCurLine++;
  -    }
  -     else if (chGotten == chLF
  -              || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
  -    {
  -        chGotten = chLF;
  -        fCurLine++;
  -        fCurCol = 1;
  -    }
  -     else if (chGotten)
  -    {
  -        //
  -        //  Only do this is not a null char. Null chars are not part of the
  -        //  real content. They are just marker characters inserted into
  -        //  the stream.
  -        //
  -        fCurCol++;
  -    }
  +    handleEOL(chGotten, false);
  +
       return true;
   }
   
  @@ -883,53 +836,8 @@
       chGotten = fCharBuf[fCharIndex++];
   
       // Handle end of line normalization and line/col member maintenance.
  -    if (chGotten == chCR)
  -    {
  -        //
  -        //  Do the normalization. We return chLF regardless of which was
  -        //  found. We also eat a chCR followed by an chLF.
  -        //
  -        //  We only do this if the content being spooled is not already
  -        //  internalized.
  -        //
  -        if (fSource == Source_External)
  -        {
  -            //
  -            //  See if we have another char left. If not, don't bother.
  -            //  Else, see if its an chLF to eat. If it is, bump the
  -            //  index again.
  -            //
  -            if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  -            {
  -                if (fCharBuf[fCharIndex] == chLF
  -                    || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  -                    fCharIndex++;
  -            }
  -
  -            // And return just an chLF
  -            chGotten = chLF;
  -        }
  -
  -        // And handle the line/col stuff
  -        fCurCol = 1;
  -        fCurLine++;
  -    }
  -     else if (chGotten == chLF
  -              || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
  -    {
  -        chGotten = chLF;
  -        fCurLine++;
  -        fCurCol = 1;
  -    }
  -     else if (chGotten)
  -    {
  -        //
  -        //  Only do this is not a null char. Null chars are not part of the
  -        //  real content. They are just marker characters inserted into
  -        //  the stream.
  -        //
  -        fCurCol++;
  -    }
  +    handleEOL(chGotten, false);
  +
       return true;
   }
   
  
  
  
  1.67      +6 -6      xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
  
  Index: XMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
  retrieving revision 1.66
  retrieving revision 1.67
  diff -u -r1.66 -r1.67
  --- XMLScanner.cpp    3 Jun 2004 15:38:27 -0000       1.66
  +++ XMLScanner.cpp    14 Jun 2004 15:18:53 -0000      1.67
  @@ -1340,7 +1340,7 @@
       while (true)
       {
           // Skip any spaces
  -        const unsigned int spaceCount = fReaderMgr.skipPastSpacesInDecl();
  +        const unsigned int spaceCount = fReaderMgr.skipPastSpaces(true);
   
           // If we are looking at a question mark, then break out
           if (fReaderMgr.lookingAtChar(chQuestion))
  @@ -1375,7 +1375,7 @@
   
           //  Scan for an equal's sign. If we don't find it, issue an error
           //  but keep trying to go on.
  -        if (!scanEq())
  +        if (!scanEq(true))
               emitError(XMLErrs::ExpectedEqSign);
   
           //  Get a quote string into the buffer for the string that we are
  @@ -2168,12 +2168,12 @@
   
   //  Most equal signs can have white space around them, so this little guy
   //  just makes the calling code cleaner by eating whitespace.
  -bool XMLScanner::scanEq()
  +bool XMLScanner::scanEq(bool inDecl)
   {
  -    fReaderMgr.skipPastSpacesInDecl();
  +    fReaderMgr.skipPastSpaces(inDecl);
       if (fReaderMgr.skippedChar(chEqual))
       {
  -        fReaderMgr.skipPastSpacesInDecl();
  +        fReaderMgr.skipPastSpaces(inDecl);
           return true;
       }
       return false;
  
  
  
  1.36      +4 -1      xml-xerces/c/src/xercesc/internal/XMLScanner.hpp
  
  Index: XMLScanner.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v
  retrieving revision 1.35
  retrieving revision 1.36
  diff -u -r1.35 -r1.36
  --- XMLScanner.hpp    13 Apr 2004 18:57:54 -0000      1.35
  +++ XMLScanner.hpp    14 Jun 2004 15:18:53 -0000      1.36
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.36  2004/06/14 15:18:53  peiyongz
  + * Consolidated End Of Line Handling
  + *
    * Revision 1.35  2004/04/13 18:57:54  peiyongz
    * Unrelavant comment removal
    *
  @@ -734,7 +737,7 @@
       // -----------------------------------------------------------------------
       bool scanCharRef(XMLCh& toFill, XMLCh& second);
       void scanComment();
  -    bool scanEq();
  +    bool scanEq(bool inDecl = false);
       void scanMiscellaneous();
       void scanPI();
       void scanProlog();
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to