tng         2002/12/24 08:12:19

  Modified:    c/src/xercesc/validators/DTD DTDScanner.cpp
  Log:
  For performance reasons, move the character check to scancharref.
  
  Revision  Changes    Path
  1.20      +57 -70    xml-xerces/c/src/xercesc/validators/DTD/DTDScanner.cpp
  
  Index: DTDScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/validators/DTD/DTDScanner.cpp,v
  retrieving revision 1.19
  retrieving revision 1.20
  diff -u -r1.19 -r1.20
  --- DTDScanner.cpp    20 Dec 2002 22:10:47 -0000      1.19
  +++ DTDScanner.cpp    24 Dec 2002 16:12:19 -0000      1.20
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.20  2002/12/24 16:12:19  tng
  + * For performance reason, move the character check to scancharref.
  + *
    * Revision 1.19  2002/12/20 22:10:47  tng
    * XML 1.1
    *
  @@ -918,23 +921,13 @@
       bool    firstNonWS = false;
       bool    gotLeadingSurrogate = false;
       bool    escaped;
  -    bool    charref_expanded = false;
       while (true)
       {
       try
       {
           while(true)
           {
  -            // Get another char. Use second char from prevous is its there
  -            if (secondCh)
  -            {
  -                nextCh = secondCh;
  -                secondCh = 0;
  -            }
  -             else
  -            {
  -                nextCh = fReaderMgr->getNextChar();
  -            }
  +            nextCh = fReaderMgr->getNextChar();
   
               if (!nextCh)
                   ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
  @@ -966,12 +959,10 @@
                       gotLeadingSurrogate = false;
                       continue;
                   }
  -                charref_expanded = true;
               }
  -
  -            // Check for correct surrogate pairs
  -            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +            else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
               {
  +                // Check for correct surrogate pairs
                   if (gotLeadingSurrogate)
                       fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                   else
  @@ -985,29 +976,25 @@
                           fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                   }
                   // Its got to at least be a valid XML character
  -                else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) {
  -                    // if it was a character reference and is control char, then it's ok
  -                    if (!(charref_expanded && fReaderMgr->getCurrentReader()->isControlChar(nextCh)))
  -                    {
  -                        XMLCh tmpBuf[9];
  -                        XMLString::binToText
  -                        (
  -                            nextCh
  -                            , tmpBuf
  -                            , 8
  -                            , 16
  -                        );
  -                        fScanner->emitError
  -                        (
  -                            XMLErrs::InvalidCharacterInAttrValue
  -                            , attrName
  -                            , tmpBuf
  -                        );
  -                    }
  +                else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
  +                {
  +                    XMLCh tmpBuf[9];
  +                    XMLString::binToText
  +                    (
  +                        nextCh
  +                        , tmpBuf
  +                        , 8
  +                        , 16
  +                    );
  +                    fScanner->emitError
  +                    (
  +                        XMLErrs::InvalidCharacterInAttrValue
  +                        , attrName
  +                        , tmpBuf
  +                    );
                   }
   
                   gotLeadingSurrogate = false;
  -                charref_expanded = false;
               }
   
               //
  @@ -1063,6 +1050,9 @@
   
               // Else add it to the buffer
               toFill.append(nextCh);
  +
  +            if (secondCh)
  +               toFill.append(secondCh);
           }
       }
   
  @@ -1164,17 +1154,29 @@
       }
   
       // Return the char (or chars)
  -    if (value >= 0x10000)
  +    // And check if the character expanded is valid or not
  +    if (value >= 0x10000 && value <= 0x10FFFF)
       {
           value -= 0x10000;
  -        first = XMLCh((value >> 10) + 0xD800);
  +        first  = XMLCh((value >> 10) + 0xD800);
           second = XMLCh((value & 0x3FF) + 0xDC00);
       }
  -     else
  +    else if (value <= 0xFFFD)
       {
  -        first = XMLCh(value);
  +        first  = XMLCh(value);
           second = 0;
  +        if (!fReaderMgr->getCurrentReader()->isXMLChar(first) && !fReaderMgr->getCurrentReader()->isControlChar(first)) {
  +            // Character reference was not in the valid range
  +            fScanner->emitError(XMLErrs::InvalidCharacterRef);
  +            return false;
  +        }
       }
  +    else {
  +        // Character reference was not in the valid range
  +        fScanner->emitError(XMLErrs::InvalidCharacterRef);
  +        return false;
  +    }
  +
       return true;
   }
   
  @@ -2156,19 +2158,9 @@
       XMLCh   nextCh;
       XMLCh   secondCh = 0;
       bool    gotLeadingSurrogate = false;
  -    bool    charref_expanded = false;
       while (true)
       {
  -        // Get the second char if we have one, else get another
  -        if (secondCh)
  -        {
  -            nextCh = secondCh;
  -            secondCh = 0;
  -        }
  -         else
  -        {
  -            nextCh = fReaderMgr->getNextChar();
  -        }
  +        nextCh = fReaderMgr->getNextChar();
   
           //
           //  Watch specifically for EOF and issue a more meaningful error
  @@ -2226,8 +2218,6 @@
                       gotLeadingSurrogate = false;
                       continue;
                   }
  -                charref_expanded = true;
  -
               }
                else
               {
  @@ -2262,8 +2252,7 @@
                   gotLeadingSurrogate = false;
               }
           }
  -
  -        if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  +        else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
           {
               if (gotLeadingSurrogate)
                   fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
  @@ -2279,28 +2268,26 @@
               }
                else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
               {
  -                // if it was a character reference and is control char, then it's ok
  -                if (!(charref_expanded && fReaderMgr->getCurrentReader()->isControlChar(nextCh)))
  -                {
  -                    XMLCh tmpBuf[9];
  -                    XMLString::binToText
  -                    (
  -                        nextCh
  -                        , tmpBuf
  -                        , 8
  -                        , 16
  -                    );
  -                    fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
  -                    fReaderMgr->skipPastChar(quoteCh);
  -                    return false;
  -                }
  +                XMLCh tmpBuf[9];
  +                XMLString::binToText
  +                (
  +                    nextCh
  +                    , tmpBuf
  +                    , 8
  +                    , 16
  +                );
  +                fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
  +                fReaderMgr->skipPastChar(quoteCh);
  +                return false;
               }
  -            charref_expanded = false;
               gotLeadingSurrogate = false;
           }
   
           // Looks ok, so add it to the literal
           toFill.append(nextCh);
  +
  +        if (secondCh)
  +            toFill.append(secondCh);
       }
   
       //
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to