dbertoni    01/01/08 10:24:32

  Modified:    c/src/XMLSupport FormatterToHTML.cpp FormatterToHTML.hpp
                        FormatterToXML.cpp FormatterToXML.hpp
  Log:
  Cleaned up some unused code and moved common code into a function.
  
  Revision  Changes    Path
  1.43      +166 -74   xml-xalan/c/src/XMLSupport/FormatterToHTML.cpp
  
  Index: FormatterToHTML.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToHTML.cpp,v
  retrieving revision 1.42
  retrieving revision 1.43
  diff -u -r1.42 -r1.43
  --- FormatterToHTML.cpp       2001/01/04 19:22:37     1.42
  +++ FormatterToHTML.cpp       2001/01/08 18:24:30     1.43
  @@ -79,6 +79,7 @@
   #include <PlatformSupport/DOMStringHelper.hpp>
   #include <PlatformSupport/Writer.hpp>
   #include <PlatformSupport/XalanUnicode.hpp>
  +#include <PlatformSupport/XalanXMLChar.hpp>
   
   
   
  @@ -226,7 +227,7 @@
                        int                                             indent,
                        const XalanDOMString&   version,
                        const XalanDOMString&   standalone,
  -                     bool xmlDecl) :
  +                     bool                                    xmlDecl) :
        FormatterToXML(
                        writer,
                        version,
  @@ -243,7 +244,8 @@
        m_inBlockElem(false),
        m_isRawStack(),
        m_isScriptOrStyleElem(false),
  -     m_isFirstElem(true)
  +     m_escapeURLs(false),
  +     m_elementLevel(0)
   {
        initCharsMap();
   }
  @@ -326,6 +328,12 @@
   void
   FormatterToHTML::startDocument()
   {
  +     // Clear the buffer, just in case...
  +     clear(m_stringBuffer);
  +
  +     // Reset this, just in case...
  +     m_elementLevel = 0;
  +
       m_startNewLine = false;
        m_shouldWriteXMLHeader = false;
   
  @@ -375,7 +383,7 @@
   void
   FormatterToHTML::endDocument()
   {
  -     m_isFirstElem = true;
  +     assert(m_elementLevel == 0);
   
        FormatterToXML::endDocument();
   }
  @@ -404,7 +412,7 @@
                m_ispreserve = false;
        }
       else if(m_doIndent &&
  -                     m_isFirstElem == false &&
  +                     m_elementLevel > 0 &&
                        (m_inBlockElem == false || isBlockElement == true))
       {
                m_startNewLine = true;
  @@ -434,8 +442,6 @@
       
       m_isprevtext = false;
   
  -     m_isFirstElem = false;
  -
        if (isHeadElement)
       {
         writeParentTagEnd();
  @@ -448,6 +454,11 @@
         accumContent(XalanUnicode::charQuoteMark);
         accumContent(XalanUnicode::charGreaterThanSign);
       }
  +
  +     // Increment the level...
  +     ++m_elementLevel;
  +
  +     assert(m_elementLevel > 0);
   }
   
   
  @@ -523,9 +534,12 @@
                }
       }
   
  -     m_isFirstElem = false;
  -
       m_isprevtext = false;
  +
  +     // Decrement the level...
  +     --m_elementLevel;
  +
  +     assert(m_elementLevel >= 0);
   }
   
   
  @@ -668,10 +682,10 @@
   
                // If outside of an element, then put in a new line.  This whitespace
                // is not significant.
  -//           if (m_elemStack.empty() == true)
  -//           {
  -//                   outputLineSep();
  -//           }
  +             if (m_elementLevel == 0)
  +             {
  +                     outputLineSep();
  +             }
   
                m_startNewLine = true;
        }
  @@ -811,7 +825,7 @@
                {
                        accumContent(ch); // no escaping in this case, as specified in 15.2
                }
  -             else if (accumDefaultEntity(ch, i, string, strLen, false) == false)
  +             else if (accumDefaultEntity(ch, i, string, strLen, true) == false)
                {
                        if (0xd800 <= ch && ch < 0xdc00) 
                        {
  @@ -837,7 +851,10 @@
   
                                accumContent(XalanUnicode::charAmpersand);
                                accumContent(XalanUnicode::charNumberSign);
  -                             accumContent(UnsignedLongToDOMString(next));
  +
  +                             accumContent(UnsignedLongToDOMString(next, m_stringBuffer));
  +                             clear(m_stringBuffer);
  +
                                accumContent(XalanUnicode::charSemicolon);
                        }
                        else if(ch >= 160 && ch <= 255)
  @@ -933,21 +950,6 @@
   
   
   void
  -FormatterToHTML::copyEntityIntoBuffer(const XalanDOMCharVectorType&          s)
  -{
  -    accumContent(XalanUnicode::charAmpersand);
  -
  -    for(XalanDOMCharVectorType::const_iterator i = s.begin(); *i != 0; ++i)
  -    {
  -             accumContent(*i);
  -    }
  -
  -    accumContent(XalanUnicode::charSemicolon);
  -}
  -
  -
  -
  -void
   FormatterToHTML::processAttribute(
                        const XalanDOMChar*             name,
                        const XalanDOMChar*             value,
  @@ -1030,91 +1032,181 @@
        // causing damage.      If the URL is already properly escaped, in theory, this 
        // function should not change the string value.
   
  -     char[] stringArray = string.toCharArray();
  -     int len = stringArray.length;
  -             
  -     accum('"');
  +     const unsigned int      len = length(string);
   
  -     for (int i = 0; i < len; i++)
  -     {
  -             char ch = stringArray[i];
  +    for (unsigned int i = 0; i < len; ++i)
  +    {
  +             const XalanDOMChar      ch = string[i];
   
  -             // if first 8 bytes are 0, no need to append them.
  -             if ((ch < 9) || (ch > 127)
  -                       || /*(ch == '"') || -sb, as per #PDIK4L9LZY */ (ch == ' '))
  +             if (ch < 33 || ch > 126)
                {
  -                     if (m_specialEscapeURLs)
  +                     if (m_escapeURLs == true)
                        {
  +                             // For the gory details of encoding these characters as
  +                             // UTF-8 hex, see:
  +                             // 
  +                             // Unicode, A Primer, by Tony Graham, p. 92.
  +                             //
                                if(ch <= 0x7F)
                                {
  -                                     accum("%");
  -                                     accum(Integer.toHexString(ch).toUpperCase());              
  +                                     accumHexNumber(ch);
                                }
                                else if(ch <= 0x7FF)
                                {
  -                                     int high = (int) ((((int) ch) & 0xFFC0) >> 6) | 0xC0; // Clear high bytes?
  -                                     int low = (int) (((int) ch) & 0x3F) | 0x80; // First 6 bits, + high bit
  -                                     accum("%");
  -                                     accum(Integer.toHexString(high).toUpperCase());
  -                                     accum("%");
  -                                     accum(Integer.toHexString(low).toUpperCase());
  +                                     const XalanDOMChar      highByte = XalanDOMChar((ch >> 6) | 0xC0);
  +                                     const XalanDOMChar      lowByte = XalanDOMChar((ch & 0x3F) | 0x80);
  +
  +                                     accumHexNumber(highByte);
  +
  +                                     accumHexNumber(lowByte);
  +                             }
  +                             else if(isUTF16Surrogate(ch) == true) // high surrogate
  +                             {
  +                                     // I'm sure this can be done in 3 instructions, but I choose 
  +                                     // to try and do it exactly like it is done in the book, at least 
  +                                     // until we are sure this is totally clean.  I don't think performance 
  +                                     // is a big issue with this particular function, though I could be 
  +                                     // wrong.  Also, the stuff below clearly does more masking than 
  +                                     // it needs to do.
  +            
  +                                     // Clear high 6 bits.
  +                                     const XalanDOMChar      highSurrogate = XalanDOMChar(ch & 0x03FF);
  +
  +                                     // Middle 4 bits (wwww) + 1
  +                                     // "Note that the value of wwww from the high surrogate bit pattern
  +                                     // is incremented to make the uuuuu bit pattern in the scalar value 
  +                                     // so the surrogate pair don't address the BMP."
  +                                     const XalanDOMChar      wwww = XalanDOMChar((highSurrogate & 0x03C0) >> 6);
  +                                     const XalanDOMChar      uuuuu = XalanDOMChar(wwww + 1);  
  +
  +                                     // next 4 bits
  +                                     const XalanDOMChar      zzzz = XalanDOMChar((highSurrogate & 0x003C) >> 2);
  +            
  +                                     // low 2 bits
  +                                     const XalanDOMChar      temp = XalanDOMChar(((highSurrogate & 0x0003) << 4) & 0x30);
  +            
  +                                     // Get low surrogate character.
  +                                     const XalanDOMChar      nextChar = string[++i];
  +            
  +                                     // Clear high 6 bits.
  +                                     const XalanDOMChar      lowSurrogate = XalanDOMChar(nextChar & 0x03FF);
  +            
  +                                     // put the middle 4 bits into the bottom of yyyyyy (byte 3)
  +                                     const XalanDOMChar      yyyyyy = XalanDOMChar(temp | ((lowSurrogate & 0x03C0) >> 6));
  +            
  +                                     // bottom 6 bits.
  +                                     const XalanDOMChar      xxxxxx = XalanDOMChar(lowSurrogate & 0x003F);
  +            
  +                                     const XalanDOMChar      byte1 = XalanDOMChar(0xF0 | (uuuuu >> 2)); // top 3 bits of uuuuu
  +                                     const XalanDOMChar      byte2 = XalanDOMChar(0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz);
  +                                     const XalanDOMChar      byte3 = XalanDOMChar(0x80 | yyyyyy);
  +                                     const XalanDOMChar      byte4 = XalanDOMChar(0x80 | xxxxxx);
  +            
  +                                     accumHexNumber(byte1);
  +
  +                                     accumHexNumber(byte2);
  +
  +                                     accumHexNumber(byte3);
  +
  +                                     accumHexNumber(byte4);
                                }
                                else
                                {
  -                                     int high = (int) ((((int) ch) & 0xF000) >> 12) | 0xE0; // top 4 bits
  -                                     int middle = (int) ((((int) ch) & 0x0FC0) >> 6) | 0x80; // middle 6 bits
  -                                     int low = (int) (((int) ch) & 0x3F) | 0x80; // First 6 bits, + high bit
  -                                     accum("%");
  -                                     accum(Integer.toHexString(high).toUpperCase());
  -                                     accum("%");
  -                                     accum(Integer.toHexString(middle).toUpperCase());
  -                                     accum("%");
  -                                     accum(Integer.toHexString(low).toUpperCase());
  +                                     const XalanDOMChar      highByte = XalanDOMChar((ch >> 12) | 0xE0);
  +                                     const XalanDOMChar      middleByte = XalanDOMChar(((ch & 0x0FC0) >> 6) | 0x80);
  +                                     const XalanDOMChar      lowByte = XalanDOMChar((ch & 0x3F) | 0x80);
  +
  +                                     accumHexNumber(highByte);
  +
  +                                     accumHexNumber(middleByte);
  +
  +                                     accumHexNumber(lowByte);
                                }
                        }
  +                     else if (ch == XalanUnicode::charSpace)
  +                     {
  +                             accumHexNumber(ch);
  +                     }
                        else if (ch < m_maxCharacter)
                        {
  -                             accum(ch);
  +                             accumContent(ch);
                        }
                        else
                        {
  -                             accum("&#");
  -                             accum(Integer.toString(ch));
  -                             accum(';');
  +                             accumContent(XalanUnicode::charAmpersand);
  +                             accumContent(XalanUnicode::charNumberSign);
  +    
  +                             accumContent(UnsignedLongToDOMString(ch, m_stringBuffer));
  +                             clear(m_stringBuffer);
  +
  +                             accumContent(XalanUnicode::charSemicolon);
                        }
                }
  -             else if('%' == ch)
  +             else if(ch == XalanUnicode::charPercentSign)
                {
                        // If the character is a '%' number number, try to avoid double-escaping.
                        // There is a question if this is legal behavior.
  -                     if(((i+2) < len) && Character.isDigit(stringArray[i+1])
  -                     && Character.isDigit(stringArray[i+2]))
  +                     if (i + 2 < len &&
  +                             XalanXMLChar::isDigit(string[i + 1]) == true &&
  +                             XalanXMLChar::isDigit(string[i + 2]) == true)
                        {
  -                             accum(ch);
  +                             accumContent(ch);
                        }
                        else
                        {
  -                             accum("%");
  -                             accum(Integer.toHexString(ch).toUpperCase());
  +                             if (m_escapeURLs == true)
  +                             {
  +                                     accumHexNumber(ch);
  +                             }
  +                             else
  +                             {
  +                                     accumContent(ch);
  +                             }
                        }
                } 
                // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
                // not allowing quotes in the URI proper syntax, nor in the fragment 
                // identifier, we believe that double quotes should be escaped.
  -             else if (ch == '"')
  +             else if (ch == XalanUnicode::charQuoteMark)
                {
  -                     accum('%');
  -                     accum('2');
  -                     accum('2');
  +                     if (m_escapeURLs == true)
  +                     {
  +                             accumContent(XalanUnicode::charPercentSign);
  +                             accumContent(XalanUnicode::charDigit_2);
  +                             accumContent(XalanUnicode::charDigit_2);
  +                     }
  +                     else
  +                     {
  +                             accumDefaultEntity(ch, i, string, len, true);
  +                     }
                }
                else
                {
  -                     accum(ch);
  +                     accumContent(ch);
                }
        }
  -
  -     accum('"');
   #endif
  +}
  +
  +
  +
  +void
  +FormatterToHTML::accumHexNumber(const XalanDOMChar   theChar)
  +{
  +     accumContent(XalanUnicode::charPercentSign);
  +
  +     assert(length(m_stringBuffer) == 0);
  +
  +     UnsignedLongToHexDOMString(theChar, m_stringBuffer);
  +
  +     if (length(m_stringBuffer) == 1)
  +     {
  +             accumContent(XalanUnicode::charDigit_0);
  +     }
  +
  +     accumContent(m_stringBuffer);
  +
  +     clear(m_stringBuffer);
   }
   
   
  
  
  
  1.20      +19 -7     xml-xalan/c/src/XMLSupport/FormatterToHTML.hpp
  
  Index: FormatterToHTML.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToHTML.hpp,v
  retrieving revision 1.19
  retrieving revision 1.20
  diff -u -r1.19 -r1.20
  --- FormatterToHTML.hpp       2000/12/18 20:05:44     1.19
  +++ FormatterToHTML.hpp       2001/01/08 18:24:30     1.20
  @@ -58,7 +58,7 @@
   #define FORMATTERTOHTML_HEADER_GUARD_1357924680
   
   /**
  - * $Id: FormatterToHTML.hpp,v 1.19 2000/12/18 20:05:44 auriemma Exp $
  + * $Id: FormatterToHTML.hpp,v 1.20 2001/01/08 18:24:30 dbertoni Exp $
    * 
    * $State: Exp $
    * 
  @@ -374,9 +374,6 @@
        void
        copyEntityIntoBuffer(const XalanDOMString&      s);
   
  -     void
  -     copyEntityIntoBuffer(const XalanDOMCharVectorType&      s);
  -
        /**
         * Get an ElemDesc instance for the specified name.
         *
  @@ -392,7 +389,7 @@
         * @return map of element flags.
         */
        static void
  -     initializeElementFlagsMap(ElementFlagsMapType&  );
  +     initializeElementFlagsMap(ElementFlagsMapType&  theMap);
   
        /**
         * Process an attribute.
  @@ -410,7 +407,6 @@
         * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
         *
         * @param   string      String to convert to XML format.
  -      * @param   specials    Chracters, should be represeted in chracter referenfces.
         * @param   encoding    CURRENTLY NOT IMPLEMENTED.
         */
        void
  @@ -418,6 +414,16 @@
                        const XalanDOMChar*             string,
                        const XalanDOMString    encoding);
   
  +     /**
  +      * Accumulate the specified character by converting its numeric value to
  +      * a hex string, making sure that any string of length 1 are written with
  +      * a '0' before the number.
  +      *
  +      * @param theChar The character to accumulate
  +      */
  +     void
  +     accumHexNumber(const XalanDOMChar       theChar);
  +
        XalanDOMString  m_currentElementName;
   
        bool                    m_inBlockElem;
  @@ -425,8 +431,14 @@
        BoolStackType   m_isRawStack;
   
        bool                    m_isScriptOrStyleElem;
  +
  +     bool                    m_escapeURLs;
  +
  +     /**
  +      * A counter so we can tell if we're inside the document element.
  +      */
  +     int                             m_elementLevel;
   
  -     bool                    m_isFirstElem;
   };
   
   
  
  
  
  1.42      +10 -20    xml-xalan/c/src/XMLSupport/FormatterToXML.cpp
  
  Index: FormatterToXML.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToXML.cpp,v
  retrieving revision 1.41
  retrieving revision 1.42
  diff -u -r1.41 -r1.42
  --- FormatterToXML.cpp        2000/12/18 20:06:14     1.41
  +++ FormatterToXML.cpp        2001/01/08 18:24:30     1.42
  @@ -115,11 +115,11 @@
        m_isUTF8(false),
        m_doctypeSystem(doctypeSystem),
        m_doctypePublic(doctypePublic),
  -     m_encoding(isEmpty(encoding) == false ? encoding :
  -                     XalanDOMString(&s_defaultMIMEEncoding[0], s_defaultMIMEEncoding.size() - 1)),
  +     m_encoding(isEmpty(encoding) == false ? encoding : XalanDOMString(XalanTranscodingServices::s_utf8String)),
        m_currentIndent(0),
        m_indent(indent),
        m_preserves(),
  +     m_stringBuffer(),
        m_bytesEqualChars(false),
        m_shouldFlush(true),
        m_spaceBeforeClose(false),
  @@ -132,7 +132,6 @@
        m_charBuf(),
        m_pos(0),
        m_byteBuf(),
  -     m_level(0),
        m_elemStack(),
        m_accumNameFunction(0),
        m_accumContentFunction(0)
  @@ -431,22 +430,6 @@
   
   
   void
  -FormatterToXML::accumName(const XalanDOMCharVectorType&      theVector)
  -{
  -     accumName(c_wstr(theVector), 0, theVector.size() - 1);
  -}
  -
  -
  -
  -void
  -FormatterToXML::accumContent(const XalanDOMCharVectorType&   theVector)
  -{
  -     accumContent(c_wstr(theVector), 0, theVector.size() - 1);
  -}
  -
  -
  -
  -void
   FormatterToXML::throwInvalidUTF16SurrogateException(XalanDOMChar     ch)
   {
        const XalanDOMString    theMessage(TranscodeFromLocalCodePage("Invalid UTF-16 surrogate detected: ") +
  @@ -666,6 +649,9 @@
   void
   FormatterToXML::startDocument()
   {
  +     // Clear the buffer, just in case...
  +     clear(m_stringBuffer);
  +
        if(m_inEntityRef == false)
        {
                m_needToOutputDocTypeDecl = true;
  @@ -719,6 +705,7 @@
        }
   
        flush();
  +
        flushWriter();
   }
   
  @@ -1127,7 +1114,10 @@
   {
        accumContent(XalanUnicode::charAmpersand);
        accumContent(XalanUnicode::charNumberSign);
  -     accumContent(UnsignedLongToDOMString(theNumber));
  +
  +     accumContent(UnsignedLongToDOMString(theNumber, m_stringBuffer));
  +     clear(m_stringBuffer);
  +
        accumContent(XalanUnicode::charSemicolon);
   }
   
  
  
  
  1.28      +12 -26    xml-xalan/c/src/XMLSupport/FormatterToXML.hpp
  
  Index: FormatterToXML.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToXML.hpp,v
  retrieving revision 1.27
  retrieving revision 1.28
  diff -u -r1.27 -r1.28
  --- FormatterToXML.hpp        2000/12/18 20:06:14     1.27
  +++ FormatterToXML.hpp        2001/01/08 18:24:31     1.28
  @@ -417,22 +417,6 @@
        accumContent(const XalanDOMString&      str);
   
        /**
  -      * Append a vector of wide characters to the buffer.
  -      *
  -      * @param theVector the vector to append
  -      */
  -     void
  -     accumName(const XalanDOMCharVectorType&         theVector);
  -
  -     /**
  -      * Append a vector of wide characters to the buffer.
  -      *
  -      * @param theVector the vector to append
  -      */
  -     void
  -     accumContent(const XalanDOMCharVectorType&      theVector);
  -
  -     /**
         * Escape and accum a character.
         */
        void
  @@ -569,6 +553,12 @@
                        XalanDOMChar    ch,
                        unsigned int    next);
   
  +     static bool
  +     isUTF16Surrogate(XalanDOMChar   ch)
  +     {
  +             return (ch & 0xFC00) == 0xD800 ? true : false;
  +     }
  +
        enum eDummyTwo { SPECIALSSIZE = 256};
   
        /**
  @@ -677,6 +667,11 @@
         */
        BoolStackType   m_preserves;
   
  +     // A text buffer.  We use it mostly for converting
  +     // to string values.  See uses of UnsignedLongToString()
  +     // and UnsignedLongToHexString().
  +     XalanDOMString  m_stringBuffer;
  +
   private:
   
        // These are not implemented.
  @@ -889,19 +884,10 @@
        static const DOMCharBufferType::size_type       s_maxBufferSize;
   
        /**
  -      * Current level of indent.
  -      */
  -     int             m_level;
  -
  -protected:
  -
  -     /**
         * A stack of Boolean objects that tell if the given element 
         * has children.
         */
  -     BoolStackType           m_elemStack;
  -
  -private:
  +     BoolStackType   m_elemStack;
   
        /**
         * A pointer to the member function that will do the accumulating
  
  
  

Reply via email to