dbertoni    00/10/02 09:52:41

  Modified:    c/src/XMLSupport FormatterToHTML.cpp FormatterToXML.cpp
                        FormatterToXML.hpp
  Log:
  Better handling of encodings.  Avoid output transcoding with certain
  encodings.
  
  Revision  Changes    Path
  1.26      +6 -1      xml-xalan/c/src/XMLSupport/FormatterToHTML.cpp
  
  Index: FormatterToHTML.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToHTML.cpp,v
  retrieving revision 1.25
  retrieving revision 1.26
  diff -u -r1.25 -r1.26
  --- FormatterToHTML.cpp       2000/09/27 16:39:23     1.25
  +++ FormatterToHTML.cpp       2000/10/02 16:52:32     1.26
  @@ -55,7 +55,7 @@
    * <http://www.apache.org/>.
    */
   /**
  - * $Id: FormatterToHTML.cpp,v 1.25 2000/09/27 16:39:23 dbertoni Exp $
  + * $Id: FormatterToHTML.cpp,v 1.26 2000/10/02 16:52:32 dbertoni Exp $
    * 
    * $State: Exp $
    * 
  @@ -244,6 +244,11 @@
        m_attrCharsMap[XalanUnicode::charLF] = 'S';
        m_attrCharsMap[XalanUnicode::charLessThanSign] = 0;
        m_attrCharsMap[XalanUnicode::charGreaterThanSign] = 0;
  +
  +     for(unsigned int i = 160; i < SPECIALSSIZE; i++)
  +     {
  +             m_attrCharsMap[i] = 'S';
  +     }
   }
   
   
  
  
  
  1.30      +136 -101  xml-xalan/c/src/XMLSupport/FormatterToXML.cpp
  
  Index: FormatterToXML.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToXML.cpp,v
  retrieving revision 1.29
  retrieving revision 1.30
  diff -u -r1.29 -r1.30
  --- FormatterToXML.cpp        2000/09/29 22:04:38     1.29
  +++ FormatterToXML.cpp        2000/10/02 16:52:35     1.30
  @@ -99,7 +99,7 @@
                        eFormat                                 format) :
        FormatterListener(format),
        m_writer(writer),
  -     m_maxCharacter(0x7Fu),
  +     m_maxCharacter(0),
        m_attrCharsMap(),
        m_charsMap(),
        m_shouldWriteXMLHeader(xmlDecl),
  @@ -130,11 +130,10 @@
        m_attrSpecialChars(theDefaultAttrSpecialChars),
        m_charBuf(),
        m_pos(0),
  +     m_byteBuf(),
        m_level(0),
        m_elemStack()
   {
  -     initCharsMap();
  -
        if(isEmpty(m_doctypePublic) == false)
        {
                if(startsWith(
  @@ -165,30 +164,26 @@
                }
        }
   
  -     m_isUTF8 = equals(m_encoding, s_utf8EncodingString); // || 
isEmpty(m_encoding);
  +     m_maxCharacter = getMaximumCharacterValue(m_encoding);
   
  +     m_isUTF8 = equals(m_encoding, s_utf8EncodingString);
  +
        if (equals(m_encoding, s_windows1250EncodingString) == true ||
                equals(m_encoding, s_usASCIIEncodingString) == true ||
                equals(m_encoding, s_asciiEncodingString) == true)
        {
                m_bytesEqualChars = true;
  -     }
  -
  -     m_charBuf.resize(s_maxBufferSize + 1);
  -
  -#if 0
  -     DOMString2IntMapType::const_iterator it =
  -             s_revsize.find(toUpperCase(m_encoding));
   
  -     if (it != s_revsize.end())
  -             m_maxCharacter = (*it).second;
  -     m_isUTF8 = equals(m_encoding, XALAN_STATIC_UCODE_STRING("UTF-8"));
  -     it = s_revsize.find(toUpperCase(m_encoding));
  -     if (it != s_revsize.end())
  +             m_byteBuf.resize(s_maxBufferSize);
  +     }
  +     else
        {
  -       m_maxCharacter = (*it).second;
  +             m_charBuf.resize(s_maxBufferSize);
        }
  -#endif
  +
  +     // Do this last so we initialize the map according to the value of
  +     // m_maxCharacter for the encoding.
  +     initCharsMap();
   }
   
   
  @@ -215,13 +210,6 @@
   
        m_attrCharsMap[0x0A] = 'S';
        m_attrCharsMap[0x0D] = 'S';
  -
  -     {
  -             for(unsigned int i = 160; i < SPECIALSSIZE; i++)
  -             {
  -                     m_attrCharsMap[i] = 'S';
  -             }
  -     }
   }
   
   
  @@ -244,8 +232,10 @@
        m_charsMap[0x0A] = 'S';
        m_charsMap[0x0D] = 'S';
        m_charsMap[9] = '\0';
  +
  +     assert(m_maxCharacter != 0);
   
  -     for(int i = m_maxCharacter; i < SPECIALSSIZE; ++i)
  +     for(XalanDOMChar i = m_maxCharacter; i < SPECIALSSIZE; ++i)
        {
                m_charsMap[i] = 'S';
        }
  @@ -292,18 +282,23 @@
                        writeNumberedEntityReference(ch);
                }
                else
  +             {
  +                     m_byteBuf[m_pos++] = char(ch);
  +             }
  +
  +             if(m_pos == s_maxBufferSize)
                {
  -                     m_charBuf[m_pos++] = char(ch);
  +                     flushBytes();
                }
        }
        else
        {
                m_charBuf[m_pos++] = ch;
  -     }
   
  -     if(m_pos == s_maxBufferSize)
  -     {
  -             flushChars();
  +             if(m_pos == s_maxBufferSize)
  +             {
  +                     flushChars();
  +             }
        }
   }
   
  @@ -312,37 +307,10 @@
   void
   FormatterToXML::accum(const XalanDOMChar*    chars)
   {
  -     if (m_bytesEqualChars == true)
  +     for(const XalanDOMChar* current = chars; *current != 0; ++current)
        {
  -             for(const XalanDOMChar* current = chars; *current != 0; 
++current)
  -             {
  -                     if (*current > 255)
  -                     {
  -                             writeNumberedEntityReference(*current);
  -                     }
  -                     else
  -                     {
  -                             m_charBuf[m_pos++] = *current;
  -                     }
  -
  -                     if(m_pos == s_maxBufferSize)
  -                     {
  -                             flushChars();
  -                     }
  -             }
  +             accum(*current);
        }
  -     else
  -     {
  -             for(const XalanDOMChar* current = chars; *current != 0; 
++current)
  -             {
  -                     m_charBuf[m_pos++] = *current;
  -
  -                     if(m_pos == s_maxBufferSize)
  -                     {
  -                             flushChars();
  -                     }
  -             }
  -     }
   }
   
   
  @@ -354,37 +322,10 @@
                        unsigned int            length)
   {
        const DOMCharBufferType::size_type      n = start + length;
  -
  -     if (m_bytesEqualChars == true)
  -     {
  -             for(DOMCharBufferType::size_type i = start; i < n; ++i)
  -             {
  -                     if (chars[i] > 255)
  -                     {
  -                             writeNumberedEntityReference(chars[i]);
  -                     }
  -                     else
  -                     {
  -                             m_charBuf[m_pos++] = char(chars[i]);
  -                     }
   
  -                     if(m_pos == s_maxBufferSize)
  -                     {
  -                             flushChars();
  -                     }
  -             }
  -     }
  -     else
  +     for(DOMCharBufferType::size_type i = start; i < n; ++i)
        {
  -             for(DOMCharBufferType::size_type i = start; i < n; ++i)
  -             {
  -                     m_charBuf[m_pos++] = chars[i];
  -
  -                     if(m_pos == s_maxBufferSize)
  -                     {
  -                             flushChars();
  -                     }
  -             }
  +             accum(chars[i]);
        }
   }
   
  @@ -433,6 +374,24 @@
   
   
   
  +XalanDOMChar
  +FormatterToXML::getMaximumCharacterValue(const XalanDOMString&       
theEncoding)
  +{
  +     const MaximumCharacterValueMapType::const_iterator      i =
  +             s_maximumCharacterValues.find(toUpperCase(theEncoding));
  +
  +     if (i == s_maximumCharacterValues.end())
  +     {
  +             return XalanDOMChar(0x7fu);
  +     }
  +     else
  +     {
  +             return (*i).second;
  +     }
  +}
  +
  +
  +
   void
   FormatterToXML::accumDefaultEscape(
                        XalanDOMChar            ch,
  @@ -571,21 +530,38 @@
   void
   FormatterToXML::flushChars()
   {
  -     assert(m_charBuf.size() > 0 && m_charBuf.size() > m_pos);
  +     assert(m_charBuf.size() > 0 && m_charBuf.size() >= m_pos);
   
  -     m_charBuf[m_pos] = 0;
  +     m_writer.write(&m_charBuf[0], 0, m_pos);
   
  -     m_writer.write(&m_charBuf[0]);
  -
        m_pos = 0;
   }
   
   
   
   void
  +FormatterToXML::flushBytes()
  +{
  +     assert(m_byteBuf.size() > 0 && m_byteBuf.size() >= m_pos);
  +
  +     m_writer.write(&m_byteBuf[0], 0, m_pos);
  +
  +     m_pos = 0;
  +}
  +
  +
  +     
  +void
   FormatterToXML::flush()
   {
  -     flushChars();
  +     if (m_bytesEqualChars == true)
  +     {
  +             flushBytes();
  +     }
  +     else
  +     {
  +             flushChars();
  +     }
   }
   
   
  @@ -848,13 +824,15 @@
                        {
                                const XalanDOMChar      ch = chars[i];
   
  -                             if(ch < SPECIALSSIZE && m_charsMap[ch] != 'S')
  +                             if((ch < SPECIALSSIZE &&
  +                                     m_charsMap[ch] == 'S') ||
  +                                     ch > m_maxCharacter)
                                {
  -                                     accum(ch);
  +                                     accumDefaultEscape(ch, i, chars, 
length, false);
                                }
                                else
                                {
  -                                     accumDefaultEscape(ch, i, chars, 
length, false);
  +                                     accum(ch);
                                }
                        }
   
  @@ -896,14 +874,15 @@
       {
                const XalanDOMChar      ch = string[i];
   
  -             if(ch < SPECIALSSIZE &&
  -                m_attrCharsMap[ch] != 'S')
  +             if((ch < SPECIALSSIZE &&
  +                 m_attrCharsMap[ch] == 'S') ||
  +                     ch > m_maxCharacter)
                {
  -                     accum(ch);
  +                     accumDefaultEscape(ch, i, string, len, true);
                }
                else
                {
  -                     accumDefaultEscape(ch, i, string, len, true);
  +                     accum(ch);
                }
       }
   }
  @@ -1582,8 +1561,10 @@
   
   static XalanDOMString                        s_utf8EncodingString;
   
  +static FormatterToXML::MaximumCharacterValueMapType          
s_maximumCharacterValues;
   
   
  +
   const XalanDOMCharVectorType&        FormatterToXML::s_xsltNextIsRawString = 
::s_xsltNextIsRawString;
   
   const XalanDOMCharVectorType&        FormatterToXML::s_formatterToDOMString 
= ::s_formatterToDOMString;
  @@ -1618,8 +1599,58 @@
   
   const FormatterToXML::DOMCharBufferType::size_type   
FormatterToXML::s_maxBufferSize = 512;
   
  +const FormatterToXML::MaximumCharacterValueMapType&          
FormatterToXML::s_maximumCharacterValues =
  +                     ::s_maximumCharacterValues;
  +
   
   
  +static void
  +initMaximumCharacterValueMap(FormatterToXML::MaximumCharacterValueMapType&   
theMap)
  +{
  +     typedef FormatterToXML::MaximumCharacterValueMapType::value_type        
value_type;
  +
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("WINDOWS-1250")),
      0xFF)); // Windows 1250 Peter Smolik
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("UTF-8")),    
                 0xFFFF)); // Universal Transformation Format 8
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("US-ASCII")), 
         0x7F));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-1")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-2")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-3")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-4")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-5")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-6")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-7")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-8")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-9")),
        0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-2022-JP")),
       0xFFFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("SHIFT_JIS")),
                 0xFFFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EUC-JP")),   
         0xFFFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("GB2312")),   
         0xFFFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("BIG5")),     
                 0xFFFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EUC-KR")),   
         0xFFFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-2022-KR")),
       0xFFFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("KOI8-R")),   
         0xFFFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-US")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-CA")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-NL")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-DK")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-NO")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-FI")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-SE")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-IT")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-ES")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-GB")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-FR")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-AR1")),
     0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-HE")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-CH")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-ROECE")),
 0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-YU")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-IS")),
      0xFF));
  +     
theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-AR2")),
     0xFF));
  +}
  +
  +
  +
   void
   FormatterToXML::initialize()
   {
  @@ -1666,6 +1697,8 @@
                
MakeXalanDOMCharVector(c_wstr(XALAN_STATIC_UCODE_STRING("ASCII")));
   
        ::s_utf8EncodingString = XALAN_STATIC_UCODE_STRING("UTF-8");
  +
  +     initMaximumCharacterValueMap(::s_maximumCharacterValues);
   }
   
   
  @@ -1702,4 +1735,6 @@
        XalanDOMCharVectorType().swap(::s_asciiEncodingString);
   
        clear(::s_utf8EncodingString);
  +
  +     MaximumCharacterValueMapType().swap(::s_maximumCharacterValues);
   }
  
  
  
  1.18      +37 -12    xml-xalan/c/src/XMLSupport/FormatterToXML.hpp
  
  Index: FormatterToXML.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToXML.hpp,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- FormatterToXML.hpp        2000/09/27 20:36:43     1.17
  +++ FormatterToXML.hpp        2000/10/02 16:52:36     1.18
  @@ -65,7 +65,7 @@
   
   
   
  -#include <stack>
  +#include <map>
   #include <vector>
   
   
  @@ -325,12 +325,27 @@
                return m_indent;
        }
   
  +#if defined(XALAN_NO_NAMESPACES)
  +     typedef vector<bool>                            BoolStackType;
  +     typedef vector<XalanDOMChar>            DOMCharBufferType;
  +     typedef vector<char>                            ByteBufferType;
  +     typedef map<XalanDOMString,
  +                             XalanDOMChar,
  +                             less<XalanDOMString> >  
MaximumCharacterValueMapType;
  +#else
  +     typedef std::vector<bool>                       BoolStackType;
  +     typedef std::vector<XalanDOMChar>       DOMCharBufferType;
  +     typedef std::vector<char>                       ByteBufferType;
  +     typedef std::map<XalanDOMString,
  +                                      XalanDOMChar>          
MaximumCharacterValueMapType;
  +#endif
  +
   protected:
   
        /** 
         * The writer where the XML will be written.
         */
  -     Writer&                                 m_writer;
  +     Writer&         m_writer;
   
        /**
         * Output a line break.
  @@ -425,7 +440,13 @@
        void
        flushChars();
   
  +     /**
  +      * Flush the byte buffer.
  +      */
        void
  +     flushBytes();
  +
  +     void
        flush();
   
        void
  @@ -508,6 +529,16 @@
                        XalanDOMChar    ch,
                        unsigned int    next);
   
  +     /**
  +      * Get the maximum character value for the encoding.
  +      *
  +      * @param theEncoding The encoding name.
  +      * @return The maximum character value that the
  +      * encoding supports.
  +      */
  +     static XalanDOMChar
  +     getMaximumCharacterValue(const XalanDOMString&  theEncoding);
  +
        enum eDummyTwo { SPECIALSSIZE = 256};
   
        /**
  @@ -610,16 +641,6 @@
         */
        static const XalanDOMCharVectorType&    s_formatterToDOMString;
   
  -#if defined(XALAN_NO_NAMESPACES)
  -     typedef vector<bool>                            BoolStackType;
  -     typedef vector<XalanDOMChar>            DOMCharBufferType;
  -     typedef vector<char>                            ByteBufferType;
  -#else
  -     typedef std::vector<bool>                       BoolStackType;
  -     typedef std::vector<XalanDOMChar>       DOMCharBufferType;
  -     typedef std::vector<char>                       ByteBufferType;
  -#endif
  -
        /**
         * Stack to keep track of whether or not we need to 
         * preserve whitespace.
  @@ -799,7 +820,11 @@
   
        DOMCharBufferType::size_type    m_pos;
   
  +     ByteBufferType                                  m_byteBuf;
  +
        static const DOMCharBufferType::size_type       s_maxBufferSize;
  +
  +     static const MaximumCharacterValueMapType&      
s_maximumCharacterValues;
   
        /**
         * Current level of indent.
  
  
  

Reply via email to