dbertoni 00/10/02 09:52:41
Modified: c/src/XMLSupport FormatterToHTML.cpp FormatterToXML.cpp
FormatterToXML.hpp
Log:
Better handling of encodings. Avoid output transcoding with certain
encodings.
Revision Changes Path
1.26 +6 -1 xml-xalan/c/src/XMLSupport/FormatterToHTML.cpp
Index: FormatterToHTML.cpp
===================================================================
RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToHTML.cpp,v
retrieving revision 1.25
retrieving revision 1.26
diff -u -r1.25 -r1.26
--- FormatterToHTML.cpp 2000/09/27 16:39:23 1.25
+++ FormatterToHTML.cpp 2000/10/02 16:52:32 1.26
@@ -55,7 +55,7 @@
* <http://www.apache.org/>.
*/
/**
- * $Id: FormatterToHTML.cpp,v 1.25 2000/09/27 16:39:23 dbertoni Exp $
+ * $Id: FormatterToHTML.cpp,v 1.26 2000/10/02 16:52:32 dbertoni Exp $
*
* $State: Exp $
*
@@ -244,6 +244,11 @@
m_attrCharsMap[XalanUnicode::charLF] = 'S';
m_attrCharsMap[XalanUnicode::charLessThanSign] = 0;
m_attrCharsMap[XalanUnicode::charGreaterThanSign] = 0;
+
+ for(unsigned int i = 160; i < SPECIALSSIZE; i++)
+ {
+ m_attrCharsMap[i] = 'S';
+ }
}
1.30 +136 -101 xml-xalan/c/src/XMLSupport/FormatterToXML.cpp
Index: FormatterToXML.cpp
===================================================================
RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToXML.cpp,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -r1.29 -r1.30
--- FormatterToXML.cpp 2000/09/29 22:04:38 1.29
+++ FormatterToXML.cpp 2000/10/02 16:52:35 1.30
@@ -99,7 +99,7 @@
eFormat format) :
FormatterListener(format),
m_writer(writer),
- m_maxCharacter(0x7Fu),
+ m_maxCharacter(0),
m_attrCharsMap(),
m_charsMap(),
m_shouldWriteXMLHeader(xmlDecl),
@@ -130,11 +130,10 @@
m_attrSpecialChars(theDefaultAttrSpecialChars),
m_charBuf(),
m_pos(0),
+ m_byteBuf(),
m_level(0),
m_elemStack()
{
- initCharsMap();
-
if(isEmpty(m_doctypePublic) == false)
{
if(startsWith(
@@ -165,30 +164,26 @@
}
}
- m_isUTF8 = equals(m_encoding, s_utf8EncodingString); // || isEmpty(m_encoding);
+ m_maxCharacter = getMaximumCharacterValue(m_encoding);
+ m_isUTF8 = equals(m_encoding, s_utf8EncodingString);
+
if (equals(m_encoding, s_windows1250EncodingString) == true ||
equals(m_encoding, s_usASCIIEncodingString) == true ||
equals(m_encoding, s_asciiEncodingString) == true)
{
m_bytesEqualChars = true;
- }
-
- m_charBuf.resize(s_maxBufferSize + 1);
-
-#if 0
- DOMString2IntMapType::const_iterator it =
- s_revsize.find(toUpperCase(m_encoding));
- if (it != s_revsize.end())
- m_maxCharacter = (*it).second;
- m_isUTF8 = equals(m_encoding, XALAN_STATIC_UCODE_STRING("UTF-8"));
- it = s_revsize.find(toUpperCase(m_encoding));
- if (it != s_revsize.end())
+ m_byteBuf.resize(s_maxBufferSize);
+ }
+ else
{
- m_maxCharacter = (*it).second;
+ m_charBuf.resize(s_maxBufferSize);
}
-#endif
+
+ // Do this last so we initialize the map according to the value of
+ // m_maxCharacter for the encoding.
+ initCharsMap();
}
@@ -215,13 +210,6 @@
m_attrCharsMap[0x0A] = 'S';
m_attrCharsMap[0x0D] = 'S';
-
- {
- for(unsigned int i = 160; i < SPECIALSSIZE; i++)
- {
- m_attrCharsMap[i] = 'S';
- }
- }
}
@@ -244,8 +232,10 @@
m_charsMap[0x0A] = 'S';
m_charsMap[0x0D] = 'S';
m_charsMap[9] = '\0';
+
+ assert(m_maxCharacter != 0);
- for(int i = m_maxCharacter; i < SPECIALSSIZE; ++i)
+ for(XalanDOMChar i = m_maxCharacter; i < SPECIALSSIZE; ++i)
{
m_charsMap[i] = 'S';
}
@@ -292,18 +282,23 @@
writeNumberedEntityReference(ch);
}
else
+ {
+ m_byteBuf[m_pos++] = char(ch);
+ }
+
+ if(m_pos == s_maxBufferSize)
{
- m_charBuf[m_pos++] = char(ch);
+ flushBytes();
}
}
else
{
m_charBuf[m_pos++] = ch;
- }
- if(m_pos == s_maxBufferSize)
- {
- flushChars();
+ if(m_pos == s_maxBufferSize)
+ {
+ flushChars();
+ }
}
}
@@ -312,37 +307,10 @@
void
FormatterToXML::accum(const XalanDOMChar* chars)
{
- if (m_bytesEqualChars == true)
+ for(const XalanDOMChar* current = chars; *current != 0; ++current)
{
- for(const XalanDOMChar* current = chars; *current != 0; ++current)
- {
- if (*current > 255)
- {
- writeNumberedEntityReference(*current);
- }
- else
- {
- m_charBuf[m_pos++] = *current;
- }
-
- if(m_pos == s_maxBufferSize)
- {
- flushChars();
- }
- }
+ accum(*current);
}
- else
- {
- for(const XalanDOMChar* current = chars; *current != 0; ++current)
- {
- m_charBuf[m_pos++] = *current;
-
- if(m_pos == s_maxBufferSize)
- {
- flushChars();
- }
- }
- }
}
@@ -354,37 +322,10 @@
unsigned int length)
{
const DOMCharBufferType::size_type n = start + length;
-
- if (m_bytesEqualChars == true)
- {
- for(DOMCharBufferType::size_type i = start; i < n; ++i)
- {
- if (chars[i] > 255)
- {
- writeNumberedEntityReference(chars[i]);
- }
- else
- {
- m_charBuf[m_pos++] = char(chars[i]);
- }
- if(m_pos == s_maxBufferSize)
- {
- flushChars();
- }
- }
- }
- else
+ for(DOMCharBufferType::size_type i = start; i < n; ++i)
{
- for(DOMCharBufferType::size_type i = start; i < n; ++i)
- {
- m_charBuf[m_pos++] = chars[i];
-
- if(m_pos == s_maxBufferSize)
- {
- flushChars();
- }
- }
+ accum(chars[i]);
}
}
@@ -433,6 +374,24 @@
+XalanDOMChar
+FormatterToXML::getMaximumCharacterValue(const XalanDOMString& theEncoding)
+{
+ const MaximumCharacterValueMapType::const_iterator i =
+ s_maximumCharacterValues.find(toUpperCase(theEncoding));
+
+ if (i == s_maximumCharacterValues.end())
+ {
+ return XalanDOMChar(0x7fu);
+ }
+ else
+ {
+ return (*i).second;
+ }
+}
+
+
+
void
FormatterToXML::accumDefaultEscape(
XalanDOMChar ch,
@@ -571,21 +530,38 @@
void
FormatterToXML::flushChars()
{
- assert(m_charBuf.size() > 0 && m_charBuf.size() > m_pos);
+ assert(m_charBuf.size() > 0 && m_charBuf.size() >= m_pos);
- m_charBuf[m_pos] = 0;
+ m_writer.write(&m_charBuf[0], 0, m_pos);
- m_writer.write(&m_charBuf[0]);
-
m_pos = 0;
}
void
+FormatterToXML::flushBytes()
+{
+ assert(m_byteBuf.size() > 0 && m_byteBuf.size() >= m_pos);
+
+ m_writer.write(&m_byteBuf[0], 0, m_pos);
+
+ m_pos = 0;
+}
+
+
+
+void
FormatterToXML::flush()
{
- flushChars();
+ if (m_bytesEqualChars == true)
+ {
+ flushBytes();
+ }
+ else
+ {
+ flushChars();
+ }
}
@@ -848,13 +824,15 @@
{
const XalanDOMChar ch = chars[i];
- if(ch < SPECIALSSIZE && m_charsMap[ch] != 'S')
+ if((ch < SPECIALSSIZE &&
+ m_charsMap[ch] == 'S') ||
+ ch > m_maxCharacter)
{
- accum(ch);
+ accumDefaultEscape(ch, i, chars, length, false);
}
else
{
- accumDefaultEscape(ch, i, chars, length, false);
+ accum(ch);
}
}
@@ -896,14 +874,15 @@
{
const XalanDOMChar ch = string[i];
- if(ch < SPECIALSSIZE &&
- m_attrCharsMap[ch] != 'S')
+ if((ch < SPECIALSSIZE &&
+ m_attrCharsMap[ch] == 'S') ||
+ ch > m_maxCharacter)
{
- accum(ch);
+ accumDefaultEscape(ch, i, string, len, true);
}
else
{
- accumDefaultEscape(ch, i, string, len, true);
+ accum(ch);
}
}
}
@@ -1582,8 +1561,10 @@
static XalanDOMString s_utf8EncodingString;
+static FormatterToXML::MaximumCharacterValueMapType s_maximumCharacterValues;
+
const XalanDOMCharVectorType& FormatterToXML::s_xsltNextIsRawString =
::s_xsltNextIsRawString;
const XalanDOMCharVectorType& FormatterToXML::s_formatterToDOMString
= ::s_formatterToDOMString;
@@ -1618,8 +1599,58 @@
const FormatterToXML::DOMCharBufferType::size_type
FormatterToXML::s_maxBufferSize = 512;
+const FormatterToXML::MaximumCharacterValueMapType& FormatterToXML::s_maximumCharacterValues =
+ ::s_maximumCharacterValues;
+
+static void
+initMaximumCharacterValueMap(FormatterToXML::MaximumCharacterValueMapType& theMap)
+{
+ typedef FormatterToXML::MaximumCharacterValueMapType::value_type value_type;
+
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("WINDOWS-1250")), 0xFF)); // Windows 1250 Peter Smolik
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("UTF-8")), 0xFFFF)); // Universal Transformation Format 8
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("US-ASCII")), 0x7F));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-1")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-2")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-3")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-4")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-5")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-6")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-7")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-8")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-8859-9")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-2022-JP")), 0xFFFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("SHIFT_JIS")), 0xFFFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EUC-JP")), 0xFFFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("GB2312")), 0xFFFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("BIG5")), 0xFFFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EUC-KR")), 0xFFFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("ISO-2022-KR")), 0xFFFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("KOI8-R")), 0xFFFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-US")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-CA")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-NL")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-DK")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-NO")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-FI")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-SE")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-IT")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-ES")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-GB")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-FR")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-AR1")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-HE")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-CH")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-ROECE")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-YU")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-IS")), 0xFF));
+ theMap.insert(value_type(XalanDOMString(XALAN_STATIC_UCODE_STRING("EBCDIC-CP-AR2")), 0xFF));
+}
+
+
+
void
FormatterToXML::initialize()
{
@@ -1666,6 +1697,8 @@
MakeXalanDOMCharVector(c_wstr(XALAN_STATIC_UCODE_STRING("ASCII")));
::s_utf8EncodingString = XALAN_STATIC_UCODE_STRING("UTF-8");
+
+ initMaximumCharacterValueMap(::s_maximumCharacterValues);
}
@@ -1702,4 +1735,6 @@
XalanDOMCharVectorType().swap(::s_asciiEncodingString);
clear(::s_utf8EncodingString);
+
+ MaximumCharacterValueMapType().swap(::s_maximumCharacterValues);
}
1.18 +37 -12 xml-xalan/c/src/XMLSupport/FormatterToXML.hpp
Index: FormatterToXML.hpp
===================================================================
RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToXML.hpp,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- FormatterToXML.hpp 2000/09/27 20:36:43 1.17
+++ FormatterToXML.hpp 2000/10/02 16:52:36 1.18
@@ -65,7 +65,7 @@
-#include <stack>
+#include <map>
#include <vector>
@@ -325,12 +325,27 @@
return m_indent;
}
+#if defined(XALAN_NO_NAMESPACES)
+ typedef vector<bool> BoolStackType;
+ typedef vector<XalanDOMChar> DOMCharBufferType;
+ typedef vector<char> ByteBufferType;
+ typedef map<XalanDOMString,
+ XalanDOMChar,
+ less<XalanDOMString> > MaximumCharacterValueMapType;
+#else
+ typedef std::vector<bool> BoolStackType;
+ typedef std::vector<XalanDOMChar> DOMCharBufferType;
+ typedef std::vector<char> ByteBufferType;
+ typedef std::map<XalanDOMString,
+ XalanDOMChar> MaximumCharacterValueMapType;
+#endif
+
protected:
/**
* The writer where the XML will be written.
*/
- Writer& m_writer;
+ Writer& m_writer;
/**
* Output a line break.
@@ -425,7 +440,13 @@
void
flushChars();
+ /**
+ * Flush the byte buffer.
+ */
void
+ flushBytes();
+
+ void
flush();
void
@@ -508,6 +529,16 @@
XalanDOMChar ch,
unsigned int next);
+ /**
+ * Get the maximum character value for the encoding.
+ *
+ * @param theEncoding The encoding name.
+ * @return The maximum character value that the
+ * encoding supports.
+ */
+ static XalanDOMChar
+ getMaximumCharacterValue(const XalanDOMString& theEncoding);
+
enum eDummyTwo { SPECIALSSIZE = 256};
/**
@@ -610,16 +641,6 @@
*/
static const XalanDOMCharVectorType& s_formatterToDOMString;
-#if defined(XALAN_NO_NAMESPACES)
- typedef vector<bool> BoolStackType;
- typedef vector<XalanDOMChar> DOMCharBufferType;
- typedef vector<char> ByteBufferType;
-#else
- typedef std::vector<bool> BoolStackType;
- typedef std::vector<XalanDOMChar> DOMCharBufferType;
- typedef std::vector<char> ByteBufferType;
-#endif
-
/**
* Stack to keep track of whether or not we need to
* preserve whitespace.
@@ -799,7 +820,11 @@
DOMCharBufferType::size_type m_pos;
+ ByteBufferType m_byteBuf;
+
static const DOMCharBufferType::size_type s_maxBufferSize;
+
+ static const MaximumCharacterValueMapType& s_maximumCharacterValues;
/**
* Current level of indent.