carnold 2005/04/28 16:26:33
Modified: include/log4cxx/helpers charsetencoder.h unicodehelper.h
src charsetdecoder.cpp charsetencoder.cpp
unicodehelper.cpp
Log:
LOGCXX-59: Encoding, Linux iter
Revision Changes Path
1.4 +6 -4 logging-log4cxx/include/log4cxx/helpers/charsetencoder.h
Index: charsetencoder.h
===================================================================
RCS file: /home/cvs/logging-log4cxx/include/log4cxx/helpers/charsetencoder.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- charsetencoder.h 28 Apr 2005 20:53:45 -0000 1.3
+++ charsetencoder.h 28 Apr 2005 23:26:33 -0000 1.4
@@ -59,8 +59,8 @@
/**
* Get encoder for specified character set.
- * @param charset, the following values should be recognized:
- * "US-ASCII", "ISO-8859-1", "UTF-8",
+ * @param charset, the following values should be recognized:
+ * "US-ASCII", "ISO-8859-1", "UTF-8",
* "UTF-16BE", "UTF-16LE".
* @return encoder, may be null if charset was not recognized.
*/
@@ -74,8 +74,8 @@
/**
* Get encoder for specified character set.
- * @param charset, the following values should be recognized:
- * "US-ASCII", "ISO-8859-1", "UTF-8",
+ * @param charset, the following values should be recognized:
+ * "US-ASCII", "ISO-8859-1", "UTF-8",
* "UTF-16BE", "UTF-16LE".
* @return encoder, may be null if charset was not recognized.
*/
@@ -133,6 +133,8 @@
* Private assignment operator.
*/
CharsetEncoder& operator=(const CharsetEncoder&);
+
+ static CharsetEncoder* createDefaultEncoder();
};
} // namespace helpers
1.2 +13 -5 logging-log4cxx/include/log4cxx/helpers/unicodehelper.h
Index: unicodehelper.h
===================================================================
RCS file: /home/cvs/logging-log4cxx/include/log4cxx/helpers/unicodehelper.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- unicodehelper.h 28 Apr 2005 20:53:45 -0000 1.1
+++ unicodehelper.h 28 Apr 2005 23:26:33 -0000 1.2
@@ -39,7 +39,7 @@
/**
* Decodes next character from a sequence of UTF-8 bytes.
* @param src start of character, will be modified to point
at next character.
- * @param srcEnd end of sequence.
+ * @param srcEnd end of sequence.
* @return scalar value (UCS-4) or 0xFFFF if invalid
sequence.
*/
static unsigned int decodeUTF8(const char*& src,
@@ -71,11 +71,11 @@
static int encodeUTF16LE(unsigned int ch, char* dst);
-#if LOG4CXX_HAS_WCHAR_T
+#if LOG4CXX_HAS_WCHAR_T
/**
* Decodes next character from a sequence of wchar_t values.
* @param src start of character, will be modified to point
at next character.
- * @param srcEnd end of sequence.
+ * @param srcEnd end of sequence.
* @return scalar value (UCS-4) or 0xFFFF if invalid
sequence.
*/
static unsigned int decodeWide(const wchar_t*& src, const
wchar_t* srcEnd);
@@ -97,18 +97,26 @@
*/
static int UnicodeHelper::lengthUTF8(wchar_t ch);
-#endif
+#endif
/**
* Decodes next character from a LogString.
* @param in string from which the character is extracted.
* @param iter iterator addressing start of character, will
be
- * advanced to next character if successful.
+ * advanced to next character if successful.
* @return scalar value (UCS-4) or 0xFFFF if invalid
sequence.
*/
static unsigned int decode(const LogString& in,
LogString::const_iterator& iter);
+ /**
+ * Encodes a UCS-4 value to logchar.
+ * @param ch UCS-4 value.
+ * @param dst buffer to receive logchar encoding (must hold at
least 8 logchars)
+ * @return number of logchar needed to represent character
+ */
+ static int encode(unsigned int ch, logchar* dst);
+
};
}
}
1.4 +139 -12 logging-log4cxx/src/charsetdecoder.cpp
Index: charsetdecoder.cpp
===================================================================
RCS file: /home/cvs/logging-log4cxx/src/charsetdecoder.cpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- charsetdecoder.cpp 28 Apr 2005 20:53:45 -0000 1.3
+++ charsetdecoder.cpp 28 Apr 2005 23:26:33 -0000 1.4
@@ -19,9 +19,7 @@
#include <log4cxx/helpers/exception.h>
#include <log4cxx/helpers/unicodehelper.h>
#include <apr_xlate.h>
-#if HAS_LANGINFO_CODESET
-#include <langinfo.h>
-#endif
+
using namespace log4cxx;
using namespace log4cxx::helpers;
@@ -190,7 +188,7 @@
/**
* Decoder used when the external and internal charsets
- * are the same.
+ * are the same.
*
*/
class TrivialCharsetDecoder : public CharsetDecoder
@@ -221,6 +219,143 @@
TrivialCharsetDecoder& operator=(const
TrivialCharsetDecoder&);
};
+
+#if LOG4CXX_LOGCHAR_IS_UTF8
+typedef TrivialCharsetDecoder UTF8CharsetDecoder;
+#endif
+
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+/**
+* Converts from UTF-8 to std::wstring: decodes the UTF-8 bytes held in a
+* ByteBuffer and appends them, re-encoded as wchar_t, to a LogString.
+*/
+class UTF8CharsetDecoder : public CharsetDecoder
+{
+public:
+ UTF8CharsetDecoder() {
+ }
+
+ virtual ~UTF8CharsetDecoder() {
+ }
+
+private:
+ virtual log4cxx_status_t decode(ByteBuffer& in, // bytes to decode; its position is updated to where decoding stopped
+ LogString& out) { // receives the decoded characters (appended, never cleared)
+ log4cxx_status_t stat = APR_SUCCESS; // stays APR_SUCCESS unless an invalid sequence is met
+ if (in.remaining() > 0) {
+ wchar_t buf[2]; // scratch for one encoded character; presumably 2 covers a surrogate pair - confirm against encodeWide
+
+ const char* src = in.current();
+ const char* srcEnd = in.data() + in.limit(); // buffer start plus limit, i.e. the end of the readable bytes
+ while(src < srcEnd) {
+ unsigned int sv = UnicodeHelper::decodeUTF8(src, srcEnd); // src is passed by reference and moved to the next character
+ if (sv == 0xFFFF) { // 0xFFFF is decodeUTF8's marker for an invalid sequence
+ stat = APR_BADARG;
+ break; // stop at the first bad sequence; text appended so far is kept
+ }
+ int wchars = UnicodeHelper::encodeWide(sv, buf); // return value is used as the number of wchar_t to append
+ out.append(buf, wchars);
+ }
+ in.position(src - in.data()); // record how far decoding got, as an offset from the buffer start
+ }
+ return stat;
+ }
+
+
+
+private: // copy constructor and assignment are declared private; no definitions are visible here, presumably to forbid copying
+ UTF8CharsetDecoder(const UTF8CharsetDecoder&);
+ UTF8CharsetDecoder& operator=(const UTF8CharsetDecoder&);
+};
+#endif
+
+/**
+* Converts from ISO-8859-1 to LogString.
+* Each input byte is used directly as the scalar value to encode.
+*/
+class ISOLatinCharsetDecoder : public CharsetDecoder
+{
+public:
+ ISOLatinCharsetDecoder() {
+ }
+
+ virtual ~ISOLatinCharsetDecoder() {
+ }
+
+private:
+ virtual log4cxx_status_t decode(ByteBuffer& in, // bytes to decode; fully consumed by this call
+ LogString& out) { // receives the decoded characters (appended, never cleared)
+ log4cxx_status_t stat = APR_SUCCESS; // never changed below: no byte value is rejected
+ if (in.remaining() > 0) {
+ logchar buf[8]; // scratch for one encoded character; UnicodeHelper::encode documents a minimum of 8
+
+ const unsigned char* src = (unsigned char*) in.current(); // unsigned so bytes >= 0x80 are read as 128..255
+ const unsigned char* srcEnd = src + in.remaining();
+ while(src < srcEnd) {
+ unsigned int sv = *(src++); // the byte value itself is the scalar value
+ int logchars = UnicodeHelper::encode(sv, buf); // return value is used as the number of logchar to append
+ out.append(buf, logchars);
+ }
+ in.position(in.limit()); // everything up to the limit has been consumed
+ }
+ return stat;
+ }
+
+
+
+private: // copy constructor and assignment are declared private; no definitions are visible here, presumably to forbid copying
+ ISOLatinCharsetDecoder(const ISOLatinCharsetDecoder&);
+ ISOLatinCharsetDecoder& operator=(const ISOLatinCharsetDecoder&);
+};
+
+
+/**
+* Converts from US-ASCII to LogString.
+* Decoding stops with APR_BADARG at the first byte that is 0x80 or above.
+*/
+class USASCIICharsetDecoder : public CharsetDecoder
+{
+public:
+ USASCIICharsetDecoder() {
+ }
+
+ virtual ~USASCIICharsetDecoder() {
+ }
+
+private:
+
+ virtual log4cxx_status_t decode(ByteBuffer& in, // bytes to decode; its position is updated to where decoding stopped
+ LogString& out) { // receives the decoded characters (appended, never cleared)
+ log4cxx_status_t stat = APR_SUCCESS; // stays APR_SUCCESS unless a byte >= 0x80 is met
+ if (in.remaining() > 0) {
+ logchar buf[8]; // scratch for one encoded character; UnicodeHelper::encode documents a minimum of 8
+
+ const unsigned char* src = (unsigned char*) in.current(); // unsigned so the 0x80 comparison below is well defined
+ const unsigned char* srcEnd = src + in.remaining();
+ while(src < srcEnd) {
+ unsigned char sv = *src; // look at the byte without consuming it yet
+ if (sv < 0x80) {
+ src++; // consume only bytes that pass the range check
+ int logchars = UnicodeHelper::encode(sv, buf); // return value is used as the number of logchar to append
+ out.append(buf, logchars);
+ } else {
+ stat = APR_BADARG;
+ break; // src still addresses the offending byte
+ }
+ }
+ in.position(src - (const unsigned char*) in.data()); // offset from the buffer start of the first unconsumed byte
+ }
+ return stat;
+ }
+
+
+
+private: // copy constructor and assignment are declared private; no definitions are visible here, presumably to forbid copying
+ USASCIICharsetDecoder(const USASCIICharsetDecoder&);
+ USASCIICharsetDecoder& operator=(const USASCIICharsetDecoder&);
+};
+
+
#if LOG4CXX_LOGCHAR_IS_UTF8
/**
* Decoder to convert array of wchar_t to UTF-8 bytes.
@@ -276,14 +411,6 @@
}
CharsetDecoder* CharsetDecoder::createDefaultDecoder() {
-#if LOG4CXX_LOGCHAR_IS_UTF8 && HAS_LANGINFO_CODESET
- //
- // detect if encoding is UTF-8
- //
- if(strcmp(nl_langinfo(CODESET), "UTF-8") == 0) {
- return new TrivialCharsetDecoder();
- }
-#endif
#if LOG4CXX_HAS_WCHAR_T
return new MbstowcsCharsetDecoder();
#else
1.5 +25 -19 logging-log4cxx/src/charsetencoder.cpp
Index: charsetencoder.cpp
===================================================================
RCS file: /home/cvs/logging-log4cxx/src/charsetencoder.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- charsetencoder.cpp 28 Apr 2005 20:53:45 -0000 1.4
+++ charsetencoder.cpp 28 Apr 2005 23:26:33 -0000 1.5
@@ -21,6 +21,7 @@
#include <log4cxx/helpers/stringhelper.h>
#include <log4cxx/helpers/unicodehelper.h>
+
using namespace log4cxx;
using namespace log4cxx::helpers;
@@ -100,7 +101,7 @@
#if LOG4CXX_HAS_WCHAR_T
/**
* A character encoder implemented using wcstombs.
- */
+ */
class WcstombsCharsetEncoder : public CharsetEncoder
{
public:
@@ -248,10 +249,10 @@
/**
* Converts a LogString to ISO-8859-1.
*/
- class ISOLatin1CharsetEncoder : public CharsetEncoder
+ class ISOLatinCharsetEncoder : public CharsetEncoder
{
public:
- ISOLatin1CharsetEncoder() {
+ ISOLatinCharsetEncoder() {
}
virtual log4cxx_status_t encode(const LogString& in,
@@ -260,12 +261,12 @@
log4cxx_status_t stat = APR_SUCCESS;
if (iter != in.end()) {
while(out.remaining() > 0 && iter != in.end()) {
- LogString::const_iterator prev(iter);
+ LogString::const_iterator prev(iter);
unsigned int sv = UnicodeHelper::decode(in, iter);
if (sv <= 0xFF) {
out.put((char) sv);
} else {
- iter = prev;
+ iter = prev;
stat = APR_BADARG;
break;
}
@@ -273,10 +274,10 @@
}
return stat;
}
-
+
private:
- ISOLatin1CharsetEncoder(const ISOLatin1CharsetEncoder&);
- ISOLatin1CharsetEncoder& operator=(const
ISOLatin1CharsetEncoder&);
+ ISOLatinCharsetEncoder(const ISOLatinCharsetEncoder&);
+ ISOLatinCharsetEncoder& operator=(const
ISOLatinCharsetEncoder&);
};
/**
@@ -297,7 +298,7 @@
if (requested > out.remaining()/sizeof(logchar)) {
requested = out.remaining()/sizeof(logchar);
}
- memcpy(out.current(),
+ memcpy(out.current(),
(const char*) in.data() + (iter - in.begin()),
requested * sizeof(logchar));
iter += requested;
@@ -317,7 +318,7 @@
#if LOG4CXX_LOGCHAR_IS_WCHAR
/**
- * Converts a wstring to UTF-8.
+ * Converts a wstring to UTF-8.
*/
class UTF8CharsetEncoder : public CharsetEncoder
{
@@ -462,19 +463,23 @@
}
CharsetEncoderPtr CharsetEncoder::getDefaultEncoder() {
+ static CharsetEncoderPtr encoder(createDefaultEncoder());
+ return encoder;
+}
+
+CharsetEncoder* CharsetEncoder::createDefaultEncoder() {
#if LOG4CXX_HAS_WCHAR_T
- static CharsetEncoderPtr encoder(new WcstombsCharsetEncoder());
+ return new WcstombsCharsetEncoder();
#else
- static CharsetEncoderPtr encoder(new
APRCharsetEncoder(APR_LOCALE_CHARSET));
+ return new APRCharsetEncoder(APR_LOCALE_CHARSET);
#endif
- return encoder;
}
CharsetEncoderPtr CharsetEncoder::getEncoder(const std::wstring& charset) {
std::string cs(charset.size(), ' ');
- for(std::wstring::size_type i = 0;
- i < charset.length();
+ for(std::wstring::size_type i = 0;
+ i < charset.length();
i++) {
cs[i] = (char) charset[i];
}
@@ -496,13 +501,13 @@
CharsetEncoderPtr CharsetEncoder::getEncoder(const std::string& charset) {
-#if defined(_WIN32)
if (StringHelper::equalsIgnoreCase(charset, "US-ASCII", "us-ascii") ||
- StringHelper::equalsIgnoreCase(charset, "ISO646-US", "iso646-US")) {
+ StringHelper::equalsIgnoreCase(charset, "ISO646-US", "iso646-US") ||
+ StringHelper::equalsIgnoreCase(charset, "ANSI_X3.4-1968",
"ansi_x3.4-1968")) {
return new USASCIICharsetEncoder();
} else if (StringHelper::equalsIgnoreCase(charset, "ISO-8859-1",
"iso-8859-1") ||
StringHelper::equalsIgnoreCase(charset, "ISO-LATIN-1",
"iso-latin-1")) {
- return new ISOLatin1CharsetEncoder();
+ return new ISOLatinCharsetEncoder();
} else if (StringHelper::equalsIgnoreCase(charset, "UTF-8", "utf-8")) {
return new UTF8CharsetEncoder();
} else if (StringHelper::equalsIgnoreCase(charset, "UTF-16BE",
"utf-16be")
@@ -511,9 +516,10 @@
} else if (StringHelper::equalsIgnoreCase(charset, "UTF-16LE",
"utf-16le")) {
return new UTF16LECharsetEncoder();
}
+#if defined(_WIN32)
throw IllegalArgumentException(charset);
#else
- return new APRCharsetEncoder(charset.c_str());
+ return new APRCharsetEncoder(charset.c_str());
#endif
}
1.2 +19 -4 logging-log4cxx/src/unicodehelper.cpp
Index: unicodehelper.cpp
===================================================================
RCS file: /home/cvs/logging-log4cxx/src/unicodehelper.cpp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- unicodehelper.cpp 28 Apr 2005 20:53:45 -0000 1.1
+++ unicodehelper.cpp 28 Apr 2005 23:26:33 -0000 1.2
@@ -22,7 +22,7 @@
-unsigned int UnicodeHelper::decodeUTF8(const char*& src,
+unsigned int UnicodeHelper::decodeUTF8(const char*& src,
const char* srcEnd) {
const char* start = src;
unsigned char ch1 = *(src++);
@@ -61,7 +61,7 @@
}
if ((ch1 & 0xF0) == 0xE0) {
unsigned rv = ((ch1 & 0x0F) << 12)
- + ((ch2 & 0x3F) << 6)
+ + ((ch2 & 0x3F) << 6)
+ (ch3 & 0x3F);
if (rv <= 0x800) {
src = start;
@@ -124,7 +124,7 @@
int UnicodeHelper::lengthUTF8(wchar_t ch) {
if (ch <= 0x7F) {
return 1;
- }
+ }
if(ch <= 0x7FF) {
return 2;
}
@@ -153,7 +153,7 @@
int UnicodeHelper::lengthUTF8(wchar_t ch) {
if (ch <= 0x7F) {
return 1;
- }
+ }
if(ch <= 0x7FF) {
return 2;
}
@@ -256,3 +256,18 @@
}
#endif
+
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+int UnicodeHelper::encode(unsigned int sv, logchar* out) { // variant compiled when LOG4CXX_LOGCHAR_IS_WCHAR is set
+ return encodeWide(sv, out); // forwards both arguments unchanged and returns encodeWide's result
+}
+#endif
+
+
+#if LOG4CXX_LOGCHAR_IS_UTF8
+int UnicodeHelper::encode(unsigned int sv, logchar* out) { // variant compiled when LOG4CXX_LOGCHAR_IS_UTF8 is set
+ return encodeUTF8(sv, out); // forwards both arguments unchanged and returns encodeUTF8's result
+}
+#endif
+
+