carnold 2005/04/21 22:27:22
Modified: include/log4cxx/helpers charsetdecoder.h charsetencoder.h
src charsetdecoder.cpp charsetencoder.cpp
tests/src/helpers charsetdecodertestcase.cpp
charsetencodertestcase.cpp
Log:
LOGCXX-59: Windows (non APR) encoding support
Revision Changes Path
1.2 +8 -13 logging-log4cxx/include/log4cxx/helpers/charsetdecoder.h
Index: charsetdecoder.h
===================================================================
RCS file: /home/cvs/logging-log4cxx/include/log4cxx/helpers/charsetdecoder.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- charsetdecoder.h 1 Apr 2005 02:02:33 -0000 1.1
+++ charsetdecoder.h 22 Apr 2005 05:27:22 -0000 1.2
@@ -18,7 +18,6 @@
#define _LOG4CXX_HELPERS_CHARSETDECODER_H
#include <log4cxx/helpers/objectimpl.h>
-#include <log4cxx/helpers/pool.h>
namespace log4cxx
{
@@ -39,29 +38,25 @@
BEGIN_LOG4CXX_CAST_MAP()
LOG4CXX_CAST_ENTRY(CharsetDecoder)
END_LOG4CXX_CAST_MAP()
-
- private:
- CharsetDecoder(const char* fromset);
+ protected:
+ CharsetDecoder();
+ public:
virtual ~CharsetDecoder();
-
- public:
- static CharsetDecoderPtr getDefaultDecoder();
- static CharsetDecoderPtr getWideDecoder();
- static CharsetDecoderPtr getDecoder(const LogString&
charset);
+ static CharsetDecoderPtr getDefaultDecoder();
+#if LOG4CXX_HAS_WCHAR_T
+ static CharsetDecoderPtr getWideDecoder();
+#endif
virtual log4cxx_status_t decode(ByteBuffer& in,
- LogString& out);
+ LogString& out) = 0;
inline static bool isError(log4cxx_status_t stat) {
return (stat != 0);
}
-
private:
CharsetDecoder(const CharsetDecoder&);
CharsetDecoder& operator=(const CharsetDecoder&);
- Pool pool;
- void *convset;
};
} // namespace helpers
1.2 +8 -9 logging-log4cxx/include/log4cxx/helpers/charsetencoder.h
Index: charsetencoder.h
===================================================================
RCS file: /home/cvs/logging-log4cxx/include/log4cxx/helpers/charsetencoder.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- charsetencoder.h 1 Apr 2005 02:02:33 -0000 1.1
+++ charsetencoder.h 22 Apr 2005 05:27:22 -0000 1.2
@@ -41,14 +41,15 @@
LOG4CXX_CAST_ENTRY(CharsetEncoder)
END_LOG4CXX_CAST_MAP()
- private:
- CharsetEncoder(const char* topage);
- virtual ~CharsetEncoder();
-
+ protected:
+ CharsetEncoder();
+
public:
+ virtual ~CharsetEncoder();
static CharsetEncoderPtr getDefaultEncoder();
- static CharsetEncoderPtr getWideEncoder();
- static CharsetEncoderPtr getEncoder(const LogString&
charset);
+// static CharsetEncoderPtr getWideEncoder();
+ static CharsetEncoderPtr getEncoder(const std::wstring&
charset);
+ static CharsetEncoderPtr getEncoder(const std::string&
charset);
/**
* Encodes a string replacing unmappable
@@ -62,7 +63,7 @@
virtual log4cxx_status_t encode(const LogString& in,
LogString::const_iterator& iter,
- ByteBuffer& out);
+ ByteBuffer& out) = 0;
virtual void reset();
@@ -76,8 +77,6 @@
private:
CharsetEncoder(const CharsetEncoder&);
CharsetEncoder& operator=(const CharsetEncoder&);
- Pool pool;
- void *convset;
};
} // namespace helpers
1.2 +249 -69 logging-log4cxx/src/charsetdecoder.cpp
Index: charsetdecoder.cpp
===================================================================
RCS file: /home/cvs/logging-log4cxx/src/charsetdecoder.cpp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- charsetdecoder.cpp 1 Apr 2005 02:02:33 -0000 1.1
+++ charsetdecoder.cpp 22 Apr 2005 05:27:22 -0000 1.2
@@ -22,82 +22,262 @@
using namespace log4cxx;
using namespace log4cxx::helpers;
-IMPLEMENT_LOG4CXX_OBJECT(CharsetDecoder)
+IMPLEMENT_LOG4CXX_OBJECT(CharsetDecoder)
+
+
+namespace log4cxx
+{
+ namespace helpers {
+
+#if !defined(_WIN32)
+        /**
+         * Converts from an arbitrary encoding to LogString
+         * using apr_xlate.
+         *
+         * Each instance creates its own APR pool and opens an
+         * apr_xlate handle in it; the destructor closes the handle
+         * and destroys the pool.
+         */
+        class APRCharsetDecoder : public CharsetDecoder
+        {
+        public:
+            /**
+             * Opens a converter from frompage to the logchar encoding
+             * ("WCHAR_T" or "UTF-8" depending on the build).
+             * @param frompage source character set handed to apr_xlate_open.
+             * @throws PoolException if the private pool cannot be created.
+             * @throws IllegalArgumentException if the converter cannot be opened.
+             */
+            APRCharsetDecoder(const char* frompage) {
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+                const char* topage = "WCHAR_T";
+#endif
+#if LOG4CXX_LOGCHAR_IS_UTF8
+                const char* topage = "UTF-8";
+#endif
+                apr_status_t stat = apr_pool_create(&pool, NULL);
+                if (stat != APR_SUCCESS) {
+                    throw PoolException(stat);
+                }
+                stat = apr_xlate_open(&convset,
+                    topage,
+                    frompage,
+                    pool);
+                if (stat != APR_SUCCESS) {
+                    // The destructor does not run when the constructor
+                    // throws, so the pool has to be released here.
+                    apr_pool_destroy(pool);
+                    // topage is reported because frompage may be the
+                    // APR_LOCALE_CHARSET sentinel, which is not a string.
+                    throw IllegalArgumentException(topage);
+                }
+            }
+
+            virtual ~APRCharsetDecoder() {
+                apr_xlate_close(convset);
+                apr_pool_destroy(pool);
+            }
+
+            /**
+             * Decodes the unread bytes of in and appends the result to out.
+             *
+             * With an empty input buffer apr_xlate_conv_buffer is called
+             * once with a NULL input and whatever it produces is appended.
+             * Otherwise the input is converted through a 256-element
+             * scratch buffer until it is exhausted or a non-success status
+             * is returned; the position of in is advanced past the bytes
+             * that were consumed.
+             *
+             * @param in bytes to decode.
+             * @param out string that receives the decoded characters.
+             * @return status of the last apr_xlate_conv_buffer call.
+             */
+            virtual log4cxx_status_t decode(ByteBuffer& in,
+                  LogString& out) {
+                enum { BUFSIZE = 256 };
+                logchar buf[BUFSIZE];
+                const apr_size_t initial_outbytes_left = BUFSIZE * sizeof(logchar);
+                apr_status_t stat = APR_SUCCESS;
+                if (in.remaining() == 0) {
+                    apr_size_t outbytes_left = initial_outbytes_left;
+                    stat = apr_xlate_conv_buffer(convset,
+                        NULL, NULL, (char*) buf, &outbytes_left);
+                    out.append(buf, (initial_outbytes_left - outbytes_left)/sizeof(logchar));
+                } else {
+                    while(in.remaining() > 0 && stat == APR_SUCCESS) {
+                        apr_size_t inbytes_left = in.remaining();
+                        apr_size_t initial_inbytes_left = inbytes_left;
+                        size_t pos = in.position();
+                        apr_size_t outbytes_left = initial_outbytes_left;
+                        stat = apr_xlate_conv_buffer(convset,
+                            in.data() + pos,
+                            &inbytes_left,
+                            (char*) buf,
+                            &outbytes_left);
+                        // the byte counters are decremented by the call, so
+                        // the differences are the amounts produced/consumed
+                        out.append(buf, (initial_outbytes_left - outbytes_left)/sizeof(logchar));
+                        in.position(pos + (initial_inbytes_left - inbytes_left));
+                    }
+                }
+                return stat;
+            }
+
+        private:
+            // not copyable: the pool and the handle have a single owner
+            APRCharsetDecoder(const APRCharsetDecoder&);
+            APRCharsetDecoder& operator=(const APRCharsetDecoder&);
+            apr_pool_t* pool;
+            apr_xlate_t *convset;
+        };
+#endif
+
+
+#if LOG4CXX_HAS_WCHAR_T || defined(_WIN32)
+        /**
+         * Converts from the default multi-byte encoding of the
+         * current C locale to LogString, one character at a time,
+         * using mbrtowc.
+         */
+        class MbstowcsCharsetDecoder : public CharsetDecoder
+        {
+        public:
+            MbstowcsCharsetDecoder() {
+            }
+
+            virtual ~MbstowcsCharsetDecoder() {
+            }
+
+            /**
+             * Decodes the unread bytes of in and appends them to out.
+             *
+             * Decoding stops at the first invalid or truncated multi-byte
+             * sequence; everything decoded before it has already been
+             * appended and the position of in is left on the first byte
+             * that could not be decoded.
+             *
+             * @param in bytes in the locale's multi-byte encoding.
+             * @param out string that receives the decoded characters.
+             * @return APR_SUCCESS if every byte was decoded, APR_BADARG otherwise.
+             */
+            virtual log4cxx_status_t decode(ByteBuffer& in,
+                  LogString& out) {
+                const char* src = in.data() + in.position();
+                const size_t remaining = in.remaining();
+                size_t consumed = 0;
+                log4cxx_status_t stat = APR_SUCCESS;
+
+                // A zeroed local state starts in the initial shift state
+                // and is not shared with any other decode call.
+                mbstate_t state;
+                memset(&state, 0, sizeof(state));
+
+                while(consumed < remaining) {
+                    wchar_t wc = 0;
+                    size_t len = mbrtowc(&wc, src + consumed,
+                                         remaining - consumed, &state);
+                    if (len == (size_t) -1 || len == (size_t) -2) {
+                        // -1: invalid sequence, -2: sequence cut off by
+                        // the end of the buffer
+                        stat = APR_BADARG;
+                        break;
+                    }
+                    if (len == 0) {
+                        // mbrtowc returns 0 for a decoded NUL; step over
+                        // its byte so that the loop always makes progress
+                        len = 1;
+                    }
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+                    out.append(1, wc);
+#else
+                    encodeUTF8((unsigned short) wc, out);
+#endif
+                    consumed += len;
+                }
+                // advance by the number of BYTES consumed, which differs
+                // from the number of characters for multi-byte input
+                in.position(in.position() + consumed);
+                return stat;
+            }
+
+
+            /**
+             * Appends the UTF-8 encoding of one 16-bit code unit to out.
+             *
+             * Values up to 0x7F produce one byte, up to 0x7FF two bytes
+             * and the remaining non-surrogate values three bytes.
+             * A surrogate code unit (0xD800-0xDFFF) cannot be encoded
+             * on its own and is replaced by '?'.
+             *
+             * @param ch code unit to encode.
+             * @param out string that receives the encoded bytes.
+             */
+            static void encodeUTF8(unsigned short ch, std::string& out) {
+                if (ch <= 0x7F) {
+                    out.append(1, (char) ch);
+                } else if (ch <= 0x7FF) {
+                    out.append(1, (char) (0xC0 | (ch >> 6)));
+                    out.append(1, (char) (0x80 | (ch & 0x3F)));
+                } else if (ch < 0xD800 || ch > 0xDFFF) {
+                    out.append(1, (char) (0xE0 | (ch >> 12)));
+                    out.append(1, (char) (0x80 | ((ch >> 6) & 0x3F)));
+                    out.append(1, (char) (0x80 | (ch & 0x3F)));
+                } else {
+                    out.append(1, '?');
+                }
+            }
+
+
+
+        private:
+            // not copyable
+            MbstowcsCharsetDecoder(const MbstowcsCharsetDecoder&);
+            MbstowcsCharsetDecoder& operator=(const MbstowcsCharsetDecoder&);
+        };
+#endif
+
+
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+        /**
+         * Decoder for buffers that already hold wchar_t elements
+         * when LogString itself is a wchar_t string: the elements
+         * are appended without any conversion.
+         */
+        class TrivialWideCharsetDecoder : public CharsetDecoder
+        {
+        public:
+            TrivialWideCharsetDecoder() {
+            }
+
+            virtual ~TrivialWideCharsetDecoder() {
+            }
+
+            /**
+             * Appends every complete wchar_t found in the unread part of
+             * in to out and advances the position of in past them.
+             *
+             * @param in buffer whose unread bytes are wchar_t elements.
+             * @param out string that receives the elements.
+             * @return APR_SUCCESS, or APR_BADARG when the byte count is
+             *         not a multiple of sizeof(wchar_t); the trailing
+             *         partial element is left unread in that case.
+             */
+            virtual log4cxx_status_t decode(ByteBuffer& in,
+                  LogString& out) {
+                // NOTE(review): assumes the buffer is suitably aligned for
+                // wchar_t access -- confirm against the callers.
+                const wchar_t* src = (const wchar_t*) (in.data() + in.position());
+                size_t remaining = in.remaining();
+                size_t count = remaining / sizeof(wchar_t);
+                out.append(src, count);
+                in.position(in.position() + count * sizeof(wchar_t));
+                // any remainder is a partial element, whatever the width
+                // of wchar_t is on this platform
+                if (remaining % sizeof(wchar_t) != 0) {
+                    return APR_BADARG;
+                }
+                return APR_SUCCESS;
+            }
+
+
+
+        private:
+            // not copyable
+            TrivialWideCharsetDecoder(const TrivialWideCharsetDecoder&);
+            TrivialWideCharsetDecoder& operator=(const TrivialWideCharsetDecoder&);
+        };
+#endif
+
+#if LOG4CXX_LOGCHAR_IS_UTF8 && defined(_WIN32)
+        /**
+         * Converts a buffer of wchar_t elements to a
+         * UTF-8 std::string, one element at a time, using
+         * MbstowcsCharsetDecoder::encodeUTF8.
+         */
+        class WideToUTF8CharsetDecoder : public CharsetDecoder
+        {
+        public:
+            WideToUTF8CharsetDecoder() {
+            }
+
+            virtual ~WideToUTF8CharsetDecoder() {
+            }
+
+            /**
+             * Encodes every complete wchar_t found in the unread part of
+             * in and appends the result to out; the position of in is
+             * advanced past the elements that were read.
+             *
+             * @param in buffer whose unread bytes are wchar_t elements.
+             * @param out string that receives the encoded bytes.
+             * @return APR_SUCCESS, or APR_BADARG when the byte count is
+             *         not a multiple of sizeof(wchar_t).
+             */
+            virtual log4cxx_status_t decode(ByteBuffer& in,
+                  LogString& out) {
+                // the buffer exposes char*, so the cast has to be explicit
+                const wchar_t* src = (const wchar_t*) (in.data() + in.position());
+                size_t remaining = in.remaining();
+                size_t count = remaining / sizeof(wchar_t);
+                // lower bound only: non-ASCII elements need more than one byte
+                out.reserve(out.length() + count);
+                for(size_t i = 0; i < count; i++, src++) {
+                    MbstowcsCharsetDecoder::encodeUTF8(*src, out);
+                }
+                in.position(in.position() + count * sizeof(wchar_t));
+                if (remaining % sizeof(wchar_t) != 0) {
+                    return APR_BADARG;
+                }
+                return APR_SUCCESS;
+            }
+
+
+
+        private:
+            // not copyable
+            WideToUTF8CharsetDecoder(const WideToUTF8CharsetDecoder&);
+            WideToUTF8CharsetDecoder& operator=(const WideToUTF8CharsetDecoder&);
+        };
+#endif
+
+
+ } // namespace helpers
+
+} //namespace log4cxx
-CharsetDecoder::CharsetDecoder(const char* frompage) {
-#if LOG4CXX_LOGCHAR_IS_WCHAR
- const char* topage = "WCHAR_T";
-#endif
-#if LOG4CXX_LOGCHAR_IS_UTF8
- const char* topage = "UTF-8";
-#endif
- apr_status_t stat = apr_xlate_open((apr_xlate_t**) &convset,
- topage,
- frompage,
- (apr_pool_t*) pool.getAPRPool());
- if (stat != APR_SUCCESS) {
- throw IllegalArgumentException(topage);
- }
-}
+CharsetDecoder::CharsetDecoder() { // Empty: this base class has no members to initialize.
+}
-CharsetDecoder::~CharsetDecoder() {
- apr_xlate_close((apr_xlate_t*) convset);
-}
-CharsetDecoderPtr CharsetDecoder::getDefaultDecoder() {
- static CharsetDecoderPtr decoder(new CharsetDecoder(APR_LOCALE_CHARSET));
- return decoder;
+CharsetDecoder::~CharsetDecoder() { // Empty: this base class has no resources to release.
}
-CharsetDecoderPtr CharsetDecoder::getWideDecoder() {
- static CharsetDecoderPtr decoder(new CharsetDecoder("WCHAR_T"));
- return decoder;
+CharsetDecoderPtr CharsetDecoder::getDefaultDecoder() { // Returns the decoder used for the default multi-byte encoding.
+#if LOG4CXX_HAS_WCHAR_T || defined(_WIN32)
+ static CharsetDecoderPtr decoder(new MbstowcsCharsetDecoder()); // function-local static: the same pointer is returned on every call
+#else
+ static CharsetDecoderPtr decoder(new
APRCharsetDecoder(APR_LOCALE_CHARSET)); // no wchar_t and not Windows: convert through apr_xlate instead
+#endif
+ return decoder;
}
-
-CharsetDecoderPtr CharsetDecoder::getDecoder(const LogString& charset) {
-#if LOG4CXX_LOGCHAR_IS_WCHAR
- std::string cs(charset.size(), ' ');
- for(int i = 0; i < charset.size(); i++) {
- cs[i] = (char) charset[i];
- }
- return new CharsetDecoder(cs.c_str());
+
+#if LOG4CXX_HAS_WCHAR_T
+CharsetDecoderPtr CharsetDecoder::getWideDecoder() { // Returns the decoder for buffers that hold wchar_t data.
#if LOG4CXX_LOGCHAR_IS_WCHAR
+ static CharsetDecoderPtr decoder(new TrivialWideCharsetDecoder()); // logchar is wchar_t: elements are appended unchanged
+#elif defined(_WIN32)
+ static CharsetDecoderPtr decoder(new WideToUTF8CharsetDecoder()); // Windows with non-wchar_t logchar: converted without APR
+#else
+ static CharsetDecoderPtr decoder(new APRCharsetDecoder("WCHAR_T")); // everywhere else: convert through apr_xlate
#endif
-#if LOG4CXX_LOGCHAR_IS_UTF8
- return new CharsetDecoder(charset.c_str());
+ return decoder; // function-local static: the same pointer is returned on every call
+}
#endif
-}
-
-
-log4cxx_status_t CharsetDecoder::decode(
- ByteBuffer& in,
- LogString& out) {
- enum { BUFSIZE = 256 };
- logchar buf[BUFSIZE];
- const apr_size_t initial_outbytes_left = BUFSIZE * sizeof(logchar);
- apr_status_t stat = APR_SUCCESS;
- if (in.remaining() == 0) {
- size_t outbytes_left = initial_outbytes_left;
- stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
- NULL, NULL, (char*) buf, &outbytes_left);
- out.append(buf, (initial_outbytes_left -
outbytes_left)/sizeof(logchar));
- } else {
- while(in.remaining() > 0 && stat == APR_SUCCESS) {
- size_t inbytes_left = in.remaining();
- size_t initial_inbytes_left = inbytes_left;
- size_t pos = in.position();
- apr_size_t outbytes_left = initial_outbytes_left;
- stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
- in.data() + pos,
- &inbytes_left,
- (char*) buf,
- &outbytes_left);
- out.append(buf, (initial_outbytes_left -
outbytes_left)/sizeof(logchar));
- in.position(pos + (initial_inbytes_left - inbytes_left));
- }
- }
- return stat;
-}
-
-
1.2 +411 -58 logging-log4cxx/src/charsetencoder.cpp
Index: charsetencoder.cpp
===================================================================
RCS file: /home/cvs/logging-log4cxx/src/charsetencoder.cpp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- charsetencoder.cpp 1 Apr 2005 02:02:33 -0000 1.1
+++ charsetencoder.cpp 22 Apr 2005 05:27:22 -0000 1.2
@@ -18,82 +18,437 @@
#include <log4cxx/helpers/bytebuffer.h>
#include <log4cxx/helpers/exception.h>
#include <apr_xlate.h>
+#include <log4cxx/helpers/stringhelper.h>
using namespace log4cxx;
using namespace log4cxx::helpers;
-IMPLEMENT_LOG4CXX_OBJECT(CharsetEncoder)
+IMPLEMENT_LOG4CXX_OBJECT(CharsetEncoder)
+
+namespace log4cxx
+{
+
+ namespace helpers {
+
+#if !defined(_WIN32)
+        /**
+         * An engine to transform LogStrings into bytes
+         * for the specific character set, using apr_xlate.
+         *
+         * Each instance creates its own APR pool and opens an
+         * apr_xlate handle in it; the destructor closes the handle
+         * and destroys the pool.
+         */
+        class APRCharsetEncoder : public CharsetEncoder
+        {
+        public:
+            /**
+             * Opens a converter from the logchar encoding ("WCHAR_T" or
+             * "UTF-8" depending on the build) to topage.
+             * @param topage target character set handed to apr_xlate_open;
+             *        may be APR_LOCALE_CHARSET or APR_DEFAULT_CHARSET.
+             * @throws PoolException if the private pool cannot be created.
+             * @throws IllegalArgumentException if the converter cannot be opened.
+             */
+            APRCharsetEncoder(const char* topage) {
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+                const char* frompage = "WCHAR_T";
+#endif
+#if LOG4CXX_LOGCHAR_IS_UTF8
+                const char* frompage = "UTF-8";
+#endif
+                apr_status_t stat = apr_pool_create(&pool, NULL);
+                if (stat != APR_SUCCESS) {
+                    throw PoolException(stat);
+                }
+                stat = apr_xlate_open(&convset,
+                    topage,
+                    frompage,
+                    pool);
+                if (stat != APR_SUCCESS) {
+                    // The destructor does not run when the constructor
+                    // throws, so the pool has to be released here.
+                    apr_pool_destroy(pool);
+                    // The two APR charset constants are sentinel pointer
+                    // values, not strings; never hand them to the exception.
+                    if (topage == APR_LOCALE_CHARSET) {
+                        throw IllegalArgumentException("APR_LOCALE_CHARSET");
+                    }
+                    if (topage == APR_DEFAULT_CHARSET) {
+                        throw IllegalArgumentException("APR_DEFAULT_CHARSET");
+                    }
+                    throw IllegalArgumentException(topage);
+                }
+            }
+
+            virtual ~APRCharsetEncoder() {
+                apr_xlate_close(convset);
+                apr_pool_destroy(pool);
+            }
+
+            /**
+             * Encodes in, starting at iter, into the free space of out.
+             *
+             * When iter is already at the end of in, apr_xlate_conv_buffer
+             * is called with a NULL input and whatever it produces is
+             * written to out. Otherwise the rest of the string is offered
+             * to the converter in one call; iter is advanced past the
+             * characters that were consumed. In both cases the position of
+             * out is advanced past the bytes that were written.
+             *
+             * @param in string being encoded.
+             * @param iter current read position inside in; updated.
+             * @param out buffer that receives the encoded bytes.
+             * @return status of the apr_xlate_conv_buffer call.
+             */
+            virtual log4cxx_status_t encode(const LogString& in,
+                  LogString::const_iterator& iter,
+                  ByteBuffer& out) {
+                apr_status_t stat;
+                apr_size_t outbytes_left = out.remaining();
+                apr_size_t initial_outbytes_left = outbytes_left;
+                size_t position = out.position();
+                if (iter == in.end()) {
+                    stat = apr_xlate_conv_buffer(convset, NULL, NULL,
+                        out.data() + position, &outbytes_left);
+                } else {
+                    LogString::size_type inOffset = (iter - in.begin());
+                    apr_size_t inbytes_left =
+                        (in.size() - inOffset) * sizeof(LogString::value_type);
+                    apr_size_t initial_inbytes_left = inbytes_left;
+                    stat = apr_xlate_conv_buffer(convset,
+                        (const char*) (in.data() + inOffset),
+                        &inbytes_left,
+                        out.data() + position,
+                        &outbytes_left);
+                    // consumed bytes converted back to a character count
+                    iter += ((initial_inbytes_left - inbytes_left) / sizeof(LogString::value_type));
+                }
+                out.position(out.position() + (initial_outbytes_left - outbytes_left));
+                return stat;
+            }
+
+        private:
+            // not copyable: the pool and the handle have a single owner
+            APRCharsetEncoder(const APRCharsetEncoder&);
+            APRCharsetEncoder& operator=(const APRCharsetEncoder&);
+            apr_pool_t* pool;
+            apr_xlate_t *convset;
+        };
+#endif
+
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+        /**
+         * An engine to transform wchar_t LogStrings into bytes
+         * in the multi-byte encoding of the current C locale,
+         * using wcstombs.
+         */
+        class WcstombsCharsetEncoder : public CharsetEncoder
+        {
+        public:
+            WcstombsCharsetEncoder() {
+            }
+
+            /**
+             * Encodes one chunk of in, starting at iter, into out.
+             *
+             * At most 255 characters are taken per call, fewer when the
+             * free space of out could not hold them at MB_LEN_MAX bytes
+             * each. If the chunk contains a character that wcstombs cannot
+             * convert, the chunk is halved until a prefix converts; that
+             * prefix is written and APR_BADARG is returned so the caller
+             * knows an unconvertible character lies ahead. iter and the
+             * position of out are advanced by what was actually converted.
+             *
+             * NOTE(review): when out has fewer than MB_LEN_MAX bytes free
+             * the chunk size is 0 and the call succeeds without making
+             * progress -- confirm callers always supply a larger buffer.
+             *
+             * @param in string being encoded.
+             * @param iter current read position inside in; updated.
+             * @param out buffer that receives the encoded bytes.
+             * @return APR_SUCCESS, or APR_BADARG if an unconvertible
+             *         character was encountered.
+             */
+            virtual log4cxx_status_t encode(const LogString& in,
+                  LogString::const_iterator& iter,
+                  ByteBuffer& out) {
+                log4cxx_status_t stat = APR_SUCCESS;
+
+                if (iter != in.end()) {
+                    size_t outbytes_left = out.remaining();
+                    size_t position = out.position();
+                    LogString::size_type inOffset = (iter - in.begin());
+                    enum { BUFSIZE = 256 };
+                    wchar_t buf[BUFSIZE];
+                    // one element is reserved for the terminating NUL
+                    size_t chunkSize = BUFSIZE - 1;
+                    if (chunkSize * MB_LEN_MAX > outbytes_left) {
+                        chunkSize = outbytes_left / MB_LEN_MAX;
+                    }
+                    if (chunkSize > in.length() - inOffset) {
+                        chunkSize = in.length() - inOffset;
+                    }
+                    // wcstombs needs a NUL-terminated source, hence the copy
+                    memset(buf, 0, BUFSIZE * sizeof(wchar_t));
+                    memcpy(buf,
+                        in.data() + inOffset,
+                        chunkSize * sizeof(wchar_t));
+                    size_t converted = wcstombs(out.data() + position, buf, outbytes_left);
+
+                    if (converted == (size_t) -1) {
+                        stat = APR_BADARG;
+                        //
+                        //   if unconvertable character was encountered
+                        //       repeatedly halve source to get fragment that
+                        //       can be converted
+                        for(chunkSize /= 2;
+                            chunkSize > 0;
+                            chunkSize /= 2) {
+                            buf[chunkSize] = 0;
+                            converted = wcstombs(out.data() + position, buf, outbytes_left);
+                            if (converted != (size_t) -1) {
+                                iter += chunkSize;
+                                out.position(out.position() + converted);
+                                // stop at the first prefix that converts;
+                                // going on would count shorter prefixes of
+                                // the same text again
+                                break;
+                            }
+                        }
+                    } else {
+                        iter += chunkSize;
+                        out.position(out.position() + converted);
+                    }
+                }
+                return stat;
+            }
+
+        private:
+            // not copyable
+            WcstombsCharsetEncoder(const WcstombsCharsetEncoder&);
+            WcstombsCharsetEncoder& operator=(const WcstombsCharsetEncoder&);
+        };
+#endif
+
+
+#if defined(_WIN32)
+ /**
+ * An engine to transform LogStrings into bytes
+ * for the specific character set.
+ *
+ * This encoder emits one byte per character and accepts
+ * only values up to 0x7F. encode() stops with APR_BADARG
+ * at the first larger value, leaving iter on that character,
+ * and stops with APR_SUCCESS when the end of the string or
+ * the limit of the output buffer is reached.
+ */
+ class USASCIICharsetEncoder : public CharsetEncoder
+ {
+ public:
+ USASCIICharsetEncoder() {
+ }
+
+ virtual log4cxx_status_t encode(const LogString& in,
+ LogString::const_iterator& iter,
+ ByteBuffer& out) {
+ log4cxx_status_t stat = APR_SUCCESS;
+ if (iter != in.end()) {
+ char* dstEnd = out.data() + out.limit(); // one past the last writable byte
+ char* dst = out.data() + out.position(); // next byte to write
+ for(;
+ dst < dstEnd && iter != in.end();
+ iter++, dst++) {
+ unsigned short ch = *iter;
+ if (0x7F < ch) { // not representable in US-ASCII
+ stat = APR_BADARG;
+ break;
+ }
+ *dst = ch;
+ }
+ out.position(dst - out.data()); // record how far the buffer was filled
+ }
+ return stat;
+ }
+
+ private:
+ USASCIICharsetEncoder(const USASCIICharsetEncoder&); // not copyable
+ USASCIICharsetEncoder& operator=(const
USASCIICharsetEncoder&);
+ };
+
+ /**
+ * An engine to transform LogStrings into bytes
+ * for the specific character set.
+ *
+ * This encoder emits one byte per character and accepts
+ * only values up to 0xFF. encode() stops with APR_BADARG
+ * at the first larger value, leaving iter on that character,
+ * and stops with APR_SUCCESS when the end of the string or
+ * the limit of the output buffer is reached.
+ *
+ * NOTE(review): encode() is only compiled when
+ * LOG4CXX_LOGCHAR_IS_WCHAR is set; no override is visible
+ * here for other builds -- confirm that is intended.
+ */
+ class ISOLatin1CharsetEncoder : public CharsetEncoder
+ {
+ public:
+ ISOLatin1CharsetEncoder() {
+ }
+
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+ virtual log4cxx_status_t encode(const LogString& in,
+ LogString::const_iterator& iter,
+ ByteBuffer& out) {
+ log4cxx_status_t stat = APR_SUCCESS;
+ if (iter != in.end()) {
+ char* dstEnd = out.data() + out.limit(); // one past the last writable byte
+ char* dst = out.data() + out.position(); // next byte to write
+ for(;
+ dst < dstEnd && iter != in.end();
+ iter++, dst++) {
+ unsigned short ch = *iter;
+ if (0xFF < ch) { // does not fit in a single byte
+ stat = APR_BADARG;
+ break;
+ }
+ *dst = ch;
+ }
+ out.position(dst - out.data()); // record how far the buffer was filled
+ }
+ return stat;
+ }
+#endif
+
+ private:
+ ISOLatin1CharsetEncoder(const ISOLatin1CharsetEncoder&); // not copyable
+ ISOLatin1CharsetEncoder& operator=(const
ISOLatin1CharsetEncoder&);
+ };
+
+
+        /**
+         * An engine to transform LogStrings into UTF-8 bytes.
+         *
+         * With UTF-8 logchar the bytes are copied unchanged; with
+         * wchar_t logchar each element is treated as a UTF-16 code
+         * unit and encoded, combining surrogate pairs into one
+         * four-byte sequence.
+         */
+        class UTF8CharsetEncoder : public CharsetEncoder
+        {
+        public:
+            UTF8CharsetEncoder() {
+            }
+
+#if LOG4CXX_LOGCHAR_IS_UTF8
+            /**
+             * Copies as many bytes of in, starting at iter, as fit into
+             * the free space of out and advances iter and the position
+             * of out by that amount.
+             * @return always APR_SUCCESS.
+             */
+            virtual log4cxx_status_t encode(const LogString& in,
+                  LogString::const_iterator& iter,
+                  ByteBuffer& out) {
+                if (iter != in.end()) {
+                    size_t inOffset = iter - in.begin();
+                    size_t count = in.length() - inOffset;
+                    if (count > out.remaining()) {
+                        count = out.remaining();
+                    }
+                    memcpy(out.data() + out.position(),
+                        in.data() + inOffset,
+                        count);
+                    out.position(out.position() + count);
+                    iter += count;
+                }
+                return APR_SUCCESS;
+            }
+#endif
+
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+            /**
+             * Encodes in, starting at iter, until the string ends or the
+             * next character no longer fits into out. A character is
+             * either written completely or not at all. iter and the
+             * position of out are advanced past what was encoded.
+             *
+             * @return APR_SUCCESS, or APR_BADARG when iter is left on a
+             *         surrogate code unit that is not part of a valid
+             *         high/low pair.
+             */
+            virtual log4cxx_status_t encode(const LogString& in,
+                  LogString::const_iterator& iter,
+                  ByteBuffer& out) {
+                log4cxx_status_t stat = APR_SUCCESS;
+                if (iter != in.end()) {
+                    char* dst = out.data() + out.position();
+                    char* dstEnd = out.data() + out.limit();
+                    for(;
+                        dst < dstEnd && iter != in.end();
+                        iter++) {
+                        unsigned short sv = *iter;
+                        if (sv <= 0x7F) {
+                            *(dst++) = (char) sv;
+                        } else if (sv <= 0x7FF) {
+                            if(dst + 1 < dstEnd) {
+                                *(dst++) = (char) (0xC0 | (sv >> 6));
+                                *(dst++) = (char) (0x80 | (sv & 0x3F));
+                            } else {
+                                break;
+                            }
+                        } else if (sv < 0xD800 || sv > 0xDFFF) {
+                            if (dst + 2 < dstEnd) {
+                                *(dst++) = (char) (0xE0 | (sv >> 12));
+                                *(dst++) = (char) (0x80 | ((sv >> 6) & 0x3F));
+                                *(dst++) = (char) (0x80 | (sv & 0x3F));
+                            } else {
+                                break;
+                            }
+                        } else {
+                            // A pair must be a high surrogate (D800-DBFF)
+                            // immediately followed by a low one (DC00-DFFF).
+                            if (sv > 0xDBFF || (iter + 1) == in.end()) {
+                                stat = APR_BADARG;
+                                break;
+                            }
+                            unsigned short ls = *(iter + 1);
+                            if (ls < 0xDC00 || ls > 0xDFFF) {
+                                stat = APR_BADARG;
+                                break;
+                            }
+                            if (dst + 3 >= dstEnd) {
+                                break;
+                            }
+                            // The pair carries 20 bits that are offset by
+                            // 0x10000 from the code point they represent.
+                            unsigned long cp = 0x10000UL
+                                + (((unsigned long) (sv & 0x3FF)) << 10)
+                                + (ls & 0x3FF);
+                            *(dst++) = (char) (0xF0 | (cp >> 18));
+                            *(dst++) = (char) (0x80 | ((cp >> 12) & 0x3F));
+                            *(dst++) = (char) (0x80 | ((cp >> 6) & 0x3F));
+                            *(dst++) = (char) (0x80 | (cp & 0x3F));
+                            // step over the low surrogate; the loop
+                            // increment steps over the high one
+                            ++iter;
+                        }
+                    }
+                    out.position(dst - out.data());
+                }
+                return stat;
+            }
+#endif
+
+        private:
+            // not copyable
+            UTF8CharsetEncoder(const UTF8CharsetEncoder&);
+            UTF8CharsetEncoder& operator=(const UTF8CharsetEncoder&);
+        };
+
+
+ /**
+ * An engine to transform LogStrings into bytes
+ * for the specific character set.
+ *
+ * This encoder writes every LogString element as two
+ * bytes, most significant byte first, until the string
+ * ends or fewer than two bytes are free in the buffer.
+ *
+ * NOTE(review): encode() is only compiled when
+ * LOG4CXX_LOGCHAR_IS_WCHAR is set; no override is visible
+ * here for other builds -- confirm that is intended.
+ */
+ class UTF16BECharsetEncoder : public CharsetEncoder
+ {
+ public:
+ UTF16BECharsetEncoder() {
+ }
+
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+ virtual log4cxx_status_t encode(const LogString& in,
+ LogString::const_iterator& iter,
+ ByteBuffer& out) {
+ log4cxx_status_t stat = APR_SUCCESS;
+ char* dstEnd = out.data() + out.limit() - 1; // stop one byte early so both bytes of an element always fit
+ char* dst = out.data() + out.position();
+ for(;
+ dst < dstEnd && iter != in.end();
+ iter++) {
+ *(dst++) = (*iter & 0xFF00) >> 8; // high byte first
+ *(dst++) = *iter & 0x00FF; // then low byte
+ }
+ out.position(dst - out.data()); // record how far the buffer was filled
+ return stat; // never changed from APR_SUCCESS
+ }
+#endif
+
+ private:
+ UTF16BECharsetEncoder(const UTF16BECharsetEncoder&); // not copyable
+ UTF16BECharsetEncoder& operator=(const
UTF16BECharsetEncoder&);
+ };
+
+ /**
+ * An engine to transform LogStrings into bytes
+ * for the specific character set.
+ *
+ * This encoder writes every LogString element as two
+ * bytes, least significant byte first, until the string
+ * ends or fewer than two bytes are free in the buffer.
+ *
+ * NOTE(review): encode() is only compiled when
+ * LOG4CXX_LOGCHAR_IS_WCHAR is set; no override is visible
+ * here for other builds -- confirm that is intended.
+ */
+ class UTF16LECharsetEncoder : public CharsetEncoder
+ {
+ public:
+ UTF16LECharsetEncoder() {
+ }
+
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+ virtual log4cxx_status_t encode(const LogString& in,
+ LogString::const_iterator& iter,
+ ByteBuffer& out) {
+ log4cxx_status_t stat = APR_SUCCESS;
+ char* dstEnd = out.data() + out.limit() - 1; // stop one byte early so both bytes of an element always fit
+ char* dst = out.data() + out.position();
+ for(;
+ dst < dstEnd && iter != in.end();
+ iter++) {
+ *(dst++) = *iter & 0x00FF; // low byte first
+ *(dst++) = (*iter & 0xFF00) >> 8; // then high byte
+ }
+ out.position(dst - out.data()); // record how far the buffer was filled
+ return stat; // never changed from APR_SUCCESS
+ }
+#endif
+
+ private:
+ UTF16LECharsetEncoder(const UTF16LECharsetEncoder&); // not copyable
+ UTF16LECharsetEncoder& operator=(const
UTF16LECharsetEncoder&);
+ };
+
+#endif
+
+ } // namespace helpers
+
+} //namespace log4cxx
-CharsetEncoder::CharsetEncoder(const char* topage) {
-#if LOG4CXX_LOGCHAR_IS_WCHAR
- const char* frompage = "WCHAR_T";
-#endif
-#if LOG4CXX_LOGCHAR_IS_UTF8
- const char* frompage = "UTF-8";
-#endif
- apr_status_t stat = apr_xlate_open((apr_xlate_t**) &convset,
- topage,
- frompage,
- (apr_pool_t*) pool.getAPRPool());
- if (stat != APR_SUCCESS) {
- throw IllegalArgumentException(topage);
- }
-}
-CharsetEncoder::~CharsetEncoder() {
- apr_xlate_close((apr_xlate_t*) convset);
+CharsetEncoder::CharsetEncoder() { // Empty: this base class has no members to initialize.
}
-CharsetEncoderPtr CharsetEncoder::getDefaultEncoder() {
- static CharsetEncoderPtr encoder(new CharsetEncoder(APR_LOCALE_CHARSET));
- return encoder;
+CharsetEncoder::~CharsetEncoder() { // Empty: this base class has no resources to release.
}
-CharsetEncoderPtr CharsetEncoder::getWideEncoder() {
- static CharsetEncoderPtr encoder(new CharsetEncoder("WCHAR_T"));
+/**
+ * Returns the encoder for the default multi-byte encoding.
+ * The encoder is a function-local static, so every call
+ * returns the same pointer.
+ *
+ * NOTE(review): a Windows build without wchar_t logchar has
+ * neither WcstombsCharsetEncoder nor APRCharsetEncoder
+ * available -- that configuration still needs an encoder.
+ */
+CharsetEncoderPtr CharsetEncoder::getDefaultEncoder() {
+#if LOG4CXX_LOGCHAR_IS_WCHAR
+    // same condition that guards the definition of WcstombsCharsetEncoder
+    static CharsetEncoderPtr encoder(new WcstombsCharsetEncoder());
+#else
+    // CharsetEncoder itself is abstract; the APR-backed subclass is
+    // the one that takes a charset name
+    static CharsetEncoderPtr encoder(new APRCharsetEncoder(APR_LOCALE_CHARSET));
+#endif
     return encoder;
 }
+
-CharsetEncoderPtr CharsetEncoder::getEncoder(const LogString& charset) {
-#if LOG4CXX_LOGCHAR_IS_WCHAR
+CharsetEncoderPtr CharsetEncoder::getEncoder(const std::wstring& charset) { // Wide-string overload: narrows the name and delegates to the std::string overload.
std::string cs(charset.size(), ' ');
for(int i = 0; i < charset.size(); i++) {
cs[i] = (char) charset[i];
}
- return new CharsetEncoder(cs.c_str());
-#endif
-#if LOG4CXX_LOGCHAR_IS_UTF8
- return new CharsetEncoder(charset.c_str());
-#endif
+ return getEncoder(cs); // each wide character was truncated to char above
}
-
-log4cxx_status_t CharsetEncoder::encode(const LogString& in,
- LogString::const_iterator& iter,
- ByteBuffer& out) {
- apr_status_t stat;
- size_t outbytes_left = out.remaining();
- size_t initial_outbytes_left = outbytes_left;
- size_t position = out.position();
- if (iter == in.end()) {
- stat = apr_xlate_conv_buffer((apr_xlate_t*) convset, NULL, NULL,
- out.data() + position, &outbytes_left);
- } else {
- LogString::size_type inOffset = (iter - in.begin());
- apr_size_t inbytes_left =
- (in.size() - inOffset) * sizeof(LogString::value_type);
- apr_size_t initial_inbytes_left = inbytes_left;
- stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
- (const char*) (in.data() + inOffset),
- &inbytes_left,
- out.data() + position,
- &outbytes_left);
- iter += ((initial_inbytes_left - inbytes_left) /
sizeof(LogString::value_type));
- }
- out.position(out.position() + (initial_outbytes_left - outbytes_left));
- return stat;
-}
+/**
+ * Returns a new encoder for the named character set.
+ *
+ * On Windows the name is matched, ignoring case, against the
+ * built-in encoders (US-ASCII/ISO646-US, ISO-8859-1/ISO-LATIN-1,
+ * UTF-8, UTF-16BE/UTF-16 and UTF-16LE); elsewhere the name is
+ * handed to APRCharsetEncoder.
+ *
+ * @param charset name of the target character set.
+ * @throws IllegalArgumentException on Windows when the name
+ *         matches none of the built-in encoders.
+ */
+CharsetEncoderPtr CharsetEncoder::getEncoder(const std::string& charset) {
+#if defined(_WIN32)
+    // each call passes the upper-case and the lower-case spelling
+    if (StringHelper::equalsIgnoreCase(charset, "US-ASCII", "us-ascii") ||
+        StringHelper::equalsIgnoreCase(charset, "ISO646-US", "iso646-us")) {
+        return new USASCIICharsetEncoder();
+    } else if (StringHelper::equalsIgnoreCase(charset, "ISO-8859-1", "iso-8859-1") ||
+        StringHelper::equalsIgnoreCase(charset, "ISO-LATIN-1", "iso-latin-1")) {
+        return new ISOLatin1CharsetEncoder();
+    } else if (StringHelper::equalsIgnoreCase(charset, "UTF-8", "utf-8")) {
+        return new UTF8CharsetEncoder();
+    } else if (StringHelper::equalsIgnoreCase(charset, "UTF-16BE", "utf-16be")
+        || StringHelper::equalsIgnoreCase(charset, "UTF-16", "utf-16")) {
+        // plain "UTF-16" is treated as big-endian
+        return new UTF16BECharsetEncoder();
+    } else if (StringHelper::equalsIgnoreCase(charset, "UTF-16LE", "utf-16le")) {
+        return new UTF16LECharsetEncoder();
+    }
+    throw IllegalArgumentException(charset);
+#else
+    return new APRCharsetEncoder(charset.c_str());
+#endif
+}
void CharsetEncoder::reset() {
@@ -103,8 +458,6 @@
}
-
-
void CharsetEncoder::encode(CharsetEncoderPtr& enc,
const LogString& src,
LogString::const_iterator& iter,
1.2 +2 -48
logging-log4cxx/tests/src/helpers/charsetdecodertestcase.cpp
Index: charsetdecodertestcase.cpp
===================================================================
RCS file:
/home/cvs/logging-log4cxx/tests/src/helpers/charsetdecodertestcase.cpp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- charsetdecodertestcase.cpp 1 Apr 2005 02:02:34 -0000 1.1
+++ charsetdecodertestcase.cpp 22 Apr 2005 05:27:22 -0000 1.2
@@ -31,8 +31,6 @@
CPPUNIT_TEST_SUITE(CharsetDecoderTestCase);
CPPUNIT_TEST(decode1);
CPPUNIT_TEST(decode2);
- CPPUNIT_TEST(decode3);
- CPPUNIT_TEST(decode4);
#if LOG4CXX_HAS_WCHAR_T
CPPUNIT_TEST(decode5);
CPPUNIT_TEST(decode6);
@@ -49,7 +47,7 @@
char buf[] = "Hello, World";
ByteBuffer src(buf, strlen(buf));
- CharsetDecoderPtr
dec(CharsetDecoder::getDecoder(LOG4CXX_STR("US-ASCII")));
+ CharsetDecoderPtr dec(CharsetDecoder::getDefaultDecoder());
LogString greeting;
log4cxx_status_t stat = dec->decode(src, greeting);
CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
@@ -67,7 +65,7 @@
strcpy(buf + BUFSIZE - 3, "Hello");
ByteBuffer src(buf, strlen(buf));
- CharsetDecoderPtr
dec(CharsetDecoder::getDecoder(LOG4CXX_STR("US-ASCII")));
+ CharsetDecoderPtr dec(CharsetDecoder::getDefaultDecoder());
LogString greeting;
log4cxx_status_t stat = dec->decode(src, greeting);
@@ -84,50 +82,6 @@
}
- void decode3() {
- const char buf[] = { 'A', 0xB6, 0 };
- ByteBuffer src((char*) buf, strlen(buf));
-
- CharsetDecoderPtr
dec(CharsetDecoder::getDecoder(LOG4CXX_STR("US-ASCII")));
-
- LogString greeting;
- log4cxx_status_t stat = dec->decode(src, greeting);
- CPPUNIT_ASSERT_EQUAL(true, CharsetDecoder::isError(stat));
- CPPUNIT_ASSERT_EQUAL((size_t) 1, src.position());
-
- }
-
-
- void decode4() {
- const char utf8_greet[] = { 'A',
- 0xD8, 0x85,
- 0xD4, 0xB0,
- 0xE0, 0xA6, 0x86,
- 0xE4, 0xB8, 0x83,
- 0xD0, 0x80,
- 0 };
-#if LOG4CXX_LOGCHAR_IS_WCHAR
- // arbitrary, hopefully meaningless, characters from
- // Latin, Arabic, Armenian, Bengali, CJK and Cyrillic
- const logchar greet[] = { L'A', 0x0605, 0x0530, 0x986, 0x4E03,
0x400, 0 };
-#endif
-
-#if LOG4CXX_LOGCHAR_IS_UTF8
- const logchar *greet = utf8_greet;
-#endif
- ByteBuffer src((char*) utf8_greet, strlen(utf8_greet));
-
- CharsetDecoderPtr
dec(CharsetDecoder::getDecoder(LOG4CXX_STR("UTF-8")));
-
- LogString greeting;
- log4cxx_status_t stat = dec->decode(src, greeting);
- CPPUNIT_ASSERT_EQUAL(false, CharsetDecoder::isError(stat));
- stat = dec->decode(src, greeting);
- CPPUNIT_ASSERT_EQUAL(false, CharsetDecoder::isError(stat));
-
- CPPUNIT_ASSERT_EQUAL((LogString) greet, greeting);
- }
-
#if LOG4CXX_HAS_WCHAR_T
void decode5() {
1.2 +0 -107
logging-log4cxx/tests/src/helpers/charsetencodertestcase.cpp
Index: charsetencodertestcase.cpp
===================================================================
RCS file:
/home/cvs/logging-log4cxx/tests/src/helpers/charsetencodertestcase.cpp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- charsetencodertestcase.cpp 1 Apr 2005 02:02:34 -0000 1.1
+++ charsetencodertestcase.cpp 22 Apr 2005 05:27:22 -0000 1.2
@@ -33,11 +33,6 @@
CPPUNIT_TEST(encode2);
CPPUNIT_TEST(encode3);
CPPUNIT_TEST(encode4);
-#if LOG4CXX_HAS_WCHAR_T
- CPPUNIT_TEST(encode5);
- CPPUNIT_TEST(encode6);
- CPPUNIT_TEST(encode7);
-#endif
CPPUNIT_TEST_SUITE_END();
enum { BUFSIZE = 256 };
@@ -170,108 +165,6 @@
}
-#if LOG4CXX_HAS_WCHAR_T
- void encode5() {
- const LogString greeting(LOG4CXX_STR("Hello, World"));
-
- CharsetEncoderPtr enc(CharsetEncoder::getWideEncoder());
-
- char buf[BUFSIZE];
- memset(buf, 0, BUFSIZE);
- ByteBuffer out(buf, BUFSIZE);
- LogString::const_iterator iter = greeting.begin();
- log4cxx_status_t stat = enc->encode(greeting, iter, out);
- CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
- CPPUNIT_ASSERT(iter == greeting.end());
-
- stat = enc->encode(greeting, iter, out);
- CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
- out.flip();
- std::wstring encoded((wchar_t*) out.data());
- CPPUNIT_ASSERT_EQUAL((std::wstring) L"Hello, World", encoded);
- CPPUNIT_ASSERT(iter == greeting.end());
- CPPUNIT_ASSERT_EQUAL(12 * sizeof(wchar_t), out.limit());
- }
-
- void encode6() {
- LogString greeting(BUFSIZE - 3, LOG4CXX_STR('A'));
- greeting.append(LOG4CXX_STR("Hello"));
-
- CharsetEncoderPtr enc(CharsetEncoder::getWideEncoder());
-
- char buf[BUFSIZE * sizeof(wchar_t) + 4];
- memset(buf, 0, BUFSIZE * sizeof(wchar_t) + 4);
- ByteBuffer out(buf, BUFSIZE * sizeof(wchar_t));
- LogString::const_iterator iter = greeting.begin();
- log4cxx_status_t stat = enc->encode(greeting, iter, out);
- CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
- CPPUNIT_ASSERT_EQUAL((size_t) 0, out.remaining());
- CPPUNIT_ASSERT_EQUAL(LOG4CXX_STR('o'), *(iter+1));
- out.flip();
- std::wstring encoded((const wchar_t*) out.data());
- out.clear();
-
- memset(buf, 0, BUFSIZE * sizeof(wchar_t) + 4);
- stat = enc->encode(greeting, iter, out);
- CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
- CPPUNIT_ASSERT_EQUAL(2 * sizeof(wchar_t), out.position());
- CPPUNIT_ASSERT(iter == greeting.end());
-
- stat = enc->encode(greeting, iter, out);
- CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
- out.flip();
- encoded.append((const wchar_t*) out.data());
-
- std::wstring manyAs(BUFSIZE - 3, L'A');
- CPPUNIT_ASSERT_EQUAL(manyAs, encoded.substr(0, BUFSIZE - 3));
- CPPUNIT_ASSERT_EQUAL(std::wstring(L"Hello"),
encoded.substr(BUFSIZE - 3));
-
-
- }
-
- void encode7() {
- // arbitrary, hopefully meaningless, characters from
- // Latin, Arabic, Armenian, Bengali, CJK and Cyrillic
- const wchar_t wide_greet[] = { L'A', 0x0605, 0x0530, 0x986,
0x4E03, 0x400, 0 };
-#if LOG4CXX_LOGCHAR_IS_WCHAR
- const logchar *greet = wide_greet;
-#endif
-
-#if LOG4CXX_LOGCHAR_IS_UTF8
- const logchar greet[] = { 'A',
- 0xD8, 0x85,
- 0xD4, 0xB0,
- 0xE0, 0xA6, 0x86,
- 0xE4, 0xB8, 0x83,
- 0xD0, 0x80,
- 0 };
-#endif
-
- CharsetEncoderPtr enc(CharsetEncoder::getWideEncoder());
-
- char buf[BUFSIZE * sizeof(wchar_t)];
- ByteBuffer out(buf, BUFSIZE * sizeof(wchar_t));
-
- LogString greeting(greet);
- LogString::const_iterator iter = greeting.begin();
- log4cxx_status_t stat = enc->encode(greeting, iter, out);
- CPPUNIT_ASSERT_EQUAL(false, CharsetEncoder::isError(stat));
- stat = enc->encode(greeting, iter, out);
- CPPUNIT_ASSERT_EQUAL(false, CharsetEncoder::isError(stat));
- out.flip();
- CPPUNIT_ASSERT_EQUAL((size_t) 6 * sizeof(wchar_t), out.limit());
-
- const wchar_t* actual = (const wchar_t*) out.data();
- for(int i = 0; i < 6; i++) {
- CPPUNIT_ASSERT_EQUAL((int) wide_greet[i], (int) actual[i]);
- }
- CPPUNIT_ASSERT(iter == greeting.end());
-
-
- }
-
-#endif
-
};