carnold     2005/04/21 22:27:22

  Modified:    include/log4cxx/helpers charsetdecoder.h charsetencoder.h
               src      charsetdecoder.cpp charsetencoder.cpp
               tests/src/helpers charsetdecodertestcase.cpp
                        charsetencodertestcase.cpp
  Log:
  LOGCXX-59: Windows (non APR) encoding support
  
  Revision  Changes    Path
  1.2       +8 -13     logging-log4cxx/include/log4cxx/helpers/charsetdecoder.h
  
  Index: charsetdecoder.h
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/include/log4cxx/helpers/charsetdecoder.h,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- charsetdecoder.h  1 Apr 2005 02:02:33 -0000       1.1
  +++ charsetdecoder.h  22 Apr 2005 05:27:22 -0000      1.2
  @@ -18,7 +18,6 @@
   #define _LOG4CXX_HELPERS_CHARSETDECODER_H
   
   #include <log4cxx/helpers/objectimpl.h>
  -#include <log4cxx/helpers/pool.h>
   
   namespace log4cxx
   {
  @@ -39,29 +38,25 @@
                     BEGIN_LOG4CXX_CAST_MAP()
                             LOG4CXX_CAST_ENTRY(CharsetDecoder)
                     END_LOG4CXX_CAST_MAP()
  -
  -          private:
  -                  CharsetDecoder(const char* fromset);
  +          protected:
  +                  CharsetDecoder();
  +          public:
                     virtual ~CharsetDecoder();
  -
  -          public:
  -                  static CharsetDecoderPtr getDefaultDecoder();
  -                  static CharsetDecoderPtr getWideDecoder();
  -                  static CharsetDecoderPtr getDecoder(const LogString& charset);
  +                  static CharsetDecoderPtr getDefaultDecoder();
  +#if LOG4CXX_HAS_WCHAR_T
  +                  static CharsetDecoderPtr getWideDecoder();
  +#endif
   
                     virtual log4cxx_status_t decode(ByteBuffer& in,
  -                        LogString& out);
  +                        LogString& out) = 0;
   
                     inline static bool isError(log4cxx_status_t stat) {
                        return (stat != 0);
                     }
   
  -
             private:
                     CharsetDecoder(const CharsetDecoder&);
                     CharsetDecoder& operator=(const CharsetDecoder&);
  -                  Pool pool;
  -                  void *convset;
             };
   
           } // namespace helpers
  
  
  
  1.2       +8 -9      logging-log4cxx/include/log4cxx/helpers/charsetencoder.h
  
  Index: charsetencoder.h
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/include/log4cxx/helpers/charsetencoder.h,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- charsetencoder.h  1 Apr 2005 02:02:33 -0000       1.1
  +++ charsetencoder.h  22 Apr 2005 05:27:22 -0000      1.2
  @@ -41,14 +41,15 @@
                             LOG4CXX_CAST_ENTRY(CharsetEncoder)
                     END_LOG4CXX_CAST_MAP()
   
  -          private:
  -                  CharsetEncoder(const char* topage);
  -                  virtual ~CharsetEncoder();
  -
  +          protected:
  +                  CharsetEncoder();
  +
             public:
  +                  virtual ~CharsetEncoder();
                     static CharsetEncoderPtr getDefaultEncoder();
  -                  static CharsetEncoderPtr getWideEncoder();
  -                  static CharsetEncoderPtr getEncoder(const LogString& charset);
  +//                  static CharsetEncoderPtr getWideEncoder();
  +                  static CharsetEncoderPtr getEncoder(const std::wstring& charset);
  +                  static CharsetEncoderPtr getEncoder(const std::string& charset);
   
                     /**
                     * Encodes a string replacing unmappable
  @@ -62,7 +63,7 @@
   
                     virtual log4cxx_status_t encode(const LogString& in,
                           LogString::const_iterator& iter,
  -                        ByteBuffer& out);
  +                        ByteBuffer& out) = 0;
   
                     virtual void reset();
   
  @@ -76,8 +77,6 @@
             private:
                     CharsetEncoder(const CharsetEncoder&);
                     CharsetEncoder& operator=(const CharsetEncoder&);
  -                  Pool pool;
  -                  void *convset;
             };
   
           } // namespace helpers
  
  
  
  1.2       +249 -69   logging-log4cxx/src/charsetdecoder.cpp
  
  Index: charsetdecoder.cpp
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/src/charsetdecoder.cpp,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- charsetdecoder.cpp        1 Apr 2005 02:02:33 -0000       1.1
  +++ charsetdecoder.cpp        22 Apr 2005 05:27:22 -0000      1.2
  @@ -22,82 +22,262 @@
   using namespace log4cxx;
   using namespace log4cxx::helpers;
   
  -IMPLEMENT_LOG4CXX_OBJECT(CharsetDecoder)
  +IMPLEMENT_LOG4CXX_OBJECT(CharsetDecoder)
  +
  +
  +namespace log4cxx
  +{
  +        namespace helpers {
  +
  +#if !defined(_WIN32)
  +          /**
  +           *  Converts from an arbitrary encoding to LogString
  +           *    using apr_xlate.
  +           */
  +          class APRCharsetDecoder : public CharsetDecoder
  +          {
  +          public:
  +              APRCharsetDecoder(const char* frompage) {
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +                const char* topage = "WCHAR_T";
  +#endif
  +#if LOG4CXX_LOGCHAR_IS_UTF8
  +                const char* topage = "UTF-8";
  +#endif
  +                apr_status_t stat = apr_pool_create(&pool, NULL);
  +                if (stat != APR_SUCCESS) {
  +                    throw PoolException(stat);
  +                }
  +                stat = apr_xlate_open(&convset,
  +                    topage,
  +                    frompage,
  +                    pool);
  +                if (stat != APR_SUCCESS) {
  +                    throw IllegalArgumentException(topage);
  +                }
  +              }
  +
  +              virtual ~APRCharsetDecoder() {
  +                apr_xlate_close(convset);
  +                apr_pool_destroy(pool);
  +              }
  +
  +              virtual log4cxx_status_t decode(ByteBuffer& in,
  +                  LogString& out) {
  +                  enum { BUFSIZE = 256 };
  +                  logchar buf[BUFSIZE];
  +                  const apr_size_t initial_outbytes_left = BUFSIZE * sizeof(logchar);
  +                  apr_status_t stat = APR_SUCCESS;
  +                  if (in.remaining() == 0) {
  +                    size_t outbytes_left = initial_outbytes_left;
  +                    stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
  +                        NULL, NULL, (char*) buf, &outbytes_left);
  +                    out.append(buf, (initial_outbytes_left - outbytes_left)/sizeof(logchar));
  +                  } else {
  +                    while(in.remaining() > 0 && stat == APR_SUCCESS) {
  +                      size_t inbytes_left = in.remaining();
  +                      size_t initial_inbytes_left = inbytes_left;
  +                      size_t pos = in.position();
  +                      apr_size_t outbytes_left = initial_outbytes_left;
  +                      stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
  +                           in.data() + pos,
  +                           &inbytes_left,
  +                           (char*) buf,
  +                           &outbytes_left);
  +                      out.append(buf, (initial_outbytes_left - outbytes_left)/sizeof(logchar));
  +                      in.position(pos + (initial_inbytes_left - inbytes_left));
  +                    }
  +                  }
  +                  return stat;
  +              }
  +
  +          private:
  +                  APRCharsetDecoder(const APRCharsetDecoder&);
  +                  APRCharsetDecoder& operator=(const APRCharsetDecoder&);
  +                  apr_pool_t* pool;
  +                  apr_xlate_t *convset;
  +          };
  +#endif
  +
  +
  +#if LOG4CXX_HAS_WCHAR_T || defined(_WIN32)
  +          /**
  +          *    Converts from the default multi-byte string to
  +          *        LogString using mbstowcs.
  +          *
  +          */
  +          class MbstowcsCharsetDecoder : public CharsetDecoder
  +          {
  +          public:
  +              MbstowcsCharsetDecoder() {
  +              }
  +
  +              virtual ~MbstowcsCharsetDecoder() {
  +              }
  +
  +              virtual log4cxx_status_t decode(ByteBuffer& in,
  +                  LogString& out) {
  +                  enum { BUFSIZE = 256 };
  +                  wchar_t buf[BUFSIZE];
  +
  +                  while(in.remaining() > 0) {
  +                      size_t requested = in.remaining();
  +                      if (requested > BUFSIZE - 1) {
  +                          requested = BUFSIZE - 1;
  +                      }
  +
  +                      for(; requested > 0; requested--) {
  +                        memset(buf, 0, BUFSIZE*sizeof(wchar_t));
  +                        size_t converted = mbstowcs(buf, in.data() + in.position(), requested);
  +                        if (converted != (size_t) -1) {
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +                            out.append(buf);
  +#else
  +                            size_t wlen = wcslen(buf);
  +                            out.reserve(out.length() + wlen);
  +                            for(int i = 0; i < wlen; i++) {
  +                                encodeUTF8(buf[i], out);
  +                            }
  +#endif
  +                            in.position(in.position() + converted);
  +                            break;
  +                        }
  +                      }
  +                      if (requested == 0) {
  +                          return APR_BADARG;
  +                      }
  +                  }
  +                  return APR_SUCCESS;
  +              }
  +
  +
  +              static void encodeUTF8(unsigned short ch, std::string& out) {
  +                  if (ch <= 0x7F) {
  +                      out.append(1, (char) ch);
  +                  } else {
  +                      //
  +                      //   TODO
  +                      //
  +                      out.append(1, '?');
  +                  }
  +              }
  +
  +
  +
  +          private:
  +                  MbstowcsCharsetDecoder(const MbstowcsCharsetDecoder&);
  +                  MbstowcsCharsetDecoder& operator=(const MbstowcsCharsetDecoder&);
  +          };
  +#endif
  +
  +
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +          /**
  +          *    Decodes a buffer of wchar_t code units by appending
  +          *        them to the LogString unchanged (no conversion).
  +          *
  +          */
  +          class TrivialWideCharsetDecoder : public CharsetDecoder
  +          {
  +          public:
  +              TrivialWideCharsetDecoder() {
  +              }
  +
  +              virtual ~TrivialWideCharsetDecoder() {
  +              }
  +
  +              virtual log4cxx_status_t decode(ByteBuffer& in,
  +                  LogString& out) {
  +                  const wchar_t* src = (const wchar_t*) (in.data() + in.position());
  +                  size_t remaining = in.remaining();
  +                  size_t count = remaining / sizeof(wchar_t);
  +                  out.append(src, count);
  +                  in.position(in.position() + count * sizeof(wchar_t));
  +                  if (remaining & 1) {
  +                      return APR_BADARG;
  +                  }
  +                  return APR_SUCCESS;
  +              }
  +
  +
  +
  +          private:
  +                  TrivialWideCharsetDecoder(const TrivialWideCharsetDecoder&);
  +                  TrivialWideCharsetDecoder& operator=(const TrivialWideCharsetDecoder&);
  +          };
  +#endif
  +
  +#if LOG4CXX_LOGCHAR_IS_UTF8 && defined(_WIN32)
  +          /**
  +          *    Converts a buffer of wchar_t code units to a
  +          *        UTF-8 std::string
  +          *
  +          */
  +          class WideToUTF8CharsetDecoder : public CharsetDecoder
  +          {
  +          public:
  +              WideToUTF8CharsetDecoder() {
  +              }
  +
  +              virtual ~WideToUTF8CharsetDecoder() {
  +              }
  +
  +              virtual log4cxx_status_t decode(ByteBuffer& in,
  +                  LogString& out) {
  +                  const wchar_t* src = in.data() + in.position();
  +                  size_t remaining = in.remaining();
  +                  size_t count = remaining / sizeof(wchar_t);
  +                  out.reserve(out.length() + count;
  +                  for(int i = 0; i < count; i++, src++) {
  +                      MbstowcsCharsetDecoder::encodeUTF8(*src, out);
  +                  }
  +                  in.position(in.position() + count * sizeof(wchar_t));
  +                  if (remaining & 1) {
  +                      return APR_BADARG;
  +                  }
  +                  return APR_SUCCESS;
  +              }
  +
  +
  +
  +          private:
  +                  WideToUTF8CharsetDecoder(const WideToUTF8CharsetDecoder&);
  +                  WideToUTF8CharsetDecoder& operator=(const WideToUTF8CharsetDecoder&);
  +          };
  +#endif
  +
  +
  +        } // namespace helpers
  +
  +}  //namespace log4cxx
   
   
  -CharsetDecoder::CharsetDecoder(const char* frompage) {
  -#if LOG4CXX_LOGCHAR_IS_WCHAR
  -  const char* topage = "WCHAR_T";
  -#endif
  -#if LOG4CXX_LOGCHAR_IS_UTF8
  -  const char* topage = "UTF-8";
  -#endif
  -  apr_status_t stat = apr_xlate_open((apr_xlate_t**) &convset,
  -     topage,
  -     frompage,
  -     (apr_pool_t*) pool.getAPRPool());
  -  if (stat != APR_SUCCESS) {
  -    throw IllegalArgumentException(topage);
  -  }
  -}
  +CharsetDecoder::CharsetDecoder() {
  +}
   
  -CharsetDecoder::~CharsetDecoder() {
  -  apr_xlate_close((apr_xlate_t*) convset);
  -}
   
  -CharsetDecoderPtr CharsetDecoder::getDefaultDecoder() {
  -  static CharsetDecoderPtr decoder(new CharsetDecoder(APR_LOCALE_CHARSET));
  -  return decoder;
  +CharsetDecoder::~CharsetDecoder() {
   }
   
  -CharsetDecoderPtr CharsetDecoder::getWideDecoder() {
  -  static CharsetDecoderPtr decoder(new CharsetDecoder("WCHAR_T"));
  -  return decoder;
  +CharsetDecoderPtr CharsetDecoder::getDefaultDecoder() {
  +#if LOG4CXX_HAS_WCHAR_T || defined(_WIN32)
  +    static CharsetDecoderPtr decoder(new MbstowcsCharsetDecoder());
  +#else
  +    static CharsetDecoderPtr decoder(new APRCharsetDecoder(APR_LOCALE_CHARSET));
  +#endif
  +    return decoder;
   }
  -
  -CharsetDecoderPtr CharsetDecoder::getDecoder(const LogString& charset) {
  -#if LOG4CXX_LOGCHAR_IS_WCHAR
  -   std::string cs(charset.size(), ' ');
  -   for(int i = 0; i < charset.size(); i++) {
  -      cs[i] = (char) charset[i];
  -   }
  -   return new CharsetDecoder(cs.c_str());
  +
  +#if LOG4CXX_HAS_WCHAR_T
  +CharsetDecoderPtr CharsetDecoder::getWideDecoder() {
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +  static CharsetDecoderPtr decoder(new TrivialWideCharsetDecoder());
  +#elif defined(_WIN32)
  +  static CharsetDecoderPtr decoder(new WideToUTF8CharsetDecoder());
  +#else
  +  static CharsetDecoderPtr decoder(new APRCharsetDecoder("WCHAR_T"));
   #endif
  -#if LOG4CXX_LOGCHAR_IS_UTF8
  -   return new CharsetDecoder(charset.c_str());
  +  return decoder;
  +}
   #endif
  -}
  -
  -
  -log4cxx_status_t CharsetDecoder::decode(
  -      ByteBuffer& in,
  -      LogString& out) {
  -      enum { BUFSIZE = 256 };
  -      logchar buf[BUFSIZE];
  -      const apr_size_t initial_outbytes_left = BUFSIZE * sizeof(logchar);
  -      apr_status_t stat = APR_SUCCESS;
  -      if (in.remaining() == 0) {
  -        size_t outbytes_left = initial_outbytes_left;
  -        stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
  -            NULL, NULL, (char*) buf, &outbytes_left);
  -        out.append(buf, (initial_outbytes_left - outbytes_left)/sizeof(logchar));
  -      } else {
  -        while(in.remaining() > 0 && stat == APR_SUCCESS) {
  -          size_t inbytes_left = in.remaining();
  -          size_t initial_inbytes_left = inbytes_left;
  -          size_t pos = in.position();
  -          apr_size_t outbytes_left = initial_outbytes_left;
  -          stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
  -               in.data() + pos,
  -               &inbytes_left,
  -               (char*) buf,
  -               &outbytes_left);
  -          out.append(buf, (initial_outbytes_left - outbytes_left)/sizeof(logchar));
  -          in.position(pos + (initial_inbytes_left - inbytes_left));
  -        }
  -      }
  -      return stat;
  -}
  -
  -
   
  
  
  
  1.2       +411 -58   logging-log4cxx/src/charsetencoder.cpp
  
  Index: charsetencoder.cpp
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/src/charsetencoder.cpp,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- charsetencoder.cpp        1 Apr 2005 02:02:33 -0000       1.1
  +++ charsetencoder.cpp        22 Apr 2005 05:27:22 -0000      1.2
  @@ -18,82 +18,437 @@
   #include <log4cxx/helpers/bytebuffer.h>
   #include <log4cxx/helpers/exception.h>
   #include <apr_xlate.h>
  +#include <log4cxx/helpers/stringhelper.h>
   
   using namespace log4cxx;
   using namespace log4cxx::helpers;
   
  -IMPLEMENT_LOG4CXX_OBJECT(CharsetEncoder)
  +IMPLEMENT_LOG4CXX_OBJECT(CharsetEncoder)
  +
  +namespace log4cxx
  +{
  +
  +        namespace helpers {
  +
  +#if !defined(_WIN32)
  +          /**
  +          *   An engine to transform LogStrings into bytes
  +          *     for the specific character set.
  +          */
  +          class APRCharsetEncoder : public CharsetEncoder
  +          {
  +          public:
  +              APRCharsetEncoder(const char* topage) {
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +                  const char* frompage = "WCHAR_T";
  +#endif
  +#if LOG4CXX_LOGCHAR_IS_UTF8
  +                  const char* frompage = "UTF-8";
  +#endif
  +                  apr_status_t stat = apr_pool_create(&pool, NULL);
  +                  if (stat != APR_SUCCESS) {
  +                      throw PoolException(stat);
  +                  }
  +                  stat = apr_xlate_open(&convset,
  +                     topage,
  +                     frompage,
  +                     pool);
  +                  if (stat != APR_SUCCESS) {
  +                    throw IllegalArgumentException(topage);
  +                  }
  +              }
  +              
  +              virtual ~APRCharsetEncoder() {
  +                    apr_xlate_close(convset);
  +                    apr_pool_destroy(pool);
  +              }
  +
  +              virtual log4cxx_status_t encode(const LogString& in,
  +                    LogString::const_iterator& iter,
  +                    ByteBuffer& out) {
  +                      apr_status_t stat;
  +                      size_t outbytes_left = out.remaining();
  +                      size_t initial_outbytes_left = outbytes_left;
  +                      size_t position = out.position();
  +                      if (iter == in.end()) {
  +                        stat = apr_xlate_conv_buffer(convset, NULL, NULL,
  +                           out.data() + position, &outbytes_left);
  +                      } else {
  +                        LogString::size_type inOffset = (iter - in.begin());
  +                        apr_size_t inbytes_left =
  +                            (in.size() - inOffset) * sizeof(LogString::value_type);
  +                        apr_size_t initial_inbytes_left = inbytes_left;
  +                        stat = apr_xlate_conv_buffer(convset,
  +                             (const char*) (in.data() + inOffset),
  +                             &inbytes_left,
  +                             out.data() + position,
  +                             &outbytes_left);
  +                        iter += ((initial_inbytes_left - inbytes_left) / sizeof(LogString::value_type));
  +                      }
  +                      out.position(out.position() + (initial_outbytes_left - outbytes_left));
  +                      return stat;
  +              }
  +
  +          private:
  +                  APRCharsetEncoder(const APRCharsetEncoder&);
  +                  APRCharsetEncoder& operator=(const APRCharsetEncoder&);
  +                  apr_pool_t* pool;
  +                  apr_xlate_t *convset;
  +          };
  +#endif
  +
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +          /**
  +          *   An engine to transform LogStrings into bytes
  +          *     for the specific character set.
  +          */
  +          class WcstombsCharsetEncoder : public CharsetEncoder
  +          {
  +          public:
  +              WcstombsCharsetEncoder() {
  +              }
  +              
  +              virtual log4cxx_status_t encode(const LogString& in,
  +                    LogString::const_iterator& iter,
  +                    ByteBuffer& out) {
  +                      log4cxx_status_t stat = APR_SUCCESS;
  +
  +                      if (iter != in.end()) {
  +                         size_t outbytes_left = out.remaining();
  +                         size_t initial_outbytes_left = outbytes_left;
  +                         size_t position = out.position();
  +                         LogString::size_type inOffset = (iter - in.begin());
  +                         size_t inchars_left = (in.size() - inOffset);
  +                         apr_size_t initial_inchars_left = inchars_left;
  +                         enum { BUFSIZE = 256 };
  +                         wchar_t buf[BUFSIZE];
  +                         size_t chunkSize = BUFSIZE - 1;
  +                         if (chunkSize * MB_LEN_MAX > outbytes_left) {
  +                             chunkSize = outbytes_left / MB_LEN_MAX;
  +                         }
  +                         if (chunkSize > in.length() - inOffset) {
  +                             chunkSize = in.length() - inOffset;
  +                         }
  +                         memset(buf, 0, BUFSIZE * sizeof(wchar_t));
  +                         memcpy(buf, 
  +                             in.data() + inOffset, 
  +                             chunkSize * sizeof(wchar_t));
  +                         size_t converted = wcstombs(out.data() + position, buf, outbytes_left);
  +
  +                         if (converted == (size_t) -1) {
  +                             stat = APR_BADARG;
  +                             //
  +                             //   if unconvertible character was encountered
  +                             //       repeatedly halve source to get fragment that
  +                             //       can be converted
  +                             for(chunkSize /= 2;
  +                                 chunkSize > 0;
  +                                 chunkSize /= 2) {
  +                                 buf[chunkSize] = 0;
  +                                 converted = wcstombs(out.data() + position, buf, outbytes_left);
  +                                 if (converted != (size_t) -1) {
  +                                    iter += chunkSize;
  +                                    out.position(out.position() + converted);
  +                                 }
  +                             }
  +                         } else {
  +                            iter += chunkSize;
  +                            out.position(out.position() + converted);
  +                         }
  +                      }
  +                      return stat;
  +              }
  +
  +          private:
  +                  WcstombsCharsetEncoder(const WcstombsCharsetEncoder&);
  +                  WcstombsCharsetEncoder& operator=(const WcstombsCharsetEncoder&);
  +          };
  +#endif
  +
  +
  +#if defined(_WIN32)
  +          /**
  +          *   An engine to transform LogStrings into bytes
  +          *     for the specific character set.
  +          */
  +          class USASCIICharsetEncoder : public CharsetEncoder
  +          {
  +          public:
  +              USASCIICharsetEncoder() {
  +              }
  +              
  +              virtual log4cxx_status_t encode(const LogString& in,
  +                    LogString::const_iterator& iter,
  +                    ByteBuffer& out) {
  +                  log4cxx_status_t stat = APR_SUCCESS;
  +                  if (iter != in.end()) {
  +                      char* dstEnd = out.data() + out.limit();
  +                      char* dst = out.data() + out.position();
  +                      for(;
  +                          dst < dstEnd && iter != in.end();
  +                          iter++, dst++) {
  +                          unsigned short ch = *iter;
  +                          if (0x7F < ch) {
  +                              stat = APR_BADARG;
  +                              break;
  +                          }
  +                          *dst = ch;
  +                      }
  +                      out.position(dst - out.data());
  +                  }
  +                  return stat;
  +              }
  +
  +          private:
  +                  USASCIICharsetEncoder(const USASCIICharsetEncoder&);
  +                  USASCIICharsetEncoder& operator=(const USASCIICharsetEncoder&);
  +          };
  +
  +          /**
  +          *   An engine to transform LogStrings into bytes
  +          *     for the specific character set.
  +          */
  +          class ISOLatin1CharsetEncoder : public CharsetEncoder
  +          {
  +          public:
  +              ISOLatin1CharsetEncoder() {
  +              }
  +              
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +              virtual log4cxx_status_t encode(const LogString& in,
  +                    LogString::const_iterator& iter,
  +                    ByteBuffer& out) {
  +                  log4cxx_status_t stat = APR_SUCCESS;
  +                  if (iter != in.end()) {
  +                      char* dstEnd = out.data() + out.limit();
  +                      char* dst = out.data() + out.position();
  +                      for(;
  +                          dst < dstEnd && iter != in.end();
  +                          iter++, dst++) {
  +                          unsigned short ch = *iter;
  +                          if (0xFF < ch) {
  +                              stat = APR_BADARG;
  +                              break;
  +                          }
  +                          *dst = ch;
  +                      }
  +                      out.position(dst - out.data());
  +                  }
  +                  return stat;
  +              }
  +#endif
  +
  +          private:
  +                  ISOLatin1CharsetEncoder(const ISOLatin1CharsetEncoder&);
  +                  ISOLatin1CharsetEncoder& operator=(const ISOLatin1CharsetEncoder&);
  +          };
  +
  +
  +          /**
  +          *   An engine to transform LogStrings into bytes
  +          *     for the specific character set.
  +          */
  +          class UTF8CharsetEncoder : public CharsetEncoder
  +          {
  +          public:
  +              UTF8CharsetEncoder() {
  +              }
  +
  +#if LOG4CXX_LOGCHAR_IS_UTF8              
  +              virtual log4cxx_status_t encode(const LogString& in,
  +                    LogString::const_iterator& iter,
  +                    ByteBuffer& out) {
  +                  if (iter != in.end()) {
  +                      size_t inOffset = iter - in.begin();
  +                      char* dst = out.data() + out.position();
  +                      size_t count = in.length() - inOffset;
  +                      if (count > out.remaining()) {
  +                          count = out.remaining();
  +                      }
  +                      memcpy(out.data() + out.position(), 
  +                             in.data() + inOffset,
  +                             count);
  +                      out.position(out.position() + count);
  +                      iter += count;
  +                  }
  +                  return APR_SUCCESS;
  +              }
  +#endif
  +
  +#if LOG4CXX_LOGCHAR_IS_WCHAR              
  +              virtual log4cxx_status_t encode(const LogString& in,
  +                    LogString::const_iterator& iter,
  +                    ByteBuffer& out) {
  +                  if (iter != in.end()) {
  +                      size_t inOffset = iter - in.begin();
  +                      char* dst = out.data() + out.position();
  +                      char* dstEnd = out.data() + out.limit();
  +                      for(;
  +                           dst < dstEnd && iter != in.end();
  +                           iter++) {
  +                           unsigned short sv = *iter;
  +                           if (sv <= 0x7F) {
  +                               *(dst++) = sv;
  +                           } else if (sv <= 0x7FF) {
  +                               if(dst + 1 < dstEnd) {
  +                                   *(dst++) = 0xC0 | (sv >> 6);
  +                                   *(dst++) = 0x80 | (sv & 0x3F);
  +                               } else {
  +                                   break;
  +                               }
  +                           } else if (sv < 0xD800 || sv > 0xDFFF) {
  +                               if (dst + 2 < dstEnd) {
  +                                   *(dst++) = 0xE0 | (sv >> 12);
  +                                   *(dst++) = 0x80 | ((sv >> 6) & 0x3F);
  +                                   *(dst++) = 0x80 | (sv & 0x3F);
  +                               } else {
  +                                   break;
  +                               }
  +                           } else {
  +                               if (dst + 3 < dstEnd && (iter + 1) != in.end()) {
  +                                   *(dst++) = 0xF0 | ((sv >> 8) & 0x03);
  +                                   *(dst++) = 0x80 | ((sv >> 2) & 0x3F);
  +                                   unsigned short ls = *(++iter); 
  +                                   *(dst++) = 0x80 
  +                                               | ((sv & 0x03) << 4) 
  +                                               | ((ls >> 6) & 0x0F);
  +                                   *(dst++) = 0x80 | (ls & 0x3F);
  +                               } else {
  +                                   break;
  +                               }
  +                           }
  +                       }
  +                       out.position(dst - out.data());
  +                  }
  +                  return APR_SUCCESS;
  +              }
  +#endif
  +
  +          private:
  +                  UTF8CharsetEncoder(const UTF8CharsetEncoder&);
  +                  UTF8CharsetEncoder& operator=(const UTF8CharsetEncoder&);
  +          };
  +
  +
  +          /**
  +          *   An engine to transform LogStrings into bytes
  +          *     for the specific character set.
  +          */
  +          class UTF16BECharsetEncoder : public CharsetEncoder
  +          {
  +          public:
  +              UTF16BECharsetEncoder() {
  +              }
  +              
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +              virtual log4cxx_status_t encode(const LogString& in,
  +                    LogString::const_iterator& iter,
  +                    ByteBuffer& out) {
  +                  log4cxx_status_t stat = APR_SUCCESS;
  +                  char* dstEnd = out.data() + out.limit() - 1;
  +                  char* dst = out.data() + out.position();
  +                  for(;
  +                      dst < dstEnd && iter != in.end();
  +                      iter++) {
  +                      *(dst++) = (*iter & 0xFF00) >> 8;
  +                      *(dst++) = *iter & 0x00FF;
  +                  }
  +                  out.position(dst - out.data());
  +                  return stat;
  +              }
  +#endif
  +
  +          private:
  +                  UTF16BECharsetEncoder(const UTF16BECharsetEncoder&);
  +                  UTF16BECharsetEncoder& operator=(const UTF16BECharsetEncoder&);
  +          };
  +
  +          /**
  +          *   An engine to transform LogStrings into bytes
  +          *     for the specific character set.
  +          */
  +          class UTF16LECharsetEncoder : public CharsetEncoder
  +          {
  +          public:
  +              UTF16LECharsetEncoder() {
  +              }
  +              
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +              virtual log4cxx_status_t encode(const LogString& in,
  +                    LogString::const_iterator& iter,
  +                    ByteBuffer& out) {
  +                  log4cxx_status_t stat = APR_SUCCESS;
  +                  char* dstEnd = out.data() + out.limit() - 1;
  +                  char* dst = out.data() + out.position();
  +                  for(;
  +                      dst < dstEnd && iter != in.end();
  +                      iter++) {
  +                      *(dst++) = *iter & 0x00FF;
  +                      *(dst++) = (*iter & 0xFF00) >> 8;
  +                  }
  +                  out.position(dst - out.data());
  +                  return stat;
  +              }
  +#endif
  +
  +          private:
  +                  UTF16LECharsetEncoder(const UTF16LECharsetEncoder&);
  +                  UTF16LECharsetEncoder& operator=(const UTF16LECharsetEncoder&);
  +          };
  +
  +#endif
  +
  +        } // namespace helpers
  +
  +}  //namespace log4cxx
   
   
  -CharsetEncoder::CharsetEncoder(const char* topage) {
  -#if LOG4CXX_LOGCHAR_IS_WCHAR
  -  const char* frompage = "WCHAR_T";
  -#endif
  -#if LOG4CXX_LOGCHAR_IS_UTF8
  -  const char* frompage = "UTF-8";
  -#endif
  -  apr_status_t stat = apr_xlate_open((apr_xlate_t**) &convset,
  -     topage,
  -     frompage,
  -     (apr_pool_t*) pool.getAPRPool());
  -  if (stat != APR_SUCCESS) {
  -    throw IllegalArgumentException(topage);
  -  }
  -}
   
  -CharsetEncoder::~CharsetEncoder() {
  -  apr_xlate_close((apr_xlate_t*) convset);
  +CharsetEncoder::CharsetEncoder() {
   }
   
  -CharsetEncoderPtr CharsetEncoder::getDefaultEncoder() {
  -  static CharsetEncoderPtr encoder(new CharsetEncoder(APR_LOCALE_CHARSET));
  -  return encoder;
  +CharsetEncoder::~CharsetEncoder() {
   }
   
  -CharsetEncoderPtr CharsetEncoder::getWideEncoder() {
  -  static CharsetEncoderPtr encoder(new CharsetEncoder("WCHAR_T"));
  +CharsetEncoderPtr CharsetEncoder::getDefaultEncoder() {
  +#if LOG4CXX_HAS_WCHAR_T || defined(_WIN32)
  +  static CharsetEncoderPtr encoder(new WcstombsCharsetEncoder());
  +#else
  +  static CharsetEncoderPtr encoder(new CharsetEncoder(APR_LOCALE_CHARSET));
  +#endif
     return encoder;
   }
  +
   
  -CharsetEncoderPtr CharsetEncoder::getEncoder(const LogString& charset) {
  -#if LOG4CXX_LOGCHAR_IS_WCHAR
  +CharsetEncoderPtr CharsetEncoder::getEncoder(const std::wstring& charset) {
      std::string cs(charset.size(), ' ');
      for(int i = 0; i < charset.size(); i++) {
         cs[i] = (char) charset[i];
      }
  -   return new CharsetEncoder(cs.c_str());
  -#endif
  -#if LOG4CXX_LOGCHAR_IS_UTF8
  -   return new CharsetEncoder(charset.c_str());
  -#endif
  +   return getEncoder(cs);
   }
   
  -
  -log4cxx_status_t CharsetEncoder::encode(const LogString& in,
  -      LogString::const_iterator& iter,
  -      ByteBuffer& out) {
  -      apr_status_t stat;
  -      size_t outbytes_left = out.remaining();
  -      size_t initial_outbytes_left = outbytes_left;
  -      size_t position = out.position();
  -      if (iter == in.end()) {
  -        stat = apr_xlate_conv_buffer((apr_xlate_t*) convset, NULL, NULL,
  -           out.data() + position, &outbytes_left);
  -      } else {
  -        LogString::size_type inOffset = (iter - in.begin());
  -        apr_size_t inbytes_left =
  -            (in.size() - inOffset) * sizeof(LogString::value_type);
  -        apr_size_t initial_inbytes_left = inbytes_left;
  -        stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
  -             (const char*) (in.data() + inOffset),
  -             &inbytes_left,
  -             out.data() + position,
  -             &outbytes_left);
  -        iter += ((initial_inbytes_left - inbytes_left) / sizeof(LogString::value_type));
  -      }
  -      out.position(out.position() + (initial_outbytes_left - outbytes_left));
  -      return stat;
  -}
  +CharsetEncoderPtr CharsetEncoder::getEncoder(const std::string& charset) {
  +#if defined(_WIN32)
  +    if (StringHelper::equalsIgnoreCase(charset, "US-ASCII", "us-ascii") ||
  +        StringHelper::equalsIgnoreCase(charset, "ISO646-US", "iso646-US")) {
  +        return new USASCIICharsetEncoder();
  +    } else if (StringHelper::equalsIgnoreCase(charset, "ISO-8859-1", "iso-8859-1") ||
  +        StringHelper::equalsIgnoreCase(charset, "ISO-LATIN-1", "iso-latin-1")) {
  +        return new ISOLatin1CharsetEncoder();
  +    } else if (StringHelper::equalsIgnoreCase(charset, "UTF-8", "utf-8")) {
  +        return new UTF8CharsetEncoder();
  +    } else if (StringHelper::equalsIgnoreCase(charset, "UTF-16BE", "utf-16be")
  +        || StringHelper::equalsIgnoreCase(charset, "UTF-16", "utf-16")) {
  +        return new UTF16BECharsetEncoder();
  +    } else if (StringHelper::equalsIgnoreCase(charset, "UTF-16LE", "utf-16le")) {
  +        return new UTF16LECharsetEncoder();
  +    } 
  +    throw IllegalArgumentException(charset);
  +#else
  +   return new APRCharsetEncoder(charset.c_str());
  +#endif
  +}
   
   
   void CharsetEncoder::reset() {
  @@ -103,8 +458,6 @@
   }
   
   
  -
  -
   void CharsetEncoder::encode(CharsetEncoderPtr& enc,
       const LogString& src,
       LogString::const_iterator& iter,
  
  
  
  1.2       +2 -48     logging-log4cxx/tests/src/helpers/charsetdecodertestcase.cpp
  
  Index: charsetdecodertestcase.cpp
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/tests/src/helpers/charsetdecodertestcase.cpp,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- charsetdecodertestcase.cpp        1 Apr 2005 02:02:34 -0000       1.1
  +++ charsetdecodertestcase.cpp        22 Apr 2005 05:27:22 -0000      1.2
  @@ -31,8 +31,6 @@
           CPPUNIT_TEST_SUITE(CharsetDecoderTestCase);
                   CPPUNIT_TEST(decode1);
                   CPPUNIT_TEST(decode2);
  -                CPPUNIT_TEST(decode3);
  -                CPPUNIT_TEST(decode4);
   #if LOG4CXX_HAS_WCHAR_T
                   CPPUNIT_TEST(decode5);
                   CPPUNIT_TEST(decode6);
  @@ -49,7 +47,7 @@
             char buf[] = "Hello, World";
             ByteBuffer src(buf, strlen(buf));
   
  -          CharsetDecoderPtr dec(CharsetDecoder::getDecoder(LOG4CXX_STR("US-ASCII")));
  +          CharsetDecoderPtr dec(CharsetDecoder::getDefaultDecoder());
             LogString greeting;
             log4cxx_status_t stat = dec->decode(src, greeting);
             CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
  @@ -67,7 +65,7 @@
             strcpy(buf + BUFSIZE - 3, "Hello");
             ByteBuffer src(buf, strlen(buf));
   
  -          CharsetDecoderPtr dec(CharsetDecoder::getDecoder(LOG4CXX_STR("US-ASCII")));
  +          CharsetDecoderPtr dec(CharsetDecoder::getDefaultDecoder());
   
             LogString greeting;
             log4cxx_status_t stat = dec->decode(src, greeting);
  @@ -84,50 +82,6 @@
           }
   
   
  -        void decode3() {
  -          const char buf[] = { 'A', 0xB6, 0 };
  -          ByteBuffer src((char*) buf, strlen(buf));
  -
  -          CharsetDecoderPtr dec(CharsetDecoder::getDecoder(LOG4CXX_STR("US-ASCII")));
  -
  -          LogString greeting;
  -          log4cxx_status_t stat = dec->decode(src, greeting);
  -          CPPUNIT_ASSERT_EQUAL(true, CharsetDecoder::isError(stat));
  -          CPPUNIT_ASSERT_EQUAL((size_t) 1, src.position());
  -
  -        }
  -
  -
  -        void decode4() {
  -          const char utf8_greet[] = { 'A',
  -                                    0xD8, 0x85,
  -                                    0xD4, 0xB0,
  -                                    0xE0, 0xA6, 0x86,
  -                                    0xE4, 0xB8, 0x83,
  -                                    0xD0, 0x80,
  -                                    0 };
  -#if LOG4CXX_LOGCHAR_IS_WCHAR
  -          //   arbitrary, hopefully meaningless, characters from
  -          //     Latin, Arabic, Armenian, Bengali, CJK and Cyrillic
  -          const logchar greet[] = { L'A', 0x0605, 0x0530, 0x986, 0x4E03, 0x400, 0 };
  -#endif
  -
  -#if LOG4CXX_LOGCHAR_IS_UTF8
  -          const logchar *greet = utf8_greet;
  -#endif
  -          ByteBuffer src((char*) utf8_greet, strlen(utf8_greet));
  -
  -          CharsetDecoderPtr dec(CharsetDecoder::getDecoder(LOG4CXX_STR("UTF-8")));
  -
  -          LogString greeting;
  -          log4cxx_status_t stat = dec->decode(src, greeting);
  -          CPPUNIT_ASSERT_EQUAL(false, CharsetDecoder::isError(stat));
  -          stat = dec->decode(src, greeting);
  -          CPPUNIT_ASSERT_EQUAL(false, CharsetDecoder::isError(stat));
  -
  -          CPPUNIT_ASSERT_EQUAL((LogString) greet, greeting);
  -        }
  -
   
   #if LOG4CXX_HAS_WCHAR_T
           void decode5() {
  
  
  
  1.2       +0 -107    logging-log4cxx/tests/src/helpers/charsetencodertestcase.cpp
  
  Index: charsetencodertestcase.cpp
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/tests/src/helpers/charsetencodertestcase.cpp,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- charsetencodertestcase.cpp        1 Apr 2005 02:02:34 -0000       1.1
  +++ charsetencodertestcase.cpp        22 Apr 2005 05:27:22 -0000      1.2
  @@ -33,11 +33,6 @@
                   CPPUNIT_TEST(encode2);
                   CPPUNIT_TEST(encode3);
                   CPPUNIT_TEST(encode4);
  -#if LOG4CXX_HAS_WCHAR_T
  -                CPPUNIT_TEST(encode5);
  -                CPPUNIT_TEST(encode6);
  -                CPPUNIT_TEST(encode7);
  -#endif
           CPPUNIT_TEST_SUITE_END();
   
           enum { BUFSIZE = 256 };
  @@ -170,108 +165,6 @@
           }
   
   
  -#if LOG4CXX_HAS_WCHAR_T
  -        void encode5() {
  -          const LogString greeting(LOG4CXX_STR("Hello, World"));
  -
  -          CharsetEncoderPtr enc(CharsetEncoder::getWideEncoder());
  -
  -          char buf[BUFSIZE];
  -          memset(buf, 0, BUFSIZE);
  -          ByteBuffer out(buf, BUFSIZE);
  -          LogString::const_iterator iter = greeting.begin();
  -          log4cxx_status_t stat = enc->encode(greeting, iter, out);
  -          CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
  -          CPPUNIT_ASSERT(iter == greeting.end());
  -
  -          stat = enc->encode(greeting, iter, out);
  -          CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
  -          out.flip();
  -          std::wstring encoded((wchar_t*) out.data());
  -          CPPUNIT_ASSERT_EQUAL((std::wstring) L"Hello, World", encoded);
  -          CPPUNIT_ASSERT(iter == greeting.end());
  -          CPPUNIT_ASSERT_EQUAL(12 * sizeof(wchar_t), out.limit());
  -        }
  -
  -        void encode6() {
  -          LogString greeting(BUFSIZE - 3, LOG4CXX_STR('A'));
  -          greeting.append(LOG4CXX_STR("Hello"));
  -
  -          CharsetEncoderPtr enc(CharsetEncoder::getWideEncoder());
  -
  -          char buf[BUFSIZE * sizeof(wchar_t) + 4];
  -          memset(buf, 0, BUFSIZE * sizeof(wchar_t) + 4);
  -          ByteBuffer out(buf, BUFSIZE * sizeof(wchar_t));
  -          LogString::const_iterator iter = greeting.begin();
  -          log4cxx_status_t stat = enc->encode(greeting, iter, out);
  -          CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
  -          CPPUNIT_ASSERT_EQUAL((size_t) 0, out.remaining());
  -          CPPUNIT_ASSERT_EQUAL(LOG4CXX_STR('o'), *(iter+1));
  -          out.flip();
  -          std::wstring encoded((const wchar_t*) out.data());
  -          out.clear();
  -
  -          memset(buf, 0, BUFSIZE * sizeof(wchar_t) + 4);
  -          stat = enc->encode(greeting, iter, out);
  -          CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
  -          CPPUNIT_ASSERT_EQUAL(2 * sizeof(wchar_t), out.position());
  -          CPPUNIT_ASSERT(iter == greeting.end());
  -
  -          stat = enc->encode(greeting, iter, out);
  -          CPPUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
  -          out.flip();
  -          encoded.append((const wchar_t*) out.data());
  -
  -          std::wstring manyAs(BUFSIZE - 3, L'A');
  -          CPPUNIT_ASSERT_EQUAL(manyAs, encoded.substr(0, BUFSIZE - 3));
  -          CPPUNIT_ASSERT_EQUAL(std::wstring(L"Hello"), encoded.substr(BUFSIZE - 3));
  -
  -
  -        }
  -
  -        void encode7() {
  -          //   arbitrary, hopefully meaningless, characters from
  -          //     Latin, Arabic, Armenian, Bengali, CJK and Cyrillic
  -          const wchar_t wide_greet[] = { L'A', 0x0605, 0x0530, 0x986, 0x4E03, 0x400, 0 };
  -#if LOG4CXX_LOGCHAR_IS_WCHAR
  -          const logchar *greet = wide_greet;
  -#endif
  -
  -#if LOG4CXX_LOGCHAR_IS_UTF8
  -          const logchar greet[] = { 'A',
  -                          0xD8, 0x85,
  -                          0xD4, 0xB0,
  -                          0xE0, 0xA6, 0x86,
  -                          0xE4, 0xB8, 0x83,
  -                          0xD0, 0x80,
  -                          0 };
  -#endif
  -
  -            CharsetEncoderPtr enc(CharsetEncoder::getWideEncoder());
  -
  -            char buf[BUFSIZE * sizeof(wchar_t)];
  -            ByteBuffer out(buf, BUFSIZE * sizeof(wchar_t));
  -
  -            LogString greeting(greet);
  -            LogString::const_iterator iter = greeting.begin();
  -            log4cxx_status_t stat = enc->encode(greeting, iter, out);
  -            CPPUNIT_ASSERT_EQUAL(false, CharsetEncoder::isError(stat));
  -            stat = enc->encode(greeting, iter, out);
  -            CPPUNIT_ASSERT_EQUAL(false, CharsetEncoder::isError(stat));
  -            out.flip();
  -            CPPUNIT_ASSERT_EQUAL((size_t) 6 * sizeof(wchar_t), out.limit());
  -
  -            const wchar_t* actual = (const wchar_t*) out.data();
  -            for(int i = 0; i < 6; i++) {
  -               CPPUNIT_ASSERT_EQUAL((int) wide_greet[i], (int) actual[i]);
  -            }
  -            CPPUNIT_ASSERT(iter == greeting.end());
  -
  -
  -        }
  -
  -#endif
  -
   
   };
   
  
  
  

Reply via email to