carnold     2005/04/28 16:26:33

  Modified:    include/log4cxx/helpers charsetencoder.h unicodehelper.h
               src      charsetdecoder.cpp charsetencoder.cpp
                        unicodehelper.cpp
  Log:
  LOGCXX-59: Encoding, Linux iter
  
  Revision  Changes    Path
  1.4       +6 -4      logging-log4cxx/include/log4cxx/helpers/charsetencoder.h
  
  Index: charsetencoder.h
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/include/log4cxx/helpers/charsetencoder.h,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- charsetencoder.h  28 Apr 2005 20:53:45 -0000      1.3
  +++ charsetencoder.h  28 Apr 2005 23:26:33 -0000      1.4
  @@ -59,8 +59,8 @@
   
                 /**
                  *  Get encoder for specified character set.
  -               *  @param charset, the following values should be recognized: 
  -               *     "US-ASCII", "ISO-8859-1", "UTF-8", 
  +               *  @param charset, the following values should be recognized:
  +               *     "US-ASCII", "ISO-8859-1", "UTF-8",
                  *     "UTF-16BE", "UTF-16LE".
                  *  @return encoder, may be null if charset was not recognized.
                  */
  @@ -74,8 +74,8 @@
   
                 /**
                  *  Get encoder for specified character set.
  -               *  @param charset, the following values should be recognized: 
  -               *     "US-ASCII", "ISO-8859-1", "UTF-8", 
  +               *  @param charset, the following values should be recognized:
  +               *     "US-ASCII", "ISO-8859-1", "UTF-8",
                  *     "UTF-16BE", "UTF-16LE".
                  *  @return encoder, may be null if charset was not recognized.
                  */
  @@ -133,6 +133,8 @@
                  *   Private assignment operator.
                  */
                     CharsetEncoder& operator=(const CharsetEncoder&);
  +
  +              static CharsetEncoder* createDefaultEncoder();
             };
   
           } // namespace helpers
  
  
  
  1.2       +13 -5     logging-log4cxx/include/log4cxx/helpers/unicodehelper.h
  
  Index: unicodehelper.h
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/include/log4cxx/helpers/unicodehelper.h,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- unicodehelper.h   28 Apr 2005 20:53:45 -0000      1.1
  +++ unicodehelper.h   28 Apr 2005 23:26:33 -0000      1.2
  @@ -39,7 +39,7 @@
                 /**
                  *   Decodes next character from a sequence of UTF-8 bytes.
                  *   @param src start of character, will be modified to point at next character.
  -               *   @param srcEnd end of sequence. 
  +               *   @param srcEnd end of sequence.
                  *   @return scalar value (UCS-4) or 0xFFFF if invalid sequence.
                  */
                 static unsigned int decodeUTF8(const char*& src,
  @@ -71,11 +71,11 @@
                 static int encodeUTF16LE(unsigned int ch, char* dst);
   
   
  -#if LOG4CXX_HAS_WCHAR_T             
  +#if LOG4CXX_HAS_WCHAR_T
                 /**
                  *   Decodes next character from a sequence of wchar_t values.
                  *   @param src start of character, will be modified to point at next character.
  -               *   @param srcEnd end of sequence. 
  +               *   @param srcEnd end of sequence.
                  *   @return scalar value (UCS-4) or 0xFFFF if invalid sequence.
                  */
                 static unsigned int decodeWide(const wchar_t*& src, const wchar_t* srcEnd);
  @@ -97,18 +97,26 @@
               */
                 static int UnicodeHelper::lengthUTF8(wchar_t ch);
   
  -#endif              
  +#endif
   
                 /**
                  *   Decodes next character from a LogString.
                  *   @param in string from which the character is extracted.
                  *   @param iter iterator addressing start of character, will be
  -            *   advanced to next character if successful. 
  +            *   advanced to next character if successful.
                  *   @return scalar value (UCS-4) or 0xFFFF if invalid sequence.
                  */
                 static unsigned int decode(const LogString& in,
                     LogString::const_iterator& iter);
   
  +              /**
  +               *   Encodes a UCS-4 value to logchar.
  +               *   @param ch UCS-4 value.
  +               *   @param dst buffer to receive logchar encoding (must be at least 8)
  +               *   @return number of logchar needed to represent character
  +               */
  +              static int encode(unsigned int ch, logchar* dst);
  +
             };
       }
   }
  
  
  
  1.4       +139 -12   logging-log4cxx/src/charsetdecoder.cpp
  
  Index: charsetdecoder.cpp
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/src/charsetdecoder.cpp,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- charsetdecoder.cpp        28 Apr 2005 20:53:45 -0000      1.3
  +++ charsetdecoder.cpp        28 Apr 2005 23:26:33 -0000      1.4
  @@ -19,9 +19,7 @@
   #include <log4cxx/helpers/exception.h>
   #include <log4cxx/helpers/unicodehelper.h>
   #include <apr_xlate.h>
  -#if HAS_LANGINFO_CODESET
  -#include <langinfo.h>
  -#endif
  +
   
   using namespace log4cxx;
   using namespace log4cxx::helpers;
  @@ -190,7 +188,7 @@
   
             /**
             *    Decoder used when the external and internal charsets
  -        *    are the same.
  +          *    are the same.
             *
             */
             class TrivialCharsetDecoder : public CharsetDecoder
  @@ -221,6 +219,143 @@
                     TrivialCharsetDecoder& operator=(const TrivialCharsetDecoder&);
             };
   
  +
  +#if LOG4CXX_LOGCHAR_IS_UTF8
  +typedef TrivialCharsetDecoder UTF8CharsetDecoder;
  +#endif
  +
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +/**
  +*    Converts from UTF-8 to std::wstring
  +*
  +*/
  +class UTF8CharsetDecoder : public CharsetDecoder
  +{
  +public:
  +    UTF8CharsetDecoder() {
  +    }
  +
  +    virtual ~UTF8CharsetDecoder() {
  +    }
  +
  +private:
  +    virtual log4cxx_status_t decode(ByteBuffer& in,
  +        LogString& out) {
  +        log4cxx_status_t stat = APR_SUCCESS;
  +        if (in.remaining() > 0) {
  +          wchar_t buf[2];
  +
  +          const char* src = in.current();
  +          const char* srcEnd = in.data() + in.limit();
  +          while(src < srcEnd) {
  +             unsigned int sv = UnicodeHelper::decodeUTF8(src, srcEnd);
  +             if (sv == 0xFFFF) {
  +                stat = APR_BADARG;
  +                break;
  +             }
  +             int wchars = UnicodeHelper::encodeWide(sv, buf);
  +             out.append(buf, wchars);
  +          }
  +          in.position(src - in.data());
  +        }
  +        return stat;
  +    }
  +
  +
  +
  +private:
  +        UTF8CharsetDecoder(const UTF8CharsetDecoder&);
  +        UTF8CharsetDecoder& operator=(const UTF8CharsetDecoder&);
  +};
  +#endif
  +
  +/**
  +*    Converts from ISO-8859-1 to LogString.
  +*
  +*/
  +class ISOLatinCharsetDecoder : public CharsetDecoder
  +{
  +public:
  +    ISOLatinCharsetDecoder() {
  +    }
  +
  +    virtual ~ISOLatinCharsetDecoder() {
  +    }
  +
  +private:
  +    virtual log4cxx_status_t decode(ByteBuffer& in,
  +        LogString& out) {
  +        log4cxx_status_t stat = APR_SUCCESS;
  +        if (in.remaining() > 0) {
  +          logchar buf[8];
  +
  +          const unsigned char* src = (unsigned char*) in.current();
  +          const unsigned char* srcEnd = src + in.remaining();
  +          while(src < srcEnd) {
  +             unsigned int sv = *(src++);
  +             int logchars = UnicodeHelper::encode(sv, buf);
  +             out.append(buf, logchars);
  +          }
  +          in.position(in.limit());
  +        }
  +        return stat;
  +    }
  +
  +
  +
  +private:
  +        ISOLatinCharsetDecoder(const ISOLatinCharsetDecoder&);
  +        ISOLatinCharsetDecoder& operator=(const ISOLatinCharsetDecoder&);
  +};
  +
  +
  +/**
  +*    Converts from US-ASCII to LogString.
  +*
  +*/
  +class USASCIICharsetDecoder : public CharsetDecoder
  +{
  +public:
  +    USASCIICharsetDecoder() {
  +    }
  +
  +    virtual ~USASCIICharsetDecoder() {
  +    }
  +
  +private:
  +
  +  virtual log4cxx_status_t decode(ByteBuffer& in,
  +      LogString& out) {
  +      log4cxx_status_t stat = APR_SUCCESS;
  +      if (in.remaining() > 0) {
  +        logchar buf[8];
  +
  +        const unsigned char* src = (unsigned char*) in.current();
  +        const unsigned char* srcEnd = src + in.remaining();
  +        while(src < srcEnd) {
  +           unsigned char sv = *src;
  +           if (sv < 0x80) {
  +              src++;
  +              int logchars = UnicodeHelper::encode(sv, buf);
  +              out.append(buf, logchars);
  +           } else {
  +             stat = APR_BADARG;
  +             break;
  +           }
  +        }
  +        in.position(src - (const unsigned char*) in.data());
  +      }
  +      return stat;
  +    }
  +
  +
  +
  +private:
  +        USASCIICharsetDecoder(const USASCIICharsetDecoder&);
  +        USASCIICharsetDecoder& operator=(const USASCIICharsetDecoder&);
  +};
  +
  +
   #if LOG4CXX_LOGCHAR_IS_UTF8
             /**
             *    Decoder to convert array of wchar_t to UTF-8 bytes.
  @@ -276,14 +411,6 @@
   }
   
   CharsetDecoder* CharsetDecoder::createDefaultDecoder() {
  -#if LOG4CXX_LOGCHAR_IS_UTF8 && HAS_LANGINFO_CODESET
  -    //
  -    //   detect if encoding is UTF-8
  -    //
  -    if(strcmp(nl_langinfo(CODESET), "UTF-8") == 0) {
  -        return new TrivialCharsetDecoder();
  -    }
  -#endif
   #if LOG4CXX_HAS_WCHAR_T
       return new MbstowcsCharsetDecoder();
   #else
  
  
  
  1.5       +25 -19    logging-log4cxx/src/charsetencoder.cpp
  
  Index: charsetencoder.cpp
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/src/charsetencoder.cpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- charsetencoder.cpp        28 Apr 2005 20:53:45 -0000      1.4
  +++ charsetencoder.cpp        28 Apr 2005 23:26:33 -0000      1.5
  @@ -21,6 +21,7 @@
   #include <log4cxx/helpers/stringhelper.h>
   #include <log4cxx/helpers/unicodehelper.h>
   
  +
   using namespace log4cxx;
   using namespace log4cxx::helpers;
   
  @@ -100,7 +101,7 @@
   #if LOG4CXX_HAS_WCHAR_T
             /**
              *  A character encoder implemented using wcstombs.
  -          */   
  +          */
             class WcstombsCharsetEncoder : public CharsetEncoder
             {
             public:
  @@ -248,10 +249,10 @@
             /**
             *   Converts a LogString to ISO-8859-1.
             */
  -          class ISOLatin1CharsetEncoder : public CharsetEncoder
  +          class ISOLatinCharsetEncoder : public CharsetEncoder
             {
             public:
  -              ISOLatin1CharsetEncoder() {
  +              ISOLatinCharsetEncoder() {
                 }
   
                 virtual log4cxx_status_t encode(const LogString& in,
  @@ -260,12 +261,12 @@
                     log4cxx_status_t stat = APR_SUCCESS;
                     if (iter != in.end()) {
                         while(out.remaining() > 0 && iter != in.end()) {
  -                    LogString::const_iterator prev(iter);
  +                          LogString::const_iterator prev(iter);
                             unsigned int sv = UnicodeHelper::decode(in, iter);
                             if (sv <= 0xFF) {
                                 out.put((char) sv);
                             } else {
  -                       iter = prev;
  +                              iter = prev;
                                 stat = APR_BADARG;
                                 break;
                             }
  @@ -273,10 +274,10 @@
                     }
                     return stat;
                 }
  -          
  +
             private:
  -                  ISOLatin1CharsetEncoder(const ISOLatin1CharsetEncoder&);
  -                  ISOLatin1CharsetEncoder& operator=(const ISOLatin1CharsetEncoder&);
  +                  ISOLatinCharsetEncoder(const ISOLatinCharsetEncoder&);
  +                  ISOLatinCharsetEncoder& operator=(const ISOLatinCharsetEncoder&);
             };
   
             /**
  @@ -297,7 +298,7 @@
                    if (requested > out.remaining()/sizeof(logchar)) {
                       requested = out.remaining()/sizeof(logchar);
                    }
  -                 memcpy(out.current(), 
  +                 memcpy(out.current(),
                          (const char*) in.data() + (iter - in.begin()),
                         requested * sizeof(logchar));
                    iter += requested;
  @@ -317,7 +318,7 @@
   
   #if LOG4CXX_LOGCHAR_IS_WCHAR
             /**
  -         *  Converts a wstring to UTF-8. 
  +         *  Converts a wstring to UTF-8.
             */
             class UTF8CharsetEncoder : public CharsetEncoder
             {
  @@ -462,19 +463,23 @@
   }
   
   CharsetEncoderPtr CharsetEncoder::getDefaultEncoder() {
  +  static CharsetEncoderPtr encoder(createDefaultEncoder());
  +  return encoder;
  +}
  +
  +CharsetEncoder* CharsetEncoder::createDefaultEncoder() {
   #if LOG4CXX_HAS_WCHAR_T
  -  static CharsetEncoderPtr encoder(new WcstombsCharsetEncoder());
  +  return new WcstombsCharsetEncoder();
   #else
  -  static CharsetEncoderPtr encoder(new APRCharsetEncoder(APR_LOCALE_CHARSET));
  +  return new APRCharsetEncoder(APR_LOCALE_CHARSET);
   #endif
  -  return encoder;
   }
   
   
   CharsetEncoderPtr CharsetEncoder::getEncoder(const std::wstring& charset) {
      std::string cs(charset.size(), ' ');
  -   for(std::wstring::size_type i = 0; 
  -      i < charset.length(); 
  +   for(std::wstring::size_type i = 0;
  +      i < charset.length();
        i++) {
         cs[i] = (char) charset[i];
      }
  @@ -496,13 +501,13 @@
   
   
   CharsetEncoderPtr CharsetEncoder::getEncoder(const std::string& charset) {
  -#if defined(_WIN32)
       if (StringHelper::equalsIgnoreCase(charset, "US-ASCII", "us-ascii") ||
  -        StringHelper::equalsIgnoreCase(charset, "ISO646-US", "iso646-US")) {
  +        StringHelper::equalsIgnoreCase(charset, "ISO646-US", "iso646-US") ||
  +        StringHelper::equalsIgnoreCase(charset, "ANSI_X3.4-1968", "ansi_x3.4-1968")) {
           return new USASCIICharsetEncoder();
       } else if (StringHelper::equalsIgnoreCase(charset, "ISO-8859-1", "iso-8859-1") ||
           StringHelper::equalsIgnoreCase(charset, "ISO-LATIN-1", "iso-latin-1")) {
  -        return new ISOLatin1CharsetEncoder();
  +        return new ISOLatinCharsetEncoder();
       } else if (StringHelper::equalsIgnoreCase(charset, "UTF-8", "utf-8")) {
           return new UTF8CharsetEncoder();
       } else if (StringHelper::equalsIgnoreCase(charset, "UTF-16BE", "utf-16be")
  @@ -511,9 +516,10 @@
       } else if (StringHelper::equalsIgnoreCase(charset, "UTF-16LE", "utf-16le")) {
           return new UTF16LECharsetEncoder();
       }
  +#if defined(_WIN32)
       throw IllegalArgumentException(charset);
   #else
  -   return new APRCharsetEncoder(charset.c_str());
  +    return new APRCharsetEncoder(charset.c_str());
   #endif
   }
   
  
  
  
  1.2       +19 -4     logging-log4cxx/src/unicodehelper.cpp
  
  Index: unicodehelper.cpp
  ===================================================================
  RCS file: /home/cvs/logging-log4cxx/src/unicodehelper.cpp,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- unicodehelper.cpp 28 Apr 2005 20:53:45 -0000      1.1
  +++ unicodehelper.cpp 28 Apr 2005 23:26:33 -0000      1.2
  @@ -22,7 +22,7 @@
   
   
   
  -unsigned int UnicodeHelper::decodeUTF8(const char*& src, 
  +unsigned int UnicodeHelper::decodeUTF8(const char*& src,
                                          const char* srcEnd) {
     const char* start = src;
     unsigned char ch1 = *(src++);
  @@ -61,7 +61,7 @@
             }
             if ((ch1 & 0xF0) == 0xE0) {
                 unsigned rv = ((ch1 & 0x0F) << 12)
  -              + ((ch2 & 0x3F) << 6) 
  +              + ((ch2 & 0x3F) << 6)
                 + (ch3 & 0x3F);
                 if (rv <= 0x800) {
                 src = start;
  @@ -124,7 +124,7 @@
   int UnicodeHelper::lengthUTF8(wchar_t ch) {
     if (ch <= 0x7F) {
         return 1;
  -  } 
  +  }
     if(ch <= 0x7FF) {
         return 2;
     }
  @@ -153,7 +153,7 @@
   int UnicodeHelper::lengthUTF8(wchar_t ch) {
     if (ch <= 0x7F) {
         return 1;
  -  } 
  +  }
     if(ch <= 0x7FF) {
         return 2;
     }
  @@ -256,3 +256,18 @@
   }
   #endif
   
  +
  +#if LOG4CXX_LOGCHAR_IS_WCHAR
  +int UnicodeHelper::encode(unsigned int sv, logchar* out) {
  +    return encodeWide(sv, out);
  +}
  +#endif
  +
  +
  +#if LOG4CXX_LOGCHAR_IS_UTF8
  +int UnicodeHelper::encode(unsigned int sv, logchar* out) {
  +    return encodeUTF8(sv, out);
  +}
  +#endif
  +
  +
  
  
  

Reply via email to