[M-CVS] SF.net SVN: mahogany: [7344] trunk/M

vadz Mon, 27 Aug 2007 13:03:22 -0700

Revision: 7344
          http://mahogany.svn.sourceforge.net/mahogany/?rev=7344&view=rev
Author:   vadz
Date:     2007-08-27 13:03:17 -0700 (Mon, 27 Aug 2007)


Log Message:
-----------
revised the code dealing with headers encoding and made sure it works in 
Unicode build

Modified Paths:
--------------
    trunk/M/include/Address.h
    trunk/M/include/AddressCC.h
    trunk/M/include/SendMessageCC.h
    trunk/M/include/mail/MimeDecode.h
    trunk/M/src/gui/wxComposeView.cpp
    trunk/M/src/mail/Address.cpp
    trunk/M/src/mail/AddressCC.cpp
    trunk/M/src/mail/MailFolder.cpp
    trunk/M/src/mail/MimeDecode.cpp
    trunk/M/src/mail/SendMessageCC.cpp
    trunk/M/tests/mime/decode.cpp

Modified: trunk/M/include/Address.h
===================================================================
--- trunk/M/include/Address.h   2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/include/Address.h   2007-08-27 20:03:17 UTC (rev 7344)
@@ -15,6 +15,8 @@
 
 #include "MObject.h"
 
+#include <wx/fontenc.h>
+
 class Profile;
 
 // see near definition of this symbol in src/modules/PalmOS.cpp
@@ -94,6 +96,14 @@
    static String BuildFullForm(const String& personal, const String& address);
 
    /**
+      Returns the sender address from the given profile.
+
+      The address is constructed using the personal name, host name and default
+      domain (if necessary) options.
+    */
+   static String GetSenderAddress(Profile *profile);
+
+   /**
       Returns true if the address matches any of the entries in the array.
 
       Array entries may contain wildcards (? and *).
@@ -136,13 +146,18 @@
 class AddressList : public MObjectRC
 {
 public:
-   /// create the address list from string (may be empty)
+   /**
+      Create the address list from string.
+
+      @param address the string with the address
+      @param defhost the default host name to use for unqualified addresses
+      @param enc the encoding to use for non-ASCII characters if possible (if
+                 not, UTF-8 is used, as with MIME::EncodeHeader())
+    */
    static AddressList *Create(const String& address,
-                              const String& defhost = wxEmptyString);
+                              const String& defhost = wxEmptyString,
+                              wxFontEncoding enc = wxFONTENCODING_SYSTEM);
 
-   /// create the "From" address using settings in this profile
-   static AddressList *CreateFromAddress(Profile *profile);
-
    /// get the first address in the list, return NULL if list is empty
    virtual Address *GetFirst() const = 0;
 

Modified: trunk/M/include/AddressCC.h
===================================================================
--- trunk/M/include/AddressCC.h 2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/include/AddressCC.h 2007-08-27 20:03:17 UTC (rev 7344)
@@ -84,8 +84,9 @@
    String m_addressHeader;
 
    // these methods use our private ctor
-   friend AddressList *AddressList::Create(const String&, const String&);
-   friend AddressList *AddressList::CreateFromAddress(Profile *profile);
+   friend AddressList *AddressList::Create(const String& address,
+                                           const String& defHost,
+                                           wxFontEncoding enc);
 
    MOBJECT_DEBUG(AddressListCC)
    DECLARE_NO_COPY_CLASS(AddressListCC)
@@ -93,7 +94,9 @@
 
 // wrapper around rfc822_parse_adrlist() c-client function
 extern
-mail_address *ParseAddressList(const String& address, const String& defhost);
+mail_address *ParseAddressList(const String& address,
+                               const String& defhost,
+                               wxFontEncoding enc);
 
 #endif // _ADDRESSCC_H_
 

Modified: trunk/M/include/SendMessageCC.h
===================================================================
--- trunk/M/include/SendMessageCC.h     2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/include/SendMessageCC.h     2007-08-27 20:03:17 UTC (rev 7344)
@@ -148,15 +148,6 @@
    /// translate the (wxWin) encoding to (MIME) charset
    String EncodingToCharset(wxFontEncoding enc);
 
-   /// encode the string using m_encHeaders encoding
-   String EncodeHeaderString(const String& header);
-
-   /// encode the address field using m_encHeaders
-   void EncodeAddress(struct mail_address *adr);
-
-   /// encode all entries in the list of addresses
-   void EncodeAddressList(struct mail_address *adr);
-
    /// write the message using the specified writer function
    bool WriteMessage(soutr_t writer, void *where);
 

Modified: trunk/M/include/mail/MimeDecode.h
===================================================================
--- trunk/M/include/mail/MimeDecode.h   2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/include/mail/MimeDecode.h   2007-08-27 20:03:17 UTC (rev 7344)
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////
 // Project:     M - cross platform e-mail GUI client
-// File name:   mail/MimeDecode.h: functions for MIME words decoding
+// File name:   mail/MimeDecode.h: functions for MIME words encoding/decoding
 // Author:      Vadim Zeitlin
 // Created:     2007-07-29
 // CVS-ID:      $Id$
@@ -12,6 +12,7 @@
 #define M_MAIL_MIMEDECODE_H
 
 #include <wx/fontenc.h>
+#include <wx/buffer.h>
 
 /**
    Various MIME helpers.
@@ -20,6 +21,55 @@
 {
 
 /**
+   MIME encodings defined by RFC 2047.
+
+   NB: don't change the values of the enum elements, EncodeHeader() relies on
+       them being what they are!
+ */
+enum Encoding
+{
+   Encoding_Unknown,
+   Encoding_Base64 = 'B',
+   Encoding_QuotedPrintable = 'Q'
+};
+
+/**
+   Return the MIME encoding which should be preferrably used for the given font
+   encoding.
+
+   For encodings which use a lot of ASCII characters, QP MIME encoding is
+   preferred as it is more space efficient and results in more or less readable
+   headers. For the others, Base64 is used.
+
+   @param enc encoding not equal to wxFONTENCODING_SYSTEM or
+              wxFONTENCODING_DEFAULT
+   @return the corresponding MIME encoding or Encoding_Unknown if enc is invalid
+ */
+Encoding GetEncodingForFontEncoding(wxFontEncoding enc);
+
+/**
+   Return the MIME charset corresponding to the given font encoding.
+
+   @param enc encoding not equal to wxFONTENCODING_SYSTEM or
+              wxFONTENCODING_DEFAULT
+   @return the charset or empty string if encoding is invalid
+ */
+String GetCharsetForFontEncoding(wxFontEncoding enc);
+
+/**
+   Encode a header containing special symbols using RFC 2047 mechanism.
+
+   @param in text containing arbitrary Unicode characters
+   @param enc suggestion for the encoding to use for encoding the input text,
+              another encoding (typically UTF-8) will be used if the input
+              can't be converted to the specified encoding; default means to
+              use the encoding of the current locale
+   @return the encoded text or NULL buffer if encoding failed
+ */
+wxCharBuffer
+EncodeHeader(const wxString& in, wxFontEncoding enc = wxFONTENCODING_SYSTEM);
+
+/**
    RFC 2047 compliant message decoding.
 
    All encoded words from the header are decoded, but only the encoding of the

Modified: trunk/M/src/gui/wxComposeView.cpp
===================================================================
--- trunk/M/src/gui/wxComposeView.cpp   2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/src/gui/wxComposeView.cpp   2007-08-27 20:03:17 UTC (rev 7344)
@@ -4932,19 +4932,13 @@
       m_txtFrom->SetValue(from);
 }
 
-/// sets From field using the current profile
+/// sets From field using the default value for it from the current profile
 void
 wxComposeView::SetDefaultFrom()
 {
    if ( m_txtFrom )
    {
-      AddressList_obj addrList(AddressList::CreateFromAddress(m_Profile));
-
-      Address *addr = addrList->GetFirst();
-      if ( addr )
-      {
-         SetFrom(addr->GetAddress());
-      }
+      SetFrom(Address::GetSenderAddress(m_Profile));
    }
 }
 

Modified: trunk/M/src/mail/Address.cpp
===================================================================
--- trunk/M/src/mail/Address.cpp        2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/src/mail/Address.cpp        2007-08-27 20:03:17 UTC (rev 7344)
@@ -40,11 +40,13 @@
 // options we use here
 // ----------------------------------------------------------------------------
 
+extern const MOption MP_ADD_DEFAULT_HOSTNAME;
 extern const MOption MP_EQUIV_ADDRESSES;
 extern const MOption MP_FROM_REPLACE_ADDRESSES;
 extern const MOption MP_FROM_ADDRESS;
 extern const MOption MP_HOSTNAME;
 extern const MOption MP_LIST_ADDRESSES;
+extern const MOption MP_PERSONALNAME;
 
 // ============================================================================
 // implementation
@@ -385,6 +387,29 @@
    return false;
 }
 
+/* static */
+String Address::GetSenderAddress(Profile *profile)
+{
+   String email(READ_CONFIG_TEXT(profile, MP_FROM_ADDRESS));
+
+   // check that the email address has the domain part
+   if ( email.find('@') == String::npos )
+   {
+      String host;
+      if ( READ_CONFIG(profile, MP_ADD_DEFAULT_HOSTNAME) )
+      {
+         host = READ_CONFIG_TEXT(profile, MP_HOSTNAME);
+      }
+
+      // append '@' even if host is empty: this tricks c-client into accepting
+      // addresses without host names instead of using a stupid
+      // MISSING.WHATEVER instead of the host part
+      email << '@' << host;
+   }
+
+   return BuildFullForm(READ_CONFIG(profile, MP_PERSONALNAME), email);
+}
+
 // ----------------------------------------------------------------------------
 // AddressList
 // ----------------------------------------------------------------------------

Modified: trunk/M/src/mail/AddressCC.cpp
===================================================================
--- trunk/M/src/mail/AddressCC.cpp      2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/src/mail/AddressCC.cpp      2007-08-27 20:03:17 UTC (rev 7344)
@@ -27,17 +27,9 @@
 #endif // USE_PCH
 
 #include "AddressCC.h"
+#include "mail/MimeDecode.h"
 
 // ----------------------------------------------------------------------------
-// options we use here
-// ----------------------------------------------------------------------------
-
-extern const MOption MP_ADD_DEFAULT_HOSTNAME;
-extern const MOption MP_FROM_ADDRESS;
-extern const MOption MP_HOSTNAME;
-extern const MOption MP_PERSONALNAME;
-
-// ----------------------------------------------------------------------------
 // constants
 // ----------------------------------------------------------------------------
 
@@ -249,56 +241,16 @@
 }
 
 /* static */
-AddressList *AddressList::CreateFromAddress(Profile *profile)
+AddressList *
+AddressList::Create(const String& address,
+                    const String& defhost,
+                    wxFontEncoding enc)
 {
-   // it is a bit difficult for From because we have 2 entries in config to
-   // specify it (for historic reasons mainly, I don't think this is actually
-   // useful) and so we must combine them together
-
-   // set personal name
-   ADDRESS *adr = mail_newaddr();
-   adr->personal = cpystr(wxConvertWX2MB(READ_CONFIG_TEXT(profile, MP_PERSONALNAME)));
-
-   // set mailbox/host
-   String email = READ_CONFIG(profile, MP_FROM_ADDRESS);
-   size_t pos = email.find('@');
-   if ( pos != String::npos )
-   {
-      adr->mailbox = cpystr(email.substr(0, pos).c_str());
-      adr->host = cpystr(email.c_str() + pos + 1);
-   }
-   else // no '@'?
-   {
-      adr->mailbox = cpystr(wxConvertWX2MB(email));
-
-      String host;
-      if ( READ_CONFIG(profile, MP_ADD_DEFAULT_HOSTNAME) )
-      {
-         host = READ_CONFIG_TEXT(profile, MP_HOSTNAME);
-      }
-
-      if ( host.empty() )
-      {
-         // trick c-client into accepting addresses without host names
-         // instead of using a stupid MISSING.WHATEVER instead of the host
-         // part
-         host = _T('@');
-      }
-
-      adr->host = cpystr(wxConvertWX2MB(host));
-   }
-
-   return new AddressListCC(adr);
-}
-
-/* static */
-AddressList *AddressList::Create(const String& address, const String& defhost)
-{
    ADDRESS *adr = NULL;
 
    if ( !address.empty() )
    {
-      adr = ParseAddressList(address, defhost);
+      adr = ParseAddressList(address, defhost, enc);
 
       if ( !adr || adr->error )
       {
@@ -598,20 +550,37 @@
    return result;
 }
 
-extern ADDRESS *ParseAddressList(const String& address, const String& defhost)
+extern ADDRESS *
+ParseAddressList(const String& address,
+                 const String& defhost,
+                 wxFontEncoding enc)
 {
    // NB: rfc822_parse_adrlist() modifies the string passed in, copy them!
 
-   char *addressCopy = strdup(wxConvertWX2MB(RemoveEmptyListParts(address)));
+   // encode the header in UTF-8 as this never fails
+   wxCharBuffer addressBuf(RemoveEmptyListParts(address).utf8_str());
 
    // use '@' to trick c-client into accepting addresses without host names
-   char *defhostCopy = strdup(defhost.empty() ? "@" : wxConvertWX2MB(defhost.c_str()));
+   wxCharBuffer defhostBuf;
+   if ( defhost.empty() )
+      defhostBuf = wxCharBuffer("@");
+   else
+      defhostBuf = defhost.ToAscii();
 
    ADDRESS *adr = NULL;
-   rfc822_parse_adrlist(&adr, addressCopy, defhostCopy);
+   rfc822_parse_adrlist(&adr, addressBuf.data(), defhostBuf.data());
 
-   free(defhostCopy);
-   free(addressCopy);
+   // encode the personal part of the header as it can contain non-ASCII
+   // characters
+   for ( ADDRESS *adr2 = adr; adr2; adr2 = adr2->next )
+   {
+      if ( adr2->personal )
+      {
+         String personal = wxString::FromUTF8(adr2->personal);
+         fs_give((void **)&adr2->personal);
+         adr2->personal = cpystr(MIME::EncodeHeader(personal, enc));
+      }
+   }
 
    return adr;
 }

Modified: trunk/M/src/mail/MailFolder.cpp
===================================================================
--- trunk/M/src/mail/MailFolder.cpp     2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/src/mail/MailFolder.cpp     2007-08-27 20:03:17 UTC (rev 7344)
@@ -63,6 +63,7 @@
 extern const MOption MP_IMAPHOST;
 extern const MOption MP_LIST_ADDRESSES;
 extern const MOption MP_NNTPHOST;
+extern const MOption MP_PERSONALNAME;
 extern const MOption MP_POPHOST;
 extern const MOption MP_REPLY_COLLAPSE_PREFIX;
 extern const MOption MP_REPLY_PREFIX;
@@ -1082,12 +1083,11 @@
             if ( READ_CONFIG(profile, MP_SET_REPLY_STD_NAME) )
             {
                // use the standard personal name
-               AddressList_obj addrOwn(AddressList::CreateFromAddress(profile));
-               addr = addrOwn->GetFirst();
-               if ( addr )
-               {
-                  from = Address::BuildFullForm(addr->GetName(), from);
-               }
+               from = Address::BuildFullForm
+                      (
+                        READ_CONFIG(profile, MP_PERSONALNAME),
+                        addr->GetEMail()
+                      );
             }
          }
 

Modified: trunk/M/src/mail/MimeDecode.cpp
===================================================================
--- trunk/M/src/mail/MimeDecode.cpp     2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/src/mail/MimeDecode.cpp     2007-08-27 20:03:17 UTC (rev 7344)
@@ -25,6 +25,7 @@
 #include "mail/MimeDecode.h"
 
 #include <wx/fontmap.h>
+#include <wx/tokenzr.h>
 
 // ----------------------------------------------------------------------------
 // local helper functions
@@ -40,6 +41,75 @@
 // implementation
 // ============================================================================
 
+// ----------------------------------------------------------------------------
+// font encoding <-> MIME functions
+// ----------------------------------------------------------------------------
+
+String MIME::GetCharsetForFontEncoding(wxFontEncoding enc)
+{
+   // translate encoding to the charset
+   wxString cs;
+   if ( enc != wxFONTENCODING_SYSTEM && enc != wxFONTENCODING_DEFAULT )
+   {
+      cs = wxFontMapper::GetEncodingName(enc).Upper();
+   }
+
+   return cs;
+}
+
+MIME::Encoding MIME::GetEncodingForFontEncoding(wxFontEncoding enc)
+{
+   // QP should be used for the encodings which mostly overlap with US_ASCII,
+   // Base64 for the others - choose the encoding method
+   switch ( enc )
+   {
+      case wxFONTENCODING_ISO8859_1:
+      case wxFONTENCODING_ISO8859_2:
+      case wxFONTENCODING_ISO8859_3:
+      case wxFONTENCODING_ISO8859_4:
+      case wxFONTENCODING_ISO8859_9:
+      case wxFONTENCODING_ISO8859_10:
+      case wxFONTENCODING_ISO8859_13:
+      case wxFONTENCODING_ISO8859_14:
+      case wxFONTENCODING_ISO8859_15:
+
+      case wxFONTENCODING_CP1250:
+      case wxFONTENCODING_CP1252:
+      case wxFONTENCODING_CP1254:
+      case wxFONTENCODING_CP1257:
+
+      case wxFONTENCODING_UTF7:
+      case wxFONTENCODING_UTF8:
+         return Encoding_QuotedPrintable;
+
+
+      case wxFONTENCODING_ISO8859_5:
+      case wxFONTENCODING_ISO8859_6:
+      case wxFONTENCODING_ISO8859_7:
+      case wxFONTENCODING_ISO8859_8:
+      case wxFONTENCODING_ISO8859_11:
+      case wxFONTENCODING_ISO8859_12:
+
+      case wxFONTENCODING_CP1251:
+      case wxFONTENCODING_CP1253:
+      case wxFONTENCODING_CP1255:
+      case wxFONTENCODING_CP1256:
+
+      case wxFONTENCODING_KOI8:
+         return Encoding_Base64;
+
+      default:
+         FAIL_MSG( _T("unknown encoding") );
+
+      case wxFONTENCODING_SYSTEM:
+         return Encoding_Unknown;
+   }
+}
+
+// ----------------------------------------------------------------------------
+// decoding
+// ----------------------------------------------------------------------------
+
 /*
    See RFC 2047 for the description of the encodings used in the mail headers.
    Briefly, "encoded words" can be inserted which have the form of
@@ -100,8 +170,8 @@
          // pass false to prevent asking the user from here: we can be called
          // during non-interactive operations and popping up a dialog for an
          // unknown charset can be inappropriate
-         const wxFontEncoding
-            encodingWord = wxFontMapper::Get()->CharsetToEncoding(csName, false);
+         const wxFontEncoding encodingWord = wxFontMapperBase::Get()->
+                                               CharsetToEncoding(csName, false);
 
          if ( encodingWord == wxFONTENCODING_SYSTEM )
          {
@@ -313,3 +383,250 @@
    return header;
 }
 
+// ----------------------------------------------------------------------------
+// encoding
+// ----------------------------------------------------------------------------
+
+// returns true if the character must be encoded in a MIME header
+//
+// NB: we suppose that any special characters had been already escaped
+static inline bool NeedsEncodingInHeader(wxUChar c)
+{
+   return iscntrl(c) || c >= 127;
+}
+
+// return true if the string contains any characters which must be encoded
+static bool NeedsEncoding(const String& in)
+{
+   // if input contains "=?", encode it anyhow to avoid generating invalid
+   // encoded words
+   if ( in.find(_T("=?")) == wxString::npos )
+   {
+      // only encode the strings which contain the characters unallowed in RFC
+      // 822 headers
+      wxString::const_iterator p;
+      const wxString::const_iterator end = in.end();
+      for ( p = in.begin(); p != end; ++p )
+      {
+         if ( NeedsEncodingInHeader(*p) )
+            break;
+      }
+
+      if ( p == end )
+      {
+         // string has only valid chars, don't encode
+         return false;
+      }
+   }
+
+   return true;
+}
+
+static String
+EncodeWord(const String& in,
+           wxFontEncoding enc,
+           MIME::Encoding enc2047,
+           const String& csName)
+{
+   if ( !NeedsEncoding(in) )
+      return in;
+
+
+   // encode the word splitting it in the chunks such that they will be no
+   // longer than 75 characters each
+   wxCharBuffer buf(in.mb_str(wxCSConv(enc)));
+   if ( !buf )
+   {
+      // if the header can't be encoded using the given encoding, use UTF-8
+      // which always works
+      buf = in.utf8_str();
+   }
+
+   String out;
+   out.reserve(csName.length() + strlen(buf) + 7 /* for =?...?X?...?= */);
+
+   const char *s = buf;
+   while ( *s )
+   {
+      // if we wrapped, insert a line break
+      if ( !out.empty() )
+         out += "\r\n  ";
+
+      static const size_t RFC2047_MAXWORD_LEN = 75;
+
+      // how many characters may we put in this encoded word?
+      size_t len = 0;
+
+      // take into account the length of "=?charset?...?="
+      int lenRemaining = RFC2047_MAXWORD_LEN - (5 + csName.length());
+
+      // for QP we need to examine all characters
+      if ( enc2047 == MIME::Encoding_QuotedPrintable )
+      {
+         for ( ; s[len]; len++ )
+         {
+            const char c = s[len];
+
+            // normal characters stand for themselves in QP, the encoded ones
+            // take 3 positions (=XX)
+            lenRemaining -= (NeedsEncodingInHeader(c) || strchr(" \t=?", c))
+                              ? 3 : 1;
+
+            if ( lenRemaining <= 0 )
+            {
+               // can't put any more chars into this word
+               break;
+            }
+         }
+      }
+      else // Base64
+      {
+         // we can calculate how many characters we may put into lenRemaining
+         // directly
+         len = (lenRemaining / 4) * 3 - 2;
+
+         // but not more than what we have
+         size_t lenMax = wxStrlen(s);
+         if ( len > lenMax )
+         {
+            len = lenMax;
+         }
+      }
+
+      // do encode this word
+      unsigned char *text = (unsigned char *)s; // cast for cclient
+
+      // length of the encoded text and the text itself
+      unsigned long lenEnc;
+      unsigned char *textEnc;
+
+      if ( enc2047 == MIME::Encoding_QuotedPrintable )
+      {
+            textEnc = rfc822_8bit(text, len, &lenEnc);
+      }
+      else // Encoding_Base64
+      {
+            textEnc = rfc822_binary(text, len, &lenEnc);
+            while ( textEnc[lenEnc - 2] == '\r' && textEnc[lenEnc - 1] == '\n' )
+            {
+               // discard eol which we don't need in the header
+               lenEnc -= 2;
+            }
+      }
+
+      // put into string as we might want to do some more replacements...
+      String encword(wxString::FromAscii(CHAR_CAST(textEnc)
+#if wxCHECK_VERSION(2, 9, 0)
+                                         , lenEnc
+#endif
+                                        ));
+
+      // hack: rfc822_8bit() doesn't encode spaces normally but we must
+      // do it inside the headers
+      //
+      // we also have to encode '?'s in the headers which are not encoded by it
+      if ( enc2047 == MIME::Encoding_QuotedPrintable )
+      {
+         String encword2;
+         encword2.reserve(encword.length());
+
+         bool replaced = false;
+         for ( const wxChar *p = encword.c_str(); *p; p++ )
+         {
+            switch ( *p )
+            {
+               case ' ':
+                  encword2 += _T("=20");
+                  break;
+
+               case '\t':
+                  encword2 += _T("=09");
+                  break;
+
+               case '?':
+                  encword2 += _T("=3F");
+                  break;
+
+               default:
+                  encword2 += *p;
+
+                  // skip assignment to replaced below
+                  continue;
+            }
+
+            replaced = true;
+         }
+
+         if ( replaced )
+         {
+            encword = encword2;
+         }
+      }
+
+      // append this word to the header
+      out << _T("=?") << csName << _T('?') << (char)enc2047 << _T('?')
+          << encword
+          << _T("?=");
+
+      fs_give((void **)&textEnc);
+
+      // skip the already encoded part
+      s += len;
+   }
+
+   return out;
+}
+
+wxCharBuffer MIME::EncodeHeader(const String& in, wxFontEncoding enc)
+{
+   if ( !NeedsEncoding(in) )
+      return in.ToAscii();
+
+   // get the encoding in RFC 2047 sense: choose the most reasonable one
+   if ( enc == wxFONTENCODING_SYSTEM )
+      enc = wxLocale::GetSystemEncoding();
+
+   MIME::Encoding enc2047 = MIME::GetEncodingForFontEncoding(enc);
+
+   if ( enc2047 == MIME::Encoding_Unknown )
+   {
+      FAIL_MSG( _T("should have valid MIME encoding") );
+
+      enc2047 = MIME::Encoding_QuotedPrintable;
+   }
+
+   // get the name of the charset to use
+   String csName = MIME::GetCharsetForFontEncoding(enc);
+   if ( csName.empty() )
+   {
+      FAIL_MSG( _T("should have a valid charset name!") );
+
+      csName = _T("UNKNOWN");
+   }
+
+
+   String headerEnc;
+   headerEnc.reserve(2*in.length());
+
+   // for QP we encode each header word separately so that the header remains
+   // readable, but for Base64 it's useless to do this as it's unreadable
+   // anyhow so we just encode everything at once
+   if ( enc2047 == MIME::Encoding_QuotedPrintable )
+   {
+      // encode each word of the header
+      const wxArrayString words(wxStringTokenize(in));
+      const size_t count = words.size();
+      for ( size_t n = 0; n < count; ++n )
+      {
+         headerEnc += EncodeWord(words[n], enc, enc2047, csName);
+         if ( n + 1 < count )
+            headerEnc += ' ';
+      }
+   }
+   else // MIME::Encoding_Base64
+   {
+      headerEnc = EncodeWord(in, enc, enc2047, csName);
+   }
+
+   return headerEnc.ToAscii();
+}

Modified: trunk/M/src/mail/SendMessageCC.cpp
===================================================================
--- trunk/M/src/mail/SendMessageCC.cpp  2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/src/mail/SendMessageCC.cpp  2007-08-27 20:03:17 UTC (rev 7344)
@@ -32,6 +32,7 @@
 
 #include "Mversion.h"
 #include "MailFolderCC.h"
+#include "mail/MimeDecode.h"
 
 #include "LogCircle.h"
 
@@ -113,17 +114,6 @@
 // constants
 // ----------------------------------------------------------------------------
 
-// the encodings defined by RFC 2047
-//
-// NB: don't change the values of the enum elements, EncodeHeaderString()
-//     relies on them being what they are!
-enum MimeEncoding
-{
-   MimeEncoding_Unknown,
-   MimeEncoding_Base64 = 'B',
-   MimeEncoding_QuotedPrintable = 'Q'
-};
-
 // trace mask for message sending/queuing operations
 #define TRACE_SEND   _T("send")
 
@@ -342,12 +332,7 @@
 
    // set up default value for From (Reply-To is set in InitNew() as it isn't
    // needed for the resent messages)
-   AddressList_obj addrList(AddressList::CreateFromAddress(m_profile));
-   Address *addrFrom = addrList->GetFirst();
-   if ( addrFrom )
-   {
-      m_From = addrFrom->GetAddress();
-   }
+   m_From = Address::GetSenderAddress(m_profile);
 
    // remember the default hostname to use for addresses without host part
    m_DefaultHost = READ_CONFIG_TEXT(profile, MP_HOSTNAME);
@@ -636,55 +621,6 @@
 // SendMessageCC encodings
 // ----------------------------------------------------------------------------
 
-static MimeEncoding GetMimeEncodingForFontEncoding(wxFontEncoding enc)
-{
-   // QP should be used for the encodings which mostly overlap with US_ASCII,
-   // Base64 for the others - choose the encoding method
-   switch ( enc )
-   {
-      case wxFONTENCODING_ISO8859_1:
-      case wxFONTENCODING_ISO8859_2:
-      case wxFONTENCODING_ISO8859_3:
-      case wxFONTENCODING_ISO8859_4:
-      case wxFONTENCODING_ISO8859_9:
-      case wxFONTENCODING_ISO8859_10:
-      case wxFONTENCODING_ISO8859_13:
-      case wxFONTENCODING_ISO8859_14:
-      case wxFONTENCODING_ISO8859_15:
-
-      case wxFONTENCODING_CP1250:
-      case wxFONTENCODING_CP1252:
-      case wxFONTENCODING_CP1254:
-      case wxFONTENCODING_CP1257:
-
-      case wxFONTENCODING_UTF7:
-      case wxFONTENCODING_UTF8:
-
-         return MimeEncoding_QuotedPrintable;
-
-      case wxFONTENCODING_ISO8859_5:
-      case wxFONTENCODING_ISO8859_6:
-      case wxFONTENCODING_ISO8859_7:
-      case wxFONTENCODING_ISO8859_8:
-      case wxFONTENCODING_ISO8859_11:
-      case wxFONTENCODING_ISO8859_12:
-
-      case wxFONTENCODING_CP1251:
-      case wxFONTENCODING_CP1253:
-      case wxFONTENCODING_CP1255:
-      case wxFONTENCODING_CP1256:
-
-      case wxFONTENCODING_KOI8:
-         return MimeEncoding_Base64;
-
-      default:
-         FAIL_MSG( _T("unknown encoding") );
-
-      case wxFONTENCODING_SYSTEM:
-         return MimeEncoding_Unknown;
-   }
-}
-
 // Check if text can be sent without encoding it (using QP or Base64): for
 // this it must not contain 8bit chars and must not have too long lines
 static bool NeedsToBeEncoded(const unsigned char *text)
@@ -724,239 +660,17 @@
    m_encHeaders = enc;
 }
 
-// returns true if the character must be encoded in an SMTP [address] header
-static inline bool NeedsEncodingInHeader(unsigned char c)
-{
-   return iscntrl(c) || c >= 127;
-}
-
-String
-SendMessageCC::EncodeHeaderString(const String& header)
-{
-   // if headers are already encoded, don't do anything
-   if ( !m_encodeHeaders )
-      return header;
-
-   // if a header contains "=?", encode it anyhow to avoid generating invalid
-   // encoded words
-   if ( !wxStrstr(header, _T("=?")) )
-   {
-      // only encode the strings which contain the characters unallowed in RFC
-      // 822 headers
-      const unsigned char *p;
-      for ( p = (const unsigned char *)header.c_str(); *p; p++ )
-      {
-         if ( NeedsEncodingInHeader(*p) )
-            break;
-      }
-
-      if ( !*p )
-      {
-         // string has only valid chars, don't encode
-         return header;
-      }
-   }
-
-   // get the encoding in RFC 2047 sense: choose the most reasonable one
-   wxFontEncoding enc = m_encHeaders == wxFONTENCODING_SYSTEM
-                           ? wxLocale::GetSystemEncoding()
-                           : m_encHeaders;
-
-   MimeEncoding enc2047 = GetMimeEncodingForFontEncoding(enc);
-
-   if ( enc2047 == MimeEncoding_Unknown )
-   {
-      FAIL_MSG( _T("should have valid MIME encoding") );
-
-      enc2047 = MimeEncoding_QuotedPrintable;
-   }
-
-   // get the name of the charset to use
-   String csName = EncodingToCharset(enc);
-   if ( csName.empty() )
-   {
-      FAIL_MSG( _T("should have a valid charset name!") );
-
-      csName = _T("UNKNOWN");
-   }
-
-   // the entire encoded header
-   String headerEnc;
-   headerEnc.reserve(csName.length() + 2*header.length() + 16);
-
-   // encode the header splitting it in the chunks such that they will be no
-   // longer than 75 characters each
-   //
-   // FIXME-Unicode: we shouldn't use a global encoding for headers any more,
-   //                we could mix different encoding inside the same header
-   const wxCharBuffer buf(header.mb_str(wxCSConv(enc)));
-   const char *s = buf;
-   while ( *s )
-   {
-      // if this is not the first line, insert a line break
-      if ( !headerEnc.empty() )
-      {
-         headerEnc << _T("\r\n ");
-      }
-
-      static const size_t RFC2047_MAXWORD_LEN = 75;
-
-      // how many characters may we put in this encoded word?
-      size_t len = 0;
-
-      // take into account the length of "=?charset?...?="
-      int lenRemaining = RFC2047_MAXWORD_LEN - (5 + csName.length());
-
-      // for QP we need to examine all characters
-      if ( enc2047 == MimeEncoding_QuotedPrintable )
-      {
-         for ( ; s[len]; len++ )
-         {
-            const char c = s[len];
-
-            // normal characters stand for themselves in QP, the encoded ones
-            // take 3 positions (=XX)
-            lenRemaining -= (NeedsEncodingInHeader(c) || strchr(" \t=?", c))
-                              ? 3 : 1;
-
-            if ( lenRemaining <= 0 )
-            {
-               // can't put any more chars into this word
-               break;
-            }
-         }
-      }
-      else // Base64
-      {
-         // we can calculate how many characters we may put into lenRemaining
-         // directly
-         len = (lenRemaining / 4) * 3 - 2;
-
-         // but not more than what we have
-         size_t lenMax = wxStrlen(s);
-         if ( len > lenMax )
-         {
-            len = lenMax;
-         }
-      }
-
-      // do encode this word
-      unsigned char *text = (unsigned char *)s; // cast for cclient
-
-      // length of the encoded text and the text itself
-      unsigned long lenEnc;
-      unsigned char *textEnc;
-
-      if ( enc2047 == MimeEncoding_QuotedPrintable )
-      {
-            textEnc = rfc822_8bit(text, len, &lenEnc);
-      }
-      else // MimeEncoding_Base64
-      {
-            textEnc = rfc822_binary(text, len, &lenEnc);
-            while ( textEnc[lenEnc - 2] == '\r' && textEnc[lenEnc - 1] == '\n' )
-            {
-               // discard eol which we don't need in the header
-               lenEnc -= 2;
-            }
-      }
-
-      // put into string as we might want to do some more replacements...
-      String encword(wxString::FromAscii(CHAR_CAST(textEnc)
-#if wxCHECK_VERSION(2, 9, 0)
-                                         , lenEnc
-#endif
-                                        ));
-
-      // hack: rfc822_8bit() doesn't encode spaces normally but we must
-      // do it inside the headers
-      //
-      // we also have to encode '?'s in the headers which are not encoded by it
-      if ( enc2047 == MimeEncoding_QuotedPrintable )
-      {
-         String encword2;
-         encword2.reserve(encword.length());
-
-         bool replaced = false;
-         for ( const wxChar *p = encword.c_str(); *p; p++ )
-         {
-            switch ( *p )
-            {
-               case ' ':
-                  encword2 += _T("=20");
-                  break;
-
-               case '\t':
-                  encword2 += _T("=09");
-                  break;
-
-               case '?':
-                  encword2 += _T("=3F");
-                  break;
-
-               default:
-                  encword2 += *p;
-
-                  // skip assignment to replaced below
-                  continue;
-            }
-
-            replaced = true;
-         }
-
-         if ( replaced )
-         {
-            encword = encword2;
-         }
-      }
-
-      // append this word to the header
-      headerEnc << _T("=?") << csName << _T('?') << (char)enc2047 << _T('?')
-                << encword
-                << _T("?=");
-
-      fs_give((void **)&textEnc);
-
-      // skip the already encoded part
-      s += len;
-   }
-
-   return headerEnc;
-}
-
-// unlike EncodeHeaderString(), we should only encode the personal name part of the
-// address headers
 void
-SendMessageCC::EncodeAddress(struct mail_address *adr)
+SendMessageCC::SetSubject(const String& subject)
 {
-   if ( adr->personal )
-   {
-      char *tmp = adr->personal;
-      adr->personal = cpystr(wxConvertWX2MB(EncodeHeaderString(wxConvertMB2WX(tmp))));
-
-      fs_give((void **)&tmp);
-   }
-}
-
-void
-SendMessageCC::EncodeAddressList(struct mail_address *adr)
-{
-   while ( adr )
-   {
-      EncodeAddress(adr);
-
-      adr = adr->next;
-   }
-}
-
-void
-SendMessageCC::SetSubject(const String &subject)
-{
    if(m_Envelope->subject)
       fs_give((void **)&m_Envelope->subject);
 
-   String subj = EncodeHeaderString(subject);
-   m_Envelope->subject = cpystr(wxConvertWX2MB(subj.c_str()));
+   // if headers are already encoded, don't do anything, they must be already
+   // in ASCII
+   wxCharBuffer buf(m_encodeHeaders ? MIME::EncodeHeader(subject, m_encHeaders)
+                                    : subject.ToAscii());
+   m_Envelope->subject = cpystr(buf);
 }
 
 void
@@ -1013,7 +727,7 @@
    }
 
    // parse into ADDRESS struct
-   *pAdr = ParseAddressList(address, m_DefaultHost);
+   *pAdr = ParseAddressList(address, m_DefaultHost, m_encHeaders);
 
    // finally filter out any invalid addressees
    CheckAddressFieldForErrors(*pAdr);
@@ -1042,8 +756,6 @@
 
       adr = adrNext;
    }
-
-   EncodeAddressList(adrStart);
 }
 
 void
@@ -1121,19 +833,6 @@
    return true;
 }
 
-String
-SendMessageCC::EncodingToCharset(wxFontEncoding enc)
-{
-   // translate encoding to the charset
-   wxString cs;
-   if ( enc != wxFONTENCODING_SYSTEM && enc != wxFONTENCODING_DEFAULT )
-   {
-      cs = wxFontMapper::GetEncodingName(enc).Upper();
-   }
-
-   return cs;
-}
-
 // ----------------------------------------------------------------------------
 // methods to manage the extra headers
 // ----------------------------------------------------------------------------
@@ -1507,12 +1206,12 @@
          {
             // some encodings should be encoded in QP as they typically contain
             // only a small number of non printable characters while others
-            // should be incoded in Base64 as almost all characters used in them
+            // should be encoded in Base64 as almost all characters used in them
             // are outside basic Ascii set
-            switch ( GetMimeEncodingForFontEncoding(enc) )
+            switch ( MIME::GetEncodingForFontEncoding(enc) )
             {
-               case MimeEncoding_Unknown:
-               case MimeEncoding_QuotedPrintable:
+               case MIME::Encoding_Unknown:
+               case MIME::Encoding_QuotedPrintable:
                   // automatically translated to QP by c-client
                   bdy->encoding = ENC8BIT;
                   break;
@@ -1521,7 +1220,7 @@
                   FAIL_MSG( _T("unknown MIME encoding") );
                   // fall through
 
-               case MimeEncoding_Base64:
+               case MIME::Encoding_Base64:
                   if ( m_Protocol == Prot_SMTP &&
                         READ_CONFIG_BOOL(m_profile, MP_SMTP_USE_8BIT) )
                   {
@@ -1594,7 +1293,7 @@
       }
       else // 8bit message
       {
-         cs = EncodingToCharset(enc);
+         cs = MIME::GetCharsetForFontEncoding(enc);
          if ( cs.empty() )
          {
             cs = m_CharSet;

Modified: trunk/M/tests/mime/decode.cpp
===================================================================
--- trunk/M/tests/mime/decode.cpp       2007-08-26 12:07:41 UTC (rev 7343)
+++ trunk/M/tests/mime/decode.cpp       2007-08-27 20:03:17 UTC (rev 7344)
@@ -50,6 +50,54 @@
   return ((c1 -= (isdigit (c1) ? '0' : ((c1 <= 'Z') ? 'A' : 'a') - 10)) << 4) +
     (c2 - (isdigit (c2) ? '0' : ((c2 <= 'Z') ? 'A' : 'a') - 10));
 }
+
+/* Convert binary contents to BASE64
+ * Accepts: source
+ *         length of source
+ *         pointer to return destination length
+ * Returns: destination as BASE64
+ */
+
+unsigned char *rfc822_binary (void *src,unsigned long srcl,unsigned long *len)
+{
+  unsigned char *ret,*d;
+  unsigned char *s = (unsigned char *) src;
+  char *v = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+  unsigned long i = ((srcl + 2) / 3) * 4;
+  *len = i += 2 * ((i / 60) + 1);
+  d = ret = (unsigned char *) fs_get ((size_t) ++i);
+                               /* process tuplets */
+  for (i = 0; srcl >= 3; s += 3, srcl -= 3) {
+    *d++ = v[s[0] >> 2];       /* byte 1: high 6 bits (1) */
+                               /* byte 2: low 2 bits (1), high 4 bits (2) */
+    *d++ = v[((s[0] << 4) + (s[1] >> 4)) & 0x3f];
+                               /* byte 3: low 4 bits (2), high 2 bits (3) */
+    *d++ = v[((s[1] << 2) + (s[2] >> 6)) & 0x3f];
+    *d++ = v[s[2] & 0x3f];     /* byte 4: low 6 bits (3) */
+    if ((++i) == 15) {         /* output 60 characters? */
+      i = 0;                   /* restart line break count, insert CRLF */
+      *d++ = '\015'; *d++ = '\012';
+    }
+  }
+  if (srcl) {
+    *d++ = v[s[0] >> 2];       /* byte 1: high 6 bits (1) */
+                               /* byte 2: low 2 bits (1), high 4 bits (2) */
+    *d++ = v[((s[0] << 4) + (--srcl ? (s[1] >> 4) : 0)) & 0x3f];
+                               /* byte 3: low 4 bits (2), high 2 bits (3) */
+    *d++ = srcl ? v[((s[1] << 2) + (--srcl ? (s[2] >> 6) : 0)) & 0x3f] : '=';
+                               /* byte 4: low 6 bits (3) */
+    *d++ = srcl ? v[s[2] & 0x3f] : '=';
+    if (srcl) srcl--;          /* count third character if processed */
+    if ((++i) == 15) {         /* output 60 characters? */
+      i = 0;                   /* restart line break count, insert CRLF */
+      *d++ = '\015'; *d++ = '\012';
+    }
+  }
+  *d++ = '\015'; *d++ = '\012';        /* insert final CRLF */
+  *d = '\0';                   /* tie off string */
+  if (((unsigned long) (d - ret)) != *len) fatal ("rfc822_binary logic flaw");
+  return ret;                  /* return the resulting string */
+}
 /* Convert QUOTED-PRINTABLE contents to 8BIT
  * Accepts: source
  *         length of source
@@ -223,19 +271,120 @@
   *d = '\0';                   /* NUL terminate just in case */
   return ret;                  /* return the string */
 }
+
+/* Convert 8BIT contents to QUOTED-PRINTABLE
+ * Accepts: source
+ *         length of source
+ *         pointer to return destination length
+ * Returns: destination as quoted-printable text
+ */
+
+#define MAXL (size_t) 75       /* 76th position only used by continuation = */
+
+unsigned char *rfc822_8bit (unsigned char *src,unsigned long srcl,
+                           unsigned long *len)
+{
+  unsigned long lp = 0;
+  unsigned char *ret = (unsigned char *)
+    fs_get ((size_t) (3*srcl + 3*(((3*srcl)/MAXL) + 1)));
+  unsigned char *d = ret;
+  char *hex = "0123456789ABCDEF";
+  unsigned char c;
+  while (srcl--) {             /* for each character */
+                               /* true line break? */
+    if (((c = *src++) == '\015') && (*src == '\012') && srcl) {
+      *d++ = '\015'; *d++ = *src++; srcl--;
+      lp = 0;                  /* reset line count */
+    }
+    else {                     /* not a line break */
+                               /* quoting required? */
+      if (iscntrl (c) || (c == 0x7f) || (c & 0x80) || (c == '=') ||
+         ((c == ' ') && (*src == '\015'))) {
+       if ((lp += 3) > MAXL) { /* yes, would line overflow? */
+         *d++ = '='; *d++ = '\015'; *d++ = '\012';
+         lp = 3;               /* set line count */
+       }
+       *d++ = '=';             /* quote character */
+       *d++ = hex[c >> 4];     /* high order 4 bits */
+       *d++ = hex[c & 0xf];    /* low order 4 bits */
+      }
+      else {                   /* ordinary character */
+       if ((++lp) > MAXL) {    /* would line overflow? */
+         *d++ = '='; *d++ = '\015'; *d++ = '\012';
+         lp = 1;               /* set line count */
+       }
+       *d++ = c;               /* ordinary character */
+      }
+    }
+  }
+  *d = '\0';                   /* tie off destination */
+  *len = d - ret;              /* calculate true size */
+                               /* try to give some space back */
+  fs_resize ((void **) &ret,(size_t) *len + 1);
+  return ret;
 }
+}
 
 int main()
 {
     wxInitializer init;
 
-    String s;
+    static const struct MimeTestData
+    {
+        const char *encoded;
+        const char *utf8;
+        wxFontEncoding enc;
+    } data[] =
+    {
+        {
+            "=?KOI8-R?B?79TXxdTZIM7BINfP0NLP09k=?=", 
+            "\xd0\x9e\xd1\x82\xd0\xb2\xd0\xb5\xd1\x82\xd1\x8b\x20"
+            "\xd0\xbd\xd0\xb0\x20\xd0\xb2\xd0\xbe\xd0\xbf\xd1\x80"
+            "\xd0\xbe\xd1\x81\xd1\x8b",
+            wxFONTENCODING_KOI8
+        },
+        {
+            "=?KOI8-R?B?99jA1sHOyc4g68/O09TBztTJziBcKENvbnN0YW50a"
+            "W5lIFZ5dXpoYW5pblwp?=",
+            "\xd0\x92\xd1\x8c\xd1\x8e\xd0\xb6\xd0\xb0\xd0\xbd"
+            "\xd0\xb8\xd0\xbd\x20\xd0\x9a\xd0\xbe\xd0\xbd\xd1\x81"
+            "\xd1\x82\xd0\xb0\xd0\xbd\xd1\x82\xd0\xb8\xd0\xbd\x20"
+            "\x5c\x28\x43\x6f\x6e\x73\x74\x61\x6e\x74\x69\x6e\x65"
+            "\x20\x56\x79\x75\x7a\x68\x61\x6e\x69\x6e\x5c\x29",
+            wxFONTENCODING_KOI8
+        },
+        {
+            "Ludovic =?ISO-8859-1?Q?P=E9net?= <[EMAIL PROTECTED]>",
+            "Ludovic P\303\251net <[EMAIL PROTECTED]>",
+            wxFONTENCODING_ISO8859_1
+        },
+        {
+            "Ludovic =?UTF-8?Q?P=C3=A9net?= <[EMAIL PROTECTED]>",
+            "Ludovic P\303\251net <[EMAIL PROTECTED]>",
+            wxFONTENCODING_UTF8
+        },
+    };
 
-    s = MIME::DecodeHeader("=?koi8-r?B?79TXxdTZIM7BINfP0NLP09k=?=");
-    s = MIME::DecodeHeader("=?koi8-r?B?99jA1sHOyc4g68/O09TBztTJziBcKENvbnN0YW50aW5lIFZ5dXpoYW5pblwp?= <[EMAIL PROTECTED]>");
-    s = MIME::DecodeHeader("Ludovic =?ISO-8859-1?Q?P=E9net?= <[EMAIL PROTECTED]>");
-    printf(s.utf8_str());
+    int rc = EXIT_SUCCESS;
+    for ( unsigned n = 0; n < WXSIZEOF(data); ++n )
+    {
+        const MimeTestData& d = data[n];
+        const wxString s = MIME::DecodeHeader(d.encoded);
+        if ( s != wxString::FromUTF8(d.utf8) )
+        {
+            printf("ERROR: decoding #%u: expected \"%s\", got \"%s\"\n",
+                   n, d.utf8, (const char *)s.utf8_str());
+            rc = EXIT_FAILURE;
+        }
 
-    return 0;
+        const wxCharBuffer buf = MIME::EncodeHeader(s, d.enc);
+        if ( strcmp(buf, d.encoded) != 0 )
+        {
+            printf("ERROR: encoding #%u: expected \"%s\", got \"%s\"\n",
+                   n, d.encoded, (const char *)buf);
+        }
+    }
+
+    return rc;
 }
 


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
Mahogany-cvsupdates mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates

[M-CVS] SF.net SVN: mahogany: [7344] trunk/M

Reply via email to