[M-CVS] SF.net SVN: mahogany: [7354] trunk/M

vadz Fri, 31 Aug 2007 14:59:02 -0700

Revision: 7354
          http://mahogany.svn.sourceforge.net/mahogany/?rev=7354&view=rev
Author:   vadz
Date:     2007-08-31 14:58:51 -0700 (Fri, 31 Aug 2007)


Log Message:
-----------
fix encoding of consecutive encoded words in QP: we must encode the spaces as 
they're ignored between encoded words otherwise

Modified Paths:
--------------
    trunk/M/src/mail/MimeDecode.cpp
    trunk/M/tests/mime/decode.cpp

Modified: trunk/M/src/mail/MimeDecode.cpp
===================================================================
--- trunk/M/src/mail/MimeDecode.cpp     2007-08-30 20:24:02 UTC (rev 7353)
+++ trunk/M/src/mail/MimeDecode.cpp     2007-08-31 21:58:51 UTC (rev 7354)
@@ -422,16 +422,15 @@
    return true;
 }
 
+// encode the given text unconditionally, i.e. without checking if it must be
+// encoded (this is supposed to be done in the caller) and using the specified
+// encodings and charset (which are supposed to be detected by the caller too)
 static String
-EncodeWord(const String& in,
+EncodeText(const String& in,
            wxFontEncoding enc,
            MIME::Encoding enc2047,
            const String& csName)
 {
-   if ( !NeedsEncoding(in) )
-      return in;
-
-
    // encode the word splitting it in the chunks such that they will be no
    // longer than 75 characters each
    wxCharBuffer buf(in.mb_str(wxCSConv(enc)));
@@ -608,24 +607,65 @@
    String headerEnc;
    headerEnc.reserve(2*in.length());
 
-   // for QP we encode each header word separately so that the header remains
-   // readable, but for Base64 it's useless to do this as it's unreadable
-   // anyhow so we just encode everything at once
+   // for QP we encode each header word separately as some might not need to be
+   // encoded at all and the header remains more readable if we don't encode
+   // them unnecessarily, but for Base64 it's useless to do this as it's
+   // unreadable anyhow so we just encode everything at once
    if ( enc2047 == MIME::Encoding_QuotedPrintable )
    {
-      // encode each word of the header
+      // encode each word of the header if necessary, taking into account one
+      // added complication: white space between 2 consecutive encoded words is
+      // ignored during decoding, so we must encode 2 consecutive words both of
+      // which need encoding as one single encoded word or the space between
+      // them would be lost
+      bool lastWordEncoded = false;
       const wxArrayString words(wxStringTokenize(in));
       const size_t count = words.size();
       for ( size_t n = 0; n < count; ++n )
       {
-         headerEnc += EncodeWord(words[n], enc, enc2047, csName);
-         if ( n + 1 < count )
-            headerEnc += ' ';
+         const wxString& word = words[n];
+         if ( NeedsEncoding(word) )
+         {
+            const String wordEnc = EncodeText(word, enc, enc2047, csName);
+
+            if ( lastWordEncoded )
+            {
+               // we need to merge the 2 consecutive encoded words together: we
+               // do it by removing "?=" suffix from the previous word, adding
+               // a space and removing the "=?charset?Q?" prefix from this word
+               ASSERT_MSG( headerEnc.length() > 7, "bad QP-encoded last word" 
);
+               headerEnc.RemoveLast(2); // "?="
+
+               headerEnc += '_'; // space can be represented like this in QP
+
+               const size_t posText = wordEnc.find("?Q?");
+               ASSERT_MSG( posText != String::npos, "bad QP-encoded word" );
+               headerEnc += wordEnc.substr(posText + 3);
+            }
+            else // last word not encoded, just append this one
+            {
+               if ( !headerEnc.empty() )
+                  headerEnc += ' ';
+
+               headerEnc += wordEnc;
+            }
+
+            lastWordEncoded = true;
+         }
+         else // this word doesn't need to be encoded, simply append it
+         {
+            if ( !headerEnc.empty() )
+               headerEnc += ' ';
+
+            headerEnc += word;
+
+            lastWordEncoded = false;
+         }
       }
    }
    else // MIME::Encoding_Base64
    {
-      headerEnc = EncodeWord(in, enc, enc2047, csName);
+      headerEnc = EncodeText(in, enc, enc2047, csName);
    }
 
    return headerEnc.ToAscii();

Modified: trunk/M/tests/mime/decode.cpp
===================================================================
--- trunk/M/tests/mime/decode.cpp       2007-08-30 20:24:02 UTC (rev 7353)
+++ trunk/M/tests/mime/decode.cpp       2007-08-31 21:58:51 UTC (rev 7354)
@@ -363,6 +363,26 @@
             "Ludovic P\303\251net <[EMAIL PROTECTED]>",
             wxFONTENCODING_UTF8
         },
+
+        {
+            "=?UTF-8?Q?=D0=92=D0=B0=D0=B4=D0=B8=D0=BC_"
+            "=D0=A6=D0=B5=D0=B9=D1=82=D0=BB=D0=B8=D0=BD?=",
+            "\xD0\x92\xD0\xB0\xD0\xB4\xD0\xB8\xD0\xBC "
+            "\xD0\xA6\xD0\xB5\xD0\xB9\xD1\x82\xD0\xBB\xD0\xB8\xD0\xBD",
+            wxFONTENCODING_UTF8
+        },
+
+        {
+            "2006 =?UTF-8?Q?=D0=92_=D0=A6?= 2007",
+            "2006 \xD0\x92 \xD0\xA6 2007",
+            wxFONTENCODING_UTF8
+        },
+
+        {
+            "=?UTF-8?Q?=D0=92_=D0=A6?= 2007",
+            "\xD0\x92 \xD0\xA6 2007",
+            wxFONTENCODING_UTF8
+        }
     };
 
     int rc = EXIT_SUCCESS;


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
Mahogany-cvsupdates mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates

[M-CVS] SF.net SVN: mahogany: [7354] trunk/M

Reply via email to