On Fri, 14 May 2010, Igor Shenderovich wrote:
> Hello all,
> 
> I'm using the latest version of notmuch (cloned from git on May 13), but I
> can't handle with utf-8 symbols in the authors field. For example, I have a
> letter with the field
> 
> "authors":
> "=?UTF-8?B?Z3JpZmZvbiAtINCa0L7QvNC80LXQvdGC0LDRgNC40Lkg0LIg0JbQlg==?=",
> 
> (got it from usual emacs interface).
> 
> However, the body of this letter is pretty readable (it also contains some
> utf-8 characters).
> 
> What should one do to see the true list of authors?

Hi,

I encounter the same problem when headers are not encoded properly according
to RFC 2047. I commonly see violations of section 5, rule (3), which says:
"An 'encoded-word' MUST NOT appear within a 'quoted-string'". In other
words, the encoded word is enclosed in double quotes. I guess the "problem"
is not specific to notmuch; all users of the gmime library must be
affected.

I use the following patch for notmuch to sanitize headers from a popular
mailing list server in the Czech Republic:

Cheers,
Michal



From: Michal Sojka <[email protected]>
Subject: Fix broken headers from pandora.cz


---
 lib/message-file.c |   34 ++++++++++++++++++++++++++++++++++
 1 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/lib/message-file.c b/lib/message-file.c
index 7722832..abfedc1 100644
--- a/lib/message-file.c
+++ b/lib/message-file.c
@@ -42,6 +42,7 @@ struct _notmuch_message_file {
     int broken_headers;
     int good_headers;
     size_t header_size; /* Length of full message header in bytes. */
+    notmuch_bool_t pandora_cz_quirk;

     /* Parsing state */
     char *line;
@@ -324,7 +325,40 @@ notmuch_message_file_get_header (notmuch_message_file_t *message,
        else
            match = (strcasecmp (header, header_desired) == 0);

+       if (strstr(message->value.str, "=40pandora=2Ecz=29") ||
+           strstr(message->value.str, "@pandora.cz") ||
+           message->pandora_cz_quirk)
+       {
+           char *quote = message->value.str;
+           message->pandora_cz_quirk = TRUE;
+           if (*quote == '"') {
+               int len = strlen(quote);
+               bcopy(quote+1, quote, len);
+               quote = strchr(quote, '"');
+               if (quote) {
+                   len = strlen(quote);
+                   bcopy(quote+1, quote, len);
+               }
+           }
+       }
+
        decoded_value = g_mime_utils_header_decode_text (message->value.str);
+
+       if (message->pandora_cz_quirk &&
+           strcasecmp (header, "From") == 0)
+       {
+           /* remove "(<conf>@pandora.cz)" */
+           char *langle = strchr(decoded_value, '<');
+           if (langle) {
+               char *comment = langle - 2;
+               if (comment > decoded_value && *comment == ')')
+                   while (comment > decoded_value && *comment != '(')
+                       comment--;
+               if (comment > decoded_value)
+                   bcopy(langle, comment, strlen(langle)+1);
+           }
+       }
+
        header_sofar = (char *)g_hash_table_lookup (message->headers, header);
        /* we treat the Received: header special - we want to concat ALL of 
         * the Received: headers we encounter.
-- 
tg: (417274d..) t/Fix-broken-headers-from-pandora.cz (depends on: master)


Reply via email to