Re: [Evolution-hackers] improved rfc2047 decode patch

jacky Wed, 26 Dec 2007 08:22:45 -0800

It seems that your patch doesn't support this kind of
encoded string:
=?gb2312?b?<any-encoded-text>?==?gb2312?b?<any-encoded-text>?=
The two encoded-words are not separated by any character.


--- Jeffrey Stedfast <[EMAIL PROTECTED]>wrote:

> This patch is a port of my GMime rfc2047 decoder
> which is even more
> liberal in what it accepts than Thunderbird and is
> what I will be
> committing to svn.
> 
> closing bugs:
> 
> #302991
> #315513
> #502178
> 
> Jeff
> 
> > Index: camel-mime-utils.c
>
===================================================================
> --- camel-mime-utils.c        (revision 8315)
> +++ camel-mime-utils.c        (working copy)
> @@ -821,116 +821,321 @@
>       *in = inptr;
>  }
>  
> -/* decode rfc 2047 encoded string segment */
>  static char *
> -rfc2047_decode_word(const char *in, size_t len)
> +camel_iconv_strndup (iconv_t cd, const char
> *string, size_t n)
>  {
> -     const char *inptr = in+2;
> -     const char *inend = in+len-2;
> +     size_t inleft, outleft, converted = 0;
> +     char *out, *outbuf;
>       const char *inbuf;
> -     const char *charset;
> -     char *encname, *p;
> -     int tmplen;
> -     size_t ret;
> -     char *decword = NULL;
> -     char *decoded = NULL;
> -     char *outbase = NULL;
> -     char *outbuf;
> -     size_t inlen, outlen;
> -     gboolean retried = FALSE;
> -     iconv_t ic;
> -
> -     d(printf("rfc2047: decoding '%.*s'\n", len, in));
> -
> -     /* quick check to see if this could possibly be a
> real encoded word */
> -     if (len < 8 || !(in[0] == '=' && in[1] == '?' &&
> in[len-1] == '=' && in[len-2] == '?')) {
> -             d(printf("invalid\n"));
> -             return NULL;
> -     }
> -
> -     /* skip past the charset to the encoding type */
> -     inptr = memchr (inptr, '?', inend-inptr);
> -     if (inptr != NULL && inptr < inend + 2 && inptr[2]
> == '?') {
> -             d(printf("found ?, encoding is '%c'\n",
> inptr[0]));
> -             inptr++;
> -             tmplen = inend-inptr-2;
> -             decword = g_alloca (tmplen); /* this will always
> be more-than-enough room */
> -             switch(toupper(inptr[0])) {
> -             case 'Q':
> -                     inlen = quoted_decode((const unsigned char *)
> inptr+2, tmplen, (unsigned char *) decword);
> -                     break;
> -             case 'B': {
> -                     int state = 0;
> -                     unsigned int save = 0;
> -
> -                     inlen = camel_base64_decode_step((unsigned char
> *) inptr+2, tmplen, (unsigned char *) decword,
> &state, &save);
> -                     /* if state != 0 then error? */
> -                     break;
> +     size_t outlen;
> +     int errnosav;
> +     
> +     if (cd == (iconv_t) -1)
> +             return g_strndup (string, n);
> +     
> +     outlen = n * 2 + 16;
> +     out = g_malloc (outlen + 4);
> +     
> +     inbuf = string;
> +     inleft = n;
> +     
> +     do {
> +             errno = 0;
> +             outbuf = out + converted;
> +             outleft = outlen - converted;
> +             
> +             converted = iconv (cd, (char **) &inbuf, &inleft,
> &outbuf, &outleft);
> +             if (converted == (size_t) -1) {
> +                     if (errno != E2BIG && errno != EINVAL)
> +                             goto fail;
>               }
> -             default:
> -                     /* uhhh, unknown encoding type - probably an
> invalid encoded word string */
> -                     return NULL;
> +             
> +             /*
> +              * E2BIG   There is not sufficient room at
> *outbuf.
> +              *
> +              * We just need to grow our outbuffer and try
> again.
> +              */
> +             
> +             converted = outbuf - out;
> +             if (errno == E2BIG) {
> +                     outlen += inleft * 2 + 16;
> +                     out = g_realloc (out, outlen + 4);
> +                     outbuf = out + converted;
>               }
> -             d(printf("The encoded length = %d\n", inlen));
> -             if (inlen > 0) {
> -                     /* yuck, all this snot is to setup iconv! */
> -                     tmplen = inptr - in - 3;
> -                     encname = g_alloca (tmplen + 1);
> -                     memcpy (encname, in + 2, tmplen);
> -                     encname[tmplen] = '\0';
> +     } while (errno == E2BIG && inleft > 0);
> +     
> +     /*
> +      * EINVAL  An  incomplete  multibyte sequence has
> been encoun
> +      *         tered in the input.
> +      *
> +      * We'll just have to ignore it...
> +      */
> +     
> +     /* flush the iconv conversion */
> +     iconv (cd, NULL, NULL, &outbuf, &outleft);
> +     
> +     /* Note: not all charsets can be nul-terminated
> with a single
> +           nul byte. UCS2, for example, needs 2 nul
> bytes and UCS4
> +           needs 4. I hope that 4 nul bytes is
> enough to terminate all
> +           multibyte charsets? */
> +     
> +     /* nul-terminate the string */
> +     memset (outbuf, 0, 4);
> +     
> +     /* reset the cd */
> +     iconv (cd, NULL, NULL, NULL, NULL);
> +     
> +     return out;
> +     
> + fail:
> +     
> +     errnosav = errno;
> +     
> +     w(g_warning ("camel_iconv_strndup: %s at byte
> %lu", strerror (errno), n - inleft));
> +     
> +     g_free (out);
> +     
> +     /* reset the cd */
> +     iconv (cd, NULL, NULL, NULL, NULL);
> +     
> +     errno = errnosav;
> +     
> +     return NULL;
> +}
>  
> -                     /* rfc2231 updates rfc2047 encoded words...
> -                      * The ABNF given in RFC 2047 for encoded-words
> is:
> -                      *   encoded-word := "=?" charset "?" encoding
> "?" encoded-text "?="
> -                      * This specification changes this ABNF to:
> -                      *   encoded-word := "=?" charset ["*" language]
> "?" encoding "?" encoded-text "?="
> -                      */
> +#define is_ascii(c) isascii ((int) ((unsigned char)
> (c)))
>  
> -                     /* trim off the 'language' part if it's there...
> */
> -                     p = strchr (encname, '*');
> -                     if (p)
> -                             *p = '\0';
> -
> -                     charset = e_iconv_charset_name (encname);
> -
> -                     inbuf = decword;
> -
> -                     outlen = inlen * 6 + 16;
> -                     outbase = g_alloca (outlen);
> -                     outbuf = outbase;
> -
> -             retry:
> -                     ic = e_iconv_open ("UTF-8", charset);
> -                     if (ic != (iconv_t) -1) {
> -                             ret = e_iconv (ic, &inbuf, &inlen, &outbuf,
> &outlen);
> -                             if (ret != (size_t) -1) {
> -                                     e_iconv (ic, NULL, 0, &outbuf, &outlen);
> -                                     *outbuf = 0;
> -                                     decoded = g_strdup (outbase);
> +static char *
> +decode_8bit (const char *text, size_t len, const
> char *default_charset)
> +{
> +     const char *charsets[4] = { "UTF-8", NULL, NULL,
> NULL };
> 
=== message truncated ===



      ___________________________________________________________ 
雅虎邮箱传递新年祝福，个性贺卡送亲朋！ 
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline
_______________________________________________
Evolution-hackers mailing list
Evolution-hackers@gnome.org
http://mail.gnome.org/mailman/listinfo/evolution-hackers

Re: [Evolution-hackers] improved rfc2047 decode patch

Reply via email to