It seems that your patch doesn't support this kind of encoded string: =?gb2312?b?<any-encoded-text>?==?gb2312?b?<any-encoded-text>?= Here the two encoded-words are not separated by any character.
--- Jeffrey Stedfast <[EMAIL PROTECTED]>wrote: > This patch is a port of my GMime rfc2047 decoder > which is even more > liberal in what it accepts than Thunderbird and is > what I will be > committing to svn. > > closing bugs: > > #302991 > #315513 > #502178 > > Jeff > > > Index: camel-mime-utils.c > =================================================================== > --- camel-mime-utils.c (revision 8315) > +++ camel-mime-utils.c (working copy) > @@ -821,116 +821,321 @@ > *in = inptr; > } > > -/* decode rfc 2047 encoded string segment */ > static char * > -rfc2047_decode_word(const char *in, size_t len) > +camel_iconv_strndup (iconv_t cd, const char > *string, size_t n) > { > - const char *inptr = in+2; > - const char *inend = in+len-2; > + size_t inleft, outleft, converted = 0; > + char *out, *outbuf; > const char *inbuf; > - const char *charset; > - char *encname, *p; > - int tmplen; > - size_t ret; > - char *decword = NULL; > - char *decoded = NULL; > - char *outbase = NULL; > - char *outbuf; > - size_t inlen, outlen; > - gboolean retried = FALSE; > - iconv_t ic; > - > - d(printf("rfc2047: decoding '%.*s'\n", len, in)); > - > - /* quick check to see if this could possibly be a > real encoded word */ > - if (len < 8 || !(in[0] == '=' && in[1] == '?' 
&& > in[len-1] == '=' && in[len-2] == '?')) { > - d(printf("invalid\n")); > - return NULL; > - } > - > - /* skip past the charset to the encoding type */ > - inptr = memchr (inptr, '?', inend-inptr); > - if (inptr != NULL && inptr < inend + 2 && inptr[2] > == '?') { > - d(printf("found ?, encoding is '%c'\n", > inptr[0])); > - inptr++; > - tmplen = inend-inptr-2; > - decword = g_alloca (tmplen); /* this will always > be more-than-enough room */ > - switch(toupper(inptr[0])) { > - case 'Q': > - inlen = quoted_decode((const unsigned char *) > inptr+2, tmplen, (unsigned char *) decword); > - break; > - case 'B': { > - int state = 0; > - unsigned int save = 0; > - > - inlen = camel_base64_decode_step((unsigned char > *) inptr+2, tmplen, (unsigned char *) decword, > &state, &save); > - /* if state != 0 then error? */ > - break; > + size_t outlen; > + int errnosav; > + > + if (cd == (iconv_t) -1) > + return g_strndup (string, n); > + > + outlen = n * 2 + 16; > + out = g_malloc (outlen + 4); > + > + inbuf = string; > + inleft = n; > + > + do { > + errno = 0; > + outbuf = out + converted; > + outleft = outlen - converted; > + > + converted = iconv (cd, (char **) &inbuf, &inleft, > &outbuf, &outleft); > + if (converted == (size_t) -1) { > + if (errno != E2BIG && errno != EINVAL) > + goto fail; > } > - default: > - /* uhhh, unknown encoding type - probably an > invalid encoded word string */ > - return NULL; > + > + /* > + * E2BIG There is not sufficient room at > *outbuf. > + * > + * We just need to grow our outbuffer and try > again. > + */ > + > + converted = outbuf - out; > + if (errno == E2BIG) { > + outlen += inleft * 2 + 16; > + out = g_realloc (out, outlen + 4); > + outbuf = out + converted; > } > - d(printf("The encoded length = %d\n", inlen)); > - if (inlen > 0) { > - /* yuck, all this snot is to setup iconv! 
*/ > - tmplen = inptr - in - 3; > - encname = g_alloca (tmplen + 1); > - memcpy (encname, in + 2, tmplen); > - encname[tmplen] = '\0'; > + } while (errno == E2BIG && inleft > 0); > + > + /* > + * EINVAL An incomplete multibyte sequence has > been encoun > + * tered in the input. > + * > + * We'll just have to ignore it... > + */ > + > + /* flush the iconv conversion */ > + iconv (cd, NULL, NULL, &outbuf, &outleft); > + > + /* Note: not all charsets can be nul-terminated > with a single > + nul byte. UCS2, for example, needs 2 nul > bytes and UCS4 > + needs 4. I hope that 4 nul bytes is > enough to terminate all > + multibyte charsets? */ > + > + /* nul-terminate the string */ > + memset (outbuf, 0, 4); > + > + /* reset the cd */ > + iconv (cd, NULL, NULL, NULL, NULL); > + > + return out; > + > + fail: > + > + errnosav = errno; > + > + w(g_warning ("camel_iconv_strndup: %s at byte > %lu", strerror (errno), n - inleft)); > + > + g_free (out); > + > + /* reset the cd */ > + iconv (cd, NULL, NULL, NULL, NULL); > + > + errno = errnosav; > + > + return NULL; > +} > > - /* rfc2231 updates rfc2047 encoded words... > - * The ABNF given in RFC 2047 for encoded-words > is: > - * encoded-word := "=?" charset "?" encoding > "?" encoded-text "?=" > - * This specification changes this ABNF to: > - * encoded-word := "=?" charset ["*" language] > "?" encoding "?" encoded-text "?=" > - */ > +#define is_ascii(c) isascii ((int) ((unsigned char) > (c))) > > - /* trim off the 'language' part if it's there... 
> */ > - p = strchr (encname, '*'); > - if (p) > - *p = '\0'; > - > - charset = e_iconv_charset_name (encname); > - > - inbuf = decword; > - > - outlen = inlen * 6 + 16; > - outbase = g_alloca (outlen); > - outbuf = outbase; > - > - retry: > - ic = e_iconv_open ("UTF-8", charset); > - if (ic != (iconv_t) -1) { > - ret = e_iconv (ic, &inbuf, &inlen, &outbuf, > &outlen); > - if (ret != (size_t) -1) { > - e_iconv (ic, NULL, 0, &outbuf, &outlen); > - *outbuf = 0; > - decoded = g_strdup (outbase); > +static char * > +decode_8bit (const char *text, size_t len, const > char *default_charset) > +{ > + const char *charsets[4] = { "UTF-8", NULL, NULL, > NULL }; > === message truncated === ___________________________________________________________ 雅虎邮箱传递新年祝福,个性贺卡送亲朋! http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline _______________________________________________ Evolution-hackers mailing list Evolution-hackers@gnome.org http://mail.gnome.org/mailman/listinfo/evolution-hackers