moriyoshi Tue Dec 23 21:36:22 2003 EDT
Modified files:
/php-src/ext/iconv iconv.c
Log:
- Supply some comments. Hope those help when you have to debug this dead beef.
- Fix iconv_mime_decode() to correctly handle "malformed" headers :)
- Fix iconv_mime_decode() bug, that the result is mangled when
ICONV_MIME_CONTINUE_ON_ERROR is enabled and the function encounters an
unconvertable encoded chunk.
Index: php-src/ext/iconv/iconv.c
diff -u php-src/ext/iconv/iconv.c:1.112 php-src/ext/iconv/iconv.c:1.113
--- php-src/ext/iconv/iconv.c:1.112 Tue Dec 23 12:10:54 2003
+++ php-src/ext/iconv/iconv.c Tue Dec 23 21:36:21 2003
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: iconv.c,v 1.112 2003/12/23 17:10:54 moriyoshi Exp $ */
+/* $Id: iconv.c,v 1.113 2003/12/24 02:36:21 moriyoshi Exp $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -1271,9 +1271,9 @@
int eos = 0;
switch (scan_stat) {
- case 0:
+ case 0: /* expecting any character */
switch (*p1) {
- case '\r':
+ case '\r': /* part of an EOL sequence? */
scan_stat = 7;
break;
@@ -1281,17 +1281,17 @@
scan_stat = 8;
break;
- case '=':
+ case '=': /* first letter of an encoded chunk
*/
encoded_word = p1;
scan_stat = 1;
break;
- case ' ': case '\t':
+ case ' ': case '\t': /* a chunk of whitespaces
*/
spaces = p1;
scan_stat = 11;
break;
- default:
+ default: /* first letter of a non-encoded word
*/
_php_iconv_appendc(pretval, *p1,
cd_pl);
encoded_word = NULL;
if ((mode &
PHP_ICONV_MIME_DECODE_STRICT)) {
@@ -1301,7 +1301,7 @@
}
break;
- case 1:
+ case 1: /* expecting a delimiter */
if (*p1 != '?') {
err = _php_iconv_appendl(pretval,
encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
if (err != PHP_ICONV_ERR_SUCCESS) {
@@ -1319,13 +1319,13 @@
scan_stat = 2;
break;
- case 2: /* charset name */
+ case 2: /* expecting a charset name */
switch (*p1) {
- case '?':
+ case '?': /* normal delimiter: encoding scheme
follows */
scan_stat = 3;
break;
- case '*':
+ case '*': /* new style delimiter: locale id
follows */
scan_stat = 10;
break;
}
@@ -1396,7 +1396,7 @@
}
break;
- case 3:
+ case 3: /* expecting a encoding scheme specifier */
switch (*p1) {
case 'B':
enc_scheme =
PHP_ICONV_ENC_SCHEME_BASE64;
@@ -1428,9 +1428,10 @@
}
break;
- case 4:
+ case 4: /* expecting a delimiter */
if (*p1 != '?') {
if ((mode &
PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ /* pass the entire chunk through the
converter */
err = _php_iconv_appendl(pretval,
encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
if (err != PHP_ICONV_ERR_SUCCESS) {
goto out;
@@ -1451,14 +1452,14 @@
scan_stat = 5;
break;
- case 5:
+ case 5: /* expecting an encoded portion */
if (*p1 == '?') {
encoded_text_len = (size_t)(p1 - encoded_text);
scan_stat = 6;
}
break;
- case 7:
+ case 7: /* expecting a "\n" character */
if (*p1 == '\n') {
scan_stat = 8;
} else {
@@ -1469,7 +1470,8 @@
}
break;
- case 8:
+ case 8: /* checking whether the following line is part of a
+ folded header */
if (*p1 != ' ' && *p1 != '\t') {
--p1;
str_left = 1; /* quit_loop */
@@ -1482,9 +1484,10 @@
scan_stat = 11;
break;
- case 6:
+ case 6: /* expecting a End-Of-Chunk character "=" */
if (*p1 != '=') {
if ((mode &
PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ /* pass the entire chunk through the
converter */
err = _php_iconv_appendl(pretval,
encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
if (err != PHP_ICONV_ERR_SUCCESS) {
goto out;
@@ -1508,11 +1511,20 @@
break;
}
- case 9:
+ case 9: /* choice point, seeing what to do next.*/
switch (*p1) {
default:
+ /* Handle non-RFC-compliant formats
+ *
+ * RFC2047 requires the character that
comes right
+ * after an encoded word (chunk) to be
a whitespace,
+ * while there are lots of broken
implementations that
+ * generate such malformed headers
that don't fulfill
+ * that requirement.
+ */
if (!eos) {
if ((mode &
PHP_ICONV_MIME_DECODE_STRICT)) {
+ /* pass the entire
chunk through the converter */
err =
_php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
if (err !=
PHP_ICONV_ERR_SUCCESS) {
goto out;
@@ -1544,6 +1556,7 @@
if (decoded_text == NULL) {
if ((mode &
PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ /* pass the entire
chunk through the converter */
err =
_php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
if (err !=
PHP_ICONV_ERR_SUCCESS) {
goto out;
@@ -1566,29 +1579,24 @@
if (err != PHP_ICONV_ERR_SUCCESS) {
if ((mode &
PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ /* pass the entire
chunk through the converter */
err =
_php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
if (err !=
PHP_ICONV_ERR_SUCCESS) {
goto out;
}
encoded_word = NULL;
- if ((mode &
PHP_ICONV_MIME_DECODE_STRICT)) {
- scan_stat = 12;
- } else {
- scan_stat = 0;
- }
- break;
} else {
goto out;
}
}
- if (eos) {
+ if (eos) { /* reached end-of-string.
done. */
scan_stat = 0;
break;
}
switch (*p1) {
- case '\r':
+ case '\r': /* part of an EOL
sequence? */
scan_stat = 7;
break;
@@ -1596,16 +1604,16 @@
scan_stat = 8;
break;
- case '=':
+ case '=': /* first letter of
an encoded chunk */
scan_stat = 1;
break;
- case ' ': case '\t':
+ case ' ': case '\t': /* medial
whitespaces */
spaces = p1;
scan_stat = 11;
break;
- default:
+ default: /* first letter of a
non-encoded word */
_php_iconv_appendc(pretval, *p1, cd_pl);
scan_stat = 12;
break;
@@ -1614,15 +1622,15 @@
}
break;
- case 10: /* language spec */
+ case 10: /* expects a language specifier. dismiss it for now */
if (*p1 == '?') {
scan_stat = 3;
}
break;
- case 11:
+ case 11: /* expecting a chunk of whitespaces */
switch (*p1) {
- case '\r':
+ case '\r': /* part of an EOL sequence? */
scan_stat = 7;
break;
@@ -1630,7 +1638,7 @@
scan_stat = 8;
break;
- case '=':
+ case '=': /* first letter of an encoded chunk
*/
if (spaces != NULL && encoded_word ==
NULL) {
_php_iconv_appendl(pretval,
spaces, (size_t)(p1 - spaces), cd_pl);
spaces = NULL;
@@ -1642,7 +1650,7 @@
case ' ': case '\t':
break;
- default: /* beginning of a word delimited by
white spaces */
+ default: /* first letter of a non-encoded word
*/
if (spaces != NULL) {
_php_iconv_appendl(pretval,
spaces, (size_t)(p1 - spaces), cd_pl);
spaces = NULL;
@@ -1658,9 +1666,9 @@
}
break;
- case 12:
+ case 12: /* expecting a non-encoded word */
switch (*p1) {
- case '\r':
+ case '\r': /* part of an EOL sequence? */
scan_stat = 7;
break;
@@ -1673,6 +1681,14 @@
scan_stat = 11;
break;
+ case '=': /* first letter of an encoded chunk
*/
+ if (!(mode &
PHP_ICONV_MIME_DECODE_STRICT)) {
+ encoded_word = p1;
+ scan_stat = 1;
+ break;
+ }
+ /* break is omitted intentionally */
+
default:
_php_iconv_appendc(pretval, *p1,
cd_pl);
break;
@@ -1680,7 +1696,6 @@
break;
}
}
-
switch (scan_stat) {
case 0: case 8: case 11: case 12:
break;-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
