hirokawa Sun Jul 13 03:34:28 2003 EDT Modified files: (Branch: PHP_4_3) /php-src/ext/mbstring mbfilter.c mbfilter.h mbstring.c Log: added strict detection mode in mb_detect_encoding to detect corrupted byte sequence. Index: php-src/ext/mbstring/mbfilter.c diff -u php-src/ext/mbstring/mbfilter.c:1.52.2.4 php-src/ext/mbstring/mbfilter.c:1.52.2.5 --- php-src/ext/mbstring/mbfilter.c:1.52.2.4 Fri May 30 09:20:17 2003 +++ php-src/ext/mbstring/mbfilter.c Sun Jul 13 03:34:27 2003 @@ -80,7 +80,7 @@ * */ -/* $Id: mbfilter.c,v 1.52.2.4 2003/05/30 13:20:17 iliaa Exp $ */ +/* $Id: mbfilter.c,v 1.52.2.5 2003/07/13 07:34:27 hirokawa Exp $ */ #ifdef HAVE_CONFIG_H @@ -7473,7 +7473,7 @@ * identify encoding */ const mbfl_encoding * -mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC) +mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz, int strict TSRMLS_DC) { int i, n, num, bad; unsigned char *p; @@ -7517,7 +7517,7 @@ } i++; } - if ((num - 1) <= bad) { + if ((num - 1) <= bad && !strict) { break; } p++; @@ -7556,11 +7556,11 @@ } const char* -mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC) +mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz, int strict TSRMLS_DC) { const mbfl_encoding *encoding; - encoding = mbfl_identify_encoding(string, elist, eliztsz TSRMLS_CC); + encoding = mbfl_identify_encoding(string, elist, eliztsz, strict TSRMLS_CC); if (encoding != NULL && encoding->no_encoding > mbfl_no_encoding_charset_min && encoding->no_encoding < mbfl_no_encoding_charset_max) { @@ -7575,7 +7575,7 @@ { const mbfl_encoding *encoding; - encoding = mbfl_identify_encoding(string, elist, eliztsz TSRMLS_CC); + encoding = mbfl_identify_encoding(string, elist, eliztsz, 0 TSRMLS_CC); if (encoding != NULL && encoding->no_encoding > mbfl_no_encoding_charset_min && encoding->no_encoding < mbfl_no_encoding_charset_max) { Index: php-src/ext/mbstring/mbfilter.h diff -u php-src/ext/mbstring/mbfilter.h:1.17 php-src/ext/mbstring/mbfilter.h:1.17.2.1 --- php-src/ext/mbstring/mbfilter.h:1.17 Tue Nov 12 14:24:45 2002 +++ php-src/ext/mbstring/mbfilter.h Sun Jul 13 03:34:28 2003 @@ -86,7 +86,7 @@ * */ -/* $Id: mbfilter.h,v 1.17 2002/11/12 19:24:45 moriyoshi Exp $ */ +/* $Id: mbfilter.h,v 1.17.2.1 2003/07/13 07:34:28 hirokawa Exp $ */ #ifndef MBFL_MBFILTER_H @@ -460,10 +460,10 @@ * identify encoding */ const mbfl_encoding * -mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC); +mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz, int strict TSRMLS_DC); const char * -mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC); +mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz, int strict TSRMLS_DC); const enum mbfl_no_encoding mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC); Index: php-src/ext/mbstring/mbstring.c diff -u php-src/ext/mbstring/mbstring.c:1.142.2.19 php-src/ext/mbstring/mbstring.c:1.142.2.20 --- php-src/ext/mbstring/mbstring.c:1.142.2.19 Sat Jun 28 07:00:37 2003 +++ php-src/ext/mbstring/mbstring.c Sun Jul 13 03:34:28 2003 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: mbstring.c,v 1.142.2.19 2003/06/28 11:00:37 hirokawa Exp $ */ +/* $Id: mbstring.c,v 1.142.2.20 2003/07/13 07:34:28 hirokawa Exp $ */ /* * PHP4 Multibyte String module "mbstring" @@ -2694,15 +2694,15 @@ } /* }}} */ -/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list]) +/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]]) Encodings of the given string is returned (as a string) */ PHP_FUNCTION(mb_detect_encoding) { - pval **arg_str, **arg_list; + pval **arg_str, **arg_list, **arg_strict; mbfl_string string; const char *ret; enum mbfl_no_encoding *elist; - int size, *list; + int size, *list, strict = 0; if (ZEND_NUM_ARGS() == 1) { if (zend_get_parameters_ex(1, &arg_str) == FAILURE) { @@ -2712,6 +2712,10 @@ if (zend_get_parameters_ex(2, &arg_str, &arg_list) == FAILURE) { WRONG_PARAM_COUNT; } + } else if (ZEND_NUM_ARGS() == 3) { + if (zend_get_parameters_ex(3, &arg_str, &arg_list, &arg_strict) == FAILURE) { + WRONG_PARAM_COUNT; + } } else { WRONG_PARAM_COUNT; } @@ -2719,7 +2723,7 @@ /* make encoding list */ list = NULL; size = 0; - if (ZEND_NUM_ARGS() >= 2) { + if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_PP(arg_list)) { switch (Z_TYPE_PP(arg_list)) { case IS_ARRAY: if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0)) { @@ -2744,6 +2748,11 @@ } } + if (ZEND_NUM_ARGS() == 3) { + convert_to_long_ex(arg_strict); + strict = Z_LVAL_PP(arg_strict); + } + if (size > 0 && list != NULL) { elist = list; } else { @@ -2756,7 +2765,7 @@ string.no_language = MBSTRG(current_language); string.val = (unsigned char *)Z_STRVAL_PP(arg_str); string.len = Z_STRLEN_PP(arg_str); - ret = mbfl_identify_encoding_name(&string, elist, size TSRMLS_CC); + ret = mbfl_identify_encoding_name(&string, elist, size, strict TSRMLS_CC); if (list != NULL) { efree((void *)list); }
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php