moriyoshi Thu Jul 31 17:36:35 2008 UTC Modified files: /php-src/ext/mbstring/libmbfl/mbfl mbfl_convert.c /php-src/ext/mbstring/tests mb_substitute_character.phpt Log: - Fixed bug #44617 (wrong HTML entity output when substitute_character=entity) http://cvs.php.net/viewvc.cgi/php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c?r1=1.8&r2=1.9&diff_format=u Index: php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c diff -u php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c:1.8 php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c:1.9 --- php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c:1.8 Mon Sep 24 11:50:54 2007 +++ php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c Thu Jul 31 17:36:35 2008 @@ -356,22 +356,6 @@ return 0; } -#if 0 -static int -mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p, - int n) -{ - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - return -1; - } - n--; - } - - return n; -} -#endif - /* illegal character output function for conv-filter */ int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) @@ -386,14 +370,9 @@ ret = (*filter->filter_function)(filter->illegal_substchar, filter); break; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: - case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: if (c >= 0) { if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ - if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+"); - } else { /* entity */ - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#"); - } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+"); } else { if (c < MBFL_WCSGROUP_WCHARMAX) { m = c & ~MBFL_WCSPLANE_MASK; @@ -437,9 +416,38 @@ if (m == 0 && ret >= 0) { ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); } - if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } + } + break; + case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: + if (c >= 0) { + if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x"); + if (ret < 0) + break; + + m = 0; + r = 28; + while (r >= 0) { + n = (c >> r) & 0xf; + if (n || m) { + m = 1; + ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter); + if (ret < 0) { + break; + } + } + r -= 4; + } + if (ret < 0) { + break; } + if (m == 0) { + ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); + } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } else { + ret = (*filter->filter_function)(filter->illegal_substchar, filter); } } break; @@ -461,8 +469,8 @@ to == mbfl_no_encoding_7bit) { from = mbfl_no_encoding_8bit; } else if (from == mbfl_no_encoding_base64 || - from == mbfl_no_encoding_qprint || - from == mbfl_no_encoding_uuencode) { + from == mbfl_no_encoding_qprint || + from == mbfl_no_encoding_uuencode) { to = mbfl_no_encoding_8bit; } http://cvs.php.net/viewvc.cgi/php-src/ext/mbstring/tests/mb_substitute_character.phpt?r1=1.2&r2=1.3&diff_format=u Index: php-src/ext/mbstring/tests/mb_substitute_character.phpt diff -u php-src/ext/mbstring/tests/mb_substitute_character.phpt:1.2 php-src/ext/mbstring/tests/mb_substitute_character.phpt:1.3 --- php-src/ext/mbstring/tests/mb_substitute_character.phpt:1.2 Wed Oct 30 08:06:52 2002 +++ php-src/ext/mbstring/tests/mb_substitute_character.phpt Thu Jul 31 17:36:35 2008 @@ -10,40 +10,36 @@ // Note: It does not return TRUE/FALSE for setting char -// Use Unicode val -$r = mb_substitute_character(0x3013); -//$r = mb_substitute_character('U+3013'); -($r === TRUE) ? print "OK_UTF\n" : print("NG_UTF: ".gettype($r)." $r\n"); -print mb_substitute_character() . "\n"; - - -// Use "long" -$r = mb_substitute_character('long'); -($r === TRUE) ? print "OK_LONG\n" : print("NG_LONG: ".gettype($r)." $r\n"); -print mb_substitute_character() . "\n"; - - -// Use "none" -$r = mb_substitute_character('none'); -($r === TRUE) ? print "OK_NONE\n" : print("NG_NONE: ".gettype($r)." $r\n"); -print mb_substitute_character() . "\n"; - - -// Set invalid string. Should fail. -print "== INVALID PARAMETER ==\n"; -$r = mb_substitute_character('BAD_NAME'); -($r === FALSE) ? print "OK_BAD_NAME\n" : print("NG_BAD_NAME: ".gettype($r)." $r\n"); +var_dump(mb_substitute_character(0x3044)); +var_dump(mb_substitute_character()); +var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8"))); + +var_dump(mb_substitute_character('long')); +var_dump(mb_substitute_character()); +var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8"))); + +var_dump(mb_substitute_character('none')); +var_dump(mb_substitute_character()); +var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8"))); + +var_dump(mb_substitute_character('entity')); +var_dump(mb_substitute_character()); +var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8"))); +var_dump(mb_substitute_character('BAD_NAME')); ?> - --EXPECT-- -OK_UTF -12307 -OK_LONG -long -OK_NONE -none -== INVALID PARAMETER == +bool(true) +int(12356) +string(8) "82a282a0" +bool(true) +string(4) "long" +string(16) "552b3236363082a0" +bool(true) +string(4) "none" +string(4) "82a0" +bool(true) +string(6) "entity" +string(20) "262378323636303b82a0" ERR: Warning -OK_BAD_NAME - +bool(false)
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php