moriyoshi Fri, 11 Sep 2009 08:22:19 +0000 Revision: http://svn.php.net/viewvc?view=revision&revision=288260
Log: - Fix bug #49528 (UTF-16 strings prefixed by BOM wrongly converted). Bug: http://bugs.php.net/49528 (Assigned) UTF-16 strings prefixed by BOMs wrongly converted Changed paths: U php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_utf16.c A php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug49528.phpt U php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_utf16.c A php/php-src/trunk/ext/mbstring/tests/bug49528.phpt Modified: php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_utf16.c =================================================================== --- php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_utf16.c 2009-09-11 06:43:09 UTC (rev 288259) +++ php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_utf16.c 2009-09-11 08:22:19 UTC (rev 288260) @@ -127,7 +127,7 @@ int n, endian; endian = filter->status & 0xff00; - switch (filter->status & 0xff) { + switch (filter->status & 0x0f) { case 0: if (endian) { n = c & 0xff; @@ -144,15 +144,8 @@ n = c & 0xff; } n |= filter->cache & 0xffff; - filter->status &= ~0xff; - if (n == 0xfffe) { - if (endian) { - filter->status = 0; /* big-endian */ - } else { - filter->status = 0x100; /* little-endian */ - } - CK((*filter->output_function)(0xfeff, filter->data)); - } else if (n >= 0xd800 && n < 0xdc00) { + filter->status &= ~0x0f; + if (n >= 0xd800 && n < 0xdc00) { filter->cache = ((n & 0x3ff) << 16) + 0x400000; } else if (n >= 0xdc00 && n < 0xe000) { n &= 0x3ff; @@ -166,7 +159,21 @@ CK((*filter->output_function)(n, filter->data)); } } else { + int is_first = filter->status & 0x10; filter->cache = 0; + filter->status |= 0x10; + if (!is_first) { + if (n == 0xfffe) { + if (endian) { + filter->status &= ~0x100; /* big-endian */ + } else { + filter->status |= 0x100; /* little-endian */ + } + break; + } else if (n == 0xfeff) { + break; + } + } CK((*filter->output_function)(n, filter->data)); } break; Added: 
php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug49528.phpt =================================================================== --- php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug49528.phpt (rev 0) +++ php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug49528.phpt 2009-09-11 08:22:19 UTC (rev 288260) @@ -0,0 +1,20 @@ +--TEST-- +Bug #49528 (UTF-16 strings prefixed by BOM wrongly converted) +--SKIPIF-- +<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?> +--FILE-- +<?php +var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +?> +--EXPECT-- +string(8) "02010403" +string(8) "01020304" +string(12) "feff02010403" +string(12) "fffe02010403" +string(12) "fffe01020304" +string(12) "feff01020304" Modified: php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_utf16.c =================================================================== --- php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_utf16.c 2009-09-11 06:43:09 UTC (rev 288259) +++ php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_utf16.c 2009-09-11 08:22:19 UTC (rev 288260) @@ -127,7 +127,7 @@ int n, endian; endian = filter->status & 0xff00; - switch (filter->status & 0xff) { + switch (filter->status & 0x0f) { case 0: if (endian) { n = c & 0xff; @@ -144,15 +144,8 @@ n = c & 0xff; } n |= filter->cache & 0xffff; - filter->status &= ~0xff; - if (n == 0xfffe) { - if (endian) { - filter->status = 0; /* big-endian */ - } else { - filter->status = 0x100; /* little-endian */ - 
} - CK((*filter->output_function)(0xfeff, filter->data)); - } else if (n >= 0xd800 && n < 0xdc00) { + filter->status &= ~0x0f; + if (n >= 0xd800 && n < 0xdc00) { filter->cache = ((n & 0x3ff) << 16) + 0x400000; } else if (n >= 0xdc00 && n < 0xe000) { n &= 0x3ff; @@ -166,7 +159,21 @@ CK((*filter->output_function)(n, filter->data)); } } else { + int is_first = filter->status & 0x10; filter->cache = 0; + filter->status |= 0x10; + if (!is_first) { + if (n == 0xfffe) { + if (endian) { + filter->status &= ~0x100; /* big-endian */ + } else { + filter->status |= 0x100; /* little-endian */ + } + break; + } else if (n == 0xfeff) { + break; + } + } CK((*filter->output_function)(n, filter->data)); } break; Added: php/php-src/trunk/ext/mbstring/tests/bug49528.phpt =================================================================== --- php/php-src/trunk/ext/mbstring/tests/bug49528.phpt (rev 0) +++ php/php-src/trunk/ext/mbstring/tests/bug49528.phpt 2009-09-11 08:22:19 UTC (rev 288260) @@ -0,0 +1,20 @@ +--TEST-- +Bug #49528 (UTF-16 strings prefixed by BOM wrongly converted) +--SKIPIF-- +<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?> +--FILE-- +<?php +var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xff\xfe\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xfe\xff\x01\x02\x03\x04", "UCS-2BE", "UTF-16"))); +?> +--EXPECT-- +string(8) "02010403" +string(8) "01020304" +string(12) "feff02010403" +string(12) "fffe02010403" +string(12) "fffe01020304" +string(12) "feff01020304"
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
