moriyoshi                                Fri, 11 Sep 2009 08:22:19 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=288260

Log:
- Fix bug #49528 (UTF-16 strings prefixed by BOM wrongly converted).

Bug: http://bugs.php.net/49528 (Assigned) UTF-16 strings prefixed by BOMs 
wrongly converted
      
Changed paths:
    U   php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_utf16.c
    A   php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug49528.phpt
    U   php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_utf16.c
    A   php/php-src/trunk/ext/mbstring/tests/bug49528.phpt

Modified: 
php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_utf16.c
===================================================================
--- php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_utf16.c  
2009-09-11 06:43:09 UTC (rev 288259)
+++ php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_utf16.c  
2009-09-11 08:22:19 UTC (rev 288260)
@@ -127,7 +127,7 @@
        int n, endian;

        endian = filter->status & 0xff00;
-       switch (filter->status & 0xff) {
+       switch (filter->status & 0x0f) {
        case 0:
                if (endian) {
                        n = c & 0xff;
@@ -144,15 +144,8 @@
                        n = c & 0xff;
                }
                n |= filter->cache & 0xffff;
-               filter->status &= ~0xff;
-               if (n == 0xfffe) {
-                       if (endian) {
-                               filter->status = 0;             /* big-endian */
-                       } else {
-                               filter->status = 0x100;         /* 
little-endian */
-                       }
-                       CK((*filter->output_function)(0xfeff, filter->data));
-               } else if (n >= 0xd800 && n < 0xdc00) {
+               filter->status &= ~0x0f;
+               if (n >= 0xd800 && n < 0xdc00) {
                        filter->cache = ((n & 0x3ff) << 16) + 0x400000;
                } else if (n >= 0xdc00 && n < 0xe000) {
                        n &= 0x3ff;
@@ -166,7 +159,21 @@
                                CK((*filter->output_function)(n, filter->data));
                        }
                } else {
+                       int is_first = filter->status & 0x10;
                        filter->cache = 0;
+                       filter->status |= 0x10;
+                       if (!is_first) {
+                               if (n == 0xfffe) {
+                                       if (endian) {
+                                               filter->status &= ~0x100;       
        /* big-endian */
+                                       } else {
+                                               filter->status |= 0x100;        
        /* little-endian */
+                                       }
+                                       break;
+                               } else if (n == 0xfeff) {
+                                       break;
+                               }
+                       }
                        CK((*filter->output_function)(n, filter->data));
                }
                break;

Added: php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug49528.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug49528.phpt               
                (rev 0)
+++ php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug49528.phpt       
2009-09-11 08:22:19 UTC (rev 288260)
@@ -0,0 +1,20 @@
+--TEST--
+Bug #49528 (UTF-16 strings prefixed by BOM wrongly converted)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x01\x02\x03\x04", "UCS-2BE", 
"UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\x01\x02\x03\x04", "UCS-2BE", 
"UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xff\xfe\x01\x02\x03\x04", 
"UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xfe\xff\x01\x02\x03\x04", 
"UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xff\xfe\x01\x02\x03\x04", 
"UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xfe\xff\x01\x02\x03\x04", 
"UCS-2BE", "UTF-16")));
+?>
+--EXPECT--
+string(8) "02010403"
+string(8) "01020304"
+string(12) "feff02010403"
+string(12) "fffe02010403"
+string(12) "fffe01020304"
+string(12) "feff01020304"

Modified: php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_utf16.c
===================================================================
--- php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_utf16.c     
2009-09-11 06:43:09 UTC (rev 288259)
+++ php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_utf16.c     
2009-09-11 08:22:19 UTC (rev 288260)
@@ -127,7 +127,7 @@
        int n, endian;

        endian = filter->status & 0xff00;
-       switch (filter->status & 0xff) {
+       switch (filter->status & 0x0f) {
        case 0:
                if (endian) {
                        n = c & 0xff;
@@ -144,15 +144,8 @@
                        n = c & 0xff;
                }
                n |= filter->cache & 0xffff;
-               filter->status &= ~0xff;
-               if (n == 0xfffe) {
-                       if (endian) {
-                               filter->status = 0;             /* big-endian */
-                       } else {
-                               filter->status = 0x100;         /* 
little-endian */
-                       }
-                       CK((*filter->output_function)(0xfeff, filter->data));
-               } else if (n >= 0xd800 && n < 0xdc00) {
+               filter->status &= ~0x0f;
+               if (n >= 0xd800 && n < 0xdc00) {
                        filter->cache = ((n & 0x3ff) << 16) + 0x400000;
                } else if (n >= 0xdc00 && n < 0xe000) {
                        n &= 0x3ff;
@@ -166,7 +159,21 @@
                                CK((*filter->output_function)(n, filter->data));
                        }
                } else {
+                       int is_first = filter->status & 0x10;
                        filter->cache = 0;
+                       filter->status |= 0x10;
+                       if (!is_first) {
+                               if (n == 0xfffe) {
+                                       if (endian) {
+                                               filter->status &= ~0x100;       
        /* big-endian */
+                                       } else {
+                                               filter->status |= 0x100;        
        /* little-endian */
+                                       }
+                                       break;
+                               } else if (n == 0xfeff) {
+                                       break;
+                               }
+                       }
                        CK((*filter->output_function)(n, filter->data));
                }
                break;

Added: php/php-src/trunk/ext/mbstring/tests/bug49528.phpt
===================================================================
--- php/php-src/trunk/ext/mbstring/tests/bug49528.phpt                          
(rev 0)
+++ php/php-src/trunk/ext/mbstring/tests/bug49528.phpt  2009-09-11 08:22:19 UTC 
(rev 288260)
@@ -0,0 +1,20 @@
+--TEST--
+Bug #49528 (UTF-16 strings prefixed by BOM wrongly converted)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x01\x02\x03\x04", "UCS-2BE", 
"UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\x01\x02\x03\x04", "UCS-2BE", 
"UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xff\xfe\x01\x02\x03\x04", 
"UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xff\xfe\xfe\xff\x01\x02\x03\x04", 
"UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xff\xfe\x01\x02\x03\x04", 
"UCS-2BE", "UTF-16")));
+var_dump(bin2hex(mb_convert_encoding("\xfe\xff\xfe\xff\x01\x02\x03\x04", 
"UCS-2BE", "UTF-16")));
+?>
+--EXPECT--
+string(8) "02010403"
+string(8) "01020304"
+string(12) "feff02010403"
+string(12) "fffe02010403"
+string(12) "fffe01020304"
+string(12) "feff01020304"

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to