hirokawa                                 Sat, 06 Aug 2011 04:28:26 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=314360

Log:
MFH: improved error handling for ill-formed utf-8 based on the Unicode 
recommendation.

Changed paths:
    U   php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_utf8.c

Modified: php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_utf8.c
===================================================================
--- php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_utf8.c   2011-08-06 04:25:44 UTC (rev 314359)
+++ php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_utf8.c   2011-08-06 04:28:26 UTC (rev 314360)
@@ -98,7 +98,7 @@
  */
 int mbfl_filt_conv_utf8_wchar(int c, mbfl_convert_filter *filter)
 {
-       int s, c1, w = 0;
+       int s, c1, w = 0, flag = 0;

        if (c < 0x80) {
                if (c >= 0) {
@@ -120,8 +120,7 @@
                                CK((*filter->output_function)(s, filter->data));
                        } else {
                                w = s & MBFL_WCSGROUP_MASK;
-                               w |= MBFL_WCSGROUP_THROUGH;
-                               CK((*filter->output_function)(w, filter->data));
+                               flag = 1;
                        }
                        break;
                case 0x20: /* 3byte code 2nd char: 0:0xa0-0xbf,D:0x80-9F,1-C,E-F:0x80-0x9f */
@@ -134,10 +133,7 @@
                                filter->status++;
                        } else {
                                w = s & MBFL_WCSGROUP_MASK;
-                               w |= MBFL_WCSGROUP_THROUGH;
-                               CK((*filter->output_function)(w, filter->data));
-                               filter->status = 0;
-                               filter->cache = 0;
+                               flag = 1;
                        }
                        break;
                case 0x31: /* 4byte code 3rd char: 0x80-0xbf */
@@ -154,33 +150,47 @@
                                filter->status++;
                        } else {
                                w = s & MBFL_WCSGROUP_MASK;
-                               w |= MBFL_WCSGROUP_THROUGH;
-                               CK((*filter->output_function)(w, filter->data));
-                               filter->status = 0;
-                               filter->cache = 0;
+                               flag = 1;
                        }
                        break;
                default:
-                       filter->status = 0;
+                       w = c & MBFL_WCSGROUP_MASK;
+                       flag = 1;
                        break;
                }
        } else if (c < 0xc2) { /* invalid: 0xc0,0xc1 */
                w = c & MBFL_WCSGROUP_MASK;
-               w |= MBFL_WCSGROUP_THROUGH;
-               CK((*filter->output_function)(w, filter->data));
-               filter->status = 0;
-               filter->cache = 0;
+               flag = 1;
        } else if (c < 0xe0) { /* 2byte code first char: 0xc2-0xdf */
-               filter->status = 0x10;
-               filter->cache = (c & 0x1f) << 6;
+               if (filter->status == 0x0) {
+                       filter->status = 0x10;
+                       filter->cache = (c & 0x1f) << 6;
+               } else {
+                       w = c & MBFL_WCSGROUP_MASK;
+                       flag = 1;
+               }
        } else if (c < 0xf0) { /* 3byte code first char: 0xe0-0xef */
-               filter->status = 0x20;
-               filter->cache = (c & 0xf) << 12;
+               if (filter->status == 0x0) {
+                       filter->status = 0x20;
+                       filter->cache = (c & 0xf) << 12;
+               } else {
+                       w = c & MBFL_WCSGROUP_MASK;
+                       flag = 1;
+               }
        } else if (c < 0xf5) { /* 4byte code first char: 0xf0-0xf4 */
-               filter->status = 0x30;
-               filter->cache = (c & 0x7) << 18;
+               if (filter->status == 0x0) {
+                       filter->status = 0x30;
+                       filter->cache = (c & 0x7) << 18;
+               } else {
+                       w = c & MBFL_WCSGROUP_MASK;
+                       flag = 1;
+               }
        } else {
                w = c & MBFL_WCSGROUP_MASK;
+               flag = 1;
+       }
+
+       if (flag) {
                w |= MBFL_WCSGROUP_THROUGH;
                CK((*filter->output_function)(w, filter->data));
                filter->status = 0;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to