moriyoshi               Thu Jul 31 17:37:12 2008 UTC

  Modified files:              (Branch: PHP_5_3)
    /php-src/ext/mbstring/libmbfl/mbfl  mbfl_convert.c 
    /php-src/ext/mbstring/tests mb_substitute_character.phpt 
  Log:
  - MFH: Fixed bug #44617 (wrong HTML entity output when substitute_character=entity)
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c?r1=1.5.2.1.2.3.2.1&r2=1.5.2.1.2.3.2.2&diff_format=u
Index: php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
diff -u php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c:1.5.2.1.2.3.2.1 php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c:1.5.2.1.2.3.2.2
--- php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c:1.5.2.1.2.3.2.1    Sat Jul  5 06:52:04 2008
+++ php-src/ext/mbstring/libmbfl/mbfl/mbfl_convert.c    Thu Jul 31 17:37:12 2008
@@ -363,22 +363,6 @@
        return 0;
 }
 
-#if 0
-static int
-mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char 
*p, 
-                           int n)
-{
-       while (n > 0) {
-               if ((*filter->filter_function)(*p++, filter) < 0) {
-                       return -1;
-               }
-               n--;
-       }
-
-       return n;
-}
-#endif
-
 /* illegal character output function for conv-filter */
 int
 mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
@@ -393,14 +377,9 @@
                ret = (*filter->filter_function)(filter->illegal_substchar, 
filter);
                break;
        case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
-       case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
                if (c >= 0) {
                        if (c < MBFL_WCSGROUP_UCS4MAX) {        /* unicode */
-                         if (mode_backup == 
MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
-                           ret = mbfl_convert_filter_strcat(filter, (const 
unsigned char *)"U+");
-                         } else { /* entity */
-                           ret = mbfl_convert_filter_strcat(filter, (const 
unsigned char *)"&#");
-                         }
+                               ret = mbfl_convert_filter_strcat(filter, (const 
unsigned char *)"U+");
                        } else {
                                if (c < MBFL_WCSGROUP_WCHARMAX) {
                                        m = c & ~MBFL_WCSPLANE_MASK;
@@ -444,9 +423,38 @@
                                if (m == 0 && ret >= 0) {
                                        ret = 
(*filter->filter_function)(mbfl_hexchar_table[0], filter);
                                }
-                               if (mode_backup == 
MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
-                                 ret = mbfl_convert_filter_strcat(filter, 
(const unsigned char *)";");
+                       }
+               }
+               break;
+       case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
+               if (c >= 0) {
+                       if (c < MBFL_WCSGROUP_UCS4MAX) {        /* unicode */
+                               ret = mbfl_convert_filter_strcat(filter, (const 
unsigned char *)"&#x");
+                               if (ret < 0)
+                                       break;
+
+                               m = 0;
+                               r = 28;
+                               while (r >= 0) {
+                                       n = (c >> r) & 0xf;
+                                       if (n || m) {
+                                               m = 1;
+                                               ret = 
(*filter->filter_function)(mbfl_hexchar_table[n], filter);
+                                               if (ret < 0) {
+                                                       break;
+                                               }
+                                       }
+                                       r -= 4;
+                               }
+                               if (ret < 0) {
+                                       break;
                                }
+                               if (m == 0) {
+                                       ret = 
(*filter->filter_function)(mbfl_hexchar_table[0], filter);
+                               }
+                               ret = mbfl_convert_filter_strcat(filter, (const 
unsigned char *)";");
+                       } else {
+                               ret = 
(*filter->filter_function)(filter->illegal_substchar, filter);
                        }
                }
                break;
@@ -468,8 +476,8 @@
            to == mbfl_no_encoding_7bit) {
                from = mbfl_no_encoding_8bit;
        } else if (from == mbfl_no_encoding_base64 ||
-                  from == mbfl_no_encoding_qprint ||
-                                 from == mbfl_no_encoding_uuencode) {
+                          from == mbfl_no_encoding_qprint ||
+                          from == mbfl_no_encoding_uuencode) {
                to = mbfl_no_encoding_8bit;
        }
 
http://cvs.php.net/viewvc.cgi/php-src/ext/mbstring/tests/mb_substitute_character.phpt?r1=1.2.22.1&r2=1.2.22.2&diff_format=u
Index: php-src/ext/mbstring/tests/mb_substitute_character.phpt
diff -u php-src/ext/mbstring/tests/mb_substitute_character.phpt:1.2.22.1 php-src/ext/mbstring/tests/mb_substitute_character.phpt:1.2.22.2
--- php-src/ext/mbstring/tests/mb_substitute_character.phpt:1.2.22.1    Thu Jul 17 16:30:32 2008
+++ php-src/ext/mbstring/tests/mb_substitute_character.phpt     Thu Jul 31 17:37:12 2008
@@ -10,40 +10,36 @@
 
 // Note: It does not return TRUE/FALSE for setting char
 
-// Use Unicode val
-$r = mb_substitute_character(0x3013);
-//$r = mb_substitute_character('U+3013');
-($r === TRUE) ?    print "OK_UTF\n" :  print("NG_UTF: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
-
-
-// Use "long"
-$r = mb_substitute_character('long');
-($r === TRUE) ? print "OK_LONG\n" : print("NG_LONG: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
-
-
-// Use "none"
-$r = mb_substitute_character('none');
-($r === TRUE) ? print "OK_NONE\n" : print("NG_NONE: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
-
-
-// Set invalid string. Should fail.
-print "== INVALID PARAMETER ==\n";
-$r = mb_substitute_character('BAD_NAME');
-($r === FALSE) ? print "OK_BAD_NAME\n" : print("NG_BAD_NAME: ".gettype($r)." 
$r\n");
+var_dump(mb_substitute_character(0x3044));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", 
"UTF-8")));
+
+var_dump(mb_substitute_character('long'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", 
"UTF-8")));
+
+var_dump(mb_substitute_character('none'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", 
"UTF-8")));
+
+var_dump(mb_substitute_character('entity'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", 
"UTF-8")));
 
+var_dump(mb_substitute_character('BAD_NAME'));
 ?>
-
 --EXPECT--
-OK_UTF
-12307
-OK_LONG
-long
-OK_NONE
-none
-== INVALID PARAMETER ==
+bool(true)
+int(12356)
+string(8) "82a282a0"
+bool(true)
+string(4) "long"
+string(16) "552b3236363082a0"
+bool(true)
+string(4) "none"
+string(4) "82a0"
+bool(true)
+string(6) "entity"
+string(20) "262378323636303b82a0"
 ERR: Warning
-OK_BAD_NAME
-
+bool(false)



-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to