moriyoshi                                Sun, 11 Oct 2009 23:52:33 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=289554

Log:
- A couple more fixes for my previous fix.
  (One of the fixes is by Arnaud Le Blanc. Thanks!)

Changed paths:
    U   php/php-src/branches/PHP_5_2/ext/standard/html.c
    U   php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt
    U   php/php-src/branches/PHP_5_3/ext/standard/html.c
    U   php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt
    U   php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities-utf-2.phpt
    U   php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities-utf.phpt
    U   php/php-src/trunk/ext/standard/html.c
    U   php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt
    U   php/php-src/trunk/ext/standard/tests/strings/htmlentities-utf-2.phpt
    U   php/php-src/trunk/ext/standard/tests/strings/htmlentities-utf.phpt

Modified: php/php-src/branches/PHP_5_2/ext/standard/html.c
===================================================================
--- php/php-src/branches/PHP_5_2/ext/standard/html.c	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/branches/PHP_5_2/ext/standard/html.c	2009-10-11 23:52:33 UTC (rev 289554)
@@ -484,6 +484,11 @@
 			}                        \
 			mbseq[mbpos++] = (mbchar); }

+#define MB_FAILURE(pos) do { \
+	*status = FAILURE; \
+	return 0; \
+} while (0)
+
 #define CHECK_LEN(pos, chars_need)			\
 	if((str_len - (pos)) < chars_need) {	\
 		*status = FAILURE;					\
@@ -526,10 +531,12 @@
                     this_char = c;
 					pos++;
 				} else if (c < 0xc0) {
-					*status = FAILURE;
-					return 0;
+					MB_FAILURE(pos);
 				} else if (c < 0xe0) {
 					CHECK_LEN(pos, 2);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
 					if (this_char < 0x80) {
 						*status = FAILURE;
@@ -540,10 +547,15 @@
 					pos += 2;
 				} else if (c < 0xf0) {
 					CHECK_LEN(pos, 3);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
 					if (this_char < 0x800) {
-						*status = FAILURE;
-						return 0;
+                        MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -551,10 +563,18 @@
 					pos += 3;
 				} else if (c < 0xf8) {
 					CHECK_LEN(pos, 4);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
 					if (this_char < 0x10000) {
-						*status = FAILURE;
-						return 0;
+                        MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -562,8 +582,7 @@
 					MB_WRITE((unsigned char)str[pos + 3]);
 					pos += 4;
 				} else {
-					*status = FAILURE;
-					return 0;
+                    MB_FAILURE(pos);
 				}
 			}
 			break;
@@ -585,8 +604,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);
@@ -611,8 +629,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);
@@ -634,8 +651,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else if (this_char == 0x8e) {
 					/* peek at the next char */
@@ -647,8 +663,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else if (this_char == 0x8f) {
 					/* peek at the next two char */
@@ -665,8 +680,7 @@
 						MB_WRITE(next2_char);
 						this_char = (this_char << 16) | (next_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);

Modified: php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt
===================================================================
--- php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt	2009-10-11 23:52:33 UTC (rev 289554)
@@ -7,19 +7,33 @@
 }

 // UTF-8: basic tests
-var_dump(bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x00", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\xc0", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\x3f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\x3f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xff\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));

 echo "--\n";
 // UTF-8: alternative (invalid) UTF-8 sequence
@@ -115,17 +129,31 @@
 --EXPECT--
 string(0) ""
 string(4) "c280"
+string(0) ""
+string(0) ""
 string(14) "26416c7068613b"
 string(14) "26616c7068613b"
 string(4) "dfbf"
 string(6) "e0a080"
 string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
 string(16) "266865617274733b"
 string(6) "efbfbf"
 string(0) ""
+string(0) ""
+string(0) ""
 string(8) "f0908080"
 string(8) "f7bfbfbf"
 string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
 --
 string(0) ""
 string(0) ""

Modified: php/php-src/branches/PHP_5_3/ext/standard/html.c
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/html.c	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/branches/PHP_5_3/ext/standard/html.c	2009-10-11 23:52:33 UTC (rev 289554)
@@ -483,11 +483,26 @@
 			}                        \
 			mbseq[mbpos++] = (mbchar); }

+/* skip one byte and return */
+#define MB_FAILURE(pos) do { \
+	*newpos = pos + 1; \
+	*status = FAILURE; \
+	return 0; \
+} while (0)
+
 #define CHECK_LEN(pos, chars_need)			\
-	if((str_len - (pos)) < chars_need) {	\
-		*newpos = pos;						\
-		*status = FAILURE;					\
-		return 0;							\
+	if (chars_need < 1) {						\
+		if((str_len - (pos)) < chars_need) {	\
+			*newpos = pos;						\
+			*status = FAILURE;					\
+			return 0;							\
+		}										\
+	} else {									\
+		if((str_len - (pos)) < chars_need) {	\
+			*newpos = pos + 1;					\
+			*status = FAILURE;					\
+			return 0;							\
+		}										\
 	}

 /* {{{ get_next_char
@@ -526,10 +541,12 @@
                     this_char = c;
 					pos++;
 				} else if (c < 0xc0) {
-					*status = FAILURE;
-					return 0;
+					MB_FAILURE(pos);
 				} else if (c < 0xe0) {
 					CHECK_LEN(pos, 2);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
 					if (this_char < 0x80) {
 						*status = FAILURE;
@@ -540,10 +557,15 @@
 					pos += 2;
 				} else if (c < 0xf0) {
 					CHECK_LEN(pos, 3);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
 					if (this_char < 0x800) {
-						*status = FAILURE;
-						return 0;
+                        MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -551,10 +573,18 @@
 					pos += 3;
 				} else if (c < 0xf8) {
 					CHECK_LEN(pos, 4);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
 					if (this_char < 0x10000) {
-						*status = FAILURE;
-						return 0;
+                        MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -562,8 +592,7 @@
 					MB_WRITE((unsigned char)str[pos + 3]);
 					pos += 4;
 				} else {
-					*status = FAILURE;
-					return 0;
+                    MB_FAILURE(pos);
 				}
 			}
 			break;
@@ -585,8 +614,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);
@@ -611,8 +639,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);
@@ -634,8 +661,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else if (this_char == 0x8e) {
 					/* peek at the next char */
@@ -647,8 +673,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else if (this_char == 0x8f) {
 					/* peek at the next two char */
@@ -665,8 +690,7 @@
 						MB_WRITE(next2_char);
 						this_char = (this_char << 16) | (next_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);

Modified: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt	2009-10-11 23:52:33 UTC (rev 289554)
@@ -7,19 +7,33 @@
 }

 // UTF-8: basic tests
-var_dump(bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x00", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\xc0", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\x3f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\x3f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xff\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));

 echo "--\n";
 // UTF-8: alternative (invalid) UTF-8 sequence
@@ -115,17 +129,31 @@
 --EXPECT--
 string(0) ""
 string(4) "c280"
+string(0) ""
+string(0) ""
 string(14) "26416c7068613b"
 string(14) "26616c7068613b"
 string(4) "dfbf"
 string(6) "e0a080"
 string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
 string(16) "266865617274733b"
 string(6) "efbfbf"
 string(0) ""
+string(0) ""
+string(0) ""
 string(8) "f0908080"
 string(8) "f7bfbfbf"
 string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
 --
 string(0) ""
 string(0) ""

Modified: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities-utf-2.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities-utf-2.phpt	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities-utf-2.phpt	2009-10-11 23:52:33 UTC (rev 289554)
@@ -36,8 +36,8 @@
 %unicode|string%(0) ""
 %unicode|string%(2) "79"
 %unicode|string%(2) "79"
-%unicode|string%(8) "2667743b"
-%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(8) "566f696c"
 %unicode|string%(8) "566f696c"
 %unicode|string%(12) "436c69636873"
@@ -52,10 +52,10 @@
 %unicode|string%(2) "79"
 %unicode|string%(8) "f7bfbfbf"
 %unicode|string%(8) "f7bfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(4) "4142"
 %unicode|string%(4) "4142"
 %unicode|string%(4) "4242"

Modified: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities-utf.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities-utf.phpt	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities-utf.phpt	2009-10-11 23:52:33 UTC (rev 289554)
@@ -36,8 +36,6 @@
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
-%unicode|string%(8) "2667743b"
-%unicode|string%(8) "2667743b"
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
@@ -46,16 +44,14 @@
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(4) "c3a9"
 %unicode|string%(16) "266561637574653b"
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(8) "f7bfbfbf"
 %unicode|string%(8) "f7bfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
@@ -68,3 +64,7 @@
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""

Modified: php/php-src/trunk/ext/standard/html.c
===================================================================
--- php/php-src/trunk/ext/standard/html.c	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/trunk/ext/standard/html.c	2009-10-11 23:52:33 UTC (rev 289554)
@@ -489,11 +489,26 @@
 			}                        \
 			mbseq[mbpos++] = (mbchar); }

+/* skip one byte and return */
+#define MB_FAILURE(pos) do { \
+	*newpos = pos + 1; \
+	*status = FAILURE; \
+	return 0; \
+} while (0)
+
 #define CHECK_LEN(pos, chars_need)			\
-	if((str_len - (pos)) < chars_need) {	\
-		*newpos = pos;						\
-		*status = FAILURE;					\
-		return 0;							\
+	if (chars_need < 1) {						\
+		if((str_len - (pos)) < chars_need) {	\
+			*newpos = pos;						\
+			*status = FAILURE;					\
+			return 0;							\
+		}										\
+	} else {									\
+		if((str_len - (pos)) < chars_need) {	\
+			*newpos = pos + 1;					\
+			*status = FAILURE;					\
+			return 0;							\
+		}										\
 	}

 /* {{{ get_next_char
@@ -532,10 +547,12 @@
                     this_char = c;
 					pos++;
 				} else if (c < 0xc0) {
-					*status = FAILURE;
-					return 0;
+					MB_FAILURE(pos);
 				} else if (c < 0xe0) {
 					CHECK_LEN(pos, 2);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
 					if (this_char < 0x80) {
 						*status = FAILURE;
@@ -546,10 +563,15 @@
 					pos += 2;
 				} else if (c < 0xf0) {
 					CHECK_LEN(pos, 3);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
 					if (this_char < 0x800) {
-						*status = FAILURE;
-						return 0;
+                        MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -557,10 +579,18 @@
 					pos += 3;
 				} else if (c < 0xf8) {
 					CHECK_LEN(pos, 4);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
+					if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
+                        MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
 					if (this_char < 0x10000) {
-						*status = FAILURE;
-						return 0;
+                        MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -568,8 +598,7 @@
 					MB_WRITE((unsigned char)str[pos + 3]);
 					pos += 4;
 				} else {
-					*status = FAILURE;
-					return 0;
+                    MB_FAILURE(pos);
 				}
 			}
 			break;
@@ -591,8 +620,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);
@@ -617,8 +645,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);
@@ -640,8 +667,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else if (this_char == 0x8e) {
 					/* peek at the next char */
@@ -653,8 +679,7 @@
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else if (this_char == 0x8f) {
 					/* peek at the next two char */
@@ -671,8 +696,7 @@
 						MB_WRITE(next2_char);
 						this_char = (this_char << 16) | (next_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);

Modified: php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt
===================================================================
--- php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt	2009-10-11 23:52:33 UTC (rev 289554)
@@ -7,19 +7,33 @@
 }

 // UTF-8: basic tests
-var_dump(bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x00", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\xc0", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\x3f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\x3f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xff\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));

 echo "--\n";
 // UTF-8: alternative (invalid) UTF-8 sequence
@@ -115,17 +129,31 @@
 --EXPECT--
 string(0) ""
 string(4) "c280"
+string(0) ""
+string(0) ""
 string(14) "26416c7068613b"
 string(14) "26616c7068613b"
 string(4) "dfbf"
 string(6) "e0a080"
 string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
 string(16) "266865617274733b"
 string(6) "efbfbf"
 string(0) ""
+string(0) ""
+string(0) ""
 string(8) "f0908080"
 string(8) "f7bfbfbf"
 string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
 --
 string(0) ""
 string(0) ""

Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities-utf-2.phpt
===================================================================
--- php/php-src/trunk/ext/standard/tests/strings/htmlentities-utf-2.phpt	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/trunk/ext/standard/tests/strings/htmlentities-utf-2.phpt	2009-10-11 23:52:33 UTC (rev 289554)
@@ -36,8 +36,8 @@
 %unicode|string%(0) ""
 %unicode|string%(2) "79"
 %unicode|string%(2) "79"
-%unicode|string%(8) "2667743b"
-%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(8) "566f696c"
 %unicode|string%(8) "566f696c"
 %unicode|string%(12) "436c69636873"
@@ -52,10 +52,10 @@
 %unicode|string%(2) "79"
 %unicode|string%(8) "f7bfbfbf"
 %unicode|string%(8) "f7bfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(4) "4142"
 %unicode|string%(4) "4142"
 %unicode|string%(4) "4242"

Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities-utf.phpt
===================================================================
--- php/php-src/trunk/ext/standard/tests/strings/htmlentities-utf.phpt	2009-10-11 23:37:52 UTC (rev 289553)
+++ php/php-src/trunk/ext/standard/tests/strings/htmlentities-utf.phpt	2009-10-11 23:52:33 UTC (rev 289554)
@@ -36,8 +36,6 @@
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
-%unicode|string%(8) "2667743b"
-%unicode|string%(8) "2667743b"
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
@@ -46,16 +44,14 @@
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(4) "c3a9"
 %unicode|string%(16) "266561637574653b"
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(8) "f7bfbfbf"
 %unicode|string%(8) "f7bfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
@@ -68,3 +64,7 @@
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to