stas Wed Oct 3 04:53:06 2007 UTC
Modified files: (Branch: PHP_5_2)
/php-src/ext/standard html.c
Log:
do not accept partial multibyte sequences in html* functions
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.111.2.2.2.14&r2=1.111.2.2.2.15&diff_format=u
Index: php-src/ext/standard/html.c
diff -u php-src/ext/standard/html.c:1.111.2.2.2.14 php-src/ext/standard/html.c:1.111.2.2.2.15
--- php-src/ext/standard/html.c:1.111.2.2.2.14 Sun May 27 15:57:11 2007
+++ php-src/ext/standard/html.c Wed Oct 3 04:53:05 2007
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: html.c,v 1.111.2.2.2.14 2007/05/27 15:57:11 nlopess Exp $ */
+/* $Id: html.c,v 1.111.2.2.2.15 2007/10/03 04:53:05 stas Exp $ */
/*
* HTML entity resources:
@@ -484,18 +484,29 @@
} \
mbseq[mbpos++] = (mbchar); }
+#define CHECK_LEN(pos, chars_need) \
+ if((str_len - (pos)) < chars_need) { \
+ *status = FAILURE; \
+ return 0; \
+ }
+
/* {{{ get_next_char
*/
inline static unsigned short get_next_char(enum entity_charset charset,
unsigned char * str,
+ int str_len,
int * newpos,
unsigned char * mbseq,
- int * mbseqlen)
+ int * mbseqlen,
+ int *status)
{
int pos = *newpos;
int mbpos = 0;
int mbspace = *mbseqlen;
unsigned short this_char = str[pos++];
+ unsigned char next_char;
+
+ *status = SUCCESS;
if (mbspace <= 0) {
*mbseqlen = 0;
@@ -517,6 +528,10 @@
do {
if (this_char < 0x80) {
more = 0;
+ if(stat) {
+ /* we didn't finish the UTF sequence correctly */
+ *status = FAILURE;
+ }
break;
} else if (this_char < 0xc0) {
switch (stat) {
@@ -555,6 +570,7 @@
break;
default:
/* invalid */
+ *status =
FAILURE;
more = 0;
}
}
@@ -562,21 +578,27 @@
else if (this_char < 0xe0) {
stat = 0x10; /* 2 byte */
utf = (this_char & 0x1f) << 6;
+ CHECK_LEN(pos, 1);
} else if (this_char < 0xf0) {
stat = 0x20; /* 3 byte */
utf = (this_char & 0xf) << 12;
+ CHECK_LEN(pos, 2);
} else if (this_char < 0xf8) {
stat = 0x30; /* 4 byte */
utf = (this_char & 0x7) << 18;
+ CHECK_LEN(pos, 3);
} else if (this_char < 0xfc) {
stat = 0x40; /* 5 byte */
utf = (this_char & 0x3) << 24;
+ CHECK_LEN(pos, 4);
} else if (this_char < 0xfe) {
stat = 0x50; /* 6 byte */
utf = (this_char & 0x1) << 30;
+ CHECK_LEN(pos, 5);
} else {
/* invalid; bail */
more = 0;
+ *status = FAILURE;
break;
}
@@ -594,7 +616,8 @@
/* check if this is the first of a 2-byte
sequence */
if (this_char >= 0xa1 && this_char <= 0xfe) {
/* peek at the next char */
- unsigned char next_char = str[pos];
+ CHECK_LEN(pos, 1);
+ next_char = str[pos];
if ((next_char >= 0x40 && next_char <=
0x7e) ||
(next_char >= 0xa1 &&
next_char <= 0xfe)) {
/* yes, this a wide char */
@@ -614,7 +637,8 @@
(this_char >= 0xe0 && this_char <=
0xef)
) {
/* peek at the next char */
- unsigned char next_char = str[pos];
+ CHECK_LEN(pos, 1);
+ next_char = str[pos];
if ((next_char >= 0x40 && next_char <=
0x7e) ||
(next_char >= 0x80 && next_char
<= 0xfc))
{
@@ -633,7 +657,8 @@
/* check if this is the first of a multi-byte
sequence */
if (this_char >= 0xa1 && this_char <= 0xfe) {
/* peek at the next char */
- unsigned char next_char = str[pos];
+ CHECK_LEN(pos, 1);
+ next_char = str[pos];
if (next_char >= 0xa1 && next_char <=
0xfe) {
/* yes, this a jis kanji char */
this_char <<= 8;
@@ -644,7 +669,8 @@
} else if (this_char == 0x8e) {
/* peek at the next char */
- unsigned char next_char = str[pos];
+ CHECK_LEN(pos, 1);
+ next_char = str[pos];
if (next_char >= 0xa1 && next_char <=
0xdf) {
/* JIS X 0201 kana */
this_char <<= 8;
@@ -655,8 +681,10 @@
} else if (this_char == 0x8f) {
/* peek at the next two char */
- unsigned char next_char = str[pos];
- unsigned char next2_char = str[pos+1];
+ unsigned char next2_char;
+ CHECK_LEN(pos, 2);
+ next_char = str[pos];
+ next2_char = str[pos+1];
if ((next_char >= 0xa1 && next_char <=
0xfe) &&
(next2_char >= 0xa1 &&
next2_char <= 0xfe)) {
/* JIS X 0212 hojo-kanji */
@@ -1098,13 +1126,22 @@
maxlen = 128;
replaced = emalloc (maxlen);
len = 0;
-
i = 0;
while (i < oldlen) {
unsigned char mbsequence[16]; /* allow up to 15 characters in
a multibyte sequence */
int mbseqlen = sizeof(mbsequence);
- unsigned short this_char = get_next_char(charset, old, &i,
mbsequence, &mbseqlen);
+ int status = SUCCESS;
+ unsigned short this_char = get_next_char(charset, old, oldlen,
&i, mbsequence, &mbseqlen, &status);
+ if(status == FAILURE) {
+ /* invalid MB sequence */
+ efree(replaced);
+ if(!PG(display_errors)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING,
"Invalid multibyte sequence in argument");
+ }
+ *newlen = 0;
+ return STR_EMPTY_ALLOC();
+ }
matches_map = 0;
if (len + 16 > maxlen)
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php
