stas            Wed Oct  3 05:05:08 2007 UTC

  Modified files:              (Branch: PHP_5_3)
    /php-src/ext/standard       html.c 
  Log:
  MFB do not accept partial multibyte sequences in html* functions
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.111.2.2.2.14&r2=1.111.2.2.2.14.2.1&diff_format=u
Index: php-src/ext/standard/html.c
diff -u php-src/ext/standard/html.c:1.111.2.2.2.14 php-src/ext/standard/html.c:1.111.2.2.2.14.2.1
--- php-src/ext/standard/html.c:1.111.2.2.2.14  Sun May 27 15:57:11 2007
+++ php-src/ext/standard/html.c Wed Oct  3 05:05:08 2007
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: html.c,v 1.111.2.2.2.14 2007/05/27 15:57:11 nlopess Exp $ */
+/* $Id: html.c,v 1.111.2.2.2.14.2.1 2007/10/03 05:05:08 stas Exp $ */
 
 /*
  * HTML entity resources:
@@ -484,18 +484,29 @@
                        }                        \
                        mbseq[mbpos++] = (mbchar); }
 
+#define CHECK_LEN(pos, chars_need)                     \
+       if((str_len - (pos)) < chars_need) {    \
+               *status = FAILURE;                                      \
+               return 0;                                                       \
+       }
+
 /* {{{ get_next_char
  */
 inline static unsigned short get_next_char(enum entity_charset charset,
                unsigned char * str,
+               int str_len,
                int * newpos,
                unsigned char * mbseq,
-               int * mbseqlen)
+               int * mbseqlen, 
+               int *status)
 {
        int pos = *newpos;
        int mbpos = 0;
        int mbspace = *mbseqlen;
        unsigned short this_char = str[pos++];
+       unsigned char next_char;
+
+       *status = SUCCESS;
        
        if (mbspace <= 0) {
                *mbseqlen = 0;
@@ -555,6 +566,7 @@
                                                                break;
                                                        default:
                                                                /* invalid */
+                                                               *status = FAILURE;
                                                                more = 0;
                                                }
                                        }
@@ -562,21 +574,27 @@
                                        else if (this_char < 0xe0) {
                                                stat = 0x10;    /* 2 byte */
                                                utf = (this_char & 0x1f) << 6;
+                                               CHECK_LEN(pos, 1);
                                        } else if (this_char < 0xf0) {
                                                stat = 0x20;    /* 3 byte */
                                                utf = (this_char & 0xf) << 12;
+                                               CHECK_LEN(pos, 2);
                                        } else if (this_char < 0xf8) {
                                                stat = 0x30;    /* 4 byte */
                                                utf = (this_char & 0x7) << 18;
+                                               CHECK_LEN(pos, 3);
                                        } else if (this_char < 0xfc) {
                                                stat = 0x40;    /* 5 byte */
                                                utf = (this_char & 0x3) << 24;
+                                               CHECK_LEN(pos, 4);
                                        } else if (this_char < 0xfe) {
                                                stat = 0x50;    /* 6 byte */
                                                utf = (this_char & 0x1) << 30;
+                                               CHECK_LEN(pos, 5);
                                        } else {
                                                /* invalid; bail */
                                                more = 0;
+                                               *status = FAILURE;
                                                break;
                                        }
 
@@ -594,7 +612,8 @@
                                /* check if this is the first of a 2-byte sequence */
                                if (this_char >= 0xa1 && this_char <= 0xfe) {
                                        /* peek at the next char */
-                                       unsigned char next_char = str[pos];
+                                       CHECK_LEN(pos, 1);
+                                       next_char = str[pos];
                                        if ((next_char >= 0x40 && next_char <= 0x7e) ||
                                                        (next_char >= 0xa1 && next_char <= 0xfe)) {
                                                /* yes, this a wide char */
@@ -614,7 +633,8 @@
                                         (this_char >= 0xe0 && this_char <= 0xef)
                                        ) {
                                        /* peek at the next char */
-                                       unsigned char next_char = str[pos];
+                                       CHECK_LEN(pos, 1);
+                                       next_char = str[pos];
                                        if ((next_char >= 0x40 && next_char <= 0x7e) ||
                                                (next_char >= 0x80 && next_char <= 0xfc))
                                        {
@@ -633,7 +653,8 @@
                                /* check if this is the first of a multi-byte sequence */
                                if (this_char >= 0xa1 && this_char <= 0xfe) {
                                        /* peek at the next char */
-                                       unsigned char next_char = str[pos];
+                                       CHECK_LEN(pos, 1);
+                                       next_char = str[pos];
                                        if (next_char >= 0xa1 && next_char <= 0xfe) {
                                                /* yes, this a jis kanji char */
                                                this_char <<= 8;
@@ -644,7 +665,8 @@
                                        
                                } else if (this_char == 0x8e) {
                                        /* peek at the next char */
-                                       unsigned char next_char = str[pos];
+                                       CHECK_LEN(pos, 1);
+                                       next_char = str[pos];
                                        if (next_char >= 0xa1 && next_char <= 0xdf) {
                                                /* JIS X 0201 kana */
                                                this_char <<= 8;
@@ -655,8 +677,10 @@
                                        
                                } else if (this_char == 0x8f) {
                                        /* peek at the next two char */
-                                       unsigned char next_char = str[pos];
-                                       unsigned char next2_char = str[pos+1];
+                                       unsigned char next2_char;
+                                       CHECK_LEN(pos, 2);
+                                       next_char = str[pos];
+                                       next2_char = str[pos+1];
                                        if ((next_char >= 0xa1 && next_char <= 0xfe) &&
                                                (next2_char >= 0xa1 && next2_char <= 0xfe)) {
                                                /* JIS X 0212 hojo-kanji */
@@ -1098,13 +1122,23 @@
                maxlen = 128;
        replaced = emalloc (maxlen);
        len = 0;
-
+       //Sleep(10*1000);
        i = 0;
        while (i < oldlen) {
                unsigned char mbsequence[16];   /* allow up to 15 characters in a multibyte sequence */
                int mbseqlen = sizeof(mbsequence);
-               unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen);
+               int status = SUCCESS;
+               unsigned short this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);
 
+               if(status == FAILURE) {
+                       /* invalid MB sequence */
+                       efree(replaced);
+                       if(!PG(display_errors)) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument");
+                       }
+                       *newlen = 0;
+                       return STR_EMPTY_ALLOC();
+               }
                matches_map = 0;
 
                if (len + 16 > maxlen)

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to