andrei          Fri Dec  1 19:25:11 2006 UTC

  Modified files:              
    /php-src/ext/standard       var.c var_unserializer.c var_unserializer.re 
  Log:
  Finalize Unicode support in serialize() and unserialize().
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/var.c?r1=1.247&r2=1.248&diff_format=u
Index: php-src/ext/standard/var.c
diff -u php-src/ext/standard/var.c:1.247 php-src/ext/standard/var.c:1.248
--- php-src/ext/standard/var.c:1.247    Mon Oct  9 18:09:42 2006
+++ php-src/ext/standard/var.c  Fri Dec  1 19:25:10 2006
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: var.c,v 1.247 2006/10/09 18:09:42 iliaa Exp $ */
+/* $Id: var.c,v 1.248 2006/12/01 19:25:10 andrei Exp $ */
 
 
 
@@ -671,25 +671,40 @@
 
 static inline void php_var_serialize_string(smart_str *buf, char *str, int len)
 {
+       static const char hex[] = "0123456789abcdef";
+       unsigned char c;
+       int i;
+
        smart_str_appendl(buf, "s:", 2);
        smart_str_append_long(buf, len);
        smart_str_appendl(buf, ":\"", 2);
-       smart_str_appendl(buf, str, len);
+
+       for (i = 0; i < len; i++) {
+               c = (unsigned char) str[i];
+               if (c < 128 && c != 0x5c /*'\\'*/) {
+                       smart_str_appendc(buf, c & 0xff);
+               } else {
+                       smart_str_appendc(buf, 0x5c /*'\\'*/);
+                       smart_str_appendc(buf, hex[(c >> 4) & 0xf]);
+                       smart_str_appendc(buf, hex[(c >> 0) & 0xf]);
+               }
+       }
+
        smart_str_appendl(buf, "\";", 2);
 }
 
 static inline void php_var_serialize_ustr(smart_str *buf, UChar *ustr, int len)
 {
        static const char hex[] = "0123456789abcdef";
-       UChar32 c;
-       int32_t i;
+       UChar c;
+       int i;
 
-       for(i=0; i<len; /* U16_NEXT post-increments */) {
-               U16_NEXT(ustr, i, len, c);
-               if (c < 128 && c != '\\') {
+       for (i = 0; i < len; i++) {
+               c = ustr[i];
+               if (c < 128 && c != 0x5c /*'\\'*/) {
                        smart_str_appendc(buf, c & 0xff);
                } else {
-                       smart_str_appendc(buf, '\\');
+                       smart_str_appendc(buf, 0x5c /*'\\'*/);
                        smart_str_appendc(buf, hex[(c >> 12) & 0xf]);
                        smart_str_appendc(buf, hex[(c >> 8) & 0xf]);
                        smart_str_appendc(buf, hex[(c >> 4) & 0xf]);
@@ -1062,7 +1077,7 @@
        
 /* }}} */
 
-/* {{{ proto string serialize(mixed variable)
+/* {{{ proto string serialize(mixed variable) U
    Returns a string representation of variable (which can later be unserialized) */
 PHP_FUNCTION(serialize)
 {
@@ -1093,22 +1108,20 @@
 }
 
 /* }}} */
-/* {{{ proto mixed unserialize(string variable_representation)
+/* {{{ proto mixed unserialize(string variable_representation) U
    Takes a string representation of variable and recreates it */
 
 
 PHP_FUNCTION(unserialize)
 {
-       zstr buf;
-       char *str = NULL;
+       char *buf = NULL;
        int buf_len;
-       zend_uchar buf_type;
        const unsigned char *p;
 
        php_unserialize_data_t var_hash;
        
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "T",
-                                                         &buf, &buf_len, &buf_type) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s&",
+                                                         &buf, &buf_len, UG(ascii_conv)) == FAILURE) {
                RETURN_FALSE;
        }
 
@@ -1116,34 +1129,15 @@
                RETURN_FALSE;
        }
 
-       if (buf_type == IS_UNICODE) {
-               /* ASCII unicode string to binary string conversion */
-               int i;
-
-               str = emalloc(buf_len+1);
-               for (i = 0; i < buf_len; i++) {
-                       if (buf.u[i] > 128) {
-                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error at offset %d of %d bytes", i, buf_len);
-                               STR_FREE(str);
-                               RETURN_FALSE;
-                       }
-                       str[i] = buf.u[i];
-               }
-               str[i] = '\0';
-               buf.s = str;
-       }
-       
-       p = (const unsigned char*)buf.s;
+       p = (const unsigned char*) buf;
        PHP_VAR_UNSERIALIZE_INIT(var_hash);
        if (!php_var_unserialize(&return_value, &p, p + buf_len,  &var_hash TSRMLS_CC)) {
                PHP_VAR_UNSERIALIZE_DESTROY(var_hash);
                zval_dtor(return_value);
-               php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error at offset %ld of %d bytes", (long)((char*)p - buf.s), buf_len);
-               STR_FREE(str);
+               php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error at offset %ld of %d bytes", (long)((char*)p - buf), buf_len);
                RETURN_FALSE;
        }
        PHP_VAR_UNSERIALIZE_DESTROY(var_hash);
-       STR_FREE(str);
 }
 
 /* }}} */
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/var_unserializer.c?r1=1.80&r2=1.81&diff_format=u
Index: php-src/ext/standard/var_unserializer.c
diff -u php-src/ext/standard/var_unserializer.c:1.80 php-src/ext/standard/var_unserializer.c:1.81
--- php-src/ext/standard/var_unserializer.c:1.80        Mon Mar 27 14:19:18 2006
+++ php-src/ext/standard/var_unserializer.c     Fri Dec  1 19:25:11 2006
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.9.10 on Mon Mar 27 17:59:43 2006 */
+/* Generated by re2c 0.9.12 on Fri Dec  1 11:18:14 2006 */
 #line 1 "ext/standard/var_unserializer.re"
 /*
   +----------------------------------------------------------------------+
@@ -18,7 +18,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: var_unserializer.c,v 1.80 2006/03/27 14:19:18 dmitry Exp $ */
+/* $Id: var_unserializer.c,v 1.81 2006/12/01 19:25:11 andrei Exp $ */
 
 #include "php.h"
 #include "ext/standard/php_var.h"
@@ -112,6 +112,38 @@
        return ustr;
 }
 
+static char *unserialize_str(const unsigned char **p, int len)
+{
+       int i, j;
+       char *str = emalloc(len+1);
+
+       for (i = 0; i < len; i++) {
+               if (**p != '\\') {
+                       str[i] = (char)**p;
+               } else {
+                       unsigned char ch = 0;
+
+                       for (j = 0; j < 2; j++) {
+                               (*p)++;
+                               if (**p >= '0' && **p <= '9') {
+                                       ch = (ch << 4) + (**p -'0');
+                               } else if (**p >= 'a' && **p <= 'f') {
+                                       ch = (ch << 4) + (**p -'a'+10);
+                               } else if (**p >= 'A' && **p <= 'F') {
+                                       ch = (ch << 4) + (**p -'A'+10);
+                               } else {
+                                       efree(str);
+                                       return NULL;
+                               }
+                       }
+                       str[i] = (char)ch;
+               }
+               (*p)++;
+       }
+       str[i] = 0;
+       return str;
+}
+
 PHPAPI void var_replace(php_unserialize_data_t *var_hashx, zval *ozval, zval **nzval)
 {
        long i;
@@ -179,7 +211,7 @@
 #define YYMARKER marker
 
 
-#line 187 "ext/standard/var_unserializer.re"
+#line 219 "ext/standard/var_unserializer.re"
 
 
 
@@ -426,10 +458,10 @@
          0,   0,   0,   0,   0,   0,   0,   0, 
        };
 
-#line 430 "ext/standard/var_unserializer.c"
+#line 462 "ext/standard/var_unserializer.c"
 {
        YYCTYPE yych;
-       unsigned int yyaccept;
+       unsigned int yyaccept = 0;
        goto yy0;
        ++YYCURSOR;
 yy0:
@@ -455,9 +487,9 @@
        if(yych == ':') goto yy95;
        goto yy3;
 yy3:
-#line 693 "ext/standard/var_unserializer.re"
+#line 725 "ext/standard/var_unserializer.re"
 { return 0; }
-#line 461 "ext/standard/var_unserializer.c"
+#line 493 "ext/standard/var_unserializer.c"
 yy4:   yyaccept = 0;
        yych = *(YYMARKER = ++YYCURSOR);
        if(yych == ':') goto yy89;
@@ -500,13 +532,13 @@
 yy14:  ++YYCURSOR;
        goto yy15;
 yy15:
-#line 687 "ext/standard/var_unserializer.re"
+#line 719 "ext/standard/var_unserializer.re"
 {
        /* this is the case where we have less data than planned */
        php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Unexpected end of serialized data");
        return 0; /* not sure if it should be 0 or 1 here? */
 }
-#line 510 "ext/standard/var_unserializer.c"
+#line 542 "ext/standard/var_unserializer.c"
 yy16:  yych = *++YYCURSOR;
        goto yy3;
 yy17:  yych = *++YYCURSOR;
@@ -539,7 +571,7 @@
 yy23:  ++YYCURSOR;
        goto yy24;
 yy24:
-#line 574 "ext/standard/var_unserializer.re"
+#line 606 "ext/standard/var_unserializer.re"
 {
        size_t len, len2, len3, maxlen;
        long elements;
@@ -652,7 +684,7 @@
 
        return object_common2(UNSERIALIZE_PASSTHRU, elements);
 }
-#line 656 "ext/standard/var_unserializer.c"
+#line 688 "ext/standard/var_unserializer.c"
 yy25:  yych = *++YYCURSOR;
        if(yych <= ','){
                if(yych != '+') goto yy18;
@@ -681,7 +713,7 @@
 yy30:  ++YYCURSOR;
        goto yy31;
 yy31:
-#line 566 "ext/standard/var_unserializer.re"
+#line 598 "ext/standard/var_unserializer.re"
 {
 
        INIT_PZVAL(*rval);
@@ -689,7 +721,7 @@
        return object_common2(UNSERIALIZE_PASSTHRU,
                        object_common1(UNSERIALIZE_PASSTHRU, ZEND_STANDARD_CLASS_DEF_PTR));
 }
-#line 693 "ext/standard/var_unserializer.c"
+#line 725 "ext/standard/var_unserializer.c"
 yy32:  yych = *++YYCURSOR;
        if(yych == '+') goto yy33;
        if(yych <= '/') goto yy18;
@@ -713,7 +745,7 @@
 yy37:  ++YYCURSOR;
        goto yy38;
 yy38:
-#line 544 "ext/standard/var_unserializer.re"
+#line 576 "ext/standard/var_unserializer.re"
 {
        long elements = parse_iv(start + 2);
        /* use iv() not uiv() in order to check data range */
@@ -735,7 +767,7 @@
 
        return finish_nested_data(UNSERIALIZE_PASSTHRU);
 }
-#line 739 "ext/standard/var_unserializer.c"
+#line 771 "ext/standard/var_unserializer.c"
 yy39:  yych = *++YYCURSOR;
        if(yych == '+') goto yy40;
        if(yych <= '/') goto yy18;
@@ -759,7 +791,7 @@
 yy44:  ++YYCURSOR;
        goto yy45;
 yy45:
-#line 514 "ext/standard/var_unserializer.re"
+#line 547 "ext/standard/var_unserializer.re"
 {
        size_t len, maxlen;
        UChar *ustr;
@@ -772,7 +804,6 @@
        }
 
        if ((ustr = unserialize_ustr(&YYCURSOR, len)) == NULL) {
-               efree(ustr);
                return 0;
        }
 
@@ -789,7 +820,7 @@
        ZVAL_UNICODEL(*rval, ustr, len, 0);
        return 1;
 }
-#line 793 "ext/standard/var_unserializer.c"
+#line 824 "ext/standard/var_unserializer.c"
 yy46:  yych = *++YYCURSOR;
        if(yych == '+') goto yy47;
        if(yych <= '/') goto yy18;
@@ -813,7 +844,7 @@
 yy51:  ++YYCURSOR;
        goto yy52;
 yy52:
-#line 486 "ext/standard/var_unserializer.re"
+#line 518 "ext/standard/var_unserializer.re"
 {
        size_t len, maxlen;
        char *str;
@@ -825,11 +856,12 @@
                return 0;
        }
 
-       str = (char*)YYCURSOR;
-
-       YYCURSOR += len;
+       if ((str = unserialize_str(&YYCURSOR, len)) == NULL) {
+               return 0;
+       }
 
        if (*(YYCURSOR) != '"') {
+               efree(str);
                *p = YYCURSOR;
                return 0;
        }
@@ -838,10 +870,10 @@
        *p = YYCURSOR;
 
        INIT_PZVAL(*rval);
-       ZVAL_RT_STRINGL(*rval, str, len, 1);
+       ZVAL_STRINGL(*rval, str, len, 0);
        return 1;
 }
-#line 845 "ext/standard/var_unserializer.c"
+#line 877 "ext/standard/var_unserializer.c"
 yy53:  yych = *++YYCURSOR;
        if(yych <= '/'){
                if(yych <= ','){
@@ -930,14 +962,14 @@
 yy63:  ++YYCURSOR;
        goto yy64;
 yy64:
-#line 479 "ext/standard/var_unserializer.re"
+#line 511 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
        ZVAL_DOUBLE(*rval, zend_strtod((const char *)start + 2, NULL));
        return 1;
 }
-#line 941 "ext/standard/var_unserializer.c"
+#line 973 "ext/standard/var_unserializer.c"
 yy65:  yych = *++YYCURSOR;
        if(yych <= ','){
                if(yych != '+') goto yy18;
@@ -997,7 +1029,7 @@
 yy74:  ++YYCURSOR;
        goto yy75;
 yy75:
-#line 464 "ext/standard/var_unserializer.re"
+#line 496 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
@@ -1012,7 +1044,7 @@
 
        return 1;
 }
-#line 1016 "ext/standard/var_unserializer.c"
+#line 1048 "ext/standard/var_unserializer.c"
 yy76:  yych = *++YYCURSOR;
        if(yych == 'N') goto yy73;
        goto yy18;
@@ -1041,14 +1073,14 @@
 yy81:  ++YYCURSOR;
        goto yy82;
 yy82:
-#line 457 "ext/standard/var_unserializer.re"
+#line 489 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
        ZVAL_LONG(*rval, parse_iv(start + 2));
        return 1;
 }
-#line 1052 "ext/standard/var_unserializer.c"
+#line 1084 "ext/standard/var_unserializer.c"
 yy83:  yych = *++YYCURSOR;
        if(yych <= '/') goto yy18;
        if(yych >= '2') goto yy18;
@@ -1059,25 +1091,25 @@
 yy85:  ++YYCURSOR;
        goto yy86;
 yy86:
-#line 450 "ext/standard/var_unserializer.re"
+#line 482 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
        ZVAL_BOOL(*rval, parse_iv(start + 2));
        return 1;
 }
-#line 1070 "ext/standard/var_unserializer.c"
+#line 1102 "ext/standard/var_unserializer.c"
 yy87:  ++YYCURSOR;
        goto yy88;
 yy88:
-#line 443 "ext/standard/var_unserializer.re"
+#line 475 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
        ZVAL_NULL(*rval);
        return 1;
 }
-#line 1081 "ext/standard/var_unserializer.c"
+#line 1113 "ext/standard/var_unserializer.c"
 yy89:  yych = *++YYCURSOR;
        if(yych <= ','){
                if(yych != '+') goto yy18;
@@ -1103,7 +1135,7 @@
 yy93:  ++YYCURSOR;
        goto yy94;
 yy94:
-#line 420 "ext/standard/var_unserializer.re"
+#line 452 "ext/standard/var_unserializer.re"
 {
        long id;
 
@@ -1126,7 +1158,7 @@
        
        return 1;
 }
-#line 1130 "ext/standard/var_unserializer.c"
+#line 1162 "ext/standard/var_unserializer.c"
 yy95:  yych = *++YYCURSOR;
        if(yych <= ','){
                if(yych != '+') goto yy18;
@@ -1152,7 +1184,7 @@
 yy99:  ++YYCURSOR;
        goto yy100;
 yy100:
-#line 399 "ext/standard/var_unserializer.re"
+#line 431 "ext/standard/var_unserializer.re"
 {
        long id;
 
@@ -1173,10 +1205,10 @@
        
        return 1;
 }
-#line 1177 "ext/standard/var_unserializer.c"
+#line 1209 "ext/standard/var_unserializer.c"
 }
 }
-#line 695 "ext/standard/var_unserializer.re"
+#line 727 "ext/standard/var_unserializer.re"
 
 
        return 0;
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/var_unserializer.re?r1=1.59&r2=1.60&diff_format=u
Index: php-src/ext/standard/var_unserializer.re
diff -u php-src/ext/standard/var_unserializer.re:1.59 php-src/ext/standard/var_unserializer.re:1.60
--- php-src/ext/standard/var_unserializer.re:1.59       Mon Mar 27 14:19:18 2006
+++ php-src/ext/standard/var_unserializer.re    Fri Dec  1 19:25:11 2006
@@ -16,7 +16,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: var_unserializer.re,v 1.59 2006/03/27 14:19:18 dmitry Exp $ */
+/* $Id: var_unserializer.re,v 1.60 2006/12/01 19:25:11 andrei Exp $ */
 
 #include "php.h"
 #include "ext/standard/php_var.h"
@@ -110,6 +110,38 @@
        return ustr;
 }
 
+static char *unserialize_str(const unsigned char **p, int len)
+{
+       int i, j;
+       char *str = emalloc(len+1);
+
+       for (i = 0; i < len; i++) {
+               if (**p != '\\') {
+                       str[i] = (char)**p;
+               } else {
+                       unsigned char ch = 0;
+
+                       for (j = 0; j < 2; j++) {
+                               (*p)++;
+                               if (**p >= '0' && **p <= '9') {
+                                       ch = (ch << 4) + (**p -'0');
+                               } else if (**p >= 'a' && **p <= 'f') {
+                                       ch = (ch << 4) + (**p -'a'+10);
+                               } else if (**p >= 'A' && **p <= 'F') {
+                                       ch = (ch << 4) + (**p -'A'+10);
+                               } else {
+                                       efree(str);
+                                       return NULL;
+                               }
+                       }
+                       str[i] = (char)ch;
+               }
+               (*p)++;
+       }
+       str[i] = 0;
+       return str;
+}
+
 PHPAPI void var_replace(php_unserialize_data_t *var_hashx, zval *ozval, zval **nzval)
 {
        long i;
@@ -494,11 +526,12 @@
                return 0;
        }
 
-       str = (char*)YYCURSOR;
-
-       YYCURSOR += len;
+       if ((str = unserialize_str(&YYCURSOR, len)) == NULL) {
+               return 0;
+       }
 
        if (*(YYCURSOR) != '"') {
+               efree(str);
                *p = YYCURSOR;
                return 0;
        }
@@ -507,7 +540,7 @@
        *p = YYCURSOR;
 
        INIT_PZVAL(*rval);
-       ZVAL_RT_STRINGL(*rval, str, len, 1);
+       ZVAL_STRINGL(*rval, str, len, 0);
        return 1;
 }
 
@@ -523,7 +556,6 @@
        }
 
        if ((ustr = unserialize_ustr(&YYCURSOR, len)) == NULL) {
-               efree(ustr);
                return 0;
        }
 

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to