andrei Fri Dec 1 19:25:11 2006 UTC Modified files: /php-src/ext/standard var.c var_unserializer.c var_unserializer.re Log: Finalize Unicode support in serialize() and unserialize().
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/var.c?r1=1.247&r2=1.248&diff_format=u Index: php-src/ext/standard/var.c diff -u php-src/ext/standard/var.c:1.247 php-src/ext/standard/var.c:1.248 --- php-src/ext/standard/var.c:1.247 Mon Oct 9 18:09:42 2006 +++ php-src/ext/standard/var.c Fri Dec 1 19:25:10 2006 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: var.c,v 1.247 2006/10/09 18:09:42 iliaa Exp $ */ +/* $Id: var.c,v 1.248 2006/12/01 19:25:10 andrei Exp $ */ @@ -671,25 +671,40 @@ static inline void php_var_serialize_string(smart_str *buf, char *str, int len) { + static const char hex[] = "0123456789abcdef"; + unsigned char c; + int i; + smart_str_appendl(buf, "s:", 2); smart_str_append_long(buf, len); smart_str_appendl(buf, ":\"", 2); - smart_str_appendl(buf, str, len); + + for (i = 0; i < len; i++) { + c = (unsigned char) str[i]; + if (c < 128 && c != 0x5c /*'\\'*/) { + smart_str_appendc(buf, c & 0xff); + } else { + smart_str_appendc(buf, 0x5c /*'\\'*/); + smart_str_appendc(buf, hex[(c >> 4) & 0xf]); + smart_str_appendc(buf, hex[(c >> 0) & 0xf]); + } + } + smart_str_appendl(buf, "\";", 2); } static inline void php_var_serialize_ustr(smart_str *buf, UChar *ustr, int len) { static const char hex[] = "0123456789abcdef"; - UChar32 c; - int32_t i; + UChar c; + int i; - for(i=0; i<len; /* U16_NEXT post-increments */) { - U16_NEXT(ustr, i, len, c); - if (c < 128 && c != '\\') { + for (i = 0; i < len; i++) { + c = ustr[i]; + if (c < 128 && c != 0x5c /*'\\'*/) { smart_str_appendc(buf, c & 0xff); } else { - smart_str_appendc(buf, '\\'); + smart_str_appendc(buf, 0x5c /*'\\'*/); smart_str_appendc(buf, hex[(c >> 12) & 0xf]); smart_str_appendc(buf, hex[(c >> 8) & 0xf]); smart_str_appendc(buf, hex[(c >> 4) & 0xf]); @@ -1062,7 +1077,7 @@ /* }}} */ -/* {{{ proto string serialize(mixed variable) +/* {{{ proto string serialize(mixed variable) U Returns a string representation of variable (which can later be unserialized) */ PHP_FUNCTION(serialize) { @@ -1093,22 +1108,20 @@ } /* }}} */ -/* {{{ proto mixed unserialize(string variable_representation) +/* {{{ proto mixed unserialize(string variable_representation) U Takes a string representation of variable and recreates it */ PHP_FUNCTION(unserialize) { - zstr buf; - char *str = NULL; + char *buf = NULL; int buf_len; - zend_uchar buf_type; const unsigned char *p; php_unserialize_data_t var_hash; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "T", - &buf, &buf_len, &buf_type) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s&", + &buf, &buf_len, UG(ascii_conv)) == FAILURE) { RETURN_FALSE; } @@ -1116,34 +1129,15 @@ RETURN_FALSE; } - if (buf_type == IS_UNICODE) { - /* ASCII unicode string to binary string conversion */ - int i; - - str = emalloc(buf_len+1); - for (i = 0; i < buf_len; i++) { - if (buf.u[i] > 128) { - php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error at offset %d of %d bytes", i, buf_len); - STR_FREE(str); - RETURN_FALSE; - } - str[i] = buf.u[i]; - } - str[i] = '\0'; - buf.s = str; - } - - p = (const unsigned char*)buf.s; + p = (const unsigned char*) buf; PHP_VAR_UNSERIALIZE_INIT(var_hash); if (!php_var_unserialize(&return_value, &p, p + buf_len, &var_hash TSRMLS_CC)) { PHP_VAR_UNSERIALIZE_DESTROY(var_hash); zval_dtor(return_value); - php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error at offset %ld of %d bytes", (long)((char*)p - buf.s), buf_len); - STR_FREE(str); + php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error at offset %ld of %d bytes", (long)((char*)p - buf), buf_len); RETURN_FALSE; } PHP_VAR_UNSERIALIZE_DESTROY(var_hash); - STR_FREE(str); } /* }}} */ http://cvs.php.net/viewvc.cgi/php-src/ext/standard/var_unserializer.c?r1=1.80&r2=1.81&diff_format=u Index: php-src/ext/standard/var_unserializer.c diff -u php-src/ext/standard/var_unserializer.c:1.80 php-src/ext/standard/var_unserializer.c:1.81 --- php-src/ext/standard/var_unserializer.c:1.80 Mon Mar 27 14:19:18 2006 +++ php-src/ext/standard/var_unserializer.c Fri Dec 1 19:25:11 2006 @@ -1,4 +1,4 @@ -/* Generated by re2c 0.9.10 on Mon Mar 27 17:59:43 2006 */ +/* Generated by re2c 0.9.12 on Fri Dec 1 11:18:14 2006 */ #line 1 "ext/standard/var_unserializer.re" /* +----------------------------------------------------------------------+ @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: var_unserializer.c,v 1.80 2006/03/27 14:19:18 dmitry Exp $ */ +/* $Id: var_unserializer.c,v 1.81 2006/12/01 19:25:11 andrei Exp $ */ #include "php.h" #include "ext/standard/php_var.h" @@ -112,6 +112,38 @@ return ustr; } +static char *unserialize_str(const unsigned char **p, int len) +{ + int i, j; + char *str = emalloc(len+1); + + for (i = 0; i < len; i++) { + if (**p != '\\') { + str[i] = (char)**p; + } else { + unsigned char ch = 0; + + for (j = 0; j < 2; j++) { + (*p)++; + if (**p >= '0' && **p <= '9') { + ch = (ch << 4) + (**p -'0'); + } else if (**p >= 'a' && **p <= 'f') { + ch = (ch << 4) + (**p -'a'+10); + } else if (**p >= 'A' && **p <= 'F') { + ch = (ch << 4) + (**p -'A'+10); + } else { + efree(str); + return NULL; + } + } + str[i] = (char)ch; + } + (*p)++; + } + str[i] = 0; + return str; +} + PHPAPI void var_replace(php_unserialize_data_t *var_hashx, zval *ozval, zval **nzval) { long i; @@ -179,7 +211,7 @@ #define YYMARKER marker -#line 187 "ext/standard/var_unserializer.re" +#line 219 "ext/standard/var_unserializer.re" @@ -426,10 +458,10 @@ 0, 0, 0, 0, 0, 0, 0, 0, }; -#line 430 "ext/standard/var_unserializer.c" +#line 462 "ext/standard/var_unserializer.c" { YYCTYPE yych; - unsigned int yyaccept; + unsigned int yyaccept = 0; goto yy0; ++YYCURSOR; yy0: @@ -455,9 +487,9 @@ if(yych == ':') goto yy95; goto yy3; yy3: -#line 693 "ext/standard/var_unserializer.re" +#line 725 "ext/standard/var_unserializer.re" { return 0; } -#line 461 "ext/standard/var_unserializer.c" +#line 493 "ext/standard/var_unserializer.c" yy4: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if(yych == ':') goto yy89; @@ -500,13 +532,13 @@ yy14: ++YYCURSOR; goto yy15; yy15: -#line 687 "ext/standard/var_unserializer.re" +#line 719 "ext/standard/var_unserializer.re" { /* this is the case where we have less data than planned */ php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Unexpected end of serialized data"); return 0; /* not sure if it should be 0 or 1 here? */ } -#line 510 "ext/standard/var_unserializer.c" +#line 542 "ext/standard/var_unserializer.c" yy16: yych = *++YYCURSOR; goto yy3; yy17: yych = *++YYCURSOR; @@ -539,7 +571,7 @@ yy23: ++YYCURSOR; goto yy24; yy24: -#line 574 "ext/standard/var_unserializer.re" +#line 606 "ext/standard/var_unserializer.re" { size_t len, len2, len3, maxlen; long elements; @@ -652,7 +684,7 @@ return object_common2(UNSERIALIZE_PASSTHRU, elements); } -#line 656 "ext/standard/var_unserializer.c" +#line 688 "ext/standard/var_unserializer.c" yy25: yych = *++YYCURSOR; if(yych <= ','){ if(yych != '+') goto yy18; @@ -681,7 +713,7 @@ yy30: ++YYCURSOR; goto yy31; yy31: -#line 566 "ext/standard/var_unserializer.re" +#line 598 "ext/standard/var_unserializer.re" { INIT_PZVAL(*rval); @@ -689,7 +721,7 @@ return object_common2(UNSERIALIZE_PASSTHRU, object_common1(UNSERIALIZE_PASSTHRU, ZEND_STANDARD_CLASS_DEF_PTR)); } -#line 693 "ext/standard/var_unserializer.c" +#line 725 "ext/standard/var_unserializer.c" yy32: yych = *++YYCURSOR; if(yych == '+') goto yy33; if(yych <= '/') goto yy18; @@ -713,7 +745,7 @@ yy37: ++YYCURSOR; goto yy38; yy38: -#line 544 "ext/standard/var_unserializer.re" +#line 576 "ext/standard/var_unserializer.re" { long elements = parse_iv(start + 2); /* use iv() not uiv() in order to check data range */ @@ -735,7 +767,7 @@ return finish_nested_data(UNSERIALIZE_PASSTHRU); } -#line 739 "ext/standard/var_unserializer.c" +#line 771 "ext/standard/var_unserializer.c" yy39: yych = *++YYCURSOR; if(yych == '+') goto yy40; if(yych <= '/') goto yy18; @@ -759,7 +791,7 @@ yy44: ++YYCURSOR; goto yy45; yy45: -#line 514 "ext/standard/var_unserializer.re" +#line 547 "ext/standard/var_unserializer.re" { size_t len, maxlen; UChar *ustr; @@ -772,7 +804,6 @@ } if ((ustr = unserialize_ustr(&YYCURSOR, len)) == NULL) { - efree(ustr); return 0; } @@ -789,7 +820,7 @@ ZVAL_UNICODEL(*rval, ustr, len, 0); return 1; } -#line 793 "ext/standard/var_unserializer.c" +#line 824 "ext/standard/var_unserializer.c" yy46: yych = *++YYCURSOR; if(yych == '+') goto yy47; if(yych <= '/') goto yy18; @@ -813,7 +844,7 @@ yy51: ++YYCURSOR; goto yy52; yy52: -#line 486 "ext/standard/var_unserializer.re" +#line 518 "ext/standard/var_unserializer.re" { size_t len, maxlen; char *str; @@ -825,11 +856,12 @@ return 0; } - str = (char*)YYCURSOR; - - YYCURSOR += len; + if ((str = unserialize_str(&YYCURSOR, len)) == NULL) { + return 0; + } if (*(YYCURSOR) != '"') { + efree(str); *p = YYCURSOR; return 0; } @@ -838,10 +870,10 @@ *p = YYCURSOR; INIT_PZVAL(*rval); - ZVAL_RT_STRINGL(*rval, str, len, 1); + ZVAL_STRINGL(*rval, str, len, 0); return 1; } -#line 845 "ext/standard/var_unserializer.c" +#line 877 "ext/standard/var_unserializer.c" yy53: yych = *++YYCURSOR; if(yych <= '/'){ if(yych <= ','){ @@ -930,14 +962,14 @@ yy63: ++YYCURSOR; goto yy64; yy64: -#line 479 "ext/standard/var_unserializer.re" +#line 511 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); ZVAL_DOUBLE(*rval, zend_strtod((const char *)start + 2, NULL)); return 1; } -#line 941 "ext/standard/var_unserializer.c" +#line 973 "ext/standard/var_unserializer.c" yy65: yych = *++YYCURSOR; if(yych <= ','){ if(yych != '+') goto yy18; @@ -997,7 +1029,7 @@ yy74: ++YYCURSOR; goto yy75; yy75: -#line 464 "ext/standard/var_unserializer.re" +#line 496 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); @@ -1012,7 +1044,7 @@ return 1; } -#line 1016 "ext/standard/var_unserializer.c" +#line 1048 "ext/standard/var_unserializer.c" yy76: yych = *++YYCURSOR; if(yych == 'N') goto yy73; goto yy18; @@ -1041,14 +1073,14 @@ yy81: ++YYCURSOR; goto yy82; yy82: -#line 457 "ext/standard/var_unserializer.re" +#line 489 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); ZVAL_LONG(*rval, parse_iv(start + 2)); return 1; } -#line 1052 "ext/standard/var_unserializer.c" +#line 1084 "ext/standard/var_unserializer.c" yy83: yych = *++YYCURSOR; if(yych <= '/') goto yy18; if(yych >= '2') goto yy18; @@ -1059,25 +1091,25 @@ yy85: ++YYCURSOR; goto yy86; yy86: -#line 450 "ext/standard/var_unserializer.re" +#line 482 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); ZVAL_BOOL(*rval, parse_iv(start + 2)); return 1; } -#line 1070 "ext/standard/var_unserializer.c" +#line 1102 "ext/standard/var_unserializer.c" yy87: ++YYCURSOR; goto yy88; yy88: -#line 443 "ext/standard/var_unserializer.re" +#line 475 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); ZVAL_NULL(*rval); return 1; } -#line 1081 "ext/standard/var_unserializer.c" +#line 1113 "ext/standard/var_unserializer.c" yy89: yych = *++YYCURSOR; if(yych <= ','){ if(yych != '+') goto yy18; @@ -1103,7 +1135,7 @@ yy93: ++YYCURSOR; goto yy94; yy94: -#line 420 "ext/standard/var_unserializer.re" +#line 452 "ext/standard/var_unserializer.re" { long id; @@ -1126,7 +1158,7 @@ return 1; } -#line 1130 "ext/standard/var_unserializer.c" +#line 1162 "ext/standard/var_unserializer.c" yy95: yych = *++YYCURSOR; if(yych <= ','){ if(yych != '+') goto yy18; @@ -1152,7 +1184,7 @@ yy99: ++YYCURSOR; goto yy100; yy100: -#line 399 "ext/standard/var_unserializer.re" +#line 431 "ext/standard/var_unserializer.re" { long id; @@ -1173,10 +1205,10 @@ return 1; } -#line 1177 "ext/standard/var_unserializer.c" +#line 1209 "ext/standard/var_unserializer.c" } } -#line 695 "ext/standard/var_unserializer.re" +#line 727 "ext/standard/var_unserializer.re" return 0; http://cvs.php.net/viewvc.cgi/php-src/ext/standard/var_unserializer.re?r1=1.59&r2=1.60&diff_format=u Index: php-src/ext/standard/var_unserializer.re diff -u php-src/ext/standard/var_unserializer.re:1.59 php-src/ext/standard/var_unserializer.re:1.60 --- php-src/ext/standard/var_unserializer.re:1.59 Mon Mar 27 14:19:18 2006 +++ php-src/ext/standard/var_unserializer.re Fri Dec 1 19:25:11 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: var_unserializer.re,v 1.59 2006/03/27 14:19:18 dmitry Exp $ */ +/* $Id: var_unserializer.re,v 1.60 2006/12/01 19:25:11 andrei Exp $ */ #include "php.h" #include "ext/standard/php_var.h" @@ -110,6 +110,38 @@ return ustr; } +static char *unserialize_str(const unsigned char **p, int len) +{ + int i, j; + char *str = emalloc(len+1); + + for (i = 0; i < len; i++) { + if (**p != '\\') { + str[i] = (char)**p; + } else { + unsigned char ch = 0; + + for (j = 0; j < 2; j++) { + (*p)++; + if (**p >= '0' && **p <= '9') { + ch = (ch << 4) + (**p -'0'); + } else if (**p >= 'a' && **p <= 'f') { + ch = (ch << 4) + (**p -'a'+10); + } else if (**p >= 'A' && **p <= 'F') { + ch = (ch << 4) + (**p -'A'+10); + } else { + efree(str); + return NULL; + } + } + str[i] = (char)ch; + } + (*p)++; + } + str[i] = 0; + return str; +} + PHPAPI void var_replace(php_unserialize_data_t *var_hashx, zval *ozval, zval **nzval) { long i; @@ -494,11 +526,12 @@ return 0; } - str = (char*)YYCURSOR; - - YYCURSOR += len; + if ((str = unserialize_str(&YYCURSOR, len)) == NULL) { + return 0; + } if (*(YYCURSOR) != '"') { + efree(str); *p = YYCURSOR; return 0; } @@ -507,7 +540,7 @@ *p = YYCURSOR; INIT_PZVAL(*rval); - ZVAL_RT_STRINGL(*rval, str, len, 1); + ZVAL_STRINGL(*rval, str, len, 0); return 1; } @@ -523,7 +556,6 @@ } if ((ustr = unserialize_ustr(&YYCURSOR, len)) == NULL) { - efree(ustr); return 0; }
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php