tony2001 Thu Jul 12 09:55:41 2007 UTC Modified files: /php-src/ext/json JSON_parser.c utf8_decode.c utf8_to_utf16.c Log: fix WS, CS and other S
http://cvs.php.net/viewvc.cgi/php-src/ext/json/JSON_parser.c?r1=1.15&r2=1.16&diff_format=u Index: php-src/ext/json/JSON_parser.c diff -u php-src/ext/json/JSON_parser.c:1.15 php-src/ext/json/JSON_parser.c:1.16 --- php-src/ext/json/JSON_parser.c:1.15 Wed Jun 13 17:57:10 2007 +++ php-src/ext/json/JSON_parser.c Thu Jul 12 09:55:41 2007 @@ -38,6 +38,8 @@ significant reductions in the size of the state transition table. */ +/* {{{ constants */ + /* error */ #define S_ERR -1 @@ -134,7 +136,9 @@ /* everything else */ #define S_ETC 30 +/* }}} */ +/* {{{ tables */ /* This table maps the 128 ASCII characters into the 32 character classes. The remaining Unicode characters should be mapped to S_ETC. @@ -161,7 +165,6 @@ S_ETC, S_ETC, S_ETC, S_LBE, S_ETC, S_RBE, S_ETC, S_ETC }; - /* The state transition table takes the current state and the current symbol, and returns either a new state or an action. A new state is a number between @@ -201,8 +204,9 @@ /*29*/ {29,29,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1} }; -#define JSON_PARSER_MAX_DEPTH 128 +/* }}} */ +#define JSON_PARSER_MAX_DEPTH 128 /* A stack maintains the states of nested structures. @@ -215,7 +219,6 @@ int the_top; } json_parser; - /* These modes can be pushed on the PDA stack. */ @@ -227,154 +230,125 @@ /* Push a mode onto the stack. Return false if there is overflow. */ -static int -push(json_parser *json, zval *z, int mode) +static int push(json_parser *json, zval *z, int mode) /* {{{ */ { - json->the_top += 1; - if (json->the_top >= JSON_PARSER_MAX_DEPTH) { - return false; - } + json->the_top += 1; + if (json->the_top >= JSON_PARSER_MAX_DEPTH) { + return false; + } - json->the_stack[json->the_top] = mode; - return true; + json->the_stack[json->the_top] = mode; + return true; } - +/* }}} */ /* Pop the stack, assuring that the current mode matches the expectation. Return false if there is underflow or if the modes mismatch. */ -static int -pop(json_parser *json, zval *z, int mode) +static int pop(json_parser *json, zval *z, int mode) /* {{{ */ { - if (json->the_top < 0 || json->the_stack[json->the_top] != mode) { - return false; - } - json->the_stack[json->the_top] = 0; - json->the_top -= 1; + if (json->the_top < 0 || json->the_stack[json->the_top] != mode) { + return false; + } + json->the_stack[json->the_top] = 0; + json->the_top -= 1; - return true; + return true; } +/* }}} */ - -static int dehexchar(char c) +static int dehexchar(char c) /* {{{ */ { - if (c >= '0' && c <= '9') - { + if (c >= '0' && c <= '9') { return c - '0'; - } - else if (c >= 'A' && c <= 'F') - { + } else if (c >= 'A' && c <= 'F') { return c - ('A' - 10); - } - else if (c >= 'a' && c <= 'f') - { + } else if (c >= 'a' && c <= 'f') { return c - ('a' - 10); - } - else - { + } else { return -1; } } +/* }}} */ - -static void json_create_zval(zval **z, smart_str *buf, int type TSRMLS_DC) +static void json_create_zval(zval **z, smart_str *buf, int type TSRMLS_DC) /* {{{ */ { - ALLOC_INIT_ZVAL(*z); + ALLOC_INIT_ZVAL(*z); - if (type == IS_LONG) - { - double d = zend_strtod(buf->c, NULL); - if (d > LONG_MAX || d < -LONG_MAX) { - ZVAL_DOUBLE(*z, d); - } else { - ZVAL_LONG(*z, (long)d); + if (type == IS_LONG) { + double d = zend_strtod(buf->c, NULL); + if (d > LONG_MAX || d < -LONG_MAX) { + ZVAL_DOUBLE(*z, d); + } else { + ZVAL_LONG(*z, (long)d); + } + } else if (type == IS_DOUBLE) { + ZVAL_DOUBLE(*z, zend_strtod(buf->c, NULL)); + } else if (type == IS_STRING) { + ZVAL_UTF8_STRINGL(*z, buf->c, buf->len, ZSTR_DUPLICATE); + } else if (type == IS_BOOL) { + ZVAL_BOOL(*z, (*(buf->c) == 't')); + } else { /* type == IS_NULL) || type unknown */ + ZVAL_NULL(*z); } - } - else if (type == IS_DOUBLE) - { - ZVAL_DOUBLE(*z, zend_strtod(buf->c, NULL)); - } - else if (type == IS_STRING) - { - ZVAL_UTF8_STRINGL(*z, buf->c, buf->len, ZSTR_DUPLICATE); - } - else if (type == IS_BOOL) - { - ZVAL_BOOL(*z, (*(buf->c) == 't')); - } - else /* type == IS_NULL) || type unknown */ - { - ZVAL_NULL(*z); - } } +/* }}} */ - -static void utf16_to_utf8(smart_str *buf, unsigned short utf16) +static void utf16_to_utf8(smart_str *buf, unsigned short utf16) /* {{{ */ { - if (utf16 < 0x80) - { - smart_str_appendc(buf, (unsigned char) utf16); - } - else if (utf16 < 0x800) - { - smart_str_appendc(buf, 0xc0 | (utf16 >> 6)); - smart_str_appendc(buf, 0x80 | (utf16 & 0x3f)); - } - else if ((utf16 & 0xfc00) == 0xdc00 - && buf->len >= 3 - && ((unsigned char) buf->c[buf->len - 3]) == 0xed - && ((unsigned char) buf->c[buf->len - 2] & 0xf0) == 0xa0 - && ((unsigned char) buf->c[buf->len - 1] & 0xc0) == 0x80) - { - /* found surrogate pair */ - unsigned long utf32; - - utf32 = (((buf->c[buf->len - 2] & 0xf) << 16) - | ((buf->c[buf->len - 1] & 0x3f) << 10) - | (utf16 & 0x3ff)) + 0x10000; - buf->len -= 3; - - smart_str_appendc(buf, 0xf0 | (utf32 >> 18)); - smart_str_appendc(buf, 0x80 | ((utf32 >> 12) & 0x3f)); - smart_str_appendc(buf, 0x80 | ((utf32 >> 6) & 0x3f)); - smart_str_appendc(buf, 0x80 | (utf32 & 0x3f)); - } - else - { - smart_str_appendc(buf, 0xe0 | (utf16 >> 12)); - smart_str_appendc(buf, 0x80 | ((utf16 >> 6) & 0x3f)); - smart_str_appendc(buf, 0x80 | (utf16 & 0x3f)); - } + if (utf16 < 0x80) { + smart_str_appendc(buf, (unsigned char) utf16); + } else if (utf16 < 0x800) { + smart_str_appendc(buf, 0xc0 | (utf16 >> 6)); + smart_str_appendc(buf, 0x80 | (utf16 & 0x3f)); + } else if ((utf16 & 0xfc00) == 0xdc00 + && buf->len >= 3 + && ((unsigned char) buf->c[buf->len - 3]) == 0xed + && ((unsigned char) buf->c[buf->len - 2] & 0xf0) == 0xa0 + && ((unsigned char) buf->c[buf->len - 1] & 0xc0) == 0x80) + { + /* found surrogate pair */ + unsigned long utf32; + + utf32 = (((buf->c[buf->len - 2] & 0xf) << 16) + | ((buf->c[buf->len - 1] & 0x3f) << 10) + | (utf16 & 0x3ff)) + 0x10000; + buf->len -= 3; + + smart_str_appendc(buf, 0xf0 | (utf32 >> 18)); + smart_str_appendc(buf, 0x80 | ((utf32 >> 12) & 0x3f)); + smart_str_appendc(buf, 0x80 | ((utf32 >> 6) & 0x3f)); + smart_str_appendc(buf, 0x80 | (utf32 & 0x3f)); + } else { + smart_str_appendc(buf, 0xe0 | (utf16 >> 12)); + smart_str_appendc(buf, 0x80 | ((utf16 >> 6) & 0x3f)); + smart_str_appendc(buf, 0x80 | (utf16 & 0x3f)); + } } +/* }}} */ -static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int assoc TSRMLS_DC) +static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int assoc TSRMLS_DC) /* {{{ */ { - zval *root = json->the_zstack[up]; - zval *child = json->the_zstack[cur]; - int up_mode = json->the_stack[up]; - - if (up_mode == MODE_ARRAY) - { - add_next_index_zval(root, child); - } - else if (up_mode == MODE_OBJECT) - { - if (!assoc) - { - add_utf8_property_zval_ex(root, (key->len ? key->c : "_empty_"), (key->len ? (key->len + 1) : sizeof("_empty_")), child TSRMLS_CC); + zval *root = json->the_zstack[up]; + zval *child = json->the_zstack[cur]; + int up_mode = json->the_stack[up]; + + if (up_mode == MODE_ARRAY) { + add_next_index_zval(root, child); + } else if (up_mode == MODE_OBJECT) { + if (!assoc) { + add_utf8_property_zval_ex(root, (key->len ? key->c : "_empty_"), (key->len ? (key->len + 1) : sizeof("_empty_")), child TSRMLS_CC); #if PHP_MAJOR_VERSION >= 5 - ZVAL_DELREF(child); + ZVAL_DELREF(child); #endif - } - else - { - add_utf8_assoc_zval_ex(root, (key->len ? key->c : ""), (key->len ? (key->len + 1) : sizeof("")), child); - } - key->len = 0; - } + } else { + add_utf8_assoc_zval_ex(root, (key->len ? key->c : ""), (key->len ? (key->len + 1) : sizeof("")), child); + } + key->len = 0; + } } - +/* }}} */ #define FREE_BUFFERS() do { smart_str_free(&buf); smart_str_free(&key); } while (0); #define SWAP_BUFFERS(from, to) do { \ @@ -390,7 +364,6 @@ #define JSON_RESET_TYPE() do { type = -1; } while(0); #define JSON(x) the_json.x - /* The JSON_parser takes a UTF-16 encoded string and determines if it is a syntactically correct JSON text. Along the way, it creates a PHP variable. @@ -398,386 +371,336 @@ It is implemented as a Pushdown Automaton; that means it is a finite state machine with a stack. */ -int -JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC) +int JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC) /* {{{ */ { - int b; /* the next character */ - int c; /* the next character class */ - int s; /* the next state */ - json_parser the_json; /* the parser state */ - int the_state = 0; - int the_index; - - smart_str buf = {0}; - smart_str key = {0}; - - int type = -1; - unsigned short utf16 = 0; - - JSON(the_top) = -1; - push(&the_json, z, MODE_DONE); - - for (the_index = 0; the_index < length; the_index += 1) { - b = p[the_index]; - if ((b & 127) == b) { - c = ascii_class[b]; - if (c <= S_ERR) { - FREE_BUFFERS(); - return false; - } - } else { - c = S_ETC; - } -/* - Get the next state from the transition table. -*/ - s = state_transition_table[the_state][c]; - if (s < 0) { -/* - Perform one of the predefined actions. -*/ - switch (s) { -/* - empty } -*/ - case -9: - if (!pop(&the_json, z, MODE_KEY)) { - FREE_BUFFERS(); - return false; - } - the_state = 9; - break; -/* - { -*/ - case -8: - if (!push(&the_json, z, MODE_KEY)) { - FREE_BUFFERS(); - return false; - } - - the_state = 1; - if (JSON(the_top) > 0) - { - zval *obj; - - if (JSON(the_top) == 1) - { - obj = z; - } - else - { - ALLOC_INIT_ZVAL(obj); - } - - if (!assoc) - { - object_init(obj); - } - else - { - array_init(obj); - } - - JSON(the_zstack)[JSON(the_top)] = obj; - - if (JSON(the_top) > 1) - { - attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC); - } + int b; /* the next character */ + int c; /* the next character class */ + int s; /* the next state */ + json_parser the_json; /* the parser state */ + int the_state = 0; + int the_index; + + smart_str buf = {0}; + smart_str key = {0}; + + int type = -1; + unsigned short utf16 = 0; + + JSON(the_top) = -1; + push(&the_json, z, MODE_DONE); + + for (the_index = 0; the_index < length; the_index += 1) { + b = p[the_index]; + if ((b & 127) == b) { + c = ascii_class[b]; + if (c <= S_ERR) { + FREE_BUFFERS(); + return false; + } + } else { + c = S_ETC; + } + /* + Get the next state from the transition table. + */ + s = state_transition_table[the_state][c]; + if (s < 0) { + /* + Perform one of the predefined actions. + */ + switch (s) { + /* empty "}" {{{ */ + case -9: + if (!pop(&the_json, z, MODE_KEY)) { + FREE_BUFFERS(); + return false; + } + the_state = 9; + break; + /* }}} */ + + /* "{" {{{ */ + case -8: + if (!push(&the_json, z, MODE_KEY)) { + FREE_BUFFERS(); + return false; + } - JSON_RESET_TYPE(); - } + the_state = 1; + if (JSON(the_top) > 0) { + zval *obj; + + if (JSON(the_top) == 1) { + obj = z; + } else { + ALLOC_INIT_ZVAL(obj); + } + + if (!assoc) { + object_init(obj); + } else { + array_init(obj); + } + + JSON(the_zstack)[JSON(the_top)] = obj; + + if (JSON(the_top) > 1) { + attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC); + } - break; -/* - } -*/ - case -7: - if (type != -1 && - (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || - JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) - { - zval *mval; - smart_str_0(&buf); - - json_create_zval(&mval, &buf, type TSRMLS_CC); - - if (!assoc) - { - add_utf8_property_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC); + JSON_RESET_TYPE(); + } + break; + /* }}} */ + + /* "}" {{{ */ + case -7: + if (type != -1 && + (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || + JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) + { + zval *mval; + smart_str_0(&buf); + + json_create_zval(&mval, &buf, type TSRMLS_CC); + + if (!assoc) { + add_utf8_property_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC); #if PHP_MAJOR_VERSION >= 5 - ZVAL_DELREF(mval); + ZVAL_DELREF(mval); #endif - } - else - { - add_utf8_assoc_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval); - } - key.len = 0; - buf.len = 0; - JSON_RESET_TYPE(); - } - - - if (!pop(&the_json, z, MODE_OBJECT)) { - FREE_BUFFERS(); - return false; - } - the_state = 9; - break; -/* - [ -*/ - case -6: - if (!push(&the_json, z, MODE_ARRAY)) { - FREE_BUFFERS(); - return false; - } - the_state = 2; - - if (JSON(the_top) > 0) - { - zval *arr; - - if (JSON(the_top) == 1) - { - arr = z; - } - else - { - ALLOC_INIT_ZVAL(arr); - } - - array_init(arr); - JSON(the_zstack)[JSON(the_top)] = arr; - - if (JSON(the_top) > 1) - { - attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC); - } + } else { + add_utf8_assoc_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval); + } + key.len = 0; + buf.len = 0; + JSON_RESET_TYPE(); + } - JSON_RESET_TYPE(); - } - break; -/* - ] -*/ - case -5: - { - if (type != -1 && - (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || - JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) - { - zval *mval; - smart_str_0(&buf); - - json_create_zval(&mval, &buf, type TSRMLS_CC); - add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval); - buf.len = 0; - JSON_RESET_TYPE(); - } - - if (!pop(&the_json, z, MODE_ARRAY)) { - FREE_BUFFERS(); - return false; - } - the_state = 9; - } - break; -/* - " -*/ - case -4: - switch (JSON(the_stack)[JSON(the_top)]) { - case MODE_KEY: - the_state = 27; - smart_str_0(&buf); - SWAP_BUFFERS(buf, key); - JSON_RESET_TYPE(); - break; - case MODE_ARRAY: - case MODE_OBJECT: - the_state = 9; - break; - case MODE_DONE: - if (type == IS_STRING) { - smart_str_0(&buf); - ZVAL_UTF8_STRINGL(z, buf.c, buf.len, ZSTR_DUPLICATE); + if (!pop(&the_json, z, MODE_OBJECT)) { + FREE_BUFFERS(); + return false; + } + the_state = 9; + break; + /* }}} */ + + /* "[" {{{ */ + case -6: + if (!push(&the_json, z, MODE_ARRAY)) { + FREE_BUFFERS(); + return false; + } + the_state = 2; + + if (JSON(the_top) > 0) { + zval *arr; + + if (JSON(the_top) == 1) { + arr = z; + } else { + ALLOC_INIT_ZVAL(arr); + } + + array_init(arr); + JSON(the_zstack)[JSON(the_top)] = arr; + + if (JSON(the_top) > 1) { + attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC); + } + + JSON_RESET_TYPE(); + } + + break; + /* }}} */ + + /* "]" {{{ */ + case -5: + { + if (type != -1 && + (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || + JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) + { + zval *mval; + smart_str_0(&buf); + + json_create_zval(&mval, &buf, type TSRMLS_CC); + add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval); + buf.len = 0; + JSON_RESET_TYPE(); + } + + if (!pop(&the_json, z, MODE_ARRAY)) { + FREE_BUFFERS(); + return false; + } the_state = 9; - break; } - /* fall through if not IS_STRING */ - default: - FREE_BUFFERS(); - return false; - } - break; -/* - , -*/ - case -3: - { - zval *mval; - - if (type != -1 && - (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || - JSON(the_stack[JSON(the_top)]) == MODE_ARRAY)) - { - smart_str_0(&buf); - json_create_zval(&mval, &buf, type TSRMLS_CC); - } - - switch (JSON(the_stack)[JSON(the_top)]) { - case MODE_OBJECT: - if (pop(&the_json, z, MODE_OBJECT) && push(&the_json, z, MODE_KEY)) { - if (type != -1) - { - if (!assoc) - { - add_utf8_property_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC); + break; + /* }}} */ + + /* "\"" {{{ */ + case -4: + switch (JSON(the_stack)[JSON(the_top)]) { + case MODE_KEY: + the_state = 27; + smart_str_0(&buf); + SWAP_BUFFERS(buf, key); + JSON_RESET_TYPE(); + break; + case MODE_ARRAY: + case MODE_OBJECT: + the_state = 9; + break; + case MODE_DONE: + if (type == IS_STRING) { + smart_str_0(&buf); + ZVAL_UTF8_STRINGL(z, buf.c, buf.len, ZSTR_DUPLICATE); + the_state = 9; + break; + } + /* fall through if not IS_STRING */ + default: + FREE_BUFFERS(); + return false; + } + break; + /* }}} */ + + /* "'" {{{ */ + case -3: + { + zval *mval; + + if (type != -1 && + (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || + JSON(the_stack[JSON(the_top)]) == MODE_ARRAY)) + { + smart_str_0(&buf); + json_create_zval(&mval, &buf, type TSRMLS_CC); + } + + switch (JSON(the_stack)[JSON(the_top)]) { + case MODE_OBJECT: + if (pop(&the_json, z, MODE_OBJECT) && push(&the_json, z, MODE_KEY)) { + if (type != -1) { + if (!assoc) { + add_utf8_property_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC); #if PHP_MAJOR_VERSION >= 5 - ZVAL_DELREF(mval); + ZVAL_DELREF(mval); #endif - } - else - { - add_utf8_assoc_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval); - } - key.len = 0; - } - the_state = 29; - } - break; - case MODE_ARRAY: - if (type != -1) - { - add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval); - } - the_state = 28; - break; - default: - FREE_BUFFERS(); - return false; - } - buf.len = 0; - JSON_RESET_TYPE(); - } - break; -/* - : -*/ - case -2: - if (pop(&the_json, z, MODE_KEY) && push(&the_json, z, MODE_OBJECT)) { - the_state = 28; - break; - } -/* - syntax error -*/ - case -1: - { - FREE_BUFFERS(); - return false; - } - } - } else { -/* - Change the state and iterate. -*/ - if (type == IS_STRING) - { - if (s == 3 && the_state != 8) - { - if (the_state != 4) - { - utf16_to_utf8(&buf, b); - } - else - { - switch (b) - { - case 'b': - smart_str_appendc(&buf, '\b'); - break; - case 't': - smart_str_appendc(&buf, '\t'); - break; - case 'n': - smart_str_appendc(&buf, '\n'); - break; - case 'f': - smart_str_appendc(&buf, '\f'); - break; - case 'r': - smart_str_appendc(&buf, '\r'); - break; - default: - utf16_to_utf8(&buf, b); - break; - } - } - } - else if (s == 6) - { - utf16 = dehexchar(b) << 12; - } - else if (s == 7) - { - utf16 += dehexchar(b) << 8; - } - else if (s == 8) - { - utf16 += dehexchar(b) << 4; - } - else if (s == 3 && the_state == 8) - { - utf16 += dehexchar(b); - utf16_to_utf8(&buf, utf16); - } - } - else if (type < IS_LONG && (c == S_DIG || c == S_ZER)) - { - type = IS_LONG; - smart_str_appendc(&buf, b); - } - else if (type == IS_LONG && s == 24) - { - type = IS_DOUBLE; - smart_str_appendc(&buf, b); - } - else if (type < IS_DOUBLE && c == S_DOT) - { - type = IS_DOUBLE; - smart_str_appendc(&buf, b); - } - else if (type < IS_STRING && c == S_QUO) - { - type = IS_STRING; - } - else if (type < IS_BOOL && ((the_state == 12 && s == 9) || (the_state == 16 && s == 9))) - { - type = IS_BOOL; - } - else if (type < IS_NULL && the_state == 19 && s == 9) - { - type = IS_NULL; - } - else if (type != IS_STRING && c > S_WSP) - { - utf16_to_utf8(&buf, b); - } + } else { + add_utf8_assoc_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval); + } + key.len = 0; + } + the_state = 29; + } + break; + case MODE_ARRAY: + if (type != -1) { + add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval); + } + the_state = 28; + break; + default: + FREE_BUFFERS(); + return false; + } + buf.len = 0; + JSON_RESET_TYPE(); + } + break; + /* }}} */ - the_state = s; - } - } + /* ":" {{{ */ + /* + : + */ + case -2: + if (pop(&the_json, z, MODE_KEY) && push(&the_json, z, MODE_OBJECT)) { + the_state = 28; + break; + } + /* }}} */ - FREE_BUFFERS(); + /* syntax error {{{ */ + case -1: + { + FREE_BUFFERS(); + return false; + } + /* }}} */ + } + } else { + /* + Change the state and iterate. + */ + if (type == IS_STRING) { + if (s == 3 && the_state != 8) { + if (the_state != 4) { + utf16_to_utf8(&buf, b); + } else { + switch (b) { + case 'b': + smart_str_appendc(&buf, '\b'); + break; + case 't': + smart_str_appendc(&buf, '\t'); + break; + case 'n': + smart_str_appendc(&buf, '\n'); + break; + case 'f': + smart_str_appendc(&buf, '\f'); + break; + case 'r': + smart_str_appendc(&buf, '\r'); + break; + default: + utf16_to_utf8(&buf, b); + break; + } + } + } else if (s == 6) { + utf16 = dehexchar(b) << 12; + } else if (s == 7) { + utf16 += dehexchar(b) << 8; + } else if (s == 8) { + utf16 += dehexchar(b) << 4; + } else if (s == 3 && the_state == 8) { + utf16 += dehexchar(b); + utf16_to_utf8(&buf, utf16); + } + } else if (type < IS_LONG && (c == S_DIG || c == S_ZER)) { + type = IS_LONG; + smart_str_appendc(&buf, b); + } else if (type == IS_LONG && s == 24) { + type = IS_DOUBLE; + smart_str_appendc(&buf, b); + } else if (type < IS_DOUBLE && c == S_DOT) { + type = IS_DOUBLE; + smart_str_appendc(&buf, b); + } else if (type < IS_STRING && c == S_QUO) { + type = IS_STRING; + } else if (type < IS_BOOL && ((the_state == 12 && s == 9) || (the_state == 16 && s == 9))) { + type = IS_BOOL; + } else if (type < IS_NULL && the_state == 19 && s == 9) { + type = IS_NULL; + } else if (type != IS_STRING && c > S_WSP) { + utf16_to_utf8(&buf, b); + } - return the_state == 9 && pop(&the_json, z, MODE_DONE); -} + the_state = s; + } + } + FREE_BUFFERS(); + return the_state == 9 && pop(&the_json, z, MODE_DONE); +} +/* }}} */ /* * Local variables: http://cvs.php.net/viewvc.cgi/php-src/ext/json/utf8_decode.c?r1=1.1&r2=1.2&diff_format=u Index: php-src/ext/json/utf8_decode.c diff -u php-src/ext/json/utf8_decode.c:1.1 php-src/ext/json/utf8_decode.c:1.2 --- php-src/ext/json/utf8_decode.c:1.1 Tue Jan 31 08:59:06 2006 +++ php-src/ext/json/utf8_decode.c Thu Jul 12 09:55:41 2007 @@ -55,65 +55,60 @@ /* Get the next byte. It returns UTF8_END if there are no more bytes. */ -static int -get(json_utf8_decode *utf8) +static int get(json_utf8_decode *utf8) /* {{{ */ { - int c; - if (utf8->the_index >= utf8->the_length) { - return UTF8_END; - } - c = utf8->the_input[utf8->the_index] & 0xFF; - utf8->the_index += 1; - return c; + int c; + if (utf8->the_index >= utf8->the_length) { + return UTF8_END; + } + c = utf8->the_input[utf8->the_index] & 0xFF; + utf8->the_index += 1; + return c; } - +/* }}} */ /* Get the 6-bit payload of the next continuation byte. Return UTF8_ERROR if it is not a contination byte. */ -static int -cont(json_utf8_decode *utf8) +static int cont(json_utf8_decode *utf8) /* {{{ */ { - int c = get(utf8); - return ((c & 0xC0) == 0x80) ? (c & 0x3F) : UTF8_ERROR; + int c = get(utf8); + return ((c & 0xC0) == 0x80) ? (c & 0x3F) : UTF8_ERROR; } - +/* }}} */ /* Initialize the UTF-8 decoder. The decoder is not reentrant, */ -void -utf8_decode_init(json_utf8_decode *utf8, char p[], int length) +void utf8_decode_init(json_utf8_decode *utf8, char p[], int length) /* {{{ */ { - utf8->the_index = 0; - utf8->the_input = p; - utf8->the_length = length; - utf8->the_char = 0; - utf8->the_byte = 0; + utf8->the_index = 0; + utf8->the_input = p; + utf8->the_length = length; + utf8->the_char = 0; + utf8->the_byte = 0; } - +/* }}} */ /* Get the current byte offset. This is generally used in error reporting. */ -int -utf8_decode_at_byte(json_utf8_decode *utf8) +int utf8_decode_at_byte(json_utf8_decode *utf8) /* {{{ */ { - return utf8->the_byte; + return utf8->the_byte; } - +/* }}} */ /* Get the current character offset. This is generally used in error reporting. The character offset matches the byte offset if the text is strictly ASCII. */ -int -utf8_decode_at_character(json_utf8_decode *utf8) +int utf8_decode_at_character(json_utf8_decode *utf8) /* {{{ */ { - return utf8->the_char > 0 ? utf8->the_char - 1 : 0; + return utf8->the_char > 0 ? utf8->the_char - 1 : 0; } - +/* }}} */ /* Extract the next character. @@ -121,59 +116,68 @@ or UTF8_END (the end) or UTF8_ERROR (error) */ -int -utf8_decode_next(json_utf8_decode *utf8) +int utf8_decode_next(json_utf8_decode *utf8) /* {{{ */ { - int c; /* the first byte of the character */ - int r; /* the result */ + int c; /* the first byte of the character */ + int r; /* the result */ - if (utf8->the_index >= utf8->the_length) { - return utf8->the_index == utf8->the_length ? UTF8_END : UTF8_ERROR; - } - utf8->the_byte = utf8->the_index; - utf8->the_char += 1; - c = get(utf8); -/* - Zero continuation (0 to 127) -*/ - if ((c & 0x80) == 0) { - return c; - } -/* - One contination (128 to 2047) -*/ - if ((c & 0xE0) == 0xC0) { - int c1 = cont(utf8); - if (c1 < 0) { - return UTF8_ERROR; - } - r = ((c & 0x1F) << 6) | c1; - return r >= 128 ? r : UTF8_ERROR; - } -/* - Two continuation (2048 to 55295 and 57344 to 65535) -*/ - if ((c & 0xF0) == 0xE0) { - int c1 = cont(utf8); - int c2 = cont(utf8); - if (c1 < 0 || c2 < 0) { - return UTF8_ERROR; - } - r = ((c & 0x0F) << 12) | (c1 << 6) | c2; - return r >= 2048 && (r < 55296 || r > 57343) ? r : UTF8_ERROR; - } -/* - Three continuation (65536 to 1114111) -*/ - if ((c & 0xF1) == 0xF0) { - int c1 = cont(utf8); - int c2 = cont(utf8); - int c3 = cont(utf8); - if (c1 < 0 || c2 < 0 || c3 < 0) { - return UTF8_ERROR; - } - r = ((c & 0x0F) << 18) | (c1 << 12) | (c2 << 6) | c3; - return r >= 65536 && r <= 1114111 ? r : UTF8_ERROR; - } - return UTF8_ERROR; -} + if (utf8->the_index >= utf8->the_length) { + return utf8->the_index == utf8->the_length ? UTF8_END : UTF8_ERROR; + } + utf8->the_byte = utf8->the_index; + utf8->the_char += 1; + c = get(utf8); + /* + Zero continuation (0 to 127) + */ + if ((c & 0x80) == 0) { + return c; + } + /* + One contination (128 to 2047) + */ + if ((c & 0xE0) == 0xC0) { + int c1 = cont(utf8); + if (c1 < 0) { + return UTF8_ERROR; + } + r = ((c & 0x1F) << 6) | c1; + return r >= 128 ? r : UTF8_ERROR; + } + /* + Two continuation (2048 to 55295 and 57344 to 65535) + */ + if ((c & 0xF0) == 0xE0) { + int c1 = cont(utf8); + int c2 = cont(utf8); + if (c1 < 0 || c2 < 0) { + return UTF8_ERROR; + } + r = ((c & 0x0F) << 12) | (c1 << 6) | c2; + return r >= 2048 && (r < 55296 || r > 57343) ? r : UTF8_ERROR; + } + /* + Three continuation (65536 to 1114111) + */ + if ((c & 0xF1) == 0xF0) { + int c1 = cont(utf8); + int c2 = cont(utf8); + int c3 = cont(utf8); + if (c1 < 0 || c2 < 0 || c3 < 0) { + return UTF8_ERROR; + } + r = ((c & 0x0F) << 18) | (c1 << 12) | (c2 << 6) | c3; + return r >= 65536 && r <= 1114111 ? r : UTF8_ERROR; + } + return UTF8_ERROR; +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 + * vim<600: noet sw=4 ts=4 + */ http://cvs.php.net/viewvc.cgi/php-src/ext/json/utf8_to_utf16.c?r1=1.1&r2=1.2&diff_format=u Index: php-src/ext/json/utf8_to_utf16.c diff -u php-src/ext/json/utf8_to_utf16.c:1.1 php-src/ext/json/utf8_to_utf16.c:1.2 --- php-src/ext/json/utf8_to_utf16.c:1.1 Tue Jan 31 08:59:06 2006 +++ php-src/ext/json/utf8_to_utf16.c Thu Jul 12 09:55:41 2007 @@ -29,28 +29,37 @@ #include "utf8_to_utf16.h" #include "utf8_decode.h" -int -utf8_to_utf16(unsigned short w[], char p[], int length) +int utf8_to_utf16(unsigned short w[], char p[], int length) /* {{{ */ { - int c; - int the_index = 0; - json_utf8_decode utf8; - - utf8_decode_init(&utf8, p, length); - for (;;) { - c = utf8_decode_next(&utf8); - if (c < 0) { - return UTF8_END ? the_index : UTF8_ERROR; - } - if (c < 0x10000) { - w[the_index] = (unsigned short)c; - the_index += 1; - } else { - c &= 0xFFFF; - w[the_index] = (unsigned short)(0xD800 | (c >> 10)); - the_index += 1; - w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF)); - the_index += 1; - } - } + int c; + int the_index = 0; + json_utf8_decode utf8; + + utf8_decode_init(&utf8, p, length); + for (;;) { + c = utf8_decode_next(&utf8); + if (c < 0) { + return UTF8_END ? the_index : UTF8_ERROR; + } + if (c < 0x10000) { + w[the_index] = (unsigned short)c; + the_index += 1; + } else { + c &= 0xFFFF; + w[the_index] = (unsigned short)(0xD800 | (c >> 10)); + the_index += 1; + w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF)); + the_index += 1; + } + } } +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 + * vim<600: noet sw=4 ts=4 + */
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php