gwynne Mon, 29 Aug 2011 14:56:19 +0000
Revision: http://svn.php.net/viewvc?view=revision&revision=315707
Log:
Add a JSON_UNESCAPED_UNICODE option to json_encode() that emits non-ASCII
UTF-8 characters literally instead of as \uXXXX escapes. Closes bug #53946.
Patch by Irker and Gwynne.
Bug: https://bugs.php.net/53946 (Assigned) add json_encode option for not
escaping unnecessary character
Changed paths:
U php/php-src/branches/PHP_5_4/ext/json/json.c
U php/php-src/branches/PHP_5_4/ext/json/php_json.h
U php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c
U php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h
U php/php-src/trunk/ext/json/json.c
U php/php-src/trunk/ext/json/php_json.h
A php/php-src/trunk/ext/json/tests/bug53946.phpt
U php/php-src/trunk/ext/json/utf8_to_utf16.c
U php/php-src/trunk/ext/json/utf8_to_utf16.h
Modified: php/php-src/branches/PHP_5_4/ext/json/json.c
===================================================================
--- php/php-src/branches/PHP_5_4/ext/json/json.c 2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/branches/PHP_5_4/ext/json/json.c 2011-08-29 14:56:19 UTC (rev 315707)
@@ -95,6 +95,7 @@
REGISTER_LONG_CONSTANT("JSON_NUMERIC_CHECK", PHP_JSON_NUMERIC_CHECK, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("JSON_UNESCAPED_SLASHES", PHP_JSON_UNESCAPED_SLASHES, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT);
@@ -346,7 +347,7 @@
static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */
{
- int pos = 0;
+ int pos = 0, ulen = 0;
unsigned short us;
unsigned short *utf16;
@@ -378,15 +379,14 @@
}
}
-
- utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
-
- len = utf8_to_utf16(utf16, s, len);
- if (len <= 0) {
+
+ utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
+ ulen = utf8_to_utf16(utf16, s, len);
+ if (ulen <= 0) {
if (utf16) {
efree(utf16);
}
- if (len < 0) {
+ if (ulen < 0) {
JSON_G(error_code) = PHP_JSON_ERROR_UTF8;
if (!PG(display_errors)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid UTF-8 sequence in argument");
@@ -397,12 +397,15 @@
}
return;
}
+ if (!(options & PHP_JSON_UNESCAPED_UNICODE)) {
+ len = ulen;
+ }
smart_str_appendc(buf, '"');
while (pos < len)
{
- us = utf16[pos++];
+ us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++];
switch (us)
{
@@ -479,7 +482,7 @@
break;
default:
- if (us >= ' ' && (us & 127) == us) {
+ if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) {
smart_str_appendc(buf, (unsigned char) us);
} else {
smart_str_appendl(buf, "\\u", 2);
@@ -498,7 +501,9 @@
}
smart_str_appendc(buf, '"');
- efree(utf16);
+ if (utf16) {
+ efree(utf16);
+ }
}
/* }}} */
Modified: php/php-src/branches/PHP_5_4/ext/json/php_json.h
===================================================================
--- php/php-src/branches/PHP_5_4/ext/json/php_json.h 2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/branches/PHP_5_4/ext/json/php_json.h 2011-08-29 14:56:19 UTC (rev 315707)
@@ -62,6 +62,7 @@
#define PHP_JSON_NUMERIC_CHECK (1<<5)
#define PHP_JSON_UNESCAPED_SLASHES (1<<6)
#define PHP_JSON_PRETTY_PRINT (1<<7)
+#define PHP_JSON_UNESCAPED_UNICODE (1<<8)
/* Internal flags */
#define PHP_JSON_OUTPUT_ARRAY 0
Modified: php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c
===================================================================
--- php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c 2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c 2011-08-29 14:56:19 UTC (rev 315707)
@@ -30,7 +30,7 @@
#include "utf8_decode.h"
int
-utf8_to_utf16(unsigned short w[], char p[], int length)
+utf8_to_utf16(unsigned short *w, char p[], int length)
{
int c;
int the_index = 0;
@@ -43,14 +43,17 @@
return (c == UTF8_END) ? the_index : UTF8_ERROR;
}
if (c < 0x10000) {
- w[the_index] = (unsigned short)c;
+ if (w) {
+ w[the_index] = (unsigned short)c;
+ }
the_index += 1;
} else {
c -= 0x10000;
- w[the_index] = (unsigned short)(0xD800 | (c >> 10));
- the_index += 1;
- w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));
- the_index += 1;
+ if (w) {
+ w[the_index] = (unsigned short)(0xD800 | (c >> 10));
+ w[the_index + 1] = (unsigned short)(0xDC00 | (c & 0x3FF));
+ }
+ the_index += 2;
}
}
}
Modified: php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h
===================================================================
--- php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h 2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h 2011-08-29 14:56:19 UTC (rev 315707)
@@ -1,3 +1,3 @@
/* utf8_to_utf16.h */
-extern int utf8_to_utf16(unsigned short w[], char p[], int length);
+extern int utf8_to_utf16(unsigned short *w, char p[], int length);
Modified: php/php-src/trunk/ext/json/json.c
===================================================================
--- php/php-src/trunk/ext/json/json.c 2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/trunk/ext/json/json.c 2011-08-29 14:56:19 UTC (rev 315707)
@@ -95,6 +95,7 @@
REGISTER_LONG_CONSTANT("JSON_NUMERIC_CHECK", PHP_JSON_NUMERIC_CHECK, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("JSON_UNESCAPED_SLASHES", PHP_JSON_UNESCAPED_SLASHES, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT);
@@ -346,7 +347,7 @@
static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */
{
- int pos = 0;
+ int pos = 0, ulen = 0;
unsigned short us;
unsigned short *utf16;
@@ -378,15 +379,14 @@
}
}
-
- utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
-
- len = utf8_to_utf16(utf16, s, len);
- if (len <= 0) {
+
+ utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
+ ulen = utf8_to_utf16(utf16, s, len);
+ if (ulen <= 0) {
if (utf16) {
efree(utf16);
}
- if (len < 0) {
+ if (ulen < 0) {
JSON_G(error_code) = PHP_JSON_ERROR_UTF8;
if (!PG(display_errors)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid UTF-8 sequence in argument");
@@ -397,12 +397,15 @@
}
return;
}
+ if (!(options & PHP_JSON_UNESCAPED_UNICODE)) {
+ len = ulen;
+ }
smart_str_appendc(buf, '"');
while (pos < len)
{
- us = utf16[pos++];
+ us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++];
switch (us)
{
@@ -479,7 +482,7 @@
break;
default:
- if (us >= ' ' && (us & 127) == us) {
+ if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) {
smart_str_appendc(buf, (unsigned char) us);
} else {
smart_str_appendl(buf, "\\u", 2);
@@ -498,7 +501,9 @@
}
smart_str_appendc(buf, '"');
- efree(utf16);
+ if (utf16) {
+ efree(utf16);
+ }
}
/* }}} */
Modified: php/php-src/trunk/ext/json/php_json.h
===================================================================
--- php/php-src/trunk/ext/json/php_json.h 2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/trunk/ext/json/php_json.h 2011-08-29 14:56:19 UTC (rev 315707)
@@ -62,6 +62,7 @@
#define PHP_JSON_NUMERIC_CHECK (1<<5)
#define PHP_JSON_UNESCAPED_SLASHES (1<<6)
#define PHP_JSON_PRETTY_PRINT (1<<7)
+#define PHP_JSON_UNESCAPED_UNICODE (1<<8)
/* Internal flags */
#define PHP_JSON_OUTPUT_ARRAY 0
Added: php/php-src/trunk/ext/json/tests/bug53946.phpt
===================================================================
--- php/php-src/trunk/ext/json/tests/bug53946.phpt (rev 0)
+++ php/php-src/trunk/ext/json/tests/bug53946.phpt 2011-08-29 14:56:19 UTC (rev 315707)
@@ -0,0 +1,16 @@
+--TEST--
+bug #53946 (json_encode() with JSON_UNESCAPED_UNICODE)
+--SKIPIF--
+<?php if (!extension_loaded("json")) print "skip"; ?>
+--FILE--
+<?php
+var_dump(json_encode("latin 1234 -/ russian мама мыла раму specialchars \x02 \x08 \n U+1D11E >𝄞<"));
+var_dump(json_encode("latin 1234 -/ russian мама мыла раму specialchars \x02 \x08 \n U+1D11E >𝄞<", JSON_UNESCAPED_UNICODE));
+var_dump(json_encode("ab\xE0"));
+var_dump(json_encode("ab\xE0", JSON_UNESCAPED_UNICODE));
+?>
+--EXPECT--
+string(156) ""latin 1234 -\/ russian \u043c\u0430\u043c\u0430 \u043c\u044b\u043b\u0430 \u0440\u0430\u043c\u0443 specialchars \u0002 \b \n U+1D11E >\ud834\udd1e<""
+string(100) ""latin 1234 -\/ russian мама мыла раму specialchars \u0002 \b \n U+1D11E >𝄞<""
+string(4) "null"
+string(4) "null"
Modified: php/php-src/trunk/ext/json/utf8_to_utf16.c
===================================================================
--- php/php-src/trunk/ext/json/utf8_to_utf16.c 2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/trunk/ext/json/utf8_to_utf16.c 2011-08-29 14:56:19 UTC (rev 315707)
@@ -30,7 +30,7 @@
#include "utf8_decode.h"
int
-utf8_to_utf16(unsigned short w[], char p[], int length)
+utf8_to_utf16(unsigned short *w, char p[], int length)
{
int c;
int the_index = 0;
@@ -43,14 +43,17 @@
return (c == UTF8_END) ? the_index : UTF8_ERROR;
}
if (c < 0x10000) {
- w[the_index] = (unsigned short)c;
+ if (w) {
+ w[the_index] = (unsigned short)c;
+ }
the_index += 1;
} else {
c -= 0x10000;
- w[the_index] = (unsigned short)(0xD800 | (c >> 10));
- the_index += 1;
- w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));
- the_index += 1;
+ if (w) {
+ w[the_index] = (unsigned short)(0xD800 | (c >> 10));
+ w[the_index + 1] = (unsigned short)(0xDC00 | (c & 0x3FF));
+ }
+ the_index += 2;
}
}
}
Modified: php/php-src/trunk/ext/json/utf8_to_utf16.h
===================================================================
--- php/php-src/trunk/ext/json/utf8_to_utf16.h 2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/trunk/ext/json/utf8_to_utf16.h 2011-08-29 14:56:19 UTC (rev 315707)
@@ -1,3 +1,3 @@
/* utf8_to_utf16.h */
-extern int utf8_to_utf16(unsigned short w[], char p[], int length);
+extern int utf8_to_utf16(unsigned short *w, char p[], int length);
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php