gwynne                                   Mon, 29 Aug 2011 14:56:19 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=315707

Log:
Add the JSON_UNESCAPED_UNICODE option to json_encode(), which emits multibyte
Unicode characters literally instead of as \uXXXX escapes. Closes bug #53946.
Patch by Irker and Gwynne.

Bug: https://bugs.php.net/53946 (Assigned) add json_encode option for not 
escaping unnecessary character
      
Changed paths:
    U   php/php-src/branches/PHP_5_4/ext/json/json.c
    U   php/php-src/branches/PHP_5_4/ext/json/php_json.h
    U   php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c
    U   php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h
    U   php/php-src/trunk/ext/json/json.c
    U   php/php-src/trunk/ext/json/php_json.h
    A   php/php-src/trunk/ext/json/tests/bug53946.phpt
    U   php/php-src/trunk/ext/json/utf8_to_utf16.c
    U   php/php-src/trunk/ext/json/utf8_to_utf16.h

Modified: php/php-src/branches/PHP_5_4/ext/json/json.c
===================================================================
--- php/php-src/branches/PHP_5_4/ext/json/json.c	2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/branches/PHP_5_4/ext/json/json.c	2011-08-29 14:56:19 UTC (rev 315707)
@@ -95,6 +95,7 @@
 	REGISTER_LONG_CONSTANT("JSON_NUMERIC_CHECK", PHP_JSON_NUMERIC_CHECK, CONST_CS | CONST_PERSISTENT);
 	REGISTER_LONG_CONSTANT("JSON_UNESCAPED_SLASHES", PHP_JSON_UNESCAPED_SLASHES, CONST_CS | CONST_PERSISTENT);
 	REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT);
+	REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT);

 	REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT);
 	REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT);
@@ -346,7 +347,7 @@

 static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */
 {
-	int pos = 0;
+	int pos = 0, ulen = 0;
 	unsigned short us;
 	unsigned short *utf16;

@@ -378,15 +379,14 @@
 		}

 	}
-
-	utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
-
-	len = utf8_to_utf16(utf16, s, len);
-	if (len <= 0) {
+
+	utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
+	ulen = utf8_to_utf16(utf16, s, len);
+	if (ulen <= 0) {
 		if (utf16) {
 			efree(utf16);
 		}
-		if (len < 0) {
+		if (ulen < 0) {
 			JSON_G(error_code) = PHP_JSON_ERROR_UTF8;
 			if (!PG(display_errors)) {
 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid UTF-8 sequence in argument");
@@ -397,12 +397,15 @@
 		}
 		return;
 	}
+	if (!(options & PHP_JSON_UNESCAPED_UNICODE)) {
+		len = ulen;
+	}

 	smart_str_appendc(buf, '"');

 	while (pos < len)
 	{
-		us = utf16[pos++];
+		us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++];

 		switch (us)
 		{
@@ -479,7 +482,7 @@
 				break;

 			default:
-				if (us >= ' ' && (us & 127) == us) {
+				if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) {
 					smart_str_appendc(buf, (unsigned char) us);
 				} else {
 					smart_str_appendl(buf, "\\u", 2);
@@ -498,7 +501,9 @@
 	}

 	smart_str_appendc(buf, '"');
-	efree(utf16);
+	if (utf16) {
+		efree(utf16);
+	}
 }
 /* }}} */


Modified: php/php-src/branches/PHP_5_4/ext/json/php_json.h
===================================================================
--- php/php-src/branches/PHP_5_4/ext/json/php_json.h	2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/branches/PHP_5_4/ext/json/php_json.h	2011-08-29 14:56:19 UTC (rev 315707)
@@ -62,6 +62,7 @@
 #define PHP_JSON_NUMERIC_CHECK	(1<<5)
 #define PHP_JSON_UNESCAPED_SLASHES	(1<<6)
 #define PHP_JSON_PRETTY_PRINT	(1<<7)
+#define PHP_JSON_UNESCAPED_UNICODE	(1<<8)

 /* Internal flags */
 #define PHP_JSON_OUTPUT_ARRAY	0

Modified: php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c
===================================================================
--- php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c	2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.c	2011-08-29 14:56:19 UTC (rev 315707)
@@ -30,7 +30,7 @@
 #include "utf8_decode.h"

 int
-utf8_to_utf16(unsigned short w[], char p[], int length)
+utf8_to_utf16(unsigned short *w, char p[], int length)
 {
     int c;
     int the_index = 0;
@@ -43,14 +43,17 @@
             return (c == UTF8_END) ? the_index : UTF8_ERROR;
         }
         if (c < 0x10000) {
-            w[the_index] = (unsigned short)c;
+            if (w) {
+                w[the_index] = (unsigned short)c;
+            }
             the_index += 1;
         } else {
             c -= 0x10000;
-            w[the_index] = (unsigned short)(0xD800 | (c >> 10));
-            the_index += 1;
-            w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));
-            the_index += 1;
+            if (w) {
+                w[the_index] = (unsigned short)(0xD800 | (c >> 10));
+                w[the_index + 1] = (unsigned short)(0xDC00 | (c & 0x3FF));
+            }
+            the_index += 2;
         }
     }
 }

Modified: php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h
===================================================================
--- php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h	2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/branches/PHP_5_4/ext/json/utf8_to_utf16.h	2011-08-29 14:56:19 UTC (rev 315707)
@@ -1,3 +1,3 @@
 /* utf8_to_utf16.h */

-extern int utf8_to_utf16(unsigned short w[], char p[], int length);
+extern int utf8_to_utf16(unsigned short *w, char p[], int length);

Modified: php/php-src/trunk/ext/json/json.c
===================================================================
--- php/php-src/trunk/ext/json/json.c	2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/trunk/ext/json/json.c	2011-08-29 14:56:19 UTC (rev 315707)
@@ -95,6 +95,7 @@
 	REGISTER_LONG_CONSTANT("JSON_NUMERIC_CHECK", PHP_JSON_NUMERIC_CHECK, CONST_CS | CONST_PERSISTENT);
 	REGISTER_LONG_CONSTANT("JSON_UNESCAPED_SLASHES", PHP_JSON_UNESCAPED_SLASHES, CONST_CS | CONST_PERSISTENT);
 	REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT);
+	REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT);

 	REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT);
 	REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT);
@@ -346,7 +347,7 @@

 static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */
 {
-	int pos = 0;
+	int pos = 0, ulen = 0;
 	unsigned short us;
 	unsigned short *utf16;

@@ -378,15 +379,14 @@
 		}

 	}
-
-	utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
-
-	len = utf8_to_utf16(utf16, s, len);
-	if (len <= 0) {
+
+	utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
+	ulen = utf8_to_utf16(utf16, s, len);
+	if (ulen <= 0) {
 		if (utf16) {
 			efree(utf16);
 		}
-		if (len < 0) {
+		if (ulen < 0) {
 			JSON_G(error_code) = PHP_JSON_ERROR_UTF8;
 			if (!PG(display_errors)) {
 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid UTF-8 sequence in argument");
@@ -397,12 +397,15 @@
 		}
 		return;
 	}
+	if (!(options & PHP_JSON_UNESCAPED_UNICODE)) {
+		len = ulen;
+	}

 	smart_str_appendc(buf, '"');

 	while (pos < len)
 	{
-		us = utf16[pos++];
+		us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++];

 		switch (us)
 		{
@@ -479,7 +482,7 @@
 				break;

 			default:
-				if (us >= ' ' && (us & 127) == us) {
+				if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) {
 					smart_str_appendc(buf, (unsigned char) us);
 				} else {
 					smart_str_appendl(buf, "\\u", 2);
@@ -498,7 +501,9 @@
 	}

 	smart_str_appendc(buf, '"');
-	efree(utf16);
+	if (utf16) {
+		efree(utf16);
+	}
 }
 /* }}} */


Modified: php/php-src/trunk/ext/json/php_json.h
===================================================================
--- php/php-src/trunk/ext/json/php_json.h	2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/trunk/ext/json/php_json.h	2011-08-29 14:56:19 UTC (rev 315707)
@@ -62,6 +62,7 @@
 #define PHP_JSON_NUMERIC_CHECK	(1<<5)
 #define PHP_JSON_UNESCAPED_SLASHES	(1<<6)
 #define PHP_JSON_PRETTY_PRINT	(1<<7)
+#define PHP_JSON_UNESCAPED_UNICODE	(1<<8)

 /* Internal flags */
 #define PHP_JSON_OUTPUT_ARRAY	0

Added: php/php-src/trunk/ext/json/tests/bug53946.phpt
===================================================================
--- php/php-src/trunk/ext/json/tests/bug53946.phpt	                        (rev 0)
+++ php/php-src/trunk/ext/json/tests/bug53946.phpt	2011-08-29 14:56:19 UTC (rev 315707)
@@ -0,0 +1,16 @@
+--TEST--
+bug #53946 (json_encode() with JSON_UNESCAPED_UNICODE)
+--SKIPIF--
+<?php if (!extension_loaded("json")) print "skip"; ?>
+--FILE--
+<?php
+var_dump(json_encode("latin 1234 -/    russian мама мыла раму  specialchars \x02   \x08 \n   U+1D11E >𝄞<"));
+var_dump(json_encode("latin 1234 -/    russian мама мыла раму  specialchars \x02   \x08 \n   U+1D11E >𝄞<", JSON_UNESCAPED_UNICODE));
+var_dump(json_encode("ab\xE0"));
+var_dump(json_encode("ab\xE0", JSON_UNESCAPED_UNICODE));
+?>
+--EXPECT--
+string(156) ""latin 1234 -\/    russian \u043c\u0430\u043c\u0430 \u043c\u044b\u043b\u0430 \u0440\u0430\u043c\u0443  specialchars \u0002   \b \n   U+1D11E >\ud834\udd1e<""
+string(100) ""latin 1234 -\/    russian мама мыла раму  specialchars \u0002   \b \n   U+1D11E >𝄞<""
+string(4) "null"
+string(4) "null"

Modified: php/php-src/trunk/ext/json/utf8_to_utf16.c
===================================================================
--- php/php-src/trunk/ext/json/utf8_to_utf16.c	2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/trunk/ext/json/utf8_to_utf16.c	2011-08-29 14:56:19 UTC (rev 315707)
@@ -30,7 +30,7 @@
 #include "utf8_decode.h"

 int
-utf8_to_utf16(unsigned short w[], char p[], int length)
+utf8_to_utf16(unsigned short *w, char p[], int length)
 {
     int c;
     int the_index = 0;
@@ -43,14 +43,17 @@
             return (c == UTF8_END) ? the_index : UTF8_ERROR;
         }
         if (c < 0x10000) {
-            w[the_index] = (unsigned short)c;
+            if (w) {
+                w[the_index] = (unsigned short)c;
+            }
             the_index += 1;
         } else {
             c -= 0x10000;
-            w[the_index] = (unsigned short)(0xD800 | (c >> 10));
-            the_index += 1;
-            w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));
-            the_index += 1;
+            if (w) {
+                w[the_index] = (unsigned short)(0xD800 | (c >> 10));
+                w[the_index + 1] = (unsigned short)(0xDC00 | (c & 0x3FF));
+            }
+            the_index += 2;
         }
     }
 }

Modified: php/php-src/trunk/ext/json/utf8_to_utf16.h
===================================================================
--- php/php-src/trunk/ext/json/utf8_to_utf16.h	2011-08-29 14:32:46 UTC (rev 315706)
+++ php/php-src/trunk/ext/json/utf8_to_utf16.h	2011-08-29 14:56:19 UTC (rev 315707)
@@ -1,3 +1,3 @@
 /* utf8_to_utf16.h */

-extern int utf8_to_utf16(unsigned short w[], char p[], int length);
+extern int utf8_to_utf16(unsigned short *w, char p[], int length);
-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to