While it's technically "safe" to include user-supplied data in json_encode() serialized values, the fact that characters such as <>&' remain as-is means there's room for some as-yet-unidentified problem, either in the browser's rendering or (more likely) elsewhere in one's codebase, for this data to get into the wrong context and be executed.

To that end, the attached patch allows the caller to be paranoid about their data and stipulate that <>&' should be encoded to hex references instead. This doesn't stop a web developer from dropping that content into an innerHTML of course, but it's one more rope holding the ship together.

Obviously, since this adds five characters per pedantically escaped character, it's not something you'd want on by default, so the normal behavior would be to leave them alone.

echo json_encode("<foo>");
"<foo>"

echo json_encode("<foo>", JSON_HEX_TAG);
"\u003Cfoo\u003E"

echo json_encode("<foo bar='baz'>", JSON_HEX_TAG | JSON_HEX_APOS);
"\u003Cfoo bar=\u0027baz\u0027\u003E"

If no one objects, I'll commit this in a week along with an MFH for 5.3.

-Sara



Index: json.c
===================================================================
RCS file: /repository/pecl/json/json.c,v
retrieving revision 1.31
diff -u -p -r1.31 json.c
--- json.c      1 Oct 2007 15:25:01 -0000       1.31
+++ json.c      29 Nov 2007 19:01:34 -0000
@@ -32,6 +32,10 @@

 static const char digits[] = "0123456789abcdef";

+#define PHP_JSON_HEX_TAG       (1<<0)
+#define PHP_JSON_HEX_AMP       (1<<1)
+#define PHP_JSON_HEX_APOS      (1<<2)
+
 /* {{{ json_functions[]
  *
  * Every user visible function must have an entry in json_functions[].
@@ -43,6 +47,18 @@ const function_entry json_functions[] =
 };
 /* }}} */

+/* {{{ MINIT */
+static PHP_MINIT_FUNCTION(json)
+{
+       REGISTER_LONG_CONSTANT("JSON_HEX_TAG", PHP_JSON_HEX_TAG, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("JSON_HEX_AMP", PHP_JSON_HEX_AMP, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("JSON_HEX_APOS", PHP_JSON_HEX_APOS, CONST_CS | CONST_PERSISTENT);
+
+       return SUCCESS;
+}
+/* }}} */
+
+
 /* {{{ json_module_entry
  */
 zend_module_entry json_module_entry = {
@@ -51,7 +67,7 @@ zend_module_entry json_module_entry = {
 #endif
        "json",
        json_functions,
-       NULL,
+       PHP_MINIT(json),
        NULL,
        NULL,
        NULL,
@@ -78,8 +94,8 @@ PHP_MINFO_FUNCTION(json)
 }
 /* }}} */

-static void json_encode_r(smart_str *buf, zval *val TSRMLS_DC);
-static void json_escape_string(smart_str *buf, zstr s, int len, zend_uchar type);
+static void json_encode_r(smart_str *buf, zval *val, int options TSRMLS_DC);
+static void json_escape_string(smart_str *buf, zstr s, int len, zend_uchar type, int options);

 static int json_determine_array_type(zval **val TSRMLS_DC) /* {{{ */
 {
@@ -115,7 +131,7 @@ static int json_determine_array_type(zva
 }
 /* }}} */

-static void json_encode_array(smart_str *buf, zval **val TSRMLS_DC) /* {{{ */
+static void json_encode_array(smart_str *buf, zval **val, int options TSRMLS_DC) /* {{{ */
 {
        int i, r;
        HashTable *myht;
@@ -172,7 +188,7 @@ static void json_encode_array(smart_str
                                                need_comma = 1;
                                        }

-                                       json_encode_r(buf, *data TSRMLS_CC);
+                                       json_encode_r(buf, *data, options TSRMLS_CC);
                                } else if (r == 1) {
                                        if (i == HASH_KEY_IS_STRING ||
                                                i == HASH_KEY_IS_UNICODE) {
@@ -187,10 +203,10 @@ static void json_encode_array(smart_str
                                                        need_comma = 1;
                                                }

-                                               json_escape_string(buf, key, key_len - 1, (i==HASH_KEY_IS_UNICODE)?IS_UNICODE:IS_STRING);
+                                               json_escape_string(buf, key, key_len - 1, (i==HASH_KEY_IS_UNICODE)?IS_UNICODE:IS_STRING, options);
                                                smart_str_appendc(buf, ':');

-                                               json_encode_r(buf, *data TSRMLS_CC);
+                                               json_encode_r(buf, *data, options TSRMLS_CC);
                                        } else {
                                                if (need_comma) {
                                                        smart_str_appendc(buf, ',');
@@ -203,7 +219,7 @@ static void json_encode_array(smart_str
                                                smart_str_appendc(buf, '"');
                                                smart_str_appendc(buf, ':');

-                                               json_encode_r(buf, *data TSRMLS_CC);
+                                               json_encode_r(buf, *data, options TSRMLS_CC);
                                        }
                                }

@@ -227,7 +243,7 @@ static void json_encode_array(smart_str

 #define REVERSE16(us) (((us & 0xf) << 12) | (((us >> 4) & 0xf) << 8) | (((us >> 8) & 0xf) << 4) | ((us >> 12) & 0xf))

-static void json_escape_string(smart_str *buf, zstr s, int len, zend_uchar type) /* {{{ */
+static void json_escape_string(smart_str *buf, zstr s, int len, zend_uchar type, int options) /* {{{ */
 {
        int pos = 0;
        unsigned short us;
@@ -305,6 +321,42 @@ static void json_escape_string(smart_str
                                        smart_str_appendl(buf, "\\t", 2);
                                }
                                break;
+                       case '<':
+                               {
+                                       if (options & PHP_JSON_HEX_TAG) {
+                                               smart_str_appendl(buf, "\\u003C", 6);
+                                       } else {
+                                               smart_str_appendc(buf, '<');
+                                       }
+                               }
+                               break;
+                       case '>':
+                               {
+                                       if (options & PHP_JSON_HEX_TAG) {
+                                               smart_str_appendl(buf, "\\u003E", 6);
+                                       } else {
+                                               smart_str_appendc(buf, '>');
+                                       }
+                               }
+                               break;
+                       case '&':
+                               {
+                                       if (options & PHP_JSON_HEX_AMP) {
+                                               smart_str_appendl(buf, "\\u0026", 6);
+                                       } else {
+                                               smart_str_appendc(buf, '&');
+                                       }
+                               }
+                               break;
+                       case '\'':
+                               {
+                                       if (options & PHP_JSON_HEX_APOS) {
+                                               smart_str_appendl(buf, "\\u0027", 6);
+                                       } else {
+                                               smart_str_appendc(buf, '\'');
+                                       }
+                               }
+                               break;
                        default:
                                {
                                        if (us >= ' ' && (us & 127) == us)
@@ -337,7 +389,7 @@ static void json_escape_string(smart_str
 }
 /* }}} */

-static void json_encode_r(smart_str *buf, zval *val TSRMLS_DC) /* {{{ */
+static void json_encode_r(smart_str *buf, zval *val, int options TSRMLS_DC) /* {{{ */
 {
        switch (Z_TYPE_P(val)) {
                case IS_NULL:
@@ -374,11 +426,11 @@ static void json_encode_r(smart_str *buf
                        break;
                case IS_STRING:
                case IS_UNICODE:
-                       json_escape_string(buf, Z_UNIVAL_P(val), Z_UNILEN_P(val), Z_TYPE_P(val));
+                       json_escape_string(buf, Z_UNIVAL_P(val), Z_UNILEN_P(val), Z_TYPE_P(val), options);
                        break;
                case IS_ARRAY:
                case IS_OBJECT:
-                       json_encode_array(buf, &val TSRMLS_CC);
+                       json_encode_array(buf, &val, options TSRMLS_CC);
                        break;
                default:
                        zend_error(E_WARNING, "[json] (json_encode_r) type is unsupported, encoded as null.");
@@ -396,12 +448,13 @@ PHP_FUNCTION(json_encode)
 {
        zval *parameter;
        smart_str buf = {0};
+       long options = 0;

-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &parameter) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|l", &parameter, &options) == FAILURE) {
                return;
        }

-       json_encode_r(&buf, parameter TSRMLS_CC);
+       json_encode_r(&buf, parameter, options TSRMLS_CC);

        /*
         * Return as binary string, since the result is 99% likely to be just

--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to