PleaseStand has uploaded a new change for review.
https://gerrit.wikimedia.org/r/78941
Change subject: Introducing JsonFallback
......................................................................
Introducing JsonFallback
With this library, MediaWiki and extensions no longer depend on the
native JSON extension, which cannot be treated as a "standard"
extension in the face of concern that the license of one of its
components is "non-free".
Nearly all PHP 5.5 JSON features (and bugs!) are supported. Assuming
that PHP 5.6 is released with a free JSON extension, JsonFallback can
be removed when support for PHP 5.5 and below is dropped.
Besides the wide feature set, an advantage of JsonFallback over
Services_JSON is its decoder, which is up to 9x faster and doesn't
fall over on megabyte-sized input.
Bug: 47431
Change-Id: I0a06b10eeee9a6bb04529d669fed8c69a4d9c172
---
M RELEASE-NOTES-1.22
M includes/GlobalFunctions.php
M includes/json/FormatJson.php
A includes/libs/JsonFallback.php
4 files changed, 921 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/41/78941/1
diff --git a/RELEASE-NOTES-1.22 b/RELEASE-NOTES-1.22
index b9890a0..7d99a22 100644
--- a/RELEASE-NOTES-1.22
+++ b/RELEASE-NOTES-1.22
@@ -184,6 +184,9 @@
setcookie() or setrawcookie() should begin using this instead.
* New hook WebResponseSetCookie, called from WebResponse::setcookie().
* New hook ResetSessionID, called when the session id is reset.
+* (bug 47431) Added a new JSON library to provide the standard set of JSON
+ functions when they are not compiled into PHP. It is somewhat faster than
+ Services_JSON and supports a wider range of options.
=== Bug fixes in 1.22 ===
* Disable Special:PasswordReset when $wgEnableEmail is false. Previously one
@@ -321,8 +324,6 @@
=== Other changes in 1.22 ===
* BREAKING CHANGE: Implementation of MediaWiki's JS and JSON value encoding
has changed:
-** MediaWiki no longer supports PHP installations in which the native JSON
- extension is missing or disabled.
** XmlJsCode objects can no longer be nested inside objects or arrays.
(For Xml::encodeJsCall(), this individually applies to each argument.)
** The sets of characters escaped by default, along with the precise escape
diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php
index be4ec3e..1cd386b 100644
--- a/includes/GlobalFunctions.php
+++ b/includes/GlobalFunctions.php
@@ -93,6 +93,11 @@
return Fallback::mb_strrpos( $haystack, $needle, $offset,
$encoding );
}
}
+
+if ( !function_exists( 'json_encode' ) ) {
+ require_once __DIR__ . '/libs/JsonFallback.php';
+}
+
/// @endcond
/**
diff --git a/includes/json/FormatJson.php b/includes/json/FormatJson.php
index 91e1e87..c66b4b1 100644
--- a/includes/json/FormatJson.php
+++ b/includes/json/FormatJson.php
@@ -90,10 +90,10 @@
* @return string|bool: String if successful; false upon failure
*/
public static function encode( $value, $pretty = false, $escaping = 0 )
{
- if ( version_compare( PHP_VERSION, '5.4.0', '<' ) ) {
- return self::encode53( $value, $pretty, $escaping );
+ if ( defined( 'JSON_UNESCAPED_UNICODE' ) ) {
+ return self::encode54( $value, $pretty, $escaping );
}
- return self::encode54( $value, $pretty, $escaping );
+ return self::encode53( $value, $pretty, $escaping );
}
/**
diff --git a/includes/libs/JsonFallback.php b/includes/libs/JsonFallback.php
new file mode 100644
index 0000000..58171de
--- /dev/null
+++ b/includes/libs/JsonFallback.php
@@ -0,0 +1,910 @@
+<?php
+/**
+ * A pure PHP reimplementation of the PHP 5.5 JSON functions, useful for
situations in which
+ * native JSON support is not compiled into PHP.
+ *
+ * More information is at <https://github.com/plstand/jsonfallback>.
+ *
+ * Copyright © 2013 Kevin Israel
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
copy of this software
+ * and associated documentation files (the "Software"), to deal in the
Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
publish, distribute,
+ * sublicense, and/or sell copies of the Software, and to permit persons to
whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
all copies or
+ * substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
+ *
+ * @file
+ */
+
+/**
+ * Holds the JSON functions and associated global state.
+ */
+class JsonFallback {
+
+ const HEX_TAG = 1;
+ const HEX_AMP = 2;
+ const HEX_APOS = 4;
+ const HEX_QUOT = 8;
+ const FORCE_OBJECT = 16;
+ const NUMERIC_CHECK = 32;
+ const UNESCAPED_SLASHES = 64;
+ const PRETTY_PRINT = 128;
+ const UNESCAPED_UNICODE = 256;
+ const PARTIAL_OUTPUT_ON_ERROR = 512;
+
+ const ERROR_NONE = 0;
+ const ERROR_DEPTH = 1;
+ const ERROR_STATE_MISMATCH = 2;
+ const ERROR_CTRL_CHAR = 3; // Not used in this library
+ const ERROR_SYNTAX = 4;
+ const ERROR_UTF8 = 5;
+ const ERROR_RECURSION = 6;
+ const ERROR_INF_OR_NAN = 7;
+ const ERROR_UNSUPPORTED_TYPE = 8;
+
+ const OBJECT_AS_ARRAY = 1; // Not used in this library
+ const BIGINT_AS_STRING = 2;
+
+ private static $lastError = self::ERROR_NONE;
+
+ /**
+ * @see http://php.net/json_encode
+ * @param mixed $value Anything but a resource
+ * @param int $options Bitfield of JsonFallback constants
+ * @param int $depth Maximum recursion depth
+ * @return string: JSON representation of $value
+ */
+ public static function encode( $value, $options = 0, $depth = 512 ) {
+ $ctx = new JsonFallbackEncoder( $options );
+ $json = $ctx->encode( $value, $depth );
+ self::$lastError = $ctx->lastError;
+ return $json;
+ }
+
+ /**
+ * @see http://php.net/json_decode
+ * @param string $json String to decode
+ * @param bool $assoc Whether to decode JSON objects as PHP arrays
+ * @param int $depth Maximum recursion depth
+ * @param int $options Either JsonFallback::BIGINT_AS_STRING or 0
+ * @return mixed: The decoded value
+ */
+ public static function decode( $json, $assoc = false, $depth = 512,
$options = 0 ) {
+ $ctx = new JsonFallbackDecoder( $json, $assoc, $options );
+ $value = $ctx->decode( $depth );
+ self::$lastError = $ctx->lastError;
+ return $value;
+ }
+
+ /**
+ * @see http://php.net/json_last_error
+ * @return int: One of the JsonFallback::ERROR_* constants
+ */
+ public static function last_error() {
+ return self::$lastError;
+ }
+
+ /**
+  * Returns the message that corresponds to the error code recorded by the
+  * most recent encode() or decode() call.
+  *
+  * @see http://php.net/json_last_error_msg
+  * @return string: Error message
+  */
+ public static function last_error_msg() {
+     // Every message must be a single-line literal: a line break inside the
+     // quotes would become part of the text returned to the caller.
+     $errorMap = array(
+         self::ERROR_NONE => 'No error',
+         self::ERROR_DEPTH => 'Maximum stack depth exceeded',
+         self::ERROR_STATE_MISMATCH => 'State mismatch (invalid or malformed JSON)',
+         self::ERROR_CTRL_CHAR => 'Control character error, possibly incorrectly encoded',
+         self::ERROR_SYNTAX => 'Syntax error',
+         self::ERROR_UTF8 => 'Malformed UTF-8 characters, possibly incorrectly encoded',
+         self::ERROR_RECURSION => 'Recursion detected',
+         self::ERROR_INF_OR_NAN => 'Inf and NaN cannot be JSON encoded',
+         self::ERROR_UNSUPPORTED_TYPE => 'Type is not supported',
+     );
+     return $errorMap[self::$lastError];
+ }
+
+ /**
+  * Exports prefixed class constants and functions into the global scope.
+  *
+  * Each class constant FOO is published as the global constant JSON_FOO, and
+  * json_encode(), json_decode(), json_last_error() and json_last_error_msg()
+  * are declared as thin wrappers around the static methods of this class.
+  *
+  * The call is a no-op when json_encode() already exists (either the native
+  * one or the wrapper from an earlier call), because declaring it again would
+  * abort the request with a "Cannot redeclare" fatal error.
+  *
+  * @private
+  */
+ public static function addGlobals() {
+     if ( function_exists( 'json_encode' ) ) {
+         return;
+     }
+
+     $ref = new ReflectionClass( __CLASS__ );
+     foreach ( $ref->getConstants() as $k => $v ) {
+         // Skip names that something else has defined already; define()
+         // would only raise a notice and keep the existing value anyway.
+         if ( !defined( 'JSON_' . $k ) ) {
+             define( 'JSON_' . $k, $v );
+         }
+     }
+
+     // Functions declared inside a method come into existence only when
+     // execution reaches them, i.e. after the guard above has passed.
+     function json_encode( $value, $options = 0, $depth = 512 ) {
+         return JsonFallback::encode( $value, $options, $depth );
+     }
+
+     function json_decode( $json, $assoc = false, $depth = 512, $options = 0 ) {
+         return JsonFallback::decode( $json, $assoc, $depth, $options );
+     }
+
+     function json_last_error() {
+         return JsonFallback::last_error();
+     }
+
+     function json_last_error_msg() {
+         return JsonFallback::last_error_msg();
+     }
+ }
+
+}
+
+/**
+ * Holds the state of an encode operation.
+ *
+ * @private
+ */
+class JsonFallbackEncoder {
+
+ public $lastError = JsonFallback::ERROR_NONE;
+ private $visitedArrays = array();
+ private $visitedObjects = array();
+ private $options;
+ private $escapeMap;
+
+ /**
+ * @param int $options Bitfield of JsonFallback constants
+ */
+ public function __construct( $options ) {
+ $this->options = (int)$options;
+
+ static $defaultEscapeMap = array();
+ if ( !$defaultEscapeMap ) {
+ $specials = array(
+ 0x08 => '\b', 0x09 => '\t', 0x0a => '\n', 0x0c
=> '\f', 0x0d => '\r',
+ 0x22 => '\"', 0x2f => '\/', 0x5c => '\\\\',
+ );
+ for ( $i = 0x00; $i < 0x80; $i++ ) {
+ if ( isset( $specials[$i] ) ) {
+ $defaultEscapeMap[$i] = $specials[$i];
+ } elseif ( $i < 0x20 ) {
+ $defaultEscapeMap[$i] = sprintf(
'\u%04x', $i );
+ } else {
+ $defaultEscapeMap[$i] = chr( $i );
+ }
+ }
+ }
+
+ // $defaultEscapeMap contains default JSON-encoded forms of
ASCII characters.
+ // To reflect any specified non-default options, we modify a
copy of the array.
+ $this->escapeMap = $defaultEscapeMap;
+
+ if ( $this->options & JsonFallback::HEX_QUOT ) {
+ $this->escapeMap[0x22] = '\u0022';
+ }
+ if ( $this->options & JsonFallback::HEX_AMP ) {
+ $this->escapeMap[0x26] = '\u0026';
+ }
+ if ( $this->options & JsonFallback::HEX_APOS ) {
+ $this->escapeMap[0x27] = '\u0027';
+ }
+ if ( $this->options & JsonFallback::UNESCAPED_SLASHES ) {
+ $this->escapeMap[0x2f] = '/';
+ }
+ if ( $this->options & JsonFallback::HEX_TAG ) {
+ // Uppercase as in the native JSON extension
+ $this->escapeMap[0x3c] = '\u003C';
+ $this->escapeMap[0x3e] = '\u003E';
+ }
+ }
+
+ /**
+ * Adds non-significant whitespace to an existing JSON representation
of an object.
+ *
+ * @param string $json
+ * @return string
+ */
+ private static function prettyPrint( $json ) {
+ $buf = '';
+ $indent = 0;
+ $json = strtr( $json, array( '\\\\' => '\\\\', '\"' => "\x01" )
);
+ for ( $i = 0, $n = strlen( $json ); $i < $n; $i += $skip ) {
+ $skip = 1;
+ switch ( $json[$i] ) {
+ case ':':
+ $buf .= ': ';
+ break;
+ case '[':
+ case '{':
+ $indent++; // falls through
+ case ',':
+ $buf .= $json[$i] . "\n" . str_repeat(
' ', $indent );
+ break;
+ case ']':
+ case '}':
+ $indent--;
+ $buf .= "\n" . str_repeat( ' ',
$indent ) . $json[$i];
+ break;
+ case '"':
+ $skip = strcspn( $json, '"', $i + 1 ) +
2;
+ $buf .= substr( $json, $i, $skip );
+ break;
+ default:
+ $skip = strcspn( $json, ',]}"', $i + 1
) + 1;
+ $buf .= substr( $json, $i, $skip );
+ }
+ }
+ return str_replace( "\x01", '\"', preg_replace( '/ +$/m', '',
$buf ) );
+ }
+
+ /**
+ * JSON encodes a value of arbitrary type, catching any
JsonFallbackError exception.
+ *
+ * @param mixed $value Anything but a resource
+ * @param int $depth Maximum recursion depth
+ * @return string: JSON representation of $value
+ */
+ public function encode( $value, $depth ) {
+ $xdebugHack = new JsonFallbackXdebugHack;
+ try {
+ $json = $this->encodeRecursive( $value, (int)$depth );
+ if ( $this->options & JsonFallback::PRETTY_PRINT ) {
+ $json = self::prettyPrint( $json );
+ }
+ } catch ( JsonFallbackError $e ) {
+ $json = false;
+ }
+ return $json;
+ }
+
+ /**
+  * JSON encodes one value, recursing into the members of arrays and objects.
+  *
+  * Booleans, null, integers, floats and strings are encoded directly. Objects
+  * and arrays are first checked against the stacks of values currently being
+  * encoded (circular reference detection), pushed onto the matching stack,
+  * encoded member by member and popped again before returning.
+  *
+  * Failures are routed through error(), which either throws or, when
+  * JsonFallback::PARTIAL_OUTPUT_ON_ERROR is set, substitutes dummy JSON.
+  *
+  * @param mixed $value Anything but a resource; taken by reference so that
+  *   arrays can be tracked by reference for the circular reference check
+  * @param int $depth Maximum recursion depth
+  * @return string: JSON representation of $value
+  * @throws JsonFallbackError
+  */
+ private function encodeRecursive( &$value, $depth ) {
+     if ( $value === true ) {
+         return 'true';
+     } elseif ( $value === false ) {
+         return 'false';
+     } elseif ( $value === null ) {
+         return 'null';
+     } elseif ( is_int( $value ) ) {
+         return (string)$value;
+     } elseif ( is_float( $value ) ) {
+         // $value !== $value is true only for NaN
+         if ( $value === INF || $value === -INF || $value !== $value ) {
+             return $this->error( JsonFallback::ERROR_INF_OR_NAN, '0' );
+         }
+
+         // Lowercase the exponent marker ("1.0E+25" => "1.0e+25").
+         $json = strtolower( (string)$value );
+
+         // On PHP versions where float-to-string conversion follows
+         // LC_NUMERIC, the decimal separator may be something other than a
+         // full stop (e.g. "1,5"), which is not valid JSON. Normalize it.
+         $locale = localeconv();
+         if ( $locale['decimal_point'] !== '.' ) {
+             $json = str_replace( $locale['decimal_point'], '.', $json );
+         }
+         return $json;
+     } elseif ( is_string( $value ) ) {
+         if ( $this->options & JsonFallback::NUMERIC_CHECK && is_numeric( $value ) ) {
+             // Can't use the plus operator here; it changes negative zero to positive zero
+             $copy = $value - 0;
+             return $this->encodeRecursive( $copy, $depth );
+         } else {
+             return $this->encodeString( $value );
+         }
+
+     } elseif ( is_object( $value ) ) {
+         // Always encode PHP objects as JSON objects
+         $isPHPObject = true;
+         $isJSONObject = true;
+
+         // Circular reference check
+         if ( in_array( $value, $this->visitedObjects, true ) ) {
+             return $this->error( JsonFallback::ERROR_RECURSION );
+         }
+
+         $this->visitedObjects[] = $value;
+
+         if ( $value instanceof JsonSerializable ) {
+             $copy = $value->jsonSerialize();
+             if ( $copy !== $value ) {
+                 // The object stays on the stack while its replacement is
+                 // encoded, so a replacement that refers back to the object
+                 // is still detected. It must be popped before returning:
+                 // otherwise a later, unrelated occurrence of the same object
+                 // would be reported as recursion and the pop at the end of
+                 // an enclosing object would remove the wrong entry.
+                 $json = $this->encodeRecursive( $copy, $depth );
+                 array_pop( $this->visitedObjects );
+                 return $json;
+             }
+         }
+
+         // Closures cannot have properties, and since PHP 5.3.3, they cast to arrays as
+         // scalars do <https://bugs.php.net/bug.php?id=52193>. Nevertheless, they are
+         // JSON encoded as what they are: empty objects.
+         $copy = $value instanceof Closure ? array() : (array)$value;
+
+     } elseif ( is_array( $value ) ) {
+         // Encode PHP arrays as JSON objects when necessary, or if forced to
+         $isPHPObject = false;
+         $isJSONObject = $this->options & JsonFallback::FORCE_OBJECT ||
+             $value && array_keys( $value ) !== range( 0, count( $value ) - 1 );
+
+         // Circular reference check. This is rather complex because of how PHP works:
+         // * Using `===` could result in a "Fatal error: Nesting level too deep [...]".
+         // * In Zend PHP, copying the array would not preserve the value of the
+         //   HashTable's nNextFreeElement field. This prevents temporarily setting the
+         //   array to something else and later restoring it without possibly affecting
+         //   how code that uses `$array[] =` would behave thereafter.
+         // * Temporarily adding an element to the end of the array is not an option
+         //   because the array might already contain the maximum allowable integer key.
+         // * Adding an element with a known string key is not an option because the
+         //   array could contain *any* key for that matter.
+         // * Fortunately, an array's internal pointer can be temporarily changed and
+         //   put back just the way it was when we are done checking.
+         $before = array_map( 'key', $this->visitedArrays );
+         key( $value ) === null ? reset( $value ) : next( $value );
+         $after = array_map( 'key', $this->visitedArrays );
+         key( $value ) === null ? end( $value ) : prev( $value );
+         if ( $before !== $after ) {
+             return $this->error( JsonFallback::ERROR_RECURSION );
+         }
+
+         $this->visitedArrays[] =& $value;
+
+         // Iterate over a copy to preserve the internal pointer
+         $copy = $value;
+
+     } else {
+         return $this->error( JsonFallback::ERROR_UNSUPPORTED_TYPE );
+     }
+
+     // Undocumented encoding depth check <https://bugs.php.net/bug.php?id=62369>
+     // JsonFallback::PARTIAL_OUTPUT_ON_ERROR allows encoding to continue on here (PHP bug?)
+     if ( $depth < 1 ) {
+         $this->lastError = JsonFallback::ERROR_DEPTH;
+         if ( !( $this->options & JsonFallback::PARTIAL_OUTPUT_ON_ERROR ) ) {
+             throw new JsonFallbackError;
+         }
+     }
+
+     $parts = array();
+     if ( $isJSONObject ) {
+         foreach ( $copy as $propName => &$propValue ) {
+             // Include keys that are the wrong type (string vs. integer) for the PHP
+             // value in question (possible through array/object casting), yet for
+             // PHP objects, don't include properties that otherwise are inaccessible.
+             $propName = (string)$propName;
+             if ( !$isPHPObject || $propName >= "\x01" ) {
+                 // NOTE: when JsonFallback::PARTIAL_OUTPUT_ON_ERROR is specified, and the key is
+                 // not valid UTF-8, invalid JSON is produced. The native extension is no better.
+                 $parts[] = $this->encodeString( $propName ) . ':' .
+                     $this->encodeRecursive( $propValue, $depth - 1 );
+             }
+         }
+         $json = '{' . implode( ',', $parts ) . '}';
+
+     } else {
+         foreach ( $copy as &$propValue ) {
+             $parts[] = $this->encodeRecursive( $propValue, $depth - 1 );
+         }
+         $json = '[' . implode( ',', $parts ) . ']';
+     }
+
+     // Done with this container: take it off the matching "in progress" stack
+     if ( $isPHPObject ) {
+         array_pop( $this->visitedObjects );
+     } else {
+         array_pop( $this->visitedArrays );
+     }
+     return $json;
+ }
+
+ /**
+ * JSON encodes a UTF-8 string.
+ *
+ * The UTF-8 decoding algorithm is based on that described in the
WHATWG Encoding Standard
+ * <http://encoding.spec.whatwg.org/#utf-8>, with some performance
tweaks. Some error handling
+ * details are omitted because invalid UTF-8 aborts the JSON encoding
process anyway.
+ *
+ * @param string $string
+ * @return string
+ */
+ private function encodeString( $string ) {
+ $json = '"';
+
+ // This is the range in which continuation bytes fall. It is
further restricted for
+ // the second byte (first continuation byte) of some 3- and
4-byte sequences.
+ $lbound = 0x80;
+ $ubound = 0xbf;
+
+ for ( $i = 0, $n = strlen( $string ); $i < $n; ) {
+
+ $codeStart = $i++;
+ $code = ord( $string[$codeStart] );
+
+ if ( $code < 0x80 ) {
+ $json .= $this->escapeMap[$code];
+ continue;
+ } elseif ( $code < 0xc2 ) {
+ // Continuation byte or overlong form of ASCII
character
+ return $this->error( JsonFallback::ERROR_UTF8 );
+ } elseif ( $code < 0xe0 ) {
+ $codeLength = 2;
+ $code -= 0xc0;
+ } elseif ( $code < 0xf0 ) {
+ $codeLength = 3;
+ $code -= 0xe0;
+ if ( $code === 0x00 ) {
+ $lbound = 0xa0; // Exclude 3-byte
overlong forms
+ } elseif ( $code === 0x0d ) {
+ $ubound = 0x9f; // Exclude surrogates
(U+D800..U+DFFF)
+ }
+ } elseif ( $code < 0xf5 ) {
+ $codeLength = 4;
+ $code -= 0xf0;
+ if ( $code === 0x00 ) {
+ $lbound = 0x90; // Exclude 4-byte
overlong forms
+ } elseif ( $code === 0x04 ) {
+ $ubound = 0x8f; // Exclude values
beyond U+10FFFF
+ }
+ } else {
+ // Value beyond U+13FFFF
+ return $this->error( JsonFallback::ERROR_UTF8 );
+ }
+
+ $need = $codeLength - 1;
+
+ // Check for premature end of string to prevent a PHP
notice
+ if ( $i + $need > $n ) {
+ return $this->error( JsonFallback::ERROR_UTF8 );
+ }
+
+ // Read $need continuation bytes, the first between
$lbound and $ubound
+ do {
+ $byte = ord( $string[$i++] );
+ if ( $byte < $lbound || $byte > $ubound ) {
+ return $this->error(
JsonFallback::ERROR_UTF8 );
+ }
+
+ $code = $code << 6 | $byte & 0x3f;
+
+ // Restore original values
+ $lbound = 0x80;
+ $ubound = 0xbf;
+
+ } while ( --$need );
+
+ // Escape the code point (if necessary) and append it
to $json
+ if ( $this->options & JsonFallback::UNESCAPED_UNICODE )
{
+ $json .= substr( $string, $codeStart,
$codeLength );
+ } elseif ( $code < 0x10000 ) {
+ // Use a single UTF-16 code unit
+ $json .= sprintf( '\u%04x', $code );
+ } else {
+ // Use a surrogate pair
+ $code -= 0x10000;
+ $json .= sprintf( '\u%04x\u%04x',
+ 0xd800 + ( $code >> 10 ), 0xdc00 + (
$code & 0x3ff ) );
+ }
+ }
+
+ $json .= '"';
+ return $json;
+ }
+
+ /**
+ * Handles an encode error by throwing an exception or returning some
dummy JSON.
+ * The relevant error code is saved.
+ *
+ * @param int $code Error code for JsonFallback::json_last_error() to
return
+ * @param string $dummyJson Representation to use for the non-encodable
value
+ * @return mixed: Value of $dummyJson
+ * @throws JsonFallbackError
+ */
+ private function error( $code, $dummyJson = 'null' ) {
+ $this->lastError = $code;
+ if ( $this->options & JsonFallback::PARTIAL_OUTPUT_ON_ERROR ) {
+ return $dummyJson;
+ }
+ throw new JsonFallbackError;
+ }
+
+}
+
+/**
+ * Holds the state of a decode operation.
+ *
+ * @private
+ */
+class JsonFallbackDecoder {
+
+ public $lastError = JsonFallback::ERROR_NONE;
+ private $s;
+ private $i = 0;
+ private $objectAsArray;
+ private $bigIntAsString;
+
+ /**
+ * @param string $json String to decode
+ * @param bool $assoc Whether to decode JSON objects as PHP arrays
+ * @param int $options Either JsonFallback::BIGINT_AS_STRING or 0
+ */
+ public function __construct( $json, $assoc, $options ) {
+ $this->s = (string)$json;
+ $this->objectAsArray = (bool)$assoc;
+ $this->bigIntAsString = (bool)( (int)$options &
JsonFallback::BIGINT_AS_STRING );
+ }
+
+ /**
+  * Decodes the JSON string, catching any JsonFallbackError exception.
+  *
+  * Lone scalars (numbers and case-insensitive true/false/null) are handled up
+  * front; everything else is validated as UTF-8 and handed to the tokenizer
+  * and the recursive parser. On failure null is returned and $lastError holds
+  * the reason.
+  *
+  * @param int $depth Maximum recursion depth
+  * @return mixed: The decoded value, or null on failure
+  */
+ public function decode( $depth ) {
+     // Held only for its lifetime: the constructor lifts xdebug's nesting
+     // limit and the destructor restores it when this method returns.
+     $xdebugHack = new JsonFallbackXdebugHack;
+     $depth = (int)$depth;
+     if ( $depth < 1 ) {
+         trigger_error( 'json_decode(): Depth must be greater than zero', E_USER_WARNING );
+         return null; // Report JsonFallback::ERROR_NONE though
+     }
+
+     if ( $this->s === '' ) {
+         return null; // Report JsonFallback::ERROR_NONE though (PHP bug?)
+     }
+
+     // NOTE: This simulates a quirk in the PHP JSON parser arising from the fact that lone
+     // scalar value parsing was "bolted on", and poorly. Leading zeros are tolerated, yet
+     // whitespace around true/false/null is not. And true/false/null are case insensitive;
+     // that one is actually documented.
+     if ( is_numeric( $this->s ) ) {
+         return $this->decodeNumber( $this->s );
+     } elseif ( strcasecmp( $this->s, 'true' ) === 0 ) {
+         return true;
+     } elseif ( strcasecmp( $this->s, 'false' ) === 0 ) {
+         return false;
+     } elseif ( strcasecmp( $this->s, 'null' ) === 0 ) {
+         return null;
+     }
+
+     try {
+         // Validate the UTF-8 before decoding. Use mb_check_encoding when possible; otherwise
+         // use the slower validation code from the string encoder. In particular, PHP < 5.4
+         // accepts values above U+10FFFF, so fall back to the slow way in that case.
+         if ( function_exists( 'mb_check_encoding' ) &&
+             !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' )
+         ) {
+             $valid = mb_check_encoding( $this->s, 'UTF-8' );
+         } else {
+             $ectx = new JsonFallbackEncoder(
+                 JsonFallback::UNESCAPED_SLASHES | JsonFallback::UNESCAPED_UNICODE );
+             $valid = $ectx->encode( $this->s, 0 ) !== false;
+         }
+
+         if ( !$valid ) {
+             $this->error( JsonFallback::ERROR_UTF8 );
+         }
+
+         // Lone number/true/false/null was already handled above
+         $token = $this->nextToken();
+         if ( $token[0] === 's' && !is_string( $token[1] ) ) {
+             $this->error( JsonFallback::ERROR_SYNTAX );
+         }
+
+         $value = $this->decodeRecursive( $token, $depth );
+
+         // Only the end of input may follow the top-level value
+         $token = $this->nextToken();
+         if ( $token[0] === '$' ) {
+             return $value;
+         }
+
+         if ( $token[0] === ']' || $token[0] === '}' ) {
+             // Stray closing bracket or brace
+             $this->lastError = JsonFallback::ERROR_STATE_MISMATCH;
+         } else {
+             $this->lastError = JsonFallback::ERROR_SYNTAX;
+         }
+         return null;
+
+     } catch ( JsonFallbackError $e ) {
+         // error() has already stored the code in $lastError
+         return null;
+     }
+ }
+
+ /**
+ * @param array $token Initial token to examine
+ * @param int $depth Maximum recursion depth
+ * @return mixed: The decoded value
+ */
+ private function decodeRecursive( $token, $depth ) {
+ if ( $token[0] === 's' ) {
+ return $token[1];
+ }
+
+ if ( $depth < 2 ) {
+ $this->error( JsonFallback::ERROR_DEPTH );
+ }
+
+ if ( $token[0] === '[' ) {
+ $retval = array();
+ $token = $this->nextToken();
+ if ( $token[0] === ']' ) {
+ // Special case, empty array
+ return $retval;
+ }
+
+ while ( true ) {
+ $retval[] = $this->decodeRecursive( $token,
$depth - 1 );
+ $token = $this->nextToken();
+ if ( $token[0] === ',' ) {
+ $token = $this->nextToken();
+ } elseif ( $token[0] === ']' ) {
+ return $retval;
+ } elseif ( $token[0] === '}' ) {
+ $this->error(
JsonFallback::ERROR_STATE_MISMATCH );
+ } else {
+ $this->error(
JsonFallback::ERROR_SYNTAX );
+ }
+ }
+
+ } elseif ( $token[0] === '{' ) {
+ $retval = $this->objectAsArray ? array() : new stdClass;
+ $token = $this->nextToken();
+ if ( $token[0] === '}' ) {
+ // Special case, empty object
+ return $retval;
+ }
+
+ while ( true ) {
+ if ( $token[0] !== 's' || !is_string( $token[1]
) ) {
+ $this->error(
JsonFallback::ERROR_SYNTAX );
+ }
+
+ // NOTE: As in the native extension, object
keys beginning with \u0000 can cause
+ // fatal errors (PHP bug?), and duplicate keys
are happily accepted here.
+ if ( !$this->objectAsArray && $token[1] === ''
) {
+ $key = '_empty_';
+ } else {
+ $key = $token[1];
+ }
+
+ $token = $this->nextToken();
+ if ( $token[0] !== ':' ) {
+ $this->error(
JsonFallback::ERROR_SYNTAX );
+ }
+
+ $value = $this->decodeRecursive(
$this->nextToken(), $depth - 1 );
+ if ( $this->objectAsArray ) {
+ $retval[$key] = $value;
+ } else {
+ $retval->$key = $value;
+ }
+
+ $token = $this->nextToken();
+ if ( $token[0] === ',' ) {
+ $token = $this->nextToken();
+ } elseif ( $token[0] === '}' ) {
+ return $retval;
+ } elseif ( $token[0] === ']' ) {
+ $this->error(
JsonFallback::ERROR_STATE_MISMATCH );
+ } else {
+ $this->error(
JsonFallback::ERROR_SYNTAX );
+ }
+ }
+
+ } else {
+ $this->error( JsonFallback::ERROR_SYNTAX );
+ }
+ }
+
+ /**
+ * @return array: Token type at offset 0; for scalars, value at offset 1
+ */
+ private function nextToken() {
+ $regex = '! [\t\n\r ]*+ (?:
+ ( true ) | ( false ) | ( null ) | # simple keyword (1-3)
+
+ ( # number (4)
+ -? (?> 0 | [1-9][0-9]* ) # integer part
+ (?: \.[0-9]* )?+ # fractional
part (laxity: "1." is accepted)
+ (?: [Ee] [+-]? [0-9]+ )? # exponent
+ ) |
+
+ ( " ) | # string (5)
+ ( [\[{\]}:,] ) | # structural
character (6)
+ ( $ ) # end of string (7)
+ ) !Ax';
+
+ if ( !preg_match( $regex, $this->s, $matches, 0, $this->i ) ) {
+ $this->error( JsonFallback::ERROR_SYNTAX );
+ }
+
+ $this->i += strlen( $matches[0] );
+
+ // Determine token type and value using the technique described
in
+ //
<http://nikic.github.io/2011/10/23/Improving-lexing-performance-in-PHP.html>
+ $type = count( $matches ) - 1;
+ switch ( $type ) {
+ case 1:
+ return array( 's', true );
+ case 2:
+ return array( 's', false );
+ case 3:
+ return array( 's', null );
+ case 4:
+ return array( 's', $this->decodeNumber(
$matches[4] ) );
+ case 5:
+ return array( 's', $this->decodeString() );
+ case 6:
+ return array( $matches[6] );
+ case 7:
+ return array( '$' );
+ default:
+ // Should not be possible even for invalid input
+ $this->error( JsonFallback::ERROR_SYNTAX );
+ }
+ }
+
+ /**
+  * Converts the textual form of a number into a PHP number.
+  *
+  * When the BIGINT_AS_STRING option is in effect and the conversion yields a
+  * float although $string consists of nothing but '-' and digits, the
+  * original string is handed back unchanged instead of the float.
+  *
+  * @param string $string
+  * @return int|float|string
+  */
+ private function decodeNumber( $string ) {
+     // Can't use the plus operator here; it changes negative zero to positive zero
+     $converted = $string - 0;
+
+     if ( !$this->bigIntAsString || !is_float( $converted ) ) {
+         return $converted;
+     }
+
+     // Intentionally disallow whitespace (for the lone scalar value parsing):
+     // only a string made up purely of '-' and digits is kept as a string.
+     return ltrim( $string, '-0..9' ) === '' ? $string : $converted;
+ }
+
+ /**
+ * @return string
+ */
+ private function decodeString() {
+ $twoCharUnescaped = array(
+ '\\' => '\\', '"' => '"', 'b' => "\x08", 'f' => "\f",
'n' => "\n", 'r' => "\r",
+ 't' => "\t", '/' => '/',
+ );
+
+ $buf = '';
+
+ for ( $n = strlen( $this->s ); $this->i < $n; $this->i += $skip
) {
+ // Unescaped double quote ends the string
+ if ( $this->s[$this->i] === '"' ) {
+ $this->i++;
+ return $buf;
+ }
+
+ // Unescaped control characters, including CR and LF,
are illegal
+ if ( $this->s[$this->i] < ' ' ) {
+ $this->error( JsonFallback::ERROR_SYNTAX );
+ }
+
+ // Decode other unescaped characters as-is
+ if ( $this->s[$this->i] !== '\\' ) {
+ $skip = strcspn( $this->s, '\"', $this->i );
+ $unescaped = substr( $this->s, $this->i, $skip
);
+ if ( ltrim( $unescaped, " ..\xff" ) !== '' ) {
+ $this->error(
JsonFallback::ERROR_SYNTAX );
+ }
+ $buf .= $unescaped;
+ continue;
+ }
+
+ // Handle two-character escape sequences using a hash
lookup
+ if ( $this->i + 1 >= $n ) {
+ break;
+ }
+ if ( $this->s[$this->i + 1] !== 'u' ) {
+ $key = $this->s[$this->i + 1];
+ if ( !isset( $twoCharUnescaped[$key] ) ) {
+ $this->error(
JsonFallback::ERROR_SYNTAX );
+ }
+ $buf .= $twoCharUnescaped[$key];
+ $skip = 2;
+ continue;
+ }
+
+ // Handle Unicode escape sequences by hex-decoding the
code units
+ // and re-encoding using UTF-8
+ if ( $this->i + 5 >= $n ) {
+ break;
+ }
+ $hex = substr( $this->s, $this->i + 2, 4 );
+ if ( ltrim( $hex, '0..9A..Fa..f' ) !== '' ) {
+ $this->error( JsonFallback::ERROR_SYNTAX );
+ }
+ $code = hexdec( $hex );
+ $skip = 6;
+
+ // U+0000..U+007F (ASCII characters): equal to UTF-8
byte value
+ if ( $code < 0x80 ) {
+ $buf .= chr( $code );
+ continue;
+ }
+
+ // U+0080..U+07FF: decode as two UTF-8 bytes
+ if ( $code <= 0x07ff ) {
+ $buf .= pack( 'C*', 0xc0 | $code >> 6, 0x80 |
$code & 0x3f );
+ continue;
+ }
+
+ // U+D800..U+DBFF when paired with U+DC00..U+DFFF:
decode as four UTF-8 bytes
+ if ( $code >= 0xd800 && $code <= 0xdbff && $this->i +
11 < $n
+ && $this->s[$this->i + 6] === '\\' &&
$this->s[$this->i + 7] === 'u'
+ ) {
+ $hex = substr( $this->s, $this->i + 8, 4 );
+ if ( ltrim( $hex, '0..9A..Fa..f' ) !== '' ) {
+ $this->error(
JsonFallback::ERROR_SYNTAX );
+ }
+ $low = hexdec( $hex );
+ if ( $low >= 0xdc00 && $low <= 0xdfff ) {
+ $code = 0x10000 + ( ( $code & 0x3ff )
<< 10 | $low & 0x3ff );
+ $buf .= pack( 'C*', 0xf0 | $code >> 18,
0x80 | ( $code >> 12 ) & 0x3f,
+ 0x80 | ( $code >> 6 ) & 0x3f,
0x80 | $code & 0x3f );
+ $skip = 12;
+ continue;
+ }
+ }
+
+ // Everything else in U+0800..U+FFFF: decode as three
UTF-8 bytes
+ // NOTE: This includes unpaired surrogates
(U+D800..U+DFFF), which should never
+ // appear in Unicode text yet are decoded by the native
JSON extension anyway
+ // <https://bugs.php.net/bug.php?id=41067#1176758113>
(separate PHP bug?)
+ $buf .= pack( 'C*', 0xe0 | $code >> 12, 0x80 | ( $code
>> 6 ) & 0x3f,
+ 0x80 | $code & 0x3f );
+ }
+
+ // Unexpected end of input string
+ $this->error( JsonFallback::ERROR_SYNTAX );
+ }
+
+ /**
+ * @param int $code: One of the JsonFallback::ERROR_* constants
+ * @throws JsonFallbackError
+ */
+ private function error( $code ) {
+ $this->lastError = $code;
+ throw new JsonFallbackError;
+ }
+
+}
+
+/**
+ * Exception thrown when a value cannot be processed; should never propagate
to user code.
+ * Necessary because the library must not catch exceptions thrown by
jsonSerialize functions.
+ *
+ * @private
+ */
+class JsonFallbackError extends Exception {
+}
+
+/**
+ * Scope guard that switches off xdebug's protection against infinite recursion
+ * for as long as an instance is alive. Deeply nested data would otherwise end
+ * in a fatal error, especially because xdebug defaults to a rather low limit.
+ *
+ * The constructor saves the current xdebug.max_nesting_level and sets it to 0;
+ * the destructor puts the saved value back. Nothing is touched when the saved
+ * value casts to an integer that is not positive.
+ *
+ * NOTE(review): this relies on 0 meaning "no limit" to xdebug; confirm that
+ * against the xdebug versions that are actually deployed.
+ *
+ * @private
+ */
+class JsonFallbackXdebugHack {
+
+ /** Value of xdebug.max_nesting_level as read at construction time */
+ private $savedLevel;
+
+ public function __construct() {
+     $this->savedLevel = ini_get( 'xdebug.max_nesting_level' );
+     if ( $this->limitWasActive() ) {
+         ini_set( 'xdebug.max_nesting_level', 0 );
+     }
+ }
+
+ public function __destruct() {
+     if ( $this->limitWasActive() ) {
+         ini_set( 'xdebug.max_nesting_level', $this->savedLevel );
+     }
+ }
+
+ /**
+  * @return bool: Whether the saved setting was a positive nesting limit
+  */
+ private function limitWasActive() {
+     return (int)$this->savedLevel > 0;
+ }
+
+}
+
+define( 'JSONFALLBACK_POLYFILL', !function_exists( 'json_encode' ) );
+define( 'JSONFALLBACK_SERIALIZABLE', !interface_exists( 'JsonSerializable',
false ) );
+
+if ( JSONFALLBACK_POLYFILL ) {
+ JsonFallback::addGlobals();
+}
+
+if ( JSONFALLBACK_SERIALIZABLE ) {
+ interface JsonSerializable {
+ public function jsonSerialize();
+ }
+}
--
To view, visit https://gerrit.wikimedia.org/r/78941
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I0a06b10eeee9a6bb04529d669fed8c69a4d9c172
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: PleaseStand <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits