Yurik has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/163344

Change subject: FormatJson::parse( TRY_FIXING ) - remove trailing commas
......................................................................

FormatJson::parse( TRY_FIXING ) - remove trailing commas

Removes trailing commas from json text when parsing
Solves very common cases like [1,2,3,]

Resulting status will be set to OK but not Good to warn caller

Change-Id: Ic0eb0a711da3ae578d6bb58d7474279d6845a4a7
---
M includes/json/FormatJson.php
M languages/i18n/en.json
M languages/i18n/qqq.json
M tests/phpunit/includes/json/FormatJsonTest.php
4 files changed, 68 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/44/163344/1

diff --git a/includes/json/FormatJson.php b/includes/json/FormatJson.php
index 5565644..2dbbc30 100644
--- a/includes/json/FormatJson.php
+++ b/includes/json/FormatJson.php
@@ -61,7 +61,14 @@
         *
         * @since 1.24
         */
-       const FORCE_ASSOC = 1;
+       const FORCE_ASSOC = 0x1;
+
+       /**
+        * If set, attempts to fix invalid json.
+        *
+        * @since 1.24
+        */
+       const TRY_FIXING = 0x2;
 
        /**
         * Regex that matches whitespace inside empty arrays and objects.
@@ -149,6 +156,28 @@
                $result = json_decode( $value, $assoc );
                $code = json_last_error();
 
+               if ( $code === JSON_ERROR_SYNTAX && ( $options & 
self::TRY_FIXING ) !== 0 ) {
+                       // The most common error is the trailing comma in a 
list or an object.
+                       // We cannot simply replace /,\s*[}\]]/ because it 
could be inside a string value.
+                       // But we could use the fact that JSON does not allow 
multi-line string values,
+                       // And remove trailing commas if they are at the end of 
a line.
+                       // JSON only allows 4 whitespace characters: [ \t\r\n].  
So we must not use '\s' for matching.
+                       // Regex match   ,]<any non-quote chars>\n   or   ,\n]  
 with optional spaces/tabs.
+                       $count = 0;
+                       $value =
+                               preg_replace( '/,([ 
\t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/', '$1',
+                                       $value, - 1, $count );
+                       if ( $count > 0 ) {
+                               $result = json_decode( $value, $assoc );
+                               if ( JSON_ERROR_NONE === json_last_error() ) {
+                                       // Report warning
+                                       $st = Status::newGood( $result );
+                                       $st->warning( wfMessage( 
'json-warn-trailing-comma' )->numParams( $count ) );
+                                       return $st;
+                               }
+                       }
+               }
+
                switch ( $code ) {
                        case JSON_ERROR_NONE:
                                return Status::newGood( $result );
diff --git a/languages/i18n/en.json b/languages/i18n/en.json
index e8a5c3c..7a1d2f5 100644
--- a/languages/i18n/en.json
+++ b/languages/i18n/en.json
@@ -3559,6 +3559,7 @@
        "mediastatistics-header-text": "Textual",
        "mediastatistics-header-executable": "Executables",
        "mediastatistics-header-archive": "Compressed formats",
+       "json-warn-trailing-comma": "$1 trailing {{PLURAL:$1|comma was|commas 
were}} removed from JSON",
        "json-error-unknown": "There was a problem with the JSON. Error: $1",
        "json-error-depth": "The maximum stack depth has been exceeded",
        "json-error-state-mismatch": "Invalid or malformed JSON",
diff --git a/languages/i18n/qqq.json b/languages/i18n/qqq.json
index 2049ae5..96e7fac 100644
--- a/languages/i18n/qqq.json
+++ b/languages/i18n/qqq.json
@@ -3721,6 +3721,7 @@
        "mediastatistics-header-text": "Header on [[Special:MediaStatistics]] 
for file types that are in the text category. This includes simple text 
formats, including plain text formats, json, csv, and xml. Source code of 
compiled programming languages may be included here in the future, but isn't 
currently.",
        "mediastatistics-header-executable": "Header on 
[[Special:MediaStatistics]] for file types that are in the executable category. 
This includes things like source files for interpreted programming language 
(Shell scripts, javascript, etc).",
        "mediastatistics-header-archive": "Header on 
[[Special:MediaStatistics]] for file types that are in the archive category. 
Includes things like tar, zip, gzip etc.",
+       "json-warn-trailing-comma": "A warning message notifying that JSON text 
was automatically corrected by removing erroneous commas.\n\nParameters:\n* $1 
- number of commas that were removed",
        "json-error-unknown": "User error message when there’s an unknown 
error.\n{{Identical|Unknown error}}. This error is shown if we received an 
unexpected value from PHP. See 
http://php.net/manual/en/function.json-last-error.php\n\nParameters:\n* $1 - 
integer error code",
        "json-error-depth": "User error message when the maximum stack depth is 
exceeded.\nSee http://php.net/manual/en/function.json-last-error.php",
        "json-error-state-mismatch": "User error message when underflow or the 
modes mismatch.\n\n'''Underflow''': A data-processing error arising when the 
absolute value of a computed quantity is smaller than the limits of precision 
of the computing device, retaining at least one significant digit.\nSee 
http://php.net/manual/en/function.json-last-error.php",
diff --git a/tests/phpunit/includes/json/FormatJsonTest.php 
b/tests/phpunit/includes/json/FormatJsonTest.php
index 0f1cdf7..af68ab0 100644
--- a/tests/phpunit/includes/json/FormatJsonTest.php
+++ b/tests/phpunit/includes/json/FormatJsonTest.php
@@ -169,6 +169,42 @@
                $this->assertEquals( $value, $st->getValue() );
        }
 
+       public static function provideParseTryFixing() {
+               return array(
+                       array( "[,]", '[]' ),
+                       array( "[ , ]", '[]' ),
+                       array( "[ , }", false ),
+                       array( '[1],', false ),
+                       array( "[1,]", '[1]' ),
+                       array( "[1\n,]", '[1]' ),
+                       array( "[1,\n]", '[1]' ),
+                       array( "[1,]\n", '[1]' ),
+                       array( "[1\n,\n]\n", '[1]' ),
+                       array( '["a,",]', '["a,"]' ),
+                       array( "[[1,]\n,[2,\n],[3\n,]]", '[[1],[2],[3]]' ),
+                       array( '[[1,],[2,],[3,]]', false ), // I wish we could 
parse this, but would need quote parsing
+                       array( '[1,,]', false ),
+               );
+       }
+
+       /**
+        * @dataProvider provideParseTryFixing
+        * @param string $value
+        * @param string|bool $expected
+        */
+       public function testParseTryFixing( $value, $expected ) {
+               $st = FormatJson::parse( $value, FormatJson::TRY_FIXING );
+               $this->assertType( 'Status', $st );
+               if ( $expected === false ) {
+                       $this->assertFalse( $st->isOK() );
+               } else {
+                       $this->assertFalse( $st->isGood() );
+                       $this->assertTrue( $st->isOK() );
+                       $val = FormatJson::encode( $st->getValue(), false, 
FormatJson::ALL_OK );
+                       $this->assertEquals( $expected, $val );
+               }
+       }
+
        public static function provideParseErrors() {
                return array(
                        array( 'aaa' ),

-- 
To view, visit https://gerrit.wikimedia.org/r/163344
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic0eb0a711da3ae578d6bb58d7474279d6845a4a7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Yurik <yu...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to