jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/404738 )

Change subject: Deep clone expression result before modifying
......................................................................


Deep clone expression result before modifying

Necessary for the follow up which tickled the bug.  The cached result is
mutated in the action, followed by some backtracking so that when the
action is hit again, the mutated result is used which drops some tokens
from the pushed attribute.

In order to prevent similar types of bugs, while running parserTests the
results placed in the tokenizer cache are now frozen to catch any
attempts at mutating them.  Verified that this would have caught the
above manually located bug and, indeed, it susses out another location
where mutation occurs.

Change-Id: I380bb7a9bb16026954eb39a24c5915f5992153a9
---
M bin/parserTests.js
M lib/utils/Util.js
M lib/wt2html/pegTokenizer.pegjs
M lib/wt2html/tokenizer.js
4 files changed, 53 insertions(+), 2 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/bin/parserTests.js b/bin/parserTests.js
index a3ec248..6562b6e 100755
--- a/bin/parserTests.js
+++ b/bin/parserTests.js
@@ -1063,6 +1063,9 @@
        });
        this.env = env;
 
+       // A hint to enable some slow paths only while testing
+       env.immutable = true;
+
        // Save default logger so we can be reset it after temporarily
        // switching to the suppressLogger to suppress expected error
        // messages.
diff --git a/lib/utils/Util.js b/lib/utils/Util.js
index cd998cb..5a80806 100644
--- a/lib/utils/Util.js
+++ b/lib/utils/Util.js
@@ -786,6 +786,34 @@
                }
        },
 
+       // Just a copy `Util.clone` used in *testing* to reverse the effects of
+       // freezing an object.  Works with more that just "plain objects"
+       unFreeze: function(obj, deepClone) {
+               if (deepClone === undefined) {
+                       deepClone = true;
+               }
+               if (Array.isArray(obj)) {
+                       if (deepClone) {
+                               return obj.map(function(el) {
+                                       return Util.unFreeze(el, true);
+                               });
+                       } else {
+                               return obj.slice();
+                       }
+               } else if (obj instanceof Object) {
+                       if (deepClone) {
+                               return Object.keys(obj).reduce(function(nobj, 
key) {
+                                       nobj[key] = Util.unFreeze(obj[key], 
true);
+                                       return nobj;
+                               }, new obj.constructor());
+                       } else {
+                               return Object.assign({}, obj);
+                       }
+               } else {
+                       return obj;
+               }
+       },
+
        // 'cb' can only be called once after "everything" is done.
        // But, we need something that can be used in async context where it is
        // called repeatedly till we are done.
diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs
index 1a0f97c..44974cd 100644
--- a/lib/wt2html/pegTokenizer.pegjs
+++ b/lib/wt2html/pegTokenizer.pegjs
@@ -51,6 +51,13 @@
      * current expression can return an empty list (true).
      */
     var emitChunk = function(tokens) {
+        if (env.immutable) {
+            // Tokens placed in the tokenizer's cache have been frozen to
+            // to catch any mutations while testing, which may have led to
+            // subtle, spooky action at a distance.
+            tokens = Util.unFreeze(tokens, true);
+        }
+
         // Shift tsr of all tokens by the pipeline offset
         Util.shiftTokenTSR(tokens, options.pipelineOffset);
         env.log("trace/peg", pegTokenizer.pipelineId, "---->  ", tokens);
@@ -1030,6 +1037,8 @@
     f:(
        &{ return env.langConverterEnabled(); }
        ff:opt_lang_variant_flags {
+         // Avoid mutating cached expression results
+         ff = Util.clone(ff, true);
          // if flags contains 'R', then don't treat ; or : specially inside.
          if (ff.flags) {
            ff.raw = ff.flags.has('R') || ff.flags.has('N');
@@ -1057,6 +1066,11 @@
       }
       var lvsrc = input.substring(lv0, lv1);
       var attribs = [];
+
+      // Do a deep clone since we may be destructively modifying
+      // (the `t[fld] = name;` below) the result of a cached expression
+      ts = Util.clone(ts, true);
+
       ts.forEach(function(t) {
         // move token strings into KV attributes so that they are
         // properly expanded by early stages of the token pipeline
diff --git a/lib/wt2html/tokenizer.js b/lib/wt2html/tokenizer.js
index 4318ed9..2785181 100644
--- a/lib/wt2html/tokenizer.js
+++ b/lib/wt2html/tokenizer.js
@@ -65,6 +65,8 @@
 PegTokenizer.prototype.src = '';
 
 PegTokenizer.prototype.initTokenizer = function() {
+       var env = this.env;
+
        // Construct a singleton static tokenizer.
        var pegSrcPath = path.join(__dirname, 'pegTokenizer.pegjs');
        this.src = fs.readFileSync(pegSrcPath, 'utf8');
@@ -104,8 +106,12 @@
                        store: [
                                'if (checkCache) {',
                                [
-                                       '  peg$cache[bucket][key] = {nextPos: 
', opts.endPos, ', ',
-                                       'result: ', opts.result, '};',
+                                       '  peg$cache[bucket][key] = { nextPos: 
', opts.endPos, ', ',
+                                       'result: ',
+                                       env && env.immutable ? [
+                                               'JSUtils.deepFreeze(', 
opts.result, ')'
+                                       ].join('') : opts.result,
+                                       ' };',
                                ].join(''),
                                '}',
                        ].join('\n'),

-- 
To view, visit https://gerrit.wikimedia.org/r/404738
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I380bb7a9bb16026954eb39a24c5915f5992153a9
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: C. Scott Ananian <[email protected]>
Gerrit-Reviewer: Sbailey <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to