Arlolra has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/398171 )

Change subject: [WIP] Tokenize noincludeonly content asynchronously
......................................................................

[WIP] Tokenize noincludeonly content asynchronously

Bug: T143378
Change-Id: I465a618eba89bbcf8d4afe8e01610278d2453c75
---
M lib/wt2html/parser.js
M lib/wt2html/pegTokenizer.pegjs
M lib/wt2html/tokenizer.js
M lib/wt2html/tt/NoIncludeOnly.js
4 files changed, 64 insertions(+), 19 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/71/398171/1

diff --git a/lib/wt2html/parser.js b/lib/wt2html/parser.js
index 3d63eb2..82e9bc2 100644
--- a/lib/wt2html/parser.js
+++ b/lib/wt2html/parser.js
@@ -38,9 +38,9 @@
 var IncludeOnly = NoIncludeOnly.IncludeOnly;
 var NoInclude = NoIncludeOnly.NoInclude;
 var OnlyInclude = NoIncludeOnly.OnlyInclude;
+var TokenizeInclude = NoIncludeOnly.TokenizeInclude;
 var WikiLinkHandler = LinkHandler.WikiLinkHandler;
 var ExternalLinkHandler = LinkHandler.ExternalLinkHandler;
-
 
 var ParserPipeline; // forward declaration
 var globalPipelineId = 0;
@@ -81,6 +81,13 @@
        // a fully expanded token stream ready for consumption by the
        // tokens/expanded pipeline.
        'tokens/x-mediawiki': [
+               [
+                       AsyncTokenTransformManager,
+                       [ 0, 'tokens/x-mediawiki' ],
+                       [
+                               TokenizeInclude,  // 0.001
+                       ],
+               ],
                // Synchronous in-order per input
                [
                        SyncTokenTransformManager,
diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs
index e1bd3f9..09a916a 100644
--- a/lib/wt2html/pegTokenizer.pegjs
+++ b/lib/wt2html/pegTokenizer.pegjs
@@ -15,7 +15,6 @@
     var DU = pegIncludes.DOMUtils;
     var Util = pegIncludes.Util;
     var JSUtils = pegIncludes.JSUtils;
-    var PegTokenizer = pegIncludes.PegTokenizer;
     var defines = pegIncludes.defines;
     var constants = pegIncludes.constants;
     var tu = pegIncludes.tu;
@@ -239,13 +238,15 @@
             ], dp);
         } else if (isIncludeTag) {
             // Parse ext-content, strip eof, and shift tsr
-            var extContent = dp.src.substring(dp.tagWidths[0], dp.src.length - dp.tagWidths[1]);
-            var extContentToks = (new PegTokenizer(env)).tokenizeSync(extContent);
-            if (dp.tagWidths[1] > 0) {
-                extContentToks = Util.stripEOFTkfromTokens(extContentToks);
-            }
-            Util.shiftTokenTSR(extContentToks, dp.tsr[0] + dp.tagWidths[0]);
-            return [t].concat(extContentToks);
+            dp.tmp = (dp.tmp || {});
+            dp.tmp.toTokenize = {
+                content: dp.src.substring(dp.tagWidths[0], dp.src.length - dp.tagWidths[1]),
+                shift: dp.tsr[0] + dp.tagWidths[0],
+                stripEOF: dp.tagWidths[1] > 0,
+                name: tagName,
+            };
+            t.name = 'tokenizeinclude';
+            return t;
         } else {
             console.assert(false, 'Should not be reachable.');
         }
@@ -2182,16 +2183,17 @@
     }
 
     // Tokenize include content in a new tokenizer
-    var inclContentToks = (new PegTokenizer(env)).tokenizeSync(inclContent);
-    inclContentToks = Util.stripEOFTkfromTokens(inclContentToks);
-
-    // Shift tsr
-    Util.shiftTokenTSR(inclContentToks, endOffset());
+    dp.toTokenize = {
+        content: inclContent,
+        shift: endOffset(),
+        stripEOF: true,
+        name: name,
+    };
 
     // Skip past content
     peg$currPos += inclContent.length;
 
-    return [new TagTk(name, [], dp)].concat(inclContentToks);
+    return new TagTk('tokenizeinclude', [], dp);
   }) & { return !!il; } { return il; }
 
 // Start of file
diff --git a/lib/wt2html/tokenizer.js b/lib/wt2html/tokenizer.js
index 4318ed9..8eacedd 100644
--- a/lib/wt2html/tokenizer.js
+++ b/lib/wt2html/tokenizer.js
@@ -36,8 +36,6 @@
        DOMUtils: require('../utils/DOMUtils.js').DOMUtils,
        JSUtils: JSUtils,
        Util: require('../utils/Util.js').Util,
-       // defined below to satisfy JSHint
-       PegTokenizer: null,
 };
 
 /**
@@ -56,8 +54,6 @@
        this.options = options || {};
        this.offsets = {};
 }
-
-pegIncludes.PegTokenizer = PegTokenizer;
 
 // Inherit from EventEmitter
 util.inherits(PegTokenizer, events.EventEmitter);
diff --git a/lib/wt2html/tt/NoIncludeOnly.js b/lib/wt2html/tt/NoIncludeOnly.js
index 66bce5c..de7db72 100644
--- a/lib/wt2html/tt/NoIncludeOnly.js
+++ b/lib/wt2html/tt/NoIncludeOnly.js
@@ -8,6 +8,8 @@
 var coreutil = require('util');
 var TokenHandler = require('./TokenHandler.js');
 var TokenCollector = require('./TokenCollector.js').TokenCollector;
+var PegTokenizer = require('../tokenizer.js').PegTokenizer;
+var Util = require('../../utils/Util.js').Util;
 var defines = require('../parser.defines.js');
 
 // define some constructor shortcuts
@@ -285,8 +287,46 @@
 };
 
 
+/**
+ * Tokenize the contents of include tags asynchronously.
+ */
+function TokenizeInclude() {
+       TokenHandler.apply(this, arguments);
+}
+coreutil.inherits(TokenizeInclude, TokenHandler);
+
+TokenizeInclude.prototype.rank = 0.001;
+
+TokenizeInclude.prototype.init = function() {
+       this.manager.addTransform(this.onTokenizeInclude.bind(this),
+               'TokenizeInclude:onTokenizeInclude', this.rank, 'tag', 'tokenizeinclude');
+};
+
+TokenizeInclude.prototype.onTokenizeInclude = function(token, frame, cb) {
+       cb({ async: true });
+
+       var toTokenize = token.dataAttribs.tmp.toTokenize;
+       var tokenizer = new PegTokenizer(this.env);
+
+       var toks = [];
+       tokenizer.on('chunk', function(ts) { toks = toks.concat(ts); });
+       tokenizer.on('end', function() {
+               if (toTokenize.stripEOF) {
+                       toks = Util.stripEOFTkfromTokens(toks);
+               }
+               Util.shiftTokenTSR(toks, toTokenize.shift);
+               token.name = toTokenize.name;
+               toks = [token].concat(toks);
+               toks.rank = this.rank + 0.001;
+               cb({ tokens: toks });
+       }.bind(this));
+
+       tokenizer.process(toTokenize.content);
+};
+
 if (typeof module === "object") {
        module.exports.NoInclude = NoInclude;
        module.exports.IncludeOnly = IncludeOnly;
        module.exports.OnlyInclude = OnlyInclude;
+       module.exports.TokenizeInclude = TokenizeInclude;
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/398171
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I465a618eba89bbcf8d4afe8e01610278d2453c75
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to