Arlolra has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/398171 )
Change subject: [WIP] Tokenize noincludeonly content asynchronously ...................................................................... [WIP] Tokenize noincludeonly content asynchronously Bug: T143378 Change-Id: I465a618eba89bbcf8d4afe8e01610278d2453c75 --- M lib/wt2html/parser.js M lib/wt2html/pegTokenizer.pegjs M lib/wt2html/tokenizer.js M lib/wt2html/tt/NoIncludeOnly.js 4 files changed, 64 insertions(+), 19 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/71/398171/1 diff --git a/lib/wt2html/parser.js b/lib/wt2html/parser.js index 3d63eb2..82e9bc2 100644 --- a/lib/wt2html/parser.js +++ b/lib/wt2html/parser.js @@ -38,9 +38,9 @@ var IncludeOnly = NoIncludeOnly.IncludeOnly; var NoInclude = NoIncludeOnly.NoInclude; var OnlyInclude = NoIncludeOnly.OnlyInclude; +var TokenizeInclude = NoIncludeOnly.TokenizeInclude; var WikiLinkHandler = LinkHandler.WikiLinkHandler; var ExternalLinkHandler = LinkHandler.ExternalLinkHandler; - var ParserPipeline; // forward declaration var globalPipelineId = 0; @@ -81,6 +81,13 @@ // a fully expanded token stream ready for consumption by the // tokens/expanded pipeline. 'tokens/x-mediawiki': [ + [ + AsyncTokenTransformManager, + [ 0, 'tokens/x-mediawiki' ], + [ + TokenizeInclude, // 0.001 + ], + ], // Synchronous in-order per input [ SyncTokenTransformManager, diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs index e1bd3f9..09a916a 100644 --- a/lib/wt2html/pegTokenizer.pegjs +++ b/lib/wt2html/pegTokenizer.pegjs @@ -15,7 +15,6 @@ var DU = pegIncludes.DOMUtils; var Util = pegIncludes.Util; var JSUtils = pegIncludes.JSUtils; - var PegTokenizer = pegIncludes.PegTokenizer; var defines = pegIncludes.defines; var constants = pegIncludes.constants; var tu = pegIncludes.tu; @@ -239,13 +238,15 @@ ], dp); } else if (isIncludeTag) { // Parse ext-content, strip eof, and shift tsr - var extContent = dp.src.substring(dp.tagWidths[0], dp.src.length - dp.tagWidths[1]); - var extContentToks = (new PegTokenizer(env)).tokenizeSync(extContent); - if (dp.tagWidths[1] > 0) { - extContentToks = Util.stripEOFTkfromTokens(extContentToks); - } - Util.shiftTokenTSR(extContentToks, dp.tsr[0] + dp.tagWidths[0]); - return [t].concat(extContentToks); + dp.tmp = (dp.tmp || {}); + dp.tmp.toTokenize = { + content: dp.src.substring(dp.tagWidths[0], dp.src.length - dp.tagWidths[1]), + shift: dp.tsr[0] + dp.tagWidths[0], + stripEOF: dp.tagWidths[1] > 0, + name: tagName, + }; + t.name = 'tokenizeinclude'; + return t; } else { console.assert(false, 'Should not be reachable.'); } @@ -2182,16 +2183,17 @@ } // Tokenize include content in a new tokenizer - var inclContentToks = (new PegTokenizer(env)).tokenizeSync(inclContent); - inclContentToks = Util.stripEOFTkfromTokens(inclContentToks); - - // Shift tsr - Util.shiftTokenTSR(inclContentToks, endOffset()); + dp.toTokenize = { + content: inclContent, + shift: endOffset(), + stripEOF: true, + name: name, + }; // Skip past content peg$currPos += inclContent.length; - return [new TagTk(name, [], dp)].concat(inclContentToks); + return new TagTk('tokenizeinclude', [], dp); }) & { return !!il; } { return il; } // Start of file diff --git a/lib/wt2html/tokenizer.js b/lib/wt2html/tokenizer.js index 4318ed9..8eacedd 100644 --- a/lib/wt2html/tokenizer.js +++ b/lib/wt2html/tokenizer.js @@ -36,8 +36,6 @@ DOMUtils: require('../utils/DOMUtils.js').DOMUtils, JSUtils: JSUtils, Util: require('../utils/Util.js').Util, - // defined below to satisfy JSHint - PegTokenizer: null, }; /** @@ -56,8 +54,6 @@ this.options = options || {}; this.offsets = {}; } - -pegIncludes.PegTokenizer = PegTokenizer; // Inherit from EventEmitter util.inherits(PegTokenizer, events.EventEmitter); diff --git a/lib/wt2html/tt/NoIncludeOnly.js b/lib/wt2html/tt/NoIncludeOnly.js index 66bce5c..de7db72 100644 --- a/lib/wt2html/tt/NoIncludeOnly.js +++ b/lib/wt2html/tt/NoIncludeOnly.js @@ -8,6 +8,8 @@ var coreutil = require('util'); var TokenHandler = require('./TokenHandler.js'); var TokenCollector = require('./TokenCollector.js').TokenCollector; +var PegTokenizer = require('../tokenizer.js').PegTokenizer; +var Util = require('../../utils/Util.js').Util; var defines = require('../parser.defines.js'); // define some constructor shortcuts @@ -285,8 +287,46 @@ }; +/** + * Tokenize the contents of include tags asynchronously. + */ +function TokenizeInclude() { + TokenHandler.apply(this, arguments); +} +coreutil.inherits(TokenizeInclude, TokenHandler); + +TokenizeInclude.prototype.rank = 0.001; + +TokenizeInclude.prototype.init = function() { + this.manager.addTransform(this.onTokenizeInclude.bind(this), + 'TokenizeInclude:onTokenizeInclude', this.rank, 'tag', 'tokenizeinclude'); +}; + +TokenizeInclude.prototype.onTokenizeInclude = function(token, frame, cb) { + cb({ async: true }); + + var toTokenize = token.dataAttribs.tmp.toTokenize; + var tokenizer = new PegTokenizer(this.env); + + var toks = []; + tokenizer.on('chunk', function(ts) { toks = toks.concat(ts); }); + tokenizer.on('end', function() { + if (toTokenize.stripEOF) { + toks = Util.stripEOFTkfromTokens(toks); + } + Util.shiftTokenTSR(toks, toTokenize.shift); + token.name = toTokenize.name; + toks = [token].concat(toks); + toks.rank = this.rank + 0.001; + cb({ tokens: toks }); + }.bind(this)); + + tokenizer.process(toTokenize.content); +}; + if (typeof module === "object") { module.exports.NoInclude = NoInclude; module.exports.IncludeOnly = IncludeOnly; module.exports.OnlyInclude = OnlyInclude; + module.exports.TokenizeInclude = TokenizeInclude; } -- To view, visit https://gerrit.wikimedia.org/r/398171 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I465a618eba89bbcf8d4afe8e01610278d2453c75 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits