Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/251194
Change subject: WIP: Refactor WTS to be async
......................................................................
WIP: Refactor WTS to be async
Change-Id: Id68d24007229dcfa0bf6fb16c6ed3ecdaad0c2e5
---
M lib/html2wt/SelectiveSerializer.js
M lib/html2wt/SerializerState.js
M lib/html2wt/WTSUtils.js
M lib/html2wt/WikitextSerializer.js
M lib/utils/DOMUtils.js
5 files changed, 283 insertions(+), 312 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/94/251194/1
diff --git a/lib/html2wt/SelectiveSerializer.js b/lib/html2wt/SelectiveSerializer.js
index fb0fc8a..cfca657 100644
--- a/lib/html2wt/SelectiveSerializer.js
+++ b/lib/html2wt/SelectiveSerializer.js
@@ -41,10 +41,9 @@
* Selectively serialize an HTML DOM document synchronously.
* WARNING: You probably want to use DU.serializeDOM instead.
*/
-SSP.serializeDOMSync = function(body) {
+SSP.serializeDOM = Promise.method(function(body) {
console.assert(DU.isBody(body), 'Expected a body node.');
- var out;
var startTimers = new Map();
if ((!this.env.page.dom && !this.env.page.domdiff) || this.env.page.src
=== null) {
@@ -53,12 +52,12 @@
}
// If there's no old source, fall back to non-selective serialization.
- out = this.wts.serializeDOMSync(body, false);
-
- if (this.timer) {
- this.timer.timing('html2wt.full.serialize', '',
- (Date.now() -
startTimers.get('html2wt.full.serialize')));
- }
+ return this.wts.serializeDOM(body, false).tap(function() {
+ if (this.timer) {
+ this.timer.timing('html2wt.full.serialize', '',
+ (Date.now() -
startTimers.get('html2wt.full.serialize')));
+ }
+ }.bind(this));
} else {
if (this.timer) {
startTimers.set('html2wt.selser.serialize', Date.now());
@@ -82,9 +81,10 @@
}
}
+ var p;
if (diff.isEmpty) {
// Nothing was modified, just re-use the original source
- out = this.env.page.src;
+ p = Promise.resolve(this.env.page.src);
} else {
body = diff.dom;
this.env.page.editedDoc = body.ownerDocument;
@@ -96,16 +96,16 @@
}
// Call the WikitextSerializer to do our bidding
- out = this.wts.serializeDOMSync(body, true);
+ p = this.wts.serializeDOM(body, true);
}
-
- if (this.timer) {
- this.timer.timing('html2wt.selser.serialize', '',
- (Date.now() -
startTimers.get('html2wt.selser.serialize')));
- }
+ return p.tap(function() {
+ if (this.timer) {
+ this.timer.timing('html2wt.selser.serialize',
'',
+ (Date.now() -
startTimers.get('html2wt.selser.serialize')));
+ }
+ }.bind(this));
}
- return out;
-};
+});
if (typeof module === 'object') {
diff --git a/lib/html2wt/SerializerState.js b/lib/html2wt/SerializerState.js
index 2f5a1d3..ce81040 100644
--- a/lib/html2wt/SerializerState.js
+++ b/lib/html2wt/SerializerState.js
@@ -177,42 +177,27 @@
* Serialize the children of a DOM node, sharing the global serializer state.
* Typically called by a DOM-based handler to continue handling its children.
*/
-SSP.serializeChildren = function(node, wtEscaper) {
- try {
- // TODO gwicke: use nested WikitextSerializer instead?
+SSP.serializeChildren = Promise.method(function(node, wtEscaper) {
+ // SSS FIXME: Unsure if this is the right thing always
+ if (wtEscaper) { this.wteHandlerStack.push(wtEscaper); }
- // SSS FIXME: Unsure if this is the right thing always
- if (wtEscaper) {
- this.wteHandlerStack.push(wtEscaper);
- }
+ var serializeChildChain = Promise.method(function(child) {
+ if (child === null) { return; }
+ return this.serializer._serializeNode(child).then(function(next) {
+ if (next === node) { return; } // Serialized all children
+ if (next === child) { next = next.nextSibling; } // Advance
+ return serializeChildChain(next);
+ });
+ }).bind(this);
- var child = node.firstChild;
- var nextChild = null;
-
- while (child) {
- nextChild = this.serializer._serializeNode(child);
- if (nextChild === node) {
- // serialized all children
- break;
- } else if (nextChild === child) {
- // advance the child
- child = child.nextSibling;
- } else {
- child = nextChild;
- }
- }
+ return serializeChildChain(node.firstChild).then(function() {
+ if (wtEscaper) { this.wteHandlerStack.pop(); }
// If we serialized children explicitly,
// we were obviously processing a modified node.
this.currNodeUnmodified = false;
-
- if (wtEscaper) {
- this.wteHandlerStack.pop();
- }
- } catch (e) {
- this.env.log("fatal", e);
- }
-};
+ }.bind(this));
+});
/**
*/
diff --git a/lib/html2wt/WTSUtils.js b/lib/html2wt/WTSUtils.js
index 8e0f23a..c4f6e4e 100644
--- a/lib/html2wt/WTSUtils.js
+++ b/lib/html2wt/WTSUtils.js
@@ -50,6 +50,21 @@
},
};
+WTSUtils.traceNodeName = function(node) {
+ switch (node.nodeType) {
+ case node.ELEMENT_NODE:
+ return DU.isMarkerMeta(node, "mw:DiffMarker") ?
+ "DIFF_MARK" : "NODE: " + node.nodeName;
+ case node.TEXT_NODE:
+ return "TEXT: " + JSON.stringify(node.nodeValue);
+ case node.COMMENT_NODE:
+ return "CMT : " +
JSON.stringify(WTSUtils.commentWT(node.nodeValue));
+ default:
+ return node.nodeName;
+ }
+};
+
+
if (typeof module === "object") {
module.exports.WTSUtils = WTSUtils;
}
diff --git a/lib/html2wt/WikitextSerializer.js b/lib/html2wt/WikitextSerializer.js
index fdffa20..4747e3f 100644
--- a/lib/html2wt/WikitextSerializer.js
+++ b/lib/html2wt/WikitextSerializer.js
@@ -658,7 +658,7 @@
/**
* Serialize the content of a text node
*/
-WSP._serializeTextNode = function(node) {
+WSP._serializeTextNode = Promise.method(function(node) {
// write out a potential separator?
var res = node.nodeValue;
var state = this.state;
@@ -727,7 +727,7 @@
/* SSS FIXME: what are we doing with the stripped NLs??
*/
}
}
-};
+});
/**
* Emit non-separator wikitext that does not need to be escaped
@@ -761,268 +761,238 @@
return out;
};
-function traceNodeName(node) {
- switch (node.nodeType) {
- case node.ELEMENT_NODE:
- return DU.isMarkerMeta(node, "mw:DiffMarker") ? "DIFF_MARK" :
"NODE: " + node.nodeName;
- case node.TEXT_NODE:
- return "TEXT: " + JSON.stringify(node.nodeValue);
- case node.COMMENT_NODE:
- return "CMT : " +
JSON.stringify(WTSUtils.commentWT(node.nodeValue));
- default:
- return node.nodeName;
+// DOM-based serialization
+WSP._serializeDOMNode = Promise.method(function(node, domHandler) {
+ // To serialize a node from source, the node should satisfy these
+ // conditions:
+ //
+ // 1. It should not have a diff marker or be in a modified subtree
+ // WTS should not be in a subtree with a modification flag that
+ // applies to every node of a subtree (rather than an indication
+ // that some node in the subtree is modified).
+ //
+ // 2. It should continue to be valid in any surrounding edited context
+ // For some nodes, modification of surrounding context
+ // can change serialized output of this node
+ // (ex: <td>s and whether you emit | or || for them)
+ //
+ // 3. It should have valid, usable DSR
+ //
+ // 4. Either it has non-zero positive DSR width, or meets one of the
+ // following:
+ //
+ // 4a. It is content like <p><br/><p> or an automatically-inserted
+ // wikitext <references/> (HTML <ol>) (will have dsr-width 0)
+ // 4b. it is fostered content (will have dsr-width 0)
+ // 4c. it is misnested content (will have dsr-width 0)
+ //
+ // SSS FIXME: Additionally, we can guard against buggy DSR with
+ // some sanity checks. We can test that non-sep src content
+ // leading wikitext markup corresponds to the node type.
+ //
+ // Ex: If node.nodeName is 'UL', then src[0] should be '*'
+ //
+ // TO BE DONE
+
+ var state = this.state;
+ var handled = false;
+ var wrapperUnmodified = false;
+ var dp = DU.getDataParsoid(node);
+
+ dp.dsr = dp.dsr || [];
+
+ if (state.selserMode
+ && !state.inModifiedContent
+ && DU.origSrcValidInEditedContext(state.env, node)
+ && dp && Util.isValidDSR(dp.dsr)
+ && (dp.dsr[1] > dp.dsr[0]
+ // FIXME: <p><br/></p>
+ // nodes that have dsr width 0 because
currently,
+ // we emit newlines outside the p-nodes. So,
this check
+ // tries to handle that scenario.
+ // Zero-width <ol> corresponds to
automatically-inserted
+ // <references/> nodes.
+ || (dp.dsr[1] === dp.dsr[0] &&
/^(P|BR|OL)$/.test(node.nodeName))
+ || dp.fostered || dp.misnested)) {
+
+ if (!DU.hasDiffMarkers(node, this.env)) {
+ // If this HTML node will disappear in wikitext because
of
+ // zero width, then the separator constraints will
carry over
+ // to the node's children.
+ //
+ // Since we dont recurse into 'node' in selser mode, we
update the
+ // separator constraintInfo to apply to 'node' and its
first child.
+ //
+ // We could clear constraintInfo altogether which would
be
+ // correct (but could normalize separators and
introduce dirty
+ // diffs unnecessarily).
+
+ state.currNodeUnmodified = true;
+
+ if (DU.isZeroWidthWikitextElt(node) &&
+ node.childNodes.length > 0 &&
+ state.sep.constraints.constraintInfo.sepType
=== 'sibling') {
+ state.sep.constraints.constraintInfo.onSOL =
state.onSOL;
+ state.sep.constraints.constraintInfo.sepType =
'parent-child';
+ state.sep.constraints.constraintInfo.nodeA =
node;
+ state.sep.constraints.constraintInfo.nodeB =
node.firstChild;
+ }
+
+ var out = state.getOrigSrc(dp.dsr[0], dp.dsr[1]);
+
+ this.trace("ORIG-src with DSR", function() {
+ return '[' + dp.dsr[0] + ',' + dp.dsr[1] + '] =
' + JSON.stringify(out);
+ });
+
+ // When reusing source, we should only suppress
serializing
+ // to a single line for the cases we've whitelisted in
+ // normal serialization.
+ var suppressSLC =
DU.isFirstEncapsulationWrapperNode(node) ||
+ ['DL', 'UL',
'OL'].indexOf(node.nodeName) > -1 ||
+ (node.nodeName === 'TABLE' &&
+ node.parentNode.nodeName ===
'DD' &&
+ DU.previousNonSepSibling(node)
=== null);
+
+ // Use selser to serialize this text! The original
+ // wikitext is `out`. But first allow
+ // `ConstrainedText.fromSelSer` to figure out the right
+ // type of ConstrainedText chunk(s) to use to represent
+ // `out`, based on the node type. Since we might
actually
+ // have to break this wikitext into multiple chunks,
+ // `fromSelSer` returns an array.
+ if (suppressSLC) { state.singleLineContext.disable(); }
+ ConstrainedText
+ .fromSelSer(out, node, dp, state.env)
+ .forEach(function(ct) {
+ state.emitChunk(ct, ct.node);
+ });
+ if (suppressSLC) { state.singleLineContext.pop(); }
+
+ // Skip over encapsulated content since it has already
been
+ // serialized.
+ if (DU.isFirstEncapsulationWrapperNode(node)) {
+ return DU.skipOverEncapsulatedContent(node);
+ } else {
+ return node.nextSibling;
+ }
+ }
+
+ if (DU.onlySubtreeChanged(node, this.env) &&
+ WTSUtils.hasValidTagWidths(dp.dsr) &&
+ // In general, we want to avoid nodes with
auto-inserted
+ // start/end tags since dsr for them might not
be entirely
+ // trustworthy. But, since wikitext does not
have closing tags
+ // for tr/td/th in the first place, dsr for
them can be trusted.
+ //
+ // SSS FIXME: I think this is only for b/i tags
for which we do
+ // dsr fixups. It may be okay to use this for
other tags.
+ ((!dp.autoInsertedStart && !dp.autoInsertedEnd)
||
+ /^(TD|TH|TR)$/.test(node.nodeName))) {
+ wrapperUnmodified = true;
+ }
}
-}
+
+ state.currNodeUnmodified = false;
+
+ var inModifiedContent = state.selserMode &&
+ DU.hasInsertedDiffMark(node, this.env);
+
+ if (inModifiedContent) { state.inModifiedContent = true; }
+ var next = domHandler.handle(node, state, wrapperUnmodified);
+ if (inModifiedContent) { state.inModifiedContent = false; }
+
+ return next;
+});
/**
* Internal worker. Recursively serialize a DOM subtree.
*/
-WSP._serializeNode = function(node) {
- var prev, next, nextNode;
+WSP._serializeNode = Promise.method(function(node) {
+ var prev, domHandler, p;
var state = this.state;
if (state.selserMode) {
- this.trace(function() { return traceNodeName(node); },
+ this.trace(function() { return WTSUtils.traceNodeName(node); },
"; prev-unmodified: ", state.prevNodeUnmodified,
"; SOL: ", state.onSOL);
} else {
- this.trace(function() { return traceNodeName(node); },
+ this.trace(function() { return WTSUtils.traceNodeName(node); },
"; SOL: ", state.onSOL);
}
- // serialize this node
switch (node.nodeType) {
- case node.ELEMENT_NODE:
-
- // Ignore DiffMarker metas, but clear unmodified node
state
- if (DU.isMarkerMeta(node, "mw:DiffMarker")) {
- state.sep.lastSourceNode = node;
- // Update modification flags
- state.updateModificationFlags(node);
- return node.nextSibling;
- }
-
- var dp = DU.getDataParsoid(node);
- dp.dsr = dp.dsr || [];
-
- // Update separator constraints
- var domHandler = this._getDOMHandler(node);
- prev = DU.previousNonSepSibling(node) ||
node.parentNode;
- if (prev) {
- this.updateSeparatorConstraints(
- prev,
this._getDOMHandler(prev),
- node, domHandler);
- }
-
- var handled = false;
- var wrapperUnmodified = false;
-
- // To serialize a node from source, the node should
satisfy these conditions:
- // 1. It should not have a diff marker or be in a
modified subtree
- // WTS should not be in a subtree with a
modification flag that applies
- // to every node of a subtree (rather than an
indication that some node
- // in the subtree is modified).
- //
- // 2. It should continue to be valid in any surrounding
edited context
- // For some nodes, modification of surrounding
context
- // can change serialized output of this node
- // (ex: <td>s and whether you emit | or || for them)
- //
- // 3. It should have valid, usable DSR
- //
- // 4. Either it has non-zero positive DSR width, or
meets one of the following
- // 4a. It is content like <p><br/><p> or an
- // automatically-inserted wikitext <references/>
(HTML <ol>)
- // (will have dsr-width 0)
- // 4b. it is fostered content (will have dsr-width 0)
- // 4c. it is misnested content (will have dsr-width
0)
- //
- // SSS FIXME: Additionally, we can guard against buggy
DSR with
- // some sanity checks. We can test that non-sep src
content
- // leading wikitext markup corresponds to the node type.
- //
- // Ex: If node.nodeName is 'UL', then src[0] should be
'*'
- //
- // TO BE DONE
-
- if (state.selserMode
- && !state.inModifiedContent
- && DU.origSrcValidInEditedContext(state.env,
node)
- && dp && Util.isValidDSR(dp.dsr)
- && (dp.dsr[1] > dp.dsr[0]
- // FIXME: <p><br/></p>
- // nodes that have dsr width 0
because currently,
- // we emit newlines outside the
p-nodes. So, this check
- // tries to handle that
scenario.
- // Zero-width <ol> corresponds
to automatically-inserted
- // <references/> nodes.
- || (dp.dsr[1] === dp.dsr[0] &&
/^(P|BR|OL)$/.test(node.nodeName))
- || dp.fostered || dp.misnested)) {
- if (!DU.hasDiffMarkers(node, this.env)) {
- state.currNodeUnmodified = true;
- handled = true;
-
- // If this HTML node will disappear in
wikitext because of zero width,
- // then the separator constraints will
carry over to the node's children.
- //
- // Since we dont recurse into 'node' in
selser mode, we update the
- // separator constraintInfo to apply to
'node' and its first child.
- //
- // We could clear constraintInfo
altogether which would be correct (but
- // could normalize separators and
introduce dirty diffs unnecessarily).
- if (DU.isZeroWidthWikitextElt(node) &&
- node.childNodes.length > 0 &&
-
state.sep.constraints.constraintInfo.sepType === 'sibling') {
-
state.sep.constraints.constraintInfo.onSOL = state.onSOL;
-
state.sep.constraints.constraintInfo.sepType = 'parent-child';
-
state.sep.constraints.constraintInfo.nodeA = node;
-
state.sep.constraints.constraintInfo.nodeB = node.firstChild;
- }
-
- var out = state.getOrigSrc(dp.dsr[0],
dp.dsr[1]);
-
- // console.warn("USED ORIG");
- this.trace("ORIG-src with DSR",
function() {
- return '[' + dp.dsr[0] + ',' +
dp.dsr[1] + '] = ' + JSON.stringify(out);
- });
-
- // When reusing source, we should only
suppress serializing
- // to a single line for the cases we've
whitelisted in
- // normal serialization.
- var suppressSLC =
DU.isFirstEncapsulationWrapperNode(node) ||
- ['DL', 'UL',
'OL'].indexOf(node.nodeName) > -1 ||
- (node.nodeName ===
'TABLE' &&
-
node.parentNode.nodeName === 'DD' &&
-
DU.previousNonSepSibling(node) === null);
-
- // Use selser to serialize this text!
The original
- // wikitext is `out`. But first allow
- // `ConstrainedText.fromSelSer` to
figure out the right
- // type of ConstrainedText chunk(s) to
use to represent
- // `out`, based on the node type.
Since we might actually
- // have to break this wikitext into
multiple chunks,
- // `fromSelSer` returns an array.
- if (suppressSLC) {
state.singleLineContext.disable(); }
- ConstrainedText.fromSelSer(out, node,
dp, state.env).forEach(function(ct) {
- state.emitChunk(ct, ct.node);
- });
- if (suppressSLC) {
state.singleLineContext.pop(); }
-
- // Skip over encapsulated content since
it has already been serialized
- if
(DU.isFirstEncapsulationWrapperNode(node)) {
- nextNode =
DU.skipOverEncapsulatedContent(node);
- }
- } else if (DU.onlySubtreeChanged(node,
this.env) &&
- WTSUtils.hasValidTagWidths(dp.dsr) &&
- // In general, we want to avoid nodes
with auto-inserted start/end tags
- // since dsr for them might not be
entirely trustworthy. But, since wikitext
- // does not have closing tags for
tr/td/th in the first place, dsr for them
- // can be trusted.
- //
- // SSS FIXME: I think this is only for
b/i tags for which we do dsr fixups.
- // It may be okay to use this for other
tags.
- ((!dp.autoInsertedStart &&
!dp.autoInsertedEnd) || /^(TD|TH|TR)$/.test(node.nodeName))) {
- wrapperUnmodified = true;
- }
- }
-
- if (!handled) {
- state.currNodeUnmodified = false;
-
- // console.warn("USED NEW");
- if (domHandler && domHandler.handle) {
- // DOM-based serialization
- try {
- if (state.selserMode &&
DU.hasInsertedDiffMark(node, this.env)) {
- state.inModifiedContent
= true;
- nextNode =
domHandler.handle(node, state, wrapperUnmodified);
- state.inModifiedContent
= false;
- } else {
- nextNode =
domHandler.handle(node, state, wrapperUnmodified);
- }
- } catch (e) {
- this.env.log("fatal", e);
- }
- // The handler is responsible for
serializing its children
- } else {
- // Used to be token-based serialization
- this.env.log("error", 'No dom handler
found for', node.outerHTML);
- }
- }
-
+ case node.ELEMENT_NODE:
+ // Ignore DiffMarker metas, but clear unmodified node state
+ if (DU.isMarkerMeta(node, "mw:DiffMarker")) {
+ state.sep.lastSourceNode = node;
// Update modification flags
state.updateModificationFlags(node);
-
- // Update end separator constraints
- next = DU.nextNonSepSibling(node) || node.parentNode;
- if (next) {
- this.updateSeparatorConstraints(
- node, domHandler,
- next,
this._getDOMHandler(next));
+ return node.nextSibling;
+ }
+ domHandler = this._getDOMHandler(node);
+ console.assert(domHandler && domHandler.handle,
+ 'No dom handler found for', node.outerHTML);
+ p = this._serializeDOMNode(node, domHandler);
+ break;
+ case node.TEXT_NODE:
+ if (this.handleSeparatorText(node)) {
+ return node.nextSibling;
+ }
+ if (state.selserMode) {
+ prev = node.previousSibling;
+ if (!state.inModifiedContent && (
+ (!prev && DU.isBody(node.parentNode)) ||
+ (prev && !DU.isMarkerMeta(prev,
"mw:DiffMarker")))
+ ) {
+ state.currNodeUnmodified = true;
+ } else {
+ state.currNodeUnmodified = false;
}
-
- break;
- case node.TEXT_NODE:
- if (!this.handleSeparatorText(node)) {
- if (state.selserMode) {
- // If unmodified, emit output and return
- prev = node.previousSibling;
- if (!state.inModifiedContent && (
- (!prev &&
DU.isBody(node.parentNode)) ||
- (prev && !DU.isMarkerMeta(prev,
"mw:DiffMarker")))
- ) {
- state.currNodeUnmodified = true;
- } else {
- state.currNodeUnmodified =
false;
- }
- }
-
- // Text is not just whitespace
- prev = DU.previousNonSepSibling(node) ||
node.parentNode;
- if (prev) {
- this.updateSeparatorConstraints(
- prev,
this._getDOMHandler(prev),
- node, {});
- }
- // regular serialization
- this._serializeTextNode(node);
- next = DU.nextNonSepSibling(node) ||
node.parentNode;
- if (next) {
- this.updateSeparatorConstraints(
- node, {},
- next,
this._getDOMHandler(next));
- }
-
- // Update modification flags
- state.updateModificationFlags(node);
- }
- break;
- case node.COMMENT_NODE:
- // Merge this into separators
- this.handleSeparatorText(node);
- break;
- default:
- this.env.log("error", "Unhandled node type:",
node.outerHTML);
- break;
+ }
+ domHandler = {};
+ p = this._serializeTextNode(node);
+ break;
+ case node.COMMENT_NODE:
+ // Merge this into separators
+ this.handleSeparatorText(node);
+ return node.nextSibling;
+ default:
+ console.assert("Unhandled node type:", node.outerHTML);
}
- // If handlers didn't provide a valid next node,
- // default to next sibling
- if (nextNode === undefined) {
- nextNode = node.nextSibling;
- }
+ prev = DU.previousNonSepSibling(node) || node.parentNode;
+ this.updateSeparatorConstraints(
+ prev, this._getDOMHandler(prev),
+ node, domHandler);
- return nextNode;
-};
+ return p.then(function(nextNode) {
+ var next = DU.nextNonSepSibling(node) || node.parentNode;
+ this.updateSeparatorConstraints(
+ node, domHandler,
+ next, this._getDOMHandler(next));
-function stripUnnecessaryIndentPreNowikis(env, wt) {
+ // Update modification flags
+ state.updateModificationFlags(node);
+
+ // If handlers didn't provide a valid next node,
+ // default to next sibling.
+ if (nextNode === undefined) {
+ nextNode = node.nextSibling;
+ }
+
+ return nextNode;
+ }.bind(this));
+});
+
+WSP._stripUnnecessaryIndentPreNowikis = function() {
+ var env = this.env;
// FIXME: The solTransparentWikitextRegexp includes redirects, which really
// only belong at the SOF and should be unique. See the "New redirect" test.
var noWikiRegexp = new RegExp(
'^' + env.conf.wiki.solTransparentWikitextNoWsRegexp.source +
'(<nowiki>\\s+</nowiki>)([^\n]*(?:\n|$))', 'im'
);
- var pieces = wt.split(noWikiRegexp);
+ var pieces = this.state.out.split(noWikiRegexp);
var out = pieces[0];
for (var i = 1; i < pieces.length; i += 4) {
out += pieces[i];
@@ -1061,15 +1031,15 @@
}
out = out + nowiki + rest + pieces[i + 3];
}
- return out;
-}
+ this.state.out = out;
+};
// This implements a heuristic to strip two common sources of <nowiki/>s.
// When <i> and <b> tags are matched up properly,
// - any single ' char before <i> or <b> does not need <nowiki/> protection.
// - any single ' char before </i> or </b> does not need <nowiki/> protection.
-function stripUnnecessaryQuoteNowikis(env, wt) {
- return wt.split(/\n|$/).map(function(line) {
+WSP._stripUnnecessaryQuoteNowikis = function() {
+ this.state.out = this.state.out.split(/\n|$/).map(function(line) {
// Optimization: We are interested in <nowiki/>s before quote chars.
// So, skip this if we don't have both.
if (!(/<nowiki\s*\/>/.test(line) && /'/.test(line))) {
@@ -1176,13 +1146,13 @@
return line;
}
}).join("\n");
-}
+};
/**
- * Serialize an HTML DOM document synchronously.
+ * Serialize an HTML DOM document.
* WARNING: You probably want to use DU.serializeDOM instead.
*/
-WSP.serializeDOMSync = function(body, selserMode) {
+WSP.serializeDOM = Promise.method(function(body, selserMode) {
console.assert(DU.isBody(body), 'Expected a body node.');
this.logType = selserMode ? "trace/selser" : "trace/wts";
@@ -1206,30 +1176,31 @@
state.updateSep(body);
state.resetCurrLine(body.firstChild);
- state.serializeChildren(body);
- // Emit child-parent seps.
- state.emitChunk('', body);
- // We've reached EOF, flush the remaining buffered text.
- state.flushLine();
+ return state.serializeChildren(body).then(function() {
+ // Emit child-parent seps.
+ state.emitChunk('', body);
+ // We've reached EOF, flush the remaining buffered text.
+ state.flushLine();
- if (state.hasIndentPreNowikis) {
- // FIXME: Perhaps this can be done on a per-line basis
- // rather than do one post-pass on the entire document.
- //
- // Strip excess/useless nowikis
- state.out = stripUnnecessaryIndentPreNowikis(this.env,
state.out);
- }
+ if (state.hasIndentPreNowikis) {
+ // FIXME: Perhaps this can be done on a per-line basis
+ // rather than do one post-pass on the entire document.
+ //
+ // Strip excess/useless nowikis
+ this._stripUnnecessaryIndentPreNowikis();
+ }
- if (state.hasQuoteNowikis) {
- // FIXME: Perhaps this can be done on a per-line basis
- // rather than do one post-pass on the entire document.
- //
- // Strip excess/useless nowikis
- state.out = stripUnnecessaryQuoteNowikis(this.env, state.out);
- }
+ if (state.hasQuoteNowikis) {
+ // FIXME: Perhaps this can be done on a per-line basis
+ // rather than do one post-pass on the entire document.
+ //
+ // Strip excess/useless nowikis
+ this._stripUnnecessaryQuoteNowikis();
+ }
- return state.out;
-};
+ return state.out;
+ }.bind(this));
+});
if (typeof module === "object") {
diff --git a/lib/utils/DOMUtils.js b/lib/utils/DOMUtils.js
index 9769791..fd755e7 100644
--- a/lib/utils/DOMUtils.js
+++ b/lib/utils/DOMUtils.js
@@ -2715,7 +2715,7 @@
return p.then(function() {
var Serializer = useSelser ? SelectiveSerializer :
WikitextSerializer;
var serializer = new Serializer({ env: env });
- return serializer.serializeDOMSync(body);
+ return serializer.serializeDOM(body);
}).nodify(cb);
};
--
To view, visit https://gerrit.wikimedia.org/r/251194
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Id68d24007229dcfa0bf6fb16c6ed3ecdaad0c2e5
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits