jenkins-bot has submitted this change and it was merged. Change subject: Rename minimizeWTQuoteTags to normalizeDOM ......................................................................
Rename minimizeWTQuoteTags to normalizeDOM In future patches: * The minimization code will be generalized. * Some of the normalizations in core serialization code will be moved to this pass. Change-Id: Iaf08a183885cd9d10ee88ac2611146458102c578 --- M lib/mediawiki.DOMUtils.js M lib/mediawiki.WikitextSerializer.js D lib/wts.minimizeWTQuoteTags.js A lib/wts.normalizeDOM.js 4 files changed, 160 insertions(+), 153 deletions(-) Approvals: Arlolra: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/mediawiki.DOMUtils.js b/lib/mediawiki.DOMUtils.js index d3c7670..483347e 100644 --- a/lib/mediawiki.DOMUtils.js +++ b/lib/mediawiki.DOMUtils.js @@ -153,8 +153,8 @@ return { h: h, count: count }; } - var xA = arrayToHash(nodeA.attributes), - xB = arrayToHash(nodeB.attributes); + var xA = arrayToHash(nodeA.attributes || []), + xB = arrayToHash(nodeB.attributes || []); if (xA.count !== xB.count) { return false; diff --git a/lib/mediawiki.WikitextSerializer.js b/lib/mediawiki.WikitextSerializer.js index 086898b..4323c5b 100644 --- a/lib/mediawiki.WikitextSerializer.js +++ b/lib/mediawiki.WikitextSerializer.js @@ -32,7 +32,7 @@ WTSUtils = require('./wts.utils.js').WTSUtils, pd = require('./mediawiki.parser.defines.js'), ConstrainedText = require('./wts.ConstrainedText.js').ConstrainedText, - minimizeWTQuoteTags = require('./wts.minimizeWTQuoteTags.js').minimizeWTQuoteTags, + normalizeDOM = require('./wts.normalizeDOM.js').normalizeDOM, SerializerState = require('./wts.SerializerState.js').SerializerState, TagHandlers = require('./wts.TagHandlers.js').TagHandlers, LinkHandlersModule = require('./wts.LinkHandler.js'), @@ -1378,8 +1378,8 @@ } var state = new SerializerState(this, this.options); try { - // Minimize I/B tags - minimizeWTQuoteTags(body, state.env); + // Normalize the DOM + normalizeDOM(body, state.env); // Don't serialize the DOM if debugging is disabled this.env.log(this.logType, function() { diff --git a/lib/wts.minimizeWTQuoteTags.js b/lib/wts.minimizeWTQuoteTags.js deleted file mode 100644 index 70f840a..0000000 --- a/lib/wts.minimizeWTQuoteTags.js +++ /dev/null @@ -1,148 +0,0 @@ -"use strict"; - -require('./core-upgrade.js'); -var DU = require('./mediawiki.DOMUtils.js').DOMUtils, - Consts = require('./mediawiki.wikitext.constants.js').WikitextConstants; - -var ignoreableAttribs = new Set(['data-parsoid']); -function similar(a, b) { - var isHtml_a = DU.isLiteralHTMLNode(a), - isHtml_b = DU.isLiteralHTMLNode(b); - - return (!isHtml_a && !isHtml_b) || - (isHtml_a && isHtml_b && DU.attribsEquals(a, b, ignoreableAttribs)); -} - -/** Can a and b be merged into a single node? */ -function mergable(a, b) { - return a.nodeName === b.nodeName && similar(a, b); -} - -/** - * Can a and b be combined into a single node - * if we swap a and a.firstChild? - * - * For example: a='<b><i>x</i></b>' b='<i>y</i>' => '<i><b>x</b>y</i>' - */ -function swappable(a, b) { - return DU.numNonDeletedChildNodes(a) === 1 && - similar(a, DU.firstNonDeletedChildNode(a)) && - mergable(DU.firstNonDeletedChildNode(a), b); -} - -/** Transfer all of b's children to a and delete b */ -function merge(env, a, b) { - DU.migrateChildren(b, a); - b.parentNode.removeChild(b); - - DU.setDiffMark(a, env, "children-changed"); - return a; -} - -/** b is a's sole non-deleted child. Switch them around. */ -function swap(env, a, b) { - DU.migrateChildren(b, a); - a.parentNode.insertBefore(b, a); - b.appendChild(a); - - DU.setDiffMark(a, env, "children-changed"); - DU.setDiffMark(b, env, "children-changed"); - - return b; -} - -/** - * Minimize a pair of tags in the dom tree rooted at node. - * - * This function merges adjacent nodes of the same type - * and swaps nodes where possible to enable further merging. - * - * See examples below for a (B, I) tag-pair: - * - * 1. <b>X</b><b>Y</b> - * ==> <b>XY</b> - * - * 2. <i>A</i><b><i>X</i></b><b><i>Y</i></b><i>Z</i> - * ==> <i>A<b>XY</b>Z</i> - */ -function minimizeTags(env, node, rewriteablePair, recurse) { - if (DU.isFirstEncapsulationWrapperNode(node) || !node.firstChild) { - return; - } - - // minimize the children of `node`. if `recurse` is true we're going to - // recurse to ensure the children are also minimized. if `recurse` is - // false we can assume the children are already minimized. - var a = node.firstChild, b; - - if (DU.isElt(a) && recurse) { - minimizeTags(env, a, rewriteablePair, true); - } - - while (a) { - b = DU.nextNonDeletedSibling(a); - if (!b) { - break; - } - - if (DU.isElt(b) && recurse) { - minimizeTags(env, b, rewriteablePair, true); - } - - // If 'a' and 'b' make a rewriteable tag-pair and neither of them - // is an encapsulated element, we are good to go! - if (rewriteablePair(a, b) && - !DU.isFirstEncapsulationWrapperNode(a) && - !DU.isFirstEncapsulationWrapperNode(b)) { - if (mergable(a, b)) { - a = merge(env, a, b); - // the new a's children have new siblings. so let's look - // at a again. but the children themselves haven't changed, - // so we don't need to recurse. - minimizeTags(env, a, rewriteablePair, false); - } else if (swappable(a, b)) { - a = merge(env, swap(env, a, DU.firstNonDeletedChildNode(a)), b); - // again, a has new children, but the grandkids have already - // been minimized. - minimizeTags(env, a, rewriteablePair, false); - } else if (swappable(b, a)) { - a = merge(env, a, swap(env, b, DU.firstNonDeletedChildNode(b))); - // again, a has new children, but the grandkids have already - // been minimized. - minimizeTags(env, a, rewriteablePair, false); - } else { - a = b; - } - } else { - a = b; - } - } - - // return node to enable chaining - return node; -} - -// NOTE: We need not check whether the node being transformed -// are new / edited, etc. since these minimization scenarios can -// never show up in HTML that came from parsed wikitext -// -// <i>..</i><i>..</i> can never show up without a <nowiki/> in between. -// Similarly for <b>..</b><b>..</b> and <b><i>..</i></b><i>..</i>. -// -// This is because a sequence of 4 quotes is not parsed as ..</i><i>.. -// Neither is a sequence of 7 quotes parsed as ..</i></b><i>.. -// -// So, if we see a minimizable pair of nodes, it is because the HTML -// didn't originate from wikitext OR the HTML has been subsequently edited. -// In both cases, we want to apply the transformation below. -function minimizeWTQuoteTags(node, env) { - return minimizeTags(env, node, function(a, b) { - // - 'a' and 'b' are both B/I tags - return Consts.WTQuoteTags.has( a.nodeName ) && - Consts.WTQuoteTags.has( b.nodeName ); - }, true); -} - -if (typeof module === "object") { - module.exports.minimizeWTQuoteTags = minimizeWTQuoteTags; -} diff --git a/lib/wts.normalizeDOM.js b/lib/wts.normalizeDOM.js new file mode 100644 index 0000000..829e108 --- /dev/null +++ b/lib/wts.normalizeDOM.js @@ -0,0 +1,155 @@ +"use strict"; + +require('./core-upgrade.js'); +var DU = require('./mediawiki.DOMUtils.js').DOMUtils, + Consts = require('./mediawiki.wikitext.constants.js').WikitextConstants; + +var ignoreableAttribs = new Set(['data-parsoid', 'data-parsoid-diff']); + +function similar(a, b) { + var isHtml_a = DU.isLiteralHTMLNode(a); + var isHtml_b = DU.isLiteralHTMLNode(b); + + return (!isHtml_a && !isHtml_b) || + (isHtml_a && isHtml_b && DU.attribsEquals(a, b, ignoreableAttribs)); +} + +/** Can a and b be merged into a single node? */ +function mergable(a, b) { + return a.nodeName === b.nodeName && similar(a, b); +} + +/** + * Can a and b be combined into a single node + * if we swap a and a.firstChild? + * + * For example: a='<b><i>x</i></b>' b='<i>y</i>' => '<i><b>x</b>y</i>' + */ +function swappable(a, b) { + return DU.numNonDeletedChildNodes(a) === 1 && + similar(a, DU.firstNonDeletedChildNode(a)) && + mergable(DU.firstNonDeletedChildNode(a), b); +} + +/** Transfer all of b's children to a and delete b */ +function merge(env, a, b) { + DU.migrateChildren(b, a); + b.parentNode.removeChild(b); + + DU.setDiffMark(a, env, "children-changed"); + return a; +} + +/** b is a's sole non-deleted child. Switch them around. */ +function swap(env, a, b) { + DU.migrateChildren(b, a); + a.parentNode.insertBefore(b, a); + b.appendChild(a); + + DU.setDiffMark(a, env, "children-changed"); + DU.setDiffMark(b, env, "children-changed"); + + return b; +} + +function rewriteablePair(a, b) { + // Currently supported: 'a' and 'b' are both B/I tags + // + // For <i>/<b> pair, we need not check whether the node being transformed + // are new / edited, etc. since these minimization scenarios can + // never show up in HTML that came from parsed wikitext. + // + // <i>..</i><i>..</i> can never show up without a <nowiki/> in between. + // Similarly for <b>..</b><b>..</b> and <b><i>..</i></b><i>..</i>. + // + // This is because a sequence of 4 quotes is not parsed as ..</i><i>.. + // Neither is a sequence of 7 quotes parsed as ..</i></b><i>.. + // + // So, if we see a minimizable pair of nodes, it is because the HTML + // didn't originate from wikitext OR the HTML has been subsequently edited. + // In both cases, we want to transform the DOM. + + return Consts.WTQuoteTags.has(a.nodeName) && + Consts.WTQuoteTags.has(b.nodeName); +} + +/** + * The only normalization implemented right now is I/B tag minimization. + * + * Minimize a pair of tags in the dom tree rooted at node. + * + * This function merges adjacent nodes of the same type + * and swaps nodes where possible to enable further merging. + * + * See examples below for a (B, I) tag-pair: + * + * 1. <b>X</b><b>Y</b> + * ==> <b>XY</b> + * + * 2. <i>A</i><b><i>X</i></b><b><i>Y</i></b><i>Z</i> + * ==> <i>A<b>XY</b>Z</i> + */ +function _normalizeDOM(env, node, recurse) { + if (DU.isFirstEncapsulationWrapperNode(node) || !node.firstChild) { + return; + } + + // Minimize the children of `node`. + // recurse = true => recurse to ensure the children are also minimized + // recurse = false => assume the children are already minimized + var a = node.firstChild, b; + + if (DU.isElt(a) && recurse) { + _normalizeDOM(env, a, true); + } + + while (a) { + b = DU.nextNonDeletedSibling(a); + if (!b) { + break; + } + + if (DU.isElt(b) && recurse) { + _normalizeDOM(env, b, true); + } + + // If 'a' and 'b' make a rewriteable tag-pair and neither of them + // is an encapsulated element, we are good to go. + if (rewriteablePair(a, b) && + !DU.isFirstEncapsulationWrapperNode(a) && + !DU.isFirstEncapsulationWrapperNode(b)) { + if (mergable(a, b)) { + a = merge(env, a, b); + // The new a's children have new siblings. So let's look + // at a again. But the children themselves haven't changed, + // so we don't need to recurse. + _normalizeDOM(env, a, false); + } else if (swappable(a, b)) { + a = merge(env, swap(env, a, DU.firstNonDeletedChildNode(a)), b); + // Again, a has new children, but the grandkids have already + // been minimized. + _normalizeDOM(env, a, false); + } else if (swappable(b, a)) { + a = merge(env, a, swap(env, b, DU.firstNonDeletedChildNode(b))); + // Again, a has new children, but the grandkids have already + // been minimized. + _normalizeDOM(env, a, false); + } else { + a = b; + } + } else { + a = b; + } + } + + // return node to enable chaining + return node; +} + +function normalizeDOM(node, env) { + return _normalizeDOM(env, node, true); +} + +if (typeof module === "object") { + module.exports.normalizeDOM = normalizeDOM; +} -- To view, visit https://gerrit.wikimedia.org/r/201920 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Iaf08a183885cd9d10ee88ac2611146458102c578 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Cscott <canan...@wikimedia.org> Gerrit-Reviewer: Marcoil <marc...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits