jenkins-bot has submitted this change and it was merged.

Change subject: Rename minimizeWTQuoteTags to normalizeDOM
......................................................................


Rename minimizeWTQuoteTags to normalizeDOM

In future patches:
* The minimization code will be generalized.
* Some of the normalizations in core serialization code will be
  moved to this pass.

Change-Id: Iaf08a183885cd9d10ee88ac2611146458102c578
---
M lib/mediawiki.DOMUtils.js
M lib/mediawiki.WikitextSerializer.js
D lib/wts.minimizeWTQuoteTags.js
A lib/wts.normalizeDOM.js
4 files changed, 160 insertions(+), 153 deletions(-)

Approvals:
  Arlolra: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/mediawiki.DOMUtils.js b/lib/mediawiki.DOMUtils.js
index d3c7670..483347e 100644
--- a/lib/mediawiki.DOMUtils.js
+++ b/lib/mediawiki.DOMUtils.js
@@ -153,8 +153,8 @@
                        return { h: h, count: count };
                }
 
-               var xA = arrayToHash(nodeA.attributes),
-                       xB = arrayToHash(nodeB.attributes);
+               var xA = arrayToHash(nodeA.attributes || []),
+                       xB = arrayToHash(nodeB.attributes || []);
 
                if (xA.count !== xB.count) {
                        return false;
diff --git a/lib/mediawiki.WikitextSerializer.js b/lib/mediawiki.WikitextSerializer.js
index 086898b..4323c5b 100644
--- a/lib/mediawiki.WikitextSerializer.js
+++ b/lib/mediawiki.WikitextSerializer.js
@@ -32,7 +32,7 @@
        WTSUtils = require('./wts.utils.js').WTSUtils,
        pd = require('./mediawiki.parser.defines.js'),
        ConstrainedText = require('./wts.ConstrainedText.js').ConstrainedText,
-       minimizeWTQuoteTags = require('./wts.minimizeWTQuoteTags.js').minimizeWTQuoteTags,
+       normalizeDOM = require('./wts.normalizeDOM.js').normalizeDOM,
        SerializerState = require('./wts.SerializerState.js').SerializerState,
        TagHandlers = require('./wts.TagHandlers.js').TagHandlers,
        LinkHandlersModule = require('./wts.LinkHandler.js'),
@@ -1378,8 +1378,8 @@
        }
        var state = new SerializerState(this, this.options);
        try {
-               // Minimize I/B tags
-               minimizeWTQuoteTags(body, state.env);
+               // Normalize the DOM
+               normalizeDOM(body, state.env);
 
                // Don't serialize the DOM if debugging is disabled
                this.env.log(this.logType, function() {
diff --git a/lib/wts.minimizeWTQuoteTags.js b/lib/wts.minimizeWTQuoteTags.js
deleted file mode 100644
index 70f840a..0000000
--- a/lib/wts.minimizeWTQuoteTags.js
+++ /dev/null
@@ -1,148 +0,0 @@
-"use strict";
-
-require('./core-upgrade.js');
-var DU = require('./mediawiki.DOMUtils.js').DOMUtils,
-       Consts = require('./mediawiki.wikitext.constants.js').WikitextConstants;
-
-var ignoreableAttribs = new Set(['data-parsoid']);
-function similar(a, b) {
-       var isHtml_a = DU.isLiteralHTMLNode(a),
-               isHtml_b = DU.isLiteralHTMLNode(b);
-
-       return (!isHtml_a && !isHtml_b) ||
-               (isHtml_a && isHtml_b && DU.attribsEquals(a, b, ignoreableAttribs));
-}
-
-/** Can a and b be merged into a single node? */
-function mergable(a, b) {
-       return a.nodeName === b.nodeName && similar(a, b);
-}
-
-/**
- * Can a and b be combined into a single node
- * if we swap a and a.firstChild?
- *
- * For example: a='<b><i>x</i></b>' b='<i>y</i>' => '<i><b>x</b>y</i>'
- */
-function swappable(a, b) {
-       return DU.numNonDeletedChildNodes(a) === 1 &&
-               similar(a, DU.firstNonDeletedChildNode(a)) &&
-               mergable(DU.firstNonDeletedChildNode(a), b);
-}
-
-/** Transfer all of b's children to a and delete b */
-function merge(env, a, b) {
-       DU.migrateChildren(b, a);
-       b.parentNode.removeChild(b);
-
-       DU.setDiffMark(a, env, "children-changed");
-       return a;
-}
-
-/** b is a's sole non-deleted child.  Switch them around. */
-function swap(env, a, b) {
-       DU.migrateChildren(b, a);
-       a.parentNode.insertBefore(b, a);
-       b.appendChild(a);
-
-       DU.setDiffMark(a, env, "children-changed");
-       DU.setDiffMark(b, env, "children-changed");
-
-       return b;
-}
-
-/**
- * Minimize a pair of tags in the dom tree rooted at node.
- *
- * This function merges adjacent nodes of the same type
- * and swaps nodes where possible to enable further merging.
- *
- * See examples below for a (B, I) tag-pair:
- *
- * 1. <b>X</b><b>Y</b>
- *    ==> <b>XY</b>
- *
- * 2. <i>A</i><b><i>X</i></b><b><i>Y</i></b><i>Z</i>
- *    ==> <i>A<b>XY</b>Z</i>
- */
-function minimizeTags(env, node, rewriteablePair, recurse) {
-       if (DU.isFirstEncapsulationWrapperNode(node) || !node.firstChild) {
-               return;
-       }
-
-       // minimize the children of `node`.  if `recurse` is true we're going to
-       // recurse to ensure the children are also minimized.  if `recurse` is
-       // false we can assume the children are already minimized.
-       var a = node.firstChild, b;
-
-       if (DU.isElt(a) && recurse) {
-               minimizeTags(env, a, rewriteablePair, true);
-       }
-
-       while (a) {
-               b = DU.nextNonDeletedSibling(a);
-               if (!b) {
-                       break;
-               }
-
-               if (DU.isElt(b) && recurse) {
-                       minimizeTags(env, b, rewriteablePair, true);
-               }
-
-               // If 'a' and 'b' make a rewriteable tag-pair and neither of them
-               // is an encapsulated element, we are good to go!
-               if (rewriteablePair(a, b) &&
-                       !DU.isFirstEncapsulationWrapperNode(a) &&
-                       !DU.isFirstEncapsulationWrapperNode(b)) {
-                       if (mergable(a, b)) {
-                               a = merge(env, a, b);
-                               // the new a's children have new siblings.  so let's look
-                               // at a again.  but the children themselves haven't changed,
-                               // so we don't need to recurse.
-                               minimizeTags(env, a, rewriteablePair, false);
-                       } else if (swappable(a, b)) {
-                               a = merge(env, swap(env, a, DU.firstNonDeletedChildNode(a)), b);
-                               // again, a has new children, but the grandkids have already
-                               // been minimized.
-                               minimizeTags(env, a, rewriteablePair, false);
-                       } else if (swappable(b, a)) {
-                               a = merge(env, a, swap(env, b, DU.firstNonDeletedChildNode(b)));
-                               // again, a has new children, but the grandkids have already
-                               // been minimized.
-                               minimizeTags(env, a, rewriteablePair, false);
-                       } else {
-                               a = b;
-                       }
-               } else {
-                       a = b;
-               }
-       }
-
-       // return node to enable chaining
-       return node;
-}
-
-// NOTE: We need not check whether the node being transformed
-// are new / edited, etc. since these minimization scenarios can
-// never show up in HTML that came from parsed wikitext
-//
-// <i>..</i><i>..</i> can never show up without a <nowiki/> in between.
-// Similarly for <b>..</b><b>..</b> and <b><i>..</i></b><i>..</i>.
-//
-// This is because a sequence of 4 quotes is not parsed as ..</i><i>..
-// Neither is a sequence of 7 quotes parsed as ..</i></b><i>..
-//
-// So, if we see a minimizable pair of nodes, it is because the HTML
-// didn't originate from wikitext OR the HTML has been subsequently edited.
-// In both cases, we want to apply the transformation below.
-function minimizeWTQuoteTags(node, env) {
-       return minimizeTags(env, node, function(a, b) {
-                       // - 'a' and 'b' are both B/I tags
-                       return Consts.WTQuoteTags.has( a.nodeName ) &&
-                               Consts.WTQuoteTags.has( b.nodeName );
-               }, true);
-}
-
-if (typeof module === "object") {
-       module.exports.minimizeWTQuoteTags = minimizeWTQuoteTags;
-}
diff --git a/lib/wts.normalizeDOM.js b/lib/wts.normalizeDOM.js
new file mode 100644
index 0000000..829e108
--- /dev/null
+++ b/lib/wts.normalizeDOM.js
@@ -0,0 +1,155 @@
+"use strict";
+
+require('./core-upgrade.js');
+var DU = require('./mediawiki.DOMUtils.js').DOMUtils,
+       Consts = require('./mediawiki.wikitext.constants.js').WikitextConstants;
+
+var ignoreableAttribs = new Set(['data-parsoid', 'data-parsoid-diff']);
+
+function similar(a, b) {
+       var isHtml_a = DU.isLiteralHTMLNode(a);
+       var isHtml_b = DU.isLiteralHTMLNode(b);
+
+       return (!isHtml_a && !isHtml_b) ||
+               (isHtml_a && isHtml_b && DU.attribsEquals(a, b, ignoreableAttribs));
+}
+
+/** Can a and b be merged into a single node? */
+function mergable(a, b) {
+       return a.nodeName === b.nodeName && similar(a, b);
+}
+
+/**
+ * Can a and b be combined into a single node
+ * if we swap a and a.firstChild?
+ *
+ * For example: a='<b><i>x</i></b>' b='<i>y</i>' => '<i><b>x</b>y</i>'
+ */
+function swappable(a, b) {
+       return DU.numNonDeletedChildNodes(a) === 1 &&
+               similar(a, DU.firstNonDeletedChildNode(a)) &&
+               mergable(DU.firstNonDeletedChildNode(a), b);
+}
+
+/** Transfer all of b's children to a and delete b */
+function merge(env, a, b) {
+       DU.migrateChildren(b, a);
+       b.parentNode.removeChild(b);
+
+       DU.setDiffMark(a, env, "children-changed");
+       return a;
+}
+
+/** b is a's sole non-deleted child.  Switch them around. */
+function swap(env, a, b) {
+       DU.migrateChildren(b, a);
+       a.parentNode.insertBefore(b, a);
+       b.appendChild(a);
+
+       DU.setDiffMark(a, env, "children-changed");
+       DU.setDiffMark(b, env, "children-changed");
+
+       return b;
+}
+
+function rewriteablePair(a, b) {
+       // Currently supported: 'a' and 'b' are both B/I tags
+       //
+       // For <i>/<b> pair, we need not check whether the node being transformed
+       // are new / edited, etc. since these minimization scenarios can
+       // never show up in HTML that came from parsed wikitext.
+       //
+       // <i>..</i><i>..</i> can never show up without a <nowiki/> in between.
+       // Similarly for <b>..</b><b>..</b> and <b><i>..</i></b><i>..</i>.
+       //
+       // This is because a sequence of 4 quotes is not parsed as ..</i><i>..
+       // Neither is a sequence of 7 quotes parsed as ..</i></b><i>..
+       //
+       // So, if we see a minimizable pair of nodes, it is because the HTML
+       // didn't originate from wikitext OR the HTML has been subsequently edited.
+       // In both cases, we want to transform the DOM.
+
+       return Consts.WTQuoteTags.has(a.nodeName) &&
+               Consts.WTQuoteTags.has(b.nodeName);
+}
+
+/**
+ * The only normalization implemented right now is I/B tag minimization.
+ *
+ * Minimize a pair of tags in the dom tree rooted at node.
+ *
+ * This function merges adjacent nodes of the same type
+ * and swaps nodes where possible to enable further merging.
+ *
+ * See examples below for a (B, I) tag-pair:
+ *
+ * 1. <b>X</b><b>Y</b>
+ *    ==> <b>XY</b>
+ *
+ * 2. <i>A</i><b><i>X</i></b><b><i>Y</i></b><i>Z</i>
+ *    ==> <i>A<b>XY</b>Z</i>
+ */
+function _normalizeDOM(env, node, recurse) {
+       if (DU.isFirstEncapsulationWrapperNode(node) || !node.firstChild) {
+               return;
+       }
+
+       // Minimize the children of `node`.
+       // recurse = true  => recurse to ensure the children are also minimized
+       // recurse = false => assume the children are already minimized
+       var a = node.firstChild, b;
+
+       if (DU.isElt(a) && recurse) {
+               _normalizeDOM(env, a, true);
+       }
+
+       while (a) {
+               b = DU.nextNonDeletedSibling(a);
+               if (!b) {
+                       break;
+               }
+
+               if (DU.isElt(b) && recurse) {
+                       _normalizeDOM(env, b, true);
+               }
+
+               // If 'a' and 'b' make a rewriteable tag-pair and neither of them
+               // is an encapsulated element, we are good to go.
+               if (rewriteablePair(a, b) &&
+                       !DU.isFirstEncapsulationWrapperNode(a) &&
+                       !DU.isFirstEncapsulationWrapperNode(b)) {
+                       if (mergable(a, b)) {
+                               a = merge(env, a, b);
+                               // The new a's children have new siblings. So let's look
+                               // at a again. But the children themselves haven't changed,
+                               // so we don't need to recurse.
+                               _normalizeDOM(env, a, false);
+                       } else if (swappable(a, b)) {
+                               a = merge(env, swap(env, a, DU.firstNonDeletedChildNode(a)), b);
+                               // Again, a has new children, but the grandkids have already
+                               // been minimized.
+                               _normalizeDOM(env, a, false);
+                       } else if (swappable(b, a)) {
+                               a = merge(env, a, swap(env, b, DU.firstNonDeletedChildNode(b)));
+                               // Again, a has new children, but the grandkids have already
+                               // been minimized.
+                               _normalizeDOM(env, a, false);
+                       } else {
+                               a = b;
+                       }
+               } else {
+                       a = b;
+               }
+       }
+
+       // return node to enable chaining
+       return node;
+}
+
+function normalizeDOM(node, env) {
+       return _normalizeDOM(env, node, true);
+}
+
+if (typeof module === "object") {
+       module.exports.normalizeDOM = normalizeDOM;
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/201920
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iaf08a183885cd9d10ee88ac2611146458102c578
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: Cscott <canan...@wikimedia.org>
Gerrit-Reviewer: Marcoil <marc...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to