jenkins-bot has submitted this change and it was merged.

Change subject: Strip data-parsoid.src from tpls/extension nodes when 
unnecessary
......................................................................


Strip data-parsoid.src from tpls/extension nodes when unnecessary

* Since templates and extensions in the regular serializer are
  serialized from data-mw, dp.src is never used except when data-mw
  is missing (which it should not be if clients are doing their
  job correctly).  Stripping dp.src in these cases should reduce
  HTML size a bit.

* Reduces HTML size for the BO page by 13%.

* An unclosed-comment ref test is now passing in wt2html mode: with
  dp.src stripped, the unclosed comment is no longer in the output.
  Earlier, the unclosed comment in dp.src also tripped up our
  normalization code and artificially caused a test failure.

Change-Id: I788b3099d306d0fe4d368af42a90999c7f21ace1
---
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests-blacklist.js
3 files changed, 33 insertions(+), 14 deletions(-)

Approvals:
  Cscott: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/js/lib/mediawiki.DOMPostProcessor.js 
b/js/lib/mediawiki.DOMPostProcessor.js
index 52daf6c..ca1fe00 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -2442,12 +2442,12 @@
 
 var saveDataParsoid; // forward declaration
 
-function dumpDomWithDataAttribs( root ) {
+function dumpDomWithDataAttribs( options, root ) {
        function cloneData(node, clone) {
                var d = node.data;
                if (d && d.constructor === Object && 
(Object.keys(d.parsoid).length > 0)) {
                        clone.data = Util.clone(d);
-                       saveDataParsoid( clone, true );
+                       saveDataParsoid( options, clone, true );
                }
 
                node = node.firstChild;
@@ -2473,7 +2473,7 @@
 
        if (psd.debug || (psd.dumpFlags && 
(psd.dumpFlags.indexOf("dom:pre-dsr") !== -1))) {
                console.warn("------ DOM: pre-DSR -------");
-               dumpDomWithDataAttribs( root );
+               dumpDomWithDataAttribs( options, root );
                console.warn("----------------------------");
        }
 
@@ -2491,7 +2491,7 @@
 
        if (psd.debug || (psd.dumpFlags && 
(psd.dumpFlags.indexOf("dom:post-dsr") !== -1))) {
                console.warn("------ DOM: post-DSR -------");
-               dumpDomWithDataAttribs( root );
+               dumpDomWithDataAttribs( options, root );
                console.warn("----------------------------");
        }
 }
@@ -2501,13 +2501,13 @@
  * spans and adding RDFa attributes to all subtree roots according to
  * http://www.mediawiki.org/wiki/Parsoid/RDFa_vocabulary#Template_content
  */
-function encapsulateTemplateOutput( document, env ) {
+function encapsulateTemplateOutput( document, env, options ) {
        var tpls = {};
        var psd = env.conf.parsoid;
 
        if (psd.debug || (psd.dumpFlags && 
(psd.dumpFlags.indexOf("dom:pre-encap") !== -1))) {
                console.warn("------ DOM: pre-encapsulation -------");
-               dumpDomWithDataAttribs( document );
+               dumpDomWithDataAttribs( options, document );
                console.warn("----------------------------");
        }
 
@@ -2519,7 +2519,7 @@
 
        if (psd.debug || (psd.dumpFlags && 
(psd.dumpFlags.indexOf("dom:post-encap") !== -1))) {
                console.warn("------ DOM: post-encapsulation -------");
-               dumpDomWithDataAttribs( document );
+               dumpDomWithDataAttribs( options, document );
                console.warn("----------------------------");
        }
 }
@@ -2941,12 +2941,25 @@
  *
  * Save the data-parsoid attributes on each node.
  */
-saveDataParsoid = function( node, debugDump ) {
+saveDataParsoid = function( options, node, debugDump ) {
        if ( node.nodeType === node.ELEMENT_NODE && node.data ) {
                if (!debugDump) {
                        var dp = node.data.parsoid;
                        if (dp) {
                                dp.tagId = undefined;
+
+                               // Remove data-parsoid.src from templates and 
extensions that have
+                               // valid data-mw and dsr.  This should reduce 
data-parsoid bloat.
+                               //
+                               // Transcluded nodes will not have dp.tsr set 
and dont need dp.src either
+                               if 
(/\bmw:(Transclusion|Extension)\b/.test(node.getAttribute("typeof")) &&
+                                       (!dp.tsr ||
+                                       node.getAttribute("data-mw") && dp.dsr 
&& dp.dsr[0] && dp.dsr[1]))
+                               {
+                                       dp.src = undefined;
+                               }
+
+                               // Remove tsr
                                if (dp.tsr) {
                                        dp.tsr = undefined;
                                }
@@ -3020,7 +3033,7 @@
        var domVisitor2 = new DOMTraverser();
        domVisitor2.addHandler( 'meta', stripMarkerMetas.bind(null, 
env.conf.parsoid.editMode) );
        domVisitor2.addHandler( 'li', cleanUpLIHack.bind( null, env ) );
-       domVisitor2.addHandler( null, saveDataParsoid );
+       domVisitor2.addHandler( null, saveDataParsoid.bind(null, this.options) 
);
        this.processors.push(domVisitor2.traverse.bind(domVisitor2));
 }
 
diff --git a/js/lib/mediawiki.WikitextSerializer.js 
b/js/lib/mediawiki.WikitextSerializer.js
index 9009171..c3120f4 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -3128,9 +3128,6 @@
        if (/\bmw:(?:Transclusion\b|Param\b|Extension\/[^\s]+)/.test(typeOf)) {
                return {
                        handle: function () {
-                               // In RT-testing mode, there will not be any 
edits to tpls/extensions.
-                               // So, use original source to eliminate 
spurious diffs showing up
-                               // in RT testing results.
                                var src, dataMW;
                                if 
(/\bmw:(Transclusion\b|Param\b)/.test(typeOf)) {
                                        dataMW = 
JSON.parse(node.getAttribute("data-mw"));
@@ -3148,7 +3145,17 @@
                                        console.error("ERROR: Should not have 
come here!");
                                }
 
-                               if (src) {
+                               // FIXME: Just adding this here temporarily 
till we go in and
+                               // clean this up and strip this out if we can 
verify that data-mw
+                               // is going to be present always when necessary 
and indicate that
+                               // a missing data-mw is either a parser bug or 
a client error.
+                               //
+                               // Fallback: should be exercised only in 
exceptional situations.
+                               if (src === undefined && state.env.page.src && 
isValidDSR(dp.dsr)) {
+                                       src = state.getOrigSrc(dp.dsr[0], 
dp.dsr[1]);
+                               }
+
+                               if (src !== undefined) {
                                        self.emitWikitext(src, state, cb, node);
                                        return 
self.skipOverEncapsulatedContent(node);
                                } else {
diff --git a/js/tests/parserTests-blacklist.js 
b/js/tests/parserTests-blacklist.js
index 9376d7d..14ea377 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -494,7 +494,6 @@
 add("wt2html", "Language parser function");
 add("wt2html", "Special parser function");
 add("wt2html", "1. SOL-sensitive wikitext tokens as template-args");
-add("wt2html", "Ref: 9. unclosed comments should not leak out of ref-body");
 add("wt2html", "Headings: 2. Outside heading nest on a single line 
<h1>foo</h1>*bar");
 add("wt2html", "Headings: 6a. Heading chars in SOL context (with trailing 
spaces)");
 add("wt2html", "Headings: 6b. Heading chars in SOL context (with trailing 
newlines)");

-- 
To view, visit https://gerrit.wikimedia.org/r/75658
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I788b3099d306d0fe4d368af42a90999c7f21ace1
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to