Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/75658


Change subject: Strip data-parsoid.src from tpls/extension nodes when 
unnecessary
......................................................................

Strip data-parsoid.src from tpls/extension nodes when unnecessary

* The regular serializer serializes templates and extensions from
  data-mw, so dp.src is only used when data-mw is missing (which
  should not happen if clients are doing their job correctly).
  Stripping dp.src in these cases should reduce HTML size a bit.

* Reduces HTML size for the BO page by 13%.

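* The wt2html test "Ref: 9. unclosed comments should not leak out of
  ref-body" now passes (reason not yet understood), so it is removed
  from the blacklist.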

Change-Id: I788b3099d306d0fe4d368af42a90999c7f21ace1
---
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests-blacklist.js
3 files changed, 38 insertions(+), 18 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid 
refs/changes/58/75658/1

diff --git a/js/lib/mediawiki.DOMPostProcessor.js 
b/js/lib/mediawiki.DOMPostProcessor.js
index f569fe4..585d22f 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -2353,12 +2353,12 @@
 
 var saveDataParsoid; // forward declaration
 
-function dumpDomWithDataAttribs( root ) {
+function dumpDomWithDataAttribs( options, root ) {
        function cloneData(node, clone) {
                var d = node.data;
                if (d && d.constructor === Object && 
(Object.keys(d.parsoid).length > 0)) {
                        clone.data = Util.clone(d);
-                       saveDataParsoid( clone, true );
+                       saveDataParsoid( options, clone, true );
                }
 
                node = node.firstChild;
@@ -2384,7 +2384,7 @@
 
        if (psd.debug || (psd.dumpFlags && 
(psd.dumpFlags.indexOf("dom:pre-dsr") !== -1))) {
                console.warn("------ DOM: pre-DSR -------");
-               dumpDomWithDataAttribs( root );
+               dumpDomWithDataAttribs( options, root );
                console.warn("----------------------------");
        }
 
@@ -2402,7 +2402,7 @@
 
        if (psd.debug || (psd.dumpFlags && 
(psd.dumpFlags.indexOf("dom:post-dsr") !== -1))) {
                console.warn("------ DOM: post-DSR -------");
-               dumpDomWithDataAttribs( root );
+               dumpDomWithDataAttribs( options, root );
                console.warn("----------------------------");
        }
 }
@@ -2412,13 +2412,13 @@
  * spans and adding RDFa attributes to all subtree roots according to
  * http://www.mediawiki.org/wiki/Parsoid/RDFa_vocabulary#Template_content
  */
-function encapsulateTemplateOutput( document, env ) {
+function encapsulateTemplateOutput( document, env, options ) {
        var tpls = {};
        var psd = env.conf.parsoid;
 
        if (psd.debug || (psd.dumpFlags && 
(psd.dumpFlags.indexOf("dom:pre-encap") !== -1))) {
                console.warn("------ DOM: pre-encapsulation -------");
-               dumpDomWithDataAttribs( document );
+               dumpDomWithDataAttribs( options, document );
                console.warn("----------------------------");
        }
 
@@ -2430,7 +2430,7 @@
 
        if (psd.debug || (psd.dumpFlags && 
(psd.dumpFlags.indexOf("dom:post-encap") !== -1))) {
                console.warn("------ DOM: post-encapsulation -------");
-               dumpDomWithDataAttribs( document );
+               dumpDomWithDataAttribs( options, document );
                console.warn("----------------------------");
        }
 }
@@ -2852,14 +2852,28 @@
  *
  * Save the data-parsoid attributes on each node.
  */
-saveDataParsoid = function( node, debugDump ) {
+saveDataParsoid = function( options, node, debugDump ) {
        if ( node.nodeType === node.ELEMENT_NODE && node.data ) {
                if (!debugDump) {
-                       if (node.data.parsoid) {
-                               node.data.parsoid.tagId = undefined;
-                               if (node.data.parsoid.tsr) {
-                                       node.data.parsoid.tsr = undefined;
+                       var dp = node.data.parsoid;
+                       if (dp) {
+                               dp.tagId = undefined;
+                               // Remove data-parsoid.src from templates and 
extensions that have
+                               // valid data-mw and dsr.  This should reduce 
data-parsoid bloat
+                               //
+                               // Transcluded nodes will not have dp.tsr set 
and don't need dp.src either
+                               if 
(/\bmw:(Transclusion|Extension)\b/.test(node.getAttribute("typeof")) &&
+                                       (!dp.tsr ||
+                                       node.getAttribute("data-mw") && dp.dsr 
&& dp.dsr[0] && dp.dsr[1]))
+                               {
+                                       dp.src = undefined;
                                }
+
+                               // Remove tsr
+                               if (dp.tsr) {
+                                       dp.tsr = undefined;
+                               }
+
                        }
                }
                DU.saveDataAttribs( node );
@@ -2922,7 +2936,7 @@
        var domVisitor2 = new DOMTraverser();
        domVisitor2.addHandler( 'meta', stripMarkerMetas.bind(null, 
env.conf.parsoid.editMode) );
        domVisitor2.addHandler( 'li', cleanUpLIHack.bind( null, env ) );
-       domVisitor2.addHandler( null, saveDataParsoid );
+       domVisitor2.addHandler( null, saveDataParsoid.bind(null, this.options) 
);
        this.processors.push(domVisitor2.traverse.bind(domVisitor2));
 }
 
diff --git a/js/lib/mediawiki.WikitextSerializer.js 
b/js/lib/mediawiki.WikitextSerializer.js
index f78e751..60f4993 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -3114,9 +3114,6 @@
        if (/\bmw:(?:Transclusion\b|Param\b|Extension\/[^\s]+)/.test(typeOf)) {
                return {
                        handle: function () {
-                               // In RT-testing mode, there will not be any 
edits to tpls/extensions.
-                               // So, use original source to eliminate 
spurious diffs showing up
-                               // in RT testing results.
                                var src, dataMW;
                                if 
(/\bmw:(Transclusion\b|Param\b)/.test(typeOf)) {
                                        dataMW = 
JSON.parse(node.getAttribute("data-mw"));
@@ -3134,7 +3131,17 @@
                                        console.error("ERROR: Should not have 
come here!");
                                }
 
-                               if (src) {
+                               // FIXME: Just adding this here temporarily 
till we go in and
+                               // clean this up and strip this out if we can 
verify that data-mw
+                               // is going to be present always when necessary 
and indicate that
+                               // a missing data-mw is either a parser bug or 
a client error.
+                               //
+                               // Fallback: should be exercised only in 
exceptional situations.
+                               if (src === undefined && state.env.page.src && 
isValidDSR(dp.dsr)) {
+                                       src = state.getOrigSrc(dp.dsr[0], 
dp.dsr[1]);
+                               }
+
+                               if (src !== undefined) {
                                        self.emitWikitext(src, state, cb, node);
                                        return 
self.skipOverEncapsulatedContent(node);
                                } else {
diff --git a/js/tests/parserTests-blacklist.js 
b/js/tests/parserTests-blacklist.js
index 735269d..1cbc9b7 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -494,7 +494,6 @@
 add("wt2html", "Language parser function");
 add("wt2html", "Special parser function");
 add("wt2html", "1. SOL-sensitive wikitext tokens as template-args");
-add("wt2html", "Ref: 9. unclosed comments should not leak out of ref-body");
 add("wt2html", "Headings: 2. Outside heading nest on a single line 
<h1>foo</h1>*bar");
 add("wt2html", "Headings: 6a. Heading chars in SOL context (with trailing 
spaces)");
 add("wt2html", "Headings: 6b. Heading chars in SOL context (with trailing 
newlines)");

-- 
To view, visit https://gerrit.wikimedia.org/r/75658
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I788b3099d306d0fe4d368af42a90999c7f21ace1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to