jenkins-bot has submitted this change and it was merged.
Change subject: Strip data-parsoid.src from tpls/extension nodes when
unnecessary
......................................................................
Strip data-parsoid.src from tpls/extension nodes when unnecessary
* Since templates and extensions in the regular serializer are
serialized from data-mw, dp.src is never used except when data-mw
is missing (which it should not be if clients are doing their
job correctly). Stripping it should reduce HTML size a bit.
* Reduces HTML size for the BO page by 13%.
* An unclosed-comment ref test is now passing in wt2html mode: with
dp.src stripped, the unclosed comment is no longer in the output.
Earlier, the unclosed comment in dp.src also tripped
up our normalization code and artificially caused a test failure.
Change-Id: I788b3099d306d0fe4d368af42a90999c7f21ace1
---
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests-blacklist.js
3 files changed, 33 insertions(+), 14 deletions(-)
Approvals:
Cscott: Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/lib/mediawiki.DOMPostProcessor.js
b/js/lib/mediawiki.DOMPostProcessor.js
index 52daf6c..ca1fe00 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -2442,12 +2442,12 @@
var saveDataParsoid; // forward declaration
-function dumpDomWithDataAttribs( root ) {
+function dumpDomWithDataAttribs( options, root ) {
function cloneData(node, clone) {
var d = node.data;
if (d && d.constructor === Object &&
(Object.keys(d.parsoid).length > 0)) {
clone.data = Util.clone(d);
- saveDataParsoid( clone, true );
+ saveDataParsoid( options, clone, true );
}
node = node.firstChild;
@@ -2473,7 +2473,7 @@
if (psd.debug || (psd.dumpFlags &&
(psd.dumpFlags.indexOf("dom:pre-dsr") !== -1))) {
console.warn("------ DOM: pre-DSR -------");
- dumpDomWithDataAttribs( root );
+ dumpDomWithDataAttribs( options, root );
console.warn("----------------------------");
}
@@ -2491,7 +2491,7 @@
if (psd.debug || (psd.dumpFlags &&
(psd.dumpFlags.indexOf("dom:post-dsr") !== -1))) {
console.warn("------ DOM: post-DSR -------");
- dumpDomWithDataAttribs( root );
+ dumpDomWithDataAttribs( options, root );
console.warn("----------------------------");
}
}
@@ -2501,13 +2501,13 @@
* spans and adding RDFa attributes to all subtree roots according to
* http://www.mediawiki.org/wiki/Parsoid/RDFa_vocabulary#Template_content
*/
-function encapsulateTemplateOutput( document, env ) {
+function encapsulateTemplateOutput( document, env, options ) {
var tpls = {};
var psd = env.conf.parsoid;
if (psd.debug || (psd.dumpFlags &&
(psd.dumpFlags.indexOf("dom:pre-encap") !== -1))) {
console.warn("------ DOM: pre-encapsulation -------");
- dumpDomWithDataAttribs( document );
+ dumpDomWithDataAttribs( options, document );
console.warn("----------------------------");
}
@@ -2519,7 +2519,7 @@
if (psd.debug || (psd.dumpFlags &&
(psd.dumpFlags.indexOf("dom:post-encap") !== -1))) {
console.warn("------ DOM: post-encapsulation -------");
- dumpDomWithDataAttribs( document );
+ dumpDomWithDataAttribs( options, document );
console.warn("----------------------------");
}
}
@@ -2941,12 +2941,25 @@
*
* Save the data-parsoid attributes on each node.
*/
-saveDataParsoid = function( node, debugDump ) {
+saveDataParsoid = function( options, node, debugDump ) {
if ( node.nodeType === node.ELEMENT_NODE && node.data ) {
if (!debugDump) {
var dp = node.data.parsoid;
if (dp) {
dp.tagId = undefined;
+
+ // Remove data-parsoid.src from templates and
extensions that have
+ // valid data-mw and dsr. This should reduce
data-parsoid bloat.
+ //
+ // Transcluded nodes will not have dp.tsr set
and dont need dp.src either
+ if
(/\bmw:(Transclusion|Extension)\b/.test(node.getAttribute("typeof")) &&
+ (!dp.tsr ||
+ node.getAttribute("data-mw") && dp.dsr
&& dp.dsr[0] && dp.dsr[1]))
+ {
+ dp.src = undefined;
+ }
+
+ // Remove tsr
if (dp.tsr) {
dp.tsr = undefined;
}
@@ -3020,7 +3033,7 @@
var domVisitor2 = new DOMTraverser();
domVisitor2.addHandler( 'meta', stripMarkerMetas.bind(null,
env.conf.parsoid.editMode) );
domVisitor2.addHandler( 'li', cleanUpLIHack.bind( null, env ) );
- domVisitor2.addHandler( null, saveDataParsoid );
+ domVisitor2.addHandler( null, saveDataParsoid.bind(null, this.options)
);
this.processors.push(domVisitor2.traverse.bind(domVisitor2));
}
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 9009171..c3120f4 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -3128,9 +3128,6 @@
if (/\bmw:(?:Transclusion\b|Param\b|Extension\/[^\s]+)/.test(typeOf)) {
return {
handle: function () {
- // In RT-testing mode, there will not be any
edits to tpls/extensions.
- // So, use original source to eliminate
spurious diffs showing up
- // in RT testing results.
var src, dataMW;
if
(/\bmw:(Transclusion\b|Param\b)/.test(typeOf)) {
dataMW =
JSON.parse(node.getAttribute("data-mw"));
@@ -3148,7 +3145,17 @@
console.error("ERROR: Should not have
come here!");
}
- if (src) {
+ // FIXME: Just adding this here temporarily
till we go in and
+ // clean this up and strip this out if we can
verify that data-mw
+ // is going to be present always when necessary
and indicate that
+ // a missing data-mw is either a parser bug or
a client error.
+ //
+ // Fallback: should be exercised only in
exceptional situations.
+ if (src === undefined && state.env.page.src &&
isValidDSR(dp.dsr)) {
+ src = state.getOrigSrc(dp.dsr[0],
dp.dsr[1]);
+ }
+
+ if (src !== undefined) {
self.emitWikitext(src, state, cb, node);
return
self.skipOverEncapsulatedContent(node);
} else {
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index 9376d7d..14ea377 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -494,7 +494,6 @@
add("wt2html", "Language parser function");
add("wt2html", "Special parser function");
add("wt2html", "1. SOL-sensitive wikitext tokens as template-args");
-add("wt2html", "Ref: 9. unclosed comments should not leak out of ref-body");
add("wt2html", "Headings: 2. Outside heading nest on a single line
<h1>foo</h1>*bar");
add("wt2html", "Headings: 6a. Heading chars in SOL context (with trailing
spaces)");
add("wt2html", "Headings: 6b. Heading chars in SOL context (with trailing
newlines)");
--
To view, visit https://gerrit.wikimedia.org/r/75658
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I788b3099d306d0fe4d368af42a90999c7f21ace1
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits