Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/75658
Change subject: Strip data-parsoid.src from tpls/extension nodes when
unnecessary
......................................................................
Strip data-parsoid.src from tpls/extension nodes when unnecessary
* Since templates and extensions in the regular serializer are
serialized from data-mw, dp.src is never used except when data-mw
is missing (which it should not be if clients are doing their
job correctly). This should reduce HTML size a bit.
* Reduces HTML size for the BO page by 13%.
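* Unsure why the ref test "Ref: 9. unclosed comments should not leak
out of ref-body" is now passing in wt2html mode; it has been removed
from the parserTests blacklist.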
Change-Id: I788b3099d306d0fe4d368af42a90999c7f21ace1
---
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests-blacklist.js
3 files changed, 38 insertions(+), 18 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/58/75658/1
diff --git a/js/lib/mediawiki.DOMPostProcessor.js
b/js/lib/mediawiki.DOMPostProcessor.js
index f569fe4..585d22f 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -2353,12 +2353,12 @@
var saveDataParsoid; // forward declaration
-function dumpDomWithDataAttribs( root ) {
+function dumpDomWithDataAttribs( options, root ) {
function cloneData(node, clone) {
var d = node.data;
if (d && d.constructor === Object &&
(Object.keys(d.parsoid).length > 0)) {
clone.data = Util.clone(d);
- saveDataParsoid( clone, true );
+ saveDataParsoid( options, clone, true );
}
node = node.firstChild;
@@ -2384,7 +2384,7 @@
if (psd.debug || (psd.dumpFlags &&
(psd.dumpFlags.indexOf("dom:pre-dsr") !== -1))) {
console.warn("------ DOM: pre-DSR -------");
- dumpDomWithDataAttribs( root );
+ dumpDomWithDataAttribs( options, root );
console.warn("----------------------------");
}
@@ -2402,7 +2402,7 @@
if (psd.debug || (psd.dumpFlags &&
(psd.dumpFlags.indexOf("dom:post-dsr") !== -1))) {
console.warn("------ DOM: post-DSR -------");
- dumpDomWithDataAttribs( root );
+ dumpDomWithDataAttribs( options, root );
console.warn("----------------------------");
}
}
@@ -2412,13 +2412,13 @@
* spans and adding RDFa attributes to all subtree roots according to
* http://www.mediawiki.org/wiki/Parsoid/RDFa_vocabulary#Template_content
*/
-function encapsulateTemplateOutput( document, env ) {
+function encapsulateTemplateOutput( document, env, options ) {
var tpls = {};
var psd = env.conf.parsoid;
if (psd.debug || (psd.dumpFlags &&
(psd.dumpFlags.indexOf("dom:pre-encap") !== -1))) {
console.warn("------ DOM: pre-encapsulation -------");
- dumpDomWithDataAttribs( document );
+ dumpDomWithDataAttribs( options, document );
console.warn("----------------------------");
}
@@ -2430,7 +2430,7 @@
if (psd.debug || (psd.dumpFlags &&
(psd.dumpFlags.indexOf("dom:post-encap") !== -1))) {
console.warn("------ DOM: post-encapsulation -------");
- dumpDomWithDataAttribs( document );
+ dumpDomWithDataAttribs( options, document );
console.warn("----------------------------");
}
}
@@ -2852,14 +2852,28 @@
*
* Save the data-parsoid attributes on each node.
*/
-saveDataParsoid = function( node, debugDump ) {
+saveDataParsoid = function( options, node, debugDump ) {
if ( node.nodeType === node.ELEMENT_NODE && node.data ) {
if (!debugDump) {
- if (node.data.parsoid) {
- node.data.parsoid.tagId = undefined;
- if (node.data.parsoid.tsr) {
- node.data.parsoid.tsr = undefined;
+ var dp = node.data.parsoid;
+ if (dp) {
+ dp.tagId = undefined;
+ // Remove data-parsoid.src from templates and
extensions that have
+ // valid data-mw and dsr. This should reduce
data-parsoid bloat
+ //
+ // Transcluded nodes will not have dp.tsr set
and don't need dp.src either
+ if
(/\bmw:(Transclusion|Extension)\b/.test(node.getAttribute("typeof")) &&
+ (!dp.tsr ||
+ node.getAttribute("data-mw") && dp.dsr
&& dp.dsr[0] && dp.dsr[1]))
+ {
+ dp.src = undefined;
}
+
+ // Remove tsr
+ if (dp.tsr) {
+ dp.tsr = undefined;
+ }
+
}
}
DU.saveDataAttribs( node );
@@ -2922,7 +2936,7 @@
var domVisitor2 = new DOMTraverser();
domVisitor2.addHandler( 'meta', stripMarkerMetas.bind(null,
env.conf.parsoid.editMode) );
domVisitor2.addHandler( 'li', cleanUpLIHack.bind( null, env ) );
- domVisitor2.addHandler( null, saveDataParsoid );
+ domVisitor2.addHandler( null, saveDataParsoid.bind(null, this.options)
);
this.processors.push(domVisitor2.traverse.bind(domVisitor2));
}
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index f78e751..60f4993 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -3114,9 +3114,6 @@
if (/\bmw:(?:Transclusion\b|Param\b|Extension\/[^\s]+)/.test(typeOf)) {
return {
handle: function () {
- // In RT-testing mode, there will not be any
edits to tpls/extensions.
- // So, use original source to eliminate
spurious diffs showing up
- // in RT testing results.
var src, dataMW;
if
(/\bmw:(Transclusion\b|Param\b)/.test(typeOf)) {
dataMW =
JSON.parse(node.getAttribute("data-mw"));
@@ -3134,7 +3131,17 @@
console.error("ERROR: Should not have
come here!");
}
- if (src) {
+ // FIXME: Just adding this here temporarily
till we go in and
+ // clean this up and strip this out if we can
verify that data-mw
+ // is going to be present always when necessary
and indicate that
+ // a missing data-mw is either a parser bug or
a client error.
+ //
+ // Fallback: should be exercised only in
exceptional situations.
+ if (src === undefined && state.env.page.src &&
isValidDSR(dp.dsr)) {
+ src = state.getOrigSrc(dp.dsr[0],
dp.dsr[1]);
+ }
+
+ if (src !== undefined) {
self.emitWikitext(src, state, cb, node);
return
self.skipOverEncapsulatedContent(node);
} else {
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index 735269d..1cbc9b7 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -494,7 +494,6 @@
add("wt2html", "Language parser function");
add("wt2html", "Special parser function");
add("wt2html", "1. SOL-sensitive wikitext tokens as template-args");
-add("wt2html", "Ref: 9. unclosed comments should not leak out of ref-body");
add("wt2html", "Headings: 2. Outside heading nest on a single line
<h1>foo</h1>*bar");
add("wt2html", "Headings: 6a. Heading chars in SOL context (with trailing
spaces)");
add("wt2html", "Headings: 6b. Heading chars in SOL context (with trailing
newlines)");
--
To view, visit https://gerrit.wikimedia.org/r/75658
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I788b3099d306d0fe4d368af42a90999c7f21ace1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits