Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/280760

Change subject: Move dp.src handlers to their respective dom handlers
......................................................................

Move dp.src handlers to their respective dom handlers

 * That these cases still remain in the encapsulated content handler
   looks like a relic of 8939c692.

 * This permits better fallback behaviour when the reuse test fails.
   (see the entity encoding change in the blacklisted test).

 * The newly failing test is because the case now falls to the figure
   handler, which doesn't know about dp.src.  That seems acceptable
   though, since the output is the same as the html2wt test, where we've
   removed the dp.src.  Also, we're only adding the placeholder marking
   so the content is uneditable.  Presumably that means selser will take
   care of it.

Change-Id: I430ee902a80dda199683937eae6dca02d50c3606
---
M lib/html2wt/DOMHandlers.js
M tests/parserTests-blacklist.js
2 files changed, 127 insertions(+), 141 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/60/280760/1

diff --git a/lib/html2wt/DOMHandlers.js b/lib/html2wt/DOMHandlers.js
index e9edd17..ea153be 100644
--- a/lib/html2wt/DOMHandlers.js
+++ b/lib/html2wt/DOMHandlers.js
@@ -528,6 +528,21 @@
                !(/^mw:Includes\//.test(node.getAttribute('typeof'))));
 }
 
+// Uneditable forms wrapped with mw:Placeholder tags OR unedited nowikis
+function emitPlaceholderSrc(node, state) {
+       var dp = DU.getDataParsoid(node);
+       if (/<nowiki\s*\/>/.test(dp.src)) {
+               state.hasSelfClosingNowikis = true;
+       }
+       // FIXME: Should this also check for tabs and plain space
+       // chars interspersed with newlines?
+       if (dp.src.match(/^\n+$/)) {
+               state.setSep((state.sep.src || '') + dp.src);
+       } else {
+               state.serializer.emitWikitext(dp.src, node);
+       }
+}
+
 /**
  * A map of `domHandler`s keyed on nodeNames.
  *
@@ -964,6 +979,11 @@
                        var property = node.getAttribute('property');
                        var dp = DU.getDataParsoid(node);
 
+                       if (dp.src !== undefined &&
+                                       
/(^|\s)mw:Placeholder(\/\w*)?$/.test(type)) {
+                               return emitPlaceholderSrc(node, state);
+                       }
+
                        // Check for property before type so that page 
properties with
                        // templated attrs roundtrip properly.
                        // Ex: {{DEFAULTSORT:{{echo|foo}} }}
@@ -1065,9 +1085,14 @@
        },
        span: {
                handle: Promise.method(function(node, state, wrapperUnmodified) 
{
+                       var dp = DU.getDataParsoid(node);
                        var type = node.getAttribute('typeof');
+                       var contentSrc = node.textContent || node.innerHTML;
                        if (isRecognizedSpanWrapper(type)) {
                                if (type === 'mw:Nowiki') {
+                                       if (dp.src !== undefined && contentSrc 
=== dp.src) {
+                                               return emitPlaceholderSrc(node, 
state);
+                                       }
                                        state.emitChunk('<nowiki>', node);
                                        return 
Promise.reduce(Array.from(node.childNodes), function(_, child) {
                                                if (DU.isElt(child)) {
@@ -1094,7 +1119,9 @@
                                } else if (/(?:^|\s)mw\:Entity/.test(type) && 
node.childNodes.length === 1) {
                                        // handle a new mw:Entity (not handled 
by selser) by
                                        // serializing its children
-                                       if (DU.isText(node.firstChild)) {
+                                       if (dp.src !== undefined && contentSrc 
=== dp.srcContent) {
+                                               
state.serializer.emitWikitext(dp.src, node);
+                                       } else if (DU.isText(node.firstChild)) {
                                                state.emitChunk(
                                                        
Util.entityEncodeAll(node.firstChild.nodeValue),
                                                        node.firstChild);
@@ -1103,7 +1130,9 @@
                                                return 
state.serializeChildren(node);
                                        }
                                } else if 
(/(^|\s)mw:Placeholder(\/\w*)?/.test(type)) {
-                                       if (/(^|\s)mw:Placeholder(\s|$)/ &&
+                                       if (dp.src !== undefined) {
+                                               return emitPlaceholderSrc(node, 
state);
+                                       } else if (/(^|\s)mw:Placeholder(\s|$)/ 
&&
                                                node.childNodes.length === 1 &&
                                                DU.isText(node.firstChild) &&
                                                // See the DisplaySpace hack in 
the urltext rule
@@ -1119,7 +1148,6 @@
                                        }
                                }
                        } else {
-                               var dp = DU.getDataParsoid(node);
                                var kvs = 
DU.getAttributeKVArray(node).filter(function(kv) {
                                        return !/^data-parsoid/.test(kv.k) &&
                                                !(kv.k === 'id' && 
/^mw[\w-]{2,}$/.test(kv.v));
@@ -1324,155 +1352,112 @@
  * Function returning `domHandler`s for nodes with encapsulated content.
  */
 var _getEncapsulatedContentHandler = function(n) {
-       var self = this;
-       var env = this.env;
-       var dp = DU.getDataParsoid(n);
-       var typeOf, dataMw;
+       if (!DU.isFirstEncapsulationWrapperNode(n)) {
+               return null;
+       }
+       return {
+               handle: Promise.method(function(node, state, wrapperUnmodified) 
{
+                       var env = state.env;
+                       var self = state.serializer;
+                       var dp = DU.getDataParsoid(node);
+                       var dataMw = DU.getDataMw(node);
+                       var p;
+                       var typeOf = node.getAttribute('typeof') || '';
+                       if (/(?:^|\s)mw:Transclusion(?=$|\s)/.test(typeOf)) {
+                               if (dataMw.parts) {
+                                       p = self._buildTemplateWT(node, 
dataMw.parts);
+                               } else if (dp.src) {
+                                       env.log("error", "data-mw missing in: " 
+ node.outerHTML);
+                                       p = Promise.resolve(dp.src);
+                               } else {
+                                       throw new Error("Cannot serialize 
transclusion without data-mw.parts or data-parsoid.src.");
+                               }
+                       } else if (/(?:^|\s)mw:Param(?=$|\s)/.test(typeOf)) {
+                               if (dp.src) {
+                                       p = Promise.resolve(dp.src);
+                               } else {
+                                       throw new Error("No source for 
params.");
+                               }
+                       } else if (/(?:^|\s)mw:Extension\//.test(typeOf)) {
+                               if (!dataMw.name && !dp.src) {
+                                       // If there was no typeOf name, and no 
dp.src, try getting
+                                       // the name out of the mw:Extension 
type. This will
+                                       // generate an empty extension tag, but 
it's better than
+                                       // just an error.
+                                       var extGivenName = 
typeOf.replace(/(?:^|\s)mw:Extension\/([^\s]+)/, '$1');
+                                       if (extGivenName) {
+                                               env.log('error', 'no data-mw 
name for extension in: ', node.outerHTML);
+                                               dataMw.name = extGivenName;
+                                       }
+                               }
+                               if (dataMw.name) {
+                                       var nativeExt = 
env.conf.wiki.extensionTags.get(dataMw.name.toLowerCase());
+                                       if (nativeExt && 
nativeExt.serialHandler && nativeExt.serialHandler.handle) {
+                                               p = 
nativeExt.serialHandler.handle(node, state, wrapperUnmodified);
+                                       } else {
+                                               p = 
self.defaultExtensionHandler(node, state);
+                                       }
+                               } else if (dp.src) {
+                                       env.log('error', 'data-mw missing in: ' 
+ node.outerHTML);
+                                       p = Promise.resolve(dp.src);
+                               } else {
+                                       throw new Error('Cannot serialize 
extension without data-mw.name or data-parsoid.src.');
+                               }
+                       } else {
+                               throw new Error('Should never reach here');
+                       }
+                       return p.then(function(s) {
+                               state.singleLineContext.disable();
+                               self.emitWikitext(handleListPrefix(node, state) 
+ s, node);
+                               state.singleLineContext.pop();
+                               return DU.skipOverEncapsulatedContent(node);
+                       });
+               }),
+               sepnls: {
+                       // XXX: This is questionable, as the template can expand
+                       // to newlines too. Which default should we pick for new
+                       // content? We don't really want to make separator
+                       // newlines in HTML significant for the semantics of the
+                       // template content.
+                       before: function(node, otherNode, state) {
+                               var env = state.env;
+                               var typeOf = node.getAttribute('typeof') || '';
+                               var dataMw = DU.getDataMw(node);
+                               var dp = DU.getDataParsoid(node);
 
-       if (DU.isFirstEncapsulationWrapperNode(n)) {
-               return {
-                       handle: Promise.method(function(node, state, 
wrapperUnmodified) {
-                               var p;
-                               typeOf = node.getAttribute('typeof') || '';
-                               if 
(/(?:^|\s)mw:Transclusion(?=$|\s)/.test(typeOf)) {
-                                       dataMw = DU.getDataMw(node);
-                                       if (dataMw.parts) {
-                                               p = self._buildTemplateWT(node, 
dataMw.parts);
-                                       } else if (dp.src) {
-                                               env.log("error", "data-mw 
missing in: " + node.outerHTML);
-                                               p = Promise.resolve(dp.src);
-                                       } else {
-                                               throw new Error("Cannot 
serialize transclusion without data-mw.parts or data-parsoid.src.");
-                                       }
-                               } else if 
(/(?:^|\s)mw:Param(?=$|\s)/.test(typeOf)) {
-                                       if (dp.src) {
-                                               p = Promise.resolve(dp.src);
-                                       } else {
-                                               throw new Error("No source for 
params.");
-                                       }
-                               } else if 
(/(?:^|\s)mw:Extension\//.test(typeOf)) {
-                                       dataMw = DU.getDataMw(node);
-                                       if (!dataMw.name && !dp.src) {
-                                               // If there was no typeOf name, 
and no dp.src, try getting
-                                               // the name out of the 
mw:Extension type. This will
-                                               // generate an empty extension 
tag, but it's better than
-                                               // just an error.
-                                               var extGivenName = 
typeOf.replace(/(?:^|\s)mw:Extension\/([^\s]+)/, '$1');
-                                               if (extGivenName) {
-                                                       env.log('error', 'no 
data-mw name for extension in: ', node.outerHTML);
-                                                       dataMw.name = 
extGivenName;
-                                               }
-                                       }
+                               // Handle native extension constraints.
+                               if (/(?:^|\s)mw:Extension\//.test(typeOf) &&
+                                               // Only apply to plain 
extension tags.
+                                               
!/(?:^|\s)mw:Transclusion(?:\s|$)/.test(typeOf)) {
                                        if (dataMw.name) {
                                                var nativeExt = 
env.conf.wiki.extensionTags.get(dataMw.name.toLowerCase());
-                                               if (nativeExt && 
nativeExt.serialHandler && nativeExt.serialHandler.handle) {
-                                                       p = 
nativeExt.serialHandler.handle(node, state, wrapperUnmodified);
-                                               } else {
-                                                       p = 
self.defaultExtensionHandler(node, state);
+                                               if (nativeExt && 
nativeExt.serialHandler && nativeExt.serialHandler.before) {
+                                                       var ret = 
nativeExt.serialHandler.before(node, otherNode, state);
+                                                       if (ret !== null) { 
return ret; }
                                                }
-                                       } else if (dp.src) {
-                                               env.log('error', 'data-mw 
missing in: ' + node.outerHTML);
-                                               p = Promise.resolve(dp.src);
-                                       } else {
-                                               throw new Error('Cannot 
serialize extension without data-mw.name or data-parsoid.src.');
                                        }
-                               } else {
-                                       throw new Error('Should never reach 
here');
                                }
-                               return p.then(function(s) {
-                                       state.singleLineContext.disable();
-                                       
self.emitWikitext(handleListPrefix(node, state) + s, node);
-                                       state.singleLineContext.pop();
-                                       return 
DU.skipOverEncapsulatedContent(node);
-                               });
-                       }),
-                       sepnls: {
-                               // XXX: This is questionable, as the template 
can expand
-                               // to newlines too. Which default should we 
pick for new
-                               // content? We don't really want to make 
separator
-                               // newlines in HTML significant for the 
semantics of the
-                               // template content.
-                               before: function(node, otherNode, state) {
-                                       typeOf = node.getAttribute('typeof') || 
'';
 
-                                       // Handle native extension constraints.
-                                       if 
(/(?:^|\s)mw:Extension\//.test(typeOf) &&
-                                                       // Only apply to plain 
extension tags.
-                                                       
!/(?:^|\s)mw:Transclusion(?:\s|$)/.test(typeOf)) {
-                                               dataMw = DU.getDataMw(node);
-                                               if (dataMw.name) {
-                                                       var nativeExt = 
env.conf.wiki.extensionTags.get(dataMw.name.toLowerCase());
-                                                       if (nativeExt && 
nativeExt.serialHandler && nativeExt.serialHandler.before) {
-                                                               var ret = 
nativeExt.serialHandler.before(node, otherNode, state);
-                                                               if (ret !== 
null) { return ret; }
-                                                       }
-                                               }
+                               // If this content came from a 
multi-part-template-block
+                               // use the first node in that block for 
determining
+                               // newline constraints.
+                               if (dp.firstWikitextNode) {
+                                       var nodeName = 
dp.firstWikitextNode.toLowerCase();
+                                       var h = tagHandlers.get(nodeName);
+                                       if (!h && dp.stx === 'html' && nodeName 
!== 'a') {
+                                               h = htmlElementHandler;
                                        }
-
-                                       // If this content came from a 
multi-part-template-block
-                                       // use the first node in that block for 
determining
-                                       // newline constraints.
-                                       var nodeDP = DU.getDataParsoid(node);
-                                       if (nodeDP.firstWikitextNode) {
-                                               var nodeName = 
nodeDP.firstWikitextNode.toLowerCase();
-                                               var h = 
tagHandlers.get(nodeName);
-                                               if (!h && nodeDP.stx === 'html' 
&& nodeName !== 'a') {
-                                                       h = htmlElementHandler;
-                                               }
-                                               if (h && h.sepnls && 
h.sepnls.before) {
-                                                       return 
h.sepnls.before(node, otherNode, state);
-                                               }
+                                       if (h && h.sepnls && h.sepnls.before) {
+                                               return h.sepnls.before(node, 
otherNode, state);
                                        }
+                               }
 
-                                       // default behavior
-                                       return { min: 0, max: 2 };
-                               },
+                               // default behavior
+                               return { min: 0, max: 2 };
                        },
-               };
-       }
-
-       if (dp.src !== undefined) {
-               // Uneditable forms wrapped with mw:Placeholder tags
-               // OR unedited nowikis
-               typeOf = n.getAttribute('typeof') || '';
-               if (/(^|\s)mw:Placeholder(\/\w*)?$/.test(typeOf) ||
-                               (typeOf === "mw:Nowiki" && n.textContent === 
dp.src)) {
-                       // implement generic src round-tripping:
-                       // return src, and drop the generated content
-                       return {
-                               handle: Promise.method(function(node, state, 
wrapperUnmodified) {
-                                       if (/<nowiki\s*\/>/.test(dp.src)) {
-                                               state.hasSelfClosingNowikis = 
true;
-                                       }
-                                       // FIXME: Should this also check for 
tabs and plain space
-                                       // chars interspersed with newlines?
-                                       if (dp.src.match(/^\n+$/)) {
-                                               state.setSep((state.sep.src || 
'') + dp.src);
-                                       } else {
-                                               self.emitWikitext(dp.src, node);
-                                       }
-                               }),
-                       };
-               }
-
-               // Entities
-               if (typeOf === "mw:Entity" && n.childNodes.length === 1) {
-                       var contentSrc = n.textContent || n.innerHTML;
-                       return {
-                               handle: Promise.method(function(node, state, 
wrapperUnmodified) {
-                                       if (contentSrc === dp.srcContent) {
-                                               self.emitWikitext(dp.src, node);
-                                       } else {
-                                               self.emitWikitext(contentSrc, 
node);
-                                       }
-                               }),
-                       };
-               }
-       }
-
-       return null;
+               },
+       };
 };
-
 
 /**
  * Just the handle for the htmlElementHandler defined below.
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index f6d1503..b4d8ac4 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -336,6 +336,7 @@
 add("wt2wt", "Templates: HTML Tag: 9. Multiple template-generated attributes", 
"<div {{echo|1=id=\"v1\" title=\"foo\"}} title=\"foo\">bar</div>");
 add("wt2wt", "Templates: HTML Tables: 5. Proper fostering of categories from 
inside", "[[Category:foo1]]\n<table><tr><td>foo</td></tr></table>\n<!--Two 
categories (Bug 
50330)-->\n[[Category:bar1]]\n[[Category:bar2]]\n<table><tr><td>foo</td></tr></table>");
 add("wt2wt", "Allow empty links in image captions (Bug 60753)", 
"[[File:Foobar.jpg|thumb|Caption 
[[Link1]]\n<nowiki>[[]]</nowiki>\n[[Link2]]\n]]");
+add("wt2wt", "Image with multiple attributes from the same template", 
"[[File:Foobar.jpg|right|Caption text]]");
 add("wt2wt", "Image with multiple widths -- use last", 
"[[File:Foobar.jpg|300px|caption]]\n");
 add("wt2wt", "Render invalid page names as plain text (bug 51090)", 
"[[./../foo|bar]]\n[[foo�|bar]]\n[[foo/.|bar]]\n[[foo/..|bar]]\n<nowiki>[[foo~~~bar]]</nowiki>\n[[foo>bar]]\n[[foo[bar]]\n[[.]]\n[[..]]\n[[foo././bar]]\n\n[[{{echo|./../foo}}<nowiki>|bar]]</nowiki>\n[[{{echo|foo/.}}<nowiki>|bar]]</nowiki>\n[[{{echo|foo/..}}<nowiki>|bar]]</nowiki>\n[[{{echo|foo~~~~bar}}<nowiki>]]</nowiki>\n[[{{echo|foo>bar}}<nowiki>]]</nowiki>\n[[{{echo|foo././bar}}<nowiki>]]</nowiki>\n[[{{echo|foo{bar}}<nowiki>]]</nowiki>\n[[{{echo|foo}bar}}<nowiki>]]</nowiki>\n[[{{echo|foo[bar}}<nowiki>]]</nowiki>\n[[{{echo|foo]bar}}<nowiki>]]</nowiki>\n[[{{echo|foo<bar}}<nowiki>]]</nowiki>\n");
 add("wt2wt", "Handling of sections up to level 6 and beyond", "= Level 1 
Heading=\n== Level 2 Heading==\n=== Level 3 Heading===\n==== Level 4 
Heading====\n===== Level 5 Heading=====\n====== Level 6 
Heading======\n======<nowiki>= Level 7 
Heading=</nowiki>======\n======<nowiki>== Level 8 
Heading==</nowiki>======\n======<nowiki>=== Level 9 
Heading===</nowiki>======\n======<nowiki>==== Level 10 
Heading====</nowiki>======\n");
@@ -361,7 +362,7 @@
 add("wt2wt", "Fuzz testing: Parser24", "{{{|\n<u class=\"|\" 
{{{{SSSll!!!!!!!VVVV)]]][[Special:*xxxxxxx--><noinclude>}}}}>\n<br 
style=\"onmouseover='alert(document.cookie);' \" />\n\nMOVE YOUR MOUSE CURSOR 
OVER THIS TEXT\n{|\n\n|\n|}");
 add("wt2wt", "Inline wiki vs wiki block nesting", "'''Bold paragraph'''\n\nNew 
wiki paragraph\n");
 add("wt2wt", "Mixing markup for italics and bold", 
"'<nowiki/>''bold'<nowiki/>'''''bold''bolditalics'''''\n");
-add("wt2wt", "Illegal character references (T106578)", "; Null: &#00;\n; FF: 
&#xC;\n; CR: \n; Control (low): &#8;\n; Control (high): &#x7F; &#x9F;\n; 
Surrogate: &#xD83D;&#xDCA9;\n; This is an okay astral character: &#x1F4A9;");
+add("wt2wt", "Illegal character references (T106578)", "; Null: &#00;\n; FF: 
&#xC;\n; CR: &#x0A;\n; Control (low): &#8;\n; Control (high): &#x7F; &#x9F;\n; 
Surrogate: &#xD83D;&#xDCA9;\n; This is an okay astral character: &#x1F4A9;");
 add("wt2wt", "Image with page parameter", "[[File:LoremIpsum.djvu]]\n");
 add("wt2wt", "Don't fall for the self-closing div", "<div>hello world</div>");
 add("wt2wt", "Parsing of overlapping (improperly nested) inline html tags", 
"<span><s>x</span>\n");

-- 
To view, visit https://gerrit.wikimedia.org/r/280760
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I430ee902a80dda199683937eae6dca02d50c3606
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to