Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/201087

Change subject: Combine the cleanup and template cleanup passes
......................................................................

Combine the cleanup and template cleanup passes

 * The template cleanup pass wasn't having an effect on v2 output.

 * The blacklist changes are a little unclear. Not sure if it was a bug
   to stop processing the entire template when encountering a
   mw-reference-text class, or whether the intention was to stop for
   just that node, or for that node and all its children. Hopefully
   review can shed some light.

Change-Id: I4c6452e49563cf3923a5bb69485f1ea7d0c13e33
---
D lib/dom.cleanUpTemplates.js
M lib/dom.cleanup.js
M lib/dom.linter.js
M lib/mediawiki.DOMPostProcessor.js
M lib/mediawiki.DOMUtils.js
M tests/parserTests-blacklist.js
6 files changed, 223 insertions(+), 256 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/87/201087/1

diff --git a/lib/dom.cleanUpTemplates.js b/lib/dom.cleanUpTemplates.js
deleted file mode 100644
index e6d372d..0000000
--- a/lib/dom.cleanUpTemplates.js
+++ /dev/null
@@ -1,44 +0,0 @@
-"use strict";
-
-var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
-
-function stripEmptyElements(node, tplInfo, options) {
-       // Cannot delete if:
-       // * it is the first node since that carries the transclusion
-       //   information (typeof, data-mw). We could delete and migrate
-       //   the info over, but more pain than worth it. We can reconsider if
-       //   this ever becomes an issue.
-       // * it has any attributes.
-       if (!node.firstChild && node !== tplInfo.first &&
-               node.nodeName in {'TR':1, 'LI':1} && node.attributes.length === 
0)
-       {
-               DU.deleteNode(node);
-       }
-}
-
-function removeDataParsoid(node, tplInfo, options) {
-       if (node !== tplInfo.first) {
-               var dp = DU.getDataParsoid(node);
-               // We can't remove data-parsoid from inside <references> text, 
as that's
-               // the only HTML representation we have left for it.
-               if (node.getAttribute('class') === "mw-reference-text") {
-                       tplInfo.done = true;
-                       return;
-               }
-               // TODO: We can't remove dp from nodes with stx information
-               // right now, as the serializer needs that information to know 
which
-               // content model the text came from to emit the right newline 
separators.
-               // For example, both "a\n\nb" and "<p>a</p><p>b/p>" both 
generate
-               // identical html but serialize to different wikitext.
-               if (!dp.stx) {
-                       node.removeAttribute('data-parsoid');
-               }
-       }
-}
-
-if (typeof module === "object") {
-       module.exports.stripEmptyElements =
-               DU.traverseTplOrExtNodes.bind(DU, stripEmptyElements);
-       module.exports.removeDataParsoid =
-               DU.traverseTplOrExtNodes.bind(DU, removeDataParsoid);
-}
diff --git a/lib/dom.cleanup.js b/lib/dom.cleanup.js
index 38f35fe..5d5bc4c 100644
--- a/lib/dom.cleanup.js
+++ b/lib/dom.cleanup.js
@@ -33,74 +33,115 @@
        }
 }
 
-/**
- * Perform some final cleaup and save data-parsoid attributes on each node.
- */
-function cleanupAndSaveDataParsoid( env, node, atTopLevel ) {
-       if ( DU.isElt(node) ) {
-               var dp = DU.getDataParsoid( node );
-               if (dp) {
-                       // Delete empty auto-inserted elements
-                       var next = node.nextSibling;
-                       if (dp.autoInsertedStart && dp.autoInsertedEnd &&
-                               !DU.isTplOrExtToplevelNode(node) &&
-                               (node.childNodes.length === 0 ||
-                               node.childNodes.length === 1 && 
!DU.isElt(node.firstChild) && /^\s*$/.test(node.textContent)))
-                       {
-                               if (node.firstChild) {
-                                       // migrate the ws out
-                                       
node.parentNode.insertBefore(node.firstChild, node);
-                               }
-                               DU.deleteNode(node);
-                               return next;
-                       }
-
-                       dp.tagId = undefined;
-
-                       var validDataMW = 
!!Object.keys(DU.getDataMw(node)).length;
-                       if ( !validDataMW ) {
-                               // strip it
-                               DU.setDataMw(node, undefined);
-                       }
-
-                       // Remove dp.src from elements that have valid data-mw 
and dsr. This
-                       // should reduce data-parsoid bloat.
-                       if (validDataMW && Util.isValidDSR(dp.dsr)) {
-                               dp.src = undefined;
-                       } else if 
(/(?:^|\s)mw:(Transclusion|Extension)(\/[^\s]+)*(?=$|\s)/.test(node.getAttribute("typeof"))
 &&
-                               (!atTopLevel || !dp.tsr)) {
-                               // Transcluded nodes will not have dp.tsr set 
and dont need dp.src either
-                               dp.src = undefined;
-                       }
-
-                       // Remove tsr
-                       if (dp.tsr) {
-                               dp.tsr = undefined;
-                       }
-
-                       // Remove temporary information
-                       dp.tmp = undefined;
-
-                       // Make dsr zero-range for fostered content
-                       // to prevent selser from duplicating this content
-                       // outside the table from where this came.
-                       //
-                       // But, do not zero it out if the node has template 
encapsulation
-                       // information.  That will be disastrous (see bug 
52638, 52488).
-                       if (dp.fostered && dp.dsr && 
!DU.isFirstEncapsulationWrapperNode(node)) {
-                               dp.dsr[0] = dp.dsr[1];
-                       }
-
-                       if ( atTopLevel && env.storeDataParsoid ) {
-                               DU.stripDataParsoid( env, node, dp );
-                       }
-               }
-               DU.saveDataAttribs( node );
+function stripEmptyElements( node, env, atTopLevel, tplInfo ) {
+       if ( !atTopLevel || !tplInfo || !DU.isElt( node ) ) {
+               return;
        }
-       return true;
+       // Cannot delete if:
+       // * it is the first node since that carries the transclusion
+       //   information (typeof, data-mw). We could delete and migrate
+       //   the info over, but more pain than worth it. We can reconsider if
+       //   this ever becomes an issue.
+       // * it has any attributes.
+       if ( !node.firstChild && node !== tplInfo.first &&
+               node.nodeName in {'TR':1, 'LI':1} && node.attributes.length === 0
+       ) {
+               DU.deleteNode(node);
+       }
 }
 
-if (typeof module === "object") {
-       module.exports.cleanupAndSaveDataParsoid = cleanupAndSaveDataParsoid;
+/**
+ * Perform some final cleanup and save data-parsoid attributes on each node.
+ */
+function cleanupAndSaveDataParsoid( node, env, atTopLevel, tplInfo ) {
+       if ( !DU.isElt( node ) ) { return; }
+
+       var dp = DU.getDataParsoid( node );
+
+       // Delete empty auto-inserted elements
+       if ( dp.autoInsertedStart && dp.autoInsertedEnd &&
+               !DU.isTplOrExtToplevelNode( node ) && (
+                       node.childNodes.length === 0 || (
+                               node.childNodes.length === 1 && !DU.isElt( 
node.firstChild ) &&
+                               /^\s*$/.test(node.textContent)
+                       )
+       ) ) {
+               var next = node.nextSibling;
+               if ( node.firstChild ) {
+                       // migrate the ws out
+                       node.parentNode.insertBefore( node.firstChild, node );
+               }
+               DU.deleteNode( node );
+               return next;
+       }
+
+       var validDataMW = !!Object.keys( DU.getDataMw( node ) ).length;
+       if ( !validDataMW ) {
+               // Strip it
+               DU.setDataMw( node, undefined );
+       }
+
+       // Remove dp.src from elements that have valid data-mw and dsr. This
+       // should reduce data-parsoid bloat.
+       if ( validDataMW && Util.isValidDSR(dp.dsr) ) {
+               dp.src = undefined;
+       } else if ( tplInfo && tplInfo.first === node && (!atTopLevel || 
!dp.tsr) ) {
+               // Transcluded nodes will not have dp.tsr set
+               // and don't need dp.src either.
+               dp.src = undefined;
+       }
+
+       // Remove tsr
+       if ( dp.tsr ) {
+               dp.tsr = undefined;
+       }
+
+       // Remove temporary information
+       dp.tmp = undefined;
+       dp.tagId = undefined;
+
+       // Make dsr zero-range for fostered content
+       // to prevent selser from duplicating this content
+       // outside the table from where this came.
+       //
+       // But, do not zero it out if the node has template encapsulation
+       // information.  That will be disastrous (see bug 52638, 52488).
+       if ( dp.fostered && dp.dsr && !DU.isFirstEncapsulationWrapperNode(node) 
) {
+               dp.dsr[0] = dp.dsr[1];
+       }
+
+       if ( atTopLevel ) {
+               // Strip data-parsoid from templated content, where not 
necessary.
+               if ( tplInfo &&
+                       // Keep for the first node.
+                       tplInfo.first !== node &&
+                       // We can't remove data-parsoid from inside 
<references> text,
+                       // as that's the only HTML representation we have left 
for it.
+                       node.getAttribute('class') !== "mw-reference-text" &&
+                       // TODO: We can't remove dp from nodes with stx 
information
+                       // right now, as the serializer needs that information 
to know
+                       // which content model the text came from to emit the 
right
+                       // newline separators. For example, both "a\n\nb" and
+                       // "<p>a</p><p>b/p>" both generate identical html but 
serialize
+                       // to different wikitext.
+                       !dp.stx
+                ) {
+                       DU.getNodeData( node ).parsoid = undefined;
+                       node.removeAttribute( "data-parsoid" );
+               }
+               // Store for v2 purposes.
+               else if ( env.storeDataParsoid ) {
+                       DU.stripDataParsoid( env, node, dp );
+               }
+       }
+
+       DU.saveDataAttribs( node );
+}
+
+if ( typeof module === "object" ) {
+       module.exports.cleanupAndSaveDataParsoid =
+               DU.traverseWithTplOrExtInfo.bind( DU, cleanupAndSaveDataParsoid 
);
+       module.exports.stripEmptyElements =
+               DU.traverseWithTplOrExtInfo.bind(DU, stripEmptyElements);
        module.exports.stripMarkerMetas = stripMarkerMetas;
 }
diff --git a/lib/dom.linter.js b/lib/dom.linter.js
index 96d0470..276c16d 100644
--- a/lib/dom.linter.js
+++ b/lib/dom.linter.js
@@ -28,11 +28,10 @@
 * 
https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec#Transclusion_content
 */
 
-function logTransclusions(env, c) {
-
-       var dmw = DU.getDataMw(c);
+function logTransclusions( env, node, dp, tplInfo ) {
+       var dmw = DU.getDataMw(node);
        if ( dmw ) {
-               var dsr = DU.getDataParsoid(c).dsr;
+               var dsr = tplInfo.dsr;
                if (dmw.parts) {
                        var parts = dmw.parts, lintObj;
                        if (typeof parts[0] === 'string' || typeof 
parts[parts.length - 1] === 'string') {
@@ -53,7 +52,6 @@
                        }
                }
        }
-       return;
 }
 
 /*
@@ -64,14 +62,14 @@
 * 2. Unclosed Start Tags
 * 3. Stripped Tags
 */
-function logTreeBuilderFixup(env, c, dp, tmpl) {
+function logTreeBuilderFixup(env, c, dp, tplInfo) {
 
        var cNodeName = c.nodeName.toLowerCase(),
                dsr = dp.dsr,
                lintObj, inTransclusion = false;
 
-       if (tmpl) {
-               dsr = tmpl.dsr;
+       if (tplInfo) {
+               dsr = tplInfo.dsr;
                inTransclusion = true;
        }
 
@@ -94,13 +92,13 @@
                DU.hasLiteralHTMLMarker(dp) &&
                dsr) ) {
 
-               if (dp.autoInsertedEnd === true && (tmpl || dsr[2]>0) ) {
+               if (dp.autoInsertedEnd === true && (tplInfo || dsr[2]>0) ) {
                        lintObj = { src:env.page.src, dsr:dsr,
                                                tip:'Add End Tag to Fix this', 
inTransclusion:inTransclusion};
                        env.log('lint/missing-end-tag', lintObj);
                }
 
-               if (dp.autoInsertedStart === true && (tmpl ||  dsr[3]>0) ) {
+               if (dp.autoInsertedStart === true && (tplInfo ||  dsr[3]>0) ) {
                        lintObj = { src:env.page.src, dsr:dsr,
                                                tip:'Add Start Tag to Fix 
this', inTransclusion:inTransclusion};
                        env.log('lint/missing-start-tag', lintObj);
@@ -119,7 +117,7 @@
 *
 * Here foo gets Ignored and is found in the data-parsoid of <tr> tags.
 */
-function logIgnoredTableAttr(env, c, dp, tmpl) {
+function logIgnoredTableAttr(env, c, dp, tplInfo) {
        var dsr, inTransclusion = false;
        if (DU.hasNodeName(c, "table")) {
                var fc = c.firstChild;
@@ -141,8 +139,8 @@
                                                        }
 
                                                        if (wc) {
-                                                               if ( tmpl ) {
-                                                                  dsr = 
tmpl.dsr;
+                                                               if ( tplInfo ) {
+                                                                  dsr = 
tplInfo.dsr;
                                                                   
inTransclusion = true;
                                                                } else {
                                                                   dsr = dp.dsr;
@@ -172,24 +170,24 @@
 *
 * Here 'foo' gets fostered out.
 */
-function logFosteredContent(env, c, dp, tmpl, nextSibling){
-
-       var dsr, inTransclusion = false;
-       var fosteredSRC = c.innerHTML;
-       while (nextSibling && !DU.hasNodeName(nextSibling,'table')) {
+function logFosteredContent( env, node, dp, tplInfo ) {
+       var fosteredSRC = node.innerHTML,
+               nextSibling = node.nextSibling;
+       while ( nextSibling && !DU.hasNodeName(nextSibling, 'table') ) {
                fosteredSRC += nextSibling.innerHTML;
-               if ( nextSibling === tmpl.last ) {
-                       tmpl.done = true;
+               if ( tplInfo && nextSibling === tplInfo.last ) {
+                       tplInfo.clear = true;
                }
                nextSibling = nextSibling.nextSibling;
        }
-       if (tmpl) {
-               dsr = tmpl.dsr;
+       var dsr, inTransclusion = false;
+       if ( tplInfo ) {
+               dsr = tplInfo.dsr;
                inTransclusion = true;
        } else {
                dsr = DU.getDataParsoid(nextSibling).dsr;
        }
-       var lintObj = { src:fosteredSRC, dsr:dsr, inTransclusion:inTransclusion 
};
+       var lintObj = { src: fosteredSRC, dsr: dsr, inTransclusion: 
inTransclusion };
        env.log('lint/fostered', lintObj);
        return nextSibling;
 }
@@ -200,13 +198,13 @@
 * See - http://www.w3.org/TR/html5/obsolete.html#non-conforming-features
 *
 */
-function logObsoleteHTMLTags(env, c, dp, tmpl) {
+function logObsoleteHTMLTags(env, c, dp, tplInfo) {
 
        var dsr = dp.dsr, inTransclusion = false;
        var re = /^(BIG|CENTER|FONT|STRIKE|TT)$/;
 
-       if (tmpl) {
-               dsr = tmpl.dsr;
+       if (tplInfo) {
+               dsr = tplInfo.dsr;
                inTransclusion = true;
        }
 
@@ -222,15 +220,15 @@
 * See -         https://www.mediawiki.org/wiki/Help:Images#Syntax
 *
 */
-function logBogusImageOptions(env, c, dp, tmpl) {
+function logBogusImageOptions(env, c, dp, tplInfo) {
 
        if(DU.isGeneratedFigure(c)) {
                var optlist = dp.optList;
                optlist.forEach(function (item) {
                        if (item.ck === "bogus") {
                                var dsr, inTransclusion = false;
-                               if ( tmpl ) {
-                                       dsr = tmpl.dsr;
+                               if ( tplInfo ) {
+                                       dsr = tplInfo.dsr;
                                        inTransclusion = true;
                                } else {
                                        dsr = dp.dsr;
@@ -242,65 +240,41 @@
        }
 }
 
-function logWikitextFixups(node, env, options, atTopLevel, tmpl) {
+function logWikitextFixups( node, env, atTopLevel, tplInfo ) {
        // For now, don't run linter in subpipelines.
        // Only on the final DOM for the top-level page.
-       if (!atTopLevel) {
+       if ( !atTopLevel || !DU.isElt( node ) ) {
                return;
        }
 
-       var c = node.firstChild;
+       var dp = DU.getDataParsoid( node );
 
-       while (c) {
-               var nextSibling = c.nextSibling,
-                       dp = DU.getDataParsoid(c);
-
-               // Store info from the first node of an about id group.
-               // Nested templates aren't an issue because we expand top-level
-               // templates with the mediawiki api.
-               if ( !tmpl && DU.isTplOrExtToplevelNode(c) ) {
-                       var about = c.getAttribute('about');
-                       tmpl = {
-                               last: DU.getAboutSiblings(c, about).last(),
-                               dsr: dp.dsr,
-                               done: false
-                       };
-
-                       // Log transclusions with more than one part
-                       logTransclusions(env, c);
-               }
-
-               if (DU.isElt(c)) {
-
-                       // Log Tree Builder fixups
-                       logTreeBuilderFixup(env, c, dp, tmpl);
-
-                       // Log Ignored Table Attributes
-                       logIgnoredTableAttr(env, c, dp, tmpl);
-
-                       // Log obsolete HTML tags
-                       logObsoleteHTMLTags(env, c, dp, tmpl);
-
-                       // Log bogus image options
-                       logBogusImageOptions(env, c, dp, tmpl);
-
-                       if (dp.fostered) {
-                               // Log Fostered content
-                               nextSibling = logFosteredContent(env, c, dp, 
tmpl, nextSibling);
-                       } else if (c.childNodes.length > 0) {
-                               // Process subtree
-                               logWikitextFixups(c, env, options, atTopLevel, 
tmpl);
-                       }
-               }
-
-               if ( tmpl && (c === tmpl.last || tmpl.done) ) {
-                       tmpl = null;
-               }
-
-               c = nextSibling;
+       if ( tplInfo && tplInfo.first === node ) {
+               // Log transclusions with more than one part
+               logTransclusions( env, node, dp, tplInfo );
        }
+
+       // Log Tree Builder fixups
+       logTreeBuilderFixup( env, node, dp, tplInfo );
+
+       // Log Ignored Table Attributes
+       logIgnoredTableAttr( env, node, dp, tplInfo );
+
+       // Log obsolete HTML tags
+       logObsoleteHTMLTags( env, node, dp, tplInfo );
+
+       // Log bogus image options
+       logBogusImageOptions( env, node, dp, tplInfo );
+
+       var next;
+       if ( dp.fostered ) {
+               // Log Fostered content
+               next = logFosteredContent( env, node, dp, tplInfo, next );
+       }
+       return next;
 }
 
 if (typeof module === "object") {
-       module.exports.logWikitextFixups = logWikitextFixups;
+       module.exports.logWikitextFixups =
+               DU.traverseWithTplOrExtInfo.bind( DU, logWikitextFixups );
 }
diff --git a/lib/mediawiki.DOMPostProcessor.js 
b/lib/mediawiki.DOMPostProcessor.js
index 6420b8e..fea4127 100644
--- a/lib/mediawiki.DOMPostProcessor.js
+++ b/lib/mediawiki.DOMPostProcessor.js
@@ -11,7 +11,6 @@
        DU = require('./mediawiki.DOMUtils.js').DOMUtils,
        dumpDOM = require('./dom.dumper.js').dumpDOM,
        CleanUp = require('./dom.cleanup.js'),
-       cleanupAndSaveDataParsoid = CleanUp.cleanupAndSaveDataParsoid,
        computeDSR = require('./dom.computeDSR.js').computeDSR,
        processRefs = require('./dom.processRefs.js').processRefs,
        handleLinkNeighbours = 
require('./dom.t.handleLinkNeighbours.js').handleLinkNeighbours,
@@ -23,11 +22,9 @@
        migrateTemplateMarkerMetas = 
require('./dom.migrateTemplateMarkerMetas.js').migrateTemplateMarkerMetas,
        migrateTrailingNLs = 
require('./dom.migrateTrailingNLs.js').migrateTrailingNLs,
        TableFixups = require('./dom.t.TableFixups.js'),
-       stripMarkerMetas = CleanUp.stripMarkerMetas,
        unpackDOMFragments = 
require('./dom.t.unpackDOMFragments.js').unpackDOMFragments,
        wrapTemplates = require('./dom.wrapTemplates.js').wrapTemplates,
-       cleanUpTemplates = require('./dom.cleanUpTemplates.js'),
-       lintWikitextFixup = require('./dom.linter.js').logWikitextFixups;
+       logWikitextFixup = require('./dom.linter.js').logWikitextFixups;
 
 // map from mediawiki metadata names to RDFa property names
 var metadataMap = {
@@ -188,17 +185,18 @@
                                env.conf.parsoid.nativeExtensions.cite));
 
        // Strip empty elements from template content
-       this.processors.push(cleanUpTemplates.stripEmptyElements);
+       this.processors.push( CleanUp.stripEmptyElements );
 
-    if (env.conf.parsoid.linting) {
-        this.processors.push(lintWikitextFixup);
-    }
+       if ( env.conf.parsoid.linting ) {
+               this.processors.push( logWikitextFixup );
+       }
 
        var domVisitor2 = new DOMTraverser(env),
                tableFixer = new TableFixups.TableFixups(env);
        // 1. Strip marker metas -- removes left over marker metas (ex: metas
        //    nested in expanded tpl/extension output).
-       domVisitor2.addHandler( 'meta', stripMarkerMetas.bind(null, 
env.conf.parsoid.rtTestMode) );
+       domVisitor2.addHandler( 'meta',
+               CleanUp.stripMarkerMetas.bind( null, 
env.conf.parsoid.rtTestMode ) );
        // 2. Fix up DOM for li-hack.
        domVisitor2.addHandler( 'li', handleLIHack.bind( null, env ) );
        // 3. Fix up issues from templated table cells and table cell attributes
@@ -210,12 +208,7 @@
        // Save data.parsoid into data-parsoid html attribute.
        // Make this its own thing so that any changes to the DOM
        // don't affect other handlers that run alongside it.
-       var domVisitor3 = new DOMTraverser(env);
-       domVisitor3.addHandler( null, cleanupAndSaveDataParsoid.bind( null, env 
) );
-       this.processors.push(domVisitor3.traverse.bind(domVisitor3));
-
-       // Remove data-parsoid from transcluded content
-       this.processors.push(cleanUpTemplates.removeDataParsoid);
+       this.processors.push( CleanUp.cleanupAndSaveDataParsoid );
 }
 
 // Inherit from EventEmitter
diff --git a/lib/mediawiki.DOMUtils.js b/lib/mediawiki.DOMUtils.js
index 6e66f41..99375f7 100644
--- a/lib/mediawiki.DOMUtils.js
+++ b/lib/mediawiki.DOMUtils.js
@@ -260,19 +260,19 @@
                if ( !isElt(node) ) {
                        return;
                }
-               var data = this.getNodeData( node );
-               for (var key in data) {
+               var data = DU.getNodeData( node );
+               Object.keys( data ).forEach(function( key ) {
                        if ( key.match( /^tmp_/ ) !== null ) {
-                               continue;
+                               return;
                        }
                        var val = data[key];
                        if ( val && val.constructor === String ) {
                                node.setAttribute('data-' + key, val);
                        } else if (val instanceof Object) {
-                               this.setJSONAttribute(node, 'data-' + key, val);
+                               DU.setJSONAttribute(node, 'data-' + key, val);
                        }
                        // Else: throw error?
-               }
+               });
        },
 
        // Load and stores the data as JSON attributes on the nodes.
@@ -840,55 +840,58 @@
                }
        },
 
-       traverseTplOrExtNodes: function (cb, node, env, options, atTopLevel, 
tplInfo) {
-               // Don't bother with sub-pipelines
-               if (!atTopLevel || !node) {
-                       return;
-               }
+       traverseWithTplOrExtInfo: function( func, node, env, options, 
atTopLevel, tplInfo ) {
+               var next, possibleNext, about, typeOf;
+               while ( node ) {
+                       next = node.nextSibling;
 
-               var c = node.firstChild;
-               while (c) {
-                       var next = c.nextSibling;
-
-                       if (DU.isElt(c)) {
-                               // Identify template/extension content (not 
interested in "mw:Param" nodes).
-                               // We are interested in the very first node.
-                               if (this.isTplOrExtToplevelNode(c) &&
-                                       
/(^|\s)mw:(Extension|Transclusion)/.test(c.getAttribute("typeof")))
-                               {
-                                       // We know that tplInfo will be null 
here since we don't
-                                       // mark up nested transclusions.
-                                       var about = c.getAttribute('about');
+                       if ( DU.isElt( node ) ) {
+                               typeOf = node.getAttribute("typeof");
+                               // Identify the first template/extension node.
+                               // Strictly speaking, the !tplInfo check isn't 
necessary since
+                               // we don't have nested transclusions.
+                               if ( !tplInfo && 
DU.isTplOrExtToplevelNode(node) &&
+                                       // No case for Params yet.
+                                       
/(^|\s)mw:(Extension|Transclusion)/.test( typeOf )
+                               ) {
+                                       about = node.getAttribute("about");
                                        tplInfo = {
-                                               first: c,
-                                               last: DU.getAboutSiblings(c, 
about).last(),
-                                               // Set next to change the next 
node to be traversed
-                                               next: null,
-                                               // Set done to stop traversing
-                                               done: false
+                                               first: node,
+                                               last: DU.getAboutSiblings( 
node, about ).last(),
+                                               dsr: DU.getDataParsoid( node 
).dsr,
+                                               // Give funcs a chance to clear 
the tplInfo after
+                                               // processing.
+                                               clear: false
                                        };
                                }
-
-                               if (tplInfo) {
-                                       cb(c, tplInfo, options);
-                               }
-
-                               if (!(tplInfo && (tplInfo.done || 
tplInfo.next))) {
-                                       // If not done, process subtree
-                                       this.traverseTplOrExtNodes(cb, c, env, 
options, atTopLevel, tplInfo);
-                               }
-
-                               if (tplInfo && (tplInfo.last === c || 
tplInfo.done)) {
-                                       // Clear tplInfo
-                                       tplInfo = null;
-                               }
                        }
 
-                       if (tplInfo && tplInfo.next) {
-                               c = tplInfo.next;
-                       } else {
-                               c = next;
+                       // Possibly skip to processing another sibling.
+                       // node.nextSibling returns a node or null, so 
explicitly test
+                       // against undefined below.
+                       possibleNext = func( node, env, atTopLevel, tplInfo );
+
+                       // We may have walked passed the last about sibling or 
want to
+                       // ignore the template info in future processing.
+                       if ( tplInfo && tplInfo.clear ) {
+                               tplInfo = null;
                        }
+
+                       if ( possibleNext !== undefined ) {
+                               next = possibleNext;
+                       } else if ( DU.isElt( node ) ) {
+                               DU.traverseWithTplOrExtInfo( func, 
node.firstChild, env, options, atTopLevel, tplInfo );
+                       }
+
+                       // Clear the template info after reaching the last 
about sibling.
+                       // The case for clearing (or walking pass the last 
about sibling
+                       // in the subtree seems dubiously useful and error 
prone, consider
+                       // leaving it out.)
+                       if ( tplInfo && ( tplInfo.last === node || 
tplInfo.clear ) ) {
+                               tplInfo = null;
+                       }
+
+                       node = next;
                }
        },
 
@@ -2051,7 +2054,7 @@
                        DU.addNormalizedAttribute( node, "id", uid, origId );
                }
                docDp.ids[uid] = dp;
-               delete DU.getNodeData( node ).parsoid;
+               DU.getNodeData( node ).parsoid = undefined;
                // It would be better to instrument all the load sites.
                node.removeAttribute( "data-parsoid" );
        },
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 3a58913..044d669 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -517,7 +517,7 @@
 add("wt2wt", "Ref: 5. body should accept generic wikitext", "A <ref>This is a 
'''[[bolded link]]''' and this is a 
{{echo|transclusion}}\n</ref>\n\n<references />");
 add("wt2wt", "Ref: 6. indent-pres should not be output in ref-body", "A 
<ref>foo\n bar\n baz\n</ref>\n\n<references />");
 add("wt2wt", "Ref: 7. No p-wrapping in ref-body", "A 
<ref>foo\nbar\nbaz\nbooz\n</ref>\n\n<references />");
-add("wt2wt", "Ref: 8. transclusion wikitext has lower precedence", "A <ref>foo 
{{echo|</ref> B C}}\n\n<references />");
+add("wt2wt", "Ref: 8. transclusion wikitext has lower precedence", "A <ref>foo 
<nowiki>{{</nowiki>echo|</ref> B C}}\n\n<references />");
 add("wt2wt", "Ref: 9. unclosed comments should not leak out of ref-body", "A 
<ref>foo <!----></ref> B C\n<references />");
 add("wt2wt", "Ref: 10. Unclosed HTML tags should not leak out of ref-body", "A 
<ref><b> foo </ref> B C\n\n<references />");
 add("wt2wt", "Ref: 19. ref-tags with identical name encodings should get 
identical indexes", "1 <ref name=\"a & b\">foo</ref> 2 <ref name=\"a &amp;amp; 
b\" />\n\n<references />");
@@ -869,8 +869,8 @@
 add("html2html", "Gallery override link with absolute external link (bug 
34852)", "<p data-parsoid='{\"dsr\":[0,4,0,0]}'>\t\t* </p><div style=\"width: 
155px\" data-parsoid='{\"stx\":\"html\",\"dsr\":[4,239,26,6]}'>\n\t\t\t<div 
class=\"thumb\" style=\"width: 150px;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[34,182,41,6]}'><div 
style=\"margin:68px auto;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[75,176,31,6]}'><img 
src=\"http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg\" 
alt=\"120px-Foobar.jpg\" rel=\"mw:externalImage\" 
data-parsoid='{\"dsr\":[106,170,null,null]}'/></div></div>\n\t\t\t<div 
class=\"gallerytext\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[186,230,25,6]}'>\n<p 
data-parsoid='{\"dsr\":[212,219,0,0]}'>caption</p>\n\n\t\t\t</div>\n\t\t</div>\n");
 add("html2html", "Gallery override link with malicious javascript (bug 
34852)", "<p data-parsoid='{\"dsr\":[0,4,0,0]}'>\t\t* </p><div style=\"width: 
155px\" data-parsoid='{\"stx\":\"html\",\"dsr\":[4,239,26,6]}'>\n\t\t\t<div 
class=\"thumb\" style=\"width: 150px;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[34,182,41,6]}'><div 
style=\"margin:68px auto;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[75,176,31,6]}'><img 
src=\"http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg\" 
alt=\"120px-Foobar.jpg\" rel=\"mw:externalImage\" 
data-parsoid='{\"dsr\":[106,170,null,null]}'/></div></div>\n\t\t\t<div 
class=\"gallerytext\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[186,230,25,6]}'>\n<p 
data-parsoid='{\"dsr\":[212,219,0,0]}'>caption</p>\n\n\t\t\t</div>\n\t\t</div>\n");
 add("html2html", "Gallery with invalid title as link (bug 43964)", "<p 
data-parsoid='{\"dsr\":[0,4,0,0]}'>\t\t* </p><div style=\"width: 155px\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[4,229,26,6]}'>\n\t\t\t<div 
class=\"thumb\" style=\"width: 150px;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[34,182,41,6]}'><div 
style=\"margin:68px auto;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[75,176,31,6]}'><img 
src=\"http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg\" 
alt=\"120px-Foobar.jpg\" rel=\"mw:externalImage\" 
data-parsoid='{\"dsr\":[106,170,null,null]}'/></div></div>\n\t\t\t<div 
class=\"gallerytext\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[186,220,25,6]}'>\t\t\t</div>\n\t\t</div>\n");
-add("html2html", "Ref: 19. ref-tags with identical name encodings should get 
identical indexes", "<p data-parsoid='{\"dsr\":[0,60,0,0]}'>1 <span 
about=\"#mwt3\" class=\"reference\" id=\"cite_ref-a_.26_b_1-0\" 
rel=\"dc:references\" typeof=\"mw:Extension/ref\" 
data-parsoid='{\"dsr\":[2,29,18,6]}' 
data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-a_.26_b-1\"},\"attrs\":{\"name\":\"a
 &amp; b\"}}'><a href=\"#cite_note-a_.26_b-1\">[1]</a></span> 2 <span 
about=\"#mwt4\" class=\"reference\" id=\"cite_ref-a_.26amp.3B_b_2-0\" 
rel=\"dc:references\" typeof=\"mw:Extension/ref\" 
data-parsoid='{\"dsr\":[32,60,28,0]}' 
data-mw='{\"name\":\"ref\",\"attrs\":{\"name\":\"a &amp;amp;amp; b\"}}'><a 
href=\"#cite_note-a_.26amp.3B_b-2\">[2]</a></span></p>\n\n<ol 
class=\"references\" typeof=\"mw:Extension/references\" about=\"#mwt6\" 
data-parsoid='{\"dsr\":[62,76,2,2]}' 
data-mw='{\"name\":\"references\",\"attrs\":{}}'><li 
about=\"#cite_note-a_.26_b-1\" id=\"cite_note-a_.26_b-1\"><span 
rel=\"mw:referencedBy\"><a href=\"#cite_ref-a_.26_b_1-0\">↑</a></span> <span 
id=\"mw-reference-text-cite_note-a_.26_b-1\" class=\"mw-reference-text\" 
data-parsoid=\"{}\">foo</span></li><li about=\"#cite_note-a_.26amp.3B_b-2\" 
id=\"cite_note-a_.26amp.3B_b-2\" data-parsoid=\"{}\"><span 
rel=\"mw:referencedBy\" data-parsoid=\"{}\"><a 
href=\"#cite_ref-a_.26amp.3B_b_2-0\" data-parsoid=\"{}\">↑</a></span> <span 
id=\"mw-reference-text-cite_note-a_.26amp.3B_b-2\" class=\"mw-reference-text\" 
data-parsoid=\"{}\"></span></li></ol>");
-add("html2html", "References: 5. ref tags in references should be processed 
while ignoring all other content", "<p data-parsoid='{\"dsr\":[0,44,0,0]}'>A 
<span about=\"#mwt2\" class=\"reference\" id=\"cite_ref-a_1-0\" 
rel=\"dc:references\" typeof=\"mw:Extension/ref\" 
data-parsoid='{\"dsr\":[2,18,16,0]}' 
data-mw='{\"name\":\"ref\",\"attrs\":{\"name\":\"a\"}}'><a 
href=\"#cite_note-a-1\">[1]</a></span>\nB <span about=\"#mwt4\" 
class=\"reference\" id=\"cite_ref-b_2-0\" rel=\"dc:references\" 
typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[21,44,14,6]}' 
data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-b-2\"},\"attrs\":{\"name\":\"b\"}}'><a
 href=\"#cite_note-b-2\">[2]</a></span></p>\n\n<ol class=\"references\" 
typeof=\"mw:Extension/references\" about=\"#mwt6\" 
data-parsoid='{\"dsr\":[46,96,2,2]}' 
data-mw='{\"name\":\"references\",\"body\":{\"extsrc\":\"&lt;ref 
name=\\\"a\\\">foo&lt;/ref>\",\"html\":\"\\n&lt;span about=\\\"#mwt8\\\" 
class=\\\"reference\\\" rel=\\\"dc:references\\\" 
typeof=\\\"mw:Extension/ref\\\" 
data-parsoid=&#39;{\\\"dsr\\\":[59,82,14,6]}&#39; 
data-mw=&#39;{\\\"name\\\":\\\"ref\\\",\\\"body\\\":{\\\"id\\\":\\\"mw-reference-text-cite_note-a-1\\\"},\\\"attrs\\\":{\\\"name\\\":\\\"a\\\"}}&#39;>&lt;a
 href=\\\"#cite_note-a-1\\\">[1]&lt;/a>&lt;/span>\\n\"},\"attrs\":{}}'><li 
about=\"#cite_note-a-1\" id=\"cite_note-a-1\"><span rel=\"mw:referencedBy\"><a 
href=\"#cite_ref-a_1-0\">↑</a></span> <span 
id=\"mw-reference-text-cite_note-a-1\" class=\"mw-reference-text\" 
data-parsoid=\"{}\">foo</span></li><li about=\"#cite_note-b-2\" 
id=\"cite_note-b-2\" data-parsoid=\"{}\"><span rel=\"mw:referencedBy\" 
data-parsoid=\"{}\"><a href=\"#cite_ref-b_2-0\" 
data-parsoid=\"{}\">↑</a></span> <span id=\"mw-reference-text-cite_note-b-2\" 
class=\"mw-reference-text\" data-parsoid=\"{}\">bar</span></li></ol>");
+add("html2html", "Ref: 19. ref-tags with identical name encodings should get 
identical indexes", "<p data-parsoid='{\"dsr\":[0,60,0,0]}'>1 <span 
about=\"#mwt3\" class=\"reference\" id=\"cite_ref-a_.26_b_1-0\" 
rel=\"dc:references\" typeof=\"mw:Extension/ref\" 
data-parsoid='{\"dsr\":[2,29,18,6]}' 
data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-a_.26_b-1\"},\"attrs\":{\"name\":\"a
 &amp; b\"}}'><a href=\"#cite_note-a_.26_b-1\">[1]</a></span> 2 <span 
about=\"#mwt4\" class=\"reference\" id=\"cite_ref-a_.26amp.3B_b_2-0\" 
rel=\"dc:references\" typeof=\"mw:Extension/ref\" 
data-parsoid='{\"dsr\":[32,60,28,0]}' 
data-mw='{\"name\":\"ref\",\"attrs\":{\"name\":\"a &amp;amp;amp; b\"}}'><a 
href=\"#cite_note-a_.26amp.3B_b-2\">[2]</a></span></p>\n\n<ol 
class=\"references\" typeof=\"mw:Extension/references\" about=\"#mwt6\" 
data-parsoid='{\"dsr\":[62,76,2,2]}' 
data-mw='{\"name\":\"references\",\"attrs\":{}}'><li 
about=\"#cite_note-a_.26_b-1\" id=\"cite_note-a_.26_b-1\"><span 
rel=\"mw:referencedBy\"><a href=\"#cite_ref-a_.26_b_1-0\">↑</a></span> <span 
id=\"mw-reference-text-cite_note-a_.26_b-1\" class=\"mw-reference-text\" 
data-parsoid=\"{}\">foo</span></li><li about=\"#cite_note-a_.26amp.3B_b-2\" 
id=\"cite_note-a_.26amp.3B_b-2\"><span rel=\"mw:referencedBy\"><a 
href=\"#cite_ref-a_.26amp.3B_b_2-0\">↑</a></span> <span 
id=\"mw-reference-text-cite_note-a_.26amp.3B_b-2\" class=\"mw-reference-text\" 
data-parsoid=\"{}\"></span></li></ol>");
+add("html2html", "References: 5. ref tags in references should be processed 
while ignoring all other content", "<p data-parsoid='{\"dsr\":[0,44,0,0]}'>A 
<span about=\"#mwt2\" class=\"reference\" id=\"cite_ref-a_1-0\" 
rel=\"dc:references\" typeof=\"mw:Extension/ref\" 
data-parsoid='{\"dsr\":[2,18,16,0]}' 
data-mw='{\"name\":\"ref\",\"attrs\":{\"name\":\"a\"}}'><a 
href=\"#cite_note-a-1\">[1]</a></span>\nB <span about=\"#mwt4\" 
class=\"reference\" id=\"cite_ref-b_2-0\" rel=\"dc:references\" 
typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[21,44,14,6]}' 
data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-b-2\"},\"attrs\":{\"name\":\"b\"}}'><a
 href=\"#cite_note-b-2\">[2]</a></span></p>\n\n<ol class=\"references\" 
typeof=\"mw:Extension/references\" about=\"#mwt6\" 
data-parsoid='{\"dsr\":[46,96,2,2]}' 
data-mw='{\"name\":\"references\",\"body\":{\"extsrc\":\"&lt;ref 
name=\\\"a\\\">foo&lt;/ref>\",\"html\":\"\\n&lt;span about=\\\"#mwt8\\\" 
class=\\\"reference\\\" rel=\\\"dc:references\\\" 
typeof=\\\"mw:Extension/ref\\\" 
data-parsoid=&#39;{\\\"dsr\\\":[59,82,14,6]}&#39; 
data-mw=&#39;{\\\"name\\\":\\\"ref\\\",\\\"body\\\":{\\\"id\\\":\\\"mw-reference-text-cite_note-a-1\\\"},\\\"attrs\\\":{\\\"name\\\":\\\"a\\\"}}&#39;>&lt;a
 href=\\\"#cite_note-a-1\\\">[1]&lt;/a>&lt;/span>\\n\"},\"attrs\":{}}'><li 
about=\"#cite_note-a-1\" id=\"cite_note-a-1\"><span rel=\"mw:referencedBy\"><a 
href=\"#cite_ref-a_1-0\">↑</a></span> <span 
id=\"mw-reference-text-cite_note-a-1\" class=\"mw-reference-text\" 
data-parsoid=\"{}\">foo</span></li><li about=\"#cite_note-b-2\" 
id=\"cite_note-b-2\"><span rel=\"mw:referencedBy\"><a 
href=\"#cite_ref-b_2-0\">↑</a></span> <span 
id=\"mw-reference-text-cite_note-b-2\" class=\"mw-reference-text\" 
data-parsoid=\"{}\">bar</span></li></ol>");
 add("html2html", "Entities in ref name", "<p 
data-parsoid='{\"dsr\":[0,38,0,0]}'><span about=\"#mwt2\" class=\"reference\" 
id=\"cite_ref-test_.26amp.3B_me_1-0\" rel=\"dc:references\" 
typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[0,38,30,6]}' 
data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-test_.26amp.3B_me-1\"},\"attrs\":{\"name\":\"test
 &amp;amp;amp; me\"}}'><a 
href=\"#cite_note-test_.26amp.3B_me-1\">[1]</a></span></p>\n<ol 
class=\"references\" typeof=\"mw:Extension/references\" about=\"#mwt4\" 
data-parsoid='{\"dsr\":[39,53,2,2]}' 
data-mw='{\"name\":\"references\",\"attrs\":{}}'><li 
about=\"#cite_note-test_.26amp.3B_me-1\" 
id=\"cite_note-test_.26amp.3B_me-1\"><span rel=\"mw:referencedBy\"><a 
href=\"#cite_ref-test_.26amp.3B_me_1-0\">↑</a></span> <span 
id=\"mw-reference-text-cite_note-test_.26amp.3B_me-1\" 
class=\"mw-reference-text\" data-parsoid=\"{}\">hi</span></li></ol>");
 add("html2html", "Empty LI and TR nodes should not be stripped from top-level 
content", "<ul data-parsoid='{\"dsr\":[0,9,0,0]}'><li 
data-parsoid='{\"dsr\":[0,3,1,0]}'> a</li>\n<li 
data-parsoid='{\"dsr\":[4,5,1,0]}'></li>\n<li 
data-parsoid='{\"dsr\":[6,9,1,0]}'> b</li></ul>\n\n<table 
data-parsoid='{\"dsr\":[11,26,2,2]}'>\n\n<tbody 
data-parsoid='{\"dsr\":[15,24,0,0]}'><tr 
data-parsoid='{\"startTagSrc\":\"|-\",\"autoInsertedEnd\":true,\"dsr\":[15,22,2,0]}'>\n<td
 
data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[18,22,1,0]}'>foo</td></tr>\n\n</tbody></table>\n");
 add("html2html", "Headings: 6a. Heading chars in SOL context (with trailing 
spaces)", "<p data-parsoid='{\"dsr\":[0,20,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[0,20,8,9]}'>=a=</span></p>\n\n<p 
data-parsoid='{\"dsr\":[22,42,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[22,42,8,9]}'>=a=</span></p> \n\n<p 
data-parsoid='{\"dsr\":[45,65,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[45,65,8,9]}'>=a=</span></p>\t\n\n<p 
data-parsoid='{\"dsr\":[68,88,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[68,88,8,9]}'>=a=</span></p> \t\n");

-- 
To view, visit https://gerrit.wikimedia.org/r/201087
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4c6452e49563cf3923a5bb69485f1ea7d0c13e33
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to