Arlolra has uploaded a new change for review. https://gerrit.wikimedia.org/r/324641
Change subject: Convert cleanupFormattingTagFixup to the DOMTraverser interface ...................................................................... Convert cleanupFormattingTagFixup to the DOMTraverser interface Change-Id: I33544f68a4290daf544f6fe9090b31cd1b63fadd --- M lib/utils/DOMTraverser.js M lib/wt2html/DOMPostProcessor.js A lib/wt2html/pp/handlers/cleanupFormattingTagFixup.js D lib/wt2html/pp/processors/cleanupFormattingTagFixup.js 4 files changed, 78 insertions(+), 75 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/41/324641/1 diff --git a/lib/utils/DOMTraverser.js b/lib/utils/DOMTraverser.js index 9352e6f..4cc37a4 100644 --- a/lib/utils/DOMTraverser.js +++ b/lib/utils/DOMTraverser.js @@ -23,6 +23,19 @@ } /** + * Full pass w/ func + * + * @param {MWParserEnvironment} env + * @param {Function} func + * @return {Function} + */ +DOMTraverser.pass = function(env, func) { + var domVisitor = new DOMTraverser(env); + domVisitor.addHandler(null, func); + return domVisitor.traverse.bind(domVisitor); +}; + +/** * Add a handler to the DOM traversal * * @param {string} nodeName diff --git a/lib/wt2html/DOMPostProcessor.js b/lib/wt2html/DOMPostProcessor.js index 940ea1b..1a2b62b 100644 --- a/lib/wt2html/DOMPostProcessor.js +++ b/lib/wt2html/DOMPostProcessor.js @@ -16,7 +16,6 @@ var handleUnbalancedTables = require('./pp/processors/handleUnbalancedTables.js').handleUnbalancedTables; var markTreeBuilderFixups = require('./pp/processors/markTreeBuilderFixups.js').markTreeBuilderFixups; var normalize = require('./pp/processors/normalize.js').normalize; -var cleanupFormattingTagFixup = require('./pp/processors/cleanupFormattingTagFixup.js').cleanupFormattingTagFixup; var migrateTemplateMarkerMetas = require('./pp/processors/migrateTemplateMarkerMetas.js').migrateTemplateMarkerMetas; var handlePres = require('./pp/processors/handlePres.js').handlePres; var migrateTrailingNLs = require('./pp/processors/migrateTrailingNLs.js').migrateTrailingNLs; @@ -31,6 +30,7 @@ var TableFixups = require('./pp/handlers/tableFixups.js').TableFixups; var handleLinkNeighbours = require('./pp/handlers/handleLinkNeighbours.js').handleLinkNeighbours; var liFixups = require('./pp/handlers/liFixups.js'); +var cleanupFormattingTagFixup = require('./pp/handlers/cleanupFormattingTagFixup.js').cleanupFormattingTagFixup; // map from mediawiki metadata names to RDFa property names var metadataMap = { @@ -101,17 +101,13 @@ }); }; - // DOM traverser that runs before the in-order DOM handlers. - var dataParsoidLoader = new DOMTraverser(env); - dataParsoidLoader.addHandler(null, this.prepareDOM.bind(this)); - // Common post processing - addPP('dpLoader', dataParsoidLoader.traverse.bind(dataParsoidLoader)); + addPP('dpLoader', DOMTraverser.pass(env, this.prepareDOM.bind(this))); addPP('markFosteredContent', markFosteredContent); addPP('handleUnbalancedTables', handleUnbalancedTables); addPP('markTreeBuilderFixups', markTreeBuilderFixups); addPP('normalize', normalize); - addPP('cleanupFormattingTagFixup', cleanupFormattingTagFixup); + addPP('cleanupFormattingTagFixup', DOMTraverser.pass(env, cleanupFormattingTagFixup)); // Run this after 'markTreeBuilderFixups' because the mw:StartTag // and mw:EndTag metas would otherwise interfere with the // firstChild/lastChild check that this pass does. @@ -139,14 +135,10 @@ }, this); // Strip empty elements from template content - domVisitor = new DOMTraverser(env); - domVisitor.addHandler(null, CleanUp.stripEmptyElements); - addPP('stripEmptyElts', domVisitor.traverse.bind(domVisitor)); + addPP('stripEmptyElts', DOMTraverser.pass(env, CleanUp.stripEmptyElements)); if (env.conf.parsoid.linting) { - domVisitor = new DOMTraverser(env); - domVisitor.addHandler(null, logWikitextFixup); - addPP('linter', domVisitor.traverse.bind(domVisitor)); + addPP('linter', DOMTraverser.pass(env, logWikitextFixup)); } domVisitor = new DOMTraverser(env); diff --git a/lib/wt2html/pp/handlers/cleanupFormattingTagFixup.js b/lib/wt2html/pp/handlers/cleanupFormattingTagFixup.js new file mode 100644 index 0000000..c0ccdd8 --- /dev/null +++ b/lib/wt2html/pp/handlers/cleanupFormattingTagFixup.js @@ -0,0 +1,60 @@ +'use strict'; + +var DU = require('../../../utils/DOMUtils.js').DOMUtils; + +function cleanupFormattingTagFixup(node, env, atTopLevel, tplInfo) { + if (DU.isGeneratedFigure(node)) { + // Find path of formatting elements. + // NOTE: <a> is a formatting elts as well and should be explicitly skipped + var fpath = []; + var c = node.firstChild; + while (DU.isFormattingElt(c) && c.nodeName !== 'A' && !c.nextSibling) { + fpath.push(c); + c = c.firstChild; + } + + // Make sure that that we stopped at an A-tag and the last child is a caption + var fpathHead = fpath[0]; + var fpathTail = fpath[fpath.length - 1]; + if (fpathHead && fpathTail.firstChild.nodeName === 'A') { + var anchor = fpathTail.firstChild; + var maybeCaption = fpathTail.lastChild; + + // Fix up DOM appropriately + var fig = node; + DU.migrateChildren(fpathTail, fig); + if (maybeCaption.nodeName === 'FIGCAPTION') { + DU.migrateChildren(maybeCaption, fpathTail); + maybeCaption.appendChild(fpathHead); + + // For the formatting elements, if both the start and end tags + // are auto-inserted, DSR algo will automatically ignore the tag. + // + // Otherwise, we need to clear out the TSR for DSR accuracy. + // For simpler logic and code readability reasons, we are + // unconditionally clearing out TSR for the formatting path that + // got displaced from its original location so that DSR computation + // can "recover properly" despite the extra wikitext chars + // that interfere with it. + fpath.forEach(function(n) { + DU.getDataParsoid(n).tsr = null; + }); + } else if (maybeCaption === anchor) { + console.assert(maybeCaption.firstChild.nodeName === 'IMG', 'Expected first child of linked image to be an <img> tag.'); + // Delete the formatting elements since bolding/<small>-ing an image + // is useless and doesn't make sense. + while (fpath.length > 0) { + DU.deleteNode(fpath.pop()); + } + } + } + + return node.nextSibling; + } else { + return true; + } +} + +if (typeof module === "object") { + module.exports.cleanupFormattingTagFixup = cleanupFormattingTagFixup; +} diff --git a/lib/wt2html/pp/processors/cleanupFormattingTagFixup.js b/lib/wt2html/pp/processors/cleanupFormattingTagFixup.js deleted file mode 100644 index 06fa10b..0000000 --- a/lib/wt2html/pp/processors/cleanupFormattingTagFixup.js +++ /dev/null @@ -1,62 +0,0 @@ -'use strict'; - -var DU = require('../../../utils/DOMUtils.js').DOMUtils; - -function cleanupFormattingTagFixup(node, env) { - node = node.firstChild; - while (node !== null) { - if (DU.isGeneratedFigure(node)) { - // Find path of formatting elements. - // NOTE: <a> is a formatting elts as well and should be explicitly skipped - var fpath = []; - var c = node.firstChild; - while (DU.isFormattingElt(c) && c.nodeName !== 'A' && !c.nextSibling) { - fpath.push(c); - c = c.firstChild; - } - - // Make sure that that we stopped at an A-tag and the last child is a caption - var fpathHead = fpath[0]; - var fpathTail = fpath[fpath.length - 1]; - if (fpathHead && fpathTail.firstChild.nodeName === 'A') { - var anchor = fpathTail.firstChild; - var maybeCaption = fpathTail.lastChild; - - // Fix up DOM appropriately - var fig = node; - DU.migrateChildren(fpathTail, fig); - if (maybeCaption.nodeName === 'FIGCAPTION') { - DU.migrateChildren(maybeCaption, fpathTail); - maybeCaption.appendChild(fpathHead); - - // For the formatting elements, if both the start and end tags - // are auto-inserted, DSR algo will automatically ignore the tag. - // - // Otherwise, we need to clear out the TSR for DSR accuracy. - // For simpler logic and code readability reasons, we are - // unconditionally clearing out TSR for the formatting path that - // got displaced from its original location so that DSR computation - // can "recover properly" despite the extra wikitext chars - // that interfere with it. - fpath.forEach(function(n) { - DU.getDataParsoid(n).tsr = null; - }); - } else if (maybeCaption === anchor) { - console.assert(maybeCaption.firstChild.nodeName === 'IMG', 'Expected first child of linked image to be an <img> tag.'); - // Delete the formatting elements since bolding/<small>-ing an image - // is useless and doesn't make sense. - while (fpath.length > 0) { - DU.deleteNode(fpath.pop()); - } - } - } - } else if (DU.isElt(node)) { - cleanupFormattingTagFixup(node, env); - } - node = node.nextSibling; - } -} - -if (typeof module === "object") { - module.exports.cleanupFormattingTagFixup = cleanupFormattingTagFixup; -} -- To view, visit https://gerrit.wikimedia.org/r/324641 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I33544f68a4290daf544f6fe9090b31cd1b63fadd Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits