Subramanya Sastry has uploaded a new change for review. https://gerrit.wikimedia.org/r/320929
Change subject: WIP: T102209: Assign ids to headings to match core's section anchors ...................................................................... WIP: T102209: Assign ids to headings to match core's section anchors * This is WIP. Needs testing. * There are a couple edge case divergences from core. * Need to decide if we want to update Parsoid side of parser tests or if we want to normalize those away and add mocha tests to spec id assignment behavior. Probably the latter. Change-Id: I2b2cffd5482c263b309925dfed1a88c46a3ed0cf --- M lib/wt2html/DOMPostProcessor.js A lib/wt2html/pp/handlers/headings.js 2 files changed, 58 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/29/320929/1 diff --git a/lib/wt2html/DOMPostProcessor.js b/lib/wt2html/DOMPostProcessor.js index 35f7d58..427cf49 100644 --- a/lib/wt2html/DOMPostProcessor.js +++ b/lib/wt2html/DOMPostProcessor.js @@ -26,6 +26,7 @@ // handlers var logWikitextFixup = require('./pp/handlers/linter.js').logWikitextFixups; var CleanUp = require('./pp/handlers/cleanup.js'); +var headings = require('./pp/handlers/headings.js'); var unpackDOMFragments = require('./pp/handlers/unpackDOMFragments.js').unpackDOMFragments; var TableFixups = require('./pp/handlers/tableFixups.js').TableFixups; var handleLinkNeighbours = require('./pp/handlers/handleLinkNeighbours.js').handleLinkNeighbours; @@ -167,6 +168,7 @@ // Make this its own thing so that any changes to the DOM // don't affect other handlers that run alongside it. domVisitor = new DOMTraverser(env); + domVisitor.addHandler(null, headings.genAnchors); domVisitor.addHandler(null, CleanUp.cleanupAndSaveDataParsoid); addPP('cleanupAndSaveDP', domVisitor.traverse.bind(domVisitor)); } diff --git a/lib/wt2html/pp/handlers/headings.js b/lib/wt2html/pp/handlers/headings.js new file mode 100644 index 0000000..9b42bf9 --- /dev/null +++ b/lib/wt2html/pp/handlers/headings.js @@ -0,0 +1,56 @@ +'use strict'; + +var DU = require('../../../utils/DOMUtils.js').DOMUtils; +var Sanitizer = require('../../tt/Sanitizer.js').Sanitizer; + +// Generate <a> anchor tags with ids that the PHP parser assigns +// to headings. This is to ensure that links that are out there +// in the wild continue to be valid links into Parsoid HTML. +function genAnchors(node, env, atTopLevel) { + if (!atTopLevel || !/^H[1-6]$/.test(node.nodeName)) { + return true; + } + + // Cannot generate an anchor id if the heading already has an id! + // FIXME: Divergence from PHP parser behavior. + // The PHP parser generates a <h*><span id="anchor-id-here-">..</span><h*> + // So, it can preserve the existing id if any. However, in Parsoid, we are + // generating a <h* id="anchor-id-here"> ..</h*> => we either overwrite or + // preserve the existing id and use it for TOC, etc. We choose to preserve it. + if (node.getAttribute('id') !== null) { + return true; + } + + // Strip HTML tags + normalize whitespace + var anchorText = node.innerHTML.replace(/<.*?>/g, '').replace(/[ _]+/, ' ').trim(); + + // Create an anchor with a sanitized id + var anchorId = Sanitizer.escapeId(anchorText, { noninitial: true }); + + // The ids need to be unique! + var baseId = anchorId; + var suffix = 1; + var document = node.ownerDocument; + if (env.pageBundle) { + var docIds = DU.getDataParsoid(document).pagebundle.parsoid.ids; + while (docIds.hasOwnProperty(anchorId)) { + suffix++; + anchorId = baseId + '_' + suffix; + } + } else { + // FIXME: This is not compliant with how PHP parser does it. + // If there is an id in the doc elsewhere, this will assign + // the heading a suffixed id, whereas the PHP parser doesn't care. + while (document.getElementById(anchorId)) { + suffix++; + anchorId = baseId + '_' + suffix; + } + } + node.setAttribute('id', anchorId); + + return true; +} + +if (typeof module === 'object') { + module.exports.genAnchors = genAnchors; +} -- To view, visit https://gerrit.wikimedia.org/r/320929 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2b2cffd5482c263b309925dfed1a88c46a3ed0cf Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits