Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/320929

Change subject: WIP: T102209: Assign ids to headings to match core's section anchors
......................................................................

WIP: T102209: Assign ids to headings to match core's section anchors

* This is WIP. Needs testing.
* There are a couple of edge-case divergences from core.
* Need to decide if we want to update Parsoid side of parser tests
  or if we want to normalize those away and add mocha tests to
  spec id assignment behavior. Probably the latter.

Change-Id: I2b2cffd5482c263b309925dfed1a88c46a3ed0cf
---
M lib/wt2html/DOMPostProcessor.js
A lib/wt2html/pp/handlers/headings.js
2 files changed, 58 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/29/320929/1

diff --git a/lib/wt2html/DOMPostProcessor.js b/lib/wt2html/DOMPostProcessor.js
index 35f7d58..427cf49 100644
--- a/lib/wt2html/DOMPostProcessor.js
+++ b/lib/wt2html/DOMPostProcessor.js
@@ -26,6 +26,7 @@
 // handlers
 var logWikitextFixup = require('./pp/handlers/linter.js').logWikitextFixups;
 var CleanUp = require('./pp/handlers/cleanup.js');
+var headings = require('./pp/handlers/headings.js');
 var unpackDOMFragments = 
require('./pp/handlers/unpackDOMFragments.js').unpackDOMFragments;
 var TableFixups = require('./pp/handlers/tableFixups.js').TableFixups;
 var handleLinkNeighbours = 
require('./pp/handlers/handleLinkNeighbours.js').handleLinkNeighbours;
@@ -167,6 +168,7 @@
        // Make this its own thing so that any changes to the DOM
        // don't affect other handlers that run alongside it.
        domVisitor = new DOMTraverser(env);
+       domVisitor.addHandler(null, headings.genAnchors);
        domVisitor.addHandler(null, CleanUp.cleanupAndSaveDataParsoid);
        addPP('cleanupAndSaveDP', domVisitor.traverse.bind(domVisitor));
 }
diff --git a/lib/wt2html/pp/handlers/headings.js 
b/lib/wt2html/pp/handlers/headings.js
new file mode 100644
index 0000000..9b42bf9
--- /dev/null
+++ b/lib/wt2html/pp/handlers/headings.js
@@ -0,0 +1,56 @@
+'use strict';
+
+var DU = require('../../../utils/DOMUtils.js').DOMUtils;
+var Sanitizer = require('../../tt/Sanitizer.js').Sanitizer;
+
+// Generate <a> anchor tags with ids that the PHP parser assigns
+// to headings. This is to ensure that links that are out there
+// in the wild continue to be valid links into Parsoid HTML.
+function genAnchors(node, env, atTopLevel) {
+       if (!atTopLevel || !/^H[1-6]$/.test(node.nodeName)) {
+               return true;
+       }
+
+       // Cannot generate an anchor id if the heading already has an id!
+       // FIXME: Divergence from PHP parser behavior.
+       // The PHP parser generates a <h*><span 
id="anchor-id-here-">..</span><h*>
+       // So, it can preserve the existing id if any. However, in Parsoid, we 
are
+       // generating a <h* id="anchor-id-here"> ..</h*> => we either overwrite 
or
+       // preserve the existing id and use it for TOC, etc. We choose to 
preserve it.
+       if (node.getAttribute('id') !== null) {
+               return true;
+       }
+
+       // Strip HTML tags + normalize whitespace
+       var anchorText = node.innerHTML.replace(/<.*?>/g, '').replace(/[ _]+/, 
' ').trim();
+
+       // Create an anchor with a sanitized id
+       var anchorId = Sanitizer.escapeId(anchorText, { noninitial: true });
+
+       // The ids need to be unique!
+       var baseId = anchorId;
+       var suffix = 1;
+       var document = node.ownerDocument;
+       if (env.pageBundle) {
+               var docIds = DU.getDataParsoid(document).pagebundle.parsoid.ids;
+               while (docIds.hasOwnProperty(anchorId)) {
+                       suffix++;
+                       anchorId = baseId + '_' + suffix;
+               }
+       } else {
+               // FIXME: This is not compliant with how PHP parser does it.
+               // If there is an id in the doc elsewhere, this will assign
+               // the heading a suffixed id, whereas the PHP parser doesn't 
care.
+               while (document.getElementById(anchorId)) {
+                       suffix++;
+                       anchorId = baseId + '_' + suffix;
+               }
+       }
+       node.setAttribute('id', anchorId);
+
+       return true;
+}
+
+if (typeof module === 'object') { // no-op where no `module` object exists
+       module.exports.genAnchors = genAnchors;
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/320929
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2b2cffd5482c263b309925dfed1a88c46a3ed0cf
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to