[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Bring back old section handling code
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/377349 ) Change subject: Bring back old section handling code .. Bring back old section handling code The new section handling code via the <section> tags from parsoid-dom-utils caused some issues with some pages that have heading tags wrapped inside <div> tags. This brings back the old behavior. A drawback of this is that the new read-*html endpoints don't use <section> tags anymore; this is until Parsoid implements and deploys T114072 or this code is changed again. Changes: * Moved parsoidSections to parsoidSectionsUsingSectionTags. * parsoidSections now delegates to the proper implementation, either using div or section tags. * Brought back parsoidSectionsUsingDivs.getSectionsText (from 590ef514 parsoid-access) since that is now needed for mobile-sections. * Added more unit tests. * Renamed addSectionDivs to addSectionTags in parsoidSectionsUsingSectionTags. Bug: T175305 Change-Id: I1a469995c1aca38ec662c989d023ca64e5a21b02 --- M lib/parsoid-access.js M lib/parsoidSections.js R lib/parsoidSectionsUsingDivs.js A lib/parsoidSectionsUsingSectionTags.js M test/features/read-html/pagecontent-base.js M test/features/read-html/pagecontent.js M test/lib/parsoid/parsoid-sections-test.js 7 files changed, 155 insertions(+), 67 deletions(-) Approvals: jenkins-bot: Verified Mholloway: Looks good to me, approved diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js index df44853..0f063e8 100644 --- a/lib/parsoid-access.js +++ b/lib/parsoid-access.js @@ -11,7 +11,7 @@ const parseProperty = require('./parseProperty'); const parseDefinition = require('./parseDefinition'); const parsoidSections = require('./parsoidSections'); -const parsoidSectionsForDefinitions = require('./parsoidSectionsFromDivs'); +const parsoidSectionsForDefinitions = require('./parsoidSectionsUsingDivs'); const transforms = require('./transforms'); const HTTPError = sUtil.HTTPError; diff --git a/lib/parsoidSections.js b/lib/parsoidSections.js index 
031a245..da1caa1 100644 --- a/lib/parsoidSections.js +++ b/lib/parsoidSections.js @@ -1,6 +1,7 @@ 'use strict'; -const parsoidDomUtils = require('parsoid-dom-utils'); +const parsoidSectionsUsingDivs = require('./parsoidSectionsUsingDivs'); +const parsoidSectionsUsingSectionTags = require('./parsoidSectionsUsingSectionTags'); /** * New sectioning code: wraps sections in tags. Will likely @@ -8,36 +9,10 @@ * @param {!document} doc the parsed DOM Document of the Parsoid output */ function addSectionDivs(doc) { -// in case this is already handled by Parsoid don't try again +// in case this is already handled by Parsoid (T114072) don't try again if (!doc.querySelector('section')) { -parsoidDomUtils.sections.wrap(doc); +parsoidSectionsUsingDivs.addSectionDivs(doc); } -} - -/** - * Parse the next wiki section. A wiki section is contained inside a tag. - * If there is a nested sub section it ends right before that next sub section's - * tag starts. - * @param {!Node} startingNode the DOM node to start parsing - * @return {string} the HTML text of the next wiki section - */ -function parseNextSection(startingNode) { -let sectionText = ''; -let node = startingNode; - -while (node) { -if (node.tagName !== 'SECTION') { -if (node.outerHTML) { -sectionText += node.outerHTML; -} else if (node.nodeType === 3) { -sectionText += node.textContent; -} -node = node.nextSibling; -} else { -return sectionText; -} -} -return sectionText; } /** @@ -47,38 +22,14 @@ * @return {!sections[]} an array of section JSON elements */ function getSectionsText(doc) { -const sections = []; -const sectionElements = doc.querySelectorAll('section'); - -const currentSectionElement = sectionElements[0]; -const currentSection = {}; -currentSection.id = 0; -currentSection.text = currentSectionElement ? 
currentSectionElement.innerHTML : ''; -sections.push(currentSection); - -for (let i = 1; i < sectionElements.length; i++) { -const currentSection = {}; -const currentSectionElement = sectionElements[i]; -currentSection.id = i; -const childEl = currentSectionElement.firstChild; - -if (childEl && /^H[1-6]$/.test(childEl.tagName)) { -currentSection.text = parseNextSection(childEl.nextSibling); // text starts after H[1-6] -currentSection.toclevel = parseInt(childEl.tagName.charAt(1), 10) - 1; -currentSection.line = childEl.innerHTML.trim(); -currentSection.anchor = childEl.getAttribute('id'); -} - -sections.push(currentSection); +if (!doc.querySelector('section')) { +return parsoidSectionsUsingDivs.getSectionsText(doc); +} else { +return parsoidSecti
[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Bring back old section handling code
BearND has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/377349 ) Change subject: Bring back old section handling code .. Bring back old section handling code The new section handling code via the <section> tags from parsoid-dom-utils caused some issues with some pages that have heading tags wrapped inside <div> tags. This brings back the old behavior. A drawback of this is that the new read-*html endpoints don't use <section> tags anymore; this is until Parsoid implements and deploys T114072 or this code is changed again. Changes: * Moved parsoidSections to parsoidSectionsFromSectionTags. * parsoidSections now delegates to the proper implementation, either using div or section tags. * Brought back parsoidSectionsFromDivs.getSectionsText (from 590ef514 parsoid-access) since that is now needed for mobile-sections. * Added more unit tests. * Renamed addSectionDivs to addSectionTags in parsoidSectionsFromSectionTags. Bug: T175305 Change-Id: I1a469995c1aca38ec662c989d023ca64e5a21b02 --- M lib/parsoidSections.js M lib/parsoidSectionsFromDivs.js A lib/parsoidSectionsFromSectionTags.js M test/features/read-html/pagecontent-base.js M test/features/read-html/pagecontent.js M test/lib/parsoid/parsoid-sections-test.js 6 files changed, 151 insertions(+), 63 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/49/377349/1 diff --git a/lib/parsoidSections.js b/lib/parsoidSections.js index 031a245..0027077 100644 --- a/lib/parsoidSections.js +++ b/lib/parsoidSections.js @@ -1,6 +1,7 @@ 'use strict'; -const parsoidDomUtils = require('parsoid-dom-utils'); +const parsoidSectionsFromDivs = require('./parsoidSectionsFromDivs'); +const parsoidSectionsFromSectionTags = require('./parsoidSectionsFromSectionTags'); /** * New sectioning code: wraps sections in tags. 
Will likely @@ -8,36 +9,10 @@ * @param {!document} doc the parsed DOM Document of the Parsoid output */ function addSectionDivs(doc) { -// in case this is already handled by Parsoid don't try again +// in case this is already handled by Parsoid (T114072) don't try again if (!doc.querySelector('section')) { -parsoidDomUtils.sections.wrap(doc); +parsoidSectionsFromDivs.addSectionDivs(doc); } -} - -/** - * Parse the next wiki section. A wiki section is contained inside a tag. - * If there is a nested sub section it ends right before that next sub section's - * tag starts. - * @param {!Node} startingNode the DOM node to start parsing - * @return {string} the HTML text of the next wiki section - */ -function parseNextSection(startingNode) { -let sectionText = ''; -let node = startingNode; - -while (node) { -if (node.tagName !== 'SECTION') { -if (node.outerHTML) { -sectionText += node.outerHTML; -} else if (node.nodeType === 3) { -sectionText += node.textContent; -} -node = node.nextSibling; -} else { -return sectionText; -} -} -return sectionText; } /** @@ -47,38 +22,14 @@ * @return {!sections[]} an array of section JSON elements */ function getSectionsText(doc) { -const sections = []; -const sectionElements = doc.querySelectorAll('section'); - -const currentSectionElement = sectionElements[0]; -const currentSection = {}; -currentSection.id = 0; -currentSection.text = currentSectionElement ? 
currentSectionElement.innerHTML : ''; -sections.push(currentSection); - -for (let i = 1; i < sectionElements.length; i++) { -const currentSection = {}; -const currentSectionElement = sectionElements[i]; -currentSection.id = i; -const childEl = currentSectionElement.firstChild; - -if (childEl && /^H[1-6]$/.test(childEl.tagName)) { -currentSection.text = parseNextSection(childEl.nextSibling); // text starts after H[1-6] -currentSection.toclevel = parseInt(childEl.tagName.charAt(1), 10) - 1; -currentSection.line = childEl.innerHTML.trim(); -currentSection.anchor = childEl.getAttribute('id'); -} - -sections.push(currentSection); +if (!doc.querySelector('section')) { +return parsoidSectionsFromDivs.getSectionsText(doc); +} else { +return parsoidSectionsFromSectionTags.getSectionsText(doc); } - -return sections; } module.exports = { addSectionDivs, -getSectionsText, -testing: { -parseNextSection -} +getSectionsText }; diff --git a/lib/parsoidSectionsFromDivs.js b/lib/parsoidSectionsFromDivs.js index e278ee4..700c716 100644 --- a/lib/parsoidSectionsFromDivs.js +++ b/lib/parsoidSectionsFromDivs.js @@ -30,7 +30,7 @@ } /** - * @deprecated Old sectioning code: wraps wiki sections in elements. + * Old sectioning code: wraps wiki sections in elements. * Just kept for the definitions