Mholloway has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/398181 )
Change subject: Update section validation logic, take 2 ...................................................................... Update section validation logic, take 2 Updated to use the bunyan Logger class correctly! ;) 1) Log warnings rather than throwing errors, and 2) Only for sections with IDs >= 0 (i.e., not non-editable or pseudo- sections). Bug: T182774 Change-Id: I2b037f5c98edf8c964548c011973924aa6ae14cb --- M lib/parsoid-access.js M lib/parsoidSections.js M lib/parsoidSectionsUsingSectionTags.js M test/lib/parsoid/parsoid-sections-section-elements-tests.js 4 files changed, 57 insertions(+), 31 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/81/398181/1 diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js index 338ce33..3ae3308 100644 --- a/lib/parsoid-access.js +++ b/lib/parsoid-access.js @@ -119,7 +119,7 @@ transforms.stripUnneededMarkup(doc, legacy); parsoidSections.addSectionDivs(doc); - page.sections = parsoidSections.getSectionsText(doc); + page.sections = parsoidSections.getSectionsText(doc, req.logger); return page; }); } diff --git a/lib/parsoidSections.js b/lib/parsoidSections.js index dcdbb72..f8e21d6 100644 --- a/lib/parsoidSections.js +++ b/lib/parsoidSections.js @@ -27,13 +27,14 @@ * Gets the sections array, including HTML string and metadata for all sections * (id, anchor, line, toclevel). * @param {!document} doc the parsed DOM Document of the Parsoid output + * @param {!Logger} logger the app's bunyan logger * @return {!sections[]} an array of section JSON elements */ -function getSectionsText(doc) { +function getSectionsText(doc, logger) { if (!hasParsoidSections(doc)) { return parsoidSectionsUsingDivs.getSectionsText(doc); } else { - return parsoidSectionsUsingSectionTags.getSectionsText(doc); + return parsoidSectionsUsingSectionTags.getSectionsText(doc, logger); } } diff --git a/lib/parsoidSectionsUsingSectionTags.js b/lib/parsoidSectionsUsingSectionTags.js index 76e471d..6fb3b72 100644 --- a/lib/parsoidSectionsUsingSectionTags.js +++ b/lib/parsoidSectionsUsingSectionTags.js @@ -1,8 +1,6 @@ 'use strict'; const parsoidDomUtils = require('parsoid-dom-utils'); -// const sUtil = require('./util'); -// const HTTPError = sUtil.HTTPError; const NodeType = require('./nodeType'); /** @@ -39,7 +37,13 @@ } } -/* function throwIfPreviousSectionIsIncomplete(allSections, sectionObj) { +// skip lead sections since they don't have headings; +// no line and no anchor means incomplete section +function shouldLogInvalidSectionWarning(sectionObj) { + return sectionObj.id >= 1 && !sectionObj.line && !sectionObj.anchor; +} + +function validatePreviousSection(logger, allSections, sectionObj) { if (allSections.length === 0) { return; } @@ -47,30 +51,29 @@ if (!sectionObj) { sectionObj = allSections[allSections.length - 1]; } - // skip lead sections since they don't have headings; - // no line and no anchor means incomplete section - if (allSections.length > 1 && !sectionObj.line && !sectionObj.anchor) { - throw new HTTPError({ - status: 502, - type: 'unsupported_section', - title: 'Section structure not supported', + + if (shouldLogInvalidSectionWarning(sectionObj)) { + logger.log('warn', { + warning: 'invalid_section', + title: 'Found section without expected heading', detail: `Cannot find heading for section number ${sectionObj.id}.` }); } -} */ +} /** * Visits one DOM node. Do the stuff that needs to be done when a single DOM node is handled. * In this case, starts a new section object when a <section> tag is encountered that is a * direct descendant + * @param {!Logger} logger the app's bunyan logger * @param {!DOMNode} node the node to visit * @param {!Object[]} allSections the array containing the results * @param {!Object} state some state to pass around */ -function visit(node, allSections, state) { +function visit(logger, node, allSections, state) { let sectionObj = allSections.length > 0 ? allSections[allSections.length - 1] : undefined; if (node.tagName === 'SECTION') { - // throwIfPreviousSectionIsIncomplete(allSections, sectionObj); + validatePreviousSection(logger, allSections, sectionObj); sectionObj = { id: getSectionNumber(node), text: '' }; allSections.push(sectionObj); @@ -99,16 +102,17 @@ * This started out with a traditional iterative DFS algorithm but added a boolean to stop * descending so we don't duplicate content since elsewhere outerHTML is used (which contains * content of sub-nodes). + * @param {!Logger} logger the app's bunyan logger * @param {!DOMElement} rootElement the root of the DOM tree which needs to be traversed * @param {!Object[]} allSections holds the results */ -function traverseDF(rootElement, allSections) { +function traverseDF(logger, rootElement, allSections) { const state = {}; let nodesToVisit = [ rootElement ]; while (nodesToVisit.length > 0) { const currentNode = nodesToVisit.shift(); - visit(currentNode, allSections, state); + visit(logger, currentNode, allSections, state); if (!state.pauseDescent) { nodesToVisit = [ ...(currentNode.childNodes || []), // depth first @@ -117,12 +121,12 @@ } } ensureLeadSection(allSections); - // throwIfPreviousSectionIsIncomplete(allSections); + validatePreviousSection(logger, allSections); } -function getSectionsText(doc) { +function getSectionsText(doc, logger) { const allSections = []; - traverseDF(doc.body, allSections); + traverseDF(logger, doc.body, allSections); return allSections; } @@ -130,6 +134,7 @@ addSectionTags, getSectionsText, testing: { - parseSections: traverseDF + parseSections: traverseDF, + shouldLogInvalidSectionWarning } }; diff --git a/test/lib/parsoid/parsoid-sections-section-elements-tests.js b/test/lib/parsoid/parsoid-sections-section-elements-tests.js index 77cb023..ecfb94c 100644 --- a/test/lib/parsoid/parsoid-sections-section-elements-tests.js +++ b/test/lib/parsoid/parsoid-sections-section-elements-tests.js @@ -1,11 +1,13 @@ +/* eslint-disable prefer-template */ + 'use strict'; const assert = require('../../utils/assert.js'); const domino = require('domino'); const parsoidSectionsUsingSectionTags = require('../../../lib/parsoidSectionsUsingSectionTags'); +const shouldWarn = parsoidSectionsUsingSectionTags.testing.shouldLogInvalidSectionWarning; describe('lib:parsoid-sections (section elements)', function() { - /* eslint-disable prefer-template */ this.timeout(20000); // eslint-disable-line no-invalid-this @@ -119,14 +121,32 @@ assertSection0(sections, sectionInDiv); }); - it.skip('non-lead section without heading tag should throw error', () => { - const doc = domino.createDocument( - '<section data-mw-section-id="0">text0</section>' + - '<section data-mw-section-id="1">text1</section>'); - assert.throws(() => { - parsoidSectionsUsingSectionTags.getSectionsText(doc); - }, /unsupported_section/); + it('should not warn for page containing only a lead section', () => { + const allSections = [ { id: 0 } ]; + assert.ok(!shouldWarn(allSections[0])); }); - /* eslint-enable prefer-template */ + it('should warn for non-lead section without heading tag', () => { + const allSections = [ { id: 0 }, { id: 1 } ]; + assert.ok(shouldWarn(allSections[1])); + }); + + it('should not warn if id & anchor are found for all sections after the lead section', () => { + const allSections = [ { id: 0 }, { id: 1, line: 'Foo', anchor: 'Foo' } ]; + assert.ok(!shouldWarn(allSections[1])); + }); + + it('should not warn for non-lead non-editable section without heading tag', () => { + const allSections = [ { id: 0 }, { id: -1 } ]; + assert.ok(!shouldWarn(allSections[1])); + }); + + it('should not warn if a non-editable section precedes the true lead section', () => { + const allSections = [ { id: -1 }, { id: 0 }, { id: 1, line: 'Foo', anchor: 'Foo' } ]; + assert.ok(!shouldWarn(allSections[1])); + }); + + it('should throw if sectionObj is invalid', () => { + assert.throws(() => { shouldWarn(undefined); }, /TypeError/); + }); }); -- To view, visit https://gerrit.wikimedia.org/r/398181 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2b037f5c98edf8c964548c011973924aa6ae14cb Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Mholloway <mhollo...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits