Mholloway has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/398181 )

Change subject: Update section validation logic, take 2
......................................................................

Update section validation logic, take 2

Updated to use the bunyan Logger class correctly! ;)

1) Log warnings rather than throwing errors, and
2) Only for sections with IDs >= 0 (i.e., not non-editable or pseudo-
   sections).

Bug: T182774
Change-Id: I2b037f5c98edf8c964548c011973924aa6ae14cb
---
M lib/parsoid-access.js
M lib/parsoidSections.js
M lib/parsoidSectionsUsingSectionTags.js
M test/lib/parsoid/parsoid-sections-section-elements-tests.js
4 files changed, 57 insertions(+), 31 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps 
refs/changes/81/398181/1

diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js
index 338ce33..3ae3308 100644
--- a/lib/parsoid-access.js
+++ b/lib/parsoid-access.js
@@ -119,7 +119,7 @@
             transforms.stripUnneededMarkup(doc, legacy);
             parsoidSections.addSectionDivs(doc);
 
-            page.sections = parsoidSections.getSectionsText(doc);
+            page.sections = parsoidSections.getSectionsText(doc, req.logger);
             return page;
         });
 }
diff --git a/lib/parsoidSections.js b/lib/parsoidSections.js
index dcdbb72..f8e21d6 100644
--- a/lib/parsoidSections.js
+++ b/lib/parsoidSections.js
@@ -27,13 +27,14 @@
  * Gets the sections array, including HTML string and metadata for all sections
  * (id, anchor, line, toclevel).
  * @param {!document} doc the parsed DOM Document of the Parsoid output
+ * @param {!Logger} logger the app's bunyan logger
  * @return {!sections[]} an array of section JSON elements
  */
-function getSectionsText(doc) {
+function getSectionsText(doc, logger) {
     if (!hasParsoidSections(doc)) {
         return parsoidSectionsUsingDivs.getSectionsText(doc);
     } else {
-        return parsoidSectionsUsingSectionTags.getSectionsText(doc);
+        return parsoidSectionsUsingSectionTags.getSectionsText(doc, logger);
     }
 }
 
diff --git a/lib/parsoidSectionsUsingSectionTags.js 
b/lib/parsoidSectionsUsingSectionTags.js
index 76e471d..6fb3b72 100644
--- a/lib/parsoidSectionsUsingSectionTags.js
+++ b/lib/parsoidSectionsUsingSectionTags.js
@@ -1,8 +1,6 @@
 'use strict';
 
 const parsoidDomUtils = require('parsoid-dom-utils');
-// const sUtil = require('./util');
-// const HTTPError = sUtil.HTTPError;
 const NodeType = require('./nodeType');
 
 /**
@@ -39,7 +37,13 @@
     }
 }
 
-/* function throwIfPreviousSectionIsIncomplete(allSections, sectionObj) {
+// skip lead sections since they don't have headings;
+// no line and no anchor means incomplete section
+function shouldLogInvalidSectionWarning(sectionObj) {
+    return sectionObj.id >= 1 && !sectionObj.line && !sectionObj.anchor;
+}
+
+function validatePreviousSection(logger, allSections, sectionObj) {
     if (allSections.length === 0) {
         return;
     }
@@ -47,30 +51,29 @@
     if (!sectionObj) {
         sectionObj = allSections[allSections.length - 1];
     }
-    // skip lead sections since they don't have headings;
-    // no line and no anchor means incomplete section
-    if (allSections.length > 1 && !sectionObj.line && !sectionObj.anchor) {
-        throw new HTTPError({
-            status: 502,
-            type: 'unsupported_section',
-            title: 'Section structure not supported',
+
+    if (shouldLogInvalidSectionWarning(sectionObj)) {
+        logger.log('warn', {
+            warning: 'invalid_section',
+            title: 'Found section without expected heading',
             detail: `Cannot find heading for section number ${sectionObj.id}.`
         });
     }
-} */
+}
 
 /**
  * Visits one DOM node. Do the stuff that needs to be done when a single DOM 
node is handled.
  * In this case, starts a new section object when a <section> tag is 
encountered that is a
  * direct descendant
+ * @param {!Logger} logger the app's bunyan logger
  * @param {!DOMNode} node the node to visit
  * @param {!Object[]} allSections the array containing the results
  * @param {!Object} state some state to pass around
  */
-function visit(node, allSections, state) {
+function visit(logger, node, allSections, state) {
     let sectionObj = allSections.length > 0 ? allSections[allSections.length - 
1] : undefined;
     if (node.tagName === 'SECTION') {
-        // throwIfPreviousSectionIsIncomplete(allSections, sectionObj);
+        validatePreviousSection(logger, allSections, sectionObj);
 
         sectionObj = { id: getSectionNumber(node), text: '' };
         allSections.push(sectionObj);
@@ -99,16 +102,17 @@
  * This started out with a traditional iterative DFS algorithm but added a 
boolean to stop
  * descending so we don't duplicate content since elsewhere outerHTML is used 
(which contains
  * content of sub-nodes).
+ * @param {!Logger} logger the app's bunyan logger
  * @param {!DOMElement} rootElement the root of the DOM tree which needs to be 
traversed
  * @param {!Object[]} allSections holds the results
  */
-function traverseDF(rootElement, allSections) {
+function traverseDF(logger, rootElement, allSections) {
     const state = {};
     let nodesToVisit = [ rootElement ];
 
     while (nodesToVisit.length > 0) {
         const currentNode = nodesToVisit.shift();
-        visit(currentNode, allSections, state);
+        visit(logger, currentNode, allSections, state);
         if (!state.pauseDescent) {
             nodesToVisit = [
                 ...(currentNode.childNodes || []), // depth first
@@ -117,12 +121,12 @@
         }
     }
     ensureLeadSection(allSections);
-    // throwIfPreviousSectionIsIncomplete(allSections);
+    validatePreviousSection(logger, allSections);
 }
 
-function getSectionsText(doc) {
+function getSectionsText(doc, logger) {
     const allSections = [];
-    traverseDF(doc.body, allSections);
+    traverseDF(logger, doc.body, allSections);
     return allSections;
 }
 
@@ -130,6 +134,7 @@
     addSectionTags,
     getSectionsText,
     testing: {
-        parseSections: traverseDF
+        parseSections: traverseDF,
+        shouldLogInvalidSectionWarning
     }
 };
diff --git a/test/lib/parsoid/parsoid-sections-section-elements-tests.js 
b/test/lib/parsoid/parsoid-sections-section-elements-tests.js
index 77cb023..ecfb94c 100644
--- a/test/lib/parsoid/parsoid-sections-section-elements-tests.js
+++ b/test/lib/parsoid/parsoid-sections-section-elements-tests.js
@@ -1,11 +1,13 @@
+/* eslint-disable prefer-template */
+
 'use strict';
 
 const assert = require('../../utils/assert.js');
 const domino = require('domino');
 const parsoidSectionsUsingSectionTags = 
require('../../../lib/parsoidSectionsUsingSectionTags');
+const shouldWarn = 
parsoidSectionsUsingSectionTags.testing.shouldLogInvalidSectionWarning;
 
 describe('lib:parsoid-sections (section elements)', function() {
-    /* eslint-disable prefer-template */
 
     this.timeout(20000); // eslint-disable-line no-invalid-this
 
@@ -119,14 +121,32 @@
         assertSection0(sections, sectionInDiv);
     });
 
-    it.skip('non-lead section without heading tag should throw error', () => {
-        const doc = domino.createDocument(
-            '<section data-mw-section-id="0">text0</section>' +
-            '<section data-mw-section-id="1">text1</section>');
-        assert.throws(() => {
-            parsoidSectionsUsingSectionTags.getSectionsText(doc);
-        }, /unsupported_section/);
+    it('should not warn for page containing only a lead section', () => {
+        const allSections = [ { id: 0 } ];
+        assert.ok(!shouldWarn(allSections[0]));
     });
 
-    /* eslint-enable prefer-template */
+    it('should warn for non-lead section without heading tag', () => {
+        const allSections = [ { id: 0 }, { id: 1 } ];
+        assert.ok(shouldWarn(allSections[1]));
+    });
+
+    it('should not warn if id & anchor are found for all sections after the 
lead section', () => {
+        const allSections = [ { id: 0 }, { id: 1, line: 'Foo', anchor: 'Foo' } 
];
+        assert.ok(!shouldWarn(allSections[1]));
+    });
+
+    it('should not warn for non-lead non-editable section without heading 
tag', () => {
+        const allSections = [ { id: 0 }, { id: -1 } ];
+        assert.ok(!shouldWarn(allSections[1]));
+    });
+
+    it('should not warn if a non-editable section precedes the true lead 
section', () => {
+        const allSections = [ { id: -1 }, { id: 0 }, { id: 1, line: 'Foo', 
anchor: 'Foo' } ];
+        assert.ok(!shouldWarn(allSections[1]));
+    });
+
+    it('should throw if sectionObj is invalid', () => {
+        assert.throws(() => { shouldWarn(undefined); }, /TypeError/);
+    });
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/398181
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2b037f5c98edf8c964548c011973924aa6ae14cb
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <mhollo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to