jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/404247 )

Change subject: summary: just parse the lead section
......................................................................


summary: just parse the lead section

No need to try to parse other sections for the summary endpoint.

Change-Id: I02828100fd1a81c94e1a842c3264caba51c634ed
---
M lib/parsoidSections.js
M lib/parsoidSectionsUsingDivs.js
M lib/parsoidSectionsUsingSectionTags.js
M lib/summary.js
M routes/summary.js
M test/lib/parsoid/parsoid-sections-div-element-test.js
M test/lib/parsoid/parsoid-sections-section-elements-tests.js
7 files changed, 69 insertions(+), 16 deletions(-)

Approvals:
  jenkins-bot: Verified
  Mholloway: Looks good to me, approved



diff --git a/lib/parsoidSections.js b/lib/parsoidSections.js
index f8e21d6..f785685 100644
--- a/lib/parsoidSections.js
+++ b/lib/parsoidSections.js
@@ -38,8 +38,23 @@
     }
 }
 
+/**
+ * Reduces the input Parsoid document to a new Document containing
+ * just the first section.
+ * @param {!document} doc the parsed DOM Document of the Parsoid output
+ * @return {!Document} a new DOM Document containing only the lead section
+ */
+function justLeadSection(doc) {
+    if (!hasParsoidSections(doc)) {
+        return parsoidSectionsUsingDivs.justLeadSection(doc);
+    } else {
+        return parsoidSectionsUsingSectionTags.justLeadSection(doc);
+    }
+}
+
 module.exports = {
     hasParsoidSections,
     addSectionDivs,
-    getSectionsText
+    getSectionsText,
+    justLeadSection
 };
diff --git a/lib/parsoidSectionsUsingDivs.js b/lib/parsoidSectionsUsingDivs.js
index a5886dd..5d11837 100644
--- a/lib/parsoidSectionsUsingDivs.js
+++ b/lib/parsoidSectionsUsingDivs.js
@@ -1,11 +1,11 @@
 'use strict';
 
 /*
-  This is an old way of doing sectioning, which is preserved only for the 
definitions endpoint.
-  TODO: We should move the definition parsing implementation to the new way
-  (parsoidSectionsUsingSectionTags.js) soon since when Parsoid deploys adding 
<section> tags the
-  definitions code may be negatively affected by that.
+  This is an old way of doing sectioning, which is preserved only just in case
+  we encounter a payload from an older Parsoid version.
 */
+
+const domino = require('domino');
 
 function parseNextSection(sectionDiv, startingNode) {
     let nextNode;
@@ -96,7 +96,18 @@
     return sections;
 }
 
+/**
+ * @param {!document} doc the parsed DOM Document of the Parsoid output
+ * @return {!Document} a new DOM Document containing only the lead section
+ */
+function justLeadSection(doc) {
+    const sectionDiv = doc.createElement('div');
+    parseNextSection(sectionDiv, doc.body.firstChild);
+    return domino.createDocument(sectionDiv.innerHTML);
+}
+
 module.exports = {
     addSectionDivs,
-    getSectionsText
+    getSectionsText,
+    justLeadSection
 };
diff --git a/lib/parsoidSectionsUsingSectionTags.js 
b/lib/parsoidSectionsUsingSectionTags.js
index a30d537..aab9cd3 100644
--- a/lib/parsoidSectionsUsingSectionTags.js
+++ b/lib/parsoidSectionsUsingSectionTags.js
@@ -1,5 +1,6 @@
 'use strict';
 
+const domino = require('domino');
 const parsoidDomUtils = require('parsoid-dom-utils');
 const NodeType = require('./nodeType');
 
@@ -18,7 +19,7 @@
 
 /**
  * Gets the section number from Parsoid.
- * @param {!DOMElement} sectionElement a <section> DOM element
+ * @param {!Element} sectionElement a <section> DOM element
  * @return {int} the section number as reported by Parsoid
  */
 function getSectionNumber(sectionElement) {
@@ -66,7 +67,7 @@
  * In this case, starts a new section object when a <section> tag is 
encountered that is a
  * direct descendant
  * @param {!Logger} logger the app's bunyan logger
- * @param {!DOMNode} node the node to visit
+ * @param {!Node} node the node to visit
  * @param {!Object[]} allSections the array containing the results
  * @param {!Object} state some state to pass around
  */
@@ -103,7 +104,7 @@
  * descending so we don't duplicate content since elsewhere outerHTML is used 
(which contains
  * content of sub-nodes).
  * @param {!Logger} logger the app's bunyan logger
- * @param {!DOMElement} rootElement the root of the DOM tree which needs to be 
traversed
+ * @param {!Element} rootElement the root of the DOM tree which needs to be 
traversed
  * @param {!Object[]} allSections holds the results
  */
 function traverseDF(logger, rootElement, allSections) {
@@ -130,9 +131,19 @@
     return allSections;
 }
 
+/**
+ * @param {!document} doc the parsed DOM Document of the Parsoid output
+ * @return {!Document} a new DOM Document containing only the lead section
+ */
+function justLeadSection(doc) {
+    const currentSectionElement = 
doc.querySelector('section[data-mw-section-id]');
+    return domino.createDocument(currentSectionElement.innerHTML);
+}
+
 module.exports = {
     addSectionTags,
     getSectionsText,
+    justLeadSection,
     testing: {
         parseSections: traverseDF,
         shouldLogInvalidSectionWarning,
diff --git a/lib/summary.js b/lib/summary.js
index 6e18df5..37e5450 100644
--- a/lib/summary.js
+++ b/lib/summary.js
@@ -77,10 +77,9 @@
  * @param {!String} html page content and metadata from Parsoid
  * @param {!Object} revTid revision and tid from Parsoid
  * @param {!Object} meta metadata from MW API
- * @param {!Logger} logger a bunyan logger
  * @return {!Object} a summary 2.0 spec-compliant page summary object
  */
-function buildSummary(domain, title, html, revTid, meta, logger) {
+function buildSummary(domain, title, html, revTid, meta) {
     const isContentModelWikitext = meta.contentmodel === 'wikitext';
     const isWhiteListedNamespace = SUMMARY_NS_WHITELIST.includes(meta.ns);
     const isRedirect = meta.redirect;
@@ -98,10 +97,7 @@
     }
 
     const doc = domino.createDocument(html);
-    parsoidSections.addSectionDivs(doc);
-    const sections = parsoidSections.getSectionsText(doc, logger);
-
-    const leadSectionDoc = domino.createDocument(sections[0].text);
+    const leadSectionDoc = parsoidSections.justLeadSection(doc);
     const intro = transforms.extractLeadIntroduction(leadSectionDoc);
     const summary = intro.length ? transforms.summarize(intro) : { extract: 
'', extract_html: '' };
 
diff --git a/routes/summary.js b/routes/summary.js
index 07577e3..9bd89af 100644
--- a/routes/summary.js
+++ b/routes/summary.js
@@ -32,7 +32,7 @@
         const revTid = parsoid.getRevAndTidFromEtag(response.html.headers);
         const title = Title.newFromText(req.params.title, response.siteinfo);
         const summary = lib.buildSummary(req.params.domain, title,
-            response.html.body, revTid, response.meta, req.logger);
+            response.html.body, revTid, response.meta);
         res.status(summary.code);
         if (summary.code === 200) {
             delete summary.code;
diff --git a/test/lib/parsoid/parsoid-sections-div-element-test.js 
b/test/lib/parsoid/parsoid-sections-div-element-test.js
index 04622b9..df407ba 100644
--- a/test/lib/parsoid/parsoid-sections-div-element-test.js
+++ b/test/lib/parsoid/parsoid-sections-div-element-test.js
@@ -92,4 +92,14 @@
         assertSection0(sections);
         assertSection1(sections, extraText);
     });
+
+    describe('justLeadSection', () => {
+        it('should just return the first section', () => {
+            const doc = domino.createDocument('<body><p>text0</p>' +
+                '<h2 id="foo">foo</h2>text1' +
+                '</body>');
+            const leadSectionDoc = parsoid.justLeadSection(doc);
+            assert.deepEqual(leadSectionDoc.body.innerHTML, '<p>text0</p>');
+        });
+    });
 });
diff --git a/test/lib/parsoid/parsoid-sections-section-elements-tests.js 
b/test/lib/parsoid/parsoid-sections-section-elements-tests.js
index 36d8644..c05eb79 100644
--- a/test/lib/parsoid/parsoid-sections-section-elements-tests.js
+++ b/test/lib/parsoid/parsoid-sections-section-elements-tests.js
@@ -167,4 +167,14 @@
                 detail: 'Cannot find heading for section number 1.'
             }]]);
     });
+
+    describe('justLeadSection', () => {
+        it('should just return the first section', () => {
+            const doc = domino.createDocument(
+                '<section data-mw-section-id="0"><p>text0</p></section>' +
+                '<section data-mw-section-id="1"><h2 
id="foo">foo</h2>text1</section>');
+            const leadSectionDoc = 
parsoidSectionsUsingSectionTags.justLeadSection(doc);
+            assert.deepEqual(leadSectionDoc.body.innerHTML, '<p>text0</p>');
+        });
+    });
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/404247
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I02828100fd1a81c94e1a842c3264caba51c634ed
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: BearND <bsitzm...@wikimedia.org>
Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org>
Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org>
Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org>
Gerrit-Reviewer: Mhurd <mh...@wikimedia.org>
Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to