BearND has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/404246 )

Change subject: Initial streamlining getting the summary HTML
......................................................................

Initial streamlining getting the summary HTML

* Replace pageJsonPromise with getParsoidHtml
pageJsonPromise does too many things we don't need:
** parsePronunciation
** parseSpokenWikipedia
** stripUnneededMarkup
** addSectionDivs (we only need the first section) — maybe tackle this later

We also don't need them this early because, if we return early in the 204
cases, all of this computation is thrown away.

Change-Id: I6a1a31aa5a1e5e608a37b0e8722c1c26b6ea4411
---
M lib/parsoid-access.js
M lib/summary.js
M routes/summary.js
3 files changed, 22 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps 
refs/changes/46/404246/1

diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js
index 8633db3..15eeb8a 100644
--- a/lib/parsoid-access.js
+++ b/lib/parsoid-access.js
@@ -221,6 +221,7 @@
     getParsoidHtml,
     getRevisionFromEtag,
     getRevAndTidFromEtag,
+    getModified,
 
     // VisibleForTesting
     _getBaseUri: getBaseUri,
diff --git a/lib/summary.js b/lib/summary.js
index 59db23b..6e18df5 100644
--- a/lib/summary.js
+++ b/lib/summary.js
@@ -1,6 +1,8 @@
 'use strict';
 
 const domino = require('domino');
+const parsoid = require('./parsoid-access');
+const parsoidSections = require('./parsoidSections');
 const transforms = require('./transforms');
 
 const NS_MAIN = 0;
@@ -72,11 +74,13 @@
  * Build a page summary
  * @param {!String} domain the request domain
  * @param {!Object} title a mediawiki-title object for the page title
- * @param {!Object} page page content and metadata from Parsoid
+ * @param {!String} html page content and metadata from Parsoid
+ * @param {!Object} revTid revision and tid from Parsoid
  * @param {!Object} meta metadata from MW API
+ * @param {!Logger} logger a bunyan logger
  * @return {!Object} a summary 2.0 spec-compliant page summary object
  */
-function buildSummary(domain, title, page, meta) {
+function buildSummary(domain, title, html, revTid, meta, logger) {
     const isContentModelWikitext = meta.contentmodel === 'wikitext';
     const isWhiteListedNamespace = SUMMARY_NS_WHITELIST.includes(meta.ns);
     const isRedirect = meta.redirect;
@@ -93,8 +97,12 @@
         return { code: 204 };
     }
 
-    const leadText = domino.createDocument(page.sections[0].text);
-    const intro = transforms.extractLeadIntroduction(leadText);
+    const doc = domino.createDocument(html);
+    parsoidSections.addSectionDivs(doc);
+    const sections = parsoidSections.getSectionsText(doc, logger);
+
+    const leadSectionDoc = domino.createDocument(sections[0].text);
+    const intro = transforms.extractLeadIntroduction(leadSectionDoc);
     const summary = intro.length ? transforms.summarize(intro) : { extract: 
'', extract_html: '' };
 
     return Object.assign({
@@ -109,9 +117,9 @@
         originalimage: meta.originalimage,
         lang: meta.lang,
         dir: meta.dir,
-        revision: page.revision,
-        tid: page.tid,
-        timestamp: page.lastmodified,
+        revision: revTid.revision,
+        tid: revTid.tid,
+        timestamp: parsoid.getModified(doc),
         description: meta.description,
         coordinates: meta.geo && {
             lat: meta.geo.latitude,
diff --git a/routes/summary.js b/routes/summary.js
index 21ad57b..07577e3 100644
--- a/routes/summary.js
+++ b/routes/summary.js
@@ -19,22 +19,24 @@
 let app;
 
 /**
- * GET {domain}/v1/page/summary/{title}/{revision?}/{tid?}
+ * GET {domain}/v1/page/summary/{title}{/revision?}{/tid?}
  * Extracts a summary of a given wiki page limited to one paragraph of text
  */
 router.get('/summary/:title/:revision?/:tid?', (req, res) => {
     return BBPromise.props({
-        page: parsoid.pageJsonPromise(app, req, false),
+        html: parsoid.getParsoidHtml(app, req),
         meta: mwapi.getMetadata(app, req),
         title: mwapi.getTitleObj(app, req),
         siteinfo: mwapi.getSiteInfo(app, req)
     }).then((response) => {
+        const revTid = parsoid.getRevAndTidFromEtag(response.html.headers);
         const title = Title.newFromText(req.params.title, response.siteinfo);
-        const summary = lib.buildSummary(req.params.domain, title, 
response.page, response.meta);
+        const summary = lib.buildSummary(req.params.domain, title,
+            response.html.body, revTid, response.meta, req.logger);
         res.status(summary.code);
         if (summary.code === 200) {
             delete summary.code;
-            mUtil.setETag(res, summary.revision, summary.tid);
+            mUtil.setETag(res, revTid.revision, revTid.tid);
             mUtil.setContentType(res, mUtil.CONTENT_TYPES.summary);
             res.send(summary);
         }

-- 
To view, visit https://gerrit.wikimedia.org/r/404246
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I6a1a31aa5a1e5e608a37b0e8722c1c26b6ea4411
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: BearND <bsitzm...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to