BearND has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/404246 )
Change subject: Initial streamlining getting the summary HTML ...................................................................... Initial streamlining getting the summary HTML * Replace pageJsonPromise with getParsoidHtml pageJsonPromise does too many things we don't need: ** parsePronunciation ** parseSpokenWikipedia ** stripUnneededMarkup ** addSectionDivs (only need the first section), maybe tackle later We also don't need them this early because if we return early in the 204 cases all this computation is thrown away. Change-Id: I6a1a31aa5a1e5e608a37b0e8722c1c26b6ea4411 --- M lib/parsoid-access.js M lib/summary.js M routes/summary.js 3 files changed, 22 insertions(+), 11 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/46/404246/1 diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js index 8633db3..15eeb8a 100644 --- a/lib/parsoid-access.js +++ b/lib/parsoid-access.js @@ -221,6 +221,7 @@ getParsoidHtml, getRevisionFromEtag, getRevAndTidFromEtag, + getModified, // VisibleForTesting _getBaseUri: getBaseUri, diff --git a/lib/summary.js b/lib/summary.js index 59db23b..6e18df5 100644 --- a/lib/summary.js +++ b/lib/summary.js @@ -1,6 +1,8 @@ 'use strict'; const domino = require('domino'); +const parsoid = require('./parsoid-access'); +const parsoidSections = require('./parsoidSections'); const transforms = require('./transforms'); const NS_MAIN = 0; @@ -72,11 +74,13 @@ * Build a page summary * @param {!String} domain the request domain * @param {!Object} title a mediawiki-title object for the page title - * @param {!Object} page page content and metadata from Parsoid + * @param {!String} html page content and metadata from Parsoid + * @param {!Object} revTid revision and tid from Parsoid * @param {!Object} meta metadata from MW API + * @param {!Logger} logger a bunyan logger * @return {!Object} a summary 2.0 spec-compliant page summary object */ -function buildSummary(domain, title, page, meta) { +function buildSummary(domain, title, html, revTid, meta, logger) { const isContentModelWikitext = meta.contentmodel === 'wikitext'; const isWhiteListedNamespace = SUMMARY_NS_WHITELIST.includes(meta.ns); const isRedirect = meta.redirect; @@ -93,8 +97,12 @@ return { code: 204 }; } - const leadText = domino.createDocument(page.sections[0].text); - const intro = transforms.extractLeadIntroduction(leadText); + const doc = domino.createDocument(html); + parsoidSections.addSectionDivs(doc); + const sections = parsoidSections.getSectionsText(doc, logger); + + const leadSectionDoc = domino.createDocument(sections[0].text); + const intro = transforms.extractLeadIntroduction(leadSectionDoc); const summary = intro.length ? transforms.summarize(intro) : { extract: '', extract_html: '' }; return Object.assign({ @@ -109,9 +117,9 @@ originalimage: meta.originalimage, lang: meta.lang, dir: meta.dir, - revision: page.revision, - tid: page.tid, - timestamp: page.lastmodified, + revision: revTid.revision, + tid: revTid.tid, + timestamp: parsoid.getModified(doc), description: meta.description, coordinates: meta.geo && { lat: meta.geo.latitude, diff --git a/routes/summary.js b/routes/summary.js index 21ad57b..07577e3 100644 --- a/routes/summary.js +++ b/routes/summary.js @@ -19,22 +19,24 @@ let app; /** - * GET {domain}/v1/page/summary/{title}/{revision?}/{tid?} + * GET {domain}/v1/page/summary/{title}{/revision?}{/tid?} * Extracts a summary of a given wiki page limited to one paragraph of text */ router.get('/summary/:title/:revision?/:tid?', (req, res) => { return BBPromise.props({ - page: parsoid.pageJsonPromise(app, req, false), + html: parsoid.getParsoidHtml(app, req), meta: mwapi.getMetadata(app, req), title: mwapi.getTitleObj(app, req), siteinfo: mwapi.getSiteInfo(app, req) }).then((response) => { + const revTid = parsoid.getRevAndTidFromEtag(response.html.headers); const title = Title.newFromText(req.params.title, response.siteinfo); - const summary = lib.buildSummary(req.params.domain, title, response.page, response.meta); + const summary = lib.buildSummary(req.params.domain, title, + response.html.body, revTid, response.meta, req.logger); res.status(summary.code); if (summary.code === 200) { delete summary.code; - mUtil.setETag(res, summary.revision, summary.tid); + mUtil.setETag(res, revTid.revision, revTid.tid); mUtil.setContentType(res, mUtil.CONTENT_TYPES.summary); res.send(summary); } -- To view, visit https://gerrit.wikimedia.org/r/404246 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I6a1a31aa5a1e5e608a37b0e8722c1c26b6ea4411 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: BearND <bsitzm...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits