jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/330836 )
Change subject: New: add last 5 days of pageviews to most-read response ...................................................................... New: add last 5 days of pageviews to most-read response Bug: T148445 Change-Id: Ie450eb77b855fc2ecb18aebccaca0c833499f326 --- M lib/dateUtil.js M lib/feed/most-read.js A lib/pageviews.js M spec.yaml M test/features/most-read/most-read.js M test/lib/dateUtil/date-util-test.js 6 files changed, 235 insertions(+), 35 deletions(-) Approvals: BearND: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/dateUtil.js b/lib/dateUtil.js index 6c19363..a137f4b 100644 --- a/lib/dateUtil.js +++ b/lib/dateUtil.js @@ -51,11 +51,29 @@ return `${req.params.yyyy}-${req.params.mm}-${req.params.dd}Z`; }; +/** + * @param {!string} str e.g., 2016122800 + * @return {!string} e.g., 2016-12-28Z + */ +dateUtil.iso8601DateFromYYYYMMDD = function(str) { + return str.replace(/(\d{4})(\d{2})(\d{2}).*/g, '$1-$2-$3Z'); +}; + dateUtil.pad = function(number) { if (number < 10) { return `0${number}`; } return number; +}; + +/** + @param {!Date} date + @param {!number} days + */ +dateUtil.addDays = (date, days) => { + const copy = new Date(date); + copy.setDate(copy.getDate() + days); + return copy; }; /** @@ -73,6 +91,14 @@ }-${dateUtil.pad(date.getUTCDate())}`; }; +/** + * @param {!Date} date + * @return {!String} e.g., 20160516 + */ +dateUtil.formatYYYYMMDD = (date) => { + return dateUtil.formatISODate(date).replace(/-/g, ''); +}; + // Validate the input date by checking whether UTC-format constructed from the // input components matches the values actually provided. dateUtil.isValidDate = function(dateString, year, month, day) { diff --git a/lib/feed/most-read.js b/lib/feed/most-read.js index 37cbd3d..c8de0e3 100644 --- a/lib/feed/most-read.js +++ b/lib/feed/most-read.js @@ -6,11 +6,21 @@ const BBPromise = require('bluebird'); const mUtil = require('../mobile-util'); -const api = require('../api-util'); const mwapi = require('../mwapi'); const filter = require('./most-read-filter'); const dateUtil = require('../dateUtil'); +const pageviews = require('../pageviews'); +/** + * @public {!string} date ISO 8601 timestamp of pageviews recorded + * @public {!number} views Integer pageviews on date + */ +class DatedPageviews { + constructor(date, views) { + this.date = date; + this.views = views; + } +} /** * Construct a list of title strings from the array of good article objects. @@ -23,29 +33,24 @@ return result; } -function getTopPageviews(app, req, aggregated) { - const yesterday = new Date(dateUtil.getRequestedDate(req) - dateUtil.ONE_DAY); - const year = aggregated ? yesterday.getUTCFullYear() : req.params.yyyy; - const month = aggregated ? dateUtil.pad(yesterday.getUTCMonth() + 1) : req.params.mm; - const day = aggregated ? dateUtil.pad(yesterday.getUTCDate()) : req.params.dd; +function getTopPageviews(app, req, domain, date) { const apiDomain = 'wikimedia.org'; - const targetDomain = mUtil.removeTLD(mUtil.mobileToCanonical(req.params.domain)); - const basePath = 'metrics/pageviews/top/%proj/%platform/%y/%m/%d' - .replace('%proj', targetDomain) - .replace('%y', year) - .replace('%m', month) - .replace('%d', day); - - const desktopPath = basePath.replace('%platform', 'desktop'); - const combinedPath = basePath.replace('%platform', 'all-access'); - const restReq = { headers: { accept: 'application/json; charset=utf-8' } }; + const client = new pageviews.Client(app, apiDomain, restReq); + // todo: remove manual bot filtering when a user agent parameter is available for top as in the + // per-article endpoint return BBPromise.props({ - desktop: api.restApiGet(app, apiDomain, desktopPath, restReq), - combined: api.restApiGet(app, apiDomain, combinedPath, restReq) + desktop: client.reqTop(domain, pageviews.Platform.DESKTOP_WEB, date), + combined: client.reqTop(domain, pageviews.Platform.ALL, date) + }); +} + +function pageviewsPageRspToDatedPageviews(rsp) { + return rsp.body.items.map((item) => { + return new DatedPageviews(dateUtil.iso8601DateFromYYYYMMDD(item.timestamp), item.views); }); } @@ -61,7 +66,12 @@ dateUtil.throwDateError(); } - return getTopPageviews(app, req, aggregated) + const targetDomain = mUtil.removeTLD(mUtil.mobileToCanonical(req.params.domain)); + + const reqDate = dateUtil.getRequestedDate(req); + const date = aggregated ? dateUtil.addDays(reqDate, -1) : reqDate; + + return getTopPageviews(app, req, targetDomain, date) .then((response) => { // We're working mainly with the overall list of top pageviews, and cut // this off at 50 (the max that can be sent in a single MW API query). @@ -83,8 +93,8 @@ const combinedArticlesSlice = combinedArticles && combinedArticles.slice(0, QUERY_TITLES); const desktopArticles = firstDesktopItems && firstDesktopItems.articles; const desktopArticlesSlice = desktopArticles && desktopArticles.slice(0, DESKTOP_TITLES); - goodTitles = filter.filterBotTraffic(combinedArticlesSlice, desktopArticlesSlice); + goodTitles = filter.filterBotTraffic(combinedArticlesSlice, desktopArticlesSlice); if (mUtil.isEmpty(goodTitles)) { mUtil.throw404('No results found.'); } @@ -111,9 +121,23 @@ mUtil.fillInMemberKeys(goodTitles, ['title', 'article']); mUtil.mergeByProp(goodTitles, pages, 'title', true); - goodTitles = filter.filterSpecialPages(goodTitles, mainPageTitle); - const results = goodTitles.map((entry) => { + goodTitles = filter.filterSpecialPages(goodTitles, mainPageTitle); + if (mUtil.isEmpty(goodTitles)) { + mUtil.throw404('No results found.'); + } + + const apiDomain = 'wikimedia.org'; + const restReq = { + headers: { accept: 'application/json; charset=utf-8' } + }; + const client = new pageviews.Client(app, apiDomain, restReq); + + const targetDomain = mUtil.removeTLD(mUtil.mobileToCanonical(req.params.domain)); + const start = dateUtil.addDays(new Date(resultsDate), -4); + const end = new Date(resultsDate); + + const articles = goodTitles.map((entry) => { return Object.assign(entry, { $merge: [ mUtil.getRbPageSummaryUrl(app.restbase_tpl, req.params.domain, entry.article) @@ -123,22 +147,25 @@ ns: undefined, terms: undefined, title: undefined, - revisions: undefined + revisions: undefined, + view_history: client.reqPage(targetDomain, pageviews.Platform.ALL, + pageviews.Agent.USER, entry.article, pageviews.Granularity.DAILY, start, end) + .then(pageviewsPageRspToDatedPageviews) }); }); - if (results.length) { + return BBPromise.all(articles.map((article) => BBPromise.props(article))) + .then((response) => { return { payload: { date: resultsDate, - articles: results + articles: response }, meta: { revision: dateUtil.dateStringFrom(req) } }; - } - mUtil.throw404('No results found.'); + }); }).catch((err) => { // Catch and handle the error if this is an aggregated request and the // pageview data are not yet loaded. diff --git a/lib/pageviews.js b/lib/pageviews.js new file mode 100644 index 0000000..5750d04 --- /dev/null +++ b/lib/pageviews.js @@ -0,0 +1,78 @@ +'use strict'; + +const api = require('./api-util'); +const dateUtil = require('./dateUtil'); +const urlencode = require('locutus/php/url/urlencode'); + +module.exports.Platform = { + ALL: 'all-access', + DESKTOP_WEB: 'desktop', + MOBILE_WEB: 'mobile-web', + MOBILE_APP: 'mobile-app' +}; + +module.exports.Agent = { + ALL: 'all-agents', + USER: 'user', + SPIDER: 'spider', + BOT: 'bot' +}; + +module.exports.Granularity = { + DAILY: 'daily' +}; + +/** + * @private {!Object} app + * @private {!string} apiDomain RESTBase API domain; e.g., wikimedia.org + * @private {!Object} req + */ +module.exports.Client = class { + /** + * @param {!Object} app + * @param {!string} apiDomain RESTBase API domain; e.g., wikimedia.org + * @param {!Object} req + */ + constructor(app, apiDomain, req) { + this.app = app; + this.apiDomain = apiDomain; + this.req = req; + } + + /** + * @param {!string} domain Top level project domain to filter; e.g., de.wikipedia + * @param {!Platform} platform Device platform to filter + * @param {!Agent} agent User agent to filter + * @param {!string} title Normalized article title + * @param {!Granularity} granularity Result time unit + * @param {!Date} start Inclusive start date + * @param {!Date} end Inclusive end date + * @return {!Promise} Daily pageviews on domain from platform for title from [start, end] + */ + reqPage(domain, platform, agent, title, granularity, start, end) { + const titleEncoded = urlencode(title); + const startStr = dateUtil.formatYYYYMMDD(start); + const endStr = dateUtil.formatYYYYMMDD(end); + // eslint-disable-next-line max-len + const path = `metrics/pageviews/per-article/${domain}/${platform}/${agent}/${titleEncoded}/${granularity}/${startStr}/${endStr}`; + return api + .restApiGet(this.app, this.apiDomain, path, this.req) + .then(api.checkResponseStatus); + } + + /** + * @param {!string} domain Top level project domain to filter; e.g., de.wikipedia + * @param {!Platform} platform Device platform to filter + * @param {!Date} date + * @return {!Promise} Top pageviews on domain from platform for date + */ + reqTop(domain, platform, date) { + const year = date.getUTCFullYear(); + const month = dateUtil.pad(date.getUTCMonth() + 1); + const day = dateUtil.pad(date.getUTCDate()); + const path = `metrics/pageviews/top/${domain}/${platform}/${year}/${month}/${day}`; + return api + .restApiGet(this.app, this.apiDomain, path, this.req) + .then(api.checkResponseStatus); + } +}; diff --git a/spec.yaml b/spec.yaml index bee6e52..32f6699 100644 --- a/spec.yaml +++ b/spec.yaml @@ -603,6 +603,15 @@ - width - height + dated_pageview: + type: object + date: + type: string + description: ISO 8601 timestamp of pageviews recorded + views: + type: integer + description: Number of views on date + mostread_article: type: object properties: @@ -612,6 +621,10 @@ views: type: integer description: Number of views on the requested day + view_history: + type: array + items: + $ref: '#/definitions/dated_pageview' rank: type: integer description: Position in the list of most viewed articles diff --git a/test/features/most-read/most-read.js b/test/features/most-read/most-read.js index 35779d7..e66a7ff 100644 --- a/test/features/most-read/most-read.js +++ b/test/features/most-read/most-read.js @@ -2,21 +2,16 @@ const preq = require('preq'); const assert = require('../../utils/assert'); +const dateUtil = require('../../../lib/dateUtil'); const server = require('../../utils/server'); const headers = require('../../utils/headers'); const testUtil = require('../../utils/testUtil'); const BLACKLIST = require('../../../etc/feed/blacklist'); -function addDays(date, days) { - const result = new Date(date); - result.setDate(result.getDate() + days); - return result; -} - const date = new Date(); -const beforeDate = addDays(date, -5); +const beforeDate = dateUtil.addDays(date, -5); const dateString = testUtil.constructTestDate(beforeDate); -const afterDate = addDays(date, 5); +const afterDate = dateUtil.addDays(date, 5); const futureDateString = testUtil.constructTestDate(afterDate); describe('most-read articles', function() { @@ -42,6 +37,21 @@ assert.ok(elem.$merge && elem.$merge[0], '$merge uri should be present'); const title = elem.$merge[0].substring(mergeUriPrefix.length); assert.ok(BLACKLIST.indexOf(title) === -1, 'blacklisted title'); + }); + }); + }); + + it('Should contain pageview history', () => { + const nextDay = new Date('2017-01-02Z'); + const uri = `${server.config.uri}es.wikipedia.org/v1/page/most-read/2017/01/01`; + return preq.get({ uri }) + .then((res) => { + res.body.articles.forEach((article) => { + assert.deepEqual(article.view_history.length, 5); + for (const history of article.view_history) { + assert.ok(history.views > 0); + assert.ok(new Date(history.date) < nextDay); + } }); }); }); @@ -79,6 +89,15 @@ }); }); + it('Should provide pageviews from day prior when aggregated flag is set', () => { + const dayPrior = '2016-12-31Z'; + const uri = `${server.config.uri}da.wikipedia.org/v1/page/most-read/2017/01/01`; + return preq.get({ uri, query: { aggregated: true } }) + .then((res) => { + assert.deepEqual(res.body.articles[0].view_history[4].date, dayPrior); + }); + }); + it('Should throw 404 for request with no results', () => { const uri = `${server.config.uri}zh-classical.wikipedia.org/v1/page/most-read/2016/11/12`; return preq.get({ uri }) diff --git a/test/lib/dateUtil/date-util-test.js b/test/lib/dateUtil/date-util-test.js index 8290d12..cde3ef2 100644 --- a/test/lib/dateUtil/date-util-test.js +++ b/test/lib/dateUtil/date-util-test.js @@ -17,6 +17,43 @@ assert.equal(actual.getUTCDate(), 15); }); + it('iso8601DateFromYYYYMMDD', () => { + const date = '1999123112'; + const expected = '1999-12-31Z'; + assert.deepEqual(dateUtil.iso8601DateFromYYYYMMDD(date), expected); + }); + + it('addDays positive', () => { + const date = new Date('1999-12-31'); + const expected = new Date('2000-01-01'); + assert.deepEqual(dateUtil.addDays(date, 1), expected); + }); + + it('addDays zero', () => { + const date = new Date('2000-01-01'); + const expected = new Date(date); + assert.deepEqual(dateUtil.addDays(date, 0), expected); + }); + + it('addDays negative', () => { + const date = new Date('2000-01-01'); + const expected = new Date('1999-12-31'); + assert.deepEqual(dateUtil.addDays(date, -1), expected); + }); + + it('addDays immutable', () => { + const date = new Date('2000-01-01'); + const expected = new Date(date); + dateUtil.addDays(date, 1); + assert.deepEqual(date, expected); + }); + + it('formatYYYYMMDD', () => { + const date = new Date('2000-01-01'); + const expected = '20000101'; + assert.deepEqual(dateUtil.formatYYYYMMDD(date), expected); + }); + it('date format validation should reject invalid formats', () => { assert.ok(!dateUtil.validate('2016-7-4')); assert.ok(!dateUtil.validate('2016-07-4')); -- To view, visit https://gerrit.wikimedia.org/r/330836 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ie450eb77b855fc2ecb18aebccaca0c833499f326 Gerrit-PatchSet: 7 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Niedzielski <sniedziel...@wikimedia.org> Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org> Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org> Gerrit-Reviewer: Jhernandez <jhernan...@wikimedia.org> Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org> Gerrit-Reviewer: Mhurd <mh...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Niedzielski <sniedziel...@wikimedia.org> Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org> Gerrit-Reviewer: Sniedzielski <sniedziel...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits