jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/392727 )
Change subject: Batch MW API query requests ...................................................................... Batch MW API query requests The MW API only accepts a list of up to 50 titles per query.[1] We'll need more than that to get full media results. Adds batching to accomplish this. [1] https://www.mediawiki.org/wiki/API:Query#Specifying_pages Change-Id: Id2c433127e92ff9137808553187008c7d8969394 --- M lib/api-util.js M lib/media.js M lib/mobile-util.js M routes/media.js M test/lib/api-util/api-util-test.js M test/lib/mobile-util/mobile-util-test.js 6 files changed, 121 insertions(+), 18 deletions(-) Approvals: BearND: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/api-util.js b/lib/api-util.js index d6059d7..efb4686 100644 --- a/lib/api-util.js +++ b/lib/api-util.js @@ -1,10 +1,12 @@ 'use strict'; - +const BBPromise = require('bluebird'); const preq = require('preq'); const sUtil = require('./util'); const Template = require('swagger-router').Template; const HTTPError = sUtil.HTTPError; + +const MAX_BATCH_SIZE = 50; /** @@ -40,6 +42,34 @@ return response; }); +} + + +function _batch(arr, size) { + const res = []; + while (arr.length > 0) { + res.push(arr.splice(0, size)); + } + return res; +} + + +/** + * Calls the MW API with the supplied query as its body + * @param {!Object} app the application object + * @param {string} domain the domain to issue the request to + * @param {?Object} query an object with all the query parameters for the MW API + * @param {!String[]} titles list of titles to request by batch + * @param {?Integer} size number of titles to request per batch + * @return {!Object[]} combined results of the batched queries + */ +function mwApiGetBatched(app, domain, query, titles, size = MAX_BATCH_SIZE) { + const reqs = _batch(titles, size).map((res) => { + return mwApiGet(app, domain, Object.assign(query, { titles: res.join('|') })); + }); + return BBPromise.all(reqs).then((response) => { + return response.reduce((result, batch) => result.concat(batch.body.query.pages), []); + }); } @@ -128,8 +158,13 @@ module.exports = { mwApiGet, + mwApiGetBatched, restApiGet, setupApiTemplates, - checkResponseStatus + checkResponseStatus, + + test: { + _batch + } }; diff --git a/lib/media.js b/lib/media.js index 5177a8f..39a4d43 100644 --- a/lib/media.js +++ b/lib/media.js @@ -106,8 +106,12 @@ } /** - * Gets the gallery content from MW API - * TODO: ensure that all media items are correctly accounted for on very large articles + * Gets metadata about media items from MW API. + * + * Batches requests by sets of 50 since we may be dealing with large title sets, and 50 titles is + * the max we can specify at a time through the query title parameter. + * + * https://www.mediawiki.org/wiki/API:Query#Specifying_pages */ function getMetadataFromApi(app, req, titles) { const query = { @@ -117,13 +121,11 @@ prop: 'videoinfo', viprop: 'url|dimensions|mime|extmetadata|derivatives', viurlwidth: MAX_IMAGE_WIDTH, - titles: titles.join('|'), continue: '' }; - return api.mwApiGet(app, req.params.domain, query).then((response) => { - const pages = response.body.query && response.body.query.pages; - const items = pages ? makeResults(pages) : []; - return { items }; + + return api.mwApiGetBatched(app, req.params.domain, query, titles).then((response) => { + return { items: makeResults(response) }; }); } diff --git a/lib/mobile-util.js b/lib/mobile-util.js index 6e90839..b5fb879 100644 --- a/lib/mobile-util.js +++ b/lib/mobile-util.js @@ -132,23 +132,30 @@ return anchor && mUtil.removeFragment(mUtil.removeLinkPrefix(anchor.getAttribute('href'))); }; -// Merge two arrays of objects by the specified property. -// Stolen from https://jsfiddle.net/guya/eAWKR/. -mUtil.mergeByProp = function(arr1, arr2, prop, pushIfKeyNotFound) { +/** + * Merge two arrays of objects by the specified property. Modifies arr1 in place and preserves its + * order. + * @param {?Object[]} arr1 An array of objects + * @param {?Object[]} arr2 An array of objects + * @param {!String} prop the property on which to merge the objects in the arrays + * @param {!Boolean} push whether to extend arr1 with objects from arr2 for which 'prop' is + * defined but no object in arr1 has the same value + * Credit: https://jsfiddle.net/guya/eAWKR/. + */ +mUtil.mergeByProp = function(arr1, arr2, prop, push) { if (!arr1 || !arr2) { return; } arr2.forEach((arr2obj) => { - const arr1obj = arr1.find((arr1obj) => { - return arr1obj[prop] === arr2obj[prop]; - }); + // Find the object in arr1 with the same value of 'prop' as this object in arr2 + const arr1obj = arr1.find(arr1obj => arr1obj[prop] === arr2obj[prop]); // If the object already exists, extend it with the new values from // arr2, otherwise conditionally add the new object to arr1. if (arr1obj) { Object.assign(arr1obj, arr2obj); - } else if (pushIfKeyNotFound) { + } else if (push) { arr1.push(arr2obj); } }); @@ -317,6 +324,10 @@ }); }; +mUtil.deduplicate = function(array) { + return Array.from(new Set(array)); +}; + /* jslint bitwise: true */ /* eslint no-bitwise: ["error", { "allow": ["<<"] }] */ mUtil.hashCode = function(string) { diff --git a/routes/media.js b/routes/media.js index 0ada4c8..b4ec043 100644 --- a/routes/media.js +++ b/routes/media.js @@ -27,7 +27,7 @@ return; } const mediaList = media.getMediaItemInfoFromPage(selection); - const titles = mediaList.map(item => item.title); + const titles = mUtil.deduplicate(mediaList.map(item => item.title)); return BBPromise.props({ metadata: media.getMetadataFromApi(app, req, titles), siteinfo: mwapi.getSiteInfo(app, req) diff --git a/test/lib/api-util/api-util-test.js b/test/lib/api-util/api-util-test.js index 4080c58..5059c18 100644 --- a/test/lib/api-util/api-util-test.js +++ b/test/lib/api-util/api-util-test.js @@ -1,7 +1,9 @@ 'use strict'; +const BBPromise = require('bluebird'); const assert = require('../../utils/assert'); const mwapi = require('../../../lib/mwapi'); +const api = require('../../../lib/api-util').test; const logger = require('bunyan').createLogger({ name: 'test-logger', @@ -9,6 +11,8 @@ }); logger.log = function(a, b) {}; + +const list = [0, 1, 2, 3, 4]; describe('lib:apiUtil', () => { @@ -21,4 +25,27 @@ }, /api_error/); }); }); + + it('batching works correctly', () => { + const batches = api._batch(list, 2); + assert.deepEqual(batches.length, 3); + assert.deepEqual(batches[0].length, 2); + assert.deepEqual(batches[1].length, 2); + assert.deepEqual(batches[2].length, 1); + }); + + it('order is preserved when Array.reduce is called on resolved BBPromise.all batches', () => { + const batches = api._batch(list, 2); + const promises = BBPromise.all(batches.map((batch) => { + return new BBPromise(resolve => setTimeout(() => resolve(batch), batch.length * 10)); + })); + promises.then((response) => { + const result = response.reduce((arr, batch) => arr.concat(batch), []); + assert.deepEqual(result[0], 0); + assert.deepEqual(result[1], 1); + assert.deepEqual(result[2], 2); + assert.deepEqual(result[3], 3); + assert.deepEqual(result[4], 4); + }); + }); }); diff --git a/test/lib/mobile-util/mobile-util-test.js b/test/lib/mobile-util/mobile-util-test.js index 579da6d..cdbdf23 100644 --- a/test/lib/mobile-util/mobile-util-test.js +++ b/test/lib/mobile-util/mobile-util-test.js @@ -19,6 +19,24 @@ const arr6 = [ obj1, obj3 ]; const arr7 = [ obj4, obj2 ]; +const ordered = [ + { order: 1, join: 'foo' }, + { order: 2, join: 'bar' }, + { order: 3, join: 'baz' } +]; + +const unordered = [ + { join: 'bar', extra: 'doc' }, + { join: 'baz', extra: 'dickory' }, + { join: 'foo', extra: 'hickory' }, +]; + +const combined = [ + { order: 1, join: 'foo', extra: 'hickory' }, + { order: 2, join: 'bar', extra: 'doc' }, + { order: 3, join: 'baz', extra: 'dickory' } +]; + describe('lib:mobile-util', () => { it('removeTLD should remove TLD', () => { assert.deepEqual(mUtil.removeTLD('ru.wikipedia.org'), 'ru.wikipedia'); @@ -44,7 +62,17 @@ assert.deepEqual(mUtil.extractDbTitleFromAnchor(link), 'My_db_title'); }); - it('mergeByProp should merge two objects by shared property', () => { + it('mergeByProp should preserve order of arr1', () => { + mUtil.mergeByProp(ordered, unordered, 'join', false); + assert.deepEqual(ordered, combined); + }); + + it('mergeByProp should not add obj if no obj in arr1 exists w/ prop=value & push=false', () => { + mUtil.mergeByProp(arr1, arr2, 'again', false); + assert.deepEqual(arr1, arr6); + }); + + it('mergeByProp should add obj if no obj in arr1 exists w/ prop=value & push=true', () => { mUtil.mergeByProp(arr1, arr2, 'again', true); assert.deepEqual(arr1, arr5); }); -- To view, visit https://gerrit.wikimedia.org/r/392727 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Id2c433127e92ff9137808553187008c7d8969394 Gerrit-PatchSet: 17 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Mholloway <[email protected]> Gerrit-Reviewer: BearND <[email protected]> Gerrit-Reviewer: Fjalapeno <[email protected]> Gerrit-Reviewer: Gergő Tisza <[email protected]> Gerrit-Reviewer: Jdlrobson <[email protected]> Gerrit-Reviewer: Mholloway <[email protected]> Gerrit-Reviewer: Mhurd <[email protected]> Gerrit-Reviewer: Ppchelko <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
