jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/392727 )

Change subject: Batch MW API query requests
......................................................................


Batch MW API query requests

The MW API only accepts a list of up to 50 titles per query.[1]  We'll
need more than that to get full media results.  Adds batching to
accomplish this.

[1] https://www.mediawiki.org/wiki/API:Query#Specifying_pages

Change-Id: Id2c433127e92ff9137808553187008c7d8969394
---
M lib/api-util.js
M lib/media.js
M lib/mobile-util.js
M routes/media.js
M test/lib/api-util/api-util-test.js
M test/lib/mobile-util/mobile-util-test.js
6 files changed, 121 insertions(+), 18 deletions(-)

Approvals:
  BearND: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/api-util.js b/lib/api-util.js
index d6059d7..efb4686 100644
--- a/lib/api-util.js
+++ b/lib/api-util.js
@@ -1,10 +1,12 @@
 'use strict';
 
-
+const BBPromise = require('bluebird');
 const preq = require('preq');
 const sUtil = require('./util');
 const Template = require('swagger-router').Template;
 const HTTPError = sUtil.HTTPError;
+
+const MAX_BATCH_SIZE = 50;
 
 
 /**
@@ -40,6 +42,34 @@
         return response;
     });
 
+}
+
+
+function _batch(arr, size) {
+    const res = [];
+    while (arr.length > 0) {
+        res.push(arr.splice(0, size));
+    }
+    return res;
+}
+
+
+/**
+ * Calls the MW API with the supplied query as its body
+ * @param {!Object} app the application object
+ * @param {string} domain the domain to issue the request to
+ * @param {?Object} query an object with all the query parameters for the MW 
API
+ * @param {!String[]} titles list of titles to request by batch
+ * @param {?Integer} size number of titles to request per batch
+ * @return {!Object[]} combined results of the batched queries
+ */
+function mwApiGetBatched(app, domain, query, titles, size = MAX_BATCH_SIZE) {
+    const reqs = _batch(titles, size).map((res) => {
+        return mwApiGet(app, domain, Object.assign(query, { titles: 
res.join('|') }));
+    });
+    return BBPromise.all(reqs).then((response) => {
+        return response.reduce((result, batch) => 
result.concat(batch.body.query.pages), []);
+    });
 }
 
 
@@ -128,8 +158,13 @@
 
 module.exports = {
     mwApiGet,
+    mwApiGetBatched,
     restApiGet,
     setupApiTemplates,
-    checkResponseStatus
+    checkResponseStatus,
+
+    test: {
+        _batch
+    }
 };
 
diff --git a/lib/media.js b/lib/media.js
index 5177a8f..39a4d43 100644
--- a/lib/media.js
+++ b/lib/media.js
@@ -106,8 +106,12 @@
 }
 
 /**
- * Gets the gallery content from MW API
- * TODO: ensure that all media items are correctly accounted for on very large 
articles
+ * Gets metadata about media items from MW API.
+ *
+ * Batches requests by sets of 50 since we may be dealing with large title 
sets, and 50 titles is
+ * the max we can specify at a time through the query title parameter.
+ *
+ * https://www.mediawiki.org/wiki/API:Query#Specifying_pages
  */
 function getMetadataFromApi(app, req, titles) {
     const query = {
@@ -117,13 +121,11 @@
         prop: 'videoinfo',
         viprop: 'url|dimensions|mime|extmetadata|derivatives',
         viurlwidth: MAX_IMAGE_WIDTH,
-        titles: titles.join('|'),
         continue: ''
     };
-    return api.mwApiGet(app, req.params.domain, query).then((response) => {
-        const pages = response.body.query && response.body.query.pages;
-        const items = pages ? makeResults(pages) : [];
-        return { items };
+
+    return api.mwApiGetBatched(app, req.params.domain, query, 
titles).then((response) => {
+        return { items: makeResults(response) };
     });
 }
 
diff --git a/lib/mobile-util.js b/lib/mobile-util.js
index 6e90839..b5fb879 100644
--- a/lib/mobile-util.js
+++ b/lib/mobile-util.js
@@ -132,23 +132,30 @@
     return anchor && 
mUtil.removeFragment(mUtil.removeLinkPrefix(anchor.getAttribute('href')));
 };
 
-// Merge two arrays of objects by the specified property.
-// Stolen from https://jsfiddle.net/guya/eAWKR/.
-mUtil.mergeByProp = function(arr1, arr2, prop, pushIfKeyNotFound) {
+/**
+ * Merge two arrays of objects by the specified property.  Modifies arr1 in 
place and preserves its
+ * order.
+ * @param {?Object[]} arr1 An array of objects
+ * @param {?Object[]} arr2 An array of objects
+ * @param {!String} prop the property on which to merge the objects in the 
arrays
+ * @param {!Boolean} push whether to extend arr1 with objects from arr2 for 
which 'prop' is
+ * defined but no object in arr1 has the same value
+ * Credit: https://jsfiddle.net/guya/eAWKR/.
+ */
+mUtil.mergeByProp = function(arr1, arr2, prop, push) {
     if (!arr1 || !arr2) {
         return;
     }
 
     arr2.forEach((arr2obj) => {
-        const arr1obj = arr1.find((arr1obj) => {
-            return arr1obj[prop] === arr2obj[prop];
-        });
+        // Find the object in arr1 with the same value of 'prop' as this 
object in arr2
+        const arr1obj = arr1.find(arr1obj => arr1obj[prop] === arr2obj[prop]);
 
         // If the object already exists, extend it with the new values from
         // arr2, otherwise conditionally add the new object to arr1.
         if (arr1obj) {
             Object.assign(arr1obj, arr2obj);
-        } else if (pushIfKeyNotFound) {
+        } else if (push) {
             arr1.push(arr2obj);
         }
     });
@@ -317,6 +324,10 @@
     });
 };
 
+mUtil.deduplicate = function(array) {
+    return Array.from(new Set(array));
+};
+
 /* jslint bitwise: true */
 /* eslint no-bitwise: ["error", { "allow": ["<<"] }] */
 mUtil.hashCode = function(string) {
diff --git a/routes/media.js b/routes/media.js
index 0ada4c8..b4ec043 100644
--- a/routes/media.js
+++ b/routes/media.js
@@ -27,7 +27,7 @@
             return;
         }
         const mediaList = media.getMediaItemInfoFromPage(selection);
-        const titles = mediaList.map(item => item.title);
+        const titles = mUtil.deduplicate(mediaList.map(item => item.title));
         return BBPromise.props({
             metadata: media.getMetadataFromApi(app, req, titles),
             siteinfo: mwapi.getSiteInfo(app, req)
diff --git a/test/lib/api-util/api-util-test.js 
b/test/lib/api-util/api-util-test.js
index 4080c58..5059c18 100644
--- a/test/lib/api-util/api-util-test.js
+++ b/test/lib/api-util/api-util-test.js
@@ -1,7 +1,9 @@
 'use strict';
 
+const BBPromise = require('bluebird');
 const assert = require('../../utils/assert');
 const mwapi = require('../../../lib/mwapi');
+const api = require('../../../lib/api-util').test;
 
 const logger = require('bunyan').createLogger({
     name: 'test-logger',
@@ -9,6 +11,8 @@
 });
 
 logger.log = function(a, b) {};
+
+const list = [0, 1, 2, 3, 4];
 
 describe('lib:apiUtil', () => {
 
@@ -21,4 +25,27 @@
             }, /api_error/);
         });
     });
+
+    it('batching works correctly', () => {
+        const batches = api._batch(list, 2);
+        assert.deepEqual(batches.length, 3);
+        assert.deepEqual(batches[0].length, 2);
+        assert.deepEqual(batches[1].length, 2);
+        assert.deepEqual(batches[2].length, 1);
+    });
+
+    it('order is preserved when Array.reduce is called on resolved 
BBPromise.all batches', () => {
+        const batches = api._batch(list, 2);
+        const promises = BBPromise.all(batches.map((batch) => {
+            return new BBPromise(resolve => setTimeout(() => resolve(batch), 
batch.length * 10));
+        }));
+        promises.then((response) => {
+            const result = response.reduce((arr, batch) => arr.concat(batch), 
[]);
+            assert.deepEqual(result[0], 0);
+            assert.deepEqual(result[1], 1);
+            assert.deepEqual(result[2], 2);
+            assert.deepEqual(result[3], 3);
+            assert.deepEqual(result[4], 4);
+        });
+    });
 });
diff --git a/test/lib/mobile-util/mobile-util-test.js 
b/test/lib/mobile-util/mobile-util-test.js
index 579da6d..cdbdf23 100644
--- a/test/lib/mobile-util/mobile-util-test.js
+++ b/test/lib/mobile-util/mobile-util-test.js
@@ -19,6 +19,24 @@
 const arr6 = [ obj1, obj3 ];
 const arr7 = [ obj4, obj2 ];
 
+const ordered = [
+    { order: 1, join: 'foo' },
+    { order: 2, join: 'bar' },
+    { order: 3, join: 'baz' }
+];
+
+const unordered = [
+    { join: 'bar', extra: 'doc' },
+    { join: 'baz', extra: 'dickory' },
+    { join: 'foo', extra: 'hickory' },
+];
+
+const combined = [
+    { order: 1, join: 'foo', extra: 'hickory' },
+    { order: 2, join: 'bar', extra: 'doc' },
+    { order: 3, join: 'baz', extra: 'dickory' }
+];
+
 describe('lib:mobile-util', () => {
     it('removeTLD should remove TLD', () => {
         assert.deepEqual(mUtil.removeTLD('ru.wikipedia.org'), 'ru.wikipedia');
@@ -44,7 +62,17 @@
         assert.deepEqual(mUtil.extractDbTitleFromAnchor(link), 'My_db_title');
     });
 
-    it('mergeByProp should merge two objects by shared property', () => {
+    it('mergeByProp should preserve order of arr1', () => {
+        mUtil.mergeByProp(ordered, unordered, 'join', false);
+        assert.deepEqual(ordered, combined);
+    });
+
+    it('mergeByProp should not add obj if no obj in arr1 exists w/ prop=value 
& push=false', () => {
+        mUtil.mergeByProp(arr1, arr2, 'again', false);
+        assert.deepEqual(arr1, arr6);
+    });
+
+    it('mergeByProp should add obj if no obj in arr1 exists w/ prop=value & 
push=true', () => {
         mUtil.mergeByProp(arr1, arr2, 'again', true);
         assert.deepEqual(arr1, arr5);
     });

-- 
To view, visit https://gerrit.wikimedia.org/r/392727
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Id2c433127e92ff9137808553187008c7d8969394
Gerrit-PatchSet: 17
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <[email protected]>
Gerrit-Reviewer: BearND <[email protected]>
Gerrit-Reviewer: Fjalapeno <[email protected]>
Gerrit-Reviewer: GergÅ‘ Tisza <[email protected]>
Gerrit-Reviewer: Jdlrobson <[email protected]>
Gerrit-Reviewer: Mholloway <[email protected]>
Gerrit-Reviewer: Mhurd <[email protected]>
Gerrit-Reviewer: Ppchelko <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to