Mholloway has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/394235 )

Change subject: [TEST:PS7] Batch MW API query requests
......................................................................

[TEST:PS7] Batch MW API query requests

The MW API only accepts a list of up to 50 titles per query.[1]  We'll
need more than that to get full media results.  Adds batching to
accomplish this.

[1] https://www.mediawiki.org/wiki/API:Query#Specifying_pages

Change-Id: I125c865a2c094ae8a9f70a1f71d8873039db0580
---
M lib/api-util.js
M lib/media.js
M lib/mobile-util.js
M test/lib/mobile-util/mobile-util-test.js
4 files changed, 84 insertions(+), 17 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps 
refs/changes/35/394235/1

diff --git a/lib/api-util.js b/lib/api-util.js
index d6059d7..54e57f8 100644
--- a/lib/api-util.js
+++ b/lib/api-util.js
@@ -1,10 +1,12 @@
 'use strict';
 
-
+const BBPromise = require('bluebird');
 const preq = require('preq');
 const sUtil = require('./util');
 const Template = require('swagger-router').Template;
 const HTTPError = sUtil.HTTPError;
+
+const MAX_BATCH_SIZE = 50;
 
 
 /**
@@ -40,6 +42,32 @@
         return response;
     });
 
+}
+
+
+/**
+ * Calls the MW API with the supplied query as its body
+ * @param {!Object} app the application object
+ * @param {string} domain the domain to issue the request to
+ * @param {?Object} query an object with all the query parameters for the MW 
API
+ * @param {!String[]} titles list of titles to request by batch
+ * @param {?Integer} batchSize number of titles to request per batch
+ * @return {!Object[]} combined results of the batched queries
+ */
+function mwApiGetBatched(app, domain, query, titles, batchSize = 
MAX_BATCH_SIZE) {
+    const req = {};
+    let i = 0;
+    while (titles.length > 0) {
+        const batch = titles.splice(0, batchSize);
+        req[`_${i++}`] = mwApiGet(app, domain, Object.assign(query, { titles: 
batch.join('|') }));
+    }
+    return BBPromise.props(req).then((response) => {
+        let result = [];
+        Object.values(response).forEach((batchResponse) => {
+            result = result.concat(batchResponse.body.query && 
batchResponse.body.query.pages);
+        });
+        return result;
+    });
 }
 
 
@@ -128,8 +156,10 @@
 
 module.exports = {
     mwApiGet,
+    mwApiGetBatched,
     restApiGet,
     setupApiTemplates,
-    checkResponseStatus
+    checkResponseStatus,
+    MAX_BATCH_SIZE
 };
 
diff --git a/lib/media.js b/lib/media.js
index fe541ee..ad0e33f 100644
--- a/lib/media.js
+++ b/lib/media.js
@@ -104,8 +104,12 @@
 }
 
 /**
- * Gets the gallery content from MW API
- * TODO: ensure that all media items are correctly accounted for on very large 
articles
+ * Gets metadata about media items from MW API.
+ *
+ * Batches requests by sets of 50 since we may be dealing with large title 
sets, and 50 titles is
+ * the max we can specify at a time through the query title parameter.
+ *
+ * https://www.mediawiki.org/wiki/API:Query#Specifying_pages
  */
 function getMetadataFromApi(app, req, titles) {
     const query = {
@@ -115,13 +119,11 @@
         prop: 'videoinfo',
         viprop: 'url|dimensions|mime|extmetadata|derivatives',
         viurlwidth: MAX_IMAGE_WIDTH,
-        titles: titles.join('|'),
         continue: ''
     };
-    return api.mwApiGet(app, req.params.domain, query).then((response) => {
-        const pages = response.body.query && response.body.query.pages;
-        const items = pages ? makeResults(pages) : [];
-        return { items };
+
+    return api.mwApiGetBatched(app, req.params.domain, query, 
titles).then((response) => {
+        return { items: response ? makeResults(response) : [] };
     });
 }
 
diff --git a/lib/mobile-util.js b/lib/mobile-util.js
index fb74aad..1738224 100644
--- a/lib/mobile-util.js
+++ b/lib/mobile-util.js
@@ -136,23 +136,30 @@
     return anchor && 
mUtil.removeFragment(mUtil.removeLinkPrefix(anchor.getAttribute('href')));
 };
 
-// Merge two arrays of objects by the specified property.
-// Stolen from https://jsfiddle.net/guya/eAWKR/.
-mUtil.mergeByProp = function(arr1, arr2, prop, pushIfKeyNotFound) {
+/**
+ * Merge two arrays of objects by the specified property.  Modifies arr1 in 
place and preserves its
+ * order.
+ * @param {?Object[]} arr1 An array of objects
+ * @param {?Object[]} arr2 An array of objects
+ * @param {!String} prop the property on which to merge the objects in the 
arrays
+ * @param {!Boolean} push whether to extend arr1 with objects from arr2 for 
which 'prop' is
+ * defined but no object in arr1 has the same value
+ * Credit: https://jsfiddle.net/guya/eAWKR/.
+ */
+mUtil.mergeByProp = function(arr1, arr2, prop, push) {
     if (!arr1 || !arr2) {
         return;
     }
 
     arr2.forEach((arr2obj) => {
-        const arr1obj = arr1.find((arr1obj) => {
-            return arr1obj[prop] === arr2obj[prop];
-        });
+        // Find the object in arr1 with the same value of 'prop' as this 
object in arr2
+        const arr1obj = arr1.find(arr1obj => arr1obj[prop] === arr2obj[prop]);
 
         // If the object already exists, extend it with the new values from
         // arr2, otherwise conditionally add the new object to arr1.
         if (arr1obj) {
             Object.assign(arr1obj, arr2obj);
-        } else if (pushIfKeyNotFound) {
+        } else if (push) {
             arr1.push(arr2obj);
         }
     });
diff --git a/test/lib/mobile-util/mobile-util-test.js 
b/test/lib/mobile-util/mobile-util-test.js
index 579da6d..cdbdf23 100644
--- a/test/lib/mobile-util/mobile-util-test.js
+++ b/test/lib/mobile-util/mobile-util-test.js
@@ -19,6 +19,24 @@
 const arr6 = [ obj1, obj3 ];
 const arr7 = [ obj4, obj2 ];
 
+const ordered = [
+    { order: 1, join: 'foo' },
+    { order: 2, join: 'bar' },
+    { order: 3, join: 'baz' }
+];
+
+const unordered = [
+    { join: 'bar', extra: 'doc' },
+    { join: 'baz', extra: 'dickory' },
+    { join: 'foo', extra: 'hickory' },
+];
+
+const combined = [
+    { order: 1, join: 'foo', extra: 'hickory' },
+    { order: 2, join: 'bar', extra: 'doc' },
+    { order: 3, join: 'baz', extra: 'dickory' }
+];
+
 describe('lib:mobile-util', () => {
     it('removeTLD should remove TLD', () => {
         assert.deepEqual(mUtil.removeTLD('ru.wikipedia.org'), 'ru.wikipedia');
@@ -44,7 +62,17 @@
         assert.deepEqual(mUtil.extractDbTitleFromAnchor(link), 'My_db_title');
     });
 
-    it('mergeByProp should merge two objects by shared property', () => {
+    it('mergeByProp should preserve order of arr1', () => {
+        mUtil.mergeByProp(ordered, unordered, 'join', false);
+        assert.deepEqual(ordered, combined);
+    });
+
+    it('mergeByProp should not add obj if no obj in arr1 exists w/ prop=value 
& push=false', () => {
+        mUtil.mergeByProp(arr1, arr2, 'again', false);
+        assert.deepEqual(arr1, arr6);
+    });
+
+    it('mergeByProp should add obj if no obj in arr1 exists w/ prop=value & 
push=true', () => {
         mUtil.mergeByProp(arr1, arr2, 'again', true);
         assert.deepEqual(arr1, arr5);
     });

-- 
To view, visit https://gerrit.wikimedia.org/r/394235
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I125c865a2c094ae8a9f70a1f71d8873039db0580
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to