Mholloway has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/392727 )

Change subject: Batch MW API query requests
......................................................................

Batch MW API query requests

The MW API only accepts a list of up to 50 titles per query.[1]  We'll
need more than that to get full media results.  Adds batching to
accomplish this.

[1] https://www.mediawiki.org/wiki/API:Query#Specifying_pages

Change-Id: Id2c433127e92ff9137808553187008c7d8969394
---
M lib/api-util.js
M lib/media.js
2 files changed, 33 insertions(+), 14 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/27/392727/1

diff --git a/lib/api-util.js b/lib/api-util.js
index d6059d7..cd4626d 100644
--- a/lib/api-util.js
+++ b/lib/api-util.js
@@ -6,6 +6,8 @@
 const Template = require('swagger-router').Template;
 const HTTPError = sUtil.HTTPError;
 
+const MAX_BATCH_SIZE = 50;
+
 
 /**
  * Calls the MW API with the supplied query as its body
@@ -130,6 +132,7 @@
     mwApiGet,
     restApiGet,
     setupApiTemplates,
-    checkResponseStatus
+    checkResponseStatus,
+    MAX_BATCH_SIZE
 };
 
diff --git a/lib/media.js b/lib/media.js
index 022a61b..7f3373e 100644
--- a/lib/media.js
+++ b/lib/media.js
@@ -1,5 +1,6 @@
 'use strict';
 
+const BBPromise = require('bluebird');
 const api = require('./api-util');
 
 const MIN_IMAGE_SIZE = 64;
@@ -96,22 +97,37 @@
 }
 
 /**
- * Gets the gallery content from MW API
- * TODO: ensure that all media items are correctly accounted for on very large 
articles
+ * Gets the gallery content from MW API.
+ *
+ * Batches requests by sets of 50 since we may be dealing with large title 
sets, and 50 titles is
+ * the max we can specify at a time through the query title parameter.
+ *
+ * https://www.mediawiki.org/wiki/API:Query#Specifying_pages
  */
 function getMetadataFromApi(app, req, titles) {
-    const query = {
-        action: 'query',
-        format: 'json',
-        formatversion: 2,
-        prop: 'videoinfo',
-        viprop: 'url|dimensions|mime|extmetadata|derivatives',
-        viurlwidth: MAX_IMAGE_WIDTH,
-        titles: titles.join('|'),
-        continue: ''
+    const query = (batch) => {
+        return {
+            action: 'query',
+            format: 'json',
+            formatversion: 2,
+            prop: 'videoinfo',
+            viprop: 'url|dimensions|mime|extmetadata|derivatives',
+            viurlwidth: MAX_IMAGE_WIDTH,
+            titles: batch.join('|'),
+            continue: ''
+        };
     };
-    return api.mwApiGet(app, req.params.domain, query).then((response) => {
-        const pages = response.body.query && response.body.query.pages;
+    const request = {};
+    let count = 0;
+    while (titles.length > 0) {
+        const batch = titles.splice(0, api.MAX_BATCH_SIZE);
+        request[`batch_${count++}`] = api.mwApiGet(app, req.params.domain, 
query(batch));
+    }
+    return BBPromise.props(request).then((response) => {
+        let pages = [];
+        Object.keys(response).forEach((key) => {
+            pages = pages.concat(response[key].body.query && 
response[key].body.query.pages);
+        });
         const items = pages ? makeResults(pages) : [];
         return { items };
     });

-- 
To view, visit https://gerrit.wikimedia.org/r/392727
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id2c433127e92ff9137808553187008c7d8969394
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to