Mholloway has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/395161 )

Change subject: Further cleanup of lib/most-read
......................................................................

Further cleanup of lib/most-read

Note: This change removes the pageid field from the response items.
(Hence not marking this with Hygiene.) Is pageid actually used by clients?

Change-Id: I9d95e77330cfd3e65c8d456b1b69f2c60ed1cce8
---
M lib/feed/most-read-filter.js
M lib/feed/most-read.js
M test/diff/diff.js
M test/diff/results/page_most-read-enwiki-2016-01-01.json
4 files changed, 52 insertions(+), 120 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps 
refs/changes/61/395161/1

diff --git a/lib/feed/most-read-filter.js b/lib/feed/most-read-filter.js
index f99177e..269b346 100644
--- a/lib/feed/most-read-filter.js
+++ b/lib/feed/most-read-filter.js
@@ -21,10 +21,10 @@
     return BLACKLIST.indexOf(title) !== -1;
 }
 
-function filterSpecialPages(pages, mainPageTitle) {
+function filterSpecial(articles, mainPageTitle) {
     const mainPageRegExp = new RegExp(`^${escape(mainPageTitle)}$`, 'i');
-    return pages.filter((page) => {
-        return page.ns === 0 && !mainPageRegExp.test(page.title);
+    return articles.filter((page) => {
+        return page.ns === 0 && !mainPageRegExp.test(page.article);
     });
 }
 
@@ -34,7 +34,7 @@
  * relatively even mix of the two, are presumed to be inflated by bot traffic
  * and not of sufficient human interest to include in the feed.
  */
-function filterBotTraffic(allPlatformsMostRead, desktopMostRead) {
+function filterBots(allPlatformsMostRead, desktopMostRead) {
 
     if (BOT_FILTER_THRESHOLD < 0 || BOT_FILTER_THRESHOLD > 1) {
         throw new HTTPError({
@@ -62,6 +62,6 @@
 }
 
 module.exports = {
-    filterSpecialPages,
-    filterBotTraffic
+    filterSpecial,
+    filterBots
 };
diff --git a/lib/feed/most-read.js b/lib/feed/most-read.js
index 6ea5333..22e856f 100644
--- a/lib/feed/most-read.js
+++ b/lib/feed/most-read.js
@@ -7,9 +7,11 @@
 const BBPromise = require('bluebird');
 const mUtil = require('../mobile-util');
 const mwapi = require('../mwapi');
-const filter = require('./most-read-filter');
+const filterSpecial = require('./most-read-filter').filterSpecial;
+const filterBots = require('./most-read-filter').filterBots;
 const dateUtil = require('../dateUtil');
 const pageviews = require('../pageviews');
+const Title = require('mediawiki-title').Title;
 
 /**
  * @public {!string} date ISO 8601 timestamp of pageviews recorded
@@ -23,14 +25,8 @@
 }
 
 function getTopPageviews(app, req, domain, date) {
-    const apiDomain = 'wikimedia.org';
-    const restReq = {
-        headers: { accept: 'application/json; charset=utf-8' }
-    };
-    const client = new pageviews.Client(app, apiDomain, restReq);
-
-    // todo: remove manual bot filtering when a user agent parameter is 
available for top as in the
-    //       per-article endpoint
+    const restReq = { headers: { accept: 'application/json; charset=utf-8' } };
+    const client = new pageviews.Client(app, 'wikimedia.org', restReq);
     return BBPromise.props({
         desktop: client.reqTop(domain, pageviews.Platform.DESKTOP_WEB, date),
         combined: client.reqTop(domain, pageviews.Platform.ALL, date)
@@ -43,29 +39,33 @@
     });
 }
 
-function promise(app, req) {
-    const aggregated = !!req.query.aggregated;
-    let goodTitles;
-    let resultsDate;
+function getViewHistory(app, domain, startDate, endDate, entry) {
+    const restReq = { headers: { accept: 'application/json; charset=utf-8' } };
+    const client = new pageviews.Client(app, 'wikimedia.org', restReq);
+    return client.reqPage(mUtil.removeTLD(domain), pageviews.Platform.ALL, 
pageviews.Agent.USER,
+        entry.article, pageviews.Granularity.DAILY, startDate, endDate)
+        .then(pageviewsPageRspToDatedPageviews);
+}
 
+function promise(app, req) {
     if (req.params.domain === 'fy.wikipedia.org') {
         return BBPromise.resolve({ meta: {} });
     }
 
     if (!dateUtil.validate(dateUtil.hyphenDelimitedDateString(req))) {
-        if (aggregated) {
+        if (req.query.aggregated) {
             return BBPromise.resolve({ meta: {} });
         }
         dateUtil.throwDateError();
     }
 
-    const targetDomain = 
mUtil.removeTLD(mUtil.mobileToCanonical(req.params.domain));
-
     const reqDate = dateUtil.getRequestedDate(req);
-    const date = aggregated ? dateUtil.addDays(reqDate, -1) : reqDate;
+    const rspDate = req.query.aggregated ? dateUtil.addDays(reqDate, -1) : 
reqDate;
 
-    return getTopPageviews(app, req, targetDomain, date)
-    .then((response) => {
+    return BBPromise.props({
+        pageviews: getTopPageviews(app, req, 
mUtil.removeTLD(req.params.domain), rspDate),
+        siteinfo: mwapi.getSiteInfo(app, req)
+    }).then((response) => {
         // We're working mainly with the overall list of top pageviews, and cut
         // this off at 50 (the max that can be sent in a single MW API query).
         // We'll keep twice as many desktop-only pageviews for the comparator
@@ -76,8 +76,11 @@
         const QUERY_TITLES = mwapi.API_QUERY_MAX_TITLES;
         const DESKTOP_TITLES = 2 * QUERY_TITLES;
 
-        const combinedResults = response.combined && response.combined.body;
-        const desktopResults = response.desktop && response.desktop.body;
+        const mainPage = response.siteinfo.general && 
response.siteinfo.general.mainpage;
+        const mainPageTitle = Title.newFromText(mainPage, response.siteinfo);
+
+        const combinedResults = response.pageviews.combined && 
response.pageviews.combined.body;
+        const desktopResults = response.pageviews.desktop && 
response.pageviews.desktop.body;
         const combinedItems = combinedResults && combinedResults.items;
         const firstCombinedItems = combinedItems && combinedResults.items[0];
         const desktopItems = desktopResults && desktopResults.items;
@@ -87,83 +90,45 @@
         const desktopArticles = firstDesktopItems && 
firstDesktopItems.articles;
         const desktopSlice = desktopArticles && desktopArticles.slice(0, 
DESKTOP_TITLES);
 
-        goodTitles = filter.filterBotTraffic(combinedSlice, desktopSlice)
-            .map(i => Object.assign({ title: i.article }, i));
-
-        if (mUtil.isEmpty(goodTitles)) {
-            mUtil.throw404('No results found.');
-        }
-
         const year = firstCombinedItems.year;
         const month = firstCombinedItems.month;
         const day = firstCombinedItems.day;
-        resultsDate = `${year}-${month}-${day}Z`;
-        return mwapi.getMostReadMetadata(app, req, goodTitles.map(i => 
i.article).join('|'));
-    }).then((response) => {
-        const query = response.body && response.body.query;
-        const normalizations = query && query.normalized;
-        const pages = query && query.pages;
-        const mainPageTitle = query && query.general && query.general.mainpage;
-
-        if (mUtil.isEmpty(pages)) {
-            mUtil.throw404('No results found.');
-        }
-
-        if (normalizations) {
-            const adjusted = normalizations.map(norm => ({ article: norm.from, 
title: norm.to }));
-            mUtil.mergeByProp(goodTitles, adjusted, 'article');
-        }
-
-        mUtil.mergeByProp(goodTitles, pages, 'title', true);
-
-        goodTitles = filter.filterSpecialPages(goodTitles, mainPageTitle);
-        if (mUtil.isEmpty(goodTitles)) {
-            mUtil.throw404('No results found.');
-        }
-
-        const apiDomain = 'wikimedia.org';
-        const restReq = {
-            headers: { accept: 'application/json; charset=utf-8' }
-        };
-        const client = new pageviews.Client(app, apiDomain, restReq);
-
-        const targetDomain = 
mUtil.removeTLD(mUtil.mobileToCanonical(req.params.domain));
+        const resultsDate = `${year}-${month}-${day}Z`;
         const start = dateUtil.addDays(new Date(resultsDate), -4);
         const end = new Date(resultsDate);
 
-        const articles = goodTitles.map((entry) => {
-            return Object.assign(entry, {
-                $merge: [
-                    mUtil.getRbPageSummaryUrl(app.restbase_tpl, 
req.params.domain, entry.article)
-                ],
-                article: undefined,
-                fromencoded: undefined,
-                ns: undefined,
-                terms: undefined,
-                title: undefined,
-                revisions: undefined,
-                view_history: client.reqPage(targetDomain, 
pageviews.Platform.ALL,
-                    pageviews.Agent.USER, entry.article, 
pageviews.Granularity.DAILY, start, end)
-                     .then(pageviewsPageRspToDatedPageviews)
-            });
+        const titles = filterSpecial(filterBots(combinedSlice, 
desktopSlice).map((entry) => {
+            const ns = Title.newFromText(entry.article, 
response.siteinfo).getNamespace()._id;
+            return Object.assign({ ns }, entry);
+        }), mainPageTitle.getPrefixedDBKey());
+
+        if (mUtil.isEmpty(titles)) {
+            mUtil.throw404('No results found.');
+        }
+
+        const result = titles.map((entry) => {
+            entry.$merge = [
+                mUtil.getRbPageSummaryUrl(app.restbase_tpl, req.params.domain, 
entry.article)
+            ];
+            entry.view_history = getViewHistory(app, req.params.domain, start, 
end, entry);
+            delete entry.article;
+            delete entry.ns;
+            return entry;
         });
 
-        return BBPromise.all(articles.map(article => BBPromise.props(article)))
-        .then((response) => {
+        return BBPromise.all(result.map(article => 
BBPromise.props(article))).then((response) => {
             return {
                 payload: {
                     date: resultsDate,
                     articles: response
                 },
-                meta: {
-                    revision: dateUtil.dateStringFrom(req)
-                }
+                meta: { revision: dateUtil.dateStringFrom(req) }
             };
         });
     }).catch((err) => {
         // Catch and handle the error if this is an aggregated request and the
         // pageview data are not yet loaded.
-        if (aggregated && err.status === 404) {
+        if (req.query.aggregated && err.status === 404) {
             return BBPromise.resolve({ meta: {} });
         }
         throw err;
diff --git a/test/diff/diff.js b/test/diff/diff.js
index dc3075f..8dc78e5 100644
--- a/test/diff/diff.js
+++ b/test/diff/diff.js
@@ -6,7 +6,7 @@
 const server = require('../utils/server.js');
 const testSpec = require('./test-spec');
 
-describe('diff', function() {
+describe.only('diff', function() {
 
     this.timeout(20000); // eslint-disable-line no-invalid-this
 
diff --git a/test/diff/results/page_most-read-enwiki-2016-01-01.json 
b/test/diff/results/page_most-read-enwiki-2016-01-01.json
index 6a62b8e..13d85ca 100644
--- a/test/diff/results/page_most-read-enwiki-2016-01-01.json
+++ b/test/diff/results/page_most-read-enwiki-2016-01-01.json
@@ -4,7 +4,6 @@
     {
       "views": 609871,
       "rank": 4,
-      "pageid": 423882,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Natalie_Cole";
       ],
@@ -34,7 +33,6 @@
     {
       "views": 353499,
       "rank": 6,
-      "pageid": 14723194,
       "$merge": [
         
"https://en.wikipedia.org/api/rest_v1/page/summary/Star_Wars%3A_The_Force_Awakens";
       ],
@@ -64,7 +62,6 @@
     {
       "views": 301290,
       "rank": 7,
-      "pageid": 4319540,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Bobby_Leach";
       ],
@@ -94,7 +91,6 @@
     {
       "views": 231465,
       "rank": 8,
-      "pageid": 89697,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Wayne_Rogers";
       ],
@@ -124,7 +120,6 @@
     {
       "views": 211832,
       "rank": 9,
-      "pageid": 26678,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Star_Wars";
       ],
@@ -154,7 +149,6 @@
     {
       "views": 199732,
       "rank": 10,
-      "pageid": 166538,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Nat_King_Cole";
       ],
@@ -184,7 +178,6 @@
     {
       "views": 177494,
       "rank": 11,
-      "pageid": 406800,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Auld_Lang_Syne";
       ],
@@ -214,7 +207,6 @@
     {
       "views": 159010,
       "rank": 13,
-      "pageid": 8169073,
       "$merge": [
         
"https://en.wikipedia.org/api/rest_v1/page/summary/Antarctic_Snow_Cruiser";
       ],
@@ -244,7 +236,6 @@
     {
       "views": 150121,
       "rank": 14,
-      "pageid": 3133353,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Steven_Avery";
       ],
@@ -274,7 +265,6 @@
     {
       "views": 135978,
       "rank": 15,
-      "pageid": 48233019,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Christian_McCaffrey";
       ],
@@ -304,7 +294,6 @@
     {
       "views": 126263,
       "rank": 17,
-      "pageid": 43159611,
       "$merge": [
         
"https://en.wikipedia.org/api/rest_v1/page/summary/The_Revenant_(2015_film)"
       ],
@@ -334,7 +323,6 @@
     {
       "views": 118283,
       "rank": 18,
-      "pageid": 5002871,
       "$merge": [
         
"https://en.wikipedia.org/api/rest_v1/page/summary/One_World_Trade_Center";
       ],
@@ -364,7 +352,6 @@
     {
       "views": 114709,
       "rank": 20,
-      "pageid": 48863911,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Making_a_Murderer";
       ],
@@ -394,7 +381,6 @@
     {
       "views": 112527,
       "rank": 22,
-      "pageid": 4763194,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Guide_horse";
       ],
@@ -424,7 +410,6 @@
     {
       "views": 100066,
       "rank": 24,
-      "pageid": 20816992,
       "$merge": [
         
"https://en.wikipedia.org/api/rest_v1/page/summary/Sherlock_(TV_series)"
       ],
@@ -454,7 +439,6 @@
     {
       "views": 98587,
       "rank": 25,
-      "pageid": 1808479,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/William_Phelps_Eno";
       ],
@@ -484,7 +468,6 @@
     {
       "views": 98467,
       "rank": 26,
-      "pageid": 43308964,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/The_Hateful_Eight";
       ],
@@ -514,7 +497,6 @@
     {
       "views": 97137,
       "rank": 27,
-      "pageid": 44789934,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Deaths_in_2015";
       ],
@@ -544,7 +526,6 @@
     {
       "views": 93848,
       "rank": 28,
-      "pageid": 45360934,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Dilwale_(2015_film)"
       ],
@@ -574,7 +555,6 @@
     {
       "views": 93087,
       "rank": 29,
-      "pageid": 42618962,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Daisy_Ridley";
       ],
@@ -604,7 +584,6 @@
     {
       "views": 91340,
       "rank": 30,
-      "pageid": 44135542,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Joy_(film)"
       ],
@@ -634,7 +613,6 @@
     {
       "views": 89733,
       "rank": 31,
-      "pageid": 18642,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Lemmy";
       ],
@@ -664,7 +642,6 @@
     {
       "views": 89212,
       "rank": 32,
-      "pageid": 59892,
       "$merge": [
         
"https://en.wikipedia.org/api/rest_v1/page/summary/List_of_highest-grossing_films";
       ],
@@ -694,7 +671,6 @@
     {
       "views": 88362,
       "rank": 33,
-      "pageid": 314475,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Carrie_Fisher";
       ],
@@ -724,7 +700,6 @@
     {
       "views": 81104,
       "rank": 34,
-      "pageid": 52742228,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Deaths_in_2016";
       ],
@@ -754,7 +729,6 @@
     {
       "views": 74736,
       "rank": 38,
-      "pageid": 41373899,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Ethan_Couch";
       ],
@@ -784,7 +758,6 @@
     {
       "views": 74297,
       "rank": 39,
-      "pageid": 13979626,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Joy_Mangano";
       ],
@@ -814,7 +787,6 @@
     {
       "views": 74022,
       "rank": 40,
-      "pageid": 51387,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/2016";
       ],
@@ -844,7 +816,6 @@
     {
       "views": 71623,
       "rank": 42,
-      "pageid": 16746099,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Jeddah_Tower";
       ],
@@ -874,7 +845,6 @@
     {
       "views": 71379,
       "rank": 43,
-      "pageid": 54011865,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Bajirao_I";
       ],
@@ -904,7 +874,6 @@
     {
       "views": 70520,
       "rank": 45,
-      "pageid": 4836239,
       "$merge": [
         
"https://en.wikipedia.org/api/rest_v1/page/summary/The_Sound_of_Music_(film)"
       ],
@@ -934,7 +903,6 @@
     {
       "views": 68904,
       "rank": 47,
-      "pageid": 36044408,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/Adam_Driver";
       ],
@@ -964,7 +932,6 @@
     {
       "views": 66125,
       "rank": 49,
-      "pageid": 156045,
       "$merge": [
         "https://en.wikipedia.org/api/rest_v1/page/summary/M*A*S*H_(TV_series)"
       ],

-- 
To view, visit https://gerrit.wikimedia.org/r/395161
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9d95e77330cfd3e65c8d456b1b69f2c60ed1cce8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to