Mholloway has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/395161 )
Change subject: Further cleanup of lib/most-read
......................................................................
Further cleanup of lib/most-read
Note: This change removes the pageid field from the response items.
(Hence not marking this with Hygiene.) Is pageid actually used by clients?
Change-Id: I9d95e77330cfd3e65c8d456b1b69f2c60ed1cce8
---
M lib/feed/most-read-filter.js
M lib/feed/most-read.js
M test/diff/diff.js
M test/diff/results/page_most-read-enwiki-2016-01-01.json
4 files changed, 52 insertions(+), 120 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps
refs/changes/61/395161/1
diff --git a/lib/feed/most-read-filter.js b/lib/feed/most-read-filter.js
index f99177e..269b346 100644
--- a/lib/feed/most-read-filter.js
+++ b/lib/feed/most-read-filter.js
@@ -21,10 +21,10 @@
return BLACKLIST.indexOf(title) !== -1;
}
-function filterSpecialPages(pages, mainPageTitle) {
+function filterSpecial(articles, mainPageTitle) {
const mainPageRegExp = new RegExp(`^${escape(mainPageTitle)}$`, 'i');
- return pages.filter((page) => {
- return page.ns === 0 && !mainPageRegExp.test(page.title);
+ return articles.filter((page) => {
+ return page.ns === 0 && !mainPageRegExp.test(page.article);
});
}
@@ -34,7 +34,7 @@
* relatively even mix of the two, are presumed to be inflated by bot traffic
* and not of sufficient human interest to include in the feed.
*/
-function filterBotTraffic(allPlatformsMostRead, desktopMostRead) {
+function filterBots(allPlatformsMostRead, desktopMostRead) {
if (BOT_FILTER_THRESHOLD < 0 || BOT_FILTER_THRESHOLD > 1) {
throw new HTTPError({
@@ -62,6 +62,6 @@
}
module.exports = {
- filterSpecialPages,
- filterBotTraffic
+ filterSpecial,
+ filterBots
};
diff --git a/lib/feed/most-read.js b/lib/feed/most-read.js
index 6ea5333..22e856f 100644
--- a/lib/feed/most-read.js
+++ b/lib/feed/most-read.js
@@ -7,9 +7,11 @@
const BBPromise = require('bluebird');
const mUtil = require('../mobile-util');
const mwapi = require('../mwapi');
-const filter = require('./most-read-filter');
+const filterSpecial = require('./most-read-filter').filterSpecial;
+const filterBots = require('./most-read-filter').filterBots;
const dateUtil = require('../dateUtil');
const pageviews = require('../pageviews');
+const Title = require('mediawiki-title').Title;
/**
* @public {!string} date ISO 8601 timestamp of pageviews recorded
@@ -23,14 +25,8 @@
}
function getTopPageviews(app, req, domain, date) {
- const apiDomain = 'wikimedia.org';
- const restReq = {
- headers: { accept: 'application/json; charset=utf-8' }
- };
- const client = new pageviews.Client(app, apiDomain, restReq);
-
- // todo: remove manual bot filtering when a user agent parameter is
available for top as in the
- // per-article endpoint
+ const restReq = { headers: { accept: 'application/json; charset=utf-8' } };
+ const client = new pageviews.Client(app, 'wikimedia.org', restReq);
return BBPromise.props({
desktop: client.reqTop(domain, pageviews.Platform.DESKTOP_WEB, date),
combined: client.reqTop(domain, pageviews.Platform.ALL, date)
@@ -43,29 +39,33 @@
});
}
-function promise(app, req) {
- const aggregated = !!req.query.aggregated;
- let goodTitles;
- let resultsDate;
+function getViewHistory(app, domain, startDate, endDate, entry) {
+ const restReq = { headers: { accept: 'application/json; charset=utf-8' } };
+ const client = new pageviews.Client(app, 'wikimedia.org', restReq);
+ return client.reqPage(mUtil.removeTLD(domain), pageviews.Platform.ALL,
pageviews.Agent.USER,
+ entry.article, pageviews.Granularity.DAILY, startDate, endDate)
+ .then(pageviewsPageRspToDatedPageviews);
+}
+function promise(app, req) {
if (req.params.domain === 'fy.wikipedia.org') {
return BBPromise.resolve({ meta: {} });
}
if (!dateUtil.validate(dateUtil.hyphenDelimitedDateString(req))) {
- if (aggregated) {
+ if (req.query.aggregated) {
return BBPromise.resolve({ meta: {} });
}
dateUtil.throwDateError();
}
- const targetDomain =
mUtil.removeTLD(mUtil.mobileToCanonical(req.params.domain));
-
const reqDate = dateUtil.getRequestedDate(req);
- const date = aggregated ? dateUtil.addDays(reqDate, -1) : reqDate;
+ const rspDate = req.query.aggregated ? dateUtil.addDays(reqDate, -1) :
reqDate;
- return getTopPageviews(app, req, targetDomain, date)
- .then((response) => {
+ return BBPromise.props({
+ pageviews: getTopPageviews(app, req,
mUtil.removeTLD(req.params.domain), rspDate),
+ siteinfo: mwapi.getSiteInfo(app, req)
+ }).then((response) => {
// We're working mainly with the overall list of top pageviews, and cut
// this off at 50 (the max that can be sent in a single MW API query).
// We'll keep twice as many desktop-only pageviews for the comparator
@@ -76,8 +76,11 @@
const QUERY_TITLES = mwapi.API_QUERY_MAX_TITLES;
const DESKTOP_TITLES = 2 * QUERY_TITLES;
- const combinedResults = response.combined && response.combined.body;
- const desktopResults = response.desktop && response.desktop.body;
+ const mainPage = response.siteinfo.general &&
response.siteinfo.general.mainpage;
+ const mainPageTitle = Title.newFromText(mainPage, response.siteinfo);
+
+ const combinedResults = response.pageviews.combined &&
response.pageviews.combined.body;
+ const desktopResults = response.pageviews.desktop &&
response.pageviews.desktop.body;
const combinedItems = combinedResults && combinedResults.items;
const firstCombinedItems = combinedItems && combinedResults.items[0];
const desktopItems = desktopResults && desktopResults.items;
@@ -87,83 +90,45 @@
const desktopArticles = firstDesktopItems &&
firstDesktopItems.articles;
const desktopSlice = desktopArticles && desktopArticles.slice(0,
DESKTOP_TITLES);
- goodTitles = filter.filterBotTraffic(combinedSlice, desktopSlice)
- .map(i => Object.assign({ title: i.article }, i));
-
- if (mUtil.isEmpty(goodTitles)) {
- mUtil.throw404('No results found.');
- }
-
const year = firstCombinedItems.year;
const month = firstCombinedItems.month;
const day = firstCombinedItems.day;
- resultsDate = `${year}-${month}-${day}Z`;
- return mwapi.getMostReadMetadata(app, req, goodTitles.map(i =>
i.article).join('|'));
- }).then((response) => {
- const query = response.body && response.body.query;
- const normalizations = query && query.normalized;
- const pages = query && query.pages;
- const mainPageTitle = query && query.general && query.general.mainpage;
-
- if (mUtil.isEmpty(pages)) {
- mUtil.throw404('No results found.');
- }
-
- if (normalizations) {
- const adjusted = normalizations.map(norm => ({ article: norm.from,
title: norm.to }));
- mUtil.mergeByProp(goodTitles, adjusted, 'article');
- }
-
- mUtil.mergeByProp(goodTitles, pages, 'title', true);
-
- goodTitles = filter.filterSpecialPages(goodTitles, mainPageTitle);
- if (mUtil.isEmpty(goodTitles)) {
- mUtil.throw404('No results found.');
- }
-
- const apiDomain = 'wikimedia.org';
- const restReq = {
- headers: { accept: 'application/json; charset=utf-8' }
- };
- const client = new pageviews.Client(app, apiDomain, restReq);
-
- const targetDomain =
mUtil.removeTLD(mUtil.mobileToCanonical(req.params.domain));
+ const resultsDate = `${year}-${month}-${day}Z`;
const start = dateUtil.addDays(new Date(resultsDate), -4);
const end = new Date(resultsDate);
- const articles = goodTitles.map((entry) => {
- return Object.assign(entry, {
- $merge: [
- mUtil.getRbPageSummaryUrl(app.restbase_tpl,
req.params.domain, entry.article)
- ],
- article: undefined,
- fromencoded: undefined,
- ns: undefined,
- terms: undefined,
- title: undefined,
- revisions: undefined,
- view_history: client.reqPage(targetDomain,
pageviews.Platform.ALL,
- pageviews.Agent.USER, entry.article,
pageviews.Granularity.DAILY, start, end)
- .then(pageviewsPageRspToDatedPageviews)
- });
+ const titles = filterSpecial(filterBots(combinedSlice,
desktopSlice).map((entry) => {
+ const ns = Title.newFromText(entry.article,
response.siteinfo).getNamespace()._id;
+ return Object.assign({ ns }, entry);
+ }), mainPageTitle.getPrefixedDBKey());
+
+ if (mUtil.isEmpty(titles)) {
+ mUtil.throw404('No results found.');
+ }
+
+ const result = titles.map((entry) => {
+ entry.$merge = [
+ mUtil.getRbPageSummaryUrl(app.restbase_tpl, req.params.domain,
entry.article)
+ ];
+ entry.view_history = getViewHistory(app, req.params.domain, start,
end, entry);
+ delete entry.article;
+ delete entry.ns;
+ return entry;
});
- return BBPromise.all(articles.map(article => BBPromise.props(article)))
- .then((response) => {
+ return BBPromise.all(result.map(article =>
BBPromise.props(article))).then((response) => {
return {
payload: {
date: resultsDate,
articles: response
},
- meta: {
- revision: dateUtil.dateStringFrom(req)
- }
+ meta: { revision: dateUtil.dateStringFrom(req) }
};
});
}).catch((err) => {
// Catch and handle the error if this is an aggregated request and the
// pageview data are not yet loaded.
- if (aggregated && err.status === 404) {
+ if (req.query.aggregated && err.status === 404) {
return BBPromise.resolve({ meta: {} });
}
throw err;
diff --git a/test/diff/diff.js b/test/diff/diff.js
index dc3075f..8dc78e5 100644
--- a/test/diff/diff.js
+++ b/test/diff/diff.js
@@ -6,7 +6,7 @@
const server = require('../utils/server.js');
const testSpec = require('./test-spec');
-describe('diff', function() {
+describe.only('diff', function() {
this.timeout(20000); // eslint-disable-line no-invalid-this
diff --git a/test/diff/results/page_most-read-enwiki-2016-01-01.json
b/test/diff/results/page_most-read-enwiki-2016-01-01.json
index 6a62b8e..13d85ca 100644
--- a/test/diff/results/page_most-read-enwiki-2016-01-01.json
+++ b/test/diff/results/page_most-read-enwiki-2016-01-01.json
@@ -4,7 +4,6 @@
{
"views": 609871,
"rank": 4,
- "pageid": 423882,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Natalie_Cole"
],
@@ -34,7 +33,6 @@
{
"views": 353499,
"rank": 6,
- "pageid": 14723194,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Star_Wars%3A_The_Force_Awakens"
],
@@ -64,7 +62,6 @@
{
"views": 301290,
"rank": 7,
- "pageid": 4319540,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Bobby_Leach"
],
@@ -94,7 +91,6 @@
{
"views": 231465,
"rank": 8,
- "pageid": 89697,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Wayne_Rogers"
],
@@ -124,7 +120,6 @@
{
"views": 211832,
"rank": 9,
- "pageid": 26678,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Star_Wars"
],
@@ -154,7 +149,6 @@
{
"views": 199732,
"rank": 10,
- "pageid": 166538,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Nat_King_Cole"
],
@@ -184,7 +178,6 @@
{
"views": 177494,
"rank": 11,
- "pageid": 406800,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Auld_Lang_Syne"
],
@@ -214,7 +207,6 @@
{
"views": 159010,
"rank": 13,
- "pageid": 8169073,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Antarctic_Snow_Cruiser"
],
@@ -244,7 +236,6 @@
{
"views": 150121,
"rank": 14,
- "pageid": 3133353,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Steven_Avery"
],
@@ -274,7 +265,6 @@
{
"views": 135978,
"rank": 15,
- "pageid": 48233019,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Christian_McCaffrey"
],
@@ -304,7 +294,6 @@
{
"views": 126263,
"rank": 17,
- "pageid": 43159611,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/The_Revenant_(2015_film)"
],
@@ -334,7 +323,6 @@
{
"views": 118283,
"rank": 18,
- "pageid": 5002871,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/One_World_Trade_Center"
],
@@ -364,7 +352,6 @@
{
"views": 114709,
"rank": 20,
- "pageid": 48863911,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Making_a_Murderer"
],
@@ -394,7 +381,6 @@
{
"views": 112527,
"rank": 22,
- "pageid": 4763194,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Guide_horse"
],
@@ -424,7 +410,6 @@
{
"views": 100066,
"rank": 24,
- "pageid": 20816992,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Sherlock_(TV_series)"
],
@@ -454,7 +439,6 @@
{
"views": 98587,
"rank": 25,
- "pageid": 1808479,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/William_Phelps_Eno"
],
@@ -484,7 +468,6 @@
{
"views": 98467,
"rank": 26,
- "pageid": 43308964,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/The_Hateful_Eight"
],
@@ -514,7 +497,6 @@
{
"views": 97137,
"rank": 27,
- "pageid": 44789934,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Deaths_in_2015"
],
@@ -544,7 +526,6 @@
{
"views": 93848,
"rank": 28,
- "pageid": 45360934,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Dilwale_(2015_film)"
],
@@ -574,7 +555,6 @@
{
"views": 93087,
"rank": 29,
- "pageid": 42618962,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Daisy_Ridley"
],
@@ -604,7 +584,6 @@
{
"views": 91340,
"rank": 30,
- "pageid": 44135542,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Joy_(film)"
],
@@ -634,7 +613,6 @@
{
"views": 89733,
"rank": 31,
- "pageid": 18642,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Lemmy"
],
@@ -664,7 +642,6 @@
{
"views": 89212,
"rank": 32,
- "pageid": 59892,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/List_of_highest-grossing_films"
],
@@ -694,7 +671,6 @@
{
"views": 88362,
"rank": 33,
- "pageid": 314475,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Carrie_Fisher"
],
@@ -724,7 +700,6 @@
{
"views": 81104,
"rank": 34,
- "pageid": 52742228,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Deaths_in_2016"
],
@@ -754,7 +729,6 @@
{
"views": 74736,
"rank": 38,
- "pageid": 41373899,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Ethan_Couch"
],
@@ -784,7 +758,6 @@
{
"views": 74297,
"rank": 39,
- "pageid": 13979626,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Joy_Mangano"
],
@@ -814,7 +787,6 @@
{
"views": 74022,
"rank": 40,
- "pageid": 51387,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/2016"
],
@@ -844,7 +816,6 @@
{
"views": 71623,
"rank": 42,
- "pageid": 16746099,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Jeddah_Tower"
],
@@ -874,7 +845,6 @@
{
"views": 71379,
"rank": 43,
- "pageid": 54011865,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Bajirao_I"
],
@@ -904,7 +874,6 @@
{
"views": 70520,
"rank": 45,
- "pageid": 4836239,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/The_Sound_of_Music_(film)"
],
@@ -934,7 +903,6 @@
{
"views": 68904,
"rank": 47,
- "pageid": 36044408,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/Adam_Driver"
],
@@ -964,7 +932,6 @@
{
"views": 66125,
"rank": 49,
- "pageid": 156045,
"$merge": [
"https://en.wikipedia.org/api/rest_v1/page/summary/M*A*S*H_(TV_series)"
],
--
To view, visit https://gerrit.wikimedia.org/r/395161
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I9d95e77330cfd3e65c8d456b1b69f2c60ed1cce8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits