Fdans has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/393591 )
Change subject: [wip] Add pageviews by country endpoint
......................................................................
[wip] Add pageviews by country endpoint
Adds a new tops endpoint to aqs that returns top countries by number
of pageviews for a given project and time range.
Change-Id: I88c28812b8f2a854ed9ec5abacbc5c42c85db194
---
M sys/pageviews.js
M sys/pageviews.yaml
M test/aqs_test_module.yaml
M test/features/pageviews/pageviews.js
M v1/pageviews.yaml
5 files changed, 218 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/aqs
refs/changes/91/393591/1
diff --git a/sys/pageviews.js b/sys/pageviews.js
index a9ef7cd..f3a532a 100644
--- a/sys/pageviews.js
+++ b/sys/pageviews.js
@@ -31,6 +31,7 @@
articleFlat: 'pageviews.per.article.flat',
project_v2: 'pageviews.per.project.v2',
tops: 'top.pageviews',
+ bycountry: 'top.bycountry'
};
const tableSchemas = {
@@ -92,6 +93,27 @@
index: [
{ attribute: 'project', type: 'hash' },
{ attribute: 'access', type: 'hash' },
+ { attribute: 'year', type: 'hash' },
+ { attribute: 'month', type: 'hash' },
+ { attribute: 'day', type: 'hash' },
+ ]
+ },
+ bycountry: {
+ table: tables.bycountry,
+ version: 1,
+ attributes: {
+ project: 'string',
+ year: 'string',
+ month: 'string',
+ day: 'string',
+ // this is deprecated, it used to be json stringified to look like:
+ // [{\"rank\": 1, \"country\": \"<<country>>\", \"views\": 123}, ...]
+ countries: 'string',
+ // this will be preferred to countries and uses the same format
+ countriesJSON: 'json'
+ },
+ index: [
+ { attribute: 'project', type: 'hash' },
{ attribute: 'year', type: 'hash' },
{ attribute: 'month', type: 'hash' },
{ attribute: 'day', type: 'hash' },
@@ -309,6 +331,54 @@
});
};
+PJVS.prototype.pageviewsByCountry = function(hyper, req) {
+ const rp = req.params;
+
+ aqsUtil.validateYearMonthDay(rp);
+
+ const dataRequest = hyper.get({
+ uri: tableURI(rp.domain, tables.bycountry),
+ body: {
+ table: tables.bycountry,
+ attributes: {
+ project: rp.project,
+ year: rp.year,
+ month: rp.month,
+ day: rp.day
+ }
+ }
+
+ }).catch(aqsUtil.notFoundCatcher);
+
+ return dataRequest.then(aqsUtil.normalizeResponse).then((res) => {
+ if (res.body.items) {
+ res.body.items.forEach((item) => {
+ // prefer the countriesJSON column if it's loaded
+ if (item.countriesJSON !== null) {
+ item.countries = item.countriesJSON;
+ } else {
+ try {
+ item.countries = JSON.parse(item.countries);
+ } catch (e) {
+ throw new HTTPError({
+ status: 500,
+ body: {
+ type: 'error',
+ description: 'This response contained invalid
JSON, we are ' +
+ 'working on fixing the problem, but until
then you can ' +
+ 'try a different date.'
+ }
+ });
+ }
+ }
+ delete item.countriesJSON;
+ });
+ }
+
+ return res;
+ });
+};
+
module.exports = function(options) {
const pjvs = new PJVS(options);
@@ -319,6 +389,7 @@
pageviewsForArticle: pjvs.pageviewsForArticleFlat.bind(pjvs),
pageviewsForProjects: pjvs.pageviewsForProjects.bind(pjvs),
pageviewsForTops: pjvs.pageviewsForTops.bind(pjvs),
+ pageviewsByCountry: pjvs.pageviewsByCountry.bind(pjvs),
},
resources: [
{
@@ -332,6 +403,10 @@
// top pageviews table
uri: `/{domain}/sys/table/${tables.tops}`,
body: tableSchemas.tops,
+ }, {
+ // pageviews by country table
+ uri: `/{domain}/sys/table/${tables.bycountry}`,
+ body: tableSchemas.bycountry,
}
]
};
diff --git a/sys/pageviews.yaml b/sys/pageviews.yaml
index cfb824a..afc2eb4 100644
--- a/sys/pageviews.yaml
+++ b/sys/pageviews.yaml
@@ -11,3 +11,7 @@
get:
summary: query top pageviews
operationId: pageviewsForTops
+ /bycountry/{project}/{year}/{month}/{day}:
+ get:
+ summary: query top countries by pageviews
+ operationId: pageviewsByCountry
diff --git a/test/aqs_test_module.yaml b/test/aqs_test_module.yaml
index 6d3f0bf..81e2af0 100644
--- a/test/aqs_test_module.yaml
+++ b/test/aqs_test_module.yaml
@@ -96,6 +96,23 @@
articlesJSON: '{{request.body.articles}}'
x-monitor: false
+ /pageviews/insert-bycountry/{project}/{year}/{month}/{day}:
+ post:
+ x-request-handler:
+ - put_to_storage:
+ request:
+ method: 'put'
+ uri: '/{domain}/sys/table/top.bycountry/'
+ body:
+ table: 'top.bycountry'
+ attributes:
+ project: '{{request.params.project}}'
+ year: '{{request.params.year}}'
+ month: '{{request.params.month}}'
+ day: '{{request.params.day}}'
+ countriesJSON: '{{request.body.countries}}'
+ x-monitor: false
+
/legacy/pagecounts/insert-aggregate/{project}/{access-site}/{granularity}/{timestamp}/{count}:
post:
x-request-handler:
diff --git a/test/features/pageviews/pageviews.js
b/test/features/pageviews/pageviews.js
index f8aca78..28eb689 100644
--- a/test/features/pageviews/pageviews.js
+++ b/test/features/pageviews/pageviews.js
@@ -31,6 +31,10 @@
top: {
all: '/pageviews/top/en.wikipedia/mobile-web/2015/01/all-days',
insert:
'/pageviews/insert-top/en.wikipedia/mobile-web/2015/01/all-days'
+ },
+ bycountry: {
+ all: '/pageviews/bycountry/en.wikipedia/2015/01/all-days',
+ insert: '/pageviews/insert-bycountry/en.wikipedia/2015/01/all-days'
}
}
var projectEndpointStrip =
'/pageviews/aggregate/www.en.wikipedia.org/all-access/all-agents/hourly/1969010100/1971010100';
@@ -381,4 +385,35 @@
assert.deepEqual(res.body.items[0].articles[1].article, 'two\\');
});
});
+
+ // By country test
+
+ it('should return the correct countries after insertion', function () {
+ return preq.post({
+ uri: server.config.aqsURL + endpoints.bycountry.insert,
+ body: {
+ countries: [{
+ rank: 1,
+ country: 'Republic of Mriiii\'duuh',
+ views: 2000
+ },{
+ rank: 2,
+ country: 'Kingdom of OOOOOOOOOOH',
+ views: 1000
+ }
+ ]
+ },
+ headers: { 'content-type': 'application/json' }
+
+ }).then(function() {
+ return preq.get({
+ uri: server.config.aqsURL + endpoints.bycountry.all
+ });
+ }).then(function(res) {
+ assert.deepEqual(res.body.items.length, 1);
+ assert.deepEqual(res.body.items[0].countries[0].country, 'Republic
of Mriiii\'duuh');
+ assert.deepEqual(res.body.items[0].countries[1].views, 1000);
+ assert.deepEqual(res.body.items[0].countries[1].country, 'Kingdom
of OOOOOOOOOOH');
+ });
+ })
});
diff --git a/v1/pageviews.yaml b/v1/pageviews.yaml
index 3f6ed18..51706ce 100644
--- a/v1/pageviews.yaml
+++ b/v1/pageviews.yaml
@@ -282,6 +282,75 @@
views: 0
rank: 1
+ /bycountry/{project}/{year}/{month}/{day}:
+ get:
+ tags:
+ - Pageviews data
+ summary: Get top countries by number of page views.
+ description: |
+ Lists the top 100 countries that visit this project the most, for the
given day or month.
+ Stability:
[experimental](https://www.mediawiki.org/wiki/API_versioning#Experimental)
+ produces:
+ - application/json
+ parameters:
+ - name: project
+ in: path
+ description: The name of any Wikimedia project formatted like
{language code}.{project name}, for example en.wikipedia. You may pass
en.wikipedia.org and the .org will be stripped off. For projects like commons
without language codes, use commons.wikimedia. For projects like
www.mediawiki.org, you can use that full string, or just use mediawiki or
mediawiki.org.
+ type: string
+ required: true
+ - name: year
+ in: path
+ description: The year of the date for which to retrieve top
countries, in YYYY format.
+ type: string
+ required: true
+ - name: month
+ in: path
+ description: The month of the date for which to retrieve top
countries, in MM format. If you want to get the top countries of a whole month,
the day parameter should be all-days.
+ type: string
+ required: true
+ - name: day
+ in: path
+ description: The day of the date for which to retrieve top
countries, in DD format.
+ type: string
+ required: true
+ responses:
+ '200':
+ description: The list of top countries by pageviews in the project
+ schema:
+ $ref: '#/definitions/bycountry'
+ default:
+ description: Error
+ schema:
+ $ref: '#/definitions/problem'
+ x-request-handler:
+ - get_from_backend:
+ request:
+ uri:
/{domain}/sys/pageviews/bycountry/{project}/{year}/{month}/{day}
+ x-monitor: true
+ x-amples:
+ - title: Get top countries by page views
+ request:
+ params:
+ domain: wikimedia.org
+ project: en.wikipedia
+ year: '1970'
+ month: '01'
+ day: '01'
+ response:
+ status: 200
+ headers:
+ content-type: application/json
+ body:
+ items:
+ - project: en.wikipedia
+ year: '1970'
+ month: '01'
+ day: '01'
+ countries:
+ - country: '-'
+ views: 0
+ rank: 1
+
definitions:
# A https://tools.ietf.org/html/draft-nottingham-http-problem
problem:
@@ -370,3 +439,21 @@
articles:
# format for this is a json array: [{rank: 1, article:
<<title>>, views: 123}, ...]
type: string
+
+ bycountry:
+ properties:
+ items:
+ type: array
+ items:
+ properties:
+ project:
+ type: string
+ year:
+ type: string
+ month:
+ type: string
+ day:
+ type: string
+ articles:
+ # format for this is a json array: [{rank: 1, country:
<<country>>, views: 123}, ...]
+ type: string
--
To view, visit https://gerrit.wikimedia.org/r/393591
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I88c28812b8f2a854ed9ec5abacbc5c42c85db194
Gerrit-PatchSet: 1
Gerrit-Project: analytics/aqs
Gerrit-Branch: master
Gerrit-Owner: Fdans <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits