Milimetric has submitted this change and it was merged.
Change subject: timeseries working in bytes_added and edits
......................................................................
timeseries working in bytes_added and edits
Change-Id: I6a9b7abe43d72905b237311af4dca0206fa95d8a
---
M tests/test_metrics/test_bytes_added.py
M tests/test_metrics/test_namespace_edits.py
M tests/test_metrics/test_revert_rate.py
M tests/test_metrics/test_timeseries.py
M wikimetrics/controllers/authentication.py
M wikimetrics/metrics/bytes_added.py
M wikimetrics/metrics/namespace_edits.py
M wikimetrics/metrics/timeseries_metric.py
M wikimetrics/templates/authenticate.html
M wikimetrics/templates/forms/metric_configuration.html
M wikimetrics/utils.py
11 files changed, 516 insertions(+), 142 deletions(-)
Approvals:
Milimetric: Verified; Looks good to me, approved
diff --git a/tests/test_metrics/test_bytes_added.py b/tests/test_metrics/test_bytes_added.py
index 2af5a0a..1afb769 100644
--- a/tests/test_metrics/test_bytes_added.py
+++ b/tests/test_metrics/test_bytes_added.py
@@ -1,6 +1,6 @@
from nose.tools import assert_true, assert_equal
from tests.fixtures import DatabaseTest
-from wikimetrics.metrics import BytesAdded
+from wikimetrics.metrics import BytesAdded, TimeseriesChoices
class BytesAddedTest(DatabaseTest):
@@ -112,3 +112,127 @@
'net_sum': 100,
}
assert_equal(results[self.editors[0].user_id], expected1)
+
+
+class BytesAddedTimeseriesTest(DatabaseTest):
+
+ def setUp(self):
+ DatabaseTest.setUp(self)
+ self.create_test_cohort(
+ editor_count=4,
+ revisions_per_editor=4,
+ # in order, all in 2013:
+ # 1/1, 1/5, 1/9, 1/13, 2/2, 2/6, 2/10, 2/14, 3/3, 3/7, 3/15, 4/4, 4/12, 4/16
+ revision_timestamps=[
+ [20130101010000, 20130202000000, 20130303000000, 20130404000000],
+ [20130105000000, 20130206000000, 20130307000000, 20130408000000],
+ [20130109000000, 20130210000000, 20130311000000, 20130412000000],
+ [20130113000000, 20130214000000, 20130315000000, 20130416000000],
+ ],
+ # in order:
+ # 100,1100,1200,1300,0,200,400,600,800,700,600,500,590,550,600,650
+ revision_lengths=[
+ [100, 0, 800, 590],
+ [1100, 200, 700, 550],
+ [1200, 400, 600, 600],
+ [1300, 600, 500, 650],
+ ],
+ )
+
+ def test_timeseries_by_hour(self):
+
+ metric = BytesAdded(
+ namespaces=[0],
+ start_date='2013-01-01 00:00:00',
+ end_date='2013-01-01 03:00:00',
+ positive_only_sum=False,
+ negative_only_sum=False,
+ absolute_sum=False,
+ timeseries=TimeseriesChoices.HOUR,
+ )
+
+ results = metric(list(self.cohort), self.mwSession)
+ expected1 = {
+ 'net_sum': {
+ '2013-01-01 00:00:00' : 0,
+ '2013-01-01 01:00:00' : 100,
+ '2013-01-01 02:00:00' : 0,
+ }
+ }
+ assert_equal(results[self.editors[0].user_id], expected1)
+
+ def test_timeseries_by_day(self):
+
+ metric = BytesAdded(
+ namespaces=[0],
+ start_date='2013-01-01 00:00:00',
+ end_date='2013-01-14 00:00:00',
+ positive_only_sum=False,
+ negative_only_sum=False,
+ absolute_sum=False,
+ timeseries=TimeseriesChoices.DAY,
+ )
+
+ results = metric(list(self.cohort), self.mwSession)
+ expected1 = {
+ 'net_sum': {
+ '2013-01-01 00:00:00' : 100,
+ '2013-01-02 00:00:00' : 0,
+ '2013-01-03 00:00:00' : 0,
+ '2013-01-04 00:00:00' : 0,
+ '2013-01-05 00:00:00' : 0,
+ '2013-01-06 00:00:00' : 0,
+ '2013-01-07 00:00:00' : 0,
+ '2013-01-08 00:00:00' : 0,
+ '2013-01-09 00:00:00' : 0,
+ '2013-01-10 00:00:00' : 0,
+ '2013-01-11 00:00:00' : 0,
+ '2013-01-12 00:00:00' : 0,
+ '2013-01-13 00:00:00' : 0,
+ }
+ }
+ assert_equal(results[self.editors[0].user_id], expected1)
+
+ def test_timeseries_by_month(self):
+
+ metric = BytesAdded(
+ namespaces=[0],
+ start_date='2013-01-01 00:00:00',
+ end_date='2013-04-06 00:00:00',
+ positive_only_sum=False,
+ negative_only_sum=False,
+ absolute_sum=False,
+ timeseries=TimeseriesChoices.MONTH,
+ )
+
+ results = metric(list(self.cohort), self.mwSession)
+ expected1 = {
+ 'net_sum': {
+ '2013-01-01 00:00:00' : 100,
+ '2013-02-01 00:00:00' : -1300,
+ '2013-03-01 00:00:00' : 200,
+ '2013-04-01 00:00:00' : 90,
+ }
+ }
+ assert_equal(results[self.editors[0].user_id], expected1)
+
+ def test_timeseries_by_year(self):
+
+ metric = BytesAdded(
+ namespaces=[0],
+ start_date='2013-01-01 00:00:00',
+ end_date='2014-01-14 00:00:00',
+ positive_only_sum=False,
+ negative_only_sum=False,
+ absolute_sum=False,
+ timeseries=TimeseriesChoices.YEAR,
+ )
+
+ results = metric(list(self.cohort), self.mwSession)
+ expected1 = {
+ 'net_sum': {
+ '2013-01-01 00:00:00' : -910,
+ '2014-01-01 00:00:00' : 0,
+ }
+ }
+ assert_equal(results[self.editors[0].user_id], expected1)
diff --git a/tests/test_metrics/test_namespace_edits.py b/tests/test_metrics/test_namespace_edits.py
index 2102d26..2f01cb7 100644
--- a/tests/test_metrics/test_namespace_edits.py
+++ b/tests/test_metrics/test_namespace_edits.py
@@ -78,6 +78,8 @@
results = report.task.delay(report).get()
assert_true(results is not None)
+ print self.test_mediawiki_user_id_evan
+ print results
assert_equal(results[self.test_mediawiki_user_id_evan]['edits'], 0)
def test_namespace_edits_namespace_filter_no_namespace(self):
@@ -209,12 +211,13 @@
timeseries=TimeseriesChoices.DAY,
)
results = metric(list(self.cohort), self.mwSession)
+ print results
assert_equal(
results[self.editors[0].user_id]['edits'],
{
- '2012-12-31' : 1,
- '2013-01-01' : 2,
+ '2012-12-31 00:00:00' : 1,
+ '2013-01-01 00:00:00' : 2,
}
)
diff --git a/tests/test_metrics/test_revert_rate.py b/tests/test_metrics/test_revert_rate.py
index 70398d6..cef7762 100644
--- a/tests/test_metrics/test_revert_rate.py
+++ b/tests/test_metrics/test_revert_rate.py
@@ -6,7 +6,6 @@
from wikimetrics.models import Cohort, MetricReport
-
class RevertRateTest(DatabaseTest):
def setUp(self):
@@ -38,7 +37,7 @@
self.editors[0].user_id: {
'edits': 3,
'reverts': 1,
- 'revert_rate': float(1)/float(3),
+ 'revert_rate': float(1) / float(3),
},
# User B had no reverts
self.editors[1].user_id: {
@@ -47,12 +46,15 @@
'revert_rate': 0,
},
}
-
+
# check user A's results
- assert_equal(results[self.editors[0].user_id], results_should_be[self.editors[0].user_id])
-
+ assert_equal(
+ results[self.editors[0].user_id],
+ results_should_be[self.editors[0].user_id]
+ )
+
# check user B's results
- assert_equal(results[self.editors[1].user_id], results_should_be[self.editors[0].user_id])
-
-
-
+ assert_equal(
+ results[self.editors[1].user_id],
+ results_should_be[self.editors[0].user_id]
+ )
diff --git a/tests/test_metrics/test_timeseries.py b/tests/test_metrics/test_timeseries.py
index e874526..a5e3535 100644
--- a/tests/test_metrics/test_timeseries.py
+++ b/tests/test_metrics/test_timeseries.py
@@ -2,6 +2,7 @@
from nose.tools import assert_equals
from wikimetrics.metrics.timeseries_metric import (
TimeseriesMetric,
+ TimeseriesChoices,
)
from tests.fixtures import DatabaseTest
@@ -16,22 +17,164 @@
(0, 0, date.year),
2, 3
)
- assert_equals(t1, '2010')
+ assert_equals(t1, '2010-01-01 00:00:00')
t2 = m.get_date_from_tuple(
(0, 0, date.year, date.month),
2, 4
)
- assert_equals(t2, '2010-01')
+ assert_equals(t2, '2010-01-01 00:00:00')
t3 = m.get_date_from_tuple(
(0, 0, date.year, date.month, date.day),
2, 5
)
- assert_equals(t3, '2010-01-02')
+ assert_equals(t3, '2010-01-02 00:00:00')
t4 = m.get_date_from_tuple(
(0, 0, date.year, date.month, date.day, date.hour),
2, 6
)
assert_equals(t4, '2010-01-02 03:00:00')
+
+ def test_fill_in_missing_datetimes_hour(self):
+ m = TimeseriesMetric(
+ start_date='2013-01-01 23:00:00',
+ end_date='2013-01-03 00:00:00',
+ timeseries=TimeseriesChoices.HOUR,
+ )
+
+ results = {
+ 1: {
+ 'test': {
+ '2013-01-02 01:00:00': 12,
+ '2013-01-02 14:00:00': 11,
+ }
+ }
+ }
+ r = m.fill_in_missing_datetimes(results, [('test', 1, 0)])
+ assert_equals(r, {
+ 1: {
+ 'test': {
+ '2013-01-01 23:00:00': 0,
+ '2013-01-02 00:00:00': 0,
+ '2013-01-02 01:00:00': 12,
+ '2013-01-02 02:00:00': 0,
+ '2013-01-02 03:00:00': 0,
+ '2013-01-02 04:00:00': 0,
+ '2013-01-02 05:00:00': 0,
+ '2013-01-02 06:00:00': 0,
+ '2013-01-02 07:00:00': 0,
+ '2013-01-02 08:00:00': 0,
+ '2013-01-02 09:00:00': 0,
+ '2013-01-02 10:00:00': 0,
+ '2013-01-02 11:00:00': 0,
+ '2013-01-02 12:00:00': 0,
+ '2013-01-02 13:00:00': 0,
+ '2013-01-02 14:00:00': 11,
+ '2013-01-02 15:00:00': 0,
+ '2013-01-02 16:00:00': 0,
+ '2013-01-02 17:00:00': 0,
+ '2013-01-02 18:00:00': 0,
+ '2013-01-02 19:00:00': 0,
+ '2013-01-02 20:00:00': 0,
+ '2013-01-02 21:00:00': 0,
+ '2013-01-02 22:00:00': 0,
+ '2013-01-02 23:00:00': 0,
+ }
+ }
+ })
+
+ def test_fill_in_missing_datetimes_day(self):
+ m = TimeseriesMetric(
+ start_date='2013-01-01 00:00:00',
+ end_date='2013-01-05 00:00:00',
+ timeseries=TimeseriesChoices.DAY,
+ )
+
+ results = {
+ 1: {
+ 'test': {
+ '2013-01-02 00:00:00': 23,
+ '2013-01-04 00:00:00': 19,
+ }
+ },
+ 2: {
+ 'test': {
+ '2013-01-03 00:00:00': 23,
+ '2013-01-04 00:00:00': 19,
+ }
+ }
+ }
+ r = m.fill_in_missing_datetimes(results, [('test', 1, 0)])
+
+ assert_equals(r, {
+ 1: {
+ 'test': {
+ '2013-01-01 00:00:00': 0,
+ '2013-01-02 00:00:00': 23,
+ '2013-01-03 00:00:00': 0,
+ '2013-01-04 00:00:00': 19,
+ }
+ },
+ 2: {
+ 'test': {
+ '2013-01-01 00:00:00': 0,
+ '2013-01-02 00:00:00': 0,
+ '2013-01-03 00:00:00': 23,
+ '2013-01-04 00:00:00': 19,
+ }
+ }
+ })
+
+ def test_fill_in_missing_datetimes_month(self):
+ m = TimeseriesMetric(
+ start_date='2013-01-02 00:00:00',
+ end_date='2013-03-05 00:00:00',
+ timeseries=TimeseriesChoices.MONTH,
+ )
+
+ results = {
+ 1: {
+ 'test': {
+ '2013-01-02 00:00:00': 12,
+ '2013-03-02 00:00:00': 1,
+ }
+ },
+ }
+ r = m.fill_in_missing_datetimes(results, [('test', 1, 0)])
+
+ assert_equals(r, {
+ 1: {
+ 'test': {
+ '2013-01-02 00:00:00': 12,
+ '2013-02-02 00:00:00': 0,
+ '2013-03-02 00:00:00': 1,
+ }
+ },
+ })
+
+ def test_fill_in_missing_datetimes_year(self):
+ m = TimeseriesMetric(
+ start_date='2013-03-10 00:00:00',
+ end_date='2015-03-05 00:00:00',
+ timeseries=TimeseriesChoices.YEAR,
+ )
+
+ results = {
+ 1: {
+ 'test': {
+ '2013-03-10 00:00:00': 12,
+ }
+ },
+ }
+ r = m.fill_in_missing_datetimes(results, [('test', 1, 0)])
+
+ assert_equals(r, {
+ 1: {
+ 'test': {
+ '2013-03-10 00:00:00': 12,
+ '2014-03-10 00:00:00': 0,
+ }
+ },
+ })
diff --git a/wikimetrics/controllers/authentication.py b/wikimetrics/controllers/authentication.py
index 48dea53..e74c8ab 100644
--- a/wikimetrics/controllers/authentication.py
+++ b/wikimetrics/controllers/authentication.py
@@ -40,6 +40,7 @@
and not request.path == 'favicon.ico'
and not getattr(app.view_functions[request.endpoint], 'is_public', False)
):
+ # TODO: make request.url relative or check X-Forwarded-Proto to match the protocol
flash('Please Login before visiting {0}'.format(request.url), 'info')
return redirect(url_for('login', next=request.url))
diff --git a/wikimetrics/metrics/bytes_added.py b/wikimetrics/metrics/bytes_added.py
index 0f5a81c..ea7efed 100644
--- a/wikimetrics/metrics/bytes_added.py
+++ b/wikimetrics/metrics/bytes_added.py
@@ -1,6 +1,6 @@
from ..utils import thirty_days_ago, today
from ..models import Revision, Page
-from metric import Metric
+from timeseries_metric import TimeseriesMetric
from form_fields import (
BetterDateTimeField,
BetterBooleanField,
@@ -16,7 +16,7 @@
]
-class BytesAdded(Metric):
+class BytesAdded(TimeseriesMetric):
"""
This class implements bytes added logic.
An instance of the class is callable and will compute four different aggregations of
@@ -66,8 +66,6 @@
description = 'Compute different aggregations of the bytes\
contributed or removed from a mediawiki project'
- start_date = BetterDateTimeField(default=thirty_days_ago)
- end_date = BetterDateTimeField(default=today)
namespaces = CommaSeparatedIntegerListField(
None,
[Required()],
@@ -98,6 +96,7 @@
PreviousRevision = session.query(Revision.rev_len, Revision.rev_id).subquery()
BC = session.query(
Revision.rev_user,
+ Revision.rev_timestamp,
label(
'byte_change',
cast(Revision.rev_len, Integer)
@@ -115,52 +114,43 @@
.filter(Revision.rev_timestamp > start_date)\
.filter(Revision.rev_timestamp <= end_date)\
.subquery()
- # TODO: figure out why between isn't quite working with these timestamps
- #.filter(between(
- # Revision.rev_timestamp, self.start_date.data, self.end_date.data
- #))\
- bytes_added_by_user = session.query(
- BC.c.rev_user,
- func.sum(BC.c.byte_change).label('net_sum'),
- func.sum(func.abs(BC.c.byte_change)).label('absolute_sum'),
- func.sum(case(
- [(BC.c.byte_change > 0, BC.c.byte_change)], else_=0
- )).label('positive_only_sum'),
- func.sum(case(
- [(BC.c.byte_change < 0, BC.c.byte_change)], else_=0
- )).label('negative_only_sum'),
- )\
- .group_by(BC.c.rev_user)\
- .all()
+ bytes_added_by_user = session.query(BC.c.rev_user).group_by(BC.c.rev_user)
- result_dict = {}
- for user_id, net, absolute, positive, negative in bytes_added_by_user:
-
- result_dict[user_id] = {}
- if self.net_sum.data:
- result_dict[user_id]['net_sum'] = net
- if self.absolute_sum.data:
- result_dict[user_id]['absolute_sum'] = absolute
- if self.positive_only_sum.data:
- result_dict[user_id]['positive_only_sum'] = positive
- if self.negative_only_sum.data:
- result_dict[user_id]['negative_only_sum'] = negative
-
- return {
- user_id: result_dict.get(user_id, self.make_default())
- for user_id in user_ids
- }
-
- def make_default(self):
- default = dict()
+ # add submetrics as columns to the output
+ submetrics = []
+ index = 1
if self.net_sum.data:
- default['net_sum'] = None
- if self.absolute_sum.data:
- default['absolute_sum'] = None
- if self.positive_only_sum.data:
- default['positive_only_sum'] = None
- if self.negative_only_sum.data:
- default['negative_only_sum'] = None
+ submetrics.append(('net_sum', index, 0))
+ bytes_added_by_user = bytes_added_by_user.add_column(
+ func.sum(BC.c.byte_change).label('net_sum')
+ )
+ index += 1
- return default
+ if self.absolute_sum.data:
+ submetrics.append(('absolute_sum', index, 0))
+ bytes_added_by_user = bytes_added_by_user.add_column(
+ func.sum(func.abs(BC.c.byte_change)).label('absolute_sum'),
+ )
+ index += 1
+
+ if self.positive_only_sum.data:
+ submetrics.append(('positive_only_sum', index, 0))
+ bytes_added_by_user = bytes_added_by_user.add_column(
+ func.sum(case(
+ [(BC.c.byte_change > 0, BC.c.byte_change)], else_=0
+ )).label('positive_only_sum'),
+ )
+ index += 1
+
+ if self.negative_only_sum.data:
+ submetrics.append(('negative_only_sum', index, 0))
+ bytes_added_by_user = bytes_added_by_user.add_column(
+ func.sum(case(
+ [(BC.c.byte_change < 0, BC.c.byte_change)], else_=0
+ )).label('negative_only_sum'),
+ )
+ index += 1
+
+ query = self.apply_timeseries(bytes_added_by_user, rev=BC.c)
+ return self.results_by_user(user_ids, query, submetrics, date_index=index)
diff --git a/wikimetrics/metrics/namespace_edits.py b/wikimetrics/metrics/namespace_edits.py
index 750b1fe..0872c32 100644
--- a/wikimetrics/metrics/namespace_edits.py
+++ b/wikimetrics/metrics/namespace_edits.py
@@ -1,8 +1,8 @@
-from ..utils import thirty_days_ago, today
from sqlalchemy import func
from timeseries_metric import TimeseriesMetric, TimeseriesChoices
-from form_fields import CommaSeparatedIntegerListField, BetterDateTimeField
+from form_fields import CommaSeparatedIntegerListField
from wtforms.validators import Required
+from wikimetrics.utils import thirty_days_ago, today
from wikimetrics.models import Page, Revision
@@ -37,8 +37,6 @@
'namespace of a mediawiki project'
)
- start_date = BetterDateTimeField(default=thirty_days_ago)
- end_date = BetterDateTimeField(default=today)
namespaces = CommaSeparatedIntegerListField(
None,
[Required()],
@@ -68,44 +66,10 @@
.group_by(Revision.rev_user)
query = self.apply_timeseries(query)
-
- # construct dict from query results, taking into account timeseries
- revisions_by_user = self.get_dictionary_by_user(query.all())
- return {
- user_id: {'edits': revisions_by_user.get(user_id, 0)}
- for user_id in user_ids
- }
-
- def get_dictionary_by_user(self, query_results):
- """
- Parameters
- query_results : list of tuples in the format:
- (user_id, edit_count[, year[, month[, day[, hour]]]])
- Returns
- dictionary of results by user, in the following format:
- user_id: edit_count
-
- or
-
- user_id: {
- year[, month[, day[, hour]]]: edit_count,
- year[, month[, day[, hour]]]: edit_count,
- ...
- }
- """
- # handle simple cases (no results or no timeseries)
- if not query_results:
- return
- if self.timeseries.data == TimeseriesChoices.NONE:
- return dict(query_results)
-
- # get results by user and by date
- results = {}
- for row in query_results:
- user_id = row[0]
- edits = row[1]
- if not user_id in results:
- results[user_id] = {}
- results[user_id][self.get_date_from_tuple(row, 2, len(row))] = edits
-
- return results
+ return self.results_by_user(
+ user_ids,
+ query,
+ [('edits', 1, 0)],
+ submetric_default=0,
+ date_index=2,
+ )
diff --git a/wikimetrics/metrics/timeseries_metric.py b/wikimetrics/metrics/timeseries_metric.py
index 4e524d4..83423f1 100644
--- a/wikimetrics/metrics/timeseries_metric.py
+++ b/wikimetrics/metrics/timeseries_metric.py
@@ -1,8 +1,12 @@
from sqlalchemy import func
from datetime import datetime
+from dateutil.relativedelta import relativedelta
from wtforms import SelectField
-from metric import Metric
+
from wikimetrics.models import Revision
+from wikimetrics.utils import thirty_days_ago, today, format_pretty_date
+from metric import Metric
+from form_fields import CommaSeparatedIntegerListField, BetterDateTimeField
__all__ = ['TimeseriesChoices', 'TimeseriesMetric']
@@ -23,7 +27,9 @@
output.
"""
- timeseries = SelectField(
+ start_date = BetterDateTimeField(default=thirty_days_ago)
+ end_date = BetterDateTimeField(default=today)
+ timeseries = SelectField(
'Time Series by',
default=TimeseriesChoices.NONE,
choices=[
@@ -35,12 +41,14 @@
],
)
- def apply_timeseries(self, query):
+ def apply_timeseries(self, query, rev=Revision):
"""
Take a query and slice it up into equal time intervals
Parameters
- query : a sql alchemy query
+ query : a sql alchemy query
+ rev : defaults to Revision, specifies the object that
+ contains the appropriate rev_timestamp
Returns
The query parameter passed in, with a grouping by the desired time slice
@@ -50,44 +58,183 @@
if choice == TimeseriesChoices.NONE:
return query
- query = query.add_column(func.year(Revision.rev_timestamp))
- query = query.group_by(func.year(Revision.rev_timestamp))
+ query = query.add_column(func.year(rev.rev_timestamp))
+ query = query.group_by(func.year(rev.rev_timestamp))
if choice == TimeseriesChoices.YEAR:
return query
- query = query.add_column(func.month(Revision.rev_timestamp))
- query = query.group_by(func.month(Revision.rev_timestamp))
+ query = query.add_column(func.month(rev.rev_timestamp))
+ query = query.group_by(func.month(rev.rev_timestamp))
if choice == TimeseriesChoices.MONTH:
return query
- query = query.add_column(func.day(Revision.rev_timestamp))
- query = query.group_by(func.day(Revision.rev_timestamp))
+ query = query.add_column(func.day(rev.rev_timestamp))
+ query = query.group_by(func.day(rev.rev_timestamp))
if choice == TimeseriesChoices.DAY:
return query
- query = query.add_column(func.hour(Revision.rev_timestamp))
- query = query.group_by(func.hour(Revision.rev_timestamp))
+ query = query.add_column(func.hour(rev.rev_timestamp))
+ query = query.group_by(func.hour(rev.rev_timestamp))
if choice == TimeseriesChoices.HOUR:
return query
- def get_date_from_tuple(self, row_tuple, start_index, stop_index):
- date_pieces = row_tuple[start_index:stop_index]
- date_string = ''
- if len(date_pieces) > 0:
- date_string += str(date_pieces[0])
- if len(date_pieces) > 1:
- date_string += '-'
- date_string += str(date_pieces[1]).rjust(2, '0')
- if len(date_pieces) > 2:
- date_string += '-'
- date_string += str(date_pieces[2]).rjust(2, '0')
- if len(date_pieces) > 3:
- date_string += ' '
- date_string += str(date_pieces[3]).rjust(2, '0')
- date_string += ':00:00'
+ def results_by_user(self, user_ids, query, submetrics,
+ submetric_default=None, date_index=None):
+ """
+ Get results by user for a timeseries-enabled metric
- return date_string
+ Parameters
+ user_ids : list of integer ids to return results for
+ query : sqlalchemy query to fetch results
+ submetrics : list of tuples of the form (label, index, default)
+ submetric_default : default value to assign at the submetric level
+ date_index : index of the year date part in the result row,
+ in case this is a timeseries query
+
+ Returns
+ A dictionary of user_ids to results, shaped depending on timeseries:
+ user_id: {
+ 'submetric 1': {
+ 'date slice 1': submetric_1_value,
+ 'date slice 2': submetric_1_value,
+ ...
+ },
+ 'submetric 2': ...
+
+ OR
+
+ 'submetric 1': submetric_1_value,
+ 'submetric 2': submetric_2_value,
+ ...
+ }
+ """
+ # get a dictionary of user_ids to their metric results
+ results = self.submetrics_by_user(query, submetrics, date_index)
+
+ # make a default return dictionary for users not found by the query
+ submetric_defaults = dict()
+ for label, index, default in submetrics:
+ if self.timeseries.data == TimeseriesChoices.NONE:
+ submetric_defaults[label] = submetric_default
+ else:
+ submetric_defaults[label] = dict()
+
+ # populate users not found by the query with the default created above
+ results = {
+ user_id: results.get(user_id, submetric_defaults)
+ for user_id in user_ids
+ }
+
+ # in timeseries results, fill in missing date-times
+ results = self.fill_in_missing_datetimes(results, submetrics)
+ return results
+
+ def submetrics_by_user(self, query, submetrics, date_index=None):
+ """
+ Same as results_by_user, except doesn't return results for users not found in
+ the query_results list.
+ """
+ query_results = query.all()
+
+ # handle simple cases (no results or no timeseries)
+ if not query_results:
+ return dict()
+
+ # get results by user and by date
+ results = {}
+ for row in query_results:
+ user_id = row[0]
+ if not user_id in results:
+ results[user_id] = {}
+
+ date_slice = None
+ if self.timeseries.data != TimeseriesChoices.NONE:
+ date_slice = self.get_date_from_tuple(row, date_index, len(row))
+
+ for label, index, default in submetrics:
+ if date_slice:
+ if not label in results[user_id]:
+ results[user_id][label] = dict()
+ results[user_id][label][date_slice] = row[index]
+ else:
+ results[user_id][label] = row[index]
+
+ return results
+
+ def fill_in_missing_datetimes(self, results_by_user, submetrics):
+ """
+ Starting from a sparse set of timeseries results, fill in default values
+ for the specified list of sub-metrics. If self.timeseries is NONE, this
+ is a simple identity function.
+
+ Parameters
+ results_by_user : dictionary of submetrics dictionaries by user
+ submetrics : list of tuples of the form (label, index, default)
+
+ Returns
+ the results, filled in with default values
+ """
+ if self.timeseries.data == TimeseriesChoices.NONE:
+ return results_by_user
+
+ slice_delta = self.get_delta_from_choice()
+ timeseries_slices = dict()
+ slice_to_default = self.start_date.data
+ while slice_to_default < self.end_date.data:
+ date_key = format_pretty_date(slice_to_default)
+ timeseries_slices[date_key] = None
+ slice_to_default += slice_delta
+
+ for user_id, user_submetrics in results_by_user.iteritems():
+ for label, i, default in submetrics:
+ if not label or not user_submetrics or not label in user_submetrics:
+ continue
+ defaults = timeseries_slices.copy()
+ defaults.update(user_submetrics[label])
+ for k, v in defaults.iteritems():
+ if not v:
+ defaults[k] = default
+ user_submetrics[label] = defaults
+
+ return results_by_user
+
+ def get_delta_from_choice(self):
+ """
+ Given a user's choice of timeseries grouping,
+ return a delta that would be one "slice" wide
+ """
+ if self.timeseries.data == TimeseriesChoices.NONE:
+ return relativedelta(hours=0)
+ if self.timeseries.data == TimeseriesChoices.HOUR:
+ return relativedelta(hours=1)
+ if self.timeseries.data == TimeseriesChoices.DAY:
+ return relativedelta(days=1)
+ if self.timeseries.data == TimeseriesChoices.MONTH:
+ return relativedelta(months=1)
+ if self.timeseries.data == TimeseriesChoices.YEAR:
+ return relativedelta(years=1)
+
+ def get_date_from_tuple(self, row_tuple, start_index, stop_index):
+ """
+ Suppose you have a tuple like this:
+ ([data], [data], ... , year, month, day, [data], [data])
+ Then this function will parse out the year, month, day, and hour
+ into a date string. Anything beyond year, month, day is optional.
+ """
+ date_pieces = row_tuple[start_index:stop_index]
+ year, month, day, hour = 1970, 1, 1, 0
+
+ if len(date_pieces) > 0:
+ year = date_pieces[0]
+ if len(date_pieces) > 1:
+ month = date_pieces[1]
+ if len(date_pieces) > 2:
+ day = date_pieces[2]
+ if len(date_pieces) > 3:
+ hour = date_pieces[3]
+
+ return format_pretty_date(datetime(year, month, day, hour))
diff --git a/wikimetrics/templates/authenticate.html b/wikimetrics/templates/authenticate.html
index bff57d4..e88a5c8 100644
--- a/wikimetrics/templates/authenticate.html
+++ b/wikimetrics/templates/authenticate.html
@@ -18,5 +18,5 @@
</li>
</ul>
-<p>What does it mean to log in with a service above? The method we use is
called <a href="http://en.wikipedia.org/wiki/OAuth">OAuth</a>. In plain words,
it allows a provider to vouch that you are who you say you are. For example,
if you click on "log in with Google" above, Google will ask you if you will
allow the Wikimetrics application access to your private Google data. For our
purpose, we will only access your email address and name to verify your
identity. We will not share any of your data with Google or anyone else.
Since Wikimetrics is an open source project, you can see exactly how we
interact with Google and the other providers. As of this writing, that logic
<a
href="https://github.com/wikimedia/analytics-wikimetrics/blob/master/wikimetrics/controllers/authentication.py">lives
here</a>.</p>
+<p>What does it mean to log in with a service above? The method we use is
called <a href="https://en.wikipedia.org/wiki/OAuth">OAuth</a>. In plain
words, it allows a provider to vouch that you are who you say you are. For
example, if you click on "log in with Google" above, Google will ask you if you
will allow the Wikimetrics application access to your private Google data. For
our purpose, we will only access your email address and name to verify your
identity. We will not share any of your data with Google or anyone else.
Since Wikimetrics is an open source project, you can see exactly how we
interact with Google and the other providers. As of this writing, that logic
<a
href="https://github.com/wikimedia/analytics-wikimetrics/blob/master/wikimetrics/controllers/authentication.py">lives
here</a>.</p>
{% endblock %}
diff --git a/wikimetrics/templates/forms/metric_configuration.html b/wikimetrics/templates/forms/metric_configuration.html
index 4d272c9..623cf48 100644
--- a/wikimetrics/templates/forms/metric_configuration.html
+++ b/wikimetrics/templates/forms/metric_configuration.html
@@ -23,9 +23,9 @@
<div class="controls">
<p>refers to the division of pages by namespace in wiki projects, example:</p>
<ul>
- <li><a target="_blank" href="http://de.wikipedia.org/wiki/Wikipedia:Namespace">German Wikipedia Namespaces</a></li>
- <li><a target="_blank" href="http://commons.wikimedia.org/wiki/Help:Namespaces">Wikimedia Commons Namespaces</a></li>
- <li><a target="_blank" href="http://en.wikipedia.org/wiki/Wikipedia:Namespace">English Wikipedia Namespaces</a></li>
+ <li><a target="_blank" href="https://de.wikipedia.org/wiki/Wikipedia:Namespace">German Wikipedia Namespaces</a></li>
+ <li><a target="_blank" href="https://commons.wikimedia.org/wiki/Help:Namespaces">Wikimedia Commons Namespaces</a></li>
+ <li><a target="_blank" href="https://en.wikipedia.org/wiki/Wikipedia:Namespace">English Wikipedia Namespaces</a></li>
</ul>
</div>
{% endif %}
diff --git a/wikimetrics/utils.py b/wikimetrics/utils.py
index 275a8c0..738226d 100644
--- a/wikimetrics/utils.py
+++ b/wikimetrics/utils.py
@@ -5,7 +5,7 @@
# Format string for datetime.strptime for MediaWiki timestamps.
-# See <http://www.mediawiki.org/wiki/Manual:Timestamp>.
+# See <https://www.mediawiki.org/wiki/Manual:Timestamp>.
MEDIAWIKI_TIMESTAMP = '%Y%m%d%H%M%S'
# This format is used in the UI and output
PRETTY_TIMESTAMP = '%Y-%m-%d %H:%M:%S'
--
To view, visit https://gerrit.wikimedia.org/r/84706
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I6a9b7abe43d72905b237311af4dca0206fa95d8a
Gerrit-PatchSet: 2
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits