Milimetric has submitted this change and it was merged.
Change subject: added start/end date to namespaces also, more tests
......................................................................
added start/end date to namespaces
also, more tests
Change-Id: Ic54e20c00e986ed492528726fc49468e1a941e63
---
M scripts/test
M tests/test_controllers/test_reports.py
M tests/test_metrics/test_namespace_edits.py
M tests/test_models/test_mappings.py
A tests/test_models/test_metric_report.py
A tests/test_models/test_multi_project_metric_report.py
M wikimetrics/controllers/reports.py
M wikimetrics/database.py
M wikimetrics/metrics/bytes_added.py
M wikimetrics/metrics/namespace_edits.py
M wikimetrics/metrics/revert_rate.py
M wikimetrics/models/persistent_report.py
M wikimetrics/models/user.py
13 files changed, 353 insertions(+), 37 deletions(-)
Approvals:
Milimetric: Verified; Looks good to me, approved
diff --git a/scripts/test b/scripts/test
index 6603e99..95c9663 100755
--- a/scripts/test
+++ b/scripts/test
@@ -1,3 +1,3 @@
# for example:
-# scripts/test "models tests/test_controllers/test_cohorts.py:TestCohortsController"
-rm .coverage *.db ; find -name *.pyc | xargs rm ; nosetests --cover-erase -e $1
+# scripts/test "tests/test_controllers/test_cohorts.py:TestCohortsController"
+rm .coverage *.db ; find -name *.pyc | xargs rm ; nosetests --cover-erase $1
diff --git a/tests/test_controllers/test_reports.py b/tests/test_controllers/test_reports.py
index c8de2d4..df6e977 100644
--- a/tests/test_controllers/test_reports.py
+++ b/tests/test_controllers/test_reports.py
@@ -14,7 +14,7 @@
return filter(lambda j : j['status'] == status, collection)
-class TestReportsController(WebTest):
+class ReportsControllerTest(WebTest):
def test_index(self):
response = self.app.get('/reports/', follow_redirects=True)
@@ -120,6 +120,13 @@
self.session.expunge(report_new)
report_new.update_status()
assert_equal(report_new.status, celery.states.SUCCESS)
+
+ # Change this report to look like the old style, to test that still works
+ # TODO: delete this test on October 1st
+ report.result_key = report.queue_result_key
+ self.session.commit()
+ result = get_celery_task_result(task, report)
+ assert_true(result is not None)
def test_report_result_csv_error(self):
response = self.app.get('/reports/result/blah.csv')
@@ -128,3 +135,117 @@
def test_report_result_json_error(self):
response = self.app.get('/reports/result/blah.json')
assert_true(response.data.find('isError') >= 0)
+
+ def test_report_result_average_only_csv(self):
+ # Make the request
+ desired_responses = [{
+ 'name': 'Edits - test',
+ 'cohort': {
+ 'id': self.test_cohort_id,
+ },
+ 'metric': {
+ 'name': 'NamespaceEdits',
+ 'namespaces': [0, 1, 2],
+ 'start_date': '2013-06-01',
+ 'end_date': '2013-09-01',
+ 'individualResults': False,
+ 'aggregateResults': True,
+ 'aggregateSum': False,
+ 'aggregateAverage': True,
+ 'aggregateStandardDeviation': False,
+ },
+ }]
+ json_to_post = json.dumps(desired_responses)
+
+ response = self.app.post('/reports/create/', data=dict(
+ responses=json_to_post
+ ))
+
+ # Wait a second for the task to get processed
+ time.sleep(1)
+
+ # Check that the task has been created
+ response = self.app.get('/reports/list/')
+ parsed = json.loads(response.data)
+ result_key = parsed['reports'][-1]['result_key']
+ task, report = get_celery_task(result_key)
+
+ # Check the csv result
+ response = self.app.get('/reports/result/{0}.csv'.format(result_key))
+ assert_true(response.data.find('Average') >= 0)
+
+ def test_report_result_sum_only_csv(self):
+ # Make the request
+ desired_responses = [{
+ 'name': 'Edits - test',
+ 'cohort': {
+ 'id': self.test_cohort_id,
+ },
+ 'metric': {
+ 'name': 'NamespaceEdits',
+ 'namespaces': [0, 1, 2],
+ 'start_date': '2013-06-01',
+ 'end_date': '2013-09-01',
+ 'individualResults': False,
+ 'aggregateResults': True,
+ 'aggregateSum': True,
+ 'aggregateAverage': False,
+ 'aggregateStandardDeviation': False,
+ },
+ }]
+ json_to_post = json.dumps(desired_responses)
+
+ response = self.app.post('/reports/create/', data=dict(
+ responses=json_to_post
+ ))
+
+ # Wait a second for the task to get processed
+ time.sleep(1)
+
+ # Check that the task has been created
+ response = self.app.get('/reports/list/')
+ parsed = json.loads(response.data)
+ result_key = parsed['reports'][-1]['result_key']
+ task, report = get_celery_task(result_key)
+
+ # Check the csv result
+ response = self.app.get('/reports/result/{0}.csv'.format(result_key))
+ assert_true(response.data.find('Sum') >= 0)
+
+ def test_report_result_std_dev_only_csv(self):
+ # Make the request
+ desired_responses = [{
+ 'name': 'Edits - test',
+ 'cohort': {
+ 'id': self.test_cohort_id,
+ },
+ 'metric': {
+ 'name': 'NamespaceEdits',
+ 'namespaces': [0, 1, 2],
+ 'start_date': '2013-06-01',
+ 'end_date': '2013-09-01',
+ 'individualResults': False,
+ 'aggregateResults': True,
+ 'aggregateSum': False,
+ 'aggregateAverage': False,
+ 'aggregateStandardDeviation': True,
+ },
+ }]
+ json_to_post = json.dumps(desired_responses)
+
+ response = self.app.post('/reports/create/', data=dict(
+ responses=json_to_post
+ ))
+
+ # Wait a second for the task to get processed
+ time.sleep(1)
+
+ # Check that the task has been created
+ response = self.app.get('/reports/list/')
+ parsed = json.loads(response.data)
+ result_key = parsed['reports'][-1]['result_key']
+ task, report = get_celery_task(result_key)
+
+ # Check the csv result
+ response = self.app.get('/reports/result/{0}.csv'.format(result_key))
+ assert_true(response.data.find('Standard Deviation') >= 0)
diff --git a/tests/test_metrics/test_namespace_edits.py b/tests/test_metrics/test_namespace_edits.py
index 4d62e03..c8d6c8f 100644
--- a/tests/test_metrics/test_namespace_edits.py
+++ b/tests/test_metrics/test_namespace_edits.py
@@ -1,30 +1,53 @@
from nose.tools import assert_true, assert_equal
-from tests.fixtures import DatabaseTest, QueueDatabaseTest
+from tests.fixtures import DatabaseWithCohortTest, QueueDatabaseTest
from wikimetrics.metrics import NamespaceEdits
from wikimetrics.models import Cohort, MetricReport
-class NamespaceEditsDatabaseTest(DatabaseTest):
+class NamespaceEditsDatabaseTest(DatabaseWithCohortTest):
def test_finds_edits(self):
- cohort = self.session.query(Cohort).filter_by(name='test').one()
-
- metric = NamespaceEdits()
- results = metric(list(cohort), self.mwSession)
+ metric = NamespaceEdits(
+ namespaces=[0],
+ start_date='2013-06-01',
+ end_date='2013-08-01',
+ )
+ results = metric(list(self.cohort), self.mwSession)
assert_true(results is not None)
assert_equal(results[self.test_mediawiki_user_id]['edits'], 2)
assert_equal(results[self.test_mediawiki_user_id_evan]['edits'], 3)
def test_reports_zero_edits(self):
- cohort = self.session.query(Cohort).filter_by(name='test').one()
-
- metric = NamespaceEdits()
- results = metric(list(cohort), self.mwSession)
+ metric = NamespaceEdits(
+ namespaces=[0],
+ start_date='2013-06-01',
+ end_date='2013-08-01',
+ )
+ results = metric(list(self.cohort), self.mwSession)
assert_true(results is not None)
assert_equal(results[self.test_mediawiki_user_id_andrew]['edits'], 0)
+
+ def test_uses_date_range(self):
+
+ metric = NamespaceEdits(
+ namespaces=[0],
+ )
+ assert_true(not metric.validate())
+
+ metric = NamespaceEdits(
+ namespaces=[0],
+ start_date='2013-07-01',
+ end_date='2013-07-02',
+ )
+ metric.fake_csrf()
+ assert_true(metric.validate())
+
+ results = metric(list(self.cohort), self.mwSession)
+ print results
+ assert_equal(results[self.dan_id]['edits'], 1)
class NamespaceEditsFullTest(QueueDatabaseTest):
@@ -32,7 +55,11 @@
def test_namespace_edits(self):
cohort = self.session.query(Cohort).filter_by(name='test').one()
- metric = NamespaceEdits()
+ metric = NamespaceEdits(
+ namespaces=[0],
+ start_date='2013-06-01',
+ end_date='2013-08-01',
+ )
report = MetricReport(metric, list(cohort), 'enwiki')
results = report.task.delay(report).get()
print 'results: %s' % results
@@ -43,8 +70,11 @@
def test_namespace_edits_namespace_filter(self):
cohort = self.session.query(Cohort).filter_by(name='test').one()
- namespaces = [3]
- metric = NamespaceEdits(namespaces=namespaces)
+ metric = NamespaceEdits(
+ namespaces=[3],
+ start_date='2013-06-01',
+ end_date='2013-08-01',
+ )
report = MetricReport(metric, list(cohort), 'enwiki')
results = report.task.delay(report).get()
@@ -54,8 +84,11 @@
def test_namespace_edits_namespace_filter_no_namespace(self):
cohort = self.session.query(Cohort).filter_by(name='test').one()
- namespaces = []
- metric = NamespaceEdits(namespaces=namespaces)
+ metric = NamespaceEdits(
+ namespaces=[],
+ start_date='2013-06-01',
+ end_date='2013-08-01',
+ )
report = MetricReport(metric, list(cohort), 'enwiki')
results = report.task.delay(report).get()
@@ -65,8 +98,11 @@
def test_namespace_edits_with_multiple_namespaces(self):
cohort = self.session.query(Cohort).filter_by(name='test').one()
- namespaces = [0, 209]
- metric = NamespaceEdits(namespaces=namespaces)
+ metric = NamespaceEdits(
+ namespaces=[0, 209],
+ start_date='2013-06-01',
+ end_date='2013-08-06',
+ )
report = MetricReport(metric, list(cohort), 'enwiki')
results = report.task.delay(report).get()
@@ -76,8 +112,11 @@
def test_namespace_edits_with_multiple_namespaces_when_passing_string_list(self):
cohort = self.session.query(Cohort).filter_by(name='test').one()
- namespaces = '0, 209'
- metric = NamespaceEdits(namespaces=namespaces)
+ metric = NamespaceEdits(
+ namespaces='0, 209',
+ start_date='2013-06-01',
+ end_date='2013-08-06',
+ )
report = MetricReport(metric, list(cohort), 'enwiki')
results = report.task.delay(report).get()
diff --git a/tests/test_models/test_mappings.py b/tests/test_models/test_mappings.py
index 707a91f..6090cb4 100644
--- a/tests/test_models/test_mappings.py
+++ b/tests/test_models/test_mappings.py
@@ -91,3 +91,30 @@
.all()
print cohorts
assert_equal(len(cohorts), 2, "User Evan should own 2 cohorts")
+
+ #***********
+ # String representation tests
+ #***********
+ def test_report_repr(self):
+ r = self.session.query(PersistentReport).get(self.test_report_id)
+ assert_true(str(r).find('PersistentReport') >= 0)
+
+ def test_user_repr(self):
+ u = self.session.query(User).get(self.test_user_id)
+ assert_true(str(u).find('User') >= 0)
+
+ def test_cohort_repr(self):
+ c = self.session.query(Cohort).get(self.test_cohort_id)
+ assert_true(str(c).find('Cohort') >= 0)
+
+ def test_cohort_user_repr(self):
+ cu = self.session.query(CohortUser).get(self.test_cohort_user_id)
+ assert_true(str(cu).find('CohortUser') >= 0)
+
+ def test_wikiuser_repr(self):
+ wu = self.session.query(WikiUser).get(self.test_wiki_user_id)
+ assert_true(str(wu).find('WikiUser') >= 0)
+
+ def test_cohort_wikiuser_repr(self):
+ cwu = self.session.query(CohortWikiUser).get(self.test_cohort_wiki_user_id)
+ assert_true(str(cwu).find('CohortWikiUser') >= 0)
diff --git a/tests/test_models/test_metric_report.py b/tests/test_models/test_metric_report.py
new file mode 100644
index 0000000..c183375
--- /dev/null
+++ b/tests/test_models/test_metric_report.py
@@ -0,0 +1,48 @@
+from nose.tools import assert_equals, assert_true
+from wikimetrics.metrics import metric_classes
+from wikimetrics.models import (
+ MetricReport
+)
+from ..fixtures import DatabaseTest
+
+
+class MetricReportTest(DatabaseTest):
+
+ def test_basic_response(self):
+ metric = metric_classes['NamespaceEdits'](
+ name = 'NamespaceEdits',
+ namespaces = [0, 1, 2],
+ start_date = '2013-06-01',
+ end_date = '2013-09-01',
+ )
+ mr = MetricReport(
+ metric,
+ [
+ self.test_mediawiki_user_id,
+ self.test_mediawiki_user_id_evan,
+ self.test_mediawiki_user_id_andrew,
+ ],
+ 'enwiki'
+ )
+
+ result = mr.run()
+ assert_equals(result[self.test_mediawiki_user_id]['edits'], 2)
+
+ def test_repr(self):
+ metric = metric_classes['NamespaceEdits'](
+ name = 'NamespaceEdits',
+ namespaces = [0, 1, 2],
+ start_date = '2013-06-01',
+ end_date = '2013-09-01',
+ )
+ mr = MetricReport(
+ metric,
+ [
+ self.test_mediawiki_user_id,
+ self.test_mediawiki_user_id_evan,
+ self.test_mediawiki_user_id_andrew,
+ ],
+ 'enwiki'
+ )
+
+ assert_true(str(mr).find('MetricReport') >= 0)
diff --git a/tests/test_models/test_multi_project_metric_report.py b/tests/test_models/test_multi_project_metric_report.py
new file mode 100644
index 0000000..25bf91c
--- /dev/null
+++ b/tests/test_models/test_multi_project_metric_report.py
@@ -0,0 +1,65 @@
+from nose.tools import assert_equals, assert_true
+from wikimetrics.metrics import metric_classes
+from wikimetrics.models import (
+ MultiProjectMetricReport, PersistentReport, Cohort,
+)
+from ..fixtures import QueueDatabaseTest, DatabaseTest
+
+
+class MultiProjectMetricReportTest(QueueDatabaseTest):
+
+ def test_basic_response(self):
+ cohort = self.session.query(Cohort).get(self.test_cohort_id)
+ metric = metric_classes['NamespaceEdits'](
+ name = 'NamespaceEdits',
+ namespaces = [0, 1, 2],
+ start_date = '2013-06-01',
+ end_date = '2013-09-01',
+ )
+ mr = MultiProjectMetricReport(cohort, metric, 'enwiki')
+
+ result = mr.task.delay(mr).get()
+
+ result_key = self.session.query(PersistentReport)\
+ .filter(PersistentReport.id == mr.persistent_id)\
+ .one()\
+ .result_key
+
+ assert_equals(result[result_key][self.test_mediawiki_user_id]['edits'], 2)
+
+
+class MultiProjectMetricReportWithoutQueueTest(DatabaseTest):
+
+ def test_finish(self):
+ cohort = self.session.query(Cohort).get(self.test_cohort_id)
+ metric = metric_classes['NamespaceEdits'](
+ name = 'NamespaceEdits',
+ namespaces = [0, 1, 2],
+ start_date = '2013-06-01',
+ end_date = '2013-09-01',
+ )
+ mr = MultiProjectMetricReport(cohort, metric, 'enwiki')
+
+ finished = mr.finish([
+ {
+ 1: {'edits': 2},
+ 2: {'edits': 3},
+ 3: {'edits': 0},
+ None: {'edits': 0}
+ }
+ ])
+
+ assert_equals(finished[mr.result_key][1]['edits'], 2)
+ assert_equals(finished[mr.result_key][2]['edits'], 3)
+
+ def test_repr(self):
+ cohort = self.session.query(Cohort).get(self.test_cohort_id)
+ metric = metric_classes['NamespaceEdits'](
+ name = 'NamespaceEdits',
+ namespaces = [0, 1, 2],
+ start_date = '2013-06-01',
+ end_date = '2013-09-01',
+ )
+ mr = MultiProjectMetricReport(cohort, metric, 'enwiki')
+
+ assert_true(str(mr).find('MultiProjectMetricReport') >= 0)
diff --git a/wikimetrics/controllers/reports.py b/wikimetrics/controllers/reports.py
index 4c25261..901a282 100644
--- a/wikimetrics/controllers/reports.py
+++ b/wikimetrics/controllers/reports.py
@@ -79,6 +79,7 @@
def get_celery_task_result(celery_task, db_report):
# this indicates an old style result, the celery task result can be returned directly
+ # TODO: delete this logic on October 1st, as all old results will have expired by then
if db_report.result_key == db_report.queue_result_key:
return celery_task.get()
# otherwise, it's a new style result, the celery task is a dictionary
diff --git a/wikimetrics/database.py b/wikimetrics/database.py
index bfeb771..6e3b013 100644
--- a/wikimetrics/database.py
+++ b/wikimetrics/database.py
@@ -154,8 +154,6 @@
# TODO: these numbers are hardcoded, is that ok?
num_hosts = 7
host_projects = map(get_host_projects, range(1, num_hosts + 1))
- #pool = Pool(num_hosts)
- #host_projects = pool.map(get_host_projects, range(1, num_hosts + 1))
project_host_map = {}
host_fmt = 's{0}'
for host_id, projects in host_projects:
diff --git a/wikimetrics/metrics/bytes_added.py b/wikimetrics/metrics/bytes_added.py
index 5699a44..da6fb55 100644
--- a/wikimetrics/metrics/bytes_added.py
+++ b/wikimetrics/metrics/bytes_added.py
@@ -89,13 +89,14 @@
* positive_only_sum : bytes added
* negative_only_sum : bytes removed
"""
- PreviousRevision = session.query(Revision.rev_len, Revision.rev_id).subquery()
+ # get the dates to act properly in any environment
start_date = self.start_date.data
end_date = self.end_date.data
if session.bind.name == 'mysql':
start_date = mediawiki_date(self.start_date)
end_date = mediawiki_date(self.end_date)
+ PreviousRevision = session.query(Revision.rev_len, Revision.rev_id).subquery()
BC = session.query(
Revision.rev_user,
label(
diff --git a/wikimetrics/metrics/namespace_edits.py b/wikimetrics/metrics/namespace_edits.py
index 0497c1a..a40d8c0 100644
--- a/wikimetrics/metrics/namespace_edits.py
+++ b/wikimetrics/metrics/namespace_edits.py
@@ -1,6 +1,8 @@
+from ..utils import thirty_days_ago, today, mediawiki_date
from sqlalchemy import func
from metric import Metric
from form_fields import CommaSeparatedIntegerListField
+from wtforms import DateField
from wtforms.validators import Required
from wikimetrics.models import Page, Revision
@@ -36,6 +38,8 @@
'namespace of a mediawiki project'
)
+ start_date = DateField(default=thirty_days_ago)
+ end_date = DateField(default=today)
namespaces = CommaSeparatedIntegerListField(
None,
[Required()],
@@ -52,6 +56,13 @@
Returns:
dictionary from user ids to the number of edit found.
"""
+ # get the dates to act properly in any environment
+ start_date = self.start_date.data
+ end_date = self.end_date.data
+ if session.bind.name == 'mysql':
+ start_date = mediawiki_date(self.start_date)
+ end_date = mediawiki_date(self.end_date)
+
# directly construct dict from query results
revisions_by_user = dict(
session
@@ -59,6 +70,8 @@
.join(Page)
.filter(Page.page_namespace.in_(self.namespaces.data))
.filter(Revision.rev_user.in_(user_ids))
+ .filter(Revision.rev_timestamp >= start_date)\
+ .filter(Revision.rev_timestamp <= end_date)\
.group_by(Revision.rev_user)
.all()
)
diff --git a/wikimetrics/metrics/revert_rate.py b/wikimetrics/metrics/revert_rate.py
index c2c532f..9ec6bd7 100644
--- a/wikimetrics/metrics/revert_rate.py
+++ b/wikimetrics/metrics/revert_rate.py
@@ -48,13 +48,13 @@
description='0, 2, 4, etc.',
)
- def __call__(self, user_ids, session):
- """
- Parameters:
- user_ids : list of mediawiki user ids to find edit reverts for
- session : sqlalchemy session open on a mediawiki database
+ #def __call__(self, user_ids, session):
+ #"""
+ #Parameters:
+ #user_ids : list of mediawiki user ids to find edit reverts for
+ #session : sqlalchemy session open on a mediawiki database
- Returns:
- dictionary from user ids to the number of edit reverts found.
- """
- return {user: None for user in user_ids}
+ #Returns:
+ #dictionary from user ids to the number of edit reverts found.
+ #"""
+ #return {user: None for user in user_ids}
diff --git a/wikimetrics/models/persistent_report.py b/wikimetrics/models/persistent_report.py
index 79f5b57..2414490 100644
--- a/wikimetrics/models/persistent_report.py
+++ b/wikimetrics/models/persistent_report.py
@@ -37,3 +37,6 @@
existing_session = db.get_session()
existing_session.add(self)
existing_session.commit()
+
+ def __repr__(self):
+ return '<PersistentReport("{0}")>'.format(self.id)
diff --git a/wikimetrics/models/user.py b/wikimetrics/models/user.py
index 604c561..ab0d177 100644
--- a/wikimetrics/models/user.py
+++ b/wikimetrics/models/user.py
@@ -32,9 +32,6 @@
# Flask-Login properties
authenticated = Column(Boolean, default=False)
active = Column(Boolean, default=False)
-
- def __repr__(self):
- return '<User("{0}")>'.format(self.id)
@staticmethod
def get(session, user_id):
@@ -86,3 +83,6 @@
to return a unicode id.
"""
return unicode(self.id)
+
+ def __repr__(self):
+ return '<User("{0}")>'.format(self.id)
--
To view, visit https://gerrit.wikimedia.org/r/81031
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic54e20c00e986ed492528726fc49468e1a941e63
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits