jenkins-bot has submitted this change and it was merged.
Change subject: Update PagesCreated metric
......................................................................
Update PagesCreated metric
* Make the namespace parameter include all namespaces if left blank
* Make a new parameter "Include Deleted" that counts deleted pages
(looking at the archive table)
* Update corresponding labeling, UI, and tests
Bug: 71009
Change-Id: I5b3940e0946e90a35f17dade7ea3692f194bc523
---
M tests/fixtures.py
M tests/test_metrics/test_pages_created.py
M wikimetrics/metrics/pages_created.py
M wikimetrics/models/mediawiki/archive.py
4 files changed, 91 insertions(+), 15 deletions(-)
Approvals:
Nuria: Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 2bba639..3ac24b4 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -572,7 +572,8 @@
query = self.mwSession.query(
Revision.rev_timestamp,
Revision.rev_user,
- Page.page_namespace
+ Revision.rev_parent_id,
+ Page.page_namespace,
).join(Page)
revisions = query.all()
@@ -582,6 +583,7 @@
'ar_rev_id': None,
'ar_timestamp': r.rev_timestamp,
'ar_user': r.rev_user,
+ 'ar_parent_id': r.rev_parent_id,
'ar_namespace': r.page_namespace,
}
for r in revisions
diff --git a/tests/test_metrics/test_pages_created.py b/tests/test_metrics/test_pages_created.py
index 3d492b4..e1bf8e4 100644
--- a/tests/test_metrics/test_pages_created.py
+++ b/tests/test_metrics/test_pages_created.py
@@ -23,6 +23,26 @@
assert_equal(results[self.editors[0].user_id]["pages_created"], 3)
assert_equal(results[self.editors[1].user_id]["pages_created"], 1)
+ def test_case_uses_namespace_filter(self):
+ metric = PagesCreated(
+ namespaces=[0],
+ start_date='2013-06-19 00:00:00',
+ end_date='2013-08-21 00:00:00'
+ )
+ results = metric(self.editor_ids, self.mwSession)
+ assert_equal(results[self.editors[0].user_id]["pages_created"], 0)
+ assert_equal(results[self.editors[1].user_id]["pages_created"], 0)
+
+ def test_case_no_namespace_includes_all(self):
+ metric = PagesCreated(
+ namespaces=[],
+ start_date='2013-06-19 00:00:00',
+ end_date='2013-08-21 00:00:00'
+ )
+ results = metric(self.editor_ids, self.mwSession)
+ assert_equal(results[self.editors[0].user_id]["pages_created"], 3)
+ assert_equal(results[self.editors[1].user_id]["pages_created"], 1)
+
# same thing as before, but this time we leave one page created
# out of the date range to see if date ranges work properly
def test_case_uses_date_range(self):
@@ -61,3 +81,27 @@
assert_equal(results[self.editors[1].user_id]["pages_created"], 1)
# NOTE: this is a bit precarious as it assumes the order of test data inserts
assert_equal(results[self.editors[0].user_id + 2]["pages_created"], 3)
+
+ def test_case_basic_with_archive(self):
+ self.archive_revisions()
+ self.test_case_basic()
+
+ def test_case_uses_namespace_filter_with_archive(self):
+ self.archive_revisions()
+ self.test_case_uses_namespace_filter()
+
+ def test_case_no_namespace_includes_all_with_archive(self):
+ self.archive_revisions()
+ self.test_case_no_namespace_includes_all()
+
+ def test_case_uses_date_range_with_archive(self):
+ self.archive_revisions()
+ self.test_case_uses_date_range()
+
+ def test_filters_out_other_editors_with_archive(self):
+ self.archive_revisions()
+ self.test_filters_out_other_editors()
+
+ def test_runs_for_an_entire_wiki_with_archive(self):
+ self.archive_revisions()
+ self.test_runs_for_an_entire_wiki()
diff --git a/wikimetrics/metrics/pages_created.py b/wikimetrics/metrics/pages_created.py
index ebd5df1..c417157 100644
--- a/wikimetrics/metrics/pages_created.py
+++ b/wikimetrics/metrics/pages_created.py
@@ -1,9 +1,10 @@
from sqlalchemy import func
+from sqlalchemy.sql.expression import label
from wtforms.validators import Required
from wikimetrics.utils import thirty_days_ago, today
-from wikimetrics.forms.fields import CommaSeparatedIntegerListField,
BetterDateTimeField
-from wikimetrics.models import Page, Revision
+from wikimetrics.forms.fields import CommaSeparatedIntegerListField,
BetterBooleanField
+from wikimetrics.models import Page, Revision, Archive
from timeseries_metric import TimeseriesMetric
@@ -36,11 +37,14 @@
'pages_created': 0,
}
+ include_deleted = BetterBooleanField(
+ default=True,
+ description='Count pages that have been deleted',
+ )
namespaces = CommaSeparatedIntegerListField(
None,
- [Required()],
default='0',
- description='0, 2, 4, etc.',
+ description='0, 2, 4, etc. (leave blank for *all*)',
)
def __call__(self, user_ids, session):
@@ -56,21 +60,46 @@
start_date = self.start_date.data
end_date = self.end_date.data
- query = session\
- .query(Revision.rev_user, func.count(Page.page_id))\
- .join(Page)\
- .filter(Page.page_namespace.in_(self.namespaces.data))\
+ revisions = session\
+ .query(
+ label('user_id', Revision.rev_user),
+ label('timestamp', Revision.rev_timestamp)
+ )\
.filter(Revision.rev_parent_id == 0)\
.filter(Revision.rev_timestamp > start_date)\
- .filter(Revision.rev_timestamp <= end_date)\
- .group_by(Revision.rev_user)
-
- pages_by_user = self.filter(query, user_ids)
+ .filter(Revision.rev_timestamp <= end_date)
- query = self.apply_timeseries(pages_by_user)
+ archives = session\
+ .query(
+ label('user_id', Archive.ar_user),
+ label('timestamp', Archive.ar_timestamp)
+ )\
+ .filter(Archive.ar_parent_id == 0)\
+ .filter(Archive.ar_timestamp > start_date)\
+ .filter(Archive.ar_timestamp <= end_date)
+
+ if self.namespaces.data and len(self.namespaces.data) > 0:
+ revisions = revisions.join(Page)\
+ .filter(Page.page_namespace.in_(self.namespaces.data))
+ archives = archives\
+ .filter(Archive.ar_namespace.in_(self.namespaces.data))
+
+ revisions = self.filter(revisions, user_ids, column=Revision.rev_user)
+ archives = self.filter(archives, user_ids, column=Archive.ar_user)
+
+ both = revisions
+ if self.include_deleted.data:
+ both = both.union_all(archives)
+ both = both.subquery()
+
+ query = session.query(both.c.user_id, func.count())\
+ .group_by(both.c.user_id)
+
+ query = self.apply_timeseries(query, column=both.c.timestamp)
+
return self.results_by_user(
user_ids,
query,
- [('pages_created', 1, 0)],
+ [(self.id, 1, 0)],
date_index=2,
)
diff --git a/wikimetrics/models/mediawiki/archive.py b/wikimetrics/models/mediawiki/archive.py
index 3fe616e..a8c1fd7 100644
--- a/wikimetrics/models/mediawiki/archive.py
+++ b/wikimetrics/models/mediawiki/archive.py
@@ -43,3 +43,4 @@
ar_flags = Column(TINYBLOB, nullable=False, default='')
ar_rev_id = Column(Integer, nullable=True)
ar_deleted = Column(Boolean, nullable=False, default=False)
+ ar_parent_id = Column(Integer, nullable=True)
--
To view, visit https://gerrit.wikimedia.org/r/161472
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5b3940e0946e90a35f17dade7ea3692f194bc523
Gerrit-PatchSet: 4
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits