Milimetric has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/161472

Change subject: Update PagesCreated metric
......................................................................

Update PagesCreated metric

* Make the namespace parameter include all namespaces if left blank
* Add a new parameter "Include Deleted" that also counts deleted pages
  (by looking at the archive table)
* Update corresponding labeling, UI, and tests

Change-Id: I5b3940e0946e90a35f17dade7ea3692f194bc523
---
M tests/fixtures.py
M tests/test_metrics/test_pages_created.py
M wikimetrics/metrics/pages_created.py
M wikimetrics/models/mediawiki/archive.py
4 files changed, 91 insertions(+), 15 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/72/161472/1

diff --git a/tests/fixtures.py b/tests/fixtures.py
index 2bba639..3ac24b4 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -572,7 +572,8 @@
         query = self.mwSession.query(
             Revision.rev_timestamp,
             Revision.rev_user,
-            Page.page_namespace
+            Revision.rev_parent_id,
+            Page.page_namespace,
         ).join(Page)
         revisions = query.all()
 
@@ -582,6 +583,7 @@
                     'ar_rev_id': None,
                     'ar_timestamp': r.rev_timestamp,
                     'ar_user': r.rev_user,
+                    'ar_parent_id': r.rev_parent_id,
                     'ar_namespace': r.page_namespace,
                 }
                 for r in revisions
diff --git a/tests/test_metrics/test_pages_created.py b/tests/test_metrics/test_pages_created.py
index 3d492b4..e1bf8e4 100644
--- a/tests/test_metrics/test_pages_created.py
+++ b/tests/test_metrics/test_pages_created.py
@@ -23,6 +23,26 @@
         assert_equal(results[self.editors[0].user_id]["pages_created"], 3)
         assert_equal(results[self.editors[1].user_id]["pages_created"], 1)
 
+    def test_case_uses_namespace_filter(self):
+        metric = PagesCreated(
+            namespaces=[0],
+            start_date='2013-06-19 00:00:00',
+            end_date='2013-08-21 00:00:00'
+        )
+        results = metric(self.editor_ids, self.mwSession)
+        assert_equal(results[self.editors[0].user_id]["pages_created"], 0)
+        assert_equal(results[self.editors[1].user_id]["pages_created"], 0)
+
+    def test_case_no_namespace_includes_all(self):
+        metric = PagesCreated(
+            namespaces=[],
+            start_date='2013-06-19 00:00:00',
+            end_date='2013-08-21 00:00:00'
+        )
+        results = metric(self.editor_ids, self.mwSession)
+        assert_equal(results[self.editors[0].user_id]["pages_created"], 3)
+        assert_equal(results[self.editors[1].user_id]["pages_created"], 1)
+
     # same thing as before, but this time we leave one page created
     # out of the date range to see if date ranges work properly
     def test_case_uses_date_range(self):
@@ -61,3 +81,27 @@
         assert_equal(results[self.editors[1].user_id]["pages_created"], 1)
         # NOTE: this is a bit precarious as it assumes the order of test data inserts
         assert_equal(results[self.editors[0].user_id + 2]["pages_created"], 3)
+
+    def test_case_basic_with_archive(self):
+        self.archive_revisions()
+        self.test_case_basic()
+
+    def test_case_uses_namespace_filter_with_archive(self):
+        self.archive_revisions()
+        self.test_case_uses_namespace_filter()
+
+    def test_case_no_namespace_includes_all_with_archive(self):
+        self.archive_revisions()
+        self.test_case_no_namespace_includes_all()
+
+    def test_case_uses_date_range_with_archive(self):
+        self.archive_revisions()
+        self.test_case_uses_date_range()
+
+    def test_filters_out_other_editors_with_archive(self):
+        self.archive_revisions()
+        self.test_filters_out_other_editors()
+
+    def test_runs_for_an_entire_wiki_with_archive(self):
+        self.archive_revisions()
+        self.test_runs_for_an_entire_wiki()
diff --git a/wikimetrics/metrics/pages_created.py b/wikimetrics/metrics/pages_created.py
index ebd5df1..c417157 100644
--- a/wikimetrics/metrics/pages_created.py
+++ b/wikimetrics/metrics/pages_created.py
@@ -1,9 +1,10 @@
 from sqlalchemy import func
+from sqlalchemy.sql.expression import label
 from wtforms.validators import Required
 
 from wikimetrics.utils import thirty_days_ago, today
-from wikimetrics.forms.fields import CommaSeparatedIntegerListField, BetterDateTimeField
-from wikimetrics.models import Page, Revision
+from wikimetrics.forms.fields import CommaSeparatedIntegerListField, BetterBooleanField
+from wikimetrics.models import Page, Revision, Archive
 from timeseries_metric import TimeseriesMetric
 
 
@@ -36,11 +37,14 @@
         'pages_created': 0,
     }
 
+    include_deleted = BetterBooleanField(
+        default=True,
+        description='Count pages that have been deleted',
+    )
     namespaces = CommaSeparatedIntegerListField(
         None,
-        [Required()],
         default='0',
-        description='0, 2, 4, etc.',
+        description='0, 2, 4, etc. (leave blank for *all*)',
     )
     
     def __call__(self, user_ids, session):
@@ -56,21 +60,46 @@
         start_date = self.start_date.data
         end_date = self.end_date.data
         
-        query = session\
-            .query(Revision.rev_user, func.count(Page.page_id))\
-            .join(Page)\
-            .filter(Page.page_namespace.in_(self.namespaces.data))\
+        revisions = session\
+            .query(
+                label('user_id', Revision.rev_user),
+                label('timestamp', Revision.rev_timestamp)
+            )\
             .filter(Revision.rev_parent_id == 0)\
             .filter(Revision.rev_timestamp > start_date)\
-            .filter(Revision.rev_timestamp <= end_date)\
-            .group_by(Revision.rev_user)
-        
-        pages_by_user = self.filter(query, user_ids)
+            .filter(Revision.rev_timestamp <= end_date)
 
-        query = self.apply_timeseries(pages_by_user)
+        archives = session\
+            .query(
+                label('user_id', Archive.ar_user),
+                label('timestamp', Archive.ar_timestamp)
+            )\
+            .filter(Archive.ar_parent_id == 0)\
+            .filter(Archive.ar_timestamp > start_date)\
+            .filter(Archive.ar_timestamp <= end_date)
+
+        if self.namespaces.data and len(self.namespaces.data) > 0:
+            revisions = revisions.join(Page)\
+                .filter(Page.page_namespace.in_(self.namespaces.data))
+            archives = archives\
+                .filter(Archive.ar_namespace.in_(self.namespaces.data))
+
+        revisions = self.filter(revisions, user_ids, column=Revision.rev_user)
+        archives = self.filter(archives, user_ids, column=Archive.ar_user)
+
+        both = revisions
+        if self.include_deleted.data:
+            both = both.union_all(archives)
+        both = both.subquery()
+
+        query = session.query(both.c.user_id, func.count())\
+            .group_by(both.c.user_id)
+
+        query = self.apply_timeseries(query, column=both.c.timestamp)
+
         return self.results_by_user(
             user_ids,
             query,
-            [('pages_created', 1, 0)],
+            [(self.id, 1, 0)],
             date_index=2,
         )
diff --git a/wikimetrics/models/mediawiki/archive.py b/wikimetrics/models/mediawiki/archive.py
index 3fe616e..a8c1fd7 100644
--- a/wikimetrics/models/mediawiki/archive.py
+++ b/wikimetrics/models/mediawiki/archive.py
@@ -43,3 +43,4 @@
     ar_flags      = Column(TINYBLOB, nullable=False, default='')
     ar_rev_id     = Column(Integer, nullable=True)
     ar_deleted    = Column(Boolean, nullable=False, default=False)
+    ar_parent_id  = Column(Integer, nullable=True)

-- 
To view, visit https://gerrit.wikimedia.org/r/161472
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5b3940e0946e90a35f17dade7ea3692f194bc523
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to