jenkins-bot has submitted this change and it was merged.

Change subject: Make apply timeseries more flexible
......................................................................


Make apply timeseries more flexible

Change-Id: Ie6f209c35b6ab8b2a696493d921376da060cc740
---
M tests/test_metrics/test_namespace_edits.py
M wikimetrics/metrics/bytes_added.py
M wikimetrics/metrics/namespace_edits.py
M wikimetrics/metrics/timeseries_metric.py
4 files changed, 72 insertions(+), 27 deletions(-)

Approvals:
  Nuria: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/tests/test_metrics/test_namespace_edits.py b/tests/test_metrics/test_namespace_edits.py
index 133997c..05f3e3d 100644
--- a/tests/test_metrics/test_namespace_edits.py
+++ b/tests/test_metrics/test_namespace_edits.py
@@ -81,6 +81,22 @@
         )
         assert_true(not metric.validate())
 
+    def test_filters_out_other_editors_with_archive(self):
+        self.archive_revisions()
+        self.test_filters_out_other_editors()
+
+    def test_runs_for_an_entire_wiki_with_archive(self):
+        self.archive_revisions()
+        self.test_runs_for_an_entire_wiki()
+
+    def test_finds_edits_with_archive(self):
+        self.archive_revisions()
+        self.test_finds_edits()
+
+    def test_reports_zero_edits_with_archive(self):
+        self.archive_revisions()
+        self.test_reports_zero_edits()
+
 
 class NamespaceEditsFullTest(QueueDatabaseTest):
     def setUp(self):
@@ -162,6 +178,9 @@
         assert_true(results is not None)
         assert_equal(results[self.editor(0)]['edits'], 3)
 
+    def test_all_with_archive(self):
+        pass
+
 
 class NamespaceEditsTimestampTest(DatabaseTest):
     
@@ -206,6 +225,9 @@
         )
         results = metric(self.editor_ids, self.mwSession)
         assert_equal(results[self.editors[0].user_id]['edits'], 1)
+
+    def test_all_with_archive(self):
+        pass
 
 
 class NamespaceEditsTimeseriesTest(DatabaseTest):
@@ -264,3 +286,6 @@
                 '2013-01-01 01:00:00' : 1,
             }
         )
+
+    def test_all_with_archive(self):
+        pass
diff --git a/wikimetrics/metrics/bytes_added.py b/wikimetrics/metrics/bytes_added.py
index 0e057c5..eed9120 100644
--- a/wikimetrics/metrics/bytes_added.py
+++ b/wikimetrics/metrics/bytes_added.py
@@ -154,5 +154,5 @@
 
         self.default_result = {s[0]: s[2] for s in submetrics}
 
-        query = self.apply_timeseries(bytes_added_by_user, rev=BC.c)
+        query = self.apply_timeseries(bytes_added_by_user, column=BC.c.rev_timestamp)
         return self.results_by_user(user_ids, query, submetrics, date_index=index)
diff --git a/wikimetrics/metrics/namespace_edits.py b/wikimetrics/metrics/namespace_edits.py
index e55697b..c952682 100644
--- a/wikimetrics/metrics/namespace_edits.py
+++ b/wikimetrics/metrics/namespace_edits.py
@@ -1,8 +1,9 @@
 from sqlalchemy import func
+from sqlalchemy.sql.expression import label
 from wtforms.validators import Required
 
 from wikimetrics.utils import thirty_days_ago, today
-from wikimetrics.models import Page, Revision
+from wikimetrics.models import Page, Revision, Archive
 from wikimetrics.forms.fields import CommaSeparatedIntegerListField
 from timeseries_metric import TimeseriesMetric
 
@@ -12,9 +13,9 @@
     This class implements namespace edits logic.
     An instance of the class is callable and will compute the number of edits
     for each user in a passed-in list.
-    
+
     This sql query was used as a starting point for the sqlalchemy query:
-    
+
      select r.rev_user, r.count(*)
        from revision r
                 inner join
@@ -24,7 +25,7 @@
         and p.page_namespace in ([parameterized])
       group by rev_user
     """
-    
+
     show_in_ui  = True
     id          = 'edits'
     label       = 'Edits'
@@ -43,32 +44,51 @@
         default='0',
         description='0, 2, 4, etc.',
     )
-    
+
     def __call__(self, user_ids, session):
         """
         Parameters:
             user_ids    : list of mediawiki user ids to find edit for
             session     : sqlalchemy session open on a mediawiki database
-        
+
         Returns:
             dictionary from user ids to the number of edit found.
         """
         start_date = self.start_date.data
         end_date = self.end_date.data
-        
-        query = session\
-            .query(Revision.rev_user, func.count(Revision.rev_id))\
+
+        revisions = session\
+            .query(
+                label('user_id', Revision.rev_user),
+                label('timestamp', Revision.rev_timestamp)
+            )\
             .join(Page)\
             .filter(Page.page_namespace.in_(self.namespaces.data))\
             .filter(Revision.rev_timestamp > start_date)\
-            .filter(Revision.rev_timestamp <= end_date)\
-            .group_by(Revision.rev_user)
-        
-        query = self.filter(query, user_ids)
-        query = self.apply_timeseries(query)
+            .filter(Revision.rev_timestamp <= end_date)
+
+        archives = session\
+            .query(
+                label('user_id', Archive.ar_user),
+                label('timestamp', Archive.ar_timestamp)
+            )\
+            .filter(Archive.ar_namespace.in_(self.namespaces.data))\
+            .filter(Archive.ar_timestamp > start_date)\
+            .filter(Archive.ar_timestamp <= end_date)
+
+        revisions = self.filter(revisions, user_ids, column=Revision.rev_user)
+        archives = self.filter(archives, user_ids, column=Archive.ar_user)
+
+        both = revisions.union_all(archives).subquery()
+
+        query = session.query(both.c.user_id, func.count())\
+            .group_by(both.c.user_id)
+
+        query = self.apply_timeseries(query, column=both.c.timestamp)
+
         return self.results_by_user(
             user_ids,
             query,
-            [('edits', 1, 0)],
+            [(self.id, 1, 0)],
             date_index=2,
         )
diff --git a/wikimetrics/metrics/timeseries_metric.py b/wikimetrics/metrics/timeseries_metric.py
index e2854aa..a09a7fb 100644
--- a/wikimetrics/metrics/timeseries_metric.py
+++ b/wikimetrics/metrics/timeseries_metric.py
@@ -32,14 +32,14 @@
         ],
     )
     
-    def apply_timeseries(self, query, rev=Revision):
+    def apply_timeseries(self, query, column=Revision.rev_timestamp):
         """
         Take a query and slice it up into equal time intervals
         
         Parameters
             query   : a sql alchemy query
-            rev     : defaults to Revision, specifies the object that
-                      contains the appropriate rev_timestamp
+            column  : defaults to Revision.rev_timestamp, specifies the timestamp
+                      column to use for the timeseries
         
         Returns
             The query parameter passed in, with a grouping by the desired time slice
@@ -49,26 +49,26 @@
         if choice == TimeseriesChoices.NONE:
             return query
         
-        query = query.add_column(func.year(rev.rev_timestamp))
-        query = query.group_by(func.year(rev.rev_timestamp))
+        query = query.add_column(func.year(column))
+        query = query.group_by(func.year(column))
         
         if choice == TimeseriesChoices.YEAR:
             return query
         
-        query = query.add_column(func.month(rev.rev_timestamp))
-        query = query.group_by(func.month(rev.rev_timestamp))
+        query = query.add_column(func.month(column))
+        query = query.group_by(func.month(column))
         
         if choice == TimeseriesChoices.MONTH:
             return query
         
-        query = query.add_column(func.day(rev.rev_timestamp))
-        query = query.group_by(func.day(rev.rev_timestamp))
+        query = query.add_column(func.day(column))
+        query = query.group_by(func.day(column))
         
         if choice == TimeseriesChoices.DAY:
             return query
         
-        query = query.add_column(func.hour(rev.rev_timestamp))
-        query = query.group_by(func.hour(rev.rev_timestamp))
+        query = query.add_column(func.hour(column))
+        query = query.group_by(func.hour(column))
         
         if choice == TimeseriesChoices.HOUR:
             return query

-- 
To view, visit https://gerrit.wikimedia.org/r/161366
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie6f209c35b6ab8b2a696493d921376da060cc740
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to