Milimetric has submitted this change and it was merged.

Change subject: pages created timeseries implemented
......................................................................


pages created timeseries implemented

Change-Id: I16121f5324d2e6101ee23cfc41bcba72f692ec2e
---
M tests/test_models/test_aggregate_report.py
M wikimetrics/metrics/pages_created.py
M wikimetrics/metrics/survivors.py
M wikimetrics/models/report_nodes/aggregate_report.py
4 files changed, 200 insertions(+), 44 deletions(-)

Approvals:
  Milimetric: Verified; Looks good to me, approved



diff --git a/tests/test_models/test_aggregate_report.py b/tests/test_models/test_aggregate_report.py
index 7a932d9..cb789ae 100644
--- a/tests/test_models/test_aggregate_report.py
+++ b/tests/test_models/test_aggregate_report.py
@@ -1,6 +1,8 @@
 from decimal import Decimal
 from nose.tools import assert_equals, assert_true
-from wikimetrics.metrics import metric_classes
+from wikimetrics.metrics import (
+    metric_classes, NamespaceEdits, TimeseriesChoices,
+)
 from wikimetrics.models import (
     Aggregation, AggregateReport, PersistentReport, Cohort,
 )
@@ -96,7 +98,6 @@
         )
         assert_equals(
             finished[ar.result_key][Aggregation.AVG]['edits'],
-            # TODO: Again, figure out this crazy "None" user id
             Decimal(1.25)
         )
         assert_equals(
@@ -125,6 +126,127 @@
         
         assert_true(str(ar).find('AggregateReport') >= 0)
 
+
+class AggregateReportTimeseriesTest(QueueDatabaseTest):
+    
+    def setUp(self):
+        DatabaseTest.setUp(self)
+        self.create_test_cohort(
+            editor_count=4,
+            revisions_per_editor=4,
+            revision_timestamps=[
+                [20121231230000, 20130101003000, 20130101010000, 20140101010000],
+                [20130101120000, 20130102000000, 20130102120000, 20130103120000],
+                [20130101000000, 20130108000000, 20130116000000, 20130216000000],
+                [20130101000000, 20130201000000, 20140101000000, 20140102000000],
+            ],
+            revision_lengths=10
+        )
+    
+    def test_timeseries_day(self):
+        metric = NamespaceEdits(
+            namespaces=[0],
+            start_date='2012-12-31 00:00:00',
+            end_date='2013-01-03 00:00:00',
+            timeseries=TimeseriesChoices.DAY,
+        )
+        ar = AggregateReport(
+            self.cohort,
+            metric,
+            individual=True,
+            aggregate=True,
+            aggregate_sum=True,
+            aggregate_average=True,
+            aggregate_std_deviation=True,
+            user_id=self.test_user_id,
+        )
+        results = ar.task.delay(ar).get()
+        
+        self.session.commit()
+        aggregate_key = self.session.query(PersistentReport)\
+            .filter(PersistentReport.id == ar.persistent_id)\
+            .one()\
+            .result_key
+        
+        assert_equals(
+            results[aggregate_key][Aggregation.IND][0][self.editors[0].user_id]['edits'],
+            {
+                '2012-12-31 00:00:00' : 1,
+                '2013-01-01 00:00:00' : 2,
+                '2013-01-02 00:00:00' : 0,
+            }
+        )
+        assert_equals(
+            results[aggregate_key][Aggregation.SUM]['edits'],
+            {
+                '2012-12-31 00:00:00' : 1,
+                '2013-01-01 00:00:00' : 5,
+                '2013-01-02 00:00:00' : 2,
+            }
+        )
+        assert_equals(
+            results[aggregate_key][Aggregation.AVG]['edits'],
+            {
+                '2012-12-31 00:00:00' : Decimal(0.25),
+                '2013-01-01 00:00:00' : Decimal(1.25),
+                '2013-01-02 00:00:00' : Decimal(0.5),
+            }
+        )
+    
+    def test_finish_timeseries(self):
+        metric = NamespaceEdits(
+            namespaces=[0],
+            start_date='2012-12-31 00:00:00',
+            end_date='2013-01-03 00:00:00',
+            timeseries=TimeseriesChoices.DAY,
+        )
+        ar = AggregateReport(
+            self.cohort,
+            metric,
+            individual=True,
+            aggregate=True,
+            aggregate_sum=True,
+            aggregate_average=True,
+            aggregate_std_deviation=True,
+            user_id=self.test_user_id,
+        )
+        
+        finished = ar.finish([
+            {
+                'namespace edits - fake cohort' : {
+                    1: {'edits': {'date1': 1, 'date2': 2}},
+                    2: {'edits': {'date1': 0, 'date2': 1}},
+                    3: {'edits': {'date1': 0, 'date2': 0}},
+                    None: {'edits': {'date1': None, 'date2': None}}
+                }
+            },
+            {
+                'some other metric - fake cohort' : {
+                    1: {'other_sub_metric': {'date3': Decimal(2.3), 'date4': 0}},
+                    2: {'other_sub_metric': {'date3': 0, 'date4': Decimal(3.4)}},
+                    3: {'other_sub_metric': {'date3': None, 'date4': None}},
+                    None: {'other_sub_metric': {'date3': None, 'date4': None}}
+                }
+            },
+        ])
+        
+        assert_equals(
+            finished[ar.result_key][Aggregation.SUM]['edits'],
+            {'date1': 1, 'date2': 3}
+        )
+        assert_equals(
+            finished[ar.result_key][Aggregation.SUM]['other_sub_metric'],
+            {'date3': Decimal(2.3), 'date4': Decimal(3.4)}
+        )
+        assert_equals(
+            finished[ar.result_key][Aggregation.AVG]['edits'],
+            {'date1': 0.25, 'date2': 0.75}
+        )
+        assert_equals(
+            finished[ar.result_key][Aggregation.AVG]['other_sub_metric'],
+            {'date3': 0.575, 'date4': 0.85}
+        )
+
 # NOTE: a sample output of AggregateReport:
 #{
     #'f5ca5afe-6b2d-4052-bd51-6cbeaeba5eb9': {
diff --git a/wikimetrics/metrics/pages_created.py b/wikimetrics/metrics/pages_created.py
index ac68888..8e8a30b 100644
--- a/wikimetrics/metrics/pages_created.py
+++ b/wikimetrics/metrics/pages_created.py
@@ -1,6 +1,6 @@
 from ..utils import thirty_days_ago, today
 from sqlalchemy import func
-from metric import Metric
+from timeseries_metric import TimeseriesMetric
 from form_fields import CommaSeparatedIntegerListField, BetterDateTimeField
 from wtforms.validators import Required
 from wikimetrics.models import Page, Revision
@@ -9,7 +9,7 @@
 __all__ = ['PagesCreated']
 
 
-class PagesCreated(Metric):
+class PagesCreated(TimeseriesMetric):
     """
     This class counts the pages created by editors over a period of time.
 
@@ -34,9 +34,6 @@
          editor in a time interval'
     )
     
-    start_date  = BetterDateTimeField(default=thirty_days_ago)
-    end_date    = BetterDateTimeField(default=today)
-    
     namespaces = CommaSeparatedIntegerListField(
         None,
         [Required()],
@@ -57,20 +54,21 @@
         start_date = self.start_date.data
         end_date = self.end_date.data
         
-        pages_by_user = dict(
-            session
-            .query(Revision.rev_user, func.count(Page.page_id))
-            .join(Page)
-            .filter(Page.page_namespace.in_(self.namespaces.data))
-            .filter(Revision.rev_parent_id == 0)
-            .filter(Revision.rev_user.in_(user_ids))
-            .filter(Revision.rev_timestamp > start_date)
-            .filter(Revision.rev_timestamp <= end_date)
+        pages_by_user = session\
+            .query(Revision.rev_user, func.count(Page.page_id))\
+            .join(Page)\
+            .filter(Page.page_namespace.in_(self.namespaces.data))\
+            .filter(Revision.rev_parent_id == 0)\
+            .filter(Revision.rev_user.in_(user_ids))\
+            .filter(Revision.rev_timestamp > start_date)\
+            .filter(Revision.rev_timestamp <= end_date)\
             .group_by(Revision.rev_user)
-            .all()
-        )
         
-        return {
-            user_id: {'pages_created': pages_by_user.get(user_id, 0)}
-            for user_id in user_ids
-        }
+        query = self.apply_timeseries(pages_by_user)
+        return self.results_by_user(
+            user_ids,
+            query,
+            [('pages_created', 1, 0)],
+            submetric_default=0,
+            date_index=2,
+        )
diff --git a/wikimetrics/metrics/survivors.py b/wikimetrics/metrics/survivors.py
index d290e81..2ef8449 100644
--- a/wikimetrics/metrics/survivors.py
+++ b/wikimetrics/metrics/survivors.py
@@ -23,7 +23,7 @@
 
     """
     
-    show_in_ui  = True
+    show_in_ui  = False
     id          = 'survivors'
     label       = 'Survivors'
     description = (
diff --git a/wikimetrics/models/report_nodes/aggregate_report.py b/wikimetrics/models/report_nodes/aggregate_report.py
index b7514cf..0f228e0 100644
--- a/wikimetrics/models/report_nodes/aggregate_report.py
+++ b/wikimetrics/models/report_nodes/aggregate_report.py
@@ -97,32 +97,68 @@
         for results_by_user in list_of_results:
             for user_id in results_by_user.keys():
                 for key in results_by_user[user_id]:
-                    if not key in aggregation:
-                        aggregation[key] = 0
-                        helper[key] = dict()
-                        helper[key]['sum'] = Decimal(0.0)
-                        helper[key]['count'] = 0
-                    
                     value = results_by_user[user_id][key]
                     if not value:
+                        # NOTE: value should never be None in a timeseries result
                         value = Decimal(0)
                     
-                    helper[key]['sum'] += Decimal(value)
-                    helper[key]['count'] += 1
+                    # handle timeseries aggregation
+                    if isinstance(value, dict):
+                        if not key in aggregation:
+                            aggregation[key] = dict()
+                            helper[key] = dict()
+                            for subkey in value:
+                                aggregation[key][subkey] = 0
+                                helper[key][subkey] = dict()
+                                helper[key][subkey]['sum'] = Decimal(0.0)
+                                helper[key][subkey]['count'] = 0
+                        
+                        for subkey in value:
+                            value_subkey = value[subkey] or Decimal(0)
+                            
+                            helper[key][subkey]['sum'] += Decimal(value_subkey)
+                            helper[key][subkey]['count'] += 1
+                            
+                            if type_of_aggregate == Aggregation.SUM:
+                                aggregation[key][subkey] = round(
+                                    helper[key][subkey]['sum'],
+                                    4
+                                )
+                            elif type_of_aggregate == Aggregation.AVG:
+                                cummulative_sum = helper[key][subkey]['sum']
+                                count = helper[key][subkey]['count']
+                                aggregation[key][subkey] = round(
+                                    cummulative_sum / count,
+                                    4
+                                )
+                            elif type_of_aggregate == Aggregation.STD:
+                                aggregation[key][subkey] = 'Not Implemented'
+                                pass
                     
-                    if type_of_aggregate == Aggregation.SUM:
-                        aggregation[key] = round(
-                            helper[key]['sum'],
-                            4
-                        )
-                    elif type_of_aggregate == Aggregation.AVG:
-                        aggregation[key] = round(
-                            helper[key]['sum'] / helper[key]['count'],
-                            4
-                        )
-                    elif type_of_aggregate == Aggregation.STD:
-                        aggregation[key] = 'Not Implemented'
-                        pass
+                    # handle normal aggregation
+                    else:
+                        if not key in aggregation:
+                            aggregation[key] = 0
+                            helper[key] = dict()
+                            helper[key]['sum'] = Decimal(0.0)
+                            helper[key]['count'] = 0
+                        
+                        helper[key]['sum'] += Decimal(value)
+                        helper[key]['count'] += 1
+                        
+                        if type_of_aggregate == Aggregation.SUM:
+                            aggregation[key] = round(
+                                helper[key]['sum'],
+                                4
+                            )
+                        elif type_of_aggregate == Aggregation.AVG:
+                            aggregation[key] = round(
+                                helper[key]['sum'] / helper[key]['count'],
+                                4
+                            )
+                        elif type_of_aggregate == Aggregation.STD:
+                            aggregation[key] = 'Not Implemented'
+                            pass
         
         return aggregation
     

-- 
To view, visit https://gerrit.wikimedia.org/r/84764
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I16121f5324d2e6101ee23cfc41bcba72f692ec2e
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <dandree...@wikimedia.org>
Gerrit-Reviewer: Milimetric <dandree...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to