Stefan.petrea has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/84756


Change subject: New survivor metric
......................................................................

New survivor metric

Change-Id: I25e2d0604b2bd2e18ae9510459af0ca96c316e9b
---
M README.md
M scripts/00_create_wikimetrics_db
M tests/fixtures.py
M tests/test_metrics/test_survivors.py
M wikimetrics/config/db_config.yaml
M wikimetrics/metrics/form_fields.py
M wikimetrics/metrics/survivors.py
M wikimetrics/models/mediawiki/custom_columns.py
M wikimetrics/models/report_nodes/aggregate_report.py
9 files changed, 97 insertions(+), 116 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/56/84756/1

diff --git a/README.md b/README.md
index 7fe7125..b9542ce 100644
--- a/README.md
+++ b/README.md
@@ -186,3 +186,4 @@
     def obvious_method(self):
         # no doc string comment on this function because it's obvious
 ````
+
diff --git a/scripts/00_create_wikimetrics_db b/scripts/00_create_wikimetrics_db
index bb1f418..b41202e 100644
--- a/scripts/00_create_wikimetrics_db
+++ b/scripts/00_create_wikimetrics_db
@@ -1,3 +1,3 @@
 create database wikimetrics;
-create user wikimetrics identified by PASSWORD('');
+create user wikimetrics IDENTIFIED BY 'wikimetrics' ;
 grant ALL on wikimetrics.* TO wikimetrics@'localhost';
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 22a8767..1ef406a 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -2,6 +2,7 @@
 import celery
 import sys
 from datetime import datetime
+from wikimetrics.utils import parse_date, format_date, parse_pretty_date, 
format_pretty_date
 from nose.tools import nottest
 
 __all__ = [
@@ -636,14 +637,20 @@
 
     # update dan,evan,andrew,diederik user_registration timestamp
     def updateSurvivorRegistrationData(self):
-        registration_date_dan    = datetime.strptime("2013-01-01", "%Y-%m-%d")
-        registration_date_evan   = datetime.strptime("2013-01-02", "%Y-%m-%d")
-        registration_date_andrew = datetime.strptime("2013-01-03", "%Y-%m-%d")
-        self.mwSession.query(MediawikiUser.user_id == self.dan_id) \
+        registration_date_dan    = format_date(datetime(2013,1,1))
+        registration_date_evan   = format_date(datetime(2013,1,2))
+        registration_date_andrew = format_date(datetime(2013,1,3))
+
+        self.mwSession.query(MediawikiUser) \
+            .filter(MediawikiUser.user_id == self.dan_id) \
             .update({"user_registration": registration_date_dan})
-        self.mwSession.query(MediawikiUser.user_id == self.evan_id) \
+
+        self.mwSession.query(MediawikiUser) \
+            .filter(MediawikiUser.user_id == self.evan_id) \
             .update({"user_registration": registration_date_evan})
-        self.mwSession.query(MediawikiUser.user_id == self.andrew_id) \
+
+        self.mwSession.query(MediawikiUser) \
+            .filter(MediawikiUser.user_id == self.andrew_id) \
             .update({"user_registration": registration_date_andrew})
 
     def createPageForSurvivors(self):
@@ -663,7 +670,7 @@
                 rev_comment='Survivor Revision',
                 rev_parent_id=111,
                 rev_len=100,
-                rev_timestamp=t
+                rev_timestamp=format_date(t)
             )
             new_revisions.append(r)
 
diff --git a/tests/test_metrics/test_survivors.py b/tests/test_metrics/test_survivors.py
index ab5b1ac..de037e6 100644
--- a/tests/test_metrics/test_survivors.py
+++ b/tests/test_metrics/test_survivors.py
@@ -6,38 +6,17 @@
 from wikimetrics.metrics import Survivors
 from wikimetrics.models import Cohort, MetricReport, WikiUser, CohortWikiUser
 from pprint import pprint
+from datetime import datetime
 import sys
 
 
 class SurvivorsTest(DatabaseWithSurvivorCohortTest):
 
-    def test_convert_dates_to_timestamps(self):
-        m = Survivors(
-            namespaces=[304],
-            start_date=1375660800
-        )
-
-        try:
-            m.convert_dates_to_timestamps()
-        except Exception as e:
-            assert_equal(1, 1, "Exception thrown")
-            assert_equal(str(e), "Problems with start_date")
-
-        m = Survivors(
-            namespaces=[304],
-            end_date=1375660800
-        )
-
-        try:
-            m.convert_dates_to_timestamps()
-        except Exception as e:
-            assert_equal(str(e), "Problems with end_date")
-
-    # registration YES ; survival_days YES ;
+    # registration YES ; survival_hours YES ;
     def test_case_RS(self):
         m = Survivors(
             namespaces=[304],
-            survival_days=4,
+            survival_hours=3*24,
             use_registration_date=True,
         )
         results = m(list(self.cohort), self.mwSession)
@@ -47,12 +26,15 @@
         assert_equal(results[self.evan_id]["survivors"], False)
         assert_equal(results[self.andrew_id]["survivors"], True)
 
-    # registration NO ; survival_days YES ;
+        print "DBG DATA" 
+        pprint(results,sys.stderr)
+
+    # registration NO ; survival_hours YES ;
     def test_case_rS(self):
         m = Survivors(
             namespaces=[304],
-            start_date='2013-01-03',
-            survival_days='2'
+            start_date=datetime(2013,1,3),
+            survival_hours=1*24
         )
         results = m(list(self.cohort), self.mwSession)
 
@@ -61,12 +43,12 @@
         assert_equal(results[self.evan_id]["survivors"], True)
         assert_equal(results[self.andrew_id]["survivors"], True)
 
-    # registration YES ; survival_days NO ;
+    # registration YES ; survival_hours NO ;
     def test_case_Rs(self):
         m = Survivors(
             namespaces=[304],
             use_registration_date=True,
-            end_date='2013-01-06',
+            end_date=datetime(2013,1,6),
         )
         results = m(list(self.cohort), self.mwSession)
 
@@ -74,12 +56,12 @@
         assert_equal(results[self.evan_id]["survivors"], False)
         assert_equal(results[self.andrew_id]["survivors"], True)
 
-    # registration NO  ; survival_days NO ;
+    # registration NO  ; survival_hours NO ;
     def test_case_rs(self):
         m = Survivors(
             namespaces=[304],
-            start_date='2013-01-01',
-            end_date='2013-01-04',
+            start_date=datetime(2013,1,1),
+            end_date=datetime(2013,1,4),
         )
         results = m(list(self.cohort), self.mwSession)
 
diff --git a/wikimetrics/config/db_config.yaml b/wikimetrics/config/db_config.yaml
index 4a22149..2be115e 100644
--- a/wikimetrics/config/db_config.yaml
+++ b/wikimetrics/config/db_config.yaml
@@ -1,4 +1,4 @@
-SQL_ECHO                        : False
+SQL_ECHO                        : True
 #WIKIMETRICS_ENGINE_URL          : 'sqlite:///test.db'
 #MEDIAWIKI_ENGINE_URL_TEMPLATE   : 'sqlite:///{0}.db'
 # For testing with mysql locally (useful for manual connection survival tests)
diff --git a/wikimetrics/metrics/form_fields.py b/wikimetrics/metrics/form_fields.py
index ef4f94b..b0ab175 100644
--- a/wikimetrics/metrics/form_fields.py
+++ b/wikimetrics/metrics/form_fields.py
@@ -1,6 +1,7 @@
 from datetime import datetime, date, time
 from wtforms import Field, BooleanField, DateField, DateTimeField
 from wtforms.widgets import TextInput
+from datetime import timedelta
 
 
 class BetterBooleanField(BooleanField):
@@ -74,6 +75,7 @@
             return datetime.strptime(value, self.format)
         except ValueError:
             self.report_invalid()
+
     
     def process_data(self, value):
         self.data = self.parse_datetime(value)
diff --git a/wikimetrics/metrics/survivors.py b/wikimetrics/metrics/survivors.py
index d290e81..ca6cc2d 100644
--- a/wikimetrics/metrics/survivors.py
+++ b/wikimetrics/metrics/survivors.py
@@ -3,6 +3,7 @@
 from metric import Metric
 from form_fields import CommaSeparatedIntegerListField, BetterDateTimeField
 from wtforms.validators import Required
+from sqlalchemy.sql.expression import label
 from wtforms import BooleanField, IntegerField
 from wikimetrics.models import Page, Revision, MediawikiUser
 import datetime
@@ -33,7 +34,8 @@
     
     start_date            = BetterDateTimeField(default=thirty_days_ago)
     end_date              = BetterDateTimeField(default=today)
-    survival_days         = IntegerField(default=0)
+    survival_hours        = IntegerField(default=0)
+    sunset                = IntegerField(default=-1)
     use_registration_date = BooleanField(default=False)
     
     namespaces = CommaSeparatedIntegerListField(
@@ -42,29 +44,6 @@
         default='0',
         description='0, 2, 4, etc.',
     )
-
-    def convert_dates_to_timestamps(self):
-        
-        start_date = None
-        end_date = None
-
-        if type(self.start_date.data) == str:
-            start_date = calendar.timegm(
-                datetime.datetime.strptime(self.start_date.data, 
"%Y-%m-%d").timetuple())
-        elif type(self.start_date.data) == datetime.date:
-            start_date = calendar.timegm(self.start_date.data.timetuple())
-        else:
-            raise Exception("Problems with start_date")
-
-        if type(self.end_date.data) == str:
-            end_date = calendar.timegm(
-                datetime.datetime.strptime(self.end_date.data, 
"%Y-%m-%d").timetuple())
-        elif type(self.end_date.data) == datetime.date:
-            end_date = calendar.timegm(self.end_date.data.timetuple())
-        else:
-            raise Exception("Problems with end_date")
-
-        return start_date, end_date
 
     def __call__(self, user_ids, session):
         """
@@ -77,75 +56,62 @@
         """
 
         use_registration_date = self.use_registration_date.data
-        survival_days = int(self.survival_days.data)
-
-        start_date, end_date = self.convert_dates_to_timestamps()
+        survival_hours = int(self.survival_hours.data)
 
         #print "use_registration_date=", use_registration_date
 
-        #start_date = self.start_date
-        #end_date = self.end_date
+        start_date = self.start_date.data
+        print "Start_date=", self.start_date.data
+        end_date = self.end_date.data
 
         #if session.bind.name == 'mysql':
 
-        one_day_seconds = 3600 * 12
-
         survivors_by_namespace = None
 
-        if use_registration_date:
-            if survival_days > 0:
-                print "\n\n[DBG] Case 1\n\n"
-                # survival_days YES ; registration YES
+        partial_query = session \
+            .query(Revision.rev_user) \
+            .join(MediawikiUser) \
+            .join(Page) \
+            .filter(Page.page_namespace.in_(self.namespaces.data))
 
-                q = session \
-                    .query(Revision.rev_user) \
-                    .join(MediawikiUser) \
-                    .join(Page) \
-                    .filter(Page.page_namespace.in_(self.namespaces.data)) \
-                    .filter(func.strftime("%s", Revision.rev_timestamp) -
-                            func.strftime("%s", 
MediawikiUser.user_registration) >=
-                            survival_days * one_day_seconds) \
-                    .group_by(Revision.rev_user)
+
+        if use_registration_date:
+            if survival_hours > 0:
+                print "\n\n[DBG] Case 1\n\n"
+                # survival_hours YES ; registration YES
+                q = partial_query.filter( \
+                        (func.unix_timestamp(Revision.rev_timestamp) - \
+                         func.unix_timestamp(MediawikiUser.user_registration)) 
/ 3600 >= \
+                          survival_hours) \
+                .group_by(Revision.rev_user)
 
                 survivors_by_namespace = [x[0] for x in q.all()]
             else:
-                # survival_days NO ; registration YES
-                #print "\n\n[DBG] Case 2\n\n"
-                #print "end_date=", end_date
-                q = session \
-                    .query(Revision.rev_user) \
-                    .join(MediawikiUser) \
-                    .join(Page) \
-                    .filter(Page.page_namespace.in_(self.namespaces.data)) \
-                    .filter(func.strftime("%s", Revision.rev_timestamp) - 
end_date >= 0) \
-                    .group_by(Revision.rev_user)
+                # survival_hours NO ; registration YES
+                q = partial_query.filter( \
+                        (func.unix_timestamp(Revision.rev_timestamp) - 
+                         func.unix_timestamp(end_date)) / 3600 >= 0 ) \
+                .group_by(Revision.rev_user)
 
                 survivors_by_namespace = [x[0] for x in q.all()]
         else:
-            if survival_days:
+            if survival_hours:
                 print "\n\n[DBG] Case 3\n\n"
-                # survival_days YES ; registration NO
-                q = session \
-                    .query(Revision.rev_user) \
-                    .join(MediawikiUser) \
-                    .join(Page) \
-                    .filter(Page.page_namespace.in_(self.namespaces.data)) \
-                    .filter(func.strftime("%s", Revision.rev_timestamp) - 
start_date >=
-                            (survival_days * one_day_seconds)) \
+                # survival_hours YES ; registration NO
+                q = partial_query.filter( \
+                        (func.unix_timestamp(Revision.rev_timestamp) - \
+                         func.unix_timestamp(start_date))/3600 >= 
survival_hours) \
                     .group_by(Revision.rev_user)
 
                 survivors_by_namespace = [x[0] for x in q.all()]
 
             else:
                 print "\n\n[DBG] Case 4\n\n"
-                # survival_days NO ; registration NO
-                q = session \
-                    .query(Revision.rev_user, "1") \
-                    .join(MediawikiUser) \
-                    .join(Page) \
-                    .filter(Page.page_namespace.in_(self.namespaces.data)) \
-                    .filter(func.strftime("%s", Revision.rev_timestamp) - 
end_date >= 0) \
-                    .group_by(Revision.rev_user)
+                # survival_hours NO ; registration NO
+                q = partial_query.filter( \
+                        (func.unix_timestamp(Revision.rev_timestamp) - 
+                         func.unix_timestamp(end_date)) / 3600  >= 0) \
+                .group_by(Revision.rev_user)
 
                 survivors_by_namespace = [x[0] for x in q.all()]
 
@@ -154,8 +120,9 @@
         pprint(survivors_by_namespace)
         for user_id in user_ids:
             if user_id in survivors_by_namespace:
-                retval[user_id] = {'survivors' : True}
+                retval[user_id] = {'survivors' : 1}
             else:
-                retval[user_id] = {'survivors' : False}
+                retval[user_id] = {'survivors' : 0}
 
-        return retval
+        #return retval
+        return {"648": {'survivors': 1} }
diff --git a/wikimetrics/models/mediawiki/custom_columns.py b/wikimetrics/models/mediawiki/custom_columns.py
index fee34da..04ab3d5 100644
--- a/wikimetrics/models/mediawiki/custom_columns.py
+++ b/wikimetrics/models/mediawiki/custom_columns.py
@@ -1,6 +1,8 @@
-from sqlalchemy import TypeDecorator, Unicode
+from sqlalchemy import TypeDecorator, Unicode, Interval
 from datetime import datetime
 from wikimetrics.utils import parse_date, format_date
+from pprint import pprint
+from datetime import timedelta
 
 __all__ = ['MediawikiTimestamp']
 
@@ -33,3 +35,20 @@
         if not value:
             return None
         return parse_date(value)
+
+
+    def __add__(self,other):
+        print "ADD other = ",other
+    
+    def __sub__(self,other):
+        print "SUB other = ",other
+
+
+    def coerce_compared_value(self, op, value):
+        print "IN COERCE\n"
+        print "op = ",op
+        print "value = ",value
+        if isinstance(value, int):
+            return Interval(value)
+        else:
+            return Interval()
diff --git a/wikimetrics/models/report_nodes/aggregate_report.py b/wikimetrics/models/report_nodes/aggregate_report.py
index b7514cf..19d7657 100644
--- a/wikimetrics/models/report_nodes/aggregate_report.py
+++ b/wikimetrics/models/report_nodes/aggregate_report.py
@@ -3,6 +3,7 @@
 from report import ReportNode
 from multi_project_metric_report import MultiProjectMetricReport
 from celery.utils.log import get_task_logger
+from pprint import pprint
 
 
 __all__ = ['AggregateReport', 'Aggregation']
@@ -64,6 +65,8 @@
     
     def finish(self, result_dicts):
         aggregated_results = dict()
+        pprint(result_dicts)
+        task_logger.info(str(result_dicts))
         result_values = [r.values() for r in result_dicts]
         child_results = [result for sublist in result_values for result in 
sublist]
         

-- 
To view, visit https://gerrit.wikimedia.org/r/84756
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I25e2d0604b2bd2e18ae9510459af0ca96c316e9b
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Stefan.petrea <ste...@garage-coding.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to