Stefan.petrea has uploaded a new change for review. https://gerrit.wikimedia.org/r/84756
Change subject: New survivor metric ...................................................................... New survivor metric Change-Id: I25e2d0604b2bd2e18ae9510459af0ca96c316e9b --- M README.md M scripts/00_create_wikimetrics_db M tests/fixtures.py M tests/test_metrics/test_survivors.py M wikimetrics/config/db_config.yaml M wikimetrics/metrics/form_fields.py M wikimetrics/metrics/survivors.py M wikimetrics/models/mediawiki/custom_columns.py M wikimetrics/models/report_nodes/aggregate_report.py 9 files changed, 97 insertions(+), 116 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/56/84756/1 diff --git a/README.md b/README.md index 7fe7125..b9542ce 100644 --- a/README.md +++ b/README.md @@ -186,3 +186,4 @@ def obvious_method(self): # no doc string comment on this function because it's obvious ```` + diff --git a/scripts/00_create_wikimetrics_db b/scripts/00_create_wikimetrics_db index bb1f418..b41202e 100644 --- a/scripts/00_create_wikimetrics_db +++ b/scripts/00_create_wikimetrics_db @@ -1,3 +1,3 @@ create database wikimetrics; -create user wikimetrics identified by PASSWORD(''); +create user wikimetrics IDENTIFIED BY 'wikimetrics' ; grant ALL on wikimetrics.* TO wikimetrics@'localhost'; diff --git a/tests/fixtures.py b/tests/fixtures.py index 22a8767..1ef406a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -2,6 +2,7 @@ import celery import sys from datetime import datetime +from wikimetrics.utils import parse_date, format_date, parse_pretty_date, format_pretty_date from nose.tools import nottest __all__ = [ @@ -636,14 +637,20 @@ # update dan,evan,andrew,diederik user_registration timestamp def updateSurvivorRegistrationData(self): - registration_date_dan = datetime.strptime("2013-01-01", "%Y-%m-%d") - registration_date_evan = datetime.strptime("2013-01-02", "%Y-%m-%d") - registration_date_andrew = datetime.strptime("2013-01-03", "%Y-%m-%d") - self.mwSession.query(MediawikiUser.user_id == self.dan_id) \ + registration_date_dan = format_date(datetime(2013,1,1)) + registration_date_evan = format_date(datetime(2013,1,2)) + registration_date_andrew = format_date(datetime(2013,1,3)) + + self.mwSession.query(MediawikiUser) \ + .filter(MediawikiUser.user_id == self.dan_id) \ .update({"user_registration": registration_date_dan}) - self.mwSession.query(MediawikiUser.user_id == self.evan_id) \ + + self.mwSession.query(MediawikiUser) \ + .filter(MediawikiUser.user_id == self.evan_id) \ .update({"user_registration": registration_date_evan}) - self.mwSession.query(MediawikiUser.user_id == self.andrew_id) \ + + self.mwSession.query(MediawikiUser) \ + .filter(MediawikiUser.user_id == self.andrew_id) \ .update({"user_registration": registration_date_andrew}) def createPageForSurvivors(self): @@ -663,7 +670,7 @@ rev_comment='Survivor Revision', rev_parent_id=111, rev_len=100, - rev_timestamp=t + rev_timestamp=format_date(t) ) new_revisions.append(r) diff --git a/tests/test_metrics/test_survivors.py b/tests/test_metrics/test_survivors.py index ab5b1ac..de037e6 100644 --- a/tests/test_metrics/test_survivors.py +++ b/tests/test_metrics/test_survivors.py @@ -6,38 +6,17 @@ from wikimetrics.metrics import Survivors from wikimetrics.models import Cohort, MetricReport, WikiUser, CohortWikiUser from pprint import pprint +from datetime import datetime import sys class SurvivorsTest(DatabaseWithSurvivorCohortTest): - def test_convert_dates_to_timestamps(self): - m = Survivors( - namespaces=[304], - start_date=1375660800 - ) - - try: - m.convert_dates_to_timestamps() - except Exception as e: - assert_equal(1, 1, "Exception thrown") - assert_equal(str(e), "Problems with start_date") - - m = Survivors( - namespaces=[304], - end_date=1375660800 - ) - - try: - m.convert_dates_to_timestamps() - except Exception as e: - assert_equal(str(e), "Problems with end_date") - - # registration YES ; survival_days YES ; + # registration YES ; survival_hours YES ; def test_case_RS(self): m = Survivors( namespaces=[304], - survival_days=4, + survival_hours=3*24, use_registration_date=True, ) results = m(list(self.cohort), self.mwSession) @@ -47,12 +26,15 @@ assert_equal(results[self.evan_id]["survivors"], False) assert_equal(results[self.andrew_id]["survivors"], True) - # registration NO ; survival_days YES ; + print "DBG DATA" + pprint(results,sys.stderr) + + # registration NO ; survival_hours YES ; def test_case_rS(self): m = Survivors( namespaces=[304], - start_date='2013-01-03', - survival_days='2' + start_date=datetime(2013,1,3), + survival_hours=1*24 ) results = m(list(self.cohort), self.mwSession) @@ -61,12 +43,12 @@ assert_equal(results[self.evan_id]["survivors"], True) assert_equal(results[self.andrew_id]["survivors"], True) - # registration YES ; survival_days NO ; + # registration YES ; survival_hours NO ; def test_case_Rs(self): m = Survivors( namespaces=[304], use_registration_date=True, - end_date='2013-01-06', + end_date=datetime(2013,1,6), ) results = m(list(self.cohort), self.mwSession) @@ -74,12 +56,12 @@ assert_equal(results[self.evan_id]["survivors"], False) assert_equal(results[self.andrew_id]["survivors"], True) - # registration NO ; survival_days NO ; + # registration NO ; survival_hours NO ; def test_case_rs(self): m = Survivors( namespaces=[304], - start_date='2013-01-01', - end_date='2013-01-04', + start_date=datetime(2013,1,1), + end_date=datetime(2013,1,4), ) results = m(list(self.cohort), self.mwSession) diff --git a/wikimetrics/config/db_config.yaml b/wikimetrics/config/db_config.yaml index 4a22149..2be115e 100644 --- a/wikimetrics/config/db_config.yaml +++ b/wikimetrics/config/db_config.yaml @@ -1,4 +1,4 @@ -SQL_ECHO : False +SQL_ECHO : True #WIKIMETRICS_ENGINE_URL : 'sqlite:///test.db' #MEDIAWIKI_ENGINE_URL_TEMPLATE : 'sqlite:///{0}.db' # For testing with mysql locally (useful for manual connection survival tests) diff --git a/wikimetrics/metrics/form_fields.py b/wikimetrics/metrics/form_fields.py index ef4f94b..b0ab175 100644 --- a/wikimetrics/metrics/form_fields.py +++ b/wikimetrics/metrics/form_fields.py @@ -1,6 +1,7 @@ from datetime import datetime, date, time from wtforms import Field, BooleanField, DateField, DateTimeField from wtforms.widgets import TextInput +from datetime import timedelta class BetterBooleanField(BooleanField): @@ -74,6 +75,7 @@ return datetime.strptime(value, self.format) except ValueError: self.report_invalid() + def process_data(self, value): self.data = self.parse_datetime(value) diff --git a/wikimetrics/metrics/survivors.py b/wikimetrics/metrics/survivors.py index d290e81..ca6cc2d 100644 --- a/wikimetrics/metrics/survivors.py +++ b/wikimetrics/metrics/survivors.py @@ -3,6 +3,7 @@ from metric import Metric from form_fields import CommaSeparatedIntegerListField, BetterDateTimeField from wtforms.validators import Required +from sqlalchemy.sql.expression import label from wtforms import BooleanField, IntegerField from wikimetrics.models import Page, Revision, MediawikiUser import datetime @@ -33,7 +34,8 @@ start_date = BetterDateTimeField(default=thirty_days_ago) end_date = BetterDateTimeField(default=today) - survival_days = IntegerField(default=0) + survival_hours = IntegerField(default=0) + sunset = IntegerField(default=-1) use_registration_date = BooleanField(default=False) namespaces = CommaSeparatedIntegerListField( @@ -42,29 +44,6 @@ default='0', description='0, 2, 4, etc.', ) - - def convert_dates_to_timestamps(self): - - start_date = None - end_date = None - - if type(self.start_date.data) == str: - start_date = calendar.timegm( - datetime.datetime.strptime(self.start_date.data, "%Y-%m-%d").timetuple()) - elif type(self.start_date.data) == datetime.date: - start_date = calendar.timegm(self.start_date.data.timetuple()) - else: - raise Exception("Problems with start_date") - - if type(self.end_date.data) == str: - end_date = calendar.timegm( - datetime.datetime.strptime(self.end_date.data, "%Y-%m-%d").timetuple()) - elif type(self.end_date.data) == datetime.date: - end_date = calendar.timegm(self.end_date.data.timetuple()) - else: - raise Exception("Problems with end_date") - - return start_date, end_date def __call__(self, user_ids, session): """ @@ -77,75 +56,62 @@ """ use_registration_date = self.use_registration_date.data - survival_days = int(self.survival_days.data) - - start_date, end_date = self.convert_dates_to_timestamps() + survival_hours = int(self.survival_hours.data) #print "use_registration_date=", use_registration_date - #start_date = self.start_date - #end_date = self.end_date + start_date = self.start_date.data + print "Start_date=", self.start_date.data + end_date = self.end_date.data #if session.bind.name == 'mysql': - one_day_seconds = 3600 * 12 - survivors_by_namespace = None - if use_registration_date: - if survival_days > 0: - print "\n\n[DBG] Case 1\n\n" - # survival_days YES ; registration YES + partial_query = session \ + .query(Revision.rev_user) \ + .join(MediawikiUser) \ + .join(Page) \ + .filter(Page.page_namespace.in_(self.namespaces.data)) - q = session \ - .query(Revision.rev_user) \ - .join(MediawikiUser) \ - .join(Page) \ - .filter(Page.page_namespace.in_(self.namespaces.data)) \ - .filter(func.strftime("%s", Revision.rev_timestamp) - - func.strftime("%s", MediawikiUser.user_registration) >= - survival_days * one_day_seconds) \ - .group_by(Revision.rev_user) + + if use_registration_date: + if survival_hours > 0: + print "\n\n[DBG] Case 1\n\n" + # survival_hours YES ; registration YES + q = partial_query.filter( \ + (func.unix_timestamp(Revision.rev_timestamp) - \ + func.unix_timestamp(MediawikiUser.user_registration)) / 3600 >= \ + survival_hours) \ + .group_by(Revision.rev_user) survivors_by_namespace = [x[0] for x in q.all()] else: - # survival_days NO ; registration YES - #print "\n\n[DBG] Case 2\n\n" - #print "end_date=", end_date - q = session \ - .query(Revision.rev_user) \ - .join(MediawikiUser) \ - .join(Page) \ - .filter(Page.page_namespace.in_(self.namespaces.data)) \ - .filter(func.strftime("%s", Revision.rev_timestamp) - end_date >= 0) \ - .group_by(Revision.rev_user) + # survival_hours NO ; registration YES + q = partial_query.filter( \ + (func.unix_timestamp(Revision.rev_timestamp) - + func.unix_timestamp(end_date)) / 3600 >= 0 ) \ + .group_by(Revision.rev_user) survivors_by_namespace = [x[0] for x in q.all()] else: - if survival_days: + if survival_hours: print "\n\n[DBG] Case 3\n\n" - # survival_days YES ; registration NO - q = session \ - .query(Revision.rev_user) \ - .join(MediawikiUser) \ - .join(Page) \ - .filter(Page.page_namespace.in_(self.namespaces.data)) \ - .filter(func.strftime("%s", Revision.rev_timestamp) - start_date >= - (survival_days * one_day_seconds)) \ + # survival_hours YES ; registration NO + q = partial_query.filter( \ + (func.unix_timestamp(Revision.rev_timestamp) - \ + func.unix_timestamp(start_date))/3600 >= survival_hours) \ .group_by(Revision.rev_user) survivors_by_namespace = [x[0] for x in q.all()] else: print "\n\n[DBG] Case 4\n\n" - # survival_days NO ; registration NO - q = session \ - .query(Revision.rev_user, "1") \ - .join(MediawikiUser) \ - .join(Page) \ - .filter(Page.page_namespace.in_(self.namespaces.data)) \ - .filter(func.strftime("%s", Revision.rev_timestamp) - end_date >= 0) \ - .group_by(Revision.rev_user) + # survival_hours NO ; registration NO + q = partial_query.filter( \ + (func.unix_timestamp(Revision.rev_timestamp) - + func.unix_timestamp(end_date)) / 3600 >= 0) \ + .group_by(Revision.rev_user) survivors_by_namespace = [x[0] for x in q.all()] @@ -154,8 +120,9 @@ pprint(survivors_by_namespace) for user_id in user_ids: if user_id in survivors_by_namespace: - retval[user_id] = {'survivors' : True} + retval[user_id] = {'survivors' : 1} else: - retval[user_id] = {'survivors' : False} + retval[user_id] = {'survivors' : 0} - return retval + #return retval + return {"648": {'survivors': 1} } diff --git a/wikimetrics/models/mediawiki/custom_columns.py b/wikimetrics/models/mediawiki/custom_columns.py index fee34da..04ab3d5 100644 --- a/wikimetrics/models/mediawiki/custom_columns.py +++ b/wikimetrics/models/mediawiki/custom_columns.py @@ -1,6 +1,8 @@ -from sqlalchemy import TypeDecorator, Unicode +from sqlalchemy import TypeDecorator, Unicode, Interval from datetime import datetime from wikimetrics.utils import parse_date, format_date +from pprint import pprint +from datetime import timedelta __all__ = ['MediawikiTimestamp'] @@ -33,3 +35,20 @@ if not value: return None return parse_date(value) + + + def __add__(self,other): + print "ADD other = ",other + + def __sub__(self,other): + print "SUB other = ",other + + + def coerce_compared_value(self, op, value): + print "IN COERCE\n" + print "op = ",op + print "value = ",value + if isinstance(value, int): + return Interval(value) + else: + return Interval() diff --git a/wikimetrics/models/report_nodes/aggregate_report.py b/wikimetrics/models/report_nodes/aggregate_report.py index b7514cf..19d7657 100644 --- a/wikimetrics/models/report_nodes/aggregate_report.py +++ b/wikimetrics/models/report_nodes/aggregate_report.py @@ -3,6 +3,7 @@ from report import ReportNode from multi_project_metric_report import MultiProjectMetricReport from celery.utils.log import get_task_logger +from pprint import pprint __all__ = ['AggregateReport', 'Aggregation'] @@ -64,6 +65,8 @@ def finish(self, result_dicts): aggregated_results = dict() + pprint(result_dicts) + task_logger.info(str(result_dicts)) result_values = [r.values() for r in result_dicts] child_results = [result for sublist in result_values for result in sublist] -- To view, visit https://gerrit.wikimedia.org/r/84756 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I25e2d0604b2bd2e18ae9510459af0ca96c316e9b Gerrit-PatchSet: 1 Gerrit-Project: analytics/wikimetrics Gerrit-Branch: master Gerrit-Owner: Stefan.petrea <ste...@garage-coding.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits