Milimetric has uploaded a new change for review.
https://gerrit.wikimedia.org/r/80572
Change subject: up to 84 percent coverage now
......................................................................
up to 84 percent coverage now
Change-Id: I4859027f1c1b1d780f76120d997f5c2c6ab1a0de
---
M tests/test_controllers/test_cohorts.py
M wikimetrics/controllers/cohorts.py
M wikimetrics/database.py
M wikimetrics/utils.py
4 files changed, 164 insertions(+), 19 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/72/80572/1
diff --git a/tests/test_controllers/test_cohorts.py b/tests/test_controllers/test_cohorts.py
index 0c5e860..fb585a3 100644
--- a/tests/test_controllers/test_cohorts.py
+++ b/tests/test_controllers/test_cohorts.py
@@ -4,9 +4,16 @@
from tests.fixtures import WebTest
from wikimetrics.controllers.cohorts import (
parse_username,
+ parse_records,
normalize_user,
normalize_newlines,
+ normalize_project,
to_safe_json,
+ get_wikiuser_by_name,
+ get_wikiuser_by_id,
+ project_name_for_link,
+ link_to_user_page,
+ validate_records,
)
from wikimetrics.models import Cohort
@@ -175,3 +182,135 @@
safe_json,
'\\"{\\\\"quotes\\\\":\\\\"Hes said: \\\\"Real Artists Ship.\\\\"\\\\"}\\"'
)
+
+ def test_parse_records_with_project(self):
+ parsed = parse_records(
+ [
+ ['dan', 'enwiki']
+ ],
+ None
+ )
+ assert_equal(len(parsed), 1)
+ assert_equal(parsed[0]['username'], 'Dan')
+ assert_equal(parsed[0]['raw_username'], 'Dan')
+ assert_equal(parsed[0]['project'], 'enwiki')
+
+ def test_parse_records_without_project(self):
+ parsed = parse_records(
+ [
+ ['dan']
+ ],
+ 'enwiki'
+ )
+ assert_equal(len(parsed), 1)
+ assert_equal(parsed[0]['username'], 'Dan')
+ assert_equal(parsed[0]['raw_username'], 'Dan')
+ assert_equal(parsed[0]['project'], 'enwiki')
+
+ def test_parse_records_with_shorthand_project(self):
+ parsed = parse_records(
+ [
+ ['dan', 'en']
+ ],
+ None
+ )
+ assert_equal(len(parsed), 1)
+ assert_equal(parsed[0]['username'], 'Dan')
+ assert_equal(parsed[0]['raw_username'], 'Dan')
+ assert_equal(parsed[0]['project'], 'en')
+
+ def test_parse_records_with_utf8(self):
+ parsed = parse_records(
+ [
+ # TODO: use weird characters U+0064 d d
+ [u'dan', 'en']
+ ],
+ None
+ )
+ assert_equal(len(parsed), 1)
+ assert_equal(parsed[0]['username'], 'Dan')
+ assert_equal(parsed[0]['raw_username'], 'Dan')
+ assert_equal(parsed[0]['project'], 'en')
+
+ def test_normalize_project_shorthand(self):
+ normal = normalize_project('en')
+ assert_equal(normal, 'enwiki')
+
+ def test_normalize_project_uppercase(self):
+ normal = normalize_project('ENWIKI')
+ assert_equal(normal, 'enwiki')
+
+ def test_normalize_project_nonexistent(self):
+ normal = normalize_project('blah')
+ assert_equal(normal, None)
+
+ def test_get_wikiuser_by_name(self):
+ user = get_wikiuser_by_name('Dan', 'enwiki')
+ assert_equal(user.user_name, 'Dan')
+
+ def test_get_wikiuser_by_name_nonexistent(self):
+ nonexistent = get_wikiuser_by_name('blahblahblah', 'enwiki')
+ assert_equal(nonexistent, None)
+
+ def test_get_wikiuser_by_id(self):
+ user = get_wikiuser_by_id(self.test_mediawiki_user_id, 'enwiki')
+ assert_equal(user.user_name, 'Dan')
+
+ def test_get_wikiuser_by_id_nonexistent(self):
+ nonexistent = get_wikiuser_by_id(123124124, 'enwiki')
+ assert_equal(nonexistent, None)
+
+ def test_normalize_user_by_name(self):
+ normalized_user = normalize_user('Dan', 'enwiki')
+ assert_equal(normalized_user[0], self.test_mediawiki_user_id)
+ assert_equal(normalized_user[1], 'Dan')
+
+ def test_normalize_user_by_name_nonexistent(self):
+ normalized_user = normalize_user('DanNotExists', 'enwiki')
+ assert_equal(normalized_user, None)
+
+ def test_normalize_user_by_id(self):
+ normalized_user = normalize_user(str(self.test_mediawiki_user_id), 'enwiki')
+ assert_equal(normalized_user[0], self.test_mediawiki_user_id)
+ assert_equal(normalized_user[1], 'Dan')
+
+ def test_normalize_user_by_id_nonexistent(self):
+ normalized_user = normalize_user('123124124', 'enwiki')
+ assert_equal(normalized_user, None)
+
+ def test_project_name_for_link(self):
+ project = project_name_for_link('en')
+ assert_equal(project, 'en')
+
+ def test_project_name_for_link_with_wiki(self):
+ project = project_name_for_link('enwiki')
+ assert_equal(project, 'en')
+
+ def test_link_to_user_page(self):
+ link = link_to_user_page('Dan', 'en')
+ assert_equal(link, 'https://en.wikipedia.org/wiki/User:Dan')
+
+ def test_validate_records(self):
+ (valid, invalid) = validate_records([
+ {
+ 'project': 'enwiki',
+ 'username': 'Dan',
+ 'raw_username': 'Dan',
+ },
+ {
+ 'project': 'blah',
+ 'username': 'Dan',
+ 'raw_username': 'Dan',
+ },
+ {
+ 'project': 'enwiki',
+ 'username': 'blah',
+ 'raw_username': 'blah',
+ },
+ ])
+
+ assert_equal(len(valid), 1)
+ assert_equal(len(invalid), 2)
+ assert_equal(valid[0]['user_id'], self.test_mediawiki_user_id)
+ assert_equal(invalid[0]['reason_invalid'], 'invalid project: blah')
+ assert_equal(invalid[1]['reason_invalid'], 'invalid user_name / user_id: blah')
diff --git a/wikimetrics/controllers/cohorts.py b/wikimetrics/controllers/cohorts.py
index 1943561..2dcfc4e 100644
--- a/wikimetrics/controllers/cohorts.py
+++ b/wikimetrics/controllers/cohorts.py
@@ -3,7 +3,7 @@
from flask import url_for, flash, render_template, redirect, request
from flask.ext.login import current_user
from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound
-from ..utils import json_response, json_error, json_redirect
+from ..utils import json_response, json_error, json_redirect, deduplicate_by_key
from ..configurables import app, db
from ..models import (
Cohort, CohortUser, CohortUserRole,
@@ -332,7 +332,7 @@
.one()
db_session.close()
return wikiuser
- except:
+ except (MultipleResultsFound, NoResultFound):
db_session.close()
return None
@@ -345,7 +345,7 @@
.one()
db_session.close()
return wikiuser
- except:
+ except (MultipleResultsFound, NoResultFound):
db_session.close()
return None
@@ -365,16 +365,6 @@
return None
-def deduplicate(list_of_objects, key_function):
- uniques = dict()
- for o in list_of_objects:
- key = key_function(o)
- if not key in uniques:
- uniques[key] = o
-
- return uniques.values()
-
-
def project_name_for_link(project):
if project.endswith('wiki'):
return project[:len(project) - 4]
@@ -383,7 +373,7 @@
def link_to_user_page(username, project):
project = project_name_for_link(project)
- return 'https://%s.wikipedia.org/wiki/User:%s' % (project, username)
+ return 'https://{0}.wikipedia.org/wiki/User:{1}'.format(project, username)
def validate_records(records):
@@ -416,5 +406,5 @@
record['user_id'], record['username'] = normalized_user
valid.append(record)
- valid = deduplicate(valid, lambda record: record['username'])
+ valid = deduplicate_by_key(valid, lambda record: record['username'])
return (valid, invalid)
diff --git a/wikimetrics/database.py b/wikimetrics/database.py
index 52fc801..abfe2c2 100644
--- a/wikimetrics/database.py
+++ b/wikimetrics/database.py
@@ -5,6 +5,7 @@
It uses Flask's handy config module to configure itself.
"""
import json
+import os
from os.path import exists
from urllib2 import urlopen
#from multiprocessing import Pool
@@ -63,7 +64,7 @@
self.mediawiki_engines = {}
self.mediawiki_sessionmakers = {}
- self.project_host_map = self.get_project_host_map(usecache=False)
+ self.project_host_map = self.get_project_host_map(usecache=True)
def get_session(self):
"""
@@ -155,10 +156,15 @@
host = host_fmt.format(host_id)
for project in projects:
project_host_map[project] = host
- if usecache:
- json.dump(project_host_map, open(cache_name, 'w'))
- else:
+ if usecache and os.access(cache_name, os.W_OK):
+ try:
+ json.dump(project_host_map, open(cache_name, 'w'))
+ except:
+ pass # no rights to write the file, it's OK
+ elif os.access(cache_name, os.R_OK):
project_host_map = json.load(open(cache_name))
+ else:
+ raise Exception('Project host map could not be fetched or read')
return project_host_map
diff --git a/wikimetrics/utils.py b/wikimetrics/utils.py
index 5af9d47..08ea3dc 100644
--- a/wikimetrics/utils.py
+++ b/wikimetrics/utils.py
@@ -79,6 +79,16 @@
return [x for x in sequence if x not in seen and not seen_add(x)]
+def deduplicate_by_key(list_of_objects, key_function):
+ uniques = dict()
+ for o in list_of_objects:
+ key = key_function(o)
+ if not key in uniques:
+ uniques[key] = o
+
+ return uniques.values()
+
+
def mediawiki_date(date_field):
date = datetime.datetime.strptime(date_field.data, date_field.format)
return date.strftime('%Y%m%d%H%M%S')
--
To view, visit https://gerrit.wikimedia.org/r/80572
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I4859027f1c1b1d780f76120d997f5c2c6ab1a0de
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits