Milimetric has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/80572


Change subject: up to 84 percent coverage now
......................................................................

up to 84 percent coverage now

Change-Id: I4859027f1c1b1d780f76120d997f5c2c6ab1a0de
---
M tests/test_controllers/test_cohorts.py
M wikimetrics/controllers/cohorts.py
M wikimetrics/database.py
M wikimetrics/utils.py
4 files changed, 164 insertions(+), 19 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/72/80572/1

diff --git a/tests/test_controllers/test_cohorts.py b/tests/test_controllers/test_cohorts.py
index 0c5e860..fb585a3 100644
--- a/tests/test_controllers/test_cohorts.py
+++ b/tests/test_controllers/test_cohorts.py
@@ -4,9 +4,16 @@
 from tests.fixtures import WebTest
 from wikimetrics.controllers.cohorts import (
     parse_username,
+    parse_records,
     normalize_user,
     normalize_newlines,
+    normalize_project,
     to_safe_json,
+    get_wikiuser_by_name,
+    get_wikiuser_by_id,
+    project_name_for_link,
+    link_to_user_page,
+    validate_records,
 )
 from wikimetrics.models import Cohort
 
@@ -175,3 +182,135 @@
             safe_json,
             '\\"{\\\\"quotes\\\\":\\\\"Hes said: \\\\"Real Artists Ship.\\\\"\\\\"}\\"'
         )
+    
+    def test_parse_records_with_project(self):
+        parsed = parse_records(
+            [
+                ['dan', 'enwiki']
+            ],
+            None
+        )
+        assert_equal(len(parsed), 1)
+        assert_equal(parsed[0]['username'], 'Dan')
+        assert_equal(parsed[0]['raw_username'], 'Dan')
+        assert_equal(parsed[0]['project'], 'enwiki')
+    
+    def test_parse_records_without_project(self):
+        parsed = parse_records(
+            [
+                ['dan']
+            ],
+            'enwiki'
+        )
+        assert_equal(len(parsed), 1)
+        assert_equal(parsed[0]['username'], 'Dan')
+        assert_equal(parsed[0]['raw_username'], 'Dan')
+        assert_equal(parsed[0]['project'], 'enwiki')
+    
+    def test_parse_records_with_shorthand_project(self):
+        parsed = parse_records(
+            [
+                ['dan', 'en']
+            ],
+            None
+        )
+        assert_equal(len(parsed), 1)
+        assert_equal(parsed[0]['username'], 'Dan')
+        assert_equal(parsed[0]['raw_username'], 'Dan')
+        assert_equal(parsed[0]['project'], 'en')
+    
+    def test_parse_records_with_utf8(self):
+        parsed = parse_records(
+            [
+                # TODO: use weird characters U+0064    d       d
+                [u'dan', 'en']
+            ],
+            None
+        )
+        assert_equal(len(parsed), 1)
+        assert_equal(parsed[0]['username'], 'Dan')
+        assert_equal(parsed[0]['raw_username'], 'Dan')
+        assert_equal(parsed[0]['project'], 'en')
+    
+    def test_normalize_project_shorthand(self):
+        normal = normalize_project('en')
+        assert_equal(normal, 'enwiki')
+    
+    def test_normalize_project_uppercase(self):
+        normal = normalize_project('ENWIKI')
+        assert_equal(normal, 'enwiki')
+    
+    def test_normalize_project_nonexistent(self):
+        normal = normalize_project('blah')
+        assert_equal(normal, None)
+    
+    def test_get_wikiuser_by_name(self):
+        user = get_wikiuser_by_name('Dan', 'enwiki')
+        assert_equal(user.user_name, 'Dan')
+    
+    def test_get_wikiuser_by_name_nonexistent(self):
+        nonexistent = get_wikiuser_by_name('blahblahblah', 'enwiki')
+        assert_equal(nonexistent, None)
+    
+    def test_get_wikiuser_by_id(self):
+        user = get_wikiuser_by_id(self.test_mediawiki_user_id, 'enwiki')
+        assert_equal(user.user_name, 'Dan')
+    
+    def test_get_wikiuser_by_id_nonexistent(self):
+        nonexistent = get_wikiuser_by_id(123124124, 'enwiki')
+        assert_equal(nonexistent, None)
+    
+    def test_normalize_user_by_name(self):
+        normalized_user = normalize_user('Dan', 'enwiki')
+        assert_equal(normalized_user[0], self.test_mediawiki_user_id)
+        assert_equal(normalized_user[1], 'Dan')
+    
+    def test_normalize_user_by_name_nonexistent(self):
+        normalized_user = normalize_user('DanNotExists', 'enwiki')
+        assert_equal(normalized_user, None)
+    
+    def test_normalize_user_by_id(self):
+        normalized_user = normalize_user(str(self.test_mediawiki_user_id), 'enwiki')
+        assert_equal(normalized_user[0], self.test_mediawiki_user_id)
+        assert_equal(normalized_user[1], 'Dan')
+    
+    def test_normalize_user_by_id_nonexistent(self):
+        normalized_user = normalize_user('123124124', 'enwiki')
+        assert_equal(normalized_user, None)
+    
+    def test_project_name_for_link(self):
+        project = project_name_for_link('en')
+        assert_equal(project, 'en')
+    
+    def test_project_name_for_link_with_wiki(self):
+        project = project_name_for_link('enwiki')
+        assert_equal(project, 'en')
+    
+    def test_link_to_user_page(self):
+        link = link_to_user_page('Dan', 'en')
+        assert_equal(link, 'https://en.wikipedia.org/wiki/User:Dan')
+    
+    def test_validate_records(self):
+        (valid, invalid) = validate_records([
+            {
+                'project': 'enwiki',
+                'username': 'Dan',
+                'raw_username': 'Dan',
+            },
+            {
+                'project': 'blah',
+                'username': 'Dan',
+                'raw_username': 'Dan',
+            },
+            {
+                'project': 'enwiki',
+                'username': 'blah',
+                'raw_username': 'blah',
+            },
+        ])
+        
+        assert_equal(len(valid), 1)
+        assert_equal(len(invalid), 2)
+        assert_equal(valid[0]['user_id'], self.test_mediawiki_user_id)
+        assert_equal(invalid[0]['reason_invalid'], 'invalid project: blah')
+        assert_equal(invalid[1]['reason_invalid'], 'invalid user_name / user_id: blah')
diff --git a/wikimetrics/controllers/cohorts.py b/wikimetrics/controllers/cohorts.py
index 1943561..2dcfc4e 100644
--- a/wikimetrics/controllers/cohorts.py
+++ b/wikimetrics/controllers/cohorts.py
@@ -3,7 +3,7 @@
 from flask import url_for, flash, render_template, redirect, request
 from flask.ext.login import current_user
 from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound
-from ..utils import json_response, json_error, json_redirect
+from ..utils import json_response, json_error, json_redirect, deduplicate_by_key
 from ..configurables import app, db
 from ..models import (
     Cohort, CohortUser, CohortUserRole,
@@ -332,7 +332,7 @@
             .one()
         db_session.close()
         return wikiuser
-    except:
+    except (MultipleResultsFound, NoResultFound):
         db_session.close()
         return None
 
@@ -345,7 +345,7 @@
             .one()
         db_session.close()
         return wikiuser
-    except:
+    except (MultipleResultsFound, NoResultFound):
         db_session.close()
         return None
 
@@ -365,16 +365,6 @@
     return None
 
 
-def deduplicate(list_of_objects, key_function):
-    uniques = dict()
-    for o in list_of_objects:
-        key = key_function(o)
-        if not key in uniques:
-            uniques[key] = o
-    
-    return uniques.values()
-
-
 def project_name_for_link(project):
     if project.endswith('wiki'):
         return project[:len(project) - 4]
@@ -383,7 +373,7 @@
 
 def link_to_user_page(username, project):
     project = project_name_for_link(project)
-    return 'https://%s.wikipedia.org/wiki/User:%s' % (project, username)
+    return 'https://{0}.wikipedia.org/wiki/User:{1}'.format(project, username)
 
 
 def validate_records(records):
@@ -416,5 +406,5 @@
         record['user_id'], record['username'] = normalized_user
         valid.append(record)
     
-    valid = deduplicate(valid, lambda record: record['username'])
+    valid = deduplicate_by_key(valid, lambda record: record['username'])
     return (valid, invalid)
diff --git a/wikimetrics/database.py b/wikimetrics/database.py
index 52fc801..abfe2c2 100644
--- a/wikimetrics/database.py
+++ b/wikimetrics/database.py
@@ -5,6 +5,7 @@
 It uses Flask's handy config module to configure itself.
 """
 import json
+import os
 from os.path import exists
 from urllib2 import urlopen
 #from multiprocessing import Pool
@@ -63,7 +64,7 @@
         
         self.mediawiki_engines = {}
         self.mediawiki_sessionmakers = {}
-        self.project_host_map = self.get_project_host_map(usecache=False)
+        self.project_host_map = self.get_project_host_map(usecache=True)
     
     def get_session(self):
         """
@@ -155,10 +156,15 @@
                 host = host_fmt.format(host_id)
                 for project in projects:
                     project_host_map[project] = host
-            if usecache:
-                json.dump(project_host_map, open(cache_name, 'w'))
-        else:
+            if usecache and os.access(cache_name, os.W_OK):
+                try:
+                    json.dump(project_host_map, open(cache_name, 'w'))
+                except:
+                    pass  # no rights to write the file, it's OK
+        elif os.access(cache_name, os.R_OK):
             project_host_map = json.load(open(cache_name))
+        else:
+            raise Exception('Project host map could not be fetched or read')
         
         return project_host_map
 
diff --git a/wikimetrics/utils.py b/wikimetrics/utils.py
index 5af9d47..08ea3dc 100644
--- a/wikimetrics/utils.py
+++ b/wikimetrics/utils.py
@@ -79,6 +79,16 @@
     return [x for x in sequence if x not in seen and not seen_add(x)]
 
 
+def deduplicate_by_key(list_of_objects, key_function):
+    uniques = dict()
+    for o in list_of_objects:
+        key = key_function(o)
+        if not key in uniques:
+            uniques[key] = o
+    
+    return uniques.values()
+
+
 def mediawiki_date(date_field):
     date = datetime.datetime.strptime(date_field.data, date_field.format)
     return date.strftime('%Y%m%d%H%M%S')

-- 
To view, visit https://gerrit.wikimedia.org/r/80572
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4859027f1c1b1d780f76120d997f5c2c6ab1a0de
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to