Milimetric has uploaded a new change for review.
https://gerrit.wikimedia.org/r/97327
Change subject: Validate against either user_id or user_name
......................................................................
Validate against either user_id or user_name
Change-Id: I9067d57078d43f8c08adf5e3d0ec7dbefe68f788
---
M tests/fixtures.py
M tests/test_controllers/test_cohorts.py
M tests/test_models/test_validate_cohort.py
M wikimetrics/controllers/cohorts.py
M wikimetrics/controllers/forms/cohort_upload.py
M wikimetrics/models/cohort.py
M wikimetrics/models/validate_cohort.py
M wikimetrics/templates/cohorts.html
M wikimetrics/templates/csv_upload.html
M wikimetrics/templates/forms/csv_upload.html
10 files changed, 115 insertions(+), 68 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics
refs/changes/27/97327/1
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 420edd5..2da2d11 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -287,6 +287,7 @@
wu.validating_cohort = self.cohort.id
wu.valid = None
self.cohort.validated = False
+ self.cohort.validate_as_user_ids = True
self.session.commit()
@nottest
@@ -538,7 +539,8 @@
name='demo-survivor-cohort',
enabled=True,
public=True,
- validated=True
+ validated=True,
+ validate_as_user_ids=True,
)
self.session.add(self.cohort)
self.session.commit()
diff --git a/tests/test_controllers/test_cohorts.py
b/tests/test_controllers/test_cohorts.py
index 85f3fd1..822bcc2 100644
--- a/tests/test_controllers/test_cohorts.py
+++ b/tests/test_controllers/test_cohorts.py
@@ -65,7 +65,7 @@
def test_detail_by_name_after_async_validate(self):
self.helper_reset_validation()
- validate_cohort = ValidateCohort(self.cohort.id)
+ validate_cohort = ValidateCohort(self.cohort)
async_result = validate_cohort.task.delay(validate_cohort)
self.cohort.validation_queue_key = async_result.task_id
async_result.get()
@@ -176,6 +176,7 @@
name=self.cohort.name,
project='enwiki',
csv='just has to be set to something for this test',
+ validate_as_user_ids='True',
))
assert_equal(response.status_code, 200)
assert_true(response.data.find('<h3>Create a Cohort by Uploading a
CSV') >= 0)
@@ -186,6 +187,7 @@
name='new_cohort_name',
project='enwiki',
csv=(StringIO('actual validation tested elsewhere'), 'cohort.csv'),
+ validate_as_user_ids='True',
))
assert_equal(response.status_code, 302)
assert_true(response.data.find('href="/cohorts/#') >= 0)
@@ -195,6 +197,7 @@
name='new_cohort_name',
project='enwiki',
csv='not a file',
+ validate_as_user_ids='True',
))
assert_true(response.data.find('Server error while processing your
upload') >= 0)
diff --git a/tests/test_models/test_validate_cohort.py
b/tests/test_models/test_validate_cohort.py
index 93055c3..9067406 100644
--- a/tests/test_models/test_validate_cohort.py
+++ b/tests/test_models/test_validate_cohort.py
@@ -24,7 +24,9 @@
def test_validate_cohorts(self):
self.helper_reset_validation()
- v = ValidateCohort(self.cohort.id)
+ self.cohort.validate_as_user_ids = False
+ self.session.commit()
+ v = ValidateCohort(self.cohort)
v.validate_records(self.session, self.cohort)
assert_equal(self.cohort.validated, True)
@@ -37,11 +39,12 @@
def test_validate_cohorts_with_invalid_wikiusers(self):
self.helper_reset_validation()
+ self.cohort.validate_as_user_ids = False
wikiusers = self.session.query(WikiUser).all()
wikiusers[0].project = 'blah'
wikiusers[1].mediawiki_username = 'blah'
self.session.commit()
- v = ValidateCohort(self.cohort.id)
+ v = ValidateCohort(self.cohort)
v.validate_records(self.session, self.cohort)
assert_equal(self.cohort.validated, True)
@@ -113,5 +116,6 @@
class BasicTests(unittest.TestCase):
def test_repr(self):
- v = ValidateCohort(1)
+ cohort = Cohort(id=1)
+ v = ValidateCohort(cohort)
assert_equal(str(v), '<ValidateCohort("1")>')
diff --git a/wikimetrics/controllers/cohorts.py
b/wikimetrics/controllers/cohorts.py
index 2c03e13..5e3f89f 100644
--- a/wikimetrics/controllers/cohorts.py
+++ b/wikimetrics/controllers/cohorts.py
@@ -213,16 +213,15 @@
try:
cohort = Cohort.get_safely(session, current_user.id, by_id=cohort_id)
name = cohort.name
+ vc = ValidateCohort(cohort)
+ vc.task.delay(vc)
+ return json_response(message='Validating cohort "{0}"'.format(name))
except Unauthorized:
return json_error('You are not allowed to access this cohort')
except NoResultFound:
return json_error('This cohort does not exist')
finally:
session.close()
-
- vc = ValidateCohort(cohort_id)
- vc.task.delay(vc)
- return json_response(message='Validating cohort "{0}"'.format(name))
@app.route('/cohorts/delete/<int:cohort_id>', methods=['POST'])
diff --git a/wikimetrics/controllers/forms/cohort_upload.py
b/wikimetrics/controllers/forms/cohort_upload.py
index 8a2e5ac..67ec460 100644
--- a/wikimetrics/controllers/forms/cohort_upload.py
+++ b/wikimetrics/controllers/forms/cohort_upload.py
@@ -1,5 +1,5 @@
import csv
-from wtforms import StringField, FileField, TextAreaField
+from wtforms import StringField, FileField, TextAreaField, RadioField
from wtforms.validators import Required
from secure_form import WikimetricsSecureForm
@@ -9,10 +9,14 @@
"""
Defines the fields necessary to upload a cohort from a csv file
"""
- name = StringField([Required()])
- description = TextAreaField()
- project = StringField('Default Project', [Required()])
- csv = FileField('CSV File', [Required()])
+ name = StringField([Required()])
+ description = TextAreaField()
+ project = StringField('Default Project', [Required()])
+ csv = FileField('CSV File', [Required()])
+ validate_as_user_ids = RadioField('Validate As', [Required()], choices=[
+ ('True', 'User Ids (Numbers found in the user_id column of the user
table)'),
+ ('False', 'User Names (Names found in the user_name column of the user
table)')
+ ])
@classmethod
def from_request(cls, request):
diff --git a/wikimetrics/models/cohort.py b/wikimetrics/models/cohort.py
index c879d85..e94e61c 100644
--- a/wikimetrics/models/cohort.py
+++ b/wikimetrics/models/cohort.py
@@ -37,6 +37,7 @@
enabled = Column(Boolean)
public = Column(Boolean, default=False)
validated = Column(Boolean, default=False)
+ validate_as_user_ids = Column(Boolean, default=True)
validation_queue_key = Column(String(50))
def __repr__(self):
diff --git a/wikimetrics/models/validate_cohort.py
b/wikimetrics/models/validate_cohort.py
index 86d90d2..b4b0220 100644
--- a/wikimetrics/models/validate_cohort.py
+++ b/wikimetrics/models/validate_cohort.py
@@ -6,6 +6,7 @@
from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound
from sqlalchemy.sql.expression import label, between, and_, or_
from wikimetrics.utils import deduplicate_by_key
+from wikimetrics.controllers.forms.cohort_upload import parse_username
from wikimetrics.models import (
MediawikiUser, Cohort, CohortUser, CohortUserRole, WikiUser, CohortWikiUser
)
@@ -31,14 +32,18 @@
"""
task = async_validate
- def __init__(self, cohort_id):
+ def __init__(self, cohort):
"""
- Initialize validation, set metadata
-
Parameters:
- cohort_id : an existing cohort with validated == False
+ cohort : an existing cohort
+
+ Instantiates with these properties:
+ cohort_id : id of an existing cohort with validated
== False
+ validate_as_user_ids : if True, records will be checked against
user_id
+ if False, records are checked against
user_name
"""
- self.cohort_id = cohort_id
+ self.cohort_id = cohort.id
+ self.validate_as_user_ids = cohort.validate_as_user_ids
@classmethod
def from_upload(cls, cohort_upload, owner_user_id):
@@ -59,6 +64,7 @@
enabled=True,
public=False,
validated=False,
+ validate_as_user_ids=cohort_upload.validate_as_user_ids.data ==
'True',
)
session = db.get_session()
try:
@@ -85,8 +91,9 @@
]
)
session.commit()
- return cls(cohort.id)
- except:
+ return cls(cohort)
+ except Exception, e:
+ app.logger.error(str(e))
return None
finally:
session.close()
@@ -152,7 +159,11 @@
# validate bunches of records to update the UI but not kill
performance
if len(wikiusers_by_project[wu.project]) > 999:
- validate_users(wikiusers_by_project[wu.project],
wu.project)
+ validate_users(
+ wikiusers_by_project[wu.project],
+ wu.project,
+ self.validate_as_user_ids
+ )
session.commit()
wikiusers_by_project[wu.project] = []
except:
@@ -161,7 +172,7 @@
# validate anything that wasn't big enough for a batch
for project, wikiusers in wikiusers_by_project.iteritems():
if len(wikiusers) > 0:
- validate_users(wikiusers, project)
+ validate_users(wikiusers, project, self.validate_as_user_ids)
session.commit()
unique_and_validated = deduplicate_by_key(
@@ -203,46 +214,50 @@
return new_proj
-def validate_users(wikiusers, project):
+def validate_users(wikiusers, project, validate_as_user_ids):
+ """
+ Parameters
+ wikiusers : the wikiusers with a candidate
mediawiki_username
+ project : the project these wikiusers should belong to
+ validate_as_user_ids : if True, records will be checked against
user_id
+ if False, records are checked against
user_name
+ """
session = db.get_mw_session(project)
users_dict = {wu.mediawiki_username: wu for wu in wikiusers}
try:
- # validate any user_name matches and get them out of the dictionary
- matches = session.query(MediawikiUser) \
- .filter(MediawikiUser.user_name.in_(users_dict.keys())) \
- .all()
- for match in matches:
- update_valid_user(users_dict, match.user_name, match)
+ # validate
+ if validate_as_user_ids:
+ keys_as_ints = [int(k) for k in users_dict.keys() if k.isdigit()]
+ clause = MediawikiUser.user_id.in_(keys_as_ints)
+ else:
+ clause = MediawikiUser.user_name.in_(users_dict.keys())
- # weed out any keys that are not integers, turn others into integers
- for key in users_dict.keys():
- wu = users_dict.pop(key)
- if key.isdigit():
- users_dict[int(key)] = wu
+ matches = session.query(MediawikiUser).filter(clause).all()
+ # update results
+ for match in matches:
+ if validate_as_user_ids:
+ key = str(match.user_id)
else:
- update_invalid_user(wu, key)
+ key = parse_username(match.user_name)
+ users_dict[key].mediawiki_username = match.user_name
+ users_dict[key].mediawiki_userid = match.user_id
+ users_dict[key].valid = True
+ users_dict[key].reason_invalid = None
+ # remove valid matches
+ users_dict.pop(key)
- matches = session.query(MediawikiUser) \
- .filter(MediawikiUser.user_id.in_(users_dict.keys())) \
- .all()
- for match in matches:
- update_valid_user(users_dict, match.user_id, match)
-
- for key, invalid in users_dict.iteritems():
- update_invalid_user(invalid, key)
+ # mark the rest invalid
+ for key in users_dict.keys():
+ if validate_as_user_ids:
+ users_dict[key].reason_invalid = 'invalid user_id:
{0}'.format(key)
+ else:
+ users_dict[key].reason_invalid = 'invalid user_name:
{0}'.format(key)
+ users_dict[key].valid = False
+ except Exception, e:
+ # clear out the dictionary in case of an exception, and raise the
exception
+ for key in users_dict.keys():
+ users_dict.pop(key)
+ raise e
finally:
session.close()
-
-
-def update_valid_user(users_dict, key, match):
- users_dict[key].mediawiki_username = match.user_name
- users_dict[key].mediawiki_userid = match.user_id
- users_dict[key].valid = True
- users_dict[key].reason_invalid = None
- users_dict.pop(key)
-
-
-def update_invalid_user(wikiuser, key):
- wikiuser.reason_invalid = 'invalid user_name/user_id: {0}'.format(key)
- wikiuser.valid = False
diff --git a/wikimetrics/templates/cohorts.html
b/wikimetrics/templates/cohorts.html
index 60216cd..be2e6f1 100644
--- a/wikimetrics/templates/cohorts.html
+++ b/wikimetrics/templates/cohorts.html
@@ -40,6 +40,8 @@
<a class="btn" data-bind="attr: {href: '#' + id},
click: $root.view"><span class="icon-refresh"></span> refresh</a>
<!-- /ko -->
</p>
+ </div>
+ <div data-bind="if: valid_count() < total_count()">
<p>
<a data-bind="click: $root.deleteCohort"
class="btn btn-danger"
@@ -55,7 +57,7 @@
<span data-bind="text: validated_count"></span> of
<span data-bind="text: total_count"></span> cohort members.
<span data-bind="text: valid_count"></span> are valid,
- <a data-bind="attr: {href: '/cohorts/detail/invalid-users/' +
id()}" target="_blank">
+ <a data-bind="attr: {href: '/cohorts/detail/invalid-users/' +
id}" target="_blank">
<span data-bind="text: invalid_count"></span> are invalid.
</a>
</p>
@@ -69,7 +71,7 @@
<td data-bind="text: mediawiki_username"></td>
</tr>
</tbody>
- <tfoot data-bind="if: validated_count() > 0 && total_count() >
3">
+ <tfoot data-bind="if: valid_count() > 3">
<tr>
<td colspan="2">
<a data-bind="click: $root.loadWikiusers"
diff --git a/wikimetrics/templates/csv_upload.html
b/wikimetrics/templates/csv_upload.html
index f458c51..fd5aea5 100644
--- a/wikimetrics/templates/csv_upload.html
+++ b/wikimetrics/templates/csv_upload.html
@@ -3,15 +3,22 @@
{% include "forms/csv_upload.html" %}
<div>
<h6>File Format</h6>
- <p>User Metrics expects a CSV file with the following format (the
username, project header row is not required)</p>
- <pre><code>username,project
-Evan (WMF),en</code></pre>
- <ul>
- <li><strong>username</strong>: straightforward utf-8 encoded name or a
user id.</li>
- <li><strong>project</strong>: the langauge or project code on which
that username exists.</li>
- <!--li><strong>reference date</strong>: a user-specific date which can
be used as a reference point to compute certain metrics over relative time
periods (i.e. the first month after signing up). Also, <code>userstats</code>
uses <code>datutil.parser</code> to interpret date strings, so you have a
little flexibility in the exact date format you use.</li>
- <li><strong>cohort id</strong>: an arbitrary string identifier which
will be used to group users with the same identifier into a cohort, on which
you can then aggregate statistics and compare.</li-->
- </ul>
+ <p>Wikimetrics expects a CSV file in one of the two formats below.
Project is the langauge or project code in which that username or userid
exists. Project is optional and will default to the cohort's default project.
+ <pre>user_name,project</pre>
+ <pre>user_id,project</pre>
+ </p>
+
+ <br/>
+ <h6>For Example</h6>
+ Specify EITHER user names:
+ <pre>Evan (WMF),en
+Milimetric,en
+Without project is ok too
+User Names can even contain commas, and projects at the end,enwiki</pre>
+ OR User Ids:
+ <pre>123,en
+234,en
+456</pre>
</div>
{% endblock %}
diff --git a/wikimetrics/templates/forms/csv_upload.html
b/wikimetrics/templates/forms/csv_upload.html
index ea75cf5..03a2878 100644
--- a/wikimetrics/templates/forms/csv_upload.html
+++ b/wikimetrics/templates/forms/csv_upload.html
@@ -49,6 +49,16 @@
{{ validation.messages(form.csv) }}
</div>
</div>
+ <div class="control-group">
+ <label
class="control-label">{{form.validate_as_user_ids.label}}</label>
+ <div class="controls">
+ {% for radio in form.validate_as_user_ids %}
+ {{ radio(class="required") }} {{ radio.label(style="display:
inline-block;") }}
+ <br/>
+ {% endfor %}
+ {{ validation.messages(form.validate_as_user_ids) }}
+ </div>
+ </div>
<div class="form-actions">
<input type="submit" class="btn btn-primary" value="Upload CSV"/>
</div>
--
To view, visit https://gerrit.wikimedia.org/r/97327
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I9067d57078d43f8c08adf5e3d0ec7dbefe68f788
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits