Milimetric has submitted this change and it was merged.

Change subject: more validation and handling of special characters
......................................................................


more validation and handling of special characters

Change-Id: Ibf2c3832a7eb42626fd01162fcf3df85a5c6cbf5
---
M user_metrics/api/templates/csv_upload.html
M user_metrics/api/templates/csv_upload_review.html
M user_metrics/api/views.py
3 files changed, 37 insertions(+), 24 deletions(-)

Approvals:
  Milimetric: Verified; Looks good to me, approved



diff --git a/user_metrics/api/templates/csv_upload.html 
b/user_metrics/api/templates/csv_upload.html
index 10c4ffe..9fc6467 100644
--- a/user_metrics/api/templates/csv_upload.html
+++ b/user_metrics/api/templates/csv_upload.html
@@ -3,14 +3,14 @@
 {% include "csv_upload_form.html" %}
 <div>
     <h6>File Format</h6>
-    <p>User Metrics expects a CSV file with the following format (the 
username, project header row is not required)</p>
+    <p>User Metrics expects a CSV file with the following format (the 
username, project header row is not required).  Do pay special attention to the 
use of underscore in user names.  Most user names do not have underscores but 
may be recorded that way in your file by people copying from a user page url.  
If those users show up as invalid, you may want to try them with spaces instead 
of underscores.</p>
     <pre><code>username,project
-Evan (WMF),en</code></pre>
+Evan (WMF),en
+Most Users Do Not Have Underscores In Their Names,en
+If_This_Fails_try_replacing_underscores_with_spaces,en</code></pre>
     <ul>
         <li><strong>username</strong>: straightforward utf-8 encoded name or a 
user id.</li>
         <li><strong>project</strong>: the langauge or project code on which 
that username exists.</li>
-        <!--li><strong>reference date</strong>: a user-specific date which can 
be used as a reference point to compute certain metrics over relative time 
periods (i.e. the first month after signing up).  Also, <code>userstats</code> 
uses <code>datutil.parser</code> to interpret date strings, so you have a 
little flexibility in the exact date format you use.</li>
-        <li><strong>cohort id</strong>: an arbitrary string identifier which 
will be used to group users with the same identifier into a cohort, on which 
you can then aggregate statistics and compare.</li-->
     </ul>
 </div>
 {% endblock %}
diff --git a/user_metrics/api/templates/csv_upload_review.html 
b/user_metrics/api/templates/csv_upload_review.html
index af87828..b744bd5 100644
--- a/user_metrics/api/templates/csv_upload_review.html
+++ b/user_metrics/api/templates/csv_upload_review.html
@@ -22,11 +22,6 @@
 
 <h5>Try uploading again after fixing the issues</h5>
 {% include "csv_upload_form.html" %}
-<!--form action="/uploads/cohort/validate" method="POST" 
class="form-horizontal">
-    <div class="form-actions">
-        <input type="submit" class="btn" value="Re-Check These Users"/>
-    </div>
-</form-->
 {% endif %}
 
 <hr/>
@@ -73,6 +68,9 @@
             event.stopPropagation();
             
             form = $(this);
+            formButton = $('.btn-primary', this);
+            originalFormButtonText = formButton.val();
+            formButton.val('Finishing Upload ...');
             $.ajax({
                 url: form.attr('action'),
                 type: 'post',
@@ -81,12 +79,18 @@
                     cohort_name: cohort_name,
                     cohort_project: cohort_project
                 }
-            }).done(redirect);
+            })
+                .done(function(response){
+                    if (response === '<<error>>'){
+                        alert('There was a problem uploading, visit 
#wikimedia-analytics on freenode and get help');
+                    } else {
+                        location.href = response;
+                    }
+                })
+                .always(function(){
+                    formButton.val(originalFormButtonText);
+                });
         });
     });
-    
-    function redirect(to){
-        location.href = to;
-    };
 </script>
 {% endblock %}
diff --git a/user_metrics/api/views.py b/user_metrics/api/views.py
index 1655da7..c071d47 100644
--- a/user_metrics/api/views.py
+++ b/user_metrics/api/views.py
@@ -202,8 +202,8 @@
             return render_template('csv_upload_review.html',
                 valid=valid,
                 invalid=invalid,
-                valid_json=json.dumps(valid),
-                invalid_json=json.dumps(invalid),
+                valid_json=to_safe_json(valid),
+                invalid_json=to_safe_json(invalid),
                 cohort_name=cohort_name,
                 cohort_project=cohort_project,
                 wiki_projects=sorted(conf.PROJECT_DB_MAP.keys())
@@ -213,13 +213,24 @@
             flash('The file you uploaded was not in a valid format, could not 
be validated, or the project you specified is not configured on this instance 
of User Metrics API.')
             return redirect('/uploads/cohort')
 
+def to_safe_json(s):
+    return json.dumps(s).replace("'", "\\'").replace('"','\\"')
+
 def validate_cohort_name_allowed():
     cohort = request.args.get('cohort_name')
     available = query_mod.is_valid_cohort_query(cohort)
     return json.dumps(available)
 
 def parse_records(records, default_project):
-    return [{'username': parse_username(r[0]), 'project': r[1] if len(r) > 1 
else default_project} for r in records if r]
+    # NOTE: the reason for the crazy -1 and comma joins
+    # is that some users can have commas in their name
+    # TODO: This makes it impossible to add fields to the csv in the future,
+    # so maybe require the project to be the first field and the username to 
be the last
+    # or maybe change to a tsv format
+    return [{
+        'username': parse_username(",".join([str(p) for p in r[:-1]])),
+        'project': r[-1].decode('utf8') if len(r) > 1 else default_project
+    } for r in records if r]
 
 def parse_username(raw_name):
     stripped = str(raw_name).decode('utf8').strip()
@@ -240,11 +251,12 @@
             return new_proj
 
 def normalize_user(user_str, project):
-    uid = query_mod.is_valid_username_query(user_str, project)
+    user_str_sql_safe = user_str.encode('utf-8')
+    uid = query_mod.is_valid_username_query(user_str_sql_safe, project)
     if uid is not None:
         return (uid, user_str)
     elif user_str.isdigit():
-        username = query_mod.is_valid_uid_query(user_str, project)
+        username = query_mod.is_valid_uid_query(user_str_sql_safe, project)
         if username is not None:
             return (user_str, username)
         else:
@@ -268,7 +280,7 @@
 
 def link_to_user_page(username, project):
     project = project_name_for_link(project)
-    return 'https://%s.wikipedia.org/wiki/User:%s' % (project.decode('utf8'), 
username.decode('utf8'))
+    return 'https://%s.wikipedia.org/wiki/User:%s' % (project, username)
 
 def validate_records(records):
     valid = []
@@ -305,8 +317,6 @@
         project = request.form.get('cohort_project')
         users_json = request.form.get('users')
         users = json.loads(users_json)
-        for user in users:
-            user['username'] = user['username'].encode('utf8')
         # re-validate
         available = query_mod.is_valid_cohort_query(cohort_name)
         if not available:
@@ -322,13 +332,12 @@
         owner_id = current_user.id
         query_mod.create_cohort(cohort_name, project, owner=owner_id)
         query_mod.add_cohort_users(cohort_name, valid)
-        flash('Upload successful, your cohort is in the list below.')
         return url_for('cohort', cohort=cohort_name)
         
     except Exception, e:
         logging.exception(str(e))
         flash('There was a problem finishing the upload.  The cohort was not 
saved.')
-        return '/uploads/cohort'
+        return '<<error>>'
 
 
 def metric(metric=''):

-- 
To view, visit https://gerrit.wikimedia.org/r/68041
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ibf2c3832a7eb42626fd01162fcf3df85a5c6cbf5
Gerrit-PatchSet: 2
Gerrit-Project: analytics/user-metrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to