Milimetric has uploaded a new change for review.
https://gerrit.wikimedia.org/r/74393
Change subject: fixed csv upload user project parsing
......................................................................
fixed csv upload user project parsing
Change-Id: I4d14ea1873cea39b9776a397b3325d139d679c63
---
A tests/cohort_csvs/crazy_names.csv
A tests/cohort_csvs/for enwiki.csv
A tests/cohort_csvs/umapi_sample1.csv
A tests/cohort_csvs/unicode_cohort.csv
M wikimetrics/controllers/cohorts.py
5 files changed, 30 insertions(+), 4 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/93/74393/1
diff --git a/tests/cohort_csvs/crazy_names.csv b/tests/cohort_csvs/crazy_names.csv
new file mode 100644
index 0000000..bca2653
--- /dev/null
+++ b/tests/cohort_csvs/crazy_names.csv
@@ -0,0 +1,7 @@
+Chase me ladies, I'm the Cavalry,commonswiki
+Swim Team "Prishtina",commonswiki
+Dr. Azrael Tod,commonswiki
+Junghoon,park,commonswiki
+Khalsa250,000,commonswiki
+Xudognik,,999,commonswiki
+André Costa (WMSE),commonswiki
diff --git a/tests/cohort_csvs/for enwiki.csv b/tests/cohort_csvs/for enwiki.csv
new file mode 100644
index 0000000..76c614e
--- /dev/null
+++ b/tests/cohort_csvs/for enwiki.csv
@@ -0,0 +1 @@
+DISEman,enwiki
diff --git a/tests/cohort_csvs/umapi_sample1.csv b/tests/cohort_csvs/umapi_sample1.csv
new file mode 100644
index 0000000..59c5cdc
--- /dev/null
+++ b/tests/cohort_csvs/umapi_sample1.csv
@@ -0,0 +1,3 @@
+DarTar,
+
+
diff --git a/tests/cohort_csvs/unicode_cohort.csv b/tests/cohort_csvs/unicode_cohort.csv
new file mode 100644
index 0000000..5268781
--- /dev/null
+++ b/tests/cohort_csvs/unicode_cohort.csv
@@ -0,0 +1,4 @@
+تيسير سامى سلامة,ar
+سهام موسى عتما,ar
+نورام الشقيرى,ar
+562526,ar
diff --git a/wikimetrics/controllers/cohorts.py b/wikimetrics/controllers/cohorts.py
index 1de4587..5542e7f 100644
--- a/wikimetrics/controllers/cohorts.py
+++ b/wikimetrics/controllers/cohorts.py
@@ -264,10 +264,21 @@
# TODO: This makes it impossible to add fields to the csv in the future,
# so maybe require the project to be the first field and the username to be the last
# or maybe change to a tsv format
- return [{
- 'username': parse_username(",".join([str(p) for p in r[:-1]])),
- 'project': r[-1].decode('utf8') if len(r) > 1 else default_project
- } for r in records if r]
+ parsed = []
+ for r in records:
+ if r:
+ if len(r) > 1:
+ username = ",".join([str(p) for p in r[:-1]])
+ project = r[-1].decode('utf8') or default_project
+ else:
+ username = r[0]
+ project = default_project
+
+ parsed.append({
+ 'username': parse_username(username),
+ 'project': project,
+ })
+ return parsed
def parse_username(raw_name):
--
To view, visit https://gerrit.wikimedia.org/r/74393
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I4d14ea1873cea39b9776a397b3325d139d679c63
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits