Mwalker has submitted this change and it was merged. Change subject: Correctly uni-encoded CSV export ......................................................................
Correctly uni-encoded CSV export Change-Id: Ic16c70cd822fa2cbc68d0abf6ffd55940701ea64 --- A silverpop_export/unicode_csv_writer.py M silverpop_export/update.py 2 files changed, 42 insertions(+), 2 deletions(-) Approvals: Mwalker: Looks good to me, approved jenkins-bot: Verified diff --git a/silverpop_export/unicode_csv_writer.py b/silverpop_export/unicode_csv_writer.py new file mode 100644 index 0000000..8be2e9c --- /dev/null +++ b/silverpop_export/unicode_csv_writer.py @@ -0,0 +1,40 @@ +"""Unicode CSV wedge adapted from http://docs.python.org/2/library/csv.html""" + +import csv +import codecs +import cStringIO + + +class UnicodeCsvWriter: + """ + A CSV writer which will write rows to CSV file "f", + which is encoded in the given encoding. + """ + + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + # Redirect output to a queue + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwds) + self.stream = f + self.encoder = codecs.getincrementalencoder(encoding)() + + def writerow(self, row): + encoded_row = [] + for s in row: + if hasattr(s, 'encode'): + s = s.encode("utf-8") + encoded_row.append(s) + self.writer.writerow(encoded_row) + # Fetch UTF-8 output from the queue ... + data = self.queue.getvalue() + data = data.decode("utf-8") + # ... and reencode it into the target encoding + data = self.encoder.encode(data) + # write to the target stream + self.stream.write(data) + # empty queue + self.queue.truncate(0) + + def writerows(self, rows): + for row in rows: + self.writerow(row) diff --git a/silverpop_export/update.py b/silverpop_export/update.py index 8a9fa1b..a9c9d67 100644 --- a/silverpop_export/update.py +++ b/silverpop_export/update.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -from csv import writer as CsvWriter import errno import re import time @@ -13,6 +12,7 @@ from sftp.client import Client as SftpClient from database.db import Connection as DbConnection, Query as DbQuery +import unicode_csv_writer import process.lock as lock @@ -55,7 +55,7 @@ if not hasattr(output, 'write'): output = open(output, 'wb') - w = CsvWriter(output) + w = unicode_csv_writer.UnicodeCsvWriter(output) gen = db.execute_paged(query=query, pageIndex=sort_by_index, pageSize=10000) -- To view, visit https://gerrit.wikimedia.org/r/148328 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ic16c70cd822fa2cbc68d0abf6ffd55940701ea64 Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/fundraising/tools Gerrit-Branch: master Gerrit-Owner: Awight <awi...@wikimedia.org> Gerrit-Reviewer: Awight <awi...@wikimedia.org> Gerrit-Reviewer: Ejegg <eeggles...@wikimedia.org> Gerrit-Reviewer: Katie Horn <kh...@wikimedia.org> Gerrit-Reviewer: Mwalker <mwal...@wikimedia.org> Gerrit-Reviewer: Ssmith <ssm...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits