jenkins-bot has submitted this change and it was merged.
Change subject: Switch to CRLF as line terminator for CSVs
......................................................................
Switch to CRLF as line terminator for CSVs
CRLF ending is dictated by RFC 4180. So we use it to be nice citizens.
Change-Id: I6c5a05937a589b56624e39d78cac409e09ebaf6a
---
M aggregator/projectcounts.py
M tests/test_projectcounts.py
2 files changed, 40 insertions(+), 28 deletions(-)
Approvals:
Mforns: Looks good to me, approved
jenkins-bot: Verified
diff --git a/aggregator/projectcounts.py b/aggregator/projectcounts.py
index fc426f4..31dd32d 100644
--- a/aggregator/projectcounts.py
+++ b/aggregator/projectcounts.py
@@ -30,6 +30,8 @@
PROJECTCOUNTS_STRFTIME_PATTERN = ('%%Y%s%%Y-%%m%sprojectcounts-%%Y%%m%%d-'
'%%H0000' % (os.sep, os.sep))
+CSV_LINE_ENDING = '\r\n'
+
cache = {}
@@ -158,7 +160,7 @@
if date_str != 'Date':
# No header line
- csv_data[date_str] = line.strip() + '\n'
+ csv_data[date_str] = line.strip() + CSV_LINE_ENDING
for date in util.generate_dates(first_date, last_date):
date_str = date.isoformat()
@@ -184,14 +186,16 @@
source_dir_abs, abbreviation, date)
# injecting obtained data
- csv_data[date_str] = '%s,%d,%d,%d\n' % (
+ csv_data[date_str] = '%s,%d,%d,%d%s' % (
date_str,
count_desktop,
count_mobile,
- count_zero)
+ count_zero,
+ CSV_LINE_ENDING)
with open(csv_file_abs, 'w') as csv_file:
- csv_file.write('Date,Desktop site,Mobile site,Zero site\n')
+ csv_file.write('Date,Desktop site,Mobile site,Zero site%s' % (
+ CSV_LINE_ENDING))
csv_file.writelines(sorted(csv_data.itervalues()))
@@ -226,7 +230,9 @@
if len(lines):
# Analyze last line
- last_line = (lines[-1]).split('\n', 1)[0]
+ last_line = (lines[-1]).split('\n', 1)[0] # Since the file is
+ # opened in text mode by default, line ends are normalized to
+ # LF, even though CRLF gets written.
last_line_split = last_line.split(',')
if len(last_line_split) == 4:
# Check if last line is not older than yesterday
diff --git a/tests/test_projectcounts.py b/tests/test_projectcounts.py
index c48c5d6..98ce3c7 100644
--- a/tests/test_projectcounts.py
+++ b/tests/test_projectcounts.py
@@ -54,7 +54,7 @@
def create_file(self, file_abs, lines):
with open(file_abs, 'w') as file:
for line in lines:
- file.write(line + '\n')
+ file.write(line + aggregator.CSV_LINE_ENDING)
def create_valid_aggregated_projects(self, tmp_dir_abs):
today = datetime.date.today()
@@ -73,14 +73,16 @@
for day_offset in range(-10, 0):
date = (today + datetime.timedelta(days=day_offset))
date_str = date.isoformat()
- file.write('%s,123456789,12345678,1234567\n' % (date_str))
+ file.write('%s,123456789,12345678,1234567%s' % (
+ date_str, aggregator.CSV_LINE_ENDING))
def assert_file_content_equals(self, actual_file_abs, expected_lines):
expected_lines.insert(0, 'Date,Desktop site,Mobile site,Zero site')
with open(actual_file_abs, 'r') as file:
for expected_line in expected_lines:
try:
- self.assertEquals(file.next(), expected_line + '\n')
+ self.assertEquals(file.next(), expected_line +
+ aggregator.CSV_LINE_ENDING)
except StopIteration:
self.fail("File '%s' is missing the line:\n%s" % (
actual_file_abs, expected_line))
@@ -451,11 +453,12 @@
enwiki_file_abs = os.path.join(tmp_dir_abs, 'enwiki.csv')
yesterday = aggregator.parse_string_to_date('yesterday')
- with open(enwiki_file_abs, 'w') as file:
- for day_offset in range(-10, 0):
- date = (yesterday + datetime.timedelta(days=day_offset))
- date_str = date.isoformat()
- file.write('%s,123456789,12345678,1234567\n' % (date_str))
+ lines = []
+ for day_offset in range(-10, 0):
+ date = (yesterday + datetime.timedelta(days=day_offset))
+ date_str = date.isoformat()
+ lines.append('%s,123456789,12345678,1234567' % (date_str))
+ self.create_file(enwiki_file_abs, lines)
issues = aggregator.get_validity_issues_for_aggregated_projectcounts(
tmp_dir_abs)
@@ -470,11 +473,12 @@
enwiki_file_abs = os.path.join(tmp_dir_abs, 'enwiki.csv')
today = datetime.date.today()
- with open(enwiki_file_abs, 'w') as file:
- for day_offset in range(-10, 0):
- date = (today + datetime.timedelta(days=day_offset))
- date_str = date.isoformat()
- file.write('%s,0,12345678,1234567\n' % (date_str))
+ lines = []
+ for day_offset in range(-10, 0):
+ date = (today + datetime.timedelta(days=day_offset))
+ date_str = date.isoformat()
+ lines.append('%s,0,12345678,1234567' % (date_str))
+ self.create_file(enwiki_file_abs, lines)
issues = aggregator.get_validity_issues_for_aggregated_projectcounts(
tmp_dir_abs)
@@ -489,11 +493,12 @@
enwiki_file_abs = os.path.join(tmp_dir_abs, 'enwiki.csv')
today = datetime.date.today()
- with open(enwiki_file_abs, 'w') as file:
- for day_offset in range(-10, 0):
- date = (today + datetime.timedelta(days=day_offset))
- date_str = date.isoformat()
- file.write('%s,123456789,0,1234567\n' % (date_str))
+ lines = []
+ for day_offset in range(-10, 0):
+ date = (today + datetime.timedelta(days=day_offset))
+ date_str = date.isoformat()
+ lines.append('%s,123456789,0,1234567' % (date_str))
+ self.create_file(enwiki_file_abs, lines)
issues = aggregator.get_validity_issues_for_aggregated_projectcounts(
tmp_dir_abs)
@@ -508,11 +513,12 @@
enwiki_file_abs = os.path.join(tmp_dir_abs, 'enwiki.csv')
today = datetime.date.today()
- with open(enwiki_file_abs, 'w') as file:
- for day_offset in range(-10, 0):
- date = (today + datetime.timedelta(days=day_offset))
- date_str = date.isoformat()
- file.write('%s,123456789,12345678,0\n' % (date_str))
+ lines = []
+ for day_offset in range(-10, 0):
+ date = (today + datetime.timedelta(days=day_offset))
+ date_str = date.isoformat()
+ lines.append('%s,123456789,12345678,0' % (date_str))
+ self.create_file(enwiki_file_abs, lines)
issues = aggregator.get_validity_issues_for_aggregated_projectcounts(
tmp_dir_abs)
--
To view, visit https://gerrit.wikimedia.org/r/172235
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I6c5a05937a589b56624e39d78cac409e09ebaf6a
Gerrit-PatchSet: 3
Gerrit-Project: analytics/aggregator
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits