QChris has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/172235

Change subject: Switch to CRLF as line terminator for CSVs
......................................................................

Switch to CRLF as line terminator for CSVs

RFC 4180 specifies CRLF as the line ending for CSV files, so we use it to be good citizens.

Change-Id: I6c5a05937a589b56624e39d78cac409e09ebaf6a
---
M aggregator/projectcounts.py
M tests/test_projectcounts.py
2 files changed, 40 insertions(+), 28 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/aggregator 
refs/changes/35/172235/1

diff --git a/aggregator/projectcounts.py b/aggregator/projectcounts.py
index badad07..ef5969c 100644
--- a/aggregator/projectcounts.py
+++ b/aggregator/projectcounts.py
@@ -30,6 +30,8 @@
 PROJECTCOUNTS_STRFTIME_PATTERN = ('%%Y%s%%Y-%%m%sprojectcounts-%%Y%%m%%d-'
                                   '%%H0000' % (os.sep, os.sep))
 
+CSV_LINE_ENDING = '\r\n'
+
 cache = {}
 
 
@@ -158,7 +160,7 @@
 
                 if date_str != 'Date':
                     # No header line
-                    csv_data[date_str] = line.strip() + '\n'
+                    csv_data[date_str] = line.strip() + CSV_LINE_ENDING
 
         for date in util.generate_dates(first_date, last_date):
             date_str = date.isoformat()
@@ -184,14 +186,16 @@
                     source_dir_abs, abbreviation, date)
 
                 # injecting obtained data
-                csv_data[date_str] = '%s,%s,%s,%s\n' % (
+                csv_data[date_str] = '%s,%s,%s,%s%s' % (
                     date_str,
                     count_desktop,
                     count_mobile,
-                    count_zero)
+                    count_zero,
+                    CSV_LINE_ENDING)
 
         with open(csv_file_abs, 'w') as csv_file:
-            csv_file.write('Date,Desktop site,Mobile site,Zero site\n')
+            csv_file.write('Date,Desktop site,Mobile site,Zero site%s' % (
+                CSV_LINE_ENDING))
             csv_file.writelines(sorted(csv_data.itervalues()))
 
 
@@ -226,7 +230,9 @@
 
             if len(lines):
                 # Analyze last line
-                last_line = (lines[-1]).split('\n', 1)[0]
+                last_line = (lines[-1]).split('\n', 1)[0]  # Since the file is
+                # opened in text mode by default, line ends are normalized to
+                # LF, even though CRLF gets written.
                 last_line_split = last_line.split(',')
                 if len(last_line_split) == 4:
                     # Check if last line is not older than yesterday
diff --git a/tests/test_projectcounts.py b/tests/test_projectcounts.py
index c48c5d6..98ce3c7 100644
--- a/tests/test_projectcounts.py
+++ b/tests/test_projectcounts.py
@@ -54,7 +54,7 @@
     def create_file(self, file_abs, lines):
         with open(file_abs, 'w') as file:
             for line in lines:
-                file.write(line + '\n')
+                file.write(line + aggregator.CSV_LINE_ENDING)
 
     def create_valid_aggregated_projects(self, tmp_dir_abs):
         today = datetime.date.today()
@@ -73,14 +73,16 @@
                 for day_offset in range(-10, 0):
                     date = (today + datetime.timedelta(days=day_offset))
                     date_str = date.isoformat()
-                    file.write('%s,123456789,12345678,1234567\n' % (date_str))
+                    file.write('%s,123456789,12345678,1234567%s' % (
+                        date_str, aggregator.CSV_LINE_ENDING))
 
     def assert_file_content_equals(self, actual_file_abs, expected_lines):
         expected_lines.insert(0, 'Date,Desktop site,Mobile site,Zero site')
         with open(actual_file_abs, 'r') as file:
             for expected_line in expected_lines:
                 try:
-                    self.assertEquals(file.next(), expected_line + '\n')
+                    self.assertEquals(file.next(), expected_line +
+                                      aggregator.CSV_LINE_ENDING)
                 except StopIteration:
                     self.fail("File '%s' is missing the line:\n%s" % (
                         actual_file_abs, expected_line))
@@ -451,11 +453,12 @@
 
         enwiki_file_abs = os.path.join(tmp_dir_abs, 'enwiki.csv')
         yesterday = aggregator.parse_string_to_date('yesterday')
-        with open(enwiki_file_abs, 'w') as file:
-            for day_offset in range(-10, 0):
-                date = (yesterday + datetime.timedelta(days=day_offset))
-                date_str = date.isoformat()
-                file.write('%s,123456789,12345678,1234567\n' % (date_str))
+        lines = []
+        for day_offset in range(-10, 0):
+            date = (yesterday + datetime.timedelta(days=day_offset))
+            date_str = date.isoformat()
+            lines.append('%s,123456789,12345678,1234567' % (date_str))
+        self.create_file(enwiki_file_abs, lines)
 
         issues = aggregator.get_validity_issues_for_aggregated_projectcounts(
             tmp_dir_abs)
@@ -470,11 +473,12 @@
 
         enwiki_file_abs = os.path.join(tmp_dir_abs, 'enwiki.csv')
         today = datetime.date.today()
-        with open(enwiki_file_abs, 'w') as file:
-            for day_offset in range(-10, 0):
-                date = (today + datetime.timedelta(days=day_offset))
-                date_str = date.isoformat()
-                file.write('%s,0,12345678,1234567\n' % (date_str))
+        lines = []
+        for day_offset in range(-10, 0):
+            date = (today + datetime.timedelta(days=day_offset))
+            date_str = date.isoformat()
+            lines.append('%s,0,12345678,1234567' % (date_str))
+        self.create_file(enwiki_file_abs, lines)
 
         issues = aggregator.get_validity_issues_for_aggregated_projectcounts(
             tmp_dir_abs)
@@ -489,11 +493,12 @@
 
         enwiki_file_abs = os.path.join(tmp_dir_abs, 'enwiki.csv')
         today = datetime.date.today()
-        with open(enwiki_file_abs, 'w') as file:
-            for day_offset in range(-10, 0):
-                date = (today + datetime.timedelta(days=day_offset))
-                date_str = date.isoformat()
-                file.write('%s,123456789,0,1234567\n' % (date_str))
+        lines = []
+        for day_offset in range(-10, 0):
+            date = (today + datetime.timedelta(days=day_offset))
+            date_str = date.isoformat()
+            lines.append('%s,123456789,0,1234567' % (date_str))
+        self.create_file(enwiki_file_abs, lines)
 
         issues = aggregator.get_validity_issues_for_aggregated_projectcounts(
             tmp_dir_abs)
@@ -508,11 +513,12 @@
 
         enwiki_file_abs = os.path.join(tmp_dir_abs, 'enwiki.csv')
         today = datetime.date.today()
-        with open(enwiki_file_abs, 'w') as file:
-            for day_offset in range(-10, 0):
-                date = (today + datetime.timedelta(days=day_offset))
-                date_str = date.isoformat()
-                file.write('%s,123456789,12345678,0\n' % (date_str))
+        lines = []
+        for day_offset in range(-10, 0):
+            date = (today + datetime.timedelta(days=day_offset))
+            date_str = date.isoformat()
+            lines.append('%s,123456789,12345678,0' % (date_str))
+        self.create_file(enwiki_file_abs, lines)
 
         issues = aggregator.get_validity_issues_for_aggregated_projectcounts(
             tmp_dir_abs)

-- 
To view, visit https://gerrit.wikimedia.org/r/172235
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I6c5a05937a589b56624e39d78cac409e09ebaf6a
Gerrit-PatchSet: 1
Gerrit-Project: analytics/aggregator
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to