jenkins-bot has submitted this change and it was merged.

Change subject: Recompute the "total sum" column upon rescaling
......................................................................


Recompute the "total sum" column upon rescaling

We rescaled the "total sum" column just like every other column.
Thereby, the "total sum" column no longer needs to be the total sum of the
other columns in that row (due to rounding errors, and missing values
in the rows to rescale).
Hence, we now recompute the "total sum" column upon rescaling and can
thereby guarantee that the "total sum" column of a rescaled row (i.e.:
for weekly, monthly, yearly) really is the total sum of the other
columns.

Change-Id: I5adf5fa5607730b08b7d45bdd21410b2f6c7a09e
---
M aggregator/projectcounts.py
M tests/test_projectcounts/test_helpers.py
M tests/test_projectcounts/test_monthly_aggregation.py
M tests/test_projectcounts/test_weekly_aggregation.py
M tests/test_projectcounts/test_yearly_aggregation.py
5 files changed, 196 insertions(+), 156 deletions(-)

Approvals:
  Nuria: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/aggregator/projectcounts.py b/aggregator/projectcounts.py
index 565ddca..9e95e7f 100644
--- a/aggregator/projectcounts.py
+++ b/aggregator/projectcounts.py
@@ -195,6 +195,13 @@
     Each column is rescaled separatedly.
     Missing columns for good dates are not assumed to be 0.
 
+    The first column is ignored, and assumed to hold the date for the reading.
+
+    The second column is assumed to hold the sum of the remaining
+    columns. This column is not rescaled, but recomputed by
+    summing the other rescaled columns. Thereby, we can guarantee that
+    the "total sum" always is the sum of the other columns.
+
     Upon other errors, a RuntimeError is raised.
 
     The rescaled counts are returned as list of integers.
@@ -215,7 +222,20 @@
             csv_line_items = csv_data[date_str].split(',')
         except KeyError:
             raise RuntimeError("No data for '%s'" % (date_str))
-        del csv_line_items[0]  # getting rid of date column
+
+        # Getting rid of the date column. No need to aggregate date columns.
+        del csv_line_items[0]
+
+        # Getting rid of the "total sum" column.
+        # We always want the "total sum" column to be the sum of the
+        # other columns in the row. Hence, we cannot simply rescale
+        # the "total sum" column from the other rows, as that would on
+        # the one hand give rounding artifacts, and on the other hand
+        # would not work if some row is missing values for some
+        # columns. Therefore, we don't rescale the "total sum" column,
+        # but recompute it after the other columns' rescaled value is
+        # known.
+        del csv_line_items[0]
 
         if ret is None:
             ret = []
@@ -243,6 +263,9 @@
         ret = [(ret[i] * rescale_to) / aggregations[i] if aggregations[i]
                else None
                for i in range(columns)]
+
+        # Then recompute the "total sum" column and prepend it.
+        ret.insert(0, sum([0 if i is None else i for i in ret]))
     return ret
 
 
diff --git a/tests/test_projectcounts/test_helpers.py 
b/tests/test_projectcounts/test_helpers.py
index acc7c1f..1b6d4df 100644
--- a/tests/test_projectcounts/test_helpers.py
+++ b/tests/test_projectcounts/test_helpers.py
@@ -350,7 +350,7 @@
         dates = [datetime.date(2014, 8, 3)]
 
         csv_data = {
-            '2014-08-03': '2014-08-03,1,2,3'
+            '2014-08-03': '2014-08-03,3,2,1'
         }
 
         bad_dates = []
@@ -361,7 +361,7 @@
             bad_dates,
             1)
 
-        self.assertEquals(actual, [1, 2, 3])
+        self.assertEquals(actual, [3, 2, 1])
 
     def test_rescale_counts_more_days(self):
         dates = [
@@ -372,9 +372,9 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,1,2,3',
-            '2014-08-04': '2014-08-04,1000,100,10',
-            '2014-08-05': '2014-08-05,2000,200,20',
-            '2014-08-06': '2014-08-06,3000,300,30',
+            '2014-08-04': '2014-08-04,1110,1000,100,10',
+            '2014-08-05': '2014-08-05,2220,2000,200,20',
+            '2014-08-06': '2014-08-06,3330,3000,300,30',
             '2014-08-07': '2014-08-07,1,2,3',
         }
 
@@ -386,7 +386,7 @@
             bad_dates,
             3)
 
-        self.assertEquals(actual, [6000, 600, 60])
+        self.assertEquals(actual, [6660, 6000, 600, 60])
 
     def test_rescale_counts_more_days_downscale_int(self):
         dates = [
@@ -397,9 +397,9 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,1,2,3',
-            '2014-08-04': '2014-08-04,1000,100,10',
-            '2014-08-05': '2014-08-05,2000,200,20',
-            '2014-08-06': '2014-08-06,3002,301,31',
+            '2014-08-04': '2014-08-04,1110,1000,100,10',
+            '2014-08-05': '2014-08-05,2220,2000,200,20',
+            '2014-08-06': '2014-08-06,3334,3002,301,31',
             '2014-08-07': '2014-08-07,1,2,3',
         }
 
@@ -411,7 +411,7 @@
             bad_dates,
             1)
 
-        self.assertEquals(actual, [2000, 200, 20])
+        self.assertEquals(actual, [2220, 2000, 200, 20])
 
     def test_rescale_counts_more_days_upscale(self):
         dates = [
@@ -422,9 +422,9 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,1,2,3',
-            '2014-08-04': '2014-08-04,1000,100,10',
-            '2014-08-05': '2014-08-05,2000,200,20',
-            '2014-08-06': '2014-08-06,3000,300,30',
+            '2014-08-04': '2014-08-04,1110,1000,100,10',
+            '2014-08-05': '2014-08-05,2220,2000,200,20',
+            '2014-08-06': '2014-08-06,3330,3000,300,30',
             '2014-08-07': '2014-08-07,1,2,3',
         }
 
@@ -436,7 +436,7 @@
             bad_dates,
             5)
 
-        self.assertEquals(actual, [10000, 1000, 100])
+        self.assertEquals(actual, [11100, 10000, 1000, 100])
 
     def test_rescale_counts_more_days_bad_dates_middle(self):
         dates = [
@@ -447,9 +447,9 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,1,2,3',
-            '2014-08-04': '2014-08-04,1000,100,10',
-            '2014-08-05': '2014-08-05,2000,200,20',
-            '2014-08-06': '2014-08-06,3000,300,30',
+            '2014-08-04': '2014-08-04,1110,1000,100,10',
+            '2014-08-05': '2014-08-05,2220,2000,200,20',
+            '2014-08-06': '2014-08-06,3330,3000,300,30',
             '2014-08-07': '2014-08-07,1,2,3',
         }
 
@@ -463,7 +463,7 @@
             bad_dates,
             5)
 
-        self.assertEquals(actual, [10000, 1000, 100])
+        self.assertEquals(actual, [11100, 10000, 1000, 100])
 
     def test_rescale_counts_more_days_bad_dates_borders(self):
         dates = [
@@ -474,9 +474,9 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,1,2,3',
-            '2014-08-04': '2014-08-04,1000,100,10',
-            '2014-08-05': '2014-08-05,2000,200,20',
-            '2014-08-06': '2014-08-06,3000,300,30',
+            '2014-08-04': '2014-08-04,1110,1000,100,10',
+            '2014-08-05': '2014-08-05,2220,2000,200,20',
+            '2014-08-06': '2014-08-06,3330,3000,300,30',
             '2014-08-07': '2014-08-07,1,2,3',
         }
 
@@ -491,7 +491,7 @@
             bad_dates,
             5)
 
-        self.assertEquals(actual, [10000, 1000, 100])
+        self.assertEquals(actual, [11100, 10000, 1000, 100])
 
     def test_rescale_counts_more_days_bad_dates_skew(self):
         dates = [
@@ -502,9 +502,9 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,1,2,3',
-            '2014-08-04': '2014-08-04,1000,100,10',
-            '2014-08-05': '2014-08-05,2000,200,20',
-            '2014-08-06': '2014-08-06,3000,300,30',
+            '2014-08-04': '2014-08-04,1110,1000,100,10',
+            '2014-08-05': '2014-08-05,2220,2000,200,20',
+            '2014-08-06': '2014-08-06,3330,3000,300,30',
             '2014-08-07': '2014-08-07,1,2,3',
         }
 
@@ -519,7 +519,7 @@
             bad_dates,
             5)
 
-        self.assertEquals(actual, [5000, 500, 50])
+        self.assertEquals(actual, [5550, 5000, 500, 50])
 
     def test_rescale_counts_only_bad_dates(self):
         dates = [
@@ -586,10 +586,10 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,100',
-            '2014-08-04': '2014-08-04,0,3,5,0,7,10,',
-            '2014-08-05': '2014-08-05,1,,  ,0,8,  ,',
-            '2014-08-06': '2014-08-06,2,4,6,0,9,0 ,',
-            '2014-08-07': '2014-08-07,11,12,13,14,15,16,17',
+            '2014-08-04': '2014-08-04,18,0,3,5,0,7,10,',
+            '2014-08-05': '2014-08-05,9,1,,  ,0,8,  ,',
+            '2014-08-06': '2014-08-06,21,2,4,6,0,9,0 ,',
+            '2014-08-07': '2014-08-07,98,11,12,13,14,15,16,17',
         }
 
         bad_dates = []
@@ -600,7 +600,7 @@
             bad_dates,
             3)
 
-        self.assertEquals(actual, [3, 10, 16, 0, 24, 15, None])
+        self.assertEquals(actual, [68, 3, 10, 16, 0, 24, 15, None])
 
     def test_rescale_counts_zero_and_empty_columns_upscale(self):
         dates = [
@@ -611,10 +611,10 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,100',
-            '2014-08-04': '2014-08-04,0,3,5,0, , ,10,',
-            '2014-08-05': '2014-08-05,1,,  ,0,0,8,  ,',
-            '2014-08-06': '2014-08-06,2,4,6,0, ,9,0 ,',
-            '2014-08-07': '2014-08-07,11,12,13,14,15,16,17',
+            '2014-08-04': '2014-08-04,18,0,3,5,0, , ,10,',
+            '2014-08-05': '2014-08-05,9,1,,  ,0,0,8,  ,',
+            '2014-08-06': '2014-08-06,21,2,4,6,0, ,9,0 ,',
+            '2014-08-07': '2014-08-07,98,11,12,13,14,15,16,17',
         }
 
         bad_dates = []
@@ -625,7 +625,7 @@
             bad_dates,
             4)
 
-        self.assertEquals(actual, [4, 14, 22, 0, 0, 34, 20, None])
+        self.assertEquals(actual, [94, 4, 14, 22, 0, 0, 34, 20, None])
 
     def test_rescale_counts_shorter_second_column(self):
         dates = [
@@ -636,9 +636,9 @@
 
         csv_data = {
             '2014-08-03': '2014-08-03,100,200',
-            '2014-08-04': '2014-08-04,1,2',
-            '2014-08-05': '2014-08-05,3',
-            '2014-08-06': '2014-08-06,4,,',
+            '2014-08-04': '2014-08-04,3,1,2',
+            '2014-08-05': '2014-08-05,3,3',
+            '2014-08-06': '2014-08-06,4,4,,',
             '2014-08-07': '2014-08-07,300,400',
         }
 
@@ -650,4 +650,21 @@
             bad_dates,
             4)
 
-        self.assertEquals(actual, [10, 8, None])
+        self.assertEquals(actual, [18, 10, 8, None])
+
+    def test_rescale_override_total_column(self):
+        dates = [datetime.date(2014, 8, 3)]
+
+        csv_data = {
+            '2014-08-03': '2014-08-03,1,2,3'
+        }
+
+        bad_dates = []
+
+        actual = aggregator.rescale_counts(
+            csv_data,
+            dates,
+            bad_dates,
+            1)
+
+        self.assertEquals(actual, [5, 2, 3])
diff --git a/tests/test_projectcounts/test_monthly_aggregation.py 
b/tests/test_projectcounts/test_monthly_aggregation.py
index 42dce2b..346e5f5 100644
--- a/tests/test_projectcounts/test_monthly_aggregation.py
+++ b/tests/test_projectcounts/test_monthly_aggregation.py
@@ -40,14 +40,14 @@
             '2014-08-01': '2014-08-01,5,6,7,8',
             }
         for day in range(1, 32):
-            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                       first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2014-07,4800000,48000,480,30',
+            '2014-07,48510,48000,480,30',
             ])
 
     def test_monthly_csv_non_existing_csv_30_day_month(self):
@@ -61,14 +61,14 @@
             '2014-07-01': '2014-07-01,5,6,7,8',
             }
         for day in range(1, 31):
-            csv_data['2014-06-%02d' % day] = ('2014-06-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-06-%02d' % day] = ('2014-06-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                       first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2014-06,4650000,46500,465,30',
+            '2014-06,46995,46500,465,30',
             ])
 
     def test_monthly_csv_non_existing_csv_29_day_month(self):
@@ -82,14 +82,14 @@
             '2012-03-01': '2012-03-01,5,6,7,8',
             }
         for day in range(1, 30):
-            csv_data['2012-02-%02d' % day] = ('2012-02-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2012-02-%02d' % day] = ('2012-02-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                       first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2012-02,4500000,45000,450,30',
+            '2012-02,45480,45000,450,30',
             ])
 
     def test_monthly_csv_non_existing_csv_28_day_month(self):
@@ -103,14 +103,14 @@
             '2014-03-01': '2014-03-01,5,6,7,8',
             }
         for day in range(1, 30):
-            csv_data['2014-02-%02d' % day] = ('2014-02-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-02-%02d' % day] = ('2014-02-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                       first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2014-02,4350000,43500,435,30',
+            '2014-02,43965,43500,435,30',
             ])
 
     def test_monthly_csv_existing_csv_existing_month(self):
@@ -129,8 +129,8 @@
             '2014-08-01': '2014-08-01,5,6,7,8',
             }
         for day in range(1, 32):
-            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                       first_date, last_date)
@@ -157,8 +157,8 @@
             '2014-08-01': '2014-08-01,5,6,7,8',
             }
         for day in range(1, 32):
-            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                       first_date, last_date,
@@ -166,7 +166,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2014-06,1,2,3,4',
-            '2014-07,4800000,48000,480,30',
+            '2014-07,48510,48000,480,30',
             '2014-08,8,9,10,11',
             ])
 
@@ -186,8 +186,8 @@
             '2014-08-01': '2014-08-01,5,6,7,8',
             }
         for day in range(1, 32):
-            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         bad_dates = [
             datetime.date(2014, 7, 3),
@@ -199,7 +199,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2014-06,1,2,3,4',
-            '2014-07,5058620,50586,505,30',
+            '2014-07,51121,50586,505,30',
             '2014-08,8,9,10,11',
             ])
 
@@ -219,8 +219,8 @@
             '2014-07-31': '2014-08-01,5,6,7,8',
             }
         for day in range(1, 31):
-            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         bad_dates = [
             datetime.date(2014, 7, 3),
@@ -233,7 +233,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2014-06,1,2,3,4',
-            '2014-07,4907142,49071,490,30',
+            '2014-07,49591,49071,490,30',
             '2014-08,8,9,10,11',
             ])
 
@@ -252,8 +252,8 @@
             '2014-08-01': '2014-08-01,5,6,7,8',
             }
         for day in range(1, 32):
-            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         bad_dates = [datetime.date(2014, 7, day) for day in range(1, 32)]
 
@@ -281,8 +281,8 @@
             '2014-08-01': '2014-08-01,5,6,7,8',
             }
         for day in range(1, 32):
-            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
         bad_dates = [datetime.date(2014, 7, day) for day in range(1, 32)]
 
@@ -305,15 +305,15 @@
             '2014-08-01': '2014-08-01,5,6,7,8',
             }
         for day in range(1, 32):
-            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
-                                              % (day, day, day, day))
+            csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+                                              % (day, day * 101 + 1, day, day))
 
-        csv_data['2014-07-10'] = '2014-07-10,100000,0,10,1'
-        csv_data['2014-07-20'] = '2014-07-20,200000,2000,,1'
+        csv_data['2014-07-10'] = '2014-07-10,11,0,10,1'
+        csv_data['2014-07-20'] = '2014-07-20,2001,2000,,1'
 
         aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                       first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2014-07,4800000,47032,476,30',
+            '2014-07,47538,47032,476,30',
             ])
diff --git a/tests/test_projectcounts/test_weekly_aggregation.py 
b/tests/test_projectcounts/test_weekly_aggregation.py
index 693dbf2..b15a0e4 100644
--- a/tests/test_projectcounts/test_weekly_aggregation.py
+++ b/tests/test_projectcounts/test_weekly_aggregation.py
@@ -37,13 +37,13 @@
 
         csv_data = {
             '2014-06-29': '2014-06-29,1,2,3,4',
-            '2014-06-30': '2014-06-30,1000000,1000,1,1',
-            '2014-07-01': '2014-07-01,2000000,2000,2,1',
-            '2014-07-02': '2014-07-02,3000000,3000,3,1',
-            '2014-07-03': '2014-07-03,4000000,4000,4,1',
-            '2014-07-04': '2014-07-04,5000000,5000,5,1',
-            '2014-07-05': '2014-07-05,6000000,6000,6,1',
-            '2014-07-06': '2014-07-06,7000000,7000,7,1',
+            '2014-06-30': '2014-06-30,1002,1000,1,1',
+            '2014-07-01': '2014-07-01,2003,2000,2,1',
+            '2014-07-02': '2014-07-02,3004,3000,3,1',
+            '2014-07-03': '2014-07-03,4005,4000,4,1',
+            '2014-07-04': '2014-07-04,5006,5000,5,1',
+            '2014-07-05': '2014-07-05,6007,6000,6,1',
+            '2014-07-06': '2014-07-06,7008,7000,7,1',
             '2014-07-07': '2014-07-07,5,6,7,8',
             }
 
@@ -51,7 +51,7 @@
                                      first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2014W27,28000000,28000,28,7',
+            '2014W27,28035,28000,28,7',
             ])
 
     def test_weekly_csv_existing_csv_existing_week(self):
@@ -67,13 +67,13 @@
 
         csv_data = {
             '2014-06-29': '2014-06-29,1,2,3,4',
-            '2014-06-30': '2014-06-30,1000000,1000,1,1',
-            '2014-07-01': '2014-07-01,2000000,2000,2,1',
-            '2014-07-02': '2014-07-02,3000000,3000,3,1',
-            '2014-07-03': '2014-07-03,4000000,4000,4,1',
-            '2014-07-04': '2014-07-04,5000000,5000,5,1',
-            '2014-07-05': '2014-07-05,6000000,6000,6,1',
-            '2014-07-06': '2014-07-06,7000000,7000,7,1',
+            '2014-06-30': '2014-06-30,1002,1000,1,1',
+            '2014-07-01': '2014-07-01,2003,2000,2,1',
+            '2014-07-02': '2014-07-02,3004,3000,3,1',
+            '2014-07-03': '2014-07-03,4005,4000,4,1',
+            '2014-07-04': '2014-07-04,5006,5000,5,1',
+            '2014-07-05': '2014-07-05,6007,6000,6,1',
+            '2014-07-06': '2014-07-06,7008,7000,7,1',
             '2014-07-07': '2014-07-07,5,6,7,8',
             }
 
@@ -99,13 +99,13 @@
 
         csv_data = {
             '2014-06-29': '2014-06-29,1,2,3,4',
-            '2014-06-30': '2014-06-30,1000000,1000,1,1',
-            '2014-07-01': '2014-07-01,2000000,2000,2,1',
-            '2014-07-02': '2014-07-02,3000000,3000,3,1',
-            '2014-07-03': '2014-07-03,4000000,4000,4,1',
-            '2014-07-04': '2014-07-04,5000000,5000,5,1',
-            '2014-07-05': '2014-07-05,6000000,6000,6,1',
-            '2014-07-06': '2014-07-06,7000000,7000,7,1',
+            '2014-06-30': '2014-06-30,1002,1000,1,1',
+            '2014-07-01': '2014-07-01,2003,2000,2,1',
+            '2014-07-02': '2014-07-02,3004,3000,3,1',
+            '2014-07-03': '2014-07-03,4005,4000,4,1',
+            '2014-07-04': '2014-07-04,5006,5000,5,1',
+            '2014-07-05': '2014-07-05,6007,6000,6,1',
+            '2014-07-06': '2014-07-06,7008,7000,7,1',
             '2014-07-07': '2014-07-07,5,6,7,8',
             }
 
@@ -115,7 +115,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2014W26,1,2,3,4',
-            '2014W27,28000000,28000,28,7',
+            '2014W27,28035,28000,28,7',
             '2014W28,8,9,10,11',
             ])
 
@@ -132,12 +132,12 @@
 
         csv_data = {
             '2014-06-29': '2014-06-29,1,2,3,4',
-            '2014-06-30': '2014-06-30,1000000,1000,1,1',
-            '2014-07-01': '2014-07-01,2000000,2000,2,1',
-            '2014-07-02': '2014-07-02,3000000,3000,3,1',
-            '2014-07-04': '2014-07-04,5000000,5000,5,1',
-            '2014-07-05': '2014-07-05,6000000,6000,6,1',
-            '2014-07-06': '2014-07-06,7000000,7000,7,1',
+            '2014-06-30': '2014-06-30,1002,1000,1,1',
+            '2014-07-01': '2014-07-01,2003,2000,2,1',
+            '2014-07-02': '2014-07-02,3004,3000,3,1',
+            '2014-07-04': '2014-07-04,5006,5000,5,1',
+            '2014-07-05': '2014-07-05,6007,6000,6,1',
+            '2014-07-06': '2014-07-06,7008,7000,7,1',
             '2014-07-07': '2014-07-07,5,6,7,8',
             }
 
@@ -151,7 +151,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2014W26,1,2,3,4',
-            '2014W27,26600000,26600,26,7',
+            '2014W27,26633,26600,26,7',
             '2014W28,8,9,10,11',
             ])
 
@@ -168,12 +168,12 @@
 
         csv_data = {
             '2014-06-29': '2014-06-29,1,2,3,4',
-            '2014-06-30': '2014-06-30,1000000,1000,1,1',
-            '2014-07-01': '2014-07-01,2000000,2000,2,1',
-            '2014-07-02': '2014-07-02,3000000,3000,3,1',
-            '2014-07-04': '2014-07-04,5000000,5000,5,1',
-            '2014-07-05': '2014-07-05,6000000,6000,6,1',
-            '2014-07-06': '2014-07-06,7000000,7000,7,1',
+            '2014-06-30': '2014-06-30,1002,1000,1,1',
+            '2014-07-01': '2014-07-01,2003,2000,2,1',
+            '2014-07-02': '2014-07-02,3004,3000,3,1',
+            '2014-07-04': '2014-07-04,5006,5000,5,1',
+            '2014-07-05': '2014-07-05,6007,6000,6,1',
+            '2014-07-06': '2014-07-06,7008,7000,7,1',
             '2014-07-07': '2014-07-07,5,6,7,8',
             }
 
@@ -188,7 +188,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2014W26,1,2,3,4',
-            '2014W27,21000000,21000,21,7',
+            '2014W27,21028,21000,21,7',
             '2014W28,8,9,10,11',
             ])
 
@@ -204,12 +204,12 @@
 
         csv_data = {
             '2014-06-29': '2014-06-29,1,2,3,4',
-            '2014-06-30': '2014-06-30,1000000,1000,1,1',
-            '2014-07-01': '2014-07-01,2000000,2000,2,1',
-            '2014-07-02': '2014-07-02,3000000,3000,3,1',
-            '2014-07-04': '2014-07-04,5000000,5000,5,1',
-            '2014-07-05': '2014-07-05,6000000,6000,6,1',
-            '2014-07-06': '2014-07-06,7000000,7000,7,1',
+            '2014-06-30': '2014-06-30,1002,1000,1,1',
+            '2014-07-01': '2014-07-01,2003,2000,2,1',
+            '2014-07-02': '2014-07-02,3004,3000,3,1',
+            '2014-07-04': '2014-07-04,5006,5000,5,1',
+            '2014-07-05': '2014-07-05,6007,6000,6,1',
+            '2014-07-06': '2014-07-06,7008,7000,7,1',
             '2014-07-07': '2014-07-07,5,6,7,8',
             }
 
@@ -244,12 +244,12 @@
 
         csv_data = {
             '2014-06-29': '2014-06-29,1,2,3,4',
-            '2014-06-30': '2014-06-30,1000000,1000,1,1',
-            '2014-07-01': '2014-07-01,2000000,2000,2,1',
-            '2014-07-02': '2014-07-02,3000000,3000,3,1',
-            '2014-07-04': '2014-07-04,5000000,5000,5,1',
-            '2014-07-05': '2014-07-05,6000000,6000,6,1',
-            '2014-07-06': '2014-07-06,7000000,7000,7,1',
+            '2014-06-30': '2014-06-30,1002,1000,1,1',
+            '2014-07-01': '2014-07-01,2003,2000,2,1',
+            '2014-07-02': '2014-07-02,3004,3000,3,1',
+            '2014-07-04': '2014-07-04,5006,5000,5,1',
+            '2014-07-05': '2014-07-05,6007,6000,6,1',
+            '2014-07-06': '2014-07-06,7008,7000,7,1',
             '2014-07-07': '2014-07-07,5,6,7,8',
             }
 
@@ -279,13 +279,13 @@
 
         csv_data = {
             '2014-06-29': '2014-06-29,1,2,3,4',
-            '2014-06-30': '2014-06-30,1000000,1000,1,1',
-            '2014-07-01': '2014-07-01,2000000,   0,2,1',
-            '2014-07-02': '2014-07-02,3000000,3000, ,1',
-            '2014-07-03': '2014-07-03,4000000,4000,4,1',
-            '2014-07-04': '2014-07-04,5000000,5000,5,1',
-            '2014-07-05': '2014-07-05,6000000,6000,6,1',
-            '2014-07-06': '2014-07-06,7000000,7000,7,1',
+            '2014-06-30': '2014-06-30,1002,1000,1,1',
+            '2014-07-01': '2014-07-01,   3,   0,2,1',
+            '2014-07-02': '2014-07-02,3001,3000, ,1',
+            '2014-07-03': '2014-07-03,4005,4000,4,1',
+            '2014-07-04': '2014-07-04,5006,5000,5,1',
+            '2014-07-05': '2014-07-05,6007,6000,6,1',
+            '2014-07-06': '2014-07-06,7008,7000,7,1',
             '2014-07-07': '2014-07-07,5,6,7,8',
             }
 
@@ -293,5 +293,5 @@
                                      first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2014W27,28000000,26000,29,7',
+            '2014W27,26036,26000,29,7',
             ])
diff --git a/tests/test_projectcounts/test_yearly_aggregation.py 
b/tests/test_projectcounts/test_yearly_aggregation.py
index 88ec581..c84a633 100644
--- a/tests/test_projectcounts/test_yearly_aggregation.py
+++ b/tests/test_projectcounts/test_yearly_aggregation.py
@@ -43,14 +43,14 @@
             day = datetime.date(2014, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset * 101 + 1, offset, offset))
 
         aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                      first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2014,667950000,6679500,66795,365',
+            '2014,6746660,6679500,66795,365',
             ])
 
     def test_yearly_csv_non_existing_csv_366_day_year(self):
@@ -67,14 +67,14 @@
             day = datetime.date(2012, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset * 101 + 1, offset, offset))
 
         aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                      first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2012,669775000,6697750,66977,365',
+            '2012,6765092,6697750,66977,365',
             ])
 
     def test_yearly_csv_existing_csv_existing_year(self):
@@ -96,8 +96,8 @@
             day = datetime.date(2014, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset * 101 + 1, offset, offset))
 
         aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                      first_date, last_date)
@@ -127,8 +127,8 @@
             day = datetime.date(2014, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset * 101 + 1, offset, offset))
 
         aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                      first_date, last_date,
@@ -136,7 +136,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2013,1,2,3,4',
-            '2014,667950000,6679500,66795,365',
+            '2014,6746660,6679500,66795,365',
             '2015,8,9,10,11',
             ])
 
@@ -159,8 +159,8 @@
             day = datetime.date(2014, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset * 101 + 1, offset, offset))
 
         bad_dates = [
             datetime.date(2014, 7, 3),
@@ -172,7 +172,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2013,1,2,3,4',
-            '2014,667919834,6679198,66791,365',
+            '2014,6746354,6679198,66791,365',
             '2015,8,9,10,11',
             ])
 
@@ -195,8 +195,8 @@
             day = datetime.date(2014, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset * 101 + 1, offset, offset))
 
         bad_dates = [
             datetime.date(2014, 7, 3),
@@ -209,7 +209,7 @@
 
         self.assert_file_content_equals(enwiki_file_abs, [
             '2013,1,2,3,4',
-            '2014,666084668,6660846,66608,365',
+            '2014,6727819,6660846,66608,365',
             '2015,8,9,10,11',
             ])
 
@@ -233,8 +233,8 @@
             day = datetime.date(2014, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset * 101 + 1, offset, offset))
             bad_dates.append(day)
 
         aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
@@ -266,8 +266,8 @@
             day = datetime.date(2014, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset * 101 + 1, offset, offset))
             bad_dates.append(day)
 
         aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
@@ -292,15 +292,15 @@
             day = datetime.date(2014, 1, 1)
             day += datetime.timedelta(days=offset - 1)
             day_str = day.isoformat()
-            csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
-                                 (day_str, offset, offset, offset))
+            csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+                                 (day_str, offset*101 + 1, offset, offset))
 
-        csv_data['2014-07-10'] = '2014-07-10,1910000,0,191,1'
-        csv_data['2014-07-20'] = '2014-07-20,2010000,20100,,1'
+        csv_data['2014-07-10'] = '2014-07-10,19101,0,191,1'
+        csv_data['2014-07-20'] = '2014-07-20,20101,20100,,1'
 
         aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
                                      first_date, last_date)
 
         self.assert_file_content_equals(enwiki_file_abs, [
-            '2014,667950000,6660400,66776,365',
+            '2014,6727541,6660400,66776,365',
             ])

-- 
To view, visit https://gerrit.wikimedia.org/r/183148
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5adf5fa5607730b08b7d45bdd21410b2f6c7a09e
Gerrit-PatchSet: 1
Gerrit-Project: analytics/aggregator
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to