jenkins-bot has submitted this change and it was merged.
Change subject: Recompute the "total sum" column upon rescaling
......................................................................
Recompute the "total sum" column upon rescaling
We rescaled the "total sum" column just like every other column.
Thereby, the "total sum" column need no longer be the total sum of the
other columns in that row (due to rounding errors, and missing values
in the rows to rescale).
Hence, we now recompute the "total sum" column upon rescaling and can
thereby guarantee that the "total sum" column of a rescaled row (i.e.:
for weekly, monthly, yearly) really is the total sum of the other
columns.
Change-Id: I5adf5fa5607730b08b7d45bdd21410b2f6c7a09e
---
M aggregator/projectcounts.py
M tests/test_projectcounts/test_helpers.py
M tests/test_projectcounts/test_monthly_aggregation.py
M tests/test_projectcounts/test_weekly_aggregation.py
M tests/test_projectcounts/test_yearly_aggregation.py
5 files changed, 196 insertions(+), 156 deletions(-)
Approvals:
Nuria: Looks good to me, approved
jenkins-bot: Verified
diff --git a/aggregator/projectcounts.py b/aggregator/projectcounts.py
index 565ddca..9e95e7f 100644
--- a/aggregator/projectcounts.py
+++ b/aggregator/projectcounts.py
@@ -195,6 +195,13 @@
Each column is rescaled separately.
Missing columns for good dates are not assumed to be 0.
+ The first column is ignored, and assumed to hold the date for the reading.
+
+ The second column is assumed to hold the sum of the remaining
columns. This column is not rescaled, but then recomputed by
+ summing the other rescaled columns. Thereby, we can guarantee that
+ the "total sum" always is the sum of the other columns.
+
Upon other errors, a RuntimeError is raised.
The rescaled counts are returned as list of integers.
@@ -215,7 +222,20 @@
csv_line_items = csv_data[date_str].split(',')
except KeyError:
raise RuntimeError("No data for '%s'" % (date_str))
- del csv_line_items[0] # getting rid of date column
+
# Getting rid of date column. No need to aggregate date columns.
+ del csv_line_items[0]
+
+ # Getting rid of the "total sum" column.
+ # We always want the "total sum" column to be the sum of the
+ # other columns in the row. Hence, we cannot simply rescale
+ # the "total sum" column from the other rows, as that would on
+ # the one hand give rounding artifacts, and on the other hand
+ # would not work if some row is missing values for some
+ # columns. Therefore, we don't rescale the "total sum" column,
+ # but recompute it after the other columns' rescaled value is
+ # known.
+ del csv_line_items[0]
if ret is None:
ret = []
@@ -243,6 +263,9 @@
ret = [(ret[i] * rescale_to) / aggregations[i] if aggregations[i]
else None
for i in range(columns)]
+
+ # Then recompute the "total sum" column and prepend it.
+ ret.insert(0, sum([0 if i is None else i for i in ret]))
return ret
diff --git a/tests/test_projectcounts/test_helpers.py
b/tests/test_projectcounts/test_helpers.py
index acc7c1f..1b6d4df 100644
--- a/tests/test_projectcounts/test_helpers.py
+++ b/tests/test_projectcounts/test_helpers.py
@@ -350,7 +350,7 @@
dates = [datetime.date(2014, 8, 3)]
csv_data = {
- '2014-08-03': '2014-08-03,1,2,3'
+ '2014-08-03': '2014-08-03,3,2,1'
}
bad_dates = []
@@ -361,7 +361,7 @@
bad_dates,
1)
- self.assertEquals(actual, [1, 2, 3])
+ self.assertEquals(actual, [3, 2, 1])
def test_rescale_counts_more_days(self):
dates = [
@@ -372,9 +372,9 @@
csv_data = {
'2014-08-03': '2014-08-03,1,2,3',
- '2014-08-04': '2014-08-04,1000,100,10',
- '2014-08-05': '2014-08-05,2000,200,20',
- '2014-08-06': '2014-08-06,3000,300,30',
+ '2014-08-04': '2014-08-04,1110,1000,100,10',
+ '2014-08-05': '2014-08-05,2220,2000,200,20',
+ '2014-08-06': '2014-08-06,3330,3000,300,30',
'2014-08-07': '2014-08-07,1,2,3',
}
@@ -386,7 +386,7 @@
bad_dates,
3)
- self.assertEquals(actual, [6000, 600, 60])
+ self.assertEquals(actual, [6660, 6000, 600, 60])
def test_rescale_counts_more_days_downscale_int(self):
dates = [
@@ -397,9 +397,9 @@
csv_data = {
'2014-08-03': '2014-08-03,1,2,3',
- '2014-08-04': '2014-08-04,1000,100,10',
- '2014-08-05': '2014-08-05,2000,200,20',
- '2014-08-06': '2014-08-06,3002,301,31',
+ '2014-08-04': '2014-08-04,1110,1000,100,10',
+ '2014-08-05': '2014-08-05,2220,2000,200,20',
+ '2014-08-06': '2014-08-06,3334,3002,301,31',
'2014-08-07': '2014-08-07,1,2,3',
}
@@ -411,7 +411,7 @@
bad_dates,
1)
- self.assertEquals(actual, [2000, 200, 20])
+ self.assertEquals(actual, [2220, 2000, 200, 20])
def test_rescale_counts_more_days_upscale(self):
dates = [
@@ -422,9 +422,9 @@
csv_data = {
'2014-08-03': '2014-08-03,1,2,3',
- '2014-08-04': '2014-08-04,1000,100,10',
- '2014-08-05': '2014-08-05,2000,200,20',
- '2014-08-06': '2014-08-06,3000,300,30',
+ '2014-08-04': '2014-08-04,1110,1000,100,10',
+ '2014-08-05': '2014-08-05,2220,2000,200,20',
+ '2014-08-06': '2014-08-06,3330,3000,300,30',
'2014-08-07': '2014-08-07,1,2,3',
}
@@ -436,7 +436,7 @@
bad_dates,
5)
- self.assertEquals(actual, [10000, 1000, 100])
+ self.assertEquals(actual, [11100, 10000, 1000, 100])
def test_rescale_counts_more_days_bad_dates_middle(self):
dates = [
@@ -447,9 +447,9 @@
csv_data = {
'2014-08-03': '2014-08-03,1,2,3',
- '2014-08-04': '2014-08-04,1000,100,10',
- '2014-08-05': '2014-08-05,2000,200,20',
- '2014-08-06': '2014-08-06,3000,300,30',
+ '2014-08-04': '2014-08-04,1110,1000,100,10',
+ '2014-08-05': '2014-08-05,2220,2000,200,20',
+ '2014-08-06': '2014-08-06,3330,3000,300,30',
'2014-08-07': '2014-08-07,1,2,3',
}
@@ -463,7 +463,7 @@
bad_dates,
5)
- self.assertEquals(actual, [10000, 1000, 100])
+ self.assertEquals(actual, [11100, 10000, 1000, 100])
def test_rescale_counts_more_days_bad_dates_borders(self):
dates = [
@@ -474,9 +474,9 @@
csv_data = {
'2014-08-03': '2014-08-03,1,2,3',
- '2014-08-04': '2014-08-04,1000,100,10',
- '2014-08-05': '2014-08-05,2000,200,20',
- '2014-08-06': '2014-08-06,3000,300,30',
+ '2014-08-04': '2014-08-04,1110,1000,100,10',
+ '2014-08-05': '2014-08-05,2220,2000,200,20',
+ '2014-08-06': '2014-08-06,3330,3000,300,30',
'2014-08-07': '2014-08-07,1,2,3',
}
@@ -491,7 +491,7 @@
bad_dates,
5)
- self.assertEquals(actual, [10000, 1000, 100])
+ self.assertEquals(actual, [11100, 10000, 1000, 100])
def test_rescale_counts_more_days_bad_dates_skew(self):
dates = [
@@ -502,9 +502,9 @@
csv_data = {
'2014-08-03': '2014-08-03,1,2,3',
- '2014-08-04': '2014-08-04,1000,100,10',
- '2014-08-05': '2014-08-05,2000,200,20',
- '2014-08-06': '2014-08-06,3000,300,30',
+ '2014-08-04': '2014-08-04,1110,1000,100,10',
+ '2014-08-05': '2014-08-05,2220,2000,200,20',
+ '2014-08-06': '2014-08-06,3330,3000,300,30',
'2014-08-07': '2014-08-07,1,2,3',
}
@@ -519,7 +519,7 @@
bad_dates,
5)
- self.assertEquals(actual, [5000, 500, 50])
+ self.assertEquals(actual, [5550, 5000, 500, 50])
def test_rescale_counts_only_bad_dates(self):
dates = [
@@ -586,10 +586,10 @@
csv_data = {
'2014-08-03': '2014-08-03,100',
- '2014-08-04': '2014-08-04,0,3,5,0,7,10,',
- '2014-08-05': '2014-08-05,1,, ,0,8, ,',
- '2014-08-06': '2014-08-06,2,4,6,0,9,0 ,',
- '2014-08-07': '2014-08-07,11,12,13,14,15,16,17',
+ '2014-08-04': '2014-08-04,18,0,3,5,0,7,10,',
+ '2014-08-05': '2014-08-05,9,1,, ,0,8, ,',
+ '2014-08-06': '2014-08-06,21,2,4,6,0,9,0 ,',
+ '2014-08-07': '2014-08-07,98,11,12,13,14,15,16,17',
}
bad_dates = []
@@ -600,7 +600,7 @@
bad_dates,
3)
- self.assertEquals(actual, [3, 10, 16, 0, 24, 15, None])
+ self.assertEquals(actual, [68, 3, 10, 16, 0, 24, 15, None])
def test_rescale_counts_zero_and_empty_columns_upscale(self):
dates = [
@@ -611,10 +611,10 @@
csv_data = {
'2014-08-03': '2014-08-03,100',
- '2014-08-04': '2014-08-04,0,3,5,0, , ,10,',
- '2014-08-05': '2014-08-05,1,, ,0,0,8, ,',
- '2014-08-06': '2014-08-06,2,4,6,0, ,9,0 ,',
- '2014-08-07': '2014-08-07,11,12,13,14,15,16,17',
+ '2014-08-04': '2014-08-04,18,0,3,5,0, , ,10,',
+ '2014-08-05': '2014-08-05,9,1,, ,0,0,8, ,',
+ '2014-08-06': '2014-08-06,21,2,4,6,0, ,9,0 ,',
+ '2014-08-07': '2014-08-07,98,11,12,13,14,15,16,17',
}
bad_dates = []
@@ -625,7 +625,7 @@
bad_dates,
4)
- self.assertEquals(actual, [4, 14, 22, 0, 0, 34, 20, None])
+ self.assertEquals(actual, [94, 4, 14, 22, 0, 0, 34, 20, None])
def test_rescale_counts_shorter_second_column(self):
dates = [
@@ -636,9 +636,9 @@
csv_data = {
'2014-08-03': '2014-08-03,100,200',
- '2014-08-04': '2014-08-04,1,2',
- '2014-08-05': '2014-08-05,3',
- '2014-08-06': '2014-08-06,4,,',
+ '2014-08-04': '2014-08-04,3,1,2',
+ '2014-08-05': '2014-08-05,3,3',
+ '2014-08-06': '2014-08-06,4,4,,',
'2014-08-07': '2014-08-07,300,400',
}
@@ -650,4 +650,21 @@
bad_dates,
4)
- self.assertEquals(actual, [10, 8, None])
+ self.assertEquals(actual, [18, 10, 8, None])
+
+ def test_rescale_override_total_column(self):
+ dates = [datetime.date(2014, 8, 3)]
+
+ csv_data = {
+ '2014-08-03': '2014-08-03,1,2,3'
+ }
+
+ bad_dates = []
+
+ actual = aggregator.rescale_counts(
+ csv_data,
+ dates,
+ bad_dates,
+ 1)
+
+ self.assertEquals(actual, [5, 2, 3])
diff --git a/tests/test_projectcounts/test_monthly_aggregation.py
b/tests/test_projectcounts/test_monthly_aggregation.py
index 42dce2b..346e5f5 100644
--- a/tests/test_projectcounts/test_monthly_aggregation.py
+++ b/tests/test_projectcounts/test_monthly_aggregation.py
@@ -40,14 +40,14 @@
'2014-08-01': '2014-08-01,5,6,7,8',
}
for day in range(1, 32):
- csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2014-07,4800000,48000,480,30',
+ '2014-07,48510,48000,480,30',
])
def test_monthly_csv_non_existing_csv_30_day_month(self):
@@ -61,14 +61,14 @@
'2014-07-01': '2014-07-01,5,6,7,8',
}
for day in range(1, 31):
- csv_data['2014-06-%02d' % day] = ('2014-06-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-06-%02d' % day] = ('2014-06-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2014-06,4650000,46500,465,30',
+ '2014-06,46995,46500,465,30',
])
def test_monthly_csv_non_existing_csv_29_day_month(self):
@@ -82,14 +82,14 @@
'2012-03-01': '2012-03-01,5,6,7,8',
}
for day in range(1, 30):
- csv_data['2012-02-%02d' % day] = ('2012-02-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2012-02-%02d' % day] = ('2012-02-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2012-02,4500000,45000,450,30',
+ '2012-02,45480,45000,450,30',
])
def test_monthly_csv_non_existing_csv_28_day_month(self):
@@ -103,14 +103,14 @@
'2014-03-01': '2014-03-01,5,6,7,8',
}
for day in range(1, 30):
- csv_data['2014-02-%02d' % day] = ('2014-02-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-02-%02d' % day] = ('2014-02-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2014-02,4350000,43500,435,30',
+ '2014-02,43965,43500,435,30',
])
def test_monthly_csv_existing_csv_existing_month(self):
@@ -129,8 +129,8 @@
'2014-08-01': '2014-08-01,5,6,7,8',
}
for day in range(1, 32):
- csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
@@ -157,8 +157,8 @@
'2014-08-01': '2014-08-01,5,6,7,8',
}
for day in range(1, 32):
- csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date,
@@ -166,7 +166,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2014-06,1,2,3,4',
- '2014-07,4800000,48000,480,30',
+ '2014-07,48510,48000,480,30',
'2014-08,8,9,10,11',
])
@@ -186,8 +186,8 @@
'2014-08-01': '2014-08-01,5,6,7,8',
}
for day in range(1, 32):
- csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
bad_dates = [
datetime.date(2014, 7, 3),
@@ -199,7 +199,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2014-06,1,2,3,4',
- '2014-07,5058620,50586,505,30',
+ '2014-07,51121,50586,505,30',
'2014-08,8,9,10,11',
])
@@ -219,8 +219,8 @@
'2014-07-31': '2014-08-01,5,6,7,8',
}
for day in range(1, 31):
- csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
bad_dates = [
datetime.date(2014, 7, 3),
@@ -233,7 +233,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2014-06,1,2,3,4',
- '2014-07,4907142,49071,490,30',
+ '2014-07,49591,49071,490,30',
'2014-08,8,9,10,11',
])
@@ -252,8 +252,8 @@
'2014-08-01': '2014-08-01,5,6,7,8',
}
for day in range(1, 32):
- csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
bad_dates = [datetime.date(2014, 7, day) for day in range(1, 32)]
@@ -281,8 +281,8 @@
'2014-08-01': '2014-08-01,5,6,7,8',
}
for day in range(1, 32):
- csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
bad_dates = [datetime.date(2014, 7, day) for day in range(1, 32)]
@@ -305,15 +305,15 @@
'2014-08-01': '2014-08-01,5,6,7,8',
}
for day in range(1, 32):
- csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d0000,%d00,%d,1'
- % (day, day, day, day))
+ csv_data['2014-07-%02d' % day] = ('2014-07-%02d,%d,%d00,%d,1'
+ % (day, day * 101 + 1, day, day))
- csv_data['2014-07-10'] = '2014-07-10,100000,0,10,1'
- csv_data['2014-07-20'] = '2014-07-20,200000,2000,,1'
+ csv_data['2014-07-10'] = '2014-07-10,11,0,10,1'
+ csv_data['2014-07-20'] = '2014-07-20,2001,2000,,1'
aggregator.update_monthly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2014-07,4800000,47032,476,30',
+ '2014-07,47538,47032,476,30',
])
diff --git a/tests/test_projectcounts/test_weekly_aggregation.py
b/tests/test_projectcounts/test_weekly_aggregation.py
index 693dbf2..b15a0e4 100644
--- a/tests/test_projectcounts/test_weekly_aggregation.py
+++ b/tests/test_projectcounts/test_weekly_aggregation.py
@@ -37,13 +37,13 @@
csv_data = {
'2014-06-29': '2014-06-29,1,2,3,4',
- '2014-06-30': '2014-06-30,1000000,1000,1,1',
- '2014-07-01': '2014-07-01,2000000,2000,2,1',
- '2014-07-02': '2014-07-02,3000000,3000,3,1',
- '2014-07-03': '2014-07-03,4000000,4000,4,1',
- '2014-07-04': '2014-07-04,5000000,5000,5,1',
- '2014-07-05': '2014-07-05,6000000,6000,6,1',
- '2014-07-06': '2014-07-06,7000000,7000,7,1',
+ '2014-06-30': '2014-06-30,1002,1000,1,1',
+ '2014-07-01': '2014-07-01,2003,2000,2,1',
+ '2014-07-02': '2014-07-02,3004,3000,3,1',
+ '2014-07-03': '2014-07-03,4005,4000,4,1',
+ '2014-07-04': '2014-07-04,5006,5000,5,1',
+ '2014-07-05': '2014-07-05,6007,6000,6,1',
+ '2014-07-06': '2014-07-06,7008,7000,7,1',
'2014-07-07': '2014-07-07,5,6,7,8',
}
@@ -51,7 +51,7 @@
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2014W27,28000000,28000,28,7',
+ '2014W27,28035,28000,28,7',
])
def test_weekly_csv_existing_csv_existing_week(self):
@@ -67,13 +67,13 @@
csv_data = {
'2014-06-29': '2014-06-29,1,2,3,4',
- '2014-06-30': '2014-06-30,1000000,1000,1,1',
- '2014-07-01': '2014-07-01,2000000,2000,2,1',
- '2014-07-02': '2014-07-02,3000000,3000,3,1',
- '2014-07-03': '2014-07-03,4000000,4000,4,1',
- '2014-07-04': '2014-07-04,5000000,5000,5,1',
- '2014-07-05': '2014-07-05,6000000,6000,6,1',
- '2014-07-06': '2014-07-06,7000000,7000,7,1',
+ '2014-06-30': '2014-06-30,1002,1000,1,1',
+ '2014-07-01': '2014-07-01,2003,2000,2,1',
+ '2014-07-02': '2014-07-02,3004,3000,3,1',
+ '2014-07-03': '2014-07-03,4005,4000,4,1',
+ '2014-07-04': '2014-07-04,5006,5000,5,1',
+ '2014-07-05': '2014-07-05,6007,6000,6,1',
+ '2014-07-06': '2014-07-06,7008,7000,7,1',
'2014-07-07': '2014-07-07,5,6,7,8',
}
@@ -99,13 +99,13 @@
csv_data = {
'2014-06-29': '2014-06-29,1,2,3,4',
- '2014-06-30': '2014-06-30,1000000,1000,1,1',
- '2014-07-01': '2014-07-01,2000000,2000,2,1',
- '2014-07-02': '2014-07-02,3000000,3000,3,1',
- '2014-07-03': '2014-07-03,4000000,4000,4,1',
- '2014-07-04': '2014-07-04,5000000,5000,5,1',
- '2014-07-05': '2014-07-05,6000000,6000,6,1',
- '2014-07-06': '2014-07-06,7000000,7000,7,1',
+ '2014-06-30': '2014-06-30,1002,1000,1,1',
+ '2014-07-01': '2014-07-01,2003,2000,2,1',
+ '2014-07-02': '2014-07-02,3004,3000,3,1',
+ '2014-07-03': '2014-07-03,4005,4000,4,1',
+ '2014-07-04': '2014-07-04,5006,5000,5,1',
+ '2014-07-05': '2014-07-05,6007,6000,6,1',
+ '2014-07-06': '2014-07-06,7008,7000,7,1',
'2014-07-07': '2014-07-07,5,6,7,8',
}
@@ -115,7 +115,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2014W26,1,2,3,4',
- '2014W27,28000000,28000,28,7',
+ '2014W27,28035,28000,28,7',
'2014W28,8,9,10,11',
])
@@ -132,12 +132,12 @@
csv_data = {
'2014-06-29': '2014-06-29,1,2,3,4',
- '2014-06-30': '2014-06-30,1000000,1000,1,1',
- '2014-07-01': '2014-07-01,2000000,2000,2,1',
- '2014-07-02': '2014-07-02,3000000,3000,3,1',
- '2014-07-04': '2014-07-04,5000000,5000,5,1',
- '2014-07-05': '2014-07-05,6000000,6000,6,1',
- '2014-07-06': '2014-07-06,7000000,7000,7,1',
+ '2014-06-30': '2014-06-30,1002,1000,1,1',
+ '2014-07-01': '2014-07-01,2003,2000,2,1',
+ '2014-07-02': '2014-07-02,3004,3000,3,1',
+ '2014-07-04': '2014-07-04,5006,5000,5,1',
+ '2014-07-05': '2014-07-05,6007,6000,6,1',
+ '2014-07-06': '2014-07-06,7008,7000,7,1',
'2014-07-07': '2014-07-07,5,6,7,8',
}
@@ -151,7 +151,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2014W26,1,2,3,4',
- '2014W27,26600000,26600,26,7',
+ '2014W27,26633,26600,26,7',
'2014W28,8,9,10,11',
])
@@ -168,12 +168,12 @@
csv_data = {
'2014-06-29': '2014-06-29,1,2,3,4',
- '2014-06-30': '2014-06-30,1000000,1000,1,1',
- '2014-07-01': '2014-07-01,2000000,2000,2,1',
- '2014-07-02': '2014-07-02,3000000,3000,3,1',
- '2014-07-04': '2014-07-04,5000000,5000,5,1',
- '2014-07-05': '2014-07-05,6000000,6000,6,1',
- '2014-07-06': '2014-07-06,7000000,7000,7,1',
+ '2014-06-30': '2014-06-30,1002,1000,1,1',
+ '2014-07-01': '2014-07-01,2003,2000,2,1',
+ '2014-07-02': '2014-07-02,3004,3000,3,1',
+ '2014-07-04': '2014-07-04,5006,5000,5,1',
+ '2014-07-05': '2014-07-05,6007,6000,6,1',
+ '2014-07-06': '2014-07-06,7008,7000,7,1',
'2014-07-07': '2014-07-07,5,6,7,8',
}
@@ -188,7 +188,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2014W26,1,2,3,4',
- '2014W27,21000000,21000,21,7',
+ '2014W27,21028,21000,21,7',
'2014W28,8,9,10,11',
])
@@ -204,12 +204,12 @@
csv_data = {
'2014-06-29': '2014-06-29,1,2,3,4',
- '2014-06-30': '2014-06-30,1000000,1000,1,1',
- '2014-07-01': '2014-07-01,2000000,2000,2,1',
- '2014-07-02': '2014-07-02,3000000,3000,3,1',
- '2014-07-04': '2014-07-04,5000000,5000,5,1',
- '2014-07-05': '2014-07-05,6000000,6000,6,1',
- '2014-07-06': '2014-07-06,7000000,7000,7,1',
+ '2014-06-30': '2014-06-30,1002,1000,1,1',
+ '2014-07-01': '2014-07-01,2003,2000,2,1',
+ '2014-07-02': '2014-07-02,3004,3000,3,1',
+ '2014-07-04': '2014-07-04,5006,5000,5,1',
+ '2014-07-05': '2014-07-05,6007,6000,6,1',
+ '2014-07-06': '2014-07-06,7008,7000,7,1',
'2014-07-07': '2014-07-07,5,6,7,8',
}
@@ -244,12 +244,12 @@
csv_data = {
'2014-06-29': '2014-06-29,1,2,3,4',
- '2014-06-30': '2014-06-30,1000000,1000,1,1',
- '2014-07-01': '2014-07-01,2000000,2000,2,1',
- '2014-07-02': '2014-07-02,3000000,3000,3,1',
- '2014-07-04': '2014-07-04,5000000,5000,5,1',
- '2014-07-05': '2014-07-05,6000000,6000,6,1',
- '2014-07-06': '2014-07-06,7000000,7000,7,1',
+ '2014-06-30': '2014-06-30,1002,1000,1,1',
+ '2014-07-01': '2014-07-01,2003,2000,2,1',
+ '2014-07-02': '2014-07-02,3004,3000,3,1',
+ '2014-07-04': '2014-07-04,5006,5000,5,1',
+ '2014-07-05': '2014-07-05,6007,6000,6,1',
+ '2014-07-06': '2014-07-06,7008,7000,7,1',
'2014-07-07': '2014-07-07,5,6,7,8',
}
@@ -279,13 +279,13 @@
csv_data = {
'2014-06-29': '2014-06-29,1,2,3,4',
- '2014-06-30': '2014-06-30,1000000,1000,1,1',
- '2014-07-01': '2014-07-01,2000000, 0,2,1',
- '2014-07-02': '2014-07-02,3000000,3000, ,1',
- '2014-07-03': '2014-07-03,4000000,4000,4,1',
- '2014-07-04': '2014-07-04,5000000,5000,5,1',
- '2014-07-05': '2014-07-05,6000000,6000,6,1',
- '2014-07-06': '2014-07-06,7000000,7000,7,1',
+ '2014-06-30': '2014-06-30,1002,1000,1,1',
+ '2014-07-01': '2014-07-01, 3, 0,2,1',
+ '2014-07-02': '2014-07-02,3001,3000, ,1',
+ '2014-07-03': '2014-07-03,4005,4000,4,1',
+ '2014-07-04': '2014-07-04,5006,5000,5,1',
+ '2014-07-05': '2014-07-05,6007,6000,6,1',
+ '2014-07-06': '2014-07-06,7008,7000,7,1',
'2014-07-07': '2014-07-07,5,6,7,8',
}
@@ -293,5 +293,5 @@
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2014W27,28000000,26000,29,7',
+ '2014W27,26036,26000,29,7',
])
diff --git a/tests/test_projectcounts/test_yearly_aggregation.py
b/tests/test_projectcounts/test_yearly_aggregation.py
index 88ec581..c84a633 100644
--- a/tests/test_projectcounts/test_yearly_aggregation.py
+++ b/tests/test_projectcounts/test_yearly_aggregation.py
@@ -43,14 +43,14 @@
day = datetime.date(2014, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset * 101 + 1, offset, offset))
aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2014,667950000,6679500,66795,365',
+ '2014,6746660,6679500,66795,365',
])
def test_yearly_csv_non_existing_csv_366_day_year(self):
@@ -67,14 +67,14 @@
day = datetime.date(2012, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset * 101 + 1, offset, offset))
aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2012,669775000,6697750,66977,365',
+ '2012,6765092,6697750,66977,365',
])
def test_yearly_csv_existing_csv_existing_year(self):
@@ -96,8 +96,8 @@
day = datetime.date(2014, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset * 101 + 1, offset, offset))
aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
@@ -127,8 +127,8 @@
day = datetime.date(2014, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset * 101 + 1, offset, offset))
aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date,
@@ -136,7 +136,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2013,1,2,3,4',
- '2014,667950000,6679500,66795,365',
+ '2014,6746660,6679500,66795,365',
'2015,8,9,10,11',
])
@@ -159,8 +159,8 @@
day = datetime.date(2014, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset * 101 + 1, offset, offset))
bad_dates = [
datetime.date(2014, 7, 3),
@@ -172,7 +172,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2013,1,2,3,4',
- '2014,667919834,6679198,66791,365',
+ '2014,6746354,6679198,66791,365',
'2015,8,9,10,11',
])
@@ -195,8 +195,8 @@
day = datetime.date(2014, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset * 101 + 1, offset, offset))
bad_dates = [
datetime.date(2014, 7, 3),
@@ -209,7 +209,7 @@
self.assert_file_content_equals(enwiki_file_abs, [
'2013,1,2,3,4',
- '2014,666084668,6660846,66608,365',
+ '2014,6727819,6660846,66608,365',
'2015,8,9,10,11',
])
@@ -233,8 +233,8 @@
day = datetime.date(2014, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset * 101 + 1, offset, offset))
bad_dates.append(day)
aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
@@ -266,8 +266,8 @@
day = datetime.date(2014, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset * 101 + 1, offset, offset))
bad_dates.append(day)
aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
@@ -292,15 +292,15 @@
day = datetime.date(2014, 1, 1)
day += datetime.timedelta(days=offset - 1)
day_str = day.isoformat()
- csv_data[day_str] = ('%s,%d0000,%d00,%d,1' %
- (day_str, offset, offset, offset))
+ csv_data[day_str] = ('%s,%d,%d00,%d,1' %
+ (day_str, offset*101 + 1, offset, offset))
- csv_data['2014-07-10'] = '2014-07-10,1910000,0,191,1'
- csv_data['2014-07-20'] = '2014-07-20,2010000,20100,,1'
+ csv_data['2014-07-10'] = '2014-07-10,19101,0,191,1'
+ csv_data['2014-07-20'] = '2014-07-20,20101,20100,,1'
aggregator.update_yearly_csv(self.data_dir_abs, 'enwiki', csv_data,
first_date, last_date)
self.assert_file_content_equals(enwiki_file_abs, [
- '2014,667950000,6660400,66776,365',
+ '2014,6727541,6660400,66776,365',
])
--
To view, visit https://gerrit.wikimedia.org/r/183148
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5adf5fa5607730b08b7d45bdd21410b2f6c7a09e
Gerrit-PatchSet: 1
Gerrit-Project: analytics/aggregator
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits